In [1]:
!pip install openai==0.28
!pip install langchain==0.0.191

Collecting openai==0.28
  Downloading openai-0.28.0-py3-none-any.whl.metadata (13 kB)
Collecting tqdm (from openai==0.28)
  Downloading tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting aiohttp (from openai==0.28)
  Downloading aiohttp-3.11.10-cp311-cp311-macosx_11_0_arm64.whl.metadata (7.7 kB)
Collecting aiohappyeyeballs>=2.3.0 (from aiohttp->openai==0.28)
  Downloading aiohappyeyeballs-2.4.4-py3-none-any.whl.metadata (6.1 kB)
Collecting aiosignal>=1.1.2 (from aiohttp->openai==0.28)
  Using cached aiosignal-1.3.1-py3-none-any.whl.metadata (4.0 kB)
Collecting attrs>=17.3.0 (from aiohttp->openai==0.28)
  Using cached attrs-24.2.0-py3-none-any.whl.metadata (11 kB)
Collecting frozenlist>=1.1.1 (from aiohttp->openai==0.28)
  Using cached frozenlist-1.5.0-cp311-cp311-macosx_11_0_arm64.whl.metadata (13 kB)
Collecting multidict<7.0,>=4.5 (from aiohttp->openai==0.28)
  Using cached multidict-6.1.0-cp311-cp311-macosx_11_0_arm64.whl.metadata (5.0 kB)
Collecting propcache>=0.2.0 (from aioh

In [1]:
import sys
sys.path.append('../')

from common_imports import *
from common_functions import *

In [2]:
import openai
openai.api_key = ""

In [3]:
def preprocess_df(df):
    # Preprocessing the data
    df['datetime'] = pd.to_datetime(df['datetime'])  # Convert 'date' column to datetime
    df.sort_values(by=['name', 'datetime'], inplace=True)

    weather_features = ["temp", "dew", "precip", "precipcover",
                        "windspeed","winddir","sealevelpressure",
                        "cloudcover","solarradiation", "elevation"]

    df['FireOccurred'] = df['Started'].notna().astype(int)

    df = df.drop(columns=['windgust', 'Lat', 'Long', 'Duration', 'Started', 'OptimizedSeverityScore', "closest_city"])

    df.fillna(0, inplace=True)
    return df, weather_features

In [4]:
df = pd.read_csv("lookup.csv")
df, weather_features = preprocess_df(df)

In [5]:
df.columns

Index(['name', 'datetime', 'temp', 'dew', 'precip', 'precipcover', 'snow',
       'snowdepth', 'windspeed', 'winddir', 'sealevelpressure', 'cloudcover',
       'solarradiation', 'county', 'elevation', 'OptimizedSeverityScore_Log',
       'FireOccurred'],
      dtype='object')

In [6]:
weather_features

['temp',
 'dew',
 'precip',
 'precipcover',
 'windspeed',
 'winddir',
 'sealevelpressure',
 'cloudcover',
 'solarradiation',
 'elevation']

In [7]:
df['datetime'] = pd.to_datetime(df['datetime'])

In [8]:
df = df.sort_values(by=['name', 'datetime'])

In [15]:
# Function to get past 15 days of weather data
def get_past_15_days(city, date, data):
    past_data = data[(data['name'] == city) & (data['datetime'] < date) & (data['datetime'] >= date - pd.Timedelta(days=15))]
    return past_data

# Function to summarize past 15 days for the prompt
def summarize_past_data(past_data):
    if past_data.empty:
        return "No data available for the past 15 days."
    avg_temp = past_data['temp'].mean()
    avg_dew = past_data['dew'].mean()
    avg_precipitation = past_data['precip'].mean()
    avg_precp_cover = past_data['precipcover'].mean()
    avg_wind_speed = past_data['windspeed'].mean()
    avg_sealevelpressure = past_data['sealevelpressure'].mean()
    avg_solarradiation = past_data['solarradiation'].mean()
    elevation = past_data['elevation'].mean()
    avg_fire_occcurred = past_data['FireOccurred'].mean()

    return f"""
    Over the past 15 days:
    - Average Temperature: {avg_temp:.1f}°C
    - Average Dew Point: {avg_dew:.1f}%
    - Average Precipitation: {avg_precipitation}mm
    - Average Precipitation Cover: {avg_precp_cover}%
    - Average Wind Speed: {avg_wind_speed:.1f}kph
    - Average Sea Level Pressure: {avg_sealevelpressure}mb
    - Average Solar Radiation: {avg_solarradiation}watts per square meter
    - Average Elevation: {elevation}m 
    - Average Time Fire Has Occurred: {avg_fire_occcurred}
    """

# Generate prompts with past 15-day summaries
def generate_prompt(row, data):
    # Get past 15 days of weather data for the specific city and date
    past_data = get_past_15_days(row['name'], row['datetime'], data)
    
    # Summarize the past data (e.g., average temperature, precipitation, etc.)
    past_summary = summarize_past_data(past_data)
    
    # Format current weather data
    current_weather = f"""
    Current weather:
    - Temperature: {row['temp']}°C
    - Dew Point: {row['dew']}%
    - Precipitation: {row['precip']}mm
    - Precipitation Cover: {row['precipcover']}%
    - Wind Speed: {row['windspeed']}kph
    - Sea Level Pressure: {row["sealevelpressure"]}mb
    - Solar Radiation: {row['solarradiation']} watts per square meter
    - Elevation: {row["elevation"]}m
    """
    
    # Construct the full prompt using past summary and current weather data
    full_prompt = f"""
    Analyze the wildfire risk for {row['name']} on {row['datetime'].strftime('%Y-%m-%d')}:
    {past_summary}

    {current_weather}
    Based on the past 15 days of weather and current conditions:
    Step 1. Give a one word prediction the wildfire risk.
    Step 2. Give a one word predciction for the severity of a potential wildfire. 
    Example of responses in Step 1 and Step 2: Low, Moderate, or High. There can be combinations of them such as Low-Moderate. 
    Step 3. List the top 3 reasons why these predictions were chosen.
    """
    
    # Return the past summary, current weather, and full prompt
    return past_summary, current_weather, full_prompt


In [16]:
def get_openai_response(prompt):
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",  # Or "gpt-4"
            messages=[
                {"role": "system", "content": "You are an expert in wildfire predictions."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=200,
            temperature=0.6
        )
        return response['choices'][0]['message']['content'].strip()
    except Exception as e:
        return f"Error: {e}"

In [13]:
columns_to_keep = ['name', 'datetime', 'History', 'CurrentWeather', 'WildfireRiskPrediction', 'FireOccurred']

In [17]:
# Create an empty DataFrame to collect all predictions
results_df = pd.DataFrame()

city_of_interest = ['Fresno, CA', "San Diego, CA", "Bakersfield, CA",
                    "Redding, CA", "Susanville, CA", "Crescent City, CA"]

dates_of_interest = ["2019-07-15", "2019-09-11", "2019-04-04", "2019-05-25"]
dates_of_interest = pd.to_datetime(dates_of_interest)  # Convert to datetime

# Loop through unique city and datetime combinations
for city in city_of_interest:
    for date in dates_of_interest:
        # Filter the DataFrame for the specific city and date
        filtered_df = df.loc[(df['name'] == city) & (df['datetime'] == date)]
        
        # Check if there are any rows to process
        if filtered_df.empty:
            print(f"No data available for {city} on {date}.")
            continue
        
        # Apply the function to generate prompts
        filtered_df['History'], filtered_df['CurrentWeather'], filtered_df['Prompt'] = zip(*filtered_df.apply(lambda row: generate_prompt(row, df), axis=1))
        
        # Query OpenAI for each prompt and store the result
        filtered_df['WildfireRiskPrediction'] = filtered_df['Prompt'].apply(get_openai_response)
        

        # Append results to the results DataFrame
        results_df = pd.concat([results_df, filtered_df], ignore_index=True)

final_results_df = results_df[columns_to_keep]
final_results_df.drop_duplicates(inplace=True)
# Save the aggregated results to a new CSV file
output_file_path = "model_predictions/output_files/wildfire_risk_predictions_with_fire_hist_ai.csv"
final_results_df.to_csv(output_file_path, index=False)

print(f"All predictions saved to {output_file_path}")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['History'], filtered_df['CurrentWeather'], filtered_df['Prompt'] = zip(*filtered_df.apply(lambda row: generate_prompt(row, df), axis=1))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['History'], filtered_df['CurrentWeather'], filtered_df['Prompt'] = zip(*filtered_df.apply(lambda row: generate_prompt(row, df), axis=1))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pa

All predictions saved to wildfire_risk_predictions_with_fire_hist_ai.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['WildfireRiskPrediction'] = filtered_df['Prompt'].apply(get_openai_response)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_results_df.drop_duplicates(inplace=True)


In [107]:
print(results_df["Prompt"][1])


    Analyze the wildfire risk for Fresno, CA on 2019-09-11:
    
    Over the past 15 days:
    - Average Temperature: 28.2°C
    - Average Dew Point: 11.3%
    - Average Precipitation: 0.0mm
    - Average Precipitation Cover: 0.0%
    - Average Wind Speed: 22.5kph
    - Average Sea Level Pressure: 1011.8600000000001mb
    - Average Solar Radiation: 257.3933333333333watts per square meter
    - Average Elevation: 93.0m 
    - Average Time Fire Has Occurred: 0.0
    

    
    Current weather:
    - Temperature: 23.1°C
    - Dew Point: 9.3%
    - Precipitation: 0.0mm
    - Precipitation Cover: 0.0%
    - Wind Speed: 21.9kph
    - Sea Level Pressure: 1015.1mb
    - Solar Radiation: 257.7 watts per square meter
    - Elevation: 93m
    
    Based on the past 15 days of weather and current conditions:
    Step 1. Give a one word prediction the wildfire risk.
    Step 2. Give a one word predciction for the severity of a potential wildfire. 
    Example of responses in Step 1 and Step 2: Lo

In [4]:
output_file_path = "wildfire_risk_predictions_ai.csv"

final_results_df = pd.read_csv("wildfire_risk_predictions_ai.csv")
final_results_df.drop_duplicates(inplace=True)
final_results_df.to_csv(output_file_path, index=False)