In [1]:
import pandas as pd
from scipy.stats import pearsonr

In [2]:
# Wildfire table read from the intermediary CSV (file name indicates Madison, 1964-2024).
wildfire_df = pd.read_csv("intermediary_files/madison_wildfire_1964_2024.csv")
# Yearly AQI table read from the intermediary CSV (file name indicates 1964-2024).
aqi_df = pd.read_csv("intermediary_files/yearly_aqi_1964_2024.csv")

In [3]:
def calculate_smoke_estimate(row):
    """Compute a heuristic smoke-impact score for a single fire record.

    The score is the burned acreage multiplied by four unitless weights
    (fire-type intensity, an area-based duration factor, a wind factor derived
    from the ``Shape_Length / Shape_Area`` ratio, and the circleness scale),
    divided by the value stored in ``shortest_dist``.

    Args:
        row: Mapping-like record (for example a DataFrame row) that provides
            the keys ``GIS_Acres``, ``Assigned_Fire_Type``, ``Shape_Area``,
            ``Shape_Length``, ``shortest_dist`` and ``Circleness_Scale``.

    Returns:
        The smoke estimate for this record.
    """
    # Intensity weight per fire type; any type not listed falls back to 0.5.
    intensity_by_type = {
        "Wildfire": 5,
        "Likely Wildfire": 4,
        "Prescribed Fire": 5,
        "Unknown - Wildfire": 3,
    }
    # (exclusive lower bound on Shape_Area, duration weight), largest bound first.
    duration_tiers = ((1000000, 2), (500000, 1.5), (100000, 1))

    acres = row['GIS_Acres']
    fire_type = row['Assigned_Fire_Type']
    area = row['Shape_Area']
    length = row['Shape_Length']
    distance = row['shortest_dist']
    roundness = row['Circleness_Scale']

    intensity = intensity_by_type.get(fire_type, 0.5)

    # The first tier whose bound is exceeded wins; areas at or below the
    # smallest bound keep the 0.5 default.
    duration = 0.5
    for lower_bound, weight in duration_tiers:
        if area > lower_bound:
            duration = weight
            break

    # A length-to-area ratio above 0.01 is given the larger wind weight.
    if length / area > 0.01:
        wind = 2
    else:
        wind = 0.5

    # Multiplication order is kept fixed so floating-point results are stable.
    weighted_acres = acres * intensity * duration * wind * roundness
    return weighted_acres / distance

In [4]:
# Number of days from May through October; used to turn a yearly total into a daily average.
FIRE_SEASON_DAYS = 184

# Score every fire record individually and keep the result as a new column.
wildfire_df['Smoke_Estimate'] = wildfire_df.apply(calculate_smoke_estimate, axis=1)

# Sum the per-fire scores within each Fire_Year, then divide by the season
# length so each year carries an average daily smoke estimate.
smoke_by_year = (
    wildfire_df
    .groupby('Fire_Year')['Smoke_Estimate']
    .sum()
    .div(FIRE_SEASON_DAYS)
    .reset_index()
)

# Persist the Fire_Year / Smoke_Estimate table as a CSV file.
smoke_by_year.to_csv('intermediary_files/madison_annual_smoke_estimate_1964_2024.csv', index=False)

In [5]:
# Pair each year's smoke estimate with the AQI row for the same year; the
# inner join keeps only years that appear in both smoke_by_year and aqi_df.
merged_df = smoke_by_year.merge(
    aqi_df,
    how='inner',
    left_on='Fire_Year',
    right_on='year',
)

# pearson_corr keeps the full pearsonr result; index 0 is the correlation coefficient.
pearson_corr = pearsonr(merged_df['Smoke_Estimate'], merged_df['aqi'])
correlation = pearson_corr[0]

print("Correlation between smoke and AQI:", correlation)


Correlation between smoke and AQI: 0.4489186560871011


A Pearson correlation coefficient of 0.45 between the smoke estimate and AQI indicates a moderate positive correlation between the two variables. A moderate value makes sense because both smoke and AQI are influenced by factors other than the ones that were available for the calculation. We will now calculate a p-value to see whether this moderate correlation is statistically significant.


In [6]:
# pearson_corr is the result of the pearsonr call made earlier; index 1 is its p-value.
p_value = pearson_corr[1]
print("P-value of the correlation test:", p_value)

P-value of the correlation test: 0.0009524095656508743


A p-value of approximately 0.00095 from the correlation test indicates a statistically significant relationship between the two variables: the calculated smoke estimate and AQI. At this point, I think it is safe to assume that we have made a fairly good estimate of smoke, which can be used for further analysis.