**Code in this notebook adds trailing x-month average columns for select weather variables to the combined weather + fires dataset used in modeling.**

In [1]:
import pandas as pd
import numpy as np
import boto3
import io

_Reading in consolidated weather + fire data for overlapping period and states:_

In [2]:
noaa_on_fire = pd.read_csv('./data/mfi_df_yr.csv')

_Dropping redundant and not useful columns:_

In [3]:
noaa_on_fire.drop(columns = ['Unnamed: 0', 'time_burnt_fixed', 'fire_year', 'statecode', 'division'], inplace = True)
noaa_on_fire.columns

Index(['stat_cause_descr', 'fire_size', 'fire_size_class', 'state',
       'cont_date_fixed', 'disc_date_fixed', 'time_burnt', 'month', 'year',
       'yearmonth', 'pcp', 'tavg', 'pdsi', 'phdi', 'zndx', 'pmdi', 'cdd',
       'hdd', 'sp02', 'sp03', 'sp06', 'sp09', 'sp12', 'sp24', 'tmin', 'tmax'],
      dtype='object')

_Sorting data chronologically using the `yearmonth` column:_

In [4]:
noaa_on_fire.sort_values('yearmonth', inplace = True)

_Building a list of unique `yearmonth` indicators, sorted chronologically from oldest to newest:_

In [5]:
yrmth_sorted = list(noaa_on_fire['yearmonth'].unique()) 
# yrmth_sorted[(yrmth_sorted.index(199304) -12):yrmth_sorted.index(199304)] # this was a preview to validate the method

_Defining a function which takes in the number of months (X) and a variable available in the dataset and adds a new column for a trailing X months average of the specified variable:_

In [6]:
def trail_x_mth_avg(n_months, var_name):
    temp_list = [] # this is an empty list to temporarily store trailing averages in the same order as yrmth_sorted
    for yrmth in yrmth_sorted[:n_months]: 
        temp_list.append(np.nan) # the first n values can't be calculated
    for yrmth in yrmth_sorted[n_months:]:  #below we average the values for each set of the n months after the first n:
        temp_list.append(noaa_on_fire[(noaa_on_fire['yearmonth'] >= yrmth_sorted[yrmth_sorted.index(yrmth) - n_months])&
                   (noaa_on_fire['yearmonth'] <= yrmth_sorted[yrmth_sorted.index(yrmth)])][var_name].mean())
    noaa_on_fire[str(var_name)+'_t'+str(n_months)+'m'] = noaa_on_fire['yearmonth'].apply(lambda x: temp_list[yrmth_sorted.index(x)])
    # last line adds this information to the dataset with a descriptive column name

_Applying the function to climate variables for 12, 9, 6 and 3 months trailing averages:_

In [7]:
variables = ['tavg','pcp', 'pmdi', 'pdsi'] #select NOAA / weather variables for which to make trailing averages
ns = [12, 9, 6, 3] # n-month periods to calculate trailing averages

for var in variables:
    for n in ns:
        trail_x_mth_avg(n, var)

_Checking that new variables are included in the dataset and are calculated correctly:_

In [8]:
noaa_on_fire.columns

Index(['stat_cause_descr', 'fire_size', 'fire_size_class', 'state',
       'cont_date_fixed', 'disc_date_fixed', 'time_burnt', 'month', 'year',
       'yearmonth', 'pcp', 'tavg', 'pdsi', 'phdi', 'zndx', 'pmdi', 'cdd',
       'hdd', 'sp02', 'sp03', 'sp06', 'sp09', 'sp12', 'sp24', 'tmin', 'tmax',
       'tavg_t12m', 'tavg_t9m', 'tavg_t6m', 'tavg_t3m', 'pcp_t12m', 'pcp_t9m',
       'pcp_t6m', 'pcp_t3m', 'pmdi_t12m', 'pmdi_t9m', 'pmdi_t6m', 'pmdi_t3m',
       'pdsi_t12m', 'pdsi_t9m', 'pdsi_t6m', 'pdsi_t3m'],
      dtype='object')

In [9]:
noaa_on_fire[noaa_on_fire['yearmonth'] > 199212]

Unnamed: 0,stat_cause_descr,fire_size,fire_size_class,state,cont_date_fixed,disc_date_fixed,time_burnt,month,year,yearmonth,...,pcp_t6m,pcp_t3m,pmdi_t12m,pmdi_t9m,pmdi_t6m,pmdi_t3m,pdsi_t12m,pdsi_t9m,pdsi_t6m,pdsi_t3m
272140,Equipment Use,1.0,B,CA,1993-01-22,1993-01-22,0.0,1,1993,199301,...,1.219071,1.754632,-0.840196,-0.852933,-0.961387,-0.704188,-0.829705,-0.838818,-0.935206,-0.400671
279711,Smoking,0.1,A,AZ,1993-01-29,1993-01-29,0.0,1,1993,199301,...,1.219071,1.754632,-0.840196,-0.852933,-0.961387,-0.704188,-0.829705,-0.838818,-0.935206,-0.400671
152744,Structure,0.1,A,NM,1993-01-01,1993-01-01,0.0,1,1993,199301,...,1.219071,1.754632,-0.840196,-0.852933,-0.961387,-0.704188,-0.829705,-0.838818,-0.935206,-0.400671
145414,Debris Burning,0.1,A,CA,1993-01-29,1993-01-29,0.0,1,1993,199301,...,1.219071,1.754632,-0.840196,-0.852933,-0.961387,-0.704188,-0.829705,-0.838818,-0.935206,-0.400671
154560,Miscellaneous,30.0,C,CA,1993-01-27,1993-01-27,0.0,1,1993,199301,...,1.219071,1.754632,-0.840196,-0.852933,-0.961387,-0.704188,-0.829705,-0.838818,-0.935206,-0.400671
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
414626,Missing/Undefined,0.1,A,AZ,2015-12-22,2015-12-22,0.0,12,2015,201512,...,1.078850,1.332475,-2.687270,-2.726656,-2.689989,-2.380456,-2.529519,-2.540082,-2.511070,-2.215478
414625,Missing/Undefined,0.1,A,AZ,2015-12-21,2015-12-21,0.0,12,2015,201512,...,1.078850,1.332475,-2.687270,-2.726656,-2.689989,-2.380456,-2.529519,-2.540082,-2.511070,-2.215478
414624,Missing/Undefined,0.1,A,AZ,2015-12-21,2015-12-21,0.0,12,2015,201512,...,1.078850,1.332475,-2.687270,-2.726656,-2.689989,-2.380456,-2.529519,-2.540082,-2.511070,-2.215478
414637,Missing/Undefined,0.1,A,AZ,2015-12-25,2015-12-25,0.0,12,2015,201512,...,1.078850,1.332475,-2.687270,-2.726656,-2.689989,-2.380456,-2.529519,-2.540082,-2.511070,-2.215478


_Saving updated dataset to a csv (the resulting file was huge, hence use of a shared AWS S3 bucket):_

In [12]:
noaa_on_fire.to_csv('s3://git-to-amazon-s3-outputbucket-rorni8oehk4l/soulclimberchick/meteorology-fire-impact/data-files/mfi_df_yr_trail.csv')