# Import libraries

In [1]:
import pandas as pd
import datetime
import functools as ft
import seaborn as sns
import geopandas as gp




# Read data

In [5]:
# Root folder for every file read or written by this notebook.
# The path is relative to the current working directory, and the trailing "/"
# matters because file names are appended with plain string concatenation.
data_folder = "./Desktop/wildfires_project/Data/"

Read shapefiles

In [6]:
# County attribute table. GEOID is read as text, and a population-density
# column (2020 population divided by census area) is appended.
df_location = pd.read_csv(
    data_folder+"USA_shapefile/us_shapefile_county.csv",
    dtype={"GEOID": str},
).assign(pop_dens2020=lambda table: table["pop2020"] / table["CENSUSAREA"])

In [7]:
# County-level geometries. Later cells read the "pop2020" and "GEO_ID" columns
# of this GeoDataFrame and derive centroid coordinates from its geometry.
shp=gp.read_file(data_folder+"USA_shapefile/county_level/us_shapefile_county.shp")

Read community class data and air quality data to define the reference timeframe


In [8]:
# Community (modularity class) assignment of each county node.
df_community = pd.read_csv(data_folder+"community_structure_counties.csv")

# Nationwide AQI table with a parsed date column and the year derived from it.
all_airquality = pd.read_csv(data_folder+"all_AQI.csv").assign(
    date=lambda aqi: pd.to_datetime(aqi["date"]),
    year=lambda aqi: aqi["date"].dt.year,
)

# Reference timeframe: 1 July - 31 October 2020, both bounds inclusive.
airquality_target = all_airquality.loc[
    (all_airquality["date"] <= "2020-10-31") & (all_airquality["date"] >= "2020-07-01")
]


Spatial distance file 

In [9]:
# Pairwise county distances, indexed by the first county of each pair ("county1").
# Later cells look rows up with dist.loc[county] and use the "county2" and
# "dist_km" columns.
dist=pd.read_csv(data_folder+'distance_counties.csv').set_index('county1')

# Oregon

Read air quality data for the state of reference

In [10]:
# Oregon air-quality observations for 2020. The following cells use its
# "date", "AQI index" and "fips_code" columns.
df_2020 = pd.read_csv(data_folder+"air quality/air_quality_OR_2020.csv")

Define "after the shock" timeframe and analyze counties and dates with problematic AQI 

In [11]:
# Parse the date column and derive calendar helper columns.
df_2020["date"] = pd.to_datetime(df_2020["date"])
df_2020["year"] = df_2020["date"].dt.year
# `Series.dt.week` is deprecated and was removed in pandas 2.0;
# `isocalendar().week` is the supported replacement and yields the same
# ISO week number (stored as UInt32).
df_2020['week_id'] = df_2020['date'].dt.isocalendar().week

# "After the shock" window: 10 September - 31 October 2020, both bounds inclusive.
df_fire = df_2020[(df_2020["date"] >= "2020-09-10")&(df_2020["date"] <= "2020-10-31")]
# Observations whose AQI is strictly above 150 are treated as problematic.
df_fire_hazard = df_fire[df_fire["AQI index"]>150]


  df_2020['week_id'] = df_2020['date'].dt.week


Merge fire hazard dataframe with location dataframe, to get info on FIPS code and population

In [12]:
# Attach county codes, names and 2020 population to every hazardous observation.
# The left join keeps observations whose FIPS code has no match in df_location.
df_counties_select = df_fire_hazard.merge(
    df_location[['FIPS code','Alpha code', 'NAME', 'pop2020']],
    how='left',
    left_on='fips_code',
    right_on='FIPS code',
)

Select counties that have had problematic AQI for at least 3 days in a row

In [13]:
# Count hazardous observations per county name in a single pass instead of
# re-filtering the whole frame once per name. sort=False keeps the names in
# order of first appearance; rows with a missing NAME are ignored, as before.
# NOTE(review): this counts observations in the window, not consecutive days,
# so it is looser than an "at least 3 days in a row" rule - confirm the intent.
hazard_counts = df_counties_select.groupby("NAME", sort=False).size()
list_select_names = hazard_counts[hazard_counts >= 3].index.tolist()

In [14]:
# Keep only the observations of counties whose name is in list_select_names.
df_counties_select = df_counties_select.loc[
    df_counties_select["NAME"].isin(list_select_names)
]

In [15]:
# Distinct FIPS codes of the selected counties, in order of first appearance.
list_counties = list(
    df_counties_select.loc[
        df_counties_select["NAME"].isin(list_select_names), "fips_code"
    ].unique()
)

Select only counties that are in "0" modularity class

In [16]:
# Rows of the community table for selected counties in modularity class 0.
class0_counties = df_community.loc[
    (df_community["modularity_class"] == 0) & df_community["node"].isin(list_counties)
]

Merge with population size and create dataframe with node, population size

In [17]:
# Join the class-0 counties with their hazardous observations, then collapse to
# one row per county. drop_duplicates keeps the first row of each node, which is
# the same row the former per-node loop picked with .values[0], without
# re-filtering the merged frame once per node. The index is reset so rows are
# numbered 0..k-1 in order of first appearance.
df_hit_counties = (
    class0_counties
    .merge(df_counties_select, left_on="node", right_on="fips_code")
    .drop_duplicates(subset="node")[["node", "pop2020"]]
    .rename(columns={"node": "county", "pop2020": "population"})
    .reset_index(drop=True)
)

Order by population size and take the top 5 most populated counties as the reference affected ones

In [18]:
# Rank by population, largest first, and keep the five most populated counties.
df_hit_counties = df_hit_counties.sort_values(by="population", ascending=False).head(5)
df_hit_counties

Unnamed: 0,county,population
3,41051,815871.0
13,41067,600689.0
11,41005,422160.0
5,41039,382940.0
14,41047,346202.0


Save dataframe as csv

In [19]:
# Persist the Oregon reference counties.
# data_folder already ends with "/", so the resulting path contains "//".
# The DataFrame index is written as well (pandas default), as an unnamed first column.
df_hit_counties.to_csv(data_folder+"/affected_counties_OR.csv")

# Washington

Read air quality data for the state of reference

In [20]:
# Washington air-quality observations for 2020. This rebinds df_2020, which
# previously held the Oregon table. The following cells use its "date",
# "AQI index" and "fips_code" columns.
df_2020 = pd.read_csv(data_folder+"air quality/air_quality_WA_2020.csv")

Define "after the shock" timeframe and analyze counties and dates with problematic AQI 

In [21]:
# Parse the date column and derive calendar helper columns.
df_2020["date"] = pd.to_datetime(df_2020["date"])
df_2020["year"] = df_2020["date"].dt.year
# `Series.dt.week` is deprecated and was removed in pandas 2.0;
# `isocalendar().week` is the supported replacement and yields the same
# ISO week number (stored as UInt32).
df_2020['week_id'] = df_2020['date'].dt.isocalendar().week

# "After the shock" window: 10 September - 31 October 2020, both bounds inclusive.
df_fire = df_2020[(df_2020["date"] >= "2020-09-10")&(df_2020["date"] <= "2020-10-31")]
# Observations whose AQI is strictly above 150 are treated as problematic.
df_fire_hazard = df_fire[df_fire["AQI index"]>150]


  df_2020['week_id'] = df_2020['date'].dt.week


Merge fire hazard dataframe with location dataframe, to get info on FIPS code and population

In [22]:
# Attach county codes, names and 2020 population to every hazardous observation.
# The left join keeps observations whose FIPS code has no match in df_location.
df_counties_select = df_fire_hazard.merge(
    df_location[['FIPS code','Alpha code', 'NAME', 'pop2020']],
    how='left',
    left_on='fips_code',
    right_on='FIPS code',
)

Select counties that have had problematic AQI for at least 3 days in a row

In [23]:
# Count hazardous observations per county name in a single pass instead of
# re-filtering the whole frame once per name. sort=False keeps the names in
# order of first appearance; rows with a missing NAME are ignored, as before.
# NOTE(review): this counts observations in the window, not consecutive days,
# so it is looser than an "at least 3 days in a row" rule - confirm the intent.
hazard_counts = df_counties_select.groupby("NAME", sort=False).size()
list_select_names = hazard_counts[hazard_counts >= 3].index.tolist()

In [24]:
# Keep only the observations of counties whose name is in list_select_names.
df_counties_select = df_counties_select.loc[
    df_counties_select["NAME"].isin(list_select_names)
]

In [25]:
# Distinct FIPS codes of the selected counties, in order of first appearance.
list_counties = list(
    df_counties_select.loc[
        df_counties_select["NAME"].isin(list_select_names), "fips_code"
    ].unique()
)

Select only counties that are in "0" modularity class

In [26]:
# Rows of the community table for selected counties in modularity class 0.
class0_counties = df_community.loc[
    (df_community["modularity_class"] == 0) & df_community["node"].isin(list_counties)
]

Merge with population size and create dataframe with node, population size

In [27]:
# Join the class-0 counties with their hazardous observations, then collapse to
# one row per county. drop_duplicates keeps the first row of each node, which is
# the same row the former per-node loop picked with .values[0], without
# re-filtering the merged frame once per node. The index is reset so rows are
# numbered 0..k-1 in order of first appearance.
df_hit_counties = (
    class0_counties
    .merge(df_counties_select, left_on="node", right_on="fips_code")
    .drop_duplicates(subset="node")[["node", "pop2020"]]
    .rename(columns={"node": "county", "pop2020": "population"})
    .reset_index(drop=True)
)

Order by population size and take the top 5 most populated counties as the reference affected ones

In [28]:
# Rank by population, largest first, and keep the five most populated counties.
df_hit_counties = df_hit_counties.sort_values(by="population", ascending=False).head(5)
df_hit_counties

Unnamed: 0,county,population
6,53033,2272571.0
3,53063,540700.0
10,53011,505013.0
2,53067,295729.0
0,53077,256533.0


Save dataframe as csv

In [29]:
# Persist the Washington reference counties; this file is read back later in the notebook.
# data_folder already ends with "/", so the resulting path contains "//".
# The DataFrame index is written as well (pandas default), as an unnamed first column.
df_hit_counties.to_csv(data_folder+"/affected_counties_WA.csv")

# Select non-affected counties

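Compute the 75th percentile of the 2020 county population distribution (taken from the shapefile). The non-affected counties, which are selected below with the help of the distance file, are later restricted to those with a population at or above this threshold.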

In [30]:
# 75th percentile of county population (values cast to int first); used as the
# population threshold when the non-affected counties are saved.
perc_75 = shp['pop2020'].astype("int").quantile(0.75)

Useful pre-processing and rank counties by latitude

In [31]:
# County codes whose 2020 population exceeds 11,000.
# NOTE(review): 11000 is a hard-coded threshold and this array is not used by
# any cell visible here - confirm it is still needed.
counties_filtered=shp[shp['pop2020']>11000]['GEO_ID'].values

# Compute the centroids once and reuse them for both coordinates (the geometry
# pass was previously done twice).
# NOTE(review): if `shp` is in a geographic CRS, geopandas warns that centroids
# are only approximate - TODO confirm the CRS or reproject before this step.
centroids = shp.centroid
shp["lat"] = centroids.y
shp["lon"] = centroids.x

# Order counties by centroid latitude, highest first.
shp=shp.sort_values(by='lat',ascending=False)
codes_rank=shp['GEO_ID'].values # county codes ranked by latitude


  shp["lat"] = shp.centroid.y

  shp["lon"] = shp.centroid.x


Select non-affected counties as those that are in modularity class 0, have at least one observation in the reference timeframe, and have a maximum air quality index of at most 100 (non-problematic air quality).

Read WA affected counties as reference to select non-affected counties that will then be considered as baseline for the entire analysis

In [32]:
# Reload the Washington affected counties written earlier in this notebook.
# That file was saved with its index, so an extra "Unnamed: 0" column comes
# back alongside "county" and "population".
df_hit_counties = pd.read_csv(data_folder+"/affected_counties_WA.csv")

In [33]:
# Precompute the two lookups once instead of re-filtering the community and
# air-quality tables for every candidate county.
# Nodes that belong to modularity class 0:
class0_nodes = set(df_community.loc[df_community["modularity_class"] == 0, "node"])
# Worst AQI per county inside the reference timeframe. Counties without any
# observation are absent from the index. Unlike Python's built-in max(), this
# does not depend on where missing values sit in the column.
max_aqi = airquality_target.groupby("fips_code")["AQI index"].max()

list_nohit = []
for county in df_hit_counties["county"]:
    # Double brackets always return a DataFrame, even when the affected county
    # has a single row in the distance table.
    dist_c = dist.loc[[county]].sort_values(by='dist_km')
    # Only keep neighbours that exist in the shapefile.
    dist_c = dist_c[dist_c.county2.isin(shp.GEO_ID)]["county2"]
    for el in dist_c:
        # Class 0, observed at least once in the timeframe, and never above AQI 100.
        # A county reachable from several affected counties is appended once per
        # occurrence, exactly as before.
        if el in class0_nodes and el in max_aqi.index and max_aqi.loc[el] <= 100:
            list_nohit.append(el)

Save population of non-affected counties

In [34]:
# Population of every distinct non-affected county, taken from the first
# shapefile row whose GEO_ID matches.
list_pop = [
    shp.loc[shp["GEO_ID"] == code, "pop2020"].values[0]
    for code in list(set(list_nohit))
]

In [35]:
# Drop duplicate county codes.
# NOTE(review): list_pop was built by iterating a separate set(list_nohit); the
# two lists are later paired by position, which relies on both set() calls
# iterating in the same order.
list_nohit = list(set(list_nohit))

Save non-affected counties in csv file

In [36]:
# Look up each county's population by its code instead of pairing list_nohit
# with list_pop by position: that pairing relied on two separate set() calls
# iterating in the same order. The first shapefile row per GEO_ID is used,
# matching how list_pop was built.
pop_by_county = shp.drop_duplicates(subset="GEO_ID").set_index("GEO_ID")["pop2020"]
df_pop = pd.DataFrame({"county": list_nohit})
df_pop["pop"] = df_pop["county"].map(pop_by_county)

# Keep only counties at or above the 75th percentile of county population.
df_pop = df_pop[df_pop["pop"]>=perc_75]

# The index is written too (pandas default); data_folder already ends with "/".
df_pop.to_csv(data_folder+"/non_affected_top25pop_only2020.csv")