# Climate Exposure Dataset

- Contains climate exposure variables: minimum and maximum temperature, relative humidity, and wildfire smoke PM2.5
- Contains population counts and population density
- Covers the year 2020
- Spatial coverage: California (census tract)

In [1]:
import numpy as np
import pandas as pd
import xarray as xr
import seaborn as sns
import geopandas as gpd

In [2]:
# Base directory for input data, relative to where the notebook runs;
# later cells append sub-paths to it.
path = "../data/"

In [3]:
# Input .nc file names. Order matters: a later cell picks the relative-humidity
# file by position (files[1]).
files = [
    "MinTemp_2006_2021_Cali.nc",
    "RelHum_2006_2021_Cali.nc",
    "MaxTemp_2006_2021_Cali.nc",
    "smoke_2006_21_cali.nc",
]

In [4]:
# Load the previously merged tmax/tmin/pm25 table; per the preview below it is
# indexed by (time, FIPS).
df = pd.read_parquet(path + "outputs/esri_tmin_tmax_pm25_merged.parquet")

In [5]:
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,tmax,tmin,pm25
time,FIPS,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2005-12-31,1765.0,13.35,7.85,4.986
2005-12-31,1766.0,14.950006,8.249994,5.5
2005-12-31,1767.0,14.950006,8.249994,5.486667
2005-12-31,1768.0,14.950006,8.249994,5.437778
2005-12-31,1769.0,14.950006,8.249994,5.48


In [6]:
# Open the relative-humidity file (files[1]) with xarray.
# NOTE(review): with path = "../data/" this resolves under "../data/data/esri/",
# whereas later cells read from "../data/esri/" — confirm the directory.
rhum = xr.open_dataset(path + "data/esri/"+ files[1])

In [7]:
# Flatten the humidity dataset to a table with one row per (time, FIPS):
# keep the two variables of interest, discard the coordinate columns, and take
# the first record of every (time, FIPS) group.
rhum = (
    rhum[["FIPS", "RELATIVE_HUMIDITY_NONE_SPATIAL_NEIGHBORS"]]
    .to_dataframe()
    .drop(columns=["lat", "lon"])
    .groupby(["time", "FIPS"])
    .first()
)

# Index-aligned join onto the temperature/PM table, then give the humidity
# column a short name.
df_ = df.join(rhum, rsuffix="_rhum").rename(
    columns={"RELATIVE_HUMIDITY_NONE_SPATIAL_NEIGHBORS": "rhum"}
)

In [8]:
df_.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,tmax,tmin,pm25,rhum
time,FIPS,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2005-12-31,1765.0,13.35,7.85,4.986,88.400002
2005-12-31,1766.0,14.950006,8.249994,5.5,91.400002
2005-12-31,1767.0,14.950006,8.249994,5.486667,91.400002
2005-12-31,1768.0,14.950006,8.249994,5.437778,91.400002
2005-12-31,1769.0,14.950006,8.249994,5.48,91.400002


In [14]:
# Turn the (time, FIPS) index levels back into ordinary columns.
df = df_.reset_index()

In [15]:
# Persist the joined exposure table; the relative file name means it lands in
# the current working directory.
df.to_parquet("exposure_per_fips_time.parquet")

In [37]:
df.head()

Unnamed: 0,time,FIPS,tmax,tmin,pm25,rhum
0,2005-12-31,1765.0,13.35,7.85,4.986,88.400002
1,2005-12-31,1766.0,14.950006,8.249994,5.5,91.400002
2,2005-12-31,1767.0,14.950006,8.249994,5.486667,91.400002
3,2005-12-31,1768.0,14.950006,8.249994,5.437778,91.400002
4,2005-12-31,1769.0,14.950006,8.249994,5.48,91.400002


In [38]:
# Restrict the table to rows whose timestamp falls in calendar year 2020.
df = df.loc[df["time"].dt.year.eq(2020)]

In [39]:
# Drop the pm25 column carried over from the merged parquet; a smoke_pm column
# is merged in further down.
df = df.drop(columns="pm25")

## Add population count and density

In [40]:
# FIPS shows up as a float in the previews (e.g. 1765.0); cast it to int
# before it is used as a merge key.
df["FIPS"] = df["FIPS"].astype(int)

In [42]:
# Read the FIPS mapping layer and cast its FIPS_1 column to int so it can be
# used as an integer join key later.
gdf = gpd.read_file("../data/esri/FIPSMapping.gdb")
gdf["FIPS_1"] = gdf["FIPS_1"].astype(int)

In [43]:
# Attach the mapping layer's columns to every exposure row via the FIPS key
# that both tables share.
df = df.merge(gdf, on="FIPS")

In [46]:
# Read only the identifier and the two population columns from the crosswalk CSV.
# NOTE(review): the file name ends in ".csv.csv" — confirm that is the real name on disk.
pop = pd.read_csv(
    "../data/esri/crosswalk_ana/FIPSMapping_gdb_censustract_2020_ca.csv.csv",
    usecols=["FIPS", "POPULATION_2020", "POP20_SQMI"],
)

In [48]:
# Attach the population columns by matching FIPS_1 against the CSV's FIPS.
# Both frames carry a column called FIPS, so the result has FIPS_x / FIPS_y.
df = df.merge(pop, left_on="FIPS_1", right_on="FIPS")

In [50]:
df.columns

Index(['time', 'FIPS_x', 'tmax', 'tmin', 'rhum', 'FIPS_1', 'Shape_Length',
       'Shape_Area', 'geometry', 'FIPS_y', 'POPULATION_2020', 'POP20_SQMI'],
      dtype='object')

In [51]:
# Remove the two suffixed FIPS key columns and the shape metrics; FIPS_1 and
# geometry stay.
df = df.drop(columns=['FIPS_x', 'Shape_Length', 'Shape_Area', 'FIPS_y'])

In [58]:
len(df)

3174318

## Add wildfire smoke PM2.5

In [59]:
len(pspm25)

3341214

In [55]:
pspm25 = pd.read_csv("../data/smoke_pm/smoke_pm_2020.csv")

In [60]:
# Parse the date column to datetime so it can be matched against `time` in the
# merge that follows.
pspm25['date'] = pd.to_datetime(pspm25['date'])

In [61]:
pspm25.head()

Unnamed: 0,GEOID,smoke_pm,date
0,6001400100,0.0,2020-01-01
1,6001400200,0.0,2020-01-01
2,6001400300,0.0,2020-01-01
3,6001400400,0.0,2020-01-01
4,6001400500,0.0,2020-01-01


In [65]:
# Pair each exposure row with the smoke record that has the same identifier and
# timestamp (FIPS_1 <-> GEOID, time <-> date).
df = df.merge(
    pspm25,
    left_on=["FIPS_1", "time"],
    right_on=["GEOID", "date"],
)

## Compute average monthly values for June–October

In [67]:
# After the smoke merge, GEOID and time carry the same information as FIPS_1
# and date (they were the join keys), so the duplicates are dropped.
df = df.drop(columns=['FIPS_1', 'date'])

In [69]:
# Month number of each row's timestamp; the averaging function below groups on it.
df['month'] = df['time'].dt.month

# Function to calculate the monthly averages for each variable
def calculate_monthly_averages(df, variable):
    """Return per-GEOID monthly means of ``variable`` for June through October.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``GEOID`` and ``month`` columns plus the ``variable``
        column. ``month`` is compared against the integers 6-10.
    variable : str
        Name of the column to average.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``GEOID`` with five columns, ``avg_<variable>_jun`` through
        ``avg_<variable>_oct``. A month that has no rows in ``df`` produces an
        all-NaN column instead of an error.
    """
    # NOTE(review): the saved notebook output shows columns named like
    # "tmin_jun_avg", which this code does not produce — confirm which naming
    # downstream consumers expect.
    month_labels = {6: "jun", 7: "jul", 8: "aug", 9: "sep", 10: "oct"}

    grouped = df.groupby(['GEOID', 'month'])[variable].mean().reset_index()
    pivoted = grouped.pivot_table(index='GEOID', columns='month', values=variable)

    # Pick the wanted months by label. Dropping months 1-5/11-12 instead would
    # raise KeyError whenever one of them is missing from the data, and naming
    # the columns purely by position would break if a summer month were absent.
    pivoted = pivoted.reindex(columns=list(month_labels))
    pivoted.columns = [f'avg_{variable}_{label}' for label in month_labels.values()]
    return pivoted


In [71]:
# One table of monthly averages per exposure variable, each indexed by GEOID.
tmin_monthly, tmax_monthly, rhum_monthly, smokepm_monthly = (
    calculate_monthly_averages(df, column)
    for column in ("tmin", "tmax", "rhum", "smoke_pm")
)

# Place the four tables side by side (aligned on GEOID) and expose GEOID as a
# regular column.
result = pd.concat(
    [tmin_monthly, tmax_monthly, rhum_monthly, smokepm_monthly],
    axis=1,
).reset_index()

In [72]:
result.head()

Unnamed: 0,GEOID,tmin_jun_avg,tmin_jul_avg,tmin_aug_avg,tmin_sep_avg,tmin_oct_avg,tmax_jun_avg,tmax_jul_avg,tmax_aug_avg,tmax_sep_avg,...,rhum_jun_avg,rhum_jul_avg,rhum_aug_avg,rhum_sep_avg,rhum_oct_avg,smoke_pm_jun_avg,smoke_pm_jul_avg,smoke_pm_aug_avg,smoke_pm_sep_avg,smoke_pm_oct_avg
0,6001400100,11.860003,11.801613,14.837099,15.480001,15.253225,24.04,23.924194,26.943547,27.180002,...,78.096667,84.36129,77.332258,71.41,55.306452,0.0,0.133076,6.670296,25.23939,5.944235
1,6001400200,12.723333,12.966127,15.201613,15.856664,14.656451,23.176668,22.333869,23.956451,26.169998,...,79.64,84.2,80.367742,74.666667,62.9,0.0,0.14501,6.921538,25.34102,6.179484
2,6001400300,12.723333,12.966127,15.201613,15.856664,14.656451,23.176668,22.333869,23.956451,26.169998,...,79.64,84.2,80.367742,74.666667,62.9,0.0,0.14501,6.921538,25.34102,6.179484
3,6001400400,12.723333,12.966127,15.201613,15.856664,14.656451,23.176668,22.333869,23.956451,26.169998,...,79.64,84.2,80.367742,74.666667,62.9,0.0,0.14501,6.921538,25.34102,6.179484
4,6001400500,12.723333,12.966127,15.201613,15.856664,14.656451,23.176668,22.333869,23.956451,26.169998,...,79.64,84.2,80.367742,74.666667,62.9,0.0,0.14501,6.921538,25.34102,6.179484


In [74]:
# Collapse to one row per GEOID, keeping the first population, density and
# geometry value found for it. The columns are selected with a list: selecting
# several GroupBy columns with a bare tuple of names is deprecated and is
# rejected by newer pandas releases.
df = df.groupby("GEOID")[["POPULATION_2020", "POP20_SQMI", "geometry"]].first().reset_index()

  df = df.groupby("GEOID")["POPULATION_2020","POP20_SQMI","geometry"].first().reset_index()


In [76]:
# Attach the monthly-average columns in `result` to the per-GEOID rows.
df = df.merge(result, on="GEOID")

In [77]:
# Copy of the table without the geometry column; this is what gets exported to CSV.
temp = df.drop(columns="geometry")

In [78]:
temp.head()

Unnamed: 0,GEOID,POPULATION_2020,POP20_SQMI,tmin_jun_avg,tmin_jul_avg,tmin_aug_avg,tmin_sep_avg,tmin_oct_avg,tmax_jun_avg,tmax_jul_avg,...,rhum_jun_avg,rhum_jul_avg,rhum_aug_avg,rhum_sep_avg,rhum_oct_avg,smoke_pm_jun_avg,smoke_pm_jul_avg,smoke_pm_aug_avg,smoke_pm_sep_avg,smoke_pm_oct_avg
0,6001400100,3038,1133.6,11.860003,11.801613,14.837099,15.480001,15.253225,24.04,23.924194,...,78.096667,84.36129,77.332258,71.41,55.306452,0.0,0.133076,6.670296,25.23939,5.944235
1,6001400200,2001,8700.0,12.723333,12.966127,15.201613,15.856664,14.656451,23.176668,22.333869,...,79.64,84.2,80.367742,74.666667,62.9,0.0,0.14501,6.921538,25.34102,6.179484
2,6001400300,5504,12800.0,12.723333,12.966127,15.201613,15.856664,14.656451,23.176668,22.333869,...,79.64,84.2,80.367742,74.666667,62.9,0.0,0.14501,6.921538,25.34102,6.179484
3,6001400400,4112,14685.7,12.723333,12.966127,15.201613,15.856664,14.656451,23.176668,22.333869,...,79.64,84.2,80.367742,74.666667,62.9,0.0,0.14501,6.921538,25.34102,6.179484
4,6001400500,3644,15843.5,12.723333,12.966127,15.201613,15.856664,14.656451,23.176668,22.333869,...,79.64,84.2,80.367742,74.666667,62.9,0.0,0.14501,6.921538,25.34102,6.179484


In [79]:
# Write the final per-GEOID table to the current working directory.
# NOTE(review): no index= argument is passed, so the DataFrame index is
# presumably written as an extra leading column — confirm whether index=False is wanted.
temp.to_csv("climate_exposure_and_population_ca_year_2020.csv")