In [None]:
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
from tqdm import tqdm

%load_ext lab_black

## ESRI data

The data are aggregated to the most recent (2020) census tracts. The netCDF inputs are:

- Daily minimum temperature, 2006-2021
- Daily maximum temperature, 2006-2021
- Daily relative humidity, 2006-2021
- Daily smoke, 2006-2021
- Daily PM2.5, 2006-2020

In [None]:
# Open the maximum-temperature netCDF as an xarray Dataset.
tmax = xr.open_dataset("data/esri/MaxTemp_2006_2021_Cali.nc")

In [None]:
# Quick visual sanity check of the maximum-temperature variable.
tmax["MAX_TEMPERATURE_NONE_SPATIAL_NEIGHBORS"].plot()

In [None]:
# Open the minimum-temperature netCDF.
tmin = xr.open_dataset("data/esri/MinTemp_2006_2021_Cali.nc")

In [None]:
# Open the relative-humidity netCDF.
rhum = xr.open_dataset("data/esri/RelHum_2006_2021_Cali.nc")

## Join tmax, tmin and rhum

In [None]:
df = xr.merge([tmax, tmin[["MIN_TEMPERATURE_NONE_SPATIAL_NEIGHBORS"]]])

In [None]:
df = xr.merge([df, rhum[["RELATIVE_HUMIDITY_NONE_SPATIAL_NEIGHBORS"]]])

In [None]:
del tmin
del tmax
del rhum

In [None]:
df = df[
    [
        "FIPS",
        "MAX_TEMPERATURE_NONE_SPATIAL_NEIGHBORS",
        "MIN_TEMPERATURE_NONE_SPATIAL_NEIGHBORS",
        "RELATIVE_HUMIDITY_NONE_SPATIAL_NEIGHBORS",
    ]
].to_dataframe()

In [None]:
df = df.rename(
    columns={
        "MAX_TEMPERATURE_NONE_SPATIAL_NEIGHBORS": "tmax",
        "MIN_TEMPERATURE_NONE_SPATIAL_NEIGHBORS": "tmin",
        "RELATIVE_HUMIDITY_NONE_SPATIAL_NEIGHBORS": "rhum",
    }
)

In [None]:
df.head()

In [None]:
df = df.loc[(slice(pd.Timestamp("2006-01-01"), None), slice(None)), :]

In [None]:
df = df.loc[(slice(None, pd.Timestamp("2020-12-31")), slice(None)), :]

In [None]:
df.to_parquet("outputs/temp_merge_1.parquet")

## Add wildfire day

In [None]:
df = pd.read_parquet("outputs/temp_merge_1.parquet")

In [None]:
len(df)

In [None]:
df = df.reset_index()

In [None]:
df = df.groupby(by=["time", "lat", "lon"]).first()

In [None]:
wf = xr.open_dataset("data/esri/WildfirePresence_2006_2021_CA_daily.nc")

In [None]:
# "FIPS" start at 0, 1, 2, 3,...

In [None]:
wf = wf["FIREDAY_NONE_ZEROS"].to_dataframe()

In [None]:
wf = wf.rename(columns={"FIREDAY_NONE_ZEROS": "wfday"})

In [None]:
len(wf)

In [None]:
wf = wf.groupby(by=["time", "lat", "lon"]).first()

In [None]:
tqdm.pandas()

In [None]:
df = df.join(wf, rsuffix="_wf").progress_apply(lambda x: x)

In [None]:
df.to_parquet("outputs/temp_merge_2.parquet")

In [None]:
len(df)

In [None]:
df.isna().sum()

## Add PM 2.5

In [None]:
pm25 = xr.open_dataset("data/esri/PM25_Nature_2006_2020_Cali.nc")

In [None]:
df_pm25 = pm25[["MEAN_NONE_SPATIAL_NEIGHBORS"]].to_dataframe()

In [None]:
df_pm25 = df_pm25.rename(columns={"MEAN_NONE_SPATIAL_NEIGHBORS": "pm25"})

In [None]:
df_pm25 = df_pm25.reset_index()

In [None]:
df_pm25 = df_pm25.groupby(by=["time", "lat", "lon"]).first()

In [None]:
df_pm25.head()

In [None]:
df.head()

In [None]:
df = df.join(df_pm25, rsuffix="_pm25").progress_apply(lambda x: x)

In [None]:
df.to_parquet("outputs/temp_merge_3.parquet")

In [None]:
df.isna().sum()

In [None]:
df.head()

In [None]:
len(df.FIPS.unique())

In [None]:
df = df.reset_index()

## Add crosswalk

In [None]:
# Resume from the third checkpoint (climate + wildfire + PM2.5).
df = pd.read_parquet("outputs/temp_merge_3.parquet")

In [None]:
df.head()

In [None]:
# Turn the index levels into ordinary columns.
df = df.reset_index()

In [None]:
import geopandas as gpd

In [None]:
# Read the FIPS mapping geodatabase into a GeoDataFrame.
gdf = gpd.read_file("data/esri/FIPSMapping.gdb")

In [None]:
df.FIPS = df.FIPS.astype(int)

In [None]:
gdf = gdf.drop(columns=["Shape_Length", "Shape_Area"])

In [None]:
df = df.merge(gdf, on="FIPS")

In [None]:
len(df)

In [None]:
df.isna().sum()

In [None]:
type(df)

In [None]:
df = df.drop(columns="geometry")

In [None]:
df.to_parquet("outputs/temp_merge_4.parquet")

## Join smoke PM2.5

In [None]:
# smoke_df = pd.read_parquet("outputs/smoke_pm25_predicted_with_fips.parquet")

In [None]:
smoke_df = pd.read_csv(
    "data/smoke_pm/smoke_pm_all_time_2020_map.csv", parse_dates=["date"]
)

In [None]:
smoke_df.head()

In [None]:
df.head()

In [None]:
df["FIPS_1"] = df["FIPS_1"].astype(int)

In [None]:
merged = df.merge(smoke_df, left_on=["FIPS_1", "time"], right_on=["GEOID", "date"])

In [None]:
merged.head()

In [None]:
len(merged)

In [None]:
merged = merged.drop(
    columns=["lat", "lon", "locations", "FIPS", "locations_pm25", "date"]
)

In [None]:
merged.to_parquet("outputs/final_merge_5.parquet")

In [None]:
merged.isna().sum()

In [None]:
# Re-read the FIPS mapping geodatabase so `gdf` is a fresh GeoDataFrame
# for the maps below.
gdf = gpd.read_file("data/esri/FIPSMapping.gdb")

In [None]:
# Quick look at the geometries.
gdf.plot()

In [None]:
temp = merged.groupby("FIPS").sum()

In [None]:
temp = temp.reset_index()

In [None]:
temp.dtypes

In [None]:
gdf.dtypes

In [None]:
temp.FIPS = temp.FIPS.astype(int)

In [None]:
gdf = gdf.merge(temp, left_on="FIPS", right_on="FIPS")

In [None]:
# Choropleth of the per-"FIPS" totals of "tmax".
# `plt` comes from the import cell at the top of the notebook; this cell
# previously ran before matplotlib had been imported.
fig, ax = plt.subplots(1, 1)
gdf.plot(column="tmax", ax=ax, legend=True, linewidth=0, cmap="viridis_r")
ax.set_title("tmax summed over time, by FIPS")

plt.show()

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Choropleth of the per-"FIPS" totals of "smoke_pm". The title lets this
# figure be told apart from the tmax map when the notebook is skimmed.
fig, ax = plt.subplots(1, 1)
gdf.plot(column="smoke_pm", ax=ax, legend=True, linewidth=0, cmap="viridis_r")
ax.set_title("smoke_pm summed over time, by FIPS")

plt.show()