# Hotspots

Hotspot maps for California:
- Wildfire and wildfire-smoke PM2.5,
- Heat and wildfire,
- Heat and PM2.5,
- Wildfire, heat and smoke PM2.5

In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

In [None]:
# Load the merged event table (the "_rolling" variant, per the file name).
# The cells below read the columns wfday, heatday, coldday, polluted and
# smoke_polluted from it.
df = pd.read_parquet("outputs/merged_heatday_coldday_wfday_polluted_smoke_polluted_rolling.parquet")

In [None]:
# Discard the raw measurement columns; only the event flag columns are used
# in the cells below.
df = df.drop(['tmin', 'tmax', 'pm25', 'smokePM_pred'], axis=1)

In [None]:
df.head()

### Find hotspots

Terminology:
- h heat
- w wildfire
- p polluted
- s smoke polluted

Example:
- hwp is a hotspot where heat, wildfire and pollution are co-occurring

In [None]:
# Short aliases for the four per-row event flags.
heat = df['heatday']
fire = df['wfday']
pm = df['polluted']
smoke = df['smoke_polluted']

# Pairwise co-occurrence: both events flagged on the same row.
df['ws'] = fire & smoke
df['hw'] = fire & heat
df['hp'] = heat & pm
df['hs'] = heat & smoke

# Triple co-occurrence.
df['hwp'] = heat & fire & pm
df['hws'] = heat & fire & smoke

# Unlike the columns above, 'hwps' is not an AND of the flags: it is the
# number of the four events flagged on the row.
df['hwps'] = df[['heatday', 'wfday', 'polluted', 'smoke_polluted']].sum(axis=1)

In [None]:
# Persist the table, including the hotspot columns added above.
df.to_parquet("outputs/hotspots_per_fips_rolling.parquet")

### Get county codes for visualization

In [None]:
# FIPS -> COUNTY_CODE lookup table. COUNTY_CODE is read as text
# (presumably to keep leading zeros -- confirm against the file).
crosswalk = pd.read_csv(
    "data/fips_crosswalk_merged_county.csv",
    dtype={"COUNTY_CODE": str},
    usecols=["FIPS", "COUNTY_CODE"],
)

In [None]:
# Turn the index levels into regular columns; the merge further down joins
# on the "FIPS" column.
df = df.reset_index()
df.head()

In [None]:
# Attach the county code to every row; the left join keeps rows whose FIPS
# has no match in the crosswalk (COUNTY_CODE is then missing).
merged_df = pd.merge(df, crosswalk, on="FIPS", how="left")

### Aggregate per county

In [None]:
# County-level daily flags: a flag is set for a county/day when the event
# occurred in ANY row (FIPS) of that county on that day.
#
# The previous implementation sorted by 'hwps' and dropped duplicates, which
# kept a single row per county/day and silently discarded events that were
# only flagged in other rows of the same county. Taking the per-column
# maximum instead leaves 'hwps' unchanged (largest simultaneous-event count
# in the county that day) and turns every flag into "true anywhere in the
# county".
# Rows without a COUNTY_CODE are dropped here; the per-county sum further
# down ignores them as well.
county_day_cols = [
    'ws', 'hw', 'hp', 'hs', 'hwp', 'hws', 'hwps',
    'wfday', 'heatday', 'coldday', 'polluted', 'smoke_polluted',
]
merged_df = (
    merged_df
    .groupby(['time', 'COUNTY_CODE'], as_index=False)[county_day_cols]
    .max()
)

In [None]:
merged_df.head()

In [None]:
# Keep only the day, the county code and the event/hotspot columns.
merged_df = merged_df.loc[:, [
    'time', 'COUNTY_CODE',
    'ws', 'hw', 'hp', 'hs', 'hwp', 'hws', 'hwps',
    'wfday', 'heatday', 'coldday', 'polluted', 'smoke_polluted',
]]

### Hotspot trends over time on Census Tract

In [None]:
# Yearly totals per FIPS: within each FIPS group the rows are binned by
# calendar year of "time" and the selected columns are summed.
# NOTE(review): recent pandas releases deprecate the 'Y' alias in favour of
# 'YE' -- confirm against the pandas version this notebook runs on.
trends_df = (
    df
    .groupby("FIPS")[[
        'time',
        'ws', 'hw', 'hp', 'hs', 'hwp', 'hws', 'hwps',
        'wfday', 'heatday', 'coldday', 'polluted', 'smoke_polluted',
    ]]
    .resample('Y', on="time")
    .sum()
)

In [None]:
trends_df.head()

In [None]:
import plotting
%load_ext autoreload
%autoreload 2
# Per-hotspot plot settings keyed by column name; the cells below read the
# entries "title", "year_min", "year_max" and "title_map" of each value.
hs_dict = plotting.get_hotspopt_dict()

In [None]:
# Move the index produced by the grouped resample back into regular columns.
trends_df = trends_df.reset_index()

In [None]:
trends_df.head()

In [None]:
# One trend plot per hotspot type, restricted to the five FIPS with the
# largest total for that hotspot column.
for hotspot, settings in hs_dict.items():
    totals_per_fips = trends_df.groupby("FIPS")[hotspot].sum()
    top_fips = totals_per_fips.sort_values().tail(5).index

    plotting.trend_plot(
        trends_df,
        top_fips,
        hotspot,
        settings["title"],
        settings["year_min"],
        settings["year_max"],
    )

### Wildfire-Heat-PM25

In [None]:
# Sum every event/hotspot column per county over all rows of merged_df
# (the "time" column is dropped before summing).
merged_df = (
    merged_df
    .loc[:, [
        'COUNTY_CODE',
        'ws', 'hw', 'hp', 'hs', 'hwp', 'hws', 'hwps',
        'wfday', 'heatday', 'coldday', 'polluted', 'smoke_polluted',
    ]]
    .groupby('COUNTY_CODE')
    .sum()
)

In [None]:
# COUNTY_CODE became the index in the groupby above; make it a column again
# (it is used as the merge key with the vulnerability index further down).
merged_df=merged_df.reset_index()

In [None]:
# County data handed to plotting.draw_map in the cells below
# (presumably county geometries -- see plotting.get_counties).
counties = plotting.get_counties()

In [None]:
# One county map per hotspot type, titled with that type's "title_map" entry.
for hotspot, settings in hs_dict.items():
    plotting.draw_map(merged_df, counties, hotspot, settings["title_map"])

## Add Vulnerability Index

In [None]:
# Read only the columns needed for the county-level index; STCNTY is kept as
# text so it can be matched against COUNTY_CODE later.
cols = ["STCNTY", "FIPS", "E_TOTPOP", "RPL_THEMES"]
vind = pd.read_csv("data/California.csv", dtype={"STCNTY": str}, usecols=cols)

# Keep rows with RPL_THEMES above -1 (presumably this removes negative
# missing-value sentinels -- confirm against the data dictionary).
vind = vind.loc[vind["RPL_THEMES"] > -1]

In [None]:
#vind_grouped = vind.groupby("STCNTY")["RPL_THEMES"].mean() #apply(pd.Series.mode)

In [None]:
# Total population of the county (STCNTY) each row belongs to, repeated on
# every row of that county.
vind["STCNTY_TOTALPOP"] = vind.groupby('STCNTY')["E_TOTPOP"].transform("sum")

# Share of the county population contributed by the row.
vind['WEIGHT'] = vind['E_TOTPOP'].div(vind['STCNTY_TOTALPOP'])

In [None]:
def weighted_mean(df):
    """Return the sum of ``RPL_THEMES * WEIGHT`` over the rows of *df*.

    When the ``WEIGHT`` values of the rows add up to one, this is the
    weighted mean of ``RPL_THEMES``. Rows whose product is missing are
    skipped by the sum.
    """
    weighted_scores = df['RPL_THEMES'] * df['WEIGHT']
    return weighted_scores.sum()

In [None]:
# One value per county: weighted_mean is applied to the rows of each STCNTY
# group. Because WEIGHT is each row's share of the county population, the
# result is the population-weighted mean of RPL_THEMES.
vind_grouped = vind.groupby("STCNTY").apply(weighted_mean)

In [None]:
# Make STCNTY a regular column so it can be used as a merge key; the
# per-county values end up in a column labelled 0 (renamed to 'svi' later).
vind_grouped=vind_grouped.reset_index()

In [None]:
# Combine county hotspot totals with the county vulnerability values; the
# outer join keeps counties that appear in only one of the two tables.
hotspotvind = merged_df.merge(
    vind_grouped,
    how="outer",
    left_on="COUNTY_CODE",
    right_on="STCNTY",
)

In [None]:
# Written to the current working directory.
# NOTE(review): this runs before the rename to 'svi', so the vulnerability
# column is labelled 0 in the CSV -- confirm that is intended.
hotspotvind.to_csv("hotspot_vind_county.csv")

In [None]:
# Give the column labelled 0 (the per-county vulnerability value) a readable name.
hotspotvind = hotspotvind.rename(columns={0: 'svi'})

In [None]:
hotspotvind.head()

In [None]:
# County map of the 'svi' column.
plotting.draw_map(hotspotvind, counties, 'svi', "Vunerability index")

### Window of 7 days

A hotspot is flagged when all three events (wildfire, heat and smoke pollution) occurred within the same 7-day window.

In [None]:
# Rebind df to the event table stored without the "_rolling" suffix (the top
# of the notebook loaded the "_rolling" file instead).
df = pd.read_parquet("outputs/merged_heatday_coldday_wfday_polluted_smoke_polluted.parquet")

In [None]:
# Discard the raw measurement columns; only the event flags are needed for
# the 7-day window.
df = df.drop(['tmin', 'tmax', 'pm25', 'smokePM_pred'], axis=1)

In [None]:
# Move the index levels into regular columns so that "time" alone can be set
# as the index in the next cell.
df = df.reset_index()

In [None]:
# Index by time: the time-based ('7D') rolling window below needs it.
df = df.set_index('time')

In [None]:
df.head()

In [None]:
# Flag rows where wildfire, heat and smoke pollution were each observed at
# least once during the trailing 7 days *for the same FIPS*.
#
# The window has to be evaluated per FIPS: on the pooled frame the time index
# repeats once per FIPS, so a plain rolling('7D') either mixes observations
# from every location into one window or fails because the index is not
# monotonic.
# NOTE(review): assumes reset_index() above produced a "FIPS" column -- confirm.
event_cols = ["wfday", "heatday", "smoke_polluted"]

# Group-major, time-sorted order: each group's index is monotonic (required
# for a time-based window) and the grouped result comes back in the same row
# order as df, so it can be written back positionally (time stamps are not
# unique across FIPS, so label alignment is not an option).
df = df.sort_values(["FIPS", "time"])
events_in_window = (
    df.groupby("FIPS")[event_cols]
    .rolling(window='7D', min_periods=1)
    .max()
)

# The rolling max is 1.0 when the event was seen in the window, so a row sum
# of 3 means all three events were seen.
df['all_true_last_7_days'] = (events_in_window.sum(axis=1) == 3).to_numpy()

In [None]:
df.head()

In [None]:
# Number of rows flagged as 7-day hotspots (each True counts as 1).
df["all_true_last_7_days"].sum()