# Hotspots

Hotspots California map: 
- Wildifre-wildifreSmokePM2.5, 
- Heat-wildfire,
- Heat-PM2.5, 
- Wildifre-Heat-SmokePM2.5P

In [5]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

In [None]:
df = pd.read_parquet("outputs/merged_heatday_coldday_wfday_polluted_smoke_polluted_rolling_wc.parquet")

In [2]:
df = df.drop(columns = ['tmax', 'smoke_pm']) # ['tmin','tmax','pm25','smoke_pm'])

### Find hotspots

Terminology:
- h heat
- w wildfire
- p polluted
- s smoke polluted

Example:
- hwp is a hotspot where heat wildfire and pollution are co-occurring 

- _hws is where heat OR wildfire OR smoke are occurring on the same day

- hws_2d is where heat wildfire and smoke are occurring within 24 hours window (2 days)

In [3]:
df['hw'] = df['wfday'] & df['heatday']
#df['hp'] = df['heatday'] & df['polluted']
#df['hwp'] = df['heatday'] & df['wfday'] & df['polluted']

In [4]:
df['_hws'] = df[['heatday','wfday','smoke_pm_non_zero']].any(axis=1)

In [5]:
# smoke_pm_non_zero taken as main

df['hs'] = df['heatday'] & df['smoke_pm_non_zero']
df['hws'] = df['heatday'] & df['wfday'] & df['smoke_pm_non_zero']
df['ws'] = df['wfday'] & df['smoke_pm_non_zero']

In [6]:
df['hs5'] = df['heatday'] & df['smoke_pm_gt_five']
df['hws5'] = df['heatday'] & df['wfday'] & df['smoke_pm_gt_five']
df['ws5'] = df['wfday'] & df['smoke_pm_gt_five']

In [7]:
#df = df.drop(columns=['index', 'FIPS_1'])

In [8]:
df.head()

Unnamed: 0,time,GEOID,wfday,heatday,smoke_pm_non_zero,smoke_pm_gt_five,hw,_hws,hs,hws,ws,hs5,hws5,ws5
0,2006-01-01,4001942600,False,False,False,False,False,False,False,False,False,False,False,False
1,2006-01-01,4001942700,False,False,False,False,False,False,False,False,False,False,False,False
2,2006-01-01,4001944000,False,False,False,False,False,False,False,False,False,False,False,False
3,2006-01-01,4001944100,False,False,False,False,False,False,False,False,False,False,False,False
4,2006-01-01,4001944201,False,False,False,False,False,False,False,False,False,False,False,False


In [9]:
df.to_parquet("outputs/hotspots_per_fips_rolling_wc.parquet")

## Hotspots in time window of 2D

In [18]:
df = pd.read_parquet("outputs/hotspots_per_fips_rolling.parquet")

In [11]:
df.set_index('time', inplace=True)

In [12]:
df.head()

Unnamed: 0_level_0,GEOID,wfday,heatday,smoke_pm_non_zero,smoke_pm_gt_five,hw,_hws,hs,hws,ws,hs5,hws5,ws5
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2006-01-01,4001942600,False,False,False,False,False,False,False,False,False,False,False,False
2006-01-01,4001942700,False,False,False,False,False,False,False,False,False,False,False,False
2006-01-01,4001944000,False,False,False,False,False,False,False,False,False,False,False,False
2006-01-01,4001944100,False,False,False,False,False,False,False,False,False,False,False,False
2006-01-01,4001944201,False,False,False,False,False,False,False,False,False,False,False,False


In [13]:
def get_time_hotspot(x):
    x['heatday_2d'] = x['heatday'].rolling('2D', min_periods=1).max()
    x['smoke_pm_non_zero_2d'] = x['smoke_pm_non_zero'].rolling('2D', min_periods=1).max()
    x['wfday_2d'] = x['wfday'].rolling('2D', min_periods=1).max()
    return x

df['heatday_2d'] = False  # Initialize the 'new' column as False
df['smoke_pm_non_zero_2d'] = False  # Initialize the 'new' column as False
df['wfday_2d'] = False  # Initialize the 'new' column as False

df = df.groupby(by=['GEOID']).apply(get_time_hotspot)

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  df = df.groupby(by=['GEOID']).apply(get_time_hotspot)


In [14]:
df['heatday_2d'] = df['heatday_2d'].astype(int)
df['smoke_pm_non_zero_2d'] = df['smoke_pm_non_zero_2d'].astype(int)
df['wfday_2d'] = df['wfday_2d'].astype(int)

In [15]:
df['_hws_2d'] = df[['heatday_2d','wfday_2d','smoke_pm_non_zero_2d']].any(axis=1)
df['hs_2d'] = df['heatday_2d'] & df['smoke_pm_non_zero_2d']
df['hws_2d'] = df['heatday_2d'] & df['wfday_2d'] & df['smoke_pm_non_zero_2d']
df['ws_2d'] = df['wfday_2d'] & df['smoke_pm_non_zero_2d']

In [18]:
df.columns

Index(['GEOID', 'wfday', 'heatday', 'smoke_pm_non_zero', 'smoke_pm_gt_five',
       'hw', '_hws', 'hs', 'hws', 'ws', 'hs5', 'hws5', 'ws5', 'heatday_2d',
       'smoke_pm_non_zero_2d', 'wfday_2d', '_hws_2d', 'hs_2d', 'hws_2d',
       'ws_2d'],
      dtype='object')

In [19]:
df.head()

Unnamed: 0_level_0,GEOID,wfday,heatday,smoke_pm_non_zero,smoke_pm_gt_five,hw,_hws,hs,hws,ws,hs5,hws5,ws5,heatday_2d,smoke_pm_non_zero_2d,wfday_2d,_hws_2d,hs_2d,hws_2d,ws_2d
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2006-01-01,4001942600,False,False,False,False,False,False,False,False,False,False,False,False,0,0,0,False,0,0,0
2006-01-01,4001942700,False,False,False,False,False,False,False,False,False,False,False,False,0,0,0,False,0,0,0
2006-01-01,4001944000,False,False,False,False,False,False,False,False,False,False,False,False,0,0,0,False,0,0,0
2006-01-01,4001944100,False,False,False,False,False,False,False,False,False,False,False,False,0,0,0,False,0,0,0
2006-01-01,4001944201,False,False,False,False,False,False,False,False,False,False,False,False,0,0,0,False,0,0,0


In [20]:
df.to_parquet("outputs/hotspots_per_fips_rolling_wc.parquet") 

### Get stats

In [54]:
df = pd.read_parquet("outputs/hotspots_per_fips_rolling_wc.parquet") 

In [55]:
df.head()

Unnamed: 0_level_0,GEOID,wfday,heatday,smoke_pm_non_zero,smoke_pm_gt_five,hw,_hws,hs,hws,ws,hs5,hws5,ws5,heatday_2d,smoke_pm_non_zero_2d,wfday_2d,_hws_2d,hs_2d,hws_2d,ws_2d
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2006-01-01,4001942600,False,False,False,False,False,False,False,False,False,False,False,False,0,0,0,False,0,0,0
2006-01-01,4001942700,False,False,False,False,False,False,False,False,False,False,False,False,0,0,0,False,0,0,0
2006-01-01,4001944000,False,False,False,False,False,False,False,False,False,False,False,False,0,0,0,False,0,0,0
2006-01-01,4001944100,False,False,False,False,False,False,False,False,False,False,False,False,0,0,0,False,0,0,0
2006-01-01,4001944201,False,False,False,False,False,False,False,False,False,False,False,False,0,0,0,False,0,0,0


In [57]:
total_census_tracts = len(df.GEOID.unique())

In [71]:
total_census_tracts

18108

In [72]:
total_days

99213732

In [58]:
import plotting

hspts = plotting.get_hotspot_list()

In [59]:
df = df[hspts]

In [60]:
total_days = len(df)

In [61]:
exposure_day_counts = df.sum()

In [62]:
df_stats = exposure_day_counts.reset_index()
df_stats.columns = ["value", "days"]
df_stats["percent"] = round(df_stats["days"]/total_days*100., 3)

In [68]:
df_stats["avg_census"] = round(df_stats["days"]/total_census_tracts, 3)

In [63]:
temp_dict = {}
hsptdict = plotting.get_hotspopt_dict()

for key in hsptdict.keys():
    temp_dict[key] = hsptdict[key]['title_map']

In [64]:
df_stats['value_full'] = df_stats['value'].replace(temp_dict)

In [65]:
from tabulate import tabulate

In [70]:
markdown_table = tabulate(df_stats[["value_full", "days", "percent", "avg_census"]], headers='keys', tablefmt='pipe', showindex=False)
print(markdown_table)

| value_full                                                                      |   avg_census |
|:--------------------------------------------------------------------------------|-------------:|
| Total days of wildfire                                                          |        6.386 |
| Total days of heat (over 95th percentile over last 5 years)                     |      302.783 |
| Total days of smoke $PM_{2.5}$ (non zero)                                       |      398.949 |
| Total days of smoke $PM_{2.5}$ (over >5μg/$m^3$)                                |      133.612 |
| Total days of heat and wildfire concurrence                                     |        0.619 |
| Total days of heat or wildfire or smoke $PM_{2.5}$                              |      629.969 |
| Total days of heat and non-zero smoke $PM_{2.5}$ concurrence                    |       75.584 |
| Total days of heat, wildfire and non-zero smoke $PM_{2.5}$ concurrence          |        0.467 |
| Total da

| value_full                                                                      |     days |   percent |
|:--------------------------------------------------------------------------------|---------:|----------:|
| Total days of wildfire                                                          |   115632 |     0.117 |
| Total days of heat (over 95th percentile over last 5 years)                     |  5482803 |     5.526 |
| Total days of smoke $PM_{2.5}$ (non zero)                                       |  7224168 |     7.281 |
| Total days of smoke $PM_{2.5}$ (over >5μg/$m^3$)                                |  2419449 |     2.439 |
| Total days of heat and wildfire concurrence                                     |    11205 |     0.011 |
| Total days of heat or wildfire or smoke $PM_{2.5}$                              | 11407479 |    11.498 |
| Total days of heat and non-zero smoke $PM_{2.5}$ concurrence                    |  1368675 |     1.38  |
| Total days of heat, wildfire and non-zero smoke $PM_{2.5}$ concurrence          |     8448 |     0.009 |
| Total days of wildfire & smoke $PM_{2.5}$                                       |    43692 |     0.044 |
| Total days of heat and smoke $PM_{2.5}$ (over >5μg/$m^3$) concurrence           |   495876 |     0.5   |
| Total days of heat, wildfire and smoke $PM_{2.5}$ (over >5μg/$m^3$) concurrence |     4853 |     0.005 |
| Total days of wildfire and smoke $PM_{2.5}$ (over >5μg/$m^3$) concurrence       |    19280 |     0.019 |
| Total days of heat (over 95th percentile over last 5 years) (2D)                |  7625211 |     7.686 |
| Total days of smoke $PM_{2.5}$ (non zero) (2D)                                  | 10364963 |    10.447 |
| Total days of wildfire (2D)                                                     |   197017 |     0.199 |
| Total days of heat or wildfire or smoke $PM_{2.5}$ (2D)                         | 15606396 |    15.73  |
| Total days of heat and non-zero smoke $PM_{2.5}$ concurrence (2D)               |  2491521 |     2.511 |
| Total days of heat, wildfire and non-zero smoke $PM_{2.5}$ concurrence (2D)     |    17305 |     0.017 |
| Total days of wildfire & smoke $PM_{2.5}$ (2D)                                  |    84732 |     0.085 |