# Hotspots

Hotspots California map: 
- Wildifre-wildifreSmokePM2.5, 
- Heat-wildfire,
- Heat-PM2.5, 
- Wildifre-Heat-SmokePM2.5P

In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

In [None]:
df = pd.read_parquet("outputs/d1-heat-wf-smokeday-230929.parquet")
# df.to_parquet("outputs/d1-heat-wf-smokeday-230929.parquet")

In [7]:
df = df.drop(columns = ['tmax', 'smoke_pm']) # ['tmin','tmax','pm25','smoke_pm'])

### Find hotspots

Terminology:
- h heat
- w wildfire
- p polluted
- s smoke polluted

Example:
- hwp is a hotspot where heat wildfire and pollution are co-occurring 

- _hws is where heat OR wildfire OR smoke are occurring on the same day

- hws_2d is where heat wildfire and smoke are occurring within 24 hours window (2 days)

In [8]:
df['hw'] = df['wfday'] & df['heatday']
#df['hp'] = df['heatday'] & df['polluted']
#df['hwp'] = df['heatday'] & df['wfday'] & df['polluted']

In [9]:
df['_hws'] = df[['heatday','wfday','smoke_pm_non_zero']].any(axis=1)

In [10]:
# smoke_pm_non_zero taken as main

df['hs'] = df['heatday'] & df['smoke_pm_non_zero']
df['hws'] = df['heatday'] & df['wfday'] & df['smoke_pm_non_zero']
df['ws'] = df['wfday'] & df['smoke_pm_non_zero']

In [11]:
df['hs5'] = df['heatday'] & df['smoke_pm_gt_five']
df['hws5'] = df['heatday'] & df['wfday'] & df['smoke_pm_gt_five']
df['ws5'] = df['wfday'] & df['smoke_pm_gt_five']

In [12]:
#df = df.drop(columns=['index', 'FIPS_1'])

In [13]:
df.head()

Unnamed: 0,time,GEOID,wfday,heatday,smoke_pm_non_zero,smoke_pm_gt_five,hw,_hws,hs,hws,ws,hs5,hws5,ws5
0,2006-01-01,4001942600,False,False,False,False,False,False,False,False,False,False,False,False
1,2006-01-01,4001942700,False,False,False,False,False,False,False,False,False,False,False,False
2,2006-01-01,4001944000,False,False,False,False,False,False,False,False,False,False,False,False
3,2006-01-01,4001944100,False,False,False,False,False,False,False,False,False,False,False,False
4,2006-01-01,4001944201,False,False,False,False,False,False,False,False,False,False,False,False


In [14]:
df.to_parquet("outputs/d2-events-230929.parquet")

## Hotspots in time window of 2D

In [18]:
df = pd.read_parquet("outputs/d2-events-230929.parquet")

In [15]:
df.set_index('time', inplace=True)

In [16]:
df.head()

Unnamed: 0_level_0,GEOID,wfday,heatday,smoke_pm_non_zero,smoke_pm_gt_five,hw,_hws,hs,hws,ws,hs5,hws5,ws5
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2006-01-01,4001942600,False,False,False,False,False,False,False,False,False,False,False,False
2006-01-01,4001942700,False,False,False,False,False,False,False,False,False,False,False,False
2006-01-01,4001944000,False,False,False,False,False,False,False,False,False,False,False,False
2006-01-01,4001944100,False,False,False,False,False,False,False,False,False,False,False,False
2006-01-01,4001944201,False,False,False,False,False,False,False,False,False,False,False,False


In [17]:
def get_time_hotspot(x):
    x['heatday_2d'] = x['heatday'].rolling('2D', min_periods=1).max()
    x['smoke_pm_non_zero_2d'] = x['smoke_pm_non_zero'].rolling('2D', min_periods=1).max()
    x['wfday_2d'] = x['wfday'].rolling('2D', min_periods=1).max()
    return x

df['heatday_2d'] = False  # Initialize the 'new' column as False
df['smoke_pm_non_zero_2d'] = False  # Initialize the 'new' column as False
df['wfday_2d'] = False  # Initialize the 'new' column as False

df = df.groupby(by=['GEOID']).apply(get_time_hotspot)

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  df = df.groupby(by=['GEOID']).apply(get_time_hotspot)


In [18]:
df['heatday_2d'] = df['heatday_2d'].astype(int)
df['smoke_pm_non_zero_2d'] = df['smoke_pm_non_zero_2d'].astype(int)
df['wfday_2d'] = df['wfday_2d'].astype(int)

In [19]:
df['_hws_2d'] = df[['heatday_2d','wfday_2d','smoke_pm_non_zero_2d']].any(axis=1)
df['hs_2d'] = df['heatday_2d'] & df['smoke_pm_non_zero_2d']
df['hws_2d'] = df['heatday_2d'] & df['wfday_2d'] & df['smoke_pm_non_zero_2d']
df['ws_2d'] = df['wfday_2d'] & df['smoke_pm_non_zero_2d']

In [20]:
df.to_parquet("outputs/d2-events-2d-230929.parquet") 

In [2]:
df = pd.read_parquet("outputs/d2-events-2d-230929.parquet")

In [4]:
def get_time_hotspot(x):
    x['smoke_pm_gt_five_2d'] = x['smoke_pm_gt_five'].rolling('2D', min_periods=1).max()
    return x

df['smoke_pm_gt_five_2d'] = False  

df = df.groupby(by=['GEOID']).apply(get_time_hotspot)

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  df = df.groupby(by=['GEOID']).apply(get_time_hotspot)


In [5]:
df['hw_2d'] = df['heatday_2d'] & df['wfday_2d']

In [8]:
df['smoke_pm_gt_five_2d'] = df['smoke_pm_gt_five_2d'].astype(int)

In [9]:
df['hs_2d_5'] = df['heatday_2d'] & df['smoke_pm_gt_five_2d']

In [10]:
df['ws_2d_5'] = df['wfday_2d'] & df['smoke_pm_gt_five_2d']

In [11]:
df['hws_2d_5'] = df['heatday_2d'] & df['wfday_2d'] & df['smoke_pm_gt_five_2d']

In [12]:
df.to_parquet("outputs/d2-events-2d5-230929.parquet") 