# Hotspots

Hotspots California map: 
- Wildifre-wildifreSmokePM2.5, 
- Heat-wildfire,
- Heat-PM2.5, 
- Wildifre-Heat-SmokePM2.5P

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd

In [2]:
df = pd.read_parquet("outputs/hotspots_per_fips_rolling_wc.parquet") 

In [3]:
gdf = gpd.read_file('data/WesternUSCensusTract/CensusTract2020_WesternUS.shp')

In [4]:
gdf = gdf[['FIPS', 'POPULATION', 'STATE_ABBR']]

## Total counts (tracts, days)

In [7]:
total_census_tracts = len(df.GEOID.unique())
total_census_tracts

18108

In [8]:
total_days = len(df)
total_days = total_days / total_census_tracts
total_days

5479.0

In [9]:
total_years = 15

In [10]:
import plotting
hspts = plotting.get_hotspot_list()

## Get Exposure days per census tract per year

In [11]:
def get_exposure_per_census_per_year(data):
    # sum all the exposure days per coexposure
    exposure_day_counts = data.sum().reset_index()
    exposure_day_counts.columns = ["value", "days"]

    # divide the sum by the numbers of census tracts and the number of years
    exposure_day_counts["avg_census"] = round(exposure_day_counts["days"]/total_census_tracts/total_years, 2)
    # df_stats["percent"] = round(df_stats["days"]/total_days*100., 3)
    return exposure_day_counts


In [12]:
df_ecy = get_exposure_per_census_per_year(df[hspts])
df_ecy

Unnamed: 0,value,days,avg_census
0,wfday,115632,0.43
1,heatday,1560169,5.74
2,smoke_pm_non_zero,7224168,26.6
3,smoke_pm_gt_five,2419449,8.91
4,hw,2595,0.01
5,_hws,8391047,30.89
6,hs,464589,1.71
7,hws,1954,0.01
8,ws,43692,0.16
9,hs5,179849,0.66


## Get Avg Exposure Person Year

In [15]:
# Groupby operation
data = df[hspts + ['GEOID']]

# sum exposure days per GEOID (census tract (ct) number)
grouped_df = data.groupby('GEOID').sum()
df = gdf.merge(grouped_df, left_on="FIPS", right_on="GEOID")

In [16]:
df.head()

Unnamed: 0,FIPS,POPULATION,STATE_ABBR,wfday,heatday,smoke_pm_non_zero,smoke_pm_gt_five,hw,_hws,hs,...,hs5,hws5,ws5,heatday_2d,smoke_pm_non_zero_2d,wfday_2d,_hws_2d,hs_2d,hws_2d,ws_2d
0,4001942600,1549,AZ,2,92,406,69,0,475,25,...,6,0,0,139,614,4,702,55,0,0
1,4001942700,4491,AZ,9,92,437,68,0,505,29,...,8,0,0,139,667,17,754,60,0,9
2,4001944000,5348,AZ,43,30,411,61,0,462,7,...,0,0,1,49,627,71,701,14,0,32
3,4001944100,5495,AZ,13,92,421,58,0,488,30,...,10,0,0,136,642,24,728,56,0,18
4,4001944201,4021,AZ,0,92,346,57,0,413,25,...,8,0,0,136,545,0,631,50,0,0


In [17]:
# total population

df["POPULATION"].sum() 

76334073

In [20]:
def get_avg_exposure_person_year(data):
    for col in hspts:
        # multiply the number of exposure days by ct population and divide it by total years
        data[col] = round(data[col] * data["POPULATION"] / total_years, 2)

    # sum per each exposure
    exposure_person_day_per_year = grouped_df.sum().reset_index()
    exposure_person_day_per_year.columns = ["value", "exposure_person_day_per_year"]

    # new column for total exposure person day per year per ct
    exposure_person_day_per_year["exposure_person_day_per_year_per_ct"] = round(exposure_person_day_per_year["exposure_person_day_per_year"]/total_census_tracts, 2)
    return exposure_person_day_per_year

In [21]:
df_epy = get_avg_exposure_person_year(df)
df_epy

Unnamed: 0,value,exposure_person_day_per_year,exposure_person_day_per_year_per_ct
0,wfday,115632,6.39
1,heatday,1560169,86.16
2,smoke_pm_non_zero,7224168,398.95
3,smoke_pm_gt_five,2419449,133.61
4,hw,2595,0.14
5,_hws,8391047,463.39
6,hs,464589,25.66
7,hws,1954,0.11
8,ws,43692,2.41
9,hs5,179849,9.93


In [28]:
merged_df = df_ecy.merge(df_epy, on=["value"])
merged_df

Unnamed: 0,value,days,avg_census,exposure_person_day_per_year,exposure_person_day_per_year_per_ct
0,wfday,115632,0.43,115632,6.39
1,heatday,1560169,5.74,1560169,86.16
2,smoke_pm_non_zero,7224168,26.6,7224168,398.95
3,smoke_pm_gt_five,2419449,8.91,2419449,133.61
4,hw,2595,0.01,2595,0.14
5,_hws,8391047,30.89,8391047,463.39
6,hs,464589,1.71,464589,25.66
7,hws,1954,0.01,1954,0.11
8,ws,43692,0.16,43692,2.41
9,hs5,179849,0.66,179849,9.93


## Merge tables and add full exposure name

In [29]:
temp_dict = {}
hsptdict = plotting.get_hotspopt_dict()

# get full name of the exposure
for key in hsptdict.keys():
    temp_dict[key] = hsptdict[key]['title_map']

merged_df['value_full'] = merged_df['value'].replace(temp_dict)

In [30]:
merged_df = merged_df[["value_full", "avg_census", "exposure_person_day_per_year", "exposure_person_day_per_year_per_ct"]]

In [31]:
from tabulate import tabulate
# for col in merged_df.select_dtypes(include=['float64']).columns:
#     merged_df[col] = merged_df[col].apply(lambda x: '{:.2f}'.format(x))

markdown_table = tabulate(merged_df, headers='keys', tablefmt='pipe', showindex=False)
print(markdown_table)

| value_full                                                                      |   avg_census |   exposure_person_day_per_year |   exposure_person_day_per_year_per_ct |
|:--------------------------------------------------------------------------------|-------------:|-------------------------------:|--------------------------------------:|
| Total days of wildfire                                                          |         0.43 |                         115632 |                                  6.39 |
| Total days of heat (over 95th percentile over last 5 years)                     |         5.74 |                        1560169 |                                 86.16 |
| Total days of smoke $PM_{2.5}$ (non zero)                                       |        26.6  |                        7224168 |                                398.95 |
| Total days of smoke $PM_{2.5}$ (over >5μg/$m^3$)                                |         8.91 |                        2419449 |         

| value_full                                                                      |   avg_census |   exposure_person_day_per_year |   exposure_person_day_per_year_per_ct |
|:--------------------------------------------------------------------------------|-------------:|-------------------------------:|--------------------------------------:|
| Total days of wildfire                                                          |         0.43 |                         115632 |                                  6.39 |
| Total days of heat (over 95th percentile over last 5 years)                     |         5.74 |                        1560169 |                                 86.16 |
| Total days of smoke $PM_{2.5}$ (non zero)                                       |        26.6  |                        7224168 |                                398.95 |
| Total days of smoke $PM_{2.5}$ (over >5μg/$m^3$)                                |         8.91 |                        2419449 |                                133.61 |
| Total days of heat and wildfire concurrence                                     |         0.01 |                           2595 |                                  0.14 |
| Total days of heat or wildfire or smoke $PM_{2.5}$                              |        30.89 |                        8391047 |                                463.39 |
| Total days of heat and non-zero smoke $PM_{2.5}$ concurrence                    |         1.71 |                         464589 |                                 25.66 |
| Total days of heat, wildfire and non-zero smoke $PM_{2.5}$ concurrence          |         0.01 |                           1954 |                                  0.11 |
| Total days of wildfire & smoke $PM_{2.5}$                                       |         0.16 |                          43692 |                                  2.41 |
| Total days of heat and smoke $PM_{2.5}$ (over >5μg/$m^3$) concurrence           |         0.66 |                         179849 |                                  9.93 |
| Total days of heat, wildfire and smoke $PM_{2.5}$ (over >5μg/$m^3$) concurrence |         0    |                           1141 |                                  0.06 |
| Total days of wildfire and smoke $PM_{2.5}$ (over >5μg/$m^3$) concurrence       |         0.07 |                          19280 |                                  1.06 |
| Total days of heat (over 95th percentile over last 5 years) (2D)                |         8.66 |                        2350900 |                                129.83 |
| Total days of smoke $PM_{2.5}$ (non zero) (2D)                                  |        38.16 |                       10364963 |                                572.4  |
| Total days of wildfire (2D)                                                     |         0.73 |                         197017 |                                 10.88 |
| Total days of heat or wildfire or smoke $PM_{2.5}$ (2D)                         |        43.85 |                       11911409 |                                657.8  |
| Total days of heat and non-zero smoke $PM_{2.5}$ concurrence (2D)               |         3.37 |                         915543 |                                 50.56 |
| Total days of heat, wildfire and non-zero smoke $PM_{2.5}$ concurrence (2D)     |         0.02 |                           4755 |                                  0.26 |
| Total days of wildfire & smoke $PM_{2.5}$ (2D)                                  |         0.31 |                          84732 |                                  4.68 |

## State Stats

In [32]:
df.head()

Unnamed: 0,FIPS,POPULATION,STATE_ABBR,wfday,heatday,smoke_pm_non_zero,smoke_pm_gt_five,hw,_hws,hs,...,hs5,hws5,ws5,heatday_2d,smoke_pm_non_zero_2d,wfday_2d,_hws_2d,hs_2d,hws_2d,ws_2d
0,4001942600,1549,AZ,21327.66,981088.06,4329586.15,735816.31,0.0,5065402.46,266600.46,...,63984.03,0.0,0.0,1482296.96,6547698.38,42656.36,7486131.12,586520.59,0.0,0.0
1,4001942700,4491,AZ,806763.24,8246913.12,39172837.32,6095544.48,0.0,45268381.8,2599570.44,...,717122.88,0.0,0.0,12460010.04,59790120.12,1523886.12,67588831.44,5378421.6,0.0,806763.24
2,4001944000,5348,AZ,5465987.58,3813480.53,52244683.31,7754075.9,0.0,58727600.21,889810.94,...,0.0,0.0,127114.83,6228683.68,79701743.15,9025238.45,89108329.65,1779625.44,0.0,4067713.76
3,4001944100,5495,AZ,1744600.22,12346411.44,56498245.56,7783605.22,0.0,65489655.44,4026003.33,...,1341999.89,0.0,0.0,18251213.89,86156471.33,3220802.67,97697682.11,7515207.44,0.0,2415602.0
4,4001944201,4021,AZ,0.0,6611094.98,24863470.16,4096005.05,0.0,29678070.81,1796494.34,...,574877.01,0.0,0.0,9772925.23,39163556.2,0.0,45343495.43,3592986.0,0.0,0.0


In [33]:
# Groupby operation
data = df[hspts + ['POPULATION', 'STATE_ABBR']]

In [34]:
grouped_df = data.groupby('STATE_ABBR').sum()
for col in hspts:
    grouped_df[col] = round(grouped_df[col] * grouped_df["POPULATION"] / total_years, 2)

In [35]:
grouped_df

Unnamed: 0_level_0,wfday,heatday,smoke_pm_non_zero,smoke_pm_gt_five,hw,_hws,hs,hws,ws,hs5,hws5,ws5,heatday_2d,smoke_pm_non_zero_2d,wfday_2d,_hws_2d,hs_2d,hws_2d,ws_2d,POPULATION
STATE_ABBR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
AZ,238495800000000.0,6490316000000000.0,1.496066e+16,3362186000000000.0,8182064000000.0,2.009406e+16,1499998000000000.0,6027137000000.0,93256620000000.0,552042500000000.0,3197088000000.0,16313170000000.0,9651119000000000.0,2.258357e+16,394535900000000.0,2.941854e+16,3034383000000000.0,12477350000000.0,172240300000000.0,7151502
CA,9211483000000000.0,2.24467e+17,8.7817e+17,3.022442e+17,341090900000000.0,1.05213e+18,5.684953e+16,251110000000000.0,2779272000000000.0,2.225499e+16,87804380000000.0,1163608000000000.0,3.347278e+17,1.274801e+18,1.665519e+16,1.508379e+18,1.112289e+17,673781700000000.0,6429109000000000.0,39507131
CO,109066500000000.0,3991488000000000.0,2.18582e+16,6661344000000000.0,2346665000000.0,2.436473e+16,1565144000000000.0,1430418000000.0,27966690000000.0,560143100000000.0,622574900000.0,11659890000000.0,6545140000000000.0,3.107932e+16,196710700000000.0,3.454634e+16,3217101000000000.0,4194083000000.0,56090680000000.0,5773714
ID,94031830000000.0,410306100000000.0,3081576000000000.0,1343819000000000.0,1577444000000.0,3340154000000000.0,206539700000000.0,1349733000000.0,38991930000000.0,94040810000000.0,848464600000.0,15828880000000.0,626028500000000.0,4157359000000000.0,157177100000000.0,4464647000000000.0,398527600000000.0,3647419000000.0,76815560000000.0,1839106
MT,27531280000000.0,114229400000000.0,1031118000000000.0,436849900000000.0,670879300000.0,1095765000000000.0,65446220000000.0,525536000000.0,11522210000000.0,28277600000000.0,282857200000.0,6312095000000.0,179280900000000.0,1358837000000000.0,47604010000000.0,1442511000000000.0,120983600000000.0,1574345000000.0,22000420000000.0,1084225
NM,37037860000000.0,492564200000000.0,1812380000000000.0,398791900000000.0,1077922000000.0,2187969000000000.0,143551600000000.0,723017000000.0,10106640000000.0,32387570000000.0,188882900000.0,2377182000000.0,719186600000000.0,2734729000000000.0,64433360000000.0,3219483000000000.0,277124400000000.0,1876997000000.0,21143280000000.0,2117522
NV,19405300000000.0,1189263000000000.0,3870704000000000.0,1570506000000000.0,1455672000000.0,4722053000000000.0,348327800000000.0,1034201000000.0,8569851000000.0,158318200000000.0,620640100000.0,3633120000000.0,1677321000000000.0,5564867000000000.0,33232410000000.0,6656823000000000.0,601021300000000.0,2546836000000.0,16721130000000.0,3104614
OR,262607800000000.0,2241513000000000.0,1.361223e+16,4082431000000000.0,5810002000000.0,1.522524e+16,794880900000000.0,4340713000000.0,94762770000000.0,287991800000000.0,3033421000000.0,38922800000000.0,3404142000000000.0,1.930711e+16,462207900000000.0,2.13266e+16,1638246000000000.0,10589110000000.0,206179800000000.0,4224980
TX,0.0,226689900.0,633253500.0,170017500.0,0.0,803271000.0,56672440.0,0.0,0.0,19712110.0,0.0,0.0,325250900.0,933864300.0,0.0,1155626000.0,103488900.0,0.0,0.0,2026
UT,62163760000000.0,1443699000000000.0,6345804000000000.0,2309870000000000.0,1805649000000.0,7288635000000000.0,540488000000000.0,1236582000000.0,21975300000000.0,151303900000000.0,499425500000.0,7994593000000.0,2175761000000000.0,9215673000000000.0,109164200000000.0,1.035532e+16,1101031000000000.0,3160291000000.0,43058630000000.0,3271616


In [36]:
grouped_df["POPULATION"].sum()

76334073