# Hotspots

Hotspots California map: 
- Wildifre-wildifreSmokePM2.5, 
- Heat-wildfire,
- Heat-PM2.5, 
- Wildifre-Heat-SmokePM2.5P

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd

In [2]:
df = pd.read_parquet("outputs/hotspots_per_fips_rolling_wc.parquet") 

In [3]:
gdf = gpd.read_file('data/WesternUSCensusTract/CensusTract2020_WesternUS.shp')

In [4]:
gdf = gdf[['FIPS', 'POPULATION', 'STATE_ABBR']]

In [5]:
df = gdf.merge(df, left_on="FIPS", right_on="GEOID")

## Total counts (tracts, days)

In [6]:
total_census_tracts = len(df.GEOID.unique())
total_census_tracts

18108

In [7]:
total_days = len(df)
total_days = total_days / total_census_tracts
total_days

5479.0

In [8]:
total_years = 15

In [9]:
import plotting
hspts = plotting.get_hotspot_list()

## Get Exposure days per census tract per year

In [13]:
def get_exposure_per_census_per_year(data):
    # sum all the exposure days per coexposure
    exposure_day_counts = data.sum().reset_index()
    exposure_day_counts.columns = ["value", "days"]

    # divide the sum by the numbers of census tracts and the number of years
    exposure_day_counts["avg_census"] = round(exposure_day_counts["days"]/total_census_tracts/total_years, 2)
    # df_stats["percent"] = round(df_stats["days"]/total_days*100., 3)
    return exposure_day_counts


In [14]:
df_ecy = get_exposure_per_census_per_year(df[hspts])
df_ecy

Unnamed: 0,value,days,avg_census
0,wfday,115632,0.43
1,heatday,1560169,5.74
2,smoke_pm_non_zero,7224168,26.6
3,smoke_pm_gt_five,2419449,8.91
4,hw,2595,0.01
5,_hws,8391047,30.89
6,hs,464589,1.71
7,hws,1954,0.01
8,ws,43692,0.16
9,hs5,179849,0.66


## Get Avg Exposure Person Year

In [15]:
# Groupby operation
data = df[hspts + ['POPULATION', 'GEOID']]
agg_functions = {col: 'sum' for col in data.columns if col not in ['GEOID', 'POPULATION']}
agg_functions['POPULATION'] = 'first'

In [16]:
def get_avg_exposure_person_year(data):
    # sum exposure days per GEOID (census tract (ct) number)
    grouped_df = data.groupby('GEOID').agg(agg_functions)
    for col in hspts:
        # multiply the number of exposure days by ct population and divide it by total years
        grouped_df[col] = round(grouped_df[col] * grouped_df["POPULATION"] / total_years, 2)

    # sum per each exposure
    exposure_person_day_per_year = grouped_df.sum().reset_index()
    exposure_person_day_per_year.columns = ["value", "exposure_person_day_per_year"]

    # new column for total exposure person day per year per ct
    exposure_person_day_per_year["exposure_person_day_per_year_per_ct"] = exposure_person_day_per_year["exposure_person_day_per_year"]/total_census_tracts
    return exposure_person_day_per_year

In [None]:
df_epy = get_avg_exposure_person_year(data)

In [31]:
merged_df = df_ecy.merge(df_epy, on=["value"])
merged_df

Unnamed: 0,value,days,avg_census,exposure_person_day_per_year,exposure_person_day_per_year_per_ct
0,wfday,115632,0,25616759,1415
1,heatday,1560169,6,445520277,24604
2,smoke_pm_non_zero,7224168,27,2004249606,110683
3,smoke_pm_gt_five,2419449,9,671891398,37105
4,hw,2595,0,591603,33
5,_hws,8391047,31,2335258513,128963
6,hs,464589,2,130873725,7227
7,hws,1954,0,438872,24
8,ws,43692,0,9101673,503
9,hs5,179849,1,50587782,2794


## Merge tables and add full exposure name

In [32]:
temp_dict = {}
hsptdict = plotting.get_hotspopt_dict()

# get full name of the exposure
for key in hsptdict.keys():
    temp_dict[key] = hsptdict[key]['title_map']

merged_df['value_full'] = merged_df['value'].replace(temp_dict)

In [33]:
merged_df = merged_df[["value_full", "avg_census", "exposure_person_day_per_year", "exposure_person_day_per_year_per_ct"]]

In [35]:
from tabulate import tabulate
for col in merged_df.select_dtypes(include=['float64']).columns:
    merged_df[col] = merged_df[col].apply(lambda x: '{:.0f}'.format(x))

markdown_table = tabulate(merged_df, headers='keys', tablefmt='pipe', showindex=False)
print(markdown_table)

| value_full                                                                      |   avg_census |   exposure_person_day_per_year |   exposure_person_day_per_year_per_ct |
|:--------------------------------------------------------------------------------|-------------:|-------------------------------:|--------------------------------------:|
| Total days of wildfire                                                          |            0 |                       25616759 |                                  1415 |
| Total days of heat (over 95th percentile over last 5 years)                     |            6 |                      445520277 |                                 24604 |
| Total days of smoke $PM_{2.5}$ (non zero)                                       |           27 |                     2004249606 |                                110683 |
| Total days of smoke $PM_{2.5}$ (over >5μg/$m^3$)                                |            9 |                      671891398 |         

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_df[col] = merged_df[col].apply(lambda x: '{:.0f}'.format(x))


18108
5479.0
| value_full                                                                      |   avg_census |   exposure_person_day_per_year |   exposure_person_day_per_year_per_ct |
|:--------------------------------------------------------------------------------|-------------:|-------------------------------:|--------------------------------------:|
| Total days of wildfire                                                          |            0 |                       25616759 |                                  1415 |
| Total days of heat (over 95th percentile over last 5 years)                     |            6 |                      445520277 |                                 24604 |
| Total days of smoke $PM_{2.5}$ (non zero)                                       |           27 |                     2004249606 |                                110683 |
| Total days of smoke $PM_{2.5}$ (over >5μg/$m^3$)                                |            9 |                      671891398 |                                 37105 |
| Total days of heat and wildfire concurrence                                     |            0 |                         591603 |                                    33 |
| Total days of heat or wildfire or smoke $PM_{2.5}$                              |           31 |                     2335258513 |                                128963 |
| Total days of heat and non-zero smoke $PM_{2.5}$ concurrence                    |            2 |                      130873725 |                                  7227 |
| Total days of heat, wildfire and non-zero smoke $PM_{2.5}$ concurrence          |            0 |                         438872 |                                    24 |
| Total days of wildfire & smoke $PM_{2.5}$                                       |            0 |                        9101673 |                                   503 |
| Total days of heat and smoke $PM_{2.5}$ (over >5μg/$m^3$) concurrence           |            1 |                       50587782 |                                  2794 |
| Total days of heat, wildfire and smoke $PM_{2.5}$ (over >5μg/$m^3$) concurrence |            0 |                         249168 |                                    14 |
| Total days of wildfire and smoke $PM_{2.5}$ (over >5μg/$m^3$) concurrence       |            0 |                        3907912 |                                   216 |
| Total days of heat (over 95th percentile over last 5 years) (2D)                |            9 |                      671301173 |                                 37072 |
| Total days of smoke $PM_{2.5}$ (non zero) (2D)                                  |           38 |                     2880978677 |                                159100 |
| Total days of wildfire (2D)                                                     |            1 |                       44422748 |                                  2453 |
| Total days of heat or wildfire or smoke $PM_{2.5}$ (2D)                         |           44 |                     3319297126 |                                183306 |
| Total days of heat and non-zero smoke $PM_{2.5}$ concurrence (2D)               |            3 |                      258797055 |                                 14292 |
| Total days of heat, wildfire and non-zero smoke $PM_{2.5}$ concurrence (2D)     |            0 |                        1101330 |                                    61 |
| Total days of wildfire & smoke $PM_{2.5}$ (2D)                                  |            0 |                       18323027 |                                  1012 |

## State Stats

In [37]:
# Groupby operation
data = df[hspts + ['POPULATION', 'STATE_ABBR']]
agg_functions = {col: 'sum' for col in data.columns if col not in ['POPULATION','STATE_ABBR']}
agg_functions['POPULATION'] = 'first'

In [38]:
grouped_df = data.groupby('STATE_ABBR').agg(agg_functions)
for col in hspts:
    grouped_df[col] = round(grouped_df[col] * grouped_df["POPULATION"] / total_years, 2)

In [39]:
grouped_df

Unnamed: 0_level_0,wfday,heatday,smoke_pm_non_zero,smoke_pm_gt_five,hw,_hws,hs,hws,ws,hs5,hws5,ws5,heatday_2d,smoke_pm_non_zero_2d,wfday_2d,_hws_2d,hs_2d,hws_2d,ws_2d,POPULATION
STATE_ABBR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
AZ,1269354,16445320,39705930,8852122,37176,53128738,3896458,24061,382293,1433858,11979,70221,24423806,60015592,2075867,77953941,7821004,53595,714399,1549
CA,7089477,173619877,705484157,247001957,139140,839301576,44346496,103495,2509793,17530273,72102,1326391,259071931,1021005167,12345014,1200977299,86417125,229065,4960446,3038
CO,1324595,29982874,173091373,51814816,20730,192203277,11791731,11021,394125,4120730,3149,157440,49050957,246363424,2344019,272875270,24079923,35162,786413,3936
ID,3183845,9151206,73041242,31628655,54234,79307363,4587021,45433,1473108,2067061,29020,642002,13963249,98182797,5094628,105697005,8855062,116079,2671243,3568
MT,770245,1971827,19808636,7923570,17594,20996837,1176637,14703,374343,497913,8590,219633,3116911,26070955,1296407,27619953,2184192,43943,675668,1239
NM,977020,9501084,36896075,8060149,34291,44213135,2842192,26571,311131,649372,8259,74327,13873977,55418888,1662658,64854262,5493002,54219,594614,2693
NV,330833,11327008,40069239,16457385,22176,48128491,3451900,16591,141105,1575646,8870,52894,16033741,57252026,561628,67611339,5957952,40410,268412,2464
OR,3037771,15829325,102934788,31002065,67687,114827448,5775216,56501,1188034,2142859,38110,481205,24076735,145410118,5238079,160437245,11804686,129876,2463852,2844
TX,0,12426,34712,9320,0,44032,3107,0,0,1081,0,0,17829,51190,0,63346,5673,0,0,2026
UT,1137187,17784848,81616030,29405459,29507,93519019,6577626,20950,432863,1832069,7967,141337,26840444,118368059,1953046,132826916,13483366,50456,833858,4426
