In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
import seaborn as sns   
import plotting
hspts = plotting.get_hotspot_list()

This notebook provides statistics from Table 1 and Supplementary Table 1.

In [3]:
gdf = gpd.read_file("outputs/d2-events-2d-230929_SVI_shapefile.geojson")
gdf = gdf[['FIPS', 'ST_ABBR', 'E_TOTPOP']]

In [4]:
gdf = gdf[gdf['ST_ABBR'] != 'TX']

In [5]:
gdf.FIPS.nunique()

18106

In [6]:
gdf["E_TOTPOP"].sum()

75506421

In [7]:
df = pd.read_parquet("outputs/d2-events-2d5-230929.parquet") 

In [8]:
# Get yearly sums of exposure per census tract
df = df.reset_index()
df = df.groupby("GEOID").resample("Y", on="time").sum()

In [9]:
df.drop(columns=["GEOID"], inplace=True)

In [10]:
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,wfday,heatday,smoke_pm_non_zero,smoke_pm_gt_five,hw,_hws,hs,hws,ws,hs5,...,wfday_2d,_hws_2d,hs_2d,hws_2d,ws_2d,hw_2d,smoke_pm_gt_five_2d,hs_2d_5,ws_2d_5,hws_2d_5
GEOID,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
4001942600,2006-12-31,0,8,29,2,0,37,0,0,0,0,...,0,54,4,0,0,0,3,0,0,0
4001942600,2007-12-31,0,11,19,2,0,28,2,0,0,1,...,0,43,3,0,0,0,3,2,0,0
4001942600,2008-12-31,1,2,24,0,0,26,1,0,0,0,...,2,43,2,0,0,0,0,0,0,0
4001942600,2009-12-31,0,4,22,3,0,25,1,0,0,0,...,0,37,2,0,0,0,5,0,0,0
4001942600,2010-12-31,0,6,18,0,0,24,0,0,0,0,...,0,35,0,0,0,0,0,0,0,0


In [11]:
hspts = ['wfday', 'heatday', 'smoke_pm_non_zero', 'smoke_pm_gt_five',
       'hw', '_hws', 'hs', 'hws', 'ws', 'hs5', 'hws5', 'ws5', 'heatday_2d',
       'smoke_pm_non_zero_2d', 'wfday_2d', '_hws_2d', 'hs_2d', 'hws_2d',
       'ws_2d', 'hw_2d', 'smoke_pm_gt_five_2d', 'hs_2d_5', 'ws_2d_5',
       'hws_2d_5']

In [12]:
df = df.reset_index()

In [13]:
def get_mean_and_std(groupped_df, hspts):
    """Get mean and standard deviation of a list of hotspots for a groupped dataframe."""
    mean = groupped_df[hspts].mean()
    std = groupped_df[hspts].std()
    #geoid_combined = mean.map('{:,.1f}'.format) + " (" + std.map('{:,.1f}'.format) + ")"
    geoid_combined = mean.map('{:,.0f}'.format) + " (" + std.map('{:,.0f}'.format) + ")"
    return geoid_combined

# Tract-day exposure

In [14]:
gdf["FIPS"].nunique()

18106

# Census-tract inclusions

In [104]:
gdf.groupby("ST_ABBR").count()

Unnamed: 0_level_0,FIPS,E_TOTPOP
ST_ABBR,Unnamed: 1_level_1,Unnamed: 2_level_1
AZ,1765,1765
CA,9095,9095
CO,1447,1447
ID,456,456
MT,319,319
NM,612,612
NV,779,779
OR,991,991
UT,716,716
WA,1766,1766


## Census tract numbers from the web:

- Arizona 1,765
- California      9,129
- Colorado        1,447
- Idaho   456
- Montana 319
- Nevada  779
- New Mexico      612
- Oregon  1,001
- Utah    716
- Washington      1,784
- Wyoming 160

In [109]:
1765+9129+1447+456+319+779+612+1001+716+1784+160

18168

In [111]:
# Difference from our map
18168-18106

62

In [85]:
df["GEOID"].nunique()

18106

In [86]:
# include just FIPS from SVI index data

df = df[df['GEOID'].isin(gdf["FIPS"].unique())]

In [100]:
df["GEOID"].nunique()

18106

In [88]:
df = df.reset_index()

In [89]:
geoid_group = df[["GEOID"]+ hspts].groupby(["GEOID"]).sum().div(15) # 15 years
year_group = df[["time"]+ hspts].groupby(["time"]).sum()

In [90]:
res = pd.DataFrame()
res["avg_exposure_days_per_year"] = get_mean_and_std(year_group, hspts)
res["avg_exposure_days_per_ct"] = get_mean_and_std(geoid_group, hspts)

# Person-day exposure

In [None]:
pdf = gdf.merge(df, left_on="FIPS", right_on="GEOID")

In [92]:
# multiply hspts by E_TOTPOP to get person-day exposure
pdf[hspts] = pdf[hspts].mul(pdf["E_TOTPOP"], axis=0)
pdf.head()

Unnamed: 0,FIPS,ST_ABBR,E_TOTPOP,level_0,index,GEOID,time,wfday,heatday,smoke_pm_non_zero,...,wfday_2d,_hws_2d,hs_2d,hws_2d,ws_2d,hw_2d,smoke_pm_gt_five_2d,hs_2d_5,ws_2d_5,hws_2d_5
0,4001942600,AZ,1711,0,0,4001942600,2006-12-31,0,13688,49619,...,0,92394,6844,0,0,0,5133,0,0,0
1,4001942600,AZ,1711,1,1,4001942600,2007-12-31,0,18821,32509,...,0,73573,5133,0,0,0,5133,3422,0,0
2,4001942600,AZ,1711,2,2,4001942600,2008-12-31,1711,3422,41064,...,3422,73573,3422,0,0,0,0,0,0,0
3,4001942600,AZ,1711,3,3,4001942600,2009-12-31,0,6844,37642,...,0,63307,3422,0,0,0,8555,0,0,0
4,4001942600,AZ,1711,4,4,4001942600,2010-12-31,0,10266,30798,...,0,59885,0,0,0,0,0,0,0,0


In [93]:
pdf["FIPS"].nunique()

18106

In [15]:
geoid_group = pdf[["GEOID"]+ hspts].groupby(["GEOID"]).sum() #.div(15) # 15 years
year_group = pdf[["time"]+ hspts].groupby(["time"]).sum()

res["avg_exposure_person_days_per_year"] = get_mean_and_std(year_group, hspts)
res["avg_exposure_person_days_per_ct"] = get_mean_and_std(geoid_group, hspts)

NameError: name 'pdf' is not defined

In [95]:
geoid_group

Unnamed: 0_level_0,wfday,heatday,smoke_pm_non_zero,smoke_pm_gt_five,hw,_hws,hs,hws,ws,hs5,...,wfday_2d,_hws_2d,hs_2d,hws_2d,ws_2d,hw_2d,smoke_pm_gt_five_2d,hs_2d_5,ws_2d_5,hws_2d_5
GEOID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
04001942600,228.133333,11292.600000,46311.066667,7870.600000,0.000000,54752.000000,3079.800000,0.000000,0.000000,798.466667,...,456.266667,81101.400000,6387.733333,0.0,0.000000,0.0,11292.600000,1368.800000,0.000000,0.000000
04001942700,3281.400000,50314.800000,159330.200000,24792.800000,0.000000,196154.800000,15313.200000,0.000000,1458.400000,3646.000000,...,6198.200000,288034.000000,30991.000000,0.0,3281.400000,0.0,36095.400000,6927.400000,0.000000,0.000000
04001944000,18854.066667,13154.000000,180209.800000,26746.466667,0.000000,202571.600000,3069.266667,0.000000,6577.000000,0.000000,...,31131.133333,307365.133333,6138.533333,0.0,14030.933333,0.0,40777.400000,876.933333,1315.400000,0.000000
04001944100,5358.600000,55647.000000,173536.200000,23907.600000,0.000000,214344.000000,16900.200000,0.000000,3297.600000,4946.400000,...,9892.800000,314096.400000,32976.000000,0.0,7419.600000,0.0,35861.400000,8244.000000,0.000000,0.000000
04001944201,0.000000,50848.000000,104722.666667,17252.000000,0.000000,141042.666667,14528.000000,0.000000,0.000000,3934.666667,...,0.000000,208234.666667,28450.666667,0.0,0.000000,0.0,25424.000000,6961.333333,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56043000200,24158.933333,15828.266667,179734.133333,57898.133333,416.533333,202851.733333,9996.800000,208.266667,6664.533333,3540.533333,...,43736.000000,272829.333333,18535.733333,1249.6,16453.066667,1874.4,76850.400000,8538.933333,5206.666667,1041.333333
56043000301,0.000000,15022.066667,89240.000000,42983.933333,0.000000,96527.933333,7734.133333,0.000000,0.000000,3420.866667,...,0.000000,129844.200000,14724.600000,0.0,0.000000,0.0,57708.533333,7585.400000,0.000000,0.000000
56043000302,1203.066667,16842.933333,107588.533333,49497.600000,0.000000,116010.000000,9108.933333,0.000000,515.600000,3781.066667,...,2406.133333,155023.733333,16842.933333,0.0,1203.066667,0.0,66512.400000,8421.466667,515.600000,0.000000
56045951100,7425.600000,21621.600000,187168.800000,39967.200000,1528.800000,199836.000000,12885.600000,1528.800000,3494.400000,3494.400000,...,12448.800000,265137.600000,25116.000000,2402.4,5241.600000,2402.4,55255.200000,7862.400000,2184.000000,1528.800000


In [99]:
res

Unnamed: 0,avg_exposure_days_per_year,avg_exposure_days_per_ct,avg_exposure_person_days_per_year,avg_exposure_person_days_per_ct
wfday,"7,709 (1,219)",0 (3),"25,236,989 (4,118,724)","1,394 (7,851)"
heatday,"133,714 (47,400)",7 (3),"564,384,773 (201,498,211)","31,171 (19,861)"
smoke_pm_non_zero,"481,564 (259,819)",27 (11),"1,976,217,709 (1,087,474,243)","109,147 (60,131)"
smoke_pm_gt_five,"161,283 (167,991)",9 (5),"661,418,741 (700,910,652)","36,530 (24,946)"
hw,209 (107),0 (0),"699,180 (359,094)",39 (364)
_hws,"581,806 (268,376)",32 (10),"2,397,625,195 (1,122,359,277)","132,422 (65,985)"
hs,"38,214 (30,679)",2 (1),"159,109,815 (129,945,693)","8,788 (5,771)"
hws,154 (99),0 (0),"511,904 (330,385)",28 (290)
ws,"2,913 (984)",0 (1),"8,917,186 (3,074,409)","492 (3,372)"
hs5,"14,662 (16,560)",1 (0),"60,861,691 (70,261,909)","3,361 (2,440)"


# California stats

In [38]:
pdf.head()

Unnamed: 0,FIPS,ST_ABBR,E_TOTPOP,GEOID,time,wfday,heatday,smoke_pm_non_zero,smoke_pm_gt_five,hw,...,wfday_2d,_hws_2d,hs_2d,hws_2d,ws_2d,hw_2d,smoke_pm_gt_five_2d,hs_2d_5,ws_2d_5,hws_2d_5
0,4001942600,AZ,1711,4001942600,2006-12-31,0,13688,49619,3422,0,...,0,92394,6844,0,0,0,5133,0,0,0
1,4001942600,AZ,1711,4001942600,2007-12-31,0,18821,32509,3422,0,...,0,73573,5133,0,0,0,5133,3422,0,0
2,4001942600,AZ,1711,4001942600,2008-12-31,1711,3422,41064,0,0,...,3422,73573,3422,0,0,0,0,0,0,0
3,4001942600,AZ,1711,4001942600,2009-12-31,0,6844,37642,5133,0,...,0,63307,3422,0,0,0,8555,0,0,0
4,4001942600,AZ,1711,4001942600,2010-12-31,0,10266,30798,0,0,...,0,59885,0,0,0,0,0,0,0,0


In [39]:
temp = pdf[pdf["ST_ABBR"] == "CA"]
temp

Unnamed: 0,FIPS,ST_ABBR,E_TOTPOP,GEOID,time,wfday,heatday,smoke_pm_non_zero,smoke_pm_gt_five,hw,...,wfday_2d,_hws_2d,hs_2d,hws_2d,ws_2d,hw_2d,smoke_pm_gt_five_2d,hs_2d_5,ws_2d_5,hws_2d_5
26475,06001400100,CA,3035,06001400100,2006-12-31,0,15175,60700,6070,0,...,0,124435,0,0,0,0,9105,0,0,0
26476,06001400100,CA,3035,06001400100,2007-12-31,0,6070,39455,9105,0,...,0,60700,0,0,0,0,15175,0,0,0
26477,06001400100,CA,3035,06001400100,2008-12-31,0,24280,142645,69805,0,...,0,221555,9105,0,0,0,94085,9105,0,0
26478,06001400100,CA,3035,06001400100,2009-12-31,0,15175,36420,12140,0,...,0,75875,6070,0,0,0,18210,3035,0,0
26479,06001400100,CA,3035,06001400100,2010-12-31,0,27315,33385,0,0,...,0,81945,6070,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
162895,06115041102,CA,1027,06115041102,2016-12-31,0,5135,49296,10270,0,...,0,64701,6162,0,0,0,14378,0,0,0
162896,06115041102,CA,1027,06115041102,2017-12-31,1027,17459,70863,33891,0,...,2054,96538,15405,0,1027,0,43134,9243,1027,0
162897,06115041102,CA,1027,06115041102,2018-12-31,1027,5135,87295,55458,0,...,2054,115024,5135,0,0,0,65728,1027,0,0
162898,06115041102,CA,1027,06115041102,2019-12-31,1027,1027,40053,4108,0,...,2054,68809,0,0,1027,0,6162,0,0,0


In [41]:
year_group = temp[["time"]+ hspts].groupby(["time"]).sum()
year_group

Unnamed: 0_level_0,wfday,heatday,smoke_pm_non_zero,smoke_pm_gt_five,hw,_hws,hs,hws,ws,hs5,...,wfday_2d,_hws_2d,hs_2d,hws_2d,ws_2d,hw_2d,smoke_pm_gt_five_2d,hs_2d_5,ws_2d_5,hws_2d_5
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2006-12-31,8895908,249808124,697738378,189821532,256147,942320709,11478341,98604,2485817,1336383,...,15904410,1489053282,37675788,256273,5020910,507369,324105331,3713729,1879389,65268
2007-12-31,9354580,130086132,820364123,307360764,43593,946511006,11028545,20896,2242587,9853666,...,17136208,1452209669,27720579,93973,4964052,153429,434860974,18487369,1858900,55686
2008-12-31,12043600,186240708,1366388442,688298772,232590,1496498304,63288138,195489,4849207,46091472,...,20164697,2099679053,104906585,391033,8268243,462595,918020144,66453508,4672674,342521
2009-12-31,8309643,206231435,724555962,232381115,117576,882394409,54808420,78505,1855140,20522960,...,15216822,1336889257,111724946,208141,4467515,303247,360400162,47309239,1253410,122741
2010-12-31,5242252,314832700,290929169,15546588,64484,582244852,27764106,28487,959166,1386591,...,9851106,894625506,70471996,120871,2795148,200990,28820435,3512535,317703,1705
2011-12-31,5951088,106672303,263205620,31754770,31626,363161694,11401306,8577,1242962,212002,...,11189496,631071156,20638771,40798,3888296,80917,60437915,761487,468081,0
2012-12-31,6099927,285661229,405540729,70847786,273668,671012040,24592657,233933,1657453,7742206,...,10990999,1070055780,55657880,380613,3634352,473052,121083082,14791069,1130793,280803
2013-12-31,7453213,191555336,1019928340,180202329,87735,1138705291,78175247,72040,2040656,14573965,...,13614931,1689670706,135065361,163280,4839577,218711,295090513,27688206,1346193,87169
2014-12-31,6057460,422779162,500207674,123302357,278973,881638154,45782461,184698,1529406,19554990,...,11068775,1353798605,93252931,477678,3610384,587932,205276274,38157253,1449957,267414
2015-12-31,7838389,464458253,762143615,103949806,183897,1184627965,47546942,148935,2230388,15576626,...,13959228,1753226052,115979307,424562,4597443,503437,162930895,29921011,1574691,91998


In [42]:
year_group["hs"].mean()

72538135.4

In [None]:
# Just 2020 for CA

279,940,365