# Model Aggregation
Roll up Census block group estimates to the Census tract and county levels.

In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
import re

## Gather & Combine Model Outputs

In [2]:
# Locate the model outputs relative to the current working directory:
# the 'Data/Model Outputs' folder two directories above it.
p = Path.cwd()
data_path = p.parent.parent.joinpath('Data', 'Model Outputs')

# input files produced by the two upstream models
fire_propensity_path = data_path.joinpath('PropensityBlockModel_2014_2016.csv')
smoke_alarm_path = data_path.joinpath('SmokeAlarmModelOutput.csv')

In [3]:
# load fire propensity
# Only the prediction and the GEOID columns are read; GEOID is read as text.
dtypes = {'GEOID': str}
cols_to_use = ['Predicted_Probs', 'GEOID']
column_names = {'Predicted_Probs': 'fire_propensity_bg', 'GEOID': 'geoid'}

fire_prop = (
    pd.read_csv(fire_propensity_path, usecols=cols_to_use, dtype=dtypes)
    .rename(columns=column_names)
    # put the '#_' prefix in front of every id
    .assign(geoid=lambda frame: '#_' + frame['geoid'])
    .loc[:, ['geoid', 'fire_propensity_bg']]
)

print('{} geoids'.format(len(fire_prop)))
display(fire_prop.head())

117897 geoids


Unnamed: 0,geoid,fire_propensity_bg
0,#_484391003002,0.331762
1,#_240338074052,0.525867
2,#_090075702002,0.11628
3,#_280679504021,0.849792
4,#_191050701003,0.261825


In [4]:
# load smoke alarm
geos = {'County': 'cnty', 'Tract': 'trt', 'Block': 'bg'}
cols = ['geoid', 'detectors_cnty', 'detectors_trt', 'detectors_bg']

smoke_alarm_long = pd.read_csv(smoke_alarm_path, dtype=dtypes)
# The first column is dropped by position (presumably a saved index column --
# TODO confirm); the survey count and confidence interval are not used here.
smoke_alarm_long = (
    smoke_alarm_long
    .drop(columns=smoke_alarm_long.columns[0])
    .drop(columns=['num_surveys', 'detectors_prc_ci'])
)

# shorten the geography labels so they can serve as column suffixes
smoke_alarm_long['estimate_geography'] = smoke_alarm_long['estimate_geography'].map(geos)

# long -> wide: one row per geoid, one mean-estimate column per geography label
smoke_alarm_wide = smoke_alarm_long.pivot(
    index='geoid',
    columns='estimate_geography',
    values='detectors_prc_mean',
)
smoke_alarm_wide.columns = list(map('detectors_{}'.format, smoke_alarm_wide.columns))

smoke_alarm = smoke_alarm_wide.reset_index()[cols]

print('{} geoids'.format(len(smoke_alarm)))
display(smoke_alarm.head())

195114 geoids


Unnamed: 0,geoid,detectors_cnty,detectors_trt,detectors_bg
0,#_010010201001,52.63,,
1,#_010010201002,52.63,,
2,#_010010202001,52.63,,
3,#_010010202002,52.63,,
4,#_010010203001,52.63,,


In [5]:
# combine
# Full outer join on the columns the two frames share, so a geoid that
# appears in only one of the two model outputs is still kept.
models_raw = fire_prop.merge(smoke_alarm, how='outer')

print('{} geoids'.format(len(models_raw)))
display(models_raw.head())

225570 geoids


Unnamed: 0,geoid,fire_propensity_bg,detectors_cnty,detectors_trt,detectors_bg
0,#_484391003002,0.331762,40.62,,
1,#_240338074052,0.525867,26.39,,
2,#_090075702002,0.11628,55.15,,
3,#_280679504021,0.849792,,,14.63
4,#_280679504021,0.860052,,,14.63


## Aggregate Model Values to Tract/County
Also split out the predicted values at each level (block group, tract, county) so they can be applied later when filling gaps in the national set.

In [6]:
# pull out bg values
# Keep everything except the tract- and county-level columns.
to_drop = [c for c in models_raw.columns if any(tag in c for tag in ('_trt', '_cnty'))]
models_bg = models_raw.drop(columns=to_drop)

print('{} block groups'.format(len(models_bg)))
# sanity check: length of each id once the 2-character prefix is removed
geoid_lengths = models_bg['geoid'].str.slice(2).str.len()
display(geoid_lengths.value_counts())
display(models_bg.head())

225570 block groups


12    225570
Name: geoid, dtype: int64

Unnamed: 0,geoid,fire_propensity_bg,detectors_bg
0,#_484391003002,0.331762,
1,#_240338074052,0.525867,
2,#_090075702002,0.11628,
3,#_280679504021,0.849792,14.63
4,#_280679504021,0.860052,14.63


In [7]:
# roll up to tract
# Start from the combined model output without the county-level columns.
to_drop = [c for c in models_raw.columns if '_cnty' in c]
models_trt = models_raw.drop(columns=to_drop)

# the tract id is the block-group id with its last character removed
models_trt['geoid_tract'] = models_trt['geoid'].str[:-1]

# Per-tract (unweighted) means of the block-group values, broadcast back to
# every block-group row. The numeric columns are selected *before* the
# transform: averaging the whole frame would also try to average the string
# 'geoid' column, which raises TypeError on pandas >= 2.0.
tract_means = (
    models_trt
    .groupby('geoid_tract')[['fire_propensity_bg', 'detectors_bg']]
    .transform('mean')
)
models_trt['fire_propensity_trt'] = tract_means['fire_propensity_bg']
models_trt['trt_rollup_detectors'] = tract_means['detectors_bg']

# keep a tract-level detector estimate where one exists, otherwise fall back
# to the mean of the tract's block-group estimates
models_trt['detectors_trt'] = models_trt['detectors_trt'].fillna(models_trt['trt_rollup_detectors'])

keep_cols = ['geoid_tract', 'fire_propensity_trt', 'detectors_trt']
# NOTE(review): duplicates are removed on the full row, so a tract whose block
# groups carry different 'detectors_trt' values still ends up with several
# rows here -- confirm whether one row per tract is expected downstream.
models_trt = models_trt[keep_cols].drop_duplicates()

print('{} tracts'.format(models_trt.shape[0]))
display(models_trt['geoid_tract'].str[2:].str.len().value_counts())
display(models_trt.head())


80017 tracts


11    80017
Name: geoid_tract, dtype: int64

Unnamed: 0,geoid_tract,fire_propensity_trt,detectors_trt
0,#_48439100300,0.302572,
1,#_24033807405,0.471269,
2,#_09007570200,0.11628,
3,#_28067950402,0.7213,20.016
5,#_19105070100,0.192179,


In [8]:
# roll up to county

# pull county detector estimates out of raw models dataframe
to_drop = [c for c in models_raw.columns if '_bg' in c or '_trt' in c]
models_cnty_temp = models_raw.drop(columns=to_drop)
# the county id is the block-group id with its last 7 characters removed
models_cnty_temp['geoid_cnty'] = models_cnty_temp['geoid'].str[:-7]
models_cnty_temp = models_cnty_temp.drop(columns='geoid').drop_duplicates()

print('{} counties in temp'.format(models_cnty_temp.shape[0]))
display(models_cnty_temp.head())

# build rollup from tract data
models_cnty = models_trt.copy()
# the county id is the tract id with its last 6 characters removed
models_cnty['geoid_cnty'] = models_cnty['geoid_tract'].str[:-6]
# outer join so counties present on only one side are kept
models_cnty = pd.merge(models_cnty, models_cnty_temp, how='outer', on='geoid_cnty')

# Per-county (unweighted) means of the tract values, broadcast back to every
# row. The numeric columns are selected *before* the transform: averaging the
# whole frame would also try to average the string 'geoid_tract' column,
# which raises TypeError on pandas >= 2.0.
county_means = (
    models_cnty
    .groupby('geoid_cnty')[['fire_propensity_trt', 'detectors_trt']]
    .transform('mean')
)
models_cnty['fire_propensity_cnty'] = county_means['fire_propensity_trt']
models_cnty['cnty_rollup_detectors'] = county_means['detectors_trt']

# keep a county-level detector estimate where one exists, otherwise fall back
# to the mean of the county's tract estimates
models_cnty['detectors_cnty'] = models_cnty['detectors_cnty'].fillna(models_cnty['cnty_rollup_detectors'])

keep_cols = ['geoid_cnty', 'fire_propensity_cnty', 'detectors_cnty']
# NOTE(review): duplicates are removed on the full row, so a county that has
# more than one distinct 'detectors_cnty' value keeps one row per value --
# confirm whether one row per county is expected downstream.
models_cnty = models_cnty[keep_cols].drop_duplicates()

print('{} counties'.format(models_cnty.shape[0]))
display(models_cnty['geoid_cnty'].str.len().value_counts())
display(models_cnty.head())

6208 counties in temp


Unnamed: 0,detectors_cnty,geoid_cnty
0,40.62,#_48439
1,26.39,#_24033
2,55.15,#_09007
3,,#_28067
5,28.73,#_19105


6203 counties


7    6203
Name: geoid_cnty, dtype: int64

Unnamed: 0,geoid_cnty,fire_propensity_cnty,detectors_cnty
0,#_48439,0.25071,40.62
1,#_48439,0.25071,49.73
2,#_48439,0.25071,48.62
3,#_48439,0.25071,51.521926
4,#_48439,0.25071,55.36


## Load Full National Set

In [9]:
# Locate the ACS block-group file relative to the current working directory:
# 'Data/Master Project Data' two directories above it.
p = Path.cwd()
acs_path = p.parent.parent.joinpath('Data', 'Master Project Data', 'ACS 5YR Block Group Data.csv')

In [10]:
# load full data
# Only the id and the total population are read from the ACS file.
cols_to_use = ['GEOID', 'tot_population']
natl = (
    pd.read_csv(acs_path, usecols=cols_to_use, encoding='latin-1')
    .rename(columns={'GEOID': 'geoid'})
)

print('{} total block groups nationally'.format(len(natl)))
display(natl.head())

217739 total block groups nationally


Unnamed: 0,geoid,tot_population
0,#_530019501001,842
1,#_530019501002,922
2,#_530019501003,515
3,#_530019502001,530
4,#_530019502002,679


## Fill in Gaps

In [11]:
# Derive the tract and county ids from the block-group id by trimming
# characters from the right (1 to reach the tract id, 6 more for the county).
natl['geoid_tract'] = natl['geoid'].str[:-1]
natl['geoid_cnty'] = natl['geoid_tract'].str[:-6]

# The model tables can hold several rows for the same key (nothing upstream
# enforces uniqueness on geoid / geoid_tract / geoid_cnty). A left merge
# against such a table repeats the matching block-group rows, so each table is
# first collapsed to one row per key by averaging its numeric columns
# (missing values are ignored by the mean).
# NOTE(review): averaging is one way to reconcile conflicting rows -- confirm
# it is the intended rule, especially for the county detector estimates.
bg_lookup = models_bg.groupby('geoid', as_index=False).mean(numeric_only=True)
trt_lookup = models_trt.groupby('geoid_tract', as_index=False).mean(numeric_only=True)
cnty_lookup = models_cnty.groupby('geoid_cnty', as_index=False).mean(numeric_only=True)

# Attach block-group, tract and county values; validate='m:1' makes pandas
# raise if a lookup table is ever not unique on its key.
n_block_groups = len(natl)
natl = natl.merge(bg_lookup, how='left', on='geoid', validate='m:1')
natl = natl.merge(trt_lookup, how='left', on='geoid_tract', validate='m:1')
natl = natl.merge(cnty_lookup, how='left', on='geoid_cnty', validate='m:1')
assert len(natl) == n_block_groups, 'merging model values changed the number of block-group rows'

display(natl.head())


Unnamed: 0,geoid,tot_population,geoid_tract,geoid_cnty,fire_propensity_bg,detectors_bg,fire_propensity_trt,detectors_trt,fire_propensity_cnty,detectors_cnty
0,#_530019501001,842,#_53001950100,#_53001,,,,,0.166635,
1,#_530019501002,922,#_53001950100,#_53001,,,,,0.166635,
2,#_530019501003,515,#_53001950100,#_53001,,,,,0.166635,
3,#_530019502001,530,#_53001950200,#_53001,,,,,0.166635,
4,#_530019502002,679,#_53001950200,#_53001,,,,,0.166635,


In [17]:
# For each measure use the finest-grained value available: the block-group
# value first, then the tract value, then the county value.
natl['fire_propensity_comb'] = (
    natl['fire_propensity_bg']
    .fillna(natl['fire_propensity_trt'])
    .fillna(natl['fire_propensity_cnty'])
)

natl['detectors_comb'] = (
    natl['detectors_bg']
    .fillna(natl['detectors_trt'])
    .fillna(natl['detectors_cnty'])
)

# ids first, then population, the combined values, and finally the
# per-level values they were built from
col_order = [
    'geoid', 'geoid_tract', 'geoid_cnty', 'tot_population',
    'fire_propensity_comb', 'detectors_comb',
    'fire_propensity_bg', 'detectors_bg',
    'fire_propensity_trt', 'detectors_trt',
    'fire_propensity_cnty', 'detectors_cnty',
]
natl = natl[col_order].sort_values('geoid')

natl.head()

Unnamed: 0,geoid,geoid_tract,geoid_cnty,tot_population,fire_propensity_comb,detectors_comb,fire_propensity_bg,detectors_bg,fire_propensity_trt,detectors_trt,fire_propensity_cnty,detectors_cnty
5258,#_010010201001,#_01001020100,#_01001,692,0.524071,52.63,0.524071,,0.524071,,0.198921,52.63
5259,#_010010201002,#_01001020100,#_01001,1153,0.524071,52.63,,,0.524071,,0.198921,52.63
5260,#_010010202001,#_01001020200,#_01001,1015,0.198921,52.63,,,,,0.198921,52.63
5261,#_010010202002,#_01001020200,#_01001,957,0.198921,52.63,,,,,0.198921,52.63
5262,#_010010203001,#_01001020300,#_01001,2555,0.198921,52.63,,,,,0.198921,52.63


## Export Data

In [18]:
# write the filled-in national table into the model outputs folder,
# leaving the DataFrame index out of the file
output_path = data_path.joinpath('aggregated_models.csv')
natl.to_csv(output_path, index=False)