# Child Poverty Rates

This notebook is an example of how to use the `sum_m()` and `ratio()` methods of censuslib's `CensusDataFrame` to compute sums and ratios of census variables with error propagation.


In [1]:
%load_ext autoreload
%autoreload 1
%aimport censuslib.dataframe

from ambry import get_library

# Handle on the local ambry library; the bundle, partition and warehouse below
# are all looked up through it.
l = get_library()
# b: the poverty bundle, used only to obtain its warehouse.
# p: the ACS 2014 5-year B17020 partition, loaded into a dataframe in the next cell.
b = l.bundle('census.gov-acs_p5ye2014-poverty-hdp-0.0.1')
p = l.partition('census.gov-acs-p5ye2014-b17020')

# Warehouse named 'child_poverty' belonging to the bundle.
w = b.warehouse('child_poverty')

# Create a `counties` table (gvid, county_name) in the warehouse from the TIGER
# 2015 counties partition, restricted to statefp = 6, then index it on gvid.
# The object returned by query() is closed immediately.
# NOTE(review): the saved output of this cell reports
# "SQLError: no such table: main.counties", so the table may not actually be
# created -- confirm. Later cells join against a Python name `counties` that
# no visible cell binds; presumably it has to be read back from this
# warehouse first -- TODO confirm.
w.query("""

-- Get only counties in California

CREATE TABLE counties AS 
SELECT gvid, name AS county_name FROM census.gov-tiger-2015-counties
WHERE statefp = 6;

INDEX counties (gvid);


""").close()


Failed to index p04M02C002; SQLError: no such table: main.counties


In [2]:
def pred(r):
    """Row predicate selecting California county-level rows.

    Args:
        r: a row object exposing ``stusab`` and ``gvid`` attributes.

    Returns:
        bool: True when ``r.stusab`` is ``'ca'`` and ``r.gvid`` parses to an
        object whose ``level`` is ``'county'``. False otherwise, including
        when parsing yields no object at all.
    """
    # Imported inside the function so the predicate carries its own dependency.
    from geoid.civick import GVid

    # Cheap string test first, so gvids from other states are never parsed.
    if r.stusab != 'ca':
        return False

    # exception=False presumably suppresses parse errors; guard against a None
    # result so a single unparseable gvid cannot abort the whole load with an
    # AttributeError.
    parsed = GVid.parse(r.gvid, exception=False)
    return parsed is not None and parsed.level == 'county'
  
# Load the B17020 partition as a dataframe, passing pred as the row predicate
# (presumably rows for which pred returns False are dropped -- confirm against
# the partition's dataframe() API).
df = p.dataframe(pred)


In [None]:
# B01001 partition, filtered through pred and indexed by gvid.
sexbyage = (
    l.partition('census.gov-acs-p5ye2014-b01001')
     .dataframe(pred)
     .set_index('gvid')
)

In [None]:
%aimport censuslib.dataframe

# Wrap the raw B17020 dataframe so that sum_m() and ratio() are available.
cdf = censuslib.dataframe.CensusDataFrame(df)
# gvid stays a regular column here: set_index() returns a new frame instead of
# modifying cdf, so a bare cdf.set_index('gvid') call is a no-op. The reporting
# cell sets the index itself when it joins the county names.

# Each sum_m() call returns an (estimate, 90% margin) pair for the summed
# columns. Columns 3-5 and 11-13 are presumably the three under-18 age bands
# below and at-or-above the poverty level -- confirm against the B17020 table.
cdf['below_lt18'], cdf['below_lt18_m90'] = cdf.sum_m(3, 4, 5)
cdf['above_lt18'], cdf['above_lt18_m90'] = cdf.sum_m(11, 12, 13)
cdf['total_lt18'], cdf['total_lt18_m90'] = cdf.sum_m(3, 4, 5, 11, 12, 13)

# Child poverty rate = below / total, with the margin propagated by ratio().
# Both operands are passed as (estimate, margin) pairs.
cdf['below_lt18_rate'], cdf['below_lt18_rate_m90'] = cdf.ratio(
    (cdf['below_lt18'], cdf['below_lt18_m90']),
    (cdf['total_lt18'], cdf['total_lt18_m90']),
)

# z-score of a 90% confidence interval: margin / Z_90 is the standard error.
Z_90 = 1.645
# Relative standard error of the rate (standard error divided by the estimate).
cdf['below_lt18_rse'] = (cdf['below_lt18_rate_m90'] / Z_90) / cdf['below_lt18_rate']


In [None]:
# Columns shown in the report: county name, the table total with its margin,
# and the derived child poverty rate, its margin and relative standard error.
cols = ['county_name', 'b17020001', 'b17020001_m90', 'below_lt18_rate', 'below_lt18_rate_m90', 'below_lt18_rse']

# NOTE(review): `counties` is not bound to a Python object in any visible cell
# (only a warehouse table of that name is created) -- confirm where it is loaded.
# sort_values() replaces DataFrame.sort(), which newer pandas no longer provides.
(cdf.set_index('gvid')
    .join(counties)[cols]
    .sort_values('below_lt18_rate', ascending=False))

In [None]:
%aimport censuslib.dataframe
from censuslib.dataframe import CensusDataFrame
# Reload B17020, this time indexed by gvid, and wrap it for error propagation.
cdf2 = CensusDataFrame(
    l.partition('census.gov-acs-p5ye2014-b17020')
     .dataframe(pred)
     .set_index('gvid')
)

# Sum of columns 3-5 with its propagated margin.
cdf2['under18'], cdf2['under18_m90'] = cdf2.sum_m(3, 4, 5)

# Ratio of that sum to column 1.
# NOTE(review): the numerator is passed as a bare estimate series, without its
# margin -- confirm that ratio() still propagates the numerator's error.
cdf2['under18rate'], cdf2['under18rate_m90'] = cdf2.ratio(cdf2['under18'], 1)

cdf2.head()

In [None]:
%aimport censuslib.dataframe
from censuslib.dataframe import CensusDataFrame
# Same computation against the B17010 partition; this rebinds cdf2.
cdf2 = CensusDataFrame(
    l.partition('census.gov-acs-p5ye2014-b17010')
     .dataframe(pred)
     .set_index('gvid')
)

# A single-column "sum": yields b17010006 together with its margin.
cdf2['under18'], cdf2['under18_m90'] = cdf2.sum_m('b17010006')

# Ratio of that column to b17010001.
# NOTE(review): the numerator is passed as a bare estimate series, without its
# margin -- confirm that ratio() still propagates the numerator's error.
cdf2['under18rate'], cdf2['under18rate_m90'] = cdf2.ratio(cdf2['under18'], 'b17010001')

cdf2.head()

In [None]:
# Columns shown in the report: county name, selected B17010 columns, and the
# derived under-18 estimate, rate and rate margin.
cols = ['county_name', 'b17010001', 'b17010001_m90', 'b17010002', 'b17010006', 'under18', 'under18rate', 'under18rate_m90']

# NOTE(review): `counties` is not bound to a Python object in any visible cell
# (only a warehouse table of that name is created) -- confirm where it is loaded.
# sort_values() replaces DataFrame.sort(), which newer pandas no longer provides.
(cdf2.join(counties)[cols]
     .sort_values('under18rate', ascending=False))

In [10]:
# Reload B01001 with gvid kept as an ordinary column (no set_index): the melt
# cell further down filters on melted.gvid.
sexbyage = (
    l.partition('census.gov-acs-p5ye2014-b01001')
     .dataframe(pred)
)

In [39]:
from censuslib.dataframe import CensusDataFrame
import pandas as pd
# Re-tag the existing dataframe object as a CensusDataFrame in place, so that
# dim_columns() and sum_m() can be called on it without building a new frame.
sexbyage.__class__ = CensusDataFrame
# Columns selected by sex='female' with age='na' -- presumably the female total
# with no age breakdown; confirm against dim_columns().
total = sexbyage.dim_columns(sex='female', age='na')
# Every other female column: the full sex='female' selection minus `total`.
# Going through a set means the resulting column order is not defined.
ages = list(set(sexbyage.dim_columns(sex='female')) - set(total))
# Fresh frame for the comparison; this rebinds the name `df` used by earlier cells.
df = pd.DataFrame()

# total / total_m: estimate and margin taken from the total column(s).
# total2 / total2_m: the same quantity rebuilt by summing the remaining columns.
# In the saved output the two estimates agree while total2_m is much larger
# than total_m.
df['total'], df['total_m'] = sexbyage.sum_m(*total) 
df['total2'], df['total2_m'] =  sexbyage.sum_m(*ages) 

df.head()


Unnamed: 0,total,total_m,total2,total2_m
0,795343,76,795343,3349.70342
1,537,104,537,84.970583
2,17193,119,17193,635.334558
3,111833,148,111833,1526.512365
4,22664,168,22664,794.636395


In [69]:
# Reshape to long form: one row per (geography, variable).
# The first 9 columns are used as identifiers and everything after them as
# values -- assumes those 9 are the geography/header columns; TODO confirm.
melted = pd.melt(
    sexbyage,
    id_vars=list(sexbyage.columns[:9]),
    value_vars=list(sexbyage.columns[9:]),
)

# Keep a single geography and only the columns needed below. One .loc call
# selects rows and columns together instead of chaining two [] lookups.
melted = melted.loc[melted['gvid'] == '0O061R', ['gvid', 'variable', 'value']]

# Variables whose name contains '_m90' are margins; the rest are estimates.
is_margin = melted['variable'].str.contains('_m90')
estimates = melted[~is_margin].set_index(['gvid', 'variable'])

# Relabel the margin rows so each can be keyed by the name of the estimate it
# belongs to (its own name with the '_m90' suffix removed).
margins = melted[is_margin].copy()
margins.columns = ['gvid', 'ovariable', 'm90']
margins['variable'] = margins['ovariable'].str.replace('_m90', '')

# Put each estimate next to its margin. axis is passed by keyword because
# newer pandas no longer accepts it positionally in drop().
final = estimates.join(
    margins.set_index(['gvid', 'variable']).drop('ovariable', axis=1)
)
final.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,value,m90
gvid,variable,Unnamed: 2_level_1,Unnamed: 3_level_1
0O061R,b01001001,73059,0
0O061R,b01001002,37020,139
0O061R,b01001003,3263,128
0O061R,b01001004,2921,316
0O061R,b01001005,2998,316
