# Census Column Manipulation Tests



In [1]:
# Development setup: reload censuslib.dataframe automatically before each cell
# runs, so edits to the library are picked up without restarting the kernel.
# `%autoreload 1` reloads only the modules registered with `%aimport`.
%load_ext autoreload
%autoreload 1
%aimport censuslib.dataframe
from censuslib.dataframe import CensusDataFrame

In [2]:
from ambry import get_library

# Ambry references for the bundle and the partition used by the tests below.
# NOTE(review): the bundle reference spells the source as "acs_p5ye2014" while
# the partition reference uses "acs-p5ye2014" -- confirm both are intended.
BUNDLE_REF = 'census.gov-acs_p5ye2014-poverty-hdp-0.0.1'
PARTITION_REF = 'census.gov-acs-p5ye2014-b17020'

l = get_library()
b = l.bundle(BUNDLE_REF)
p = l.partition(PARTITION_REF)


In [3]:
# Row predicate: keep only California rows whose gvid is at the county level
def pred(r):
    """Return a truthy value when row ``r`` is a California county record.

    ``r`` must expose ``stusab`` and ``gvid`` attributes. The gvid is parsed
    with ``exception=False`` and its ``level`` is compared to ``'county'``.
    """
    from geoid.civick import GVid

    in_california = r.stusab == 'ca'
    # `and` short-circuits, so the gvid is only parsed for California rows.
    return in_california and GVid.parse(r.gvid, exception=False).level == 'county'

from censuslib.dataframe import CensusDataFrame

# Load the partition through `pred` (presumably a row filter -- confirm against
# the ambry partition API), then wrap the result in a CensusDataFrame, which
# provides the sum_m / ratio / sum_col_group / dim_columns helpers used below.
ca_county_rows = p.dataframe(pred)
df = CensusDataFrame(ca_county_rows)


In [28]:
# Test the sum_m and ratio functions, using explicit column positions

# Z-score of a two-sided 90% confidence interval. Dividing a 90% margin of
# error (the *_m90 columns) by it yields the standard error.
Z_90 = 1.645

# Work on a copy so the derived columns do not leak into `df`.
# NOTE: this cell used to call `cdf.set_index('gvid')` and discard the result,
# which had no effect (the recorded output still shows the default integer
# index with `gvid` as a regular column). The dead call is dropped rather than
# assigned so the frame layout stays exactly as recorded.
cdf = df.copy()

# Each sum_m call is unpacked into an (estimate, margin) pair of columns.
# Columns 3-5 and 11-13 are presumably the under-18 age brackets below and
# above the poverty line -- confirm against the table's column titles.
cdf['below_lt18'], cdf['below_lt18_m90'] = cdf.sum_m(3, 4, 5)
cdf['above_lt18'], cdf['above_lt18_m90'] = cdf.sum_m(11, 12, 13)
cdf['total_lt18'], cdf['total_lt18_m90'] = cdf.sum_m(3, 4, 5, 11, 12, 13)

# Ratio of the "below" sum to the total, with its derived margin.
cdf['below_lt18_rate'], cdf['below_lt18_rate_m90'] = cdf.ratio(
    (cdf['below_lt18'], cdf['below_lt18_m90']),
    (cdf['total_lt18'], cdf['total_lt18_m90']),
)

# Relative standard error: standard error of the rate divided by the rate.
cdf['below_lt18_rse'] = (cdf['below_lt18_rate_m90'] / Z_90) / cdf['below_lt18_rate']
cdf.head()

Unnamed: 0,id,stusab,chariter,sequence,logrecno,geoid,gvid,sumlevel,jam_flags,b17020001,...,b17020017_m90,below_lt18,below_lt18_m90,above_lt18,above_lt18_m90,total_lt18,total_lt18_m90,below_lt18_rate,below_lt18_rate_m90,below_lt18_rse
0,123331,ca,0,53,13,05000US06001,0O0601,50,,1531346,...,815,53306,1798.435431,284398,2434.175425,337704,3026.479803,0.158,0.005134,0.019752
1,123332,ca,0,53,14,05000US06003,0O0603,50,,1184,...,20,21,18.814888,196,57.636794,217,60.630026,0.097,0.08236,0.516153
2,123333,ca,0,53,15,05000US06005,0O0605,50,,32932,...,193,1230,293.325757,4415,373.840875,5645,475.181018,0.218,0.048614,0.135562
3,123334,ca,0,53,16,05000US06007,0O0607,50,,216592,...,433,10901,809.08405,33585,1042.113238,44486,1319.324448,0.245,0.016673,0.041369
4,123335,ca,0,53,17,05000US06009,0O0609,50,,44223,...,228,953,264.123077,7099,458.979302,8052,529.549809,0.118,0.031871,0.16419


In [25]:
# Exercise sum_col_group, which sums a range of columns given its two end points
published_cols = ['gvid', 'b17020001', 'b17020002', 'b17020002_m90', 'b17020010', 'b17020010_m90']
df2 = df[published_cols].copy()

# The sums are computed on the full frame `df` and attached to the narrow copy.
below_est, below_m90 = df.sum_col_group(3, 9)
df2['all_below'] = below_est
df2['all_below_m90'] = below_m90

above_est, above_m90 = df.sum_col_group(11, 17)
df2['all_above'] = above_est
df2['all_above_m90'] = above_m90

# The summed estimates must reproduce the published subtotal columns exactly.
# Only the estimates are compared: the derived margins differ from the
# published *_m90 values, as the output below shows.
assert df2['b17020002'].equals(df2['all_below'])
assert df2['b17020010'].equals(df2['all_above'])
df2.head()

Unnamed: 0,gvid,b17020001,b17020002,b17020002_m90,b17020010,b17020010_m90,all_below,all_below_m90,all_above,all_above_m90
0,0O0601,1531346,197191,4256,1334155,4578,197191,3312.377545,1334155,4381.812867
1,0O0603,1184,179,72,1005,160,179,68.095521,1005,149.672977
2,0O0605,32932,4277,732,28655,827,4277,544.889897,28655,822.734465
3,0O0607,216592,46643,1844,169949,1883,46643,1632.086701,169949,2069.94541
4,0O0609,44223,5352,776,38871,802,5352,645.48354,38871,952.67833


In [27]:
# Same check as the sum_col_group cell, but the columns are chosen with a
# dim_columns predicate and summed with sum_m
published_cols = ['gvid', 'b17020001', 'b17020002', 'b17020002_m90', 'b17020010', 'b17020010_m90']
df3 = df[published_cols].copy()

below_cols = df.dim_columns(" age != 'na' and col_num < 10 ")
below_est, below_m90 = df.sum_m(below_cols)
df3['all_below'] = below_est
df3['all_below_m90'] = below_m90

above_cols = df.dim_columns(" age != 'na' and col_num > 10 ")
above_est, above_m90 = df.sum_m(above_cols)
df3['all_above'] = above_est
df3['all_above_m90'] = above_m90

# The summed estimates must match the published subtotals ...
assert df3['b17020002'].equals(df3['all_below'])
assert df3['b17020010'].equals(df3['all_above'])
# ... and the whole frame must match `df2`, which the sum_col_group cell builds
# (that cell has to run first).
assert df2.equals(df3)
df3.head()

Unnamed: 0,gvid,b17020001,b17020002,b17020002_m90,b17020010,b17020010_m90,all_below,all_below_m90,all_above,all_above_m90
0,0O0601,1531346,197191,4256,1334155,4578,197191,3312.377545,1334155,4381.812867
1,0O0603,1184,179,72,1005,160,179,68.095521,1005,149.672977
2,0O0605,32932,4277,732,28655,827,4277,544.889897,28655,822.734465
3,0O0607,216592,46643,1844,169949,1883,46643,1632.086701,169949,2069.94541
4,0O0609,44223,5352,776,38871,802,5352,645.48354,38871,952.67833


In [20]:
# NOTE(review): this cell repeats, line for line, the autoreload setup and
# import from the first cell of the notebook. It looks like a leftover from
# interactive development and can probably be removed -- confirm before deleting.
%load_ext autoreload
%autoreload 1
%aimport censuslib.dataframe
from censuslib.dataframe import CensusDataFrame
