# School Enrollment Rates

School enrollment rates, organized in a file that can be broken out by county. Each county partition also includes the state and county averages.


In [1]:
import pandas as pd
import numpy as np
from ambry import get_library
from geoid.civick import State

# Fetch the enrollment bundle from the Ambry library.
l = get_library()
b = l.bundle('census.gov-acs-enrollment-p5ye2014-hdp')

# Load one dataframe per geographic grain.
counties = b.partition(grain='counties').analysis.dataframe()
state = b.partition(grain='states').analysis.dataframe()
districts = b.partition(grain='districts').analysis.dataframe()

# County columns that get joined onto each district row, renamed with a
# `county_` prefix so they don't collide with the district's own columns.
# NOTE(review): the last source column is `enrolled_rate_rse` but it is
# labelled `county_not_enrolled_rate_rse`, while the state equivalent below is
# labelled `state_enrolled_rate_rse` -- confirm which name is intended.
county_src_cols = ['county', 'gvid', 'not_enrolled_rate', 'enrolled_rate_rse']
county_join_cols = ['county', 'county_gvid', 'county_not_enrolled_rate', 'county_not_enrolled_rate_rse']
counties_col_join = counties[county_src_cols].copy()
counties_col_join.columns = county_join_cols

# Tag the state frame with a `state` key and a geoid before taking its join
# columns. State(6) is presumably the geoid for the same state as 'CA'.
state['state'] = 'CA'
state['geoid'] = State(6)
state_src_cols = ['state', 'not_enrolled_rate', 'enrolled_rate_rse']
state_join_cols = ['state', 'state_not_enrolled_rate', 'state_enrolled_rate_rse']
state_col_join = state[state_src_cols].copy()
state_col_join.columns = state_join_cols

# These columns are removed from the districts here so they can be joined
# back in later from their own sources.
rejoined_cols = ['med_house_inc', 'child_care_cost']
districts.drop(rejoined_cols, axis=1, inplace=True)


In [2]:
import censuslib.dataframe

# The CensusDataFrame has methods for doing sums and ratios with 90% margins and
# relative standard errors. It is passed as `df_class=` when the household
# income table is loaded below.
df_class = censuslib.dataframe.CensusDataFrame

def ca_csd_pred(r):
    """Row predicate selecting California state, county and district rows.

    A row `r` is accepted when its `stusab` is 'ca' and either:

    * its `gvid` parses to a level of 'county', 'state', 'sduni' or 'sdelem'
      and its `sumlevel` is not 40, or
    * its `logrecno` is 1.

    Rows with any other `stusab` are rejected.
    """
    from geoid.civick import GVid

    in_california = r.stusab == 'ca'
    if not in_california:
        return in_california

    wanted_levels = ('county', 'state', 'sduni', 'sdelem')
    if GVid.parse(r.gvid, exception=False).level in wanted_levels:
        not_sumlevel_40 = r.sumlevel != 40
        if not_sumlevel_40:
            return not_sumlevel_40

    # Source doesn't have geo component, so fall back to the record number.
    return r.logrecno == 1

# Load the bundle's household income dependency, keeping only the rows that
# `ca_csd_pred` accepts, as a CensusDataFrame.
hinc_all = b.dep('household_income').analysis.dataframe(ca_csd_pred, df_class=df_class)

# Index by gvid and keep just the b19013001 estimate and its 90% margin,
# under readable column names.
hinc = hinc_all.set_index('gvid')[['b19013001', 'b19013001_m90']]
hinc.columns = ['med_house_inc', 'med_house_inc_m90']

In [3]:
# Combined dataset on columns: one row per district, with the county rates,
# the state rates and the median household income (matched on the row's gvid)
# joined on as additional columns.
#
# `sort_values` is used instead of `DataFrame.sort`: the latter was deprecated
# in pandas 0.17 and removed in 0.20, so the old call fails on current pandas.
combined_cols = (
    districts.sort_values('county')
    .join(counties_col_join.set_index('county'), on='county', how='outer', rsuffix='_county')
    .join(state_col_join.set_index('state'), on='state', how='outer', rsuffix='_state')
    .join(hinc, on='gvid')
)

# Write the column-wise dataset to disk, then display it.
combined_cols.to_csv('enrollment_combined_cols.csv', encoding='utf8')
combined_cols

Unnamed: 0,id,geotype,gvid,state,county,district,enrolled_prek,not_enrolled_prek,enrolled_rate,not_enrolled_rate,enrolled_rate_m90,enrolled_rate_rse,county_gvid,county_not_enrolled_rate,county_not_enrolled_rate_rse,state_not_enrolled_rate,state_enrolled_rate_rse,med_house_inc,med_house_inc_m90
499,499,,fE0600j,CA,Alameda,Dublin Unified,908,295,75.5,24.5,12.452772,10.026589,0O0601,0.342,2.085717,0.44,0.482241,113499,3577
611,611,,fE0600l,CA,Alameda,Sunol Glen Unified,12,0,100.0,0.0,0.000000,0.000000,0O0601,0.342,2.085717,0.44,0.482241,85313,23397
188,188,,fE064m0,CA,Alameda,Hayward Unified,2219,2547,46.6,53.4,4.158414,5.424702,0O0601,0.342,2.085717,0.44,0.482241,61873,1887
198,198,,fE0691Q,CA,Alameda,San Lorenzo Unified,1010,1121,47.4,52.6,9.740011,12.491517,0O0601,0.342,2.085717,0.44,0.482241,58754,3442
399,399,,fE067iq,CA,Alameda,Oakland Unified,6904,4086,62.8,37.2,3.394566,3.285933,0O0601,0.342,2.085717,0.44,0.482241,52943,1031
618,618,,fE060u0,CA,Alameda,Albany City Unified,604,0,100.0,0.0,0.000000,0.000000,0O0601,0.342,2.085717,0.44,0.482241,78769,7420
477,477,,fE0691m,CA,Alameda,San Leandro Unified,1031,402,71.9,28.1,11.944082,10.098526,0O0601,0.342,2.085717,0.44,0.482241,63481,2000
488,488,,fE063hI,CA,Alameda,Emery Unified,132,47,73.7,26.3,24.360365,20.093260,0O0601,0.342,2.085717,0.44,0.482241,69329,6248
496,496,,fE065KC,CA,Alameda,Livermore Valley Joint Unified,1793,596,75.1,24.9,4.607889,3.729892,0O0601,0.342,2.085717,0.44,0.482241,101391,3372
501,501,,fE0600k,CA,Alameda,Pleasanton Unified,1047,337,75.7,24.3,4.085880,3.281133,0O0601,0.342,2.085717,0.44,0.482241,123181,3497


In [11]:
# Row-wise dataset: state, county and district records stacked into a single
# frame, each tagged with a `geotype` and carrying median household income.

# State and county frames still carry their own income / child care columns;
# drop them and take income from `hinc` instead, matched on gvid.
state_row_join = state.drop(['med_house_inc', 'child_care_cost'], axis=1).join(hinc, on='gvid')
state_row_join['geotype'] = 'state'

counties_row_join = counties.drop(['med_house_inc', 'child_care_cost'], axis=1).join(hinc, on='gvid')
counties_row_join['geotype'] ='county'

# `districts` already had those two columns removed in the first cell.
districts_row_join = districts.join(hinc, on='gvid')
districts_row_join['geotype'] ='district'

counties_idx = counties[['state','county']]

# Build one copy of the state record per county. Joining the county rows to
# the state row on `state` suffixes the overlapping county-side columns with
# `_c`, so `county_c` is the county name and the unsuffixed columns hold the
# state's values.
states_per_county = counties_row_join.join(state_row_join.set_index('state'), on='state', lsuffix='_c')[['county_c']+list(state_row_join.columns)]
# Discard the state record's own `county` value and let the county name
# (`county_c`, first column) take over the `county` label.
states_per_county.drop('county', axis=1, inplace=True)
states_per_county.columns = ['county']+list(states_per_county)[1:]
# Match the column set and order of the district rows so the frames stack cleanly.
states_per_county = states_per_county[list(districts_row_join.columns)].copy()

# Child care cost comes from its own table in the bundle, keyed by gvid.
cc_cost = b.partition(table='childcare_cost').analysis.dataframe().set_index('gvid')[['child_care_cost']]

# `sort_values` is used instead of `DataFrame.sort`: the latter was deprecated
# in pandas 0.17 and removed in 0.20, so the old call fails on current pandas.
combined_rows = pd.concat([states_per_county, counties_row_join, districts_row_join]).sort_values('county')\
                .join(cc_cost, on='gvid')\
                .fillna(value='') # Can't fill with None


In [12]:
# Display the row-wise combined dataset (state, county and district records).
combined_rows


Unnamed: 0,id,geotype,gvid,state,county,district,enrolled_prek,not_enrolled_prek,enrolled_rate,not_enrolled_rate,enrolled_rate_m90,enrolled_rate_rse,med_house_inc,med_house_inc_m90,child_care_cost
444,444,district,fE063Kg,CA,Alameda,Fremont Unified,4553,2105,68.400,31.600,4.036400,3.587337,103591,1759,
504,504,district,fE0621O,CA,Alameda,Castro Valley Unified,1122,353,76.100,23.900,7.204340,5.754978,95835,4156,
611,611,district,fE0600l,CA,Alameda,Sunol Glen Unified,12,0,100.000,0.000,0.000000,0.000000,85313,23397,
46,0,state,0E06,CA,Alameda,,590364,464217,0.560,0.440,0.004442,0.482241,61489,154,9106
618,618,district,fE060u0,CA,Alameda,Albany City Unified,604,0,100.000,0.000,0.000000,0.000000,78769,7420,
501,501,district,fE0600k,CA,Alameda,Pleasanton Unified,1047,337,75.700,24.300,4.085880,3.281133,123181,3497,
198,198,district,fE0691Q,CA,Alameda,San Lorenzo Unified,1010,1121,47.400,52.600,9.740011,12.491517,58754,3442,
188,188,district,fE064m0,CA,Alameda,Hayward Unified,2219,2547,46.600,53.400,4.158414,5.424702,61873,1887,
499,499,district,fE0600j,CA,Alameda,Dublin Unified,908,295,75.500,24.500,12.452772,10.026589,113499,3577,
17,17,district,fk066Ns,CA,Alameda,Mountain House Elementary,0,4,0.000,100.000,300.000000,0.000000,,,


Unnamed: 0_level_0,med_house_inc,med_house_inc_m90
gvid,Unnamed: 1_level_1,Unnamed: 2_level_1
0E06,61489,154
0O0601,73775,762
0O0603,61343,4265
0O0605,52964,3595
0O0607,43165,1130
0O0609,54936,3175
0O060b,50503,3403
0O060d,79799,955
0O060f,39302,4825
0O060h,68507,2109
