# School Enrollment Rates

School enrollment rates, organized in a single file that can be broken out by county. Each county partition also includes the state average and the county average.


In [1]:
import pandas as pd
import numpy as np
from ambry import get_library
from geoid.civick import State

# Open the enrollment bundle from the Ambry library.
l = get_library()
b = l.bundle('census.gov-acs-enrollment-p5ye2014-hdp')

# County-level rates, plus a narrow copy with county_-prefixed names that is
# joined onto each district row later on.
# NOTE(review): 'county_not_enrolled_rate_rse' is sourced from
# 'enrolled_rate_rse' (the state equivalent is named 'state_enrolled_rate_rse').
# The label looks inconsistent -- confirm before relying on it downstream.
counties = b.partition(grain='counties').analysis.dataframe()
counties_col_join = counties[
    ['county', 'gvid', 'not_enrolled_rate', 'enrolled_rate_rse']
].rename(columns={
    'gvid': 'county_gvid',
    'not_enrolled_rate': 'county_not_enrolled_rate',
    'enrolled_rate_rse': 'county_not_enrolled_rate_rse',
})

# State-level rates. The partition is tagged with the 'CA' state code (the
# join key used later) and a State(6) geoid.
state = b.partition(grain='states').analysis.dataframe()
state['state'] = 'CA'
state['geoid'] = State(6)
state_col_join = state[
    ['state', 'not_enrolled_rate', 'enrolled_rate_rse']
].rename(columns={
    'not_enrolled_rate': 'state_not_enrolled_rate',
    'enrolled_rate_rse': 'state_enrolled_rate_rse',
})

# District-level rates; these are the base rows of the combined outputs.
districts = b.partition(grain='districts').analysis.dataframe()

# We need to take these cols out so we can join them back in 
#districts.drop( ['med_house_inc', 'child_care_cost'], axis=1, inplace=True)


In [2]:
import censuslib.dataframe

# The CensusDataFrame has methods for doing sums and ratios with 90% margins and
# relative standard errors. It is passed as ``df_class`` when loading the
# household income data below.
df_class = censuslib.dataframe.CensusDataFrame

def ca_csd_pred(r):
    """Row predicate selecting California county, state and district rows.

    A row is kept when ``r.stusab`` is ``'ca'`` and either:

    * its ``gvid`` parses to a 'county', 'state', 'sduni' or 'sdelem' level
      and ``r.sumlevel`` is not 40, or
    * ``r.logrecno`` is 1 (per the original note: the source doesn't have a
      geo component, so this record is picked out by its record number).

    :param r: a row object exposing ``stusab``, ``gvid``, ``sumlevel`` and
        ``logrecno`` attributes.
    :return: truthy when the row should be kept.
    """
    from geoid.civick import GVid

    wanted_levels = ('county', 'state', 'sduni', 'sdelem')

    is_ca = r.stusab == 'ca'
    if not is_ca:
        # Both alternatives require a California row.
        return is_ca

    level = GVid.parse(r.gvid, exception=False).level
    level_match = level in wanted_levels and r.sumlevel != 40

    # Source  doesn't have geo component
    return level_match or r.logrecno == 1

# Household income from the bundle's 'household_income' dependency, filtered
# by ca_csd_pred and indexed by gvid so it can be joined onto the enrollment
# frames.
hinc = (
    b.dep('household_income')
    .analysis
    .dataframe(ca_csd_pred, df_class=df_class)
    .set_index('gvid')
)

# Keep only the estimate and its 90% margin column, under readable names.
hinc = hinc[['b19013001', 'b19013001_m90']]
hinc.columns = ['med_house_inc', 'med_house_inc_m90']

In [3]:
# Column-wise combined dataset: one row per district, widened with the
# county's rates, the state's rates and household income.
county_lookup = counties_col_join.set_index('county')
state_lookup = state_col_join.set_index('state')

combined_cols = (
    districts.sort_values(by='county')
    .join(county_lookup, on='county', how='outer', rsuffix='_county')
    .join(state_lookup, on='state', how='outer', rsuffix='_state')
    .join(hinc, on='gvid')
)

combined_cols.to_csv('enrollment_combined_cols.csv', encoding='utf8')
combined_cols

Unnamed: 0,id,geotype,gvid,state,county,district,enrolled_prek,not_enrolled_prek,enrolled_rate,not_enrolled_rate,enrolled_rate_m90,enrolled_rate_rse,county_gvid,county_not_enrolled_rate,county_not_enrolled_rate_rse,state_not_enrolled_rate,state_enrolled_rate_rse,med_house_inc,med_house_inc_m90
306,306,,fE0672s,CA,Alameda,Newark Unified,541.0,444.0,54.9,45.1,9.258759,10.252140,0O0601,0.342,2.085717,0.44,0.482241,86521.0,2361.0
615,615,,fE060u0,CA,Alameda,Albany City Unified,604.0,0.0,100.0,0.0,0.000000,0.000000,0O0601,0.342,2.085717,0.44,0.482241,78769.0,7420.0
395,395,,fE067iq,CA,Alameda,Oakland Unified,6904.0,4086.0,62.8,37.2,3.394566,3.285933,0O0601,0.342,2.085717,0.44,0.482241,52943.0,1031.0
198,198,,fE0691Q,CA,Alameda,San Lorenzo Unified,1010.0,1121.0,47.4,52.6,9.740011,12.491517,0O0601,0.342,2.085717,0.44,0.482241,58754.0,3442.0
17,17,,fk066Ns,CA,Alameda,Mountain House Elementary,0.0,4.0,0.0,100.0,300.000000,0.000000,0O0601,0.342,2.085717,0.44,0.482241,,
474,474,,fE0691m,CA,Alameda,San Leandro Unified,1031.0,402.0,71.9,28.1,11.944082,10.098526,0O0601,0.342,2.085717,0.44,0.482241,63481.0,2000.0
320,320,,fE06702,CA,Alameda,New Haven Unified,989.0,780.0,55.9,44.1,4.400767,4.785758,0O0601,0.342,2.085717,0.44,0.482241,82145.0,2505.0
486,486,,fE063hI,CA,Alameda,Emery Unified,132.0,47.0,73.7,26.3,24.360365,20.093260,0O0601,0.342,2.085717,0.44,0.482241,69329.0,6248.0
494,494,,fE065KC,CA,Alameda,Livermore Valley Joint Unified,1793.0,596.0,75.1,24.9,4.607889,3.729892,0O0601,0.342,2.085717,0.44,0.482241,101391.0,3372.0
497,497,,fE0600j,CA,Alameda,Dublin Unified,908.0,295.0,75.5,24.5,12.452772,10.026589,0O0601,0.342,2.085717,0.44,0.482241,113499.0,3577.0


In [4]:
# Row-wise layout: attach household income to each geography level and tag
# every row with the level it describes.
state_row_join = state.join(hinc, on='gvid').assign(geotype='state')
counties_row_join = counties.join(hinc, on='gvid').assign(geotype='county')
districts_row_join = districts.join(hinc, on='gvid').assign(geotype='district')

counties_idx = counties[['state', 'county']]

# Repeat the state row once per county. Overlapping county-side columns get
# a '_c' suffix, so selecting 'county_c' plus the state frame's columns keeps
# the county name alongside the state's values.
state_columns = list(state_row_join.columns)
county_state_pairs = counties_row_join.join(
    state_row_join.set_index('state'), on='state', lsuffix='_c'
)
states_per_county = (
    county_state_pairs[['county_c'] + state_columns]
    .drop('county', axis=1)                  # the state row's own county column
    .rename(columns={'county_c': 'county'})  # the county name takes its place
)

# Match the district frame's column set and order so the frames can be stacked.
states_per_county = states_per_county[list(districts_row_join.columns)].copy()

In [5]:
# Child care cost, indexed by gvid so it can be joined onto the stacked rows.
cc_cost = (
    b.partition(table='childcare_cost')
    .analysis.dataframe()
    .set_index('gvid')[['child_care_cost']]
)

# Stack the per-county state rows, the county rows and the district rows into
# one table, ordered by county.
# DataFrame.sort() was deprecated and later removed from pandas;
# sort_values() is the replacement and matches the combined_cols cell.
combined_rows = (
    pd.concat([states_per_county, counties_row_join, districts_row_join])
    .sort_values(by='county')
    .join(cc_cost, on='gvid')
    .fillna(value='')  # Can't fill with None
)

In [7]:
# Display the row-wise combined table (state, county and district rows).
combined_rows


Unnamed: 0,id,geotype,gvid,state,county,district,enrolled_prek,not_enrolled_prek,enrolled_rate,not_enrolled_rate,enrolled_rate_m90,enrolled_rate_rse,med_house_inc,med_house_inc_m90,child_care_cost
486,486,district,fE063hI,CA,Alameda,Emery Unified,132.0,47.0,73.700,26.300,24.360365,20.093260,69329,6248,
306,306,district,fE0672s,CA,Alameda,Newark Unified,541.0,444.0,54.900,45.100,9.258759,10.252140,86521,2361,
188,188,district,fE064m0,CA,Alameda,Hayward Unified,2219.0,2547.0,46.600,53.400,4.158414,5.424702,61873,1887,
502,502,district,fE0621O,CA,Alameda,Castro Valley Unified,1122.0,353.0,76.100,23.900,7.204340,5.754978,95835,4156,
615,615,district,fE060u0,CA,Alameda,Albany City Unified,604.0,0.0,100.000,0.000,0.000000,0.000000,78769,7420,
474,474,district,fE0691m,CA,Alameda,San Leandro Unified,1031.0,402.0,71.900,28.100,11.944082,10.098526,63481,2000,
440,440,district,fE063Kg,CA,Alameda,Fremont Unified,4553.0,2105.0,68.400,31.600,4.036400,3.587337,103591,1759,
198,198,district,fE0691Q,CA,Alameda,San Lorenzo Unified,1010.0,1121.0,47.400,52.600,9.740011,12.491517,58754,3442,
494,494,district,fE065KC,CA,Alameda,Livermore Valley Joint Unified,1793.0,596.0,75.100,24.900,4.607889,3.729892,101391,3372,
46,0,state,0E06,CA,Alameda,,590364.0,464217.0,0.560,0.440,0.004442,0.482241,61489,154,9106
