# School Enrollment Rates

Compute enrollment and non-enrollment rates for 3- and 4-year-olds in pre-K and nursery school, linked to counties and school districts.


In [1]:
from ambry import get_library
import censuslib.dataframe

# The CensusDataFrame has methods for doing sums and ratios with 90% margins and
# relative standard errors. It is passed as df_class to every dataframe() call
# below so the loaded tables are created with this class.
df_class = censuslib.dataframe.CensusDataFrame

# Open the Ambry library and the ACS enrollment bundle that the dataframes
# below are read from.
l = get_library()
b = l.bundle('census.gov-acs-enrollment-p5ye2014-hdp')

def ca_county_pred(r):
    """Row predicate: keep California county rows, matched on the USPS state code.

    The row ``r`` must expose ``stusab`` and ``gvid`` attributes. The result is
    truthy only when ``r.stusab`` equals ``'ca'`` and the geoid parsed from
    ``r.gvid`` reports a ``level`` of ``'county'``.

    NOTE(review): ``exception=False`` presumably stops ``GVid.parse`` from
    raising on an unparseable gvid; confirm that whatever it returns in that
    case still has a ``level`` attribute.
    """
    from geoid.civick import GVid

    in_california = r.stusab == 'ca'
    # `and` short-circuits, so the gvid is only parsed for California rows.
    return in_california and GVid.parse(r.gvid, exception=False).level == 'county'

# Enrollment table, restricted to California counties and indexed by gvid.
enroll = (
    b.dep('enrollment')
    .analysis
    .dataframe(ca_county_pred, df_class=df_class)
    .set_index('gvid')
)

# Population under 18, with the same row filter and index.
u18 = (
    b.dep('under18')
    .analysis
    .dataframe(ca_county_pred, df_class=df_class)
    .set_index('gvid')
)

def ca_county_pred_fp(r):
    """Row predicate: keep California county rows, matched on the state FIPS code.

    The row ``r`` must expose ``statefp`` and ``gvid`` attributes. The result
    is truthy only when ``r.statefp`` equals the integer ``6`` and the geoid
    parsed from ``r.gvid`` reports a ``level`` of ``'county'``.

    NOTE(review): the comparison is against an int, so this assumes
    ``statefp`` is stored numerically rather than as the string ``'06'``;
    verify against the counties table.
    """
    from geoid.civick import GVid

    in_california = r.statefp == 6
    # `and` short-circuits, so the gvid is only parsed for California rows.
    return in_california and GVid.parse(r.gvid, exception=False).level == 'county'

# County records for California, selected with the FIPS-based predicate.
counties = b.dep('counties').analysis.dataframe(
    ca_county_pred_fp, df_class=df_class
)

# Copy gvid into a separate 'idx' column and index on that copy, so gvid is
# still available as an ordinary column afterwards.
counties['idx'] = counties.gvid
counties.set_index('idx', inplace=True)


In [2]:
# Create a new dataframe, and add the county name and state. LiveStories needs
# the state to trigger the maps display.
df = counties[['name', 'gvid']].rename(columns={'name': 'county'})
df['state'] = 'CA'

# Re-name a few columns and add them to the new dataframe. The assignments
# align on the index: counties, enroll and u18 are all indexed by gvid values.
df['enrolled_prek'], df['enrolled_prek_m90'] = enroll['b14001003'], enroll['b14001003_m90']
df['3to4'], df['3to4_m90'] = u18['b09001004'], u18['b09001004_m90']
# NOTE(review): confirm that the enrollment column covers the same age group
# as the 3-to-4 population column; otherwise the rate below can exceed 1.

df['not_enrolled_prek'] = df['3to4'] - df['enrolled_prek']

# ratio() yields the estimate and its 90% margin as a pair.
df['enrolled_rate'], df['enrolled_rate_m90'] = df.ratio('enrolled_prek', '3to4')

# Relative standard error, in percent. Dividing a 90% margin of error by the
# 90% z-score gives the standard error, which is then scaled by the estimate.
# A zero rate produces inf/NaN here, which the stability filter below drops.
Z_90 = 1.645
df['enrolled_rate_rse'] = df['enrolled_rate_m90'] / Z_90 / df['enrolled_rate'] * 100

df['not_enrolled_rate'] = 1.0 - df['enrolled_rate']

# Output columns, worst (highest non-enrollment) counties first.
# sort_values() replaces DataFrame.sort(), which newer pandas no longer has.
not_enrolled = (
    df[['state', 'county', 'gvid', 'enrolled_prek', 'not_enrolled_prek',
        'enrolled_rate', 'not_enrolled_rate', 'enrolled_rate_m90',
        'enrolled_rate_rse']]
    .sort_values('not_enrolled_rate', ascending=False)
)

#not_enrolled.to_csv('not_enrolled_prek.csv')

# Keep only counties whose rate estimate has an RSE under 30 percent.
not_enrolled_stable = not_enrolled[not_enrolled.enrolled_rate_rse < 30]
