# School Enrollment Rates By District

School Enrollment Rates


In [1]:
from ambry import get_library
import censuslib.dataframe
import pandas as pd
import numpy as np

# The CensusDataFrame has methods for doing sums and ratios with 90% margins and
# relative standard errors.
df_class = censuslib.dataframe.CensusDataFrame

# Open the Ambry library and look up the bundle that the rest of the notebook
# reads its 'enrollment', 'under18' and 'districts' dependencies from.
l = get_library()
b = l.bundle('census.gov-acs-enrollment-p5ye2014-hdp')

def ca_county_pred(r):
    """Row predicate that keeps California school-district rows.

    Despite the name, the test is not on counties: a row is kept when its
    ``stusab`` attribute equals ``'ca'`` and the level of its parsed ``gvid``
    is one of ``'sdelm'``, ``'sduni'`` or ``'sdsec'`` (presumably elementary,
    unified and secondary school districts -- confirm against geoid).

    :param r: a row object exposing ``stusab`` and ``gvid`` attributes.
    :return: a truthy value when the row should be kept, falsy otherwise.
    """
    from geoid.civick import GVid

    school_district_levels = ('sdelm', 'sduni', 'sdsec')
    in_california = r.stusab == 'ca'

    # Short-circuit so the gvid is only parsed for California rows.
    return in_california and GVid.parse(r.gvid, exception=False).level in school_district_levels

# Load the enrollment table and the population-under-18 table, in that order.
# Both are filtered with ca_county_pred and indexed by gvid so that their
# columns align when they are copied into one frame later on.
enroll, u18 = (
    b.dep(dep_name).analysis.dataframe(ca_county_pred, df_class=df_class).set_index('gvid')
    for dep_name in ('enrollment', 'under18')
)

# The districts table is loaded without a row predicate.
districts = (
    b.dep('districts')
    .analysis
    .dataframe(df_class=df_class)
    .set_index('gvid')
)

# Strip the "School District ..., California" suffix from the district names.
# NOTE(review): the pattern is meant as a regular expression; pandas >= 2.0
# treats it as a literal string unless regex=True is passed -- confirm the
# pandas version this notebook runs against.
districts.name = districts.name.str.replace(
    'School District(.*)?[,;] California',
    ''
)

In [2]:
import numpy as np

# Create a new dataframe, and add the county name and state. LiveStories needs
# the state to trigger the maps display.
df = districts[['name', 'county']].copy()
# Rename the first column ('name') to 'district', keeping the rest unchanged.
df.columns = ['district'] + list(df.columns)[1:]
df['state'] = 'CA'

# Re-name a few columns and add them to the new dataframe. Each estimate is
# carried together with its '_m90' companion column (the 90% margin).
df['enrolled_prek'], df['enrolled_prek_m90'] = enroll['b14001003'], enroll['b14001003_m90']
df['3to4'], df['3to4_m90'] = u18['b09001004'], u18['b09001004_m90']

# Don't allow more enrolled kids than there are kids: where the enrolled count
# exceeds the 3-to-4 population, cap it at that population.
over_enrolled = df.enrolled_prek > df['3to4']
df.loc[over_enrolled, 'enrolled_prek'] = df.loc[over_enrolled, '3to4']

df['not_enrolled_prek'] = df['3to4'] - df['enrolled_prek']

# Enrollment rate and its margin, both scaled to percentages.
df['enrolled_rate'], df['enrolled_rate_m90'] = df.ratio('enrolled_prek', '3to4')
df['enrolled_rate'] = df['enrolled_rate'] * 100.0
df['enrolled_rate_m90'] = df['enrolled_rate_m90'] * 100.0

# Relative standard error (percent): margin / 1.645 gives the standard error,
# which is then divided by the estimate.
df['enrolled_rate_rse'] = df['enrolled_rate_m90'].astype(float) / 1.645 / df['enrolled_rate'].astype(float) * 100

# Divisions by zero produce +/-inf; zero them out across the whole frame.
# NOTE(review): an infinite RSE therefore becomes 0 and passes the "< 30"
# stability filter below -- confirm that this is intended.
df.replace([np.inf, -np.inf], [0, 0], inplace=True)

df['not_enrolled_rate'] = 100 - df['enrolled_rate']

out_cols = ['state', 'district', 'county', 'enrolled_prek', 'not_enrolled_prek', 'enrolled_rate',
            'not_enrolled_rate', 'enrolled_rate_m90', 'enrolled_rate_rse']

# Highest not-enrolled rate first; rows with any missing value are dropped.
# DataFrame.sort() was removed from pandas (0.20), so use sort_values().
not_enrolled = df[out_cols].sort_values('not_enrolled_rate', ascending=False).dropna()

# Keep only the districts whose rate has a relative standard error below 30.
not_enrolled_stable = not_enrolled[not_enrolled.enrolled_rate_rse < 30]
      
#not_enrolled_stable.to_csv('not_enrolled_prek_districts.csv', encoding='utf8')
