# Districts with Census Geoids

A mapping between NCES codes, Census geoids, GVids and California cd_codes


In [2]:
from ambry import get_library
import censuslib.dataframe
import pandas as pd

# The CensusDataFrame has methods for doing sums and ratios with 90% margins and
# relative standard errors.
df_class = censuslib.dataframe.CensusDataFrame

l = get_library()
b = l.bundle('census.gov-acs-enrollment-p5ye2014-hdp-0.0.1')


# The district NCES codes aren't in the district file, although they are in the school file,
# so the NCES <-> cd_code pairs are derived from the schools. The first seven characters of
# a school's cdscode are used as the cd_code of its district.
schools = b.dep('schools').analysis.dataframe()
schools['cd_code'] = schools.cdscode.apply(lambda cdscode: cdscode[:7])

# One row per distinct (ncesdist, cd_code) pair, taken from active schools only.
nces_districts = schools[schools.statustype=='Active'][['ncesdist', 'cd_code']].drop_duplicates()

# The actual districts file
ca_districts = b.dep('districts').analysis.dataframe()[['cd_code', 'county_sos','county_fips',
                                                        'county_gvid','county','district']]

# Sanity check: the list derived from schools should have as many rows as the district file;
# otherwise the left join below would leave gaps or multiply rows.
assert len(nces_districts) == len(ca_districts), \
    'district count from the schools file does not match the districts file'

# Combine the codes from the school file with the district file
cd_code_districts = ca_districts.set_index('cd_code').join(nces_districts.set_index('cd_code')).reset_index()
# DataFrame.sort() was deprecated in pandas 0.17 and later removed; sort_values() is the replacement.
cd_code_districts.sort_values('district').head()

Unnamed: 0,cd_code,county_sos,county_fips,county_gvid,county,district,ncesdist
290,1964212,19,37,0O060B,Los Angeles,ABC Unified,601620
57,761630,7,13,0O060d,Contra Costa,Acalanes Union High,601650
542,3166761,31,61,0O060Z,Placer,Ackerman Charter,601680
377,1975309,19,37,0O060B,Los Angeles,Acton-Agua Dulce Unified,600001
619,3667587,36,71,0O0619,San Bernardino,Adelanto Elementary,601710


In [9]:
##
## Join the California state districts list with the Census districts list
###

from geoid.acs import AcsGeoid
from geoid.civick import GVid
def dist_pred(row):
    """Row predicate passed to ``dataframe()``: keep rows whose ``state`` equals 6.

    NOTE(review): 6 is presumably the state FIPS code for California, in line
    with this notebook's subject -- confirm against the source tables.
    """
    return row.state == 6

def mk_cd_code(nces):
    """Return ``nces`` as a string: '06' followed by the number zero-padded to five digits.

    Despite the name, the result is stored in the 'nces' column and joined
    against 'ncesdist', so it is used as an NCES district code rather than
    as a California cd_code.

    ``nces`` must be an integer; values wider than five digits are not truncated.
    """
    padded = format(nces, '05d')
    return '06' + padded

def _census_partition(dep_name, code_attr):
    """Load one census school-district partition and attach its NCES code.

    dep_name is the bundle dependency to read ('elementary', 'secondary' or
    'unified'); code_attr is the attribute of the parsed AcsGeoid that holds
    the district number for that partition ('sdelm', 'sdsec' or 'sduni').
    Returns a copy of the 'geoid' and 'name' columns, restricted by dist_pred,
    with an added 'nces' column.
    """
    part = b.dep(dep_name).analysis.dataframe(dist_pred)[['geoid', 'name']].copy()
    part['nces'] = part.geoid.apply(
        lambda geoid: mk_cd_code(getattr(AcsGeoid.parse(geoid), code_attr)))
    return part

# Combine the three partitions for school districts in the census, and extract the NCES code
elem = _census_partition('elementary', 'sdelm')
second = _census_partition('secondary', 'sdsec')
unified = _census_partition('unified', 'sduni')

# Add a GVID
census_districts = pd.concat([elem, second, unified], axis=0)
census_districts['gvid'] = census_districts.geoid.apply(lambda geoid: AcsGeoid.parse(geoid).convert(GVid) )

# De-duplicate once; the result is used both for the join and for the size report below.
unique_census_districts = census_districts.drop_duplicates()

# Do the join
districts = cd_code_districts.set_index('ncesdist').join(unique_census_districts.set_index('nces')).reset_index()
# Make sure the first column (the former index) is named 'ncesdist' after reset_index().
districts.columns = ['ncesdist'] + list(districts.columns)[1:]

# These are different sizes, don't know why. The de-duplicated census file is smaller than the
# list from California, probably because many of the districts are smaller than the reporting limits.
# NOTE: Because join is a left join, (a) it must be joined in the order above ( cd_code_districts, the larger list,
# is the base ) and (b) the joined 'districts' dataframe will have some missing geoids.
# The missing geoids appear to be primarily for County offices of education and districts in small counties.
# A single pre-formatted string prints the same line under both Python 2 and Python 3.
print('%d %d %d %d' % (len(census_districts), len(unique_census_districts),
                       len(cd_code_districts), len(districts)))
districts.head()


1976 988 1098 1098


Unnamed: 0,ncesdist,cd_code,county_sos,county_fips,county_gvid,county,district,geoid,name,gvid
0,600001,1975309,19,37,0O060B,Los Angeles,Acton-Agua Dulce Unified,97000US0600001,"Acton-Agua Dulce Unified School District, Cali...",fE06001
1,600002,131609,1,1,0O0601,Alameda,California School for the Blind (State Special...,,,
2,600003,131617,1,1,0O0601,Alameda,California School for the Deaf-Fremont (State ...,,,
3,600006,2175002,21,41,0O060F,Marin,Ross Valley Elementary,95000US0600006,"Ross Valley Elementary School District, Califo...",fk06006
4,600007,3331625,33,65,0O0613,Riverside,California School for the Deaf-Riverside (Stat...,,,
