In [651]:
import pandas as pd

### Extract methodology fields for each year

In [652]:
# Workbook whose 'Field_Name <year>' columns list the fields wanted for each survey year.
methodology_path = '/Users/cave/Desktop/discriminology/methodology_fields.xlsx'
mf = pd.read_excel(methodology_path)

In [653]:
def _unique_field_names(column):
    """Flatten one 'Field_Name' column into an array of distinct field names.

    A cell may hold several names separated by commas or newlines; each name
    becomes its own entry and repeated names are kept only once.
    """
    split_names = column.str.replace('\n', ',').str.split(',')
    return split_names.apply(pd.Series).stack().drop_duplicates().values


fields_1112 = _unique_field_names(mf['Field_Name 2011/12'])

fields_1314 = _unique_field_names(mf['Field_Name 2013/14'])

fields_1516 = _unique_field_names(mf['Field_Name 2015/16'])


Check that there are no duplicates in the desired fields

In [654]:
# Every yearly field list must contain each name exactly once.
for year_fields in (fields_1112, fields_1314, fields_1516):
    assert len(year_fields) == pd.Series(year_fields).nunique()

In [655]:
# Report how many fields were requested for each survey year.
for year_label, year_fields in (('11/12', fields_1112),
                                ('13/14', fields_1314),
                                ('15/16', fields_1516)):
    print(f"{len(year_fields)} desired columns from {year_label} data")

221 desired columns from 11/12 data
219 desired columns from 13/14 data
231 desired columns from 15/16 data


Read field mappings and data types

In [559]:
# Workbook mapping field names across survey years, with a type and description per field.
field_mapping_path = '/Users/cave/Desktop/discriminology/field_mapping.xlsx'
field_mappings = pd.read_excel(field_mapping_path)

In [565]:
# Rows that carry a 2011-12 field name (map_), and the subset that also
# has a 2013-14 counterpart (map_2).
map_ = field_mappings[['11_12_field', '13_14_field', 'type']].dropna(subset=['11_12_field'])
map_2 = field_mappings[['11_12_field', '13_14_field']].dropna(subset=['11_12_field', '13_14_field'])

# 2011-12 field name -> 2013-14 field name (used for renaming columns).
map_11_12 = map_2.set_index('11_12_field')['13_14_field'].to_dict()

# 2011-12 field name -> declared type; the "final" dict omits 'str' entries.
type_dict_11_12 = map_.set_index('11_12_field')['type'].to_dict()
final_type_dict_11_12 = {
    field: declared
    for field, declared in type_dict_11_12.items()
    if declared != 'str'
}

In [566]:
# Canonical field name: the 2015-16 name where present, otherwise the 2011-12 name.
field_mappings['col_superset'] = field_mappings['15_16_field'].combine_first(
    field_mappings['11_12_field']
)

# Canonical field name -> declared type; the numeric map omits 'str' entries.
type_map = field_mappings.set_index('col_superset')['type'].to_dict()
numeric_type_map = {
    field: declared
    for field, declared in type_map.items()
    if declared != 'str'
}

In [567]:
# One-column lookup table: canonical field name (index) -> 'description'.
description_cols = ['col_superset', 'description']
descriptions = field_mappings[description_cols].set_index('col_superset')

In [568]:
# Columns that are always read as text when loading the cached CSV files.
# NOTE(review): the 'SCH_ZIP ' key ends with a space, matching the column
# name used later for the 2011-12 frame — confirm other years use the same spelling.
universal_types = {
    'SCH_ZIP ': str,
    'SCHID': str,
    'COMBOKEY': str,
    'LEAID': str,
}

### Create helper functions to aggregate dataframes and label coded columns

In [13]:
def aggregate_data(frame_array, desired_fields):
    """Combine several per-topic frames into one frame indexed by COMBOKEY.

    INPUTS

    frame_array (list of DataFrames): frames that each contain a 'COMBOKEY'
        column plus any number of data columns
    desired_fields (list-like of str): column names to keep; 'COMBOKEY' must
        be among them

    RETURNS

    DataFrame indexed by COMBOKEY (as strings) with one column per desired
    field, columns sorted by name. A field that occurs in several input
    frames is collapsed to a single column holding, for each row, the last
    non-missing value. Because the frame is transposed to merge columns,
    the result generally comes back with object dtype.
    """
    clean_frames = []

    for df in frame_array:
        # Work on an explicit copy so the caller's frame is never modified
        # and no assignment lands on a temporary slice.
        temp = df.loc[:, df.columns.isin(desired_fields)].copy()

        # Replace negative numbers with 0 in the numeric columns.
        numeric_cols = temp.select_dtypes(include='number').columns
        if len(numeric_cols):
            temp[numeric_cols] = temp[numeric_cols].clip(lower=0)

        temp = temp.replace('<=2', '0')  # remove misc symbols from values
        # Use an explicit NaN: on older pandas `replace(x, None)` forward-fills
        # from the previous row instead of blanking the cell.
        temp = temp.replace('‡', float('nan'))

        temp['COMBOKEY'] = temp['COMBOKEY'].astype(str)
        clean_frames.append(temp.set_index('COMBOKEY'))

    concat = pd.concat(clean_frames, axis=1, sort=True)

    # Collapse columns that share a NAME (last non-missing value wins).
    # Duplicates are identified by name only, so two different fields that
    # happen to hold identical values are both kept.
    return concat.T.groupby(level=0).last().T

In [109]:
def join_col_descriptions(agg_data, year, col_descriptions=None,
                          output_dir='~/Desktop/discriminology/output'):
    '''
    Label each coded column with its description and tag the rows with a year.

    INPUTS

    agg_data (DataFrame): Aggregated data with coded fields as columns
    year (STR): year range of data e.g. '2015-16'
    col_descriptions (DataFrame, optional): frame indexed by field code with a
        'description' column. Defaults to the notebook-level `descriptions`.
    output_dir (STR or None): directory that receives `final_data_{year}.csv`;
        pass None to skip writing the file.

    RETURNS

    DataFrame with two-level columns (field code, description) plus a
    ('YEAR', 'School Year') column set to `year`. Columns of `agg_data`
    without an entry in the description table are dropped (inner join).
    '''
    if col_descriptions is None:
        col_descriptions = descriptions

    # Join on the field code: codes are the index of the transposed data
    # and the index of the description table.
    labelled = pd.merge(agg_data.T, col_descriptions,
                        left_index=True, right_index=True, how='inner')
    final = labelled.set_index('description', append=True).T
    final['YEAR', 'School Year'] = year

    if output_dir is not None:
        final.to_csv(f'{output_dir}/final_data_{year}.csv')
    return final
    

### Load all data + col description files for 2011-12

In [15]:
## Reads originals from excel

# df1_1112, df1_desc_1112 = pd.read_excel('~/Desktop/discriminology/OCR School data sample/2011:12/Enrollment/05 - Overall Enrollment.xlsx', sheet_name=None).values()
# df2_1112, df2_desc_1112 = pd.read_excel('~/Desktop/discriminology/OCR School data sample/2011:12/Enrollment/08 - Students enrolled in Gifted-Talented Programs.xlsx', sheet_name=None).values()
# df3_1112, df3_desc_1112 = pd.read_excel('~/Desktop/discriminology/OCR School data sample/2011:12/Enrollment/10-1 - Students with Disabilities Served under IDEA Enrollment.xlsx', sheet_name=None).values()
# df4_1112, df4_desc_1112 = pd.read_excel('~/Desktop/discriminology/OCR School data sample/2011:12/Enrollment/10-2 - Students with Disabilities Served under 504 Enrollment.xlsx', sheet_name=None).values()
# df5_1112, df5_desc_1112 = pd.read_excel('~/Desktop/discriminology/OCR School data sample/2011:12/Discipline/Out of School Suspensions/W:O Disabilities/35-3 - Students WO Disab Receiving only one out-of-school suspension.xlsx', sheet_name=None).values()
# df6_1112, df6_desc_1112 = pd.read_excel('~/Desktop/discriminology/OCR School data sample/2011:12/Discipline/Out of School Suspensions/W:O Disabilities/35-4 - Students WO Disab Rec more than one out-of-school suspension.xlsx', sheet_name=None).values()
# df7_1112, df7_desc_1112 = pd.read_excel('~/Desktop/discriminology/OCR School data sample/2011:12/Discipline/Out of School Suspensions/With Disabilities/36-3 - Students With Disabilities Receiving only one out-of-school suspension.xlsx', sheet_name=None).values()
# df8_1112, df8_desc_1112 = pd.read_excel('~/Desktop/discriminology/OCR School data sample/2011:12/Discipline/Out of School Suspensions/With Disabilities/36-4 - Students With Disab Receiving more than one out-of-school suspension.xlsx', sheet_name=None).values()
# df9_1112, df9_desc_1112 = pd.read_excel('~/Desktop/discriminology/OCR School data sample/2011:12/Academic/Advanced Placement/17 - Students who are taking at least one AP course.xlsx', sheet_name=None).values()
# df10_1112, df10_desc_1112 = pd.read_excel('~/Desktop/discriminology/OCR School data sample/2011:12/Discipline/Referral to law enforcement/W:O Disabilities/35-8 - Students Without Disabilities Referral to law enforcement.xlsx', sheet_name=None).values()
# df11_1112, df11_desc_1112 = pd.read_excel('~/Desktop/discriminology/OCR School data sample/2011:12/Discipline/Referral to law enforcement/With Disabilities/36-8 - Students With Disabilities Referral to law enforcement.xlsx', sheet_name=None).values()
# df12_1112, df12_desc_1112 = pd.read_excel('~/Desktop/discriminology/OCR School data sample/2011:12/Discipline/School Related Arrest/W:O Disabilities/35-9 - Students Without Disabilities School-related arrest.xlsx', sheet_name=None).values()
# df13_1112, df13_desc_1112 = pd.read_excel('~/Desktop/discriminology/OCR School data sample/2011:12/Discipline/School Related Arrest/With Disabilities/36-9 - Students With Disabilities School-related arrest.xlsx', sheet_name=None).values()
# df14_1112, df14_desc_1112 = pd.read_excel('~/Desktop/discriminology/OCR School data sample/2011:12/School Characteristics/02 - School Characteristics.xlsx', sheet_name=None).values()
# df15_1112, df15_desc_1112 = pd.read_excel('~/Desktop/discriminology/OCR School data sample/2011:12/Staff/08-1 School Support and Security Staff (required elements).xlsx', sheet_name=None).values()
# df16_1112, df16_desc_1112 = pd.read_excel('~/Desktop/discriminology/OCR School data sample/2011:12/Enrollment/06 - Enrolled in Early Childhood and Prekindergarten.xlsx', sheet_name=None).values()

In [16]:
# ctr = 0
# for frame in desc_frames_1112:
#     frame.to_csv(f'/Users/cave/Desktop/discriminology/2011_12/descriptions/file_{ctr}.csv')
#     ctr += 1

# ctr = 0
# for frame in frames_1112:
#     frame.to_csv(f'~/Desktop/discriminology/2011_12/data/file_{ctr}.csv')
#     ctr += 1


In [18]:
# Load the cached 2011-12 data files; identifier columns are read as text.
# NOTE(review): df1..df15 come from file_1..file_15 while df16 comes from
# file_0 — confirm this numbering matches how the files were written.
data_dir_1112 = '~/Desktop/discriminology/2011_12/data'
(df1_1112, df2_1112, df3_1112, df4_1112,
 df5_1112, df6_1112, df7_1112, df8_1112,
 df9_1112, df10_1112, df11_1112, df12_1112,
 df13_1112, df14_1112, df15_1112, df16_1112) = [
    pd.read_csv(f'{data_dir_1112}/file_{file_no}.csv', dtype=universal_types)
    for file_no in list(range(1, 16)) + [0]
]

  interactivity=interactivity, compiler=compiler, result=result)
  interactivity=interactivity, compiler=compiler, result=result)
  interactivity=interactivity, compiler=compiler, result=result)
  interactivity=interactivity, compiler=compiler, result=result)


In [19]:
# Load the cached 2011-12 field-description files (first CSV column is the index).
desc_dir_1112 = '~/Desktop/discriminology/2011_12/descriptions'
(df1_desc_1112, df2_desc_1112, df3_desc_1112, df4_desc_1112,
 df5_desc_1112, df6_desc_1112, df7_desc_1112, df8_desc_1112,
 df9_desc_1112, df10_desc_1112, df11_desc_1112, df12_desc_1112,
 df13_desc_1112, df14_desc_1112, df15_desc_1112, df16_desc_1112) = [
    pd.read_csv(f'{desc_dir_1112}/file_{file_no}.csv', index_col=0)
    for file_no in list(range(1, 16)) + [0]
]

In [20]:

# All 2011-12 data frames, df1 through df16.
frames_1112 = [
    df1_1112, df2_1112, df3_1112, df4_1112,
    df5_1112, df6_1112, df7_1112, df8_1112,
    df9_1112, df10_1112, df11_1112, df12_1112,
    df13_1112, df14_1112, df15_1112, df16_1112,
]

# The field-description frames, in the same df1..df16 order.
desc_frames_1112 = [
    df1_desc_1112, df2_desc_1112, df3_desc_1112, df4_desc_1112,
    df5_desc_1112, df6_desc_1112, df7_desc_1112, df8_desc_1112,
    df9_desc_1112, df10_desc_1112, df11_desc_1112, df12_desc_1112,
    df13_desc_1112, df14_desc_1112, df15_desc_1112, df16_desc_1112,
]


Use helper function to aggregate frames and clean up negative values, standardize length of zipcodes.

In [219]:
# Careful, this cell runs for a while.
# It must stay enabled: the next cell copies df_1112_raw, which is defined nowhere else.
df_1112_raw = aggregate_data(frames_1112, fields_1112)

In [333]:
# Work on a copy so the aggregated frame stays available for re-runs.
df_1112 = df_1112_raw.copy(deep=True)

In [335]:
# Pad zip codes with leading zeroes to a width of five characters.
zip_codes_1112 = df_1112['SCH_ZIP ']
df_1112['SCH_ZIP '] = zip_codes_1112.str.zfill(5)

Change 0/1 indicator vars to Yes / No for selected columns

In [336]:
# Columns holding 0/1 indicators: grade-level flags followed by three school-type flags.
grade_flags = ['PreK', 'K'] + [f'G{grade}' for grade in range(1, 13)]
school_type_flags = ['MG_SCH', 'CHARTER_SCH', 'ALT_SCH']
int_cols_to_str = grade_flags + school_type_flags

In [337]:
# Translate 0/1 indicators to No/Yes, whether stored as text or as numbers.
indicator_values = df_1112[int_cols_to_str]
for raw_value, label in (('0', 'No'), ('1', 'Yes'), (0, 'No'), (1, 'Yes')):
    indicator_values = indicator_values.replace(raw_value, label)
df_1112[int_cols_to_str] = indicator_values

In [338]:
# Treat the '-9' code in the security-staff indicator as 'No'.
security_ind_1112 = df_1112['SCH_FTESECURITY_IND']
df_1112['SCH_FTESECURITY_IND'] = security_ind_1112.str.replace('-9', 'No')

Make types consistent within each column

In [340]:
# COMBOKEY is the index of df_1112, not a column, so it must not appear in
# the dtype mapping handed to astype(). The cast below reads
# final_type_dict_11_12, so the key is removed from that dict as well; the
# default argument keeps the cell safe to re-run.
type_dict_11_12.pop('COMBOKEY', None)
final_type_dict_11_12.pop('COMBOKEY', None);

In [343]:
# Cast each non-string field to the type declared in the field-mapping workbook.
df_1112 = df_1112.astype(dtype=final_type_dict_11_12)

Custom case handling: Juvenile Justice facilities

In [345]:
# Both the 'Z' and the 'X' code are rewritten to 'Yes'.
jj_values = df_1112['JJ']
for jj_code in ('Z', 'X'):
    jj_values = jj_values.str.replace(jj_code, 'Yes')
df_1112['JJ'] = jj_values

Rename columns to match 13/14 and 15/16 data

In [348]:
# Rename 2011-12 field names to their 2013-14 equivalents.
df_1112 = df_1112.rename(columns=map_11_12)

In [351]:
# Keep the canonical column order, restricted to columns present in df_1112.
superset = field_mappings['col_superset'].values
ordered_cols_1112 = []
for field in superset:
    if field in df_1112.columns:
        ordered_cols_1112.append(field)

In [353]:
# Reorder (and restrict) the columns to the canonical order.
df_1112 = df_1112.loc[:, ordered_cols_1112]

In [355]:
# Attach descriptions and the school year; the helper also writes the yearly CSV.
df_1112_final = join_col_descriptions(agg_data=df_1112, year='2011-12')

In [357]:
# Save every 100th row as a small sample file.
sample_1112 = df_1112_final.iloc[::100]
sample_1112.to_csv('~/Desktop/discriminology/output/11_12_sample.csv')

In [384]:
# (rows, columns) of the labelled 2011-12 table.
df_1112_final.shape

(101133, 220)

### Load all data + col description files for 2013-14

Collect column name descriptions from each spreadsheet

In [358]:
# # Read originals from excel

# df1_1314, df1_desc_1314 = pd.read_excel('~/Desktop/discriminology/OCR School data sample/2013:14/CRDC-collected data file for Schools/01 School Characteristics.xlsx', sheet_name=None).values()
# df2_1314, df2_desc_1314 = pd.read_excel('~/Desktop/discriminology/OCR School data sample/2013:14/CRDC-collected data file for Schools/03 Enrollment.xlsx', sheet_name=None).values()
# df3_1314, df3_desc_1314 = pd.read_excel('~/Desktop/discriminology/OCR School data sample/2013:14/CRDC-collected data file for Schools/04-1 Gifted and Talented Enrollment.xlsx', sheet_name=None).values()
# df4_1314, df4_desc_1314 = pd.read_excel('~/Desktop/discriminology/OCR School data sample/2013:14/CRDC-collected data file for Schools/06 Advanced Placement and International Baccalaureate Diploma Programme Enrollment.xlsx', sheet_name=None).values()
# df5_1314, df5_desc_1314 = pd.read_excel('~/Desktop/discriminology/OCR School data sample/2013:14/CRDC-collected data file for Schools/07-2 Advanced Placement Exams.xlsx', sheet_name=None).values()
# df6_1314, df6_desc_1314 = pd.read_excel('~/Desktop/discriminology/OCR School data sample/2013:14/CRDC-collected data file for Schools/08-1 School Support and Security Staff (required elements).xlsx', sheet_name=None).values()
# df7_1314, df7_desc_1314 = pd.read_excel('~/Desktop/discriminology/OCR School data sample/2013:14/CRDC-collected data file for Schools/09-1 Chronic Absenteeism.xlsx', sheet_name=None).values()
# df8_1314, df8_desc_1314 = pd.read_excel('~/Desktop/discriminology/OCR School data sample/2013:14/CRDC-collected data file for Schools/11-2 Suspensions (required elements).xlsx', sheet_name=None).values()
# df9_1314, df9_desc_1314 = pd.read_excel('~/Desktop/discriminology/OCR School data sample/2013:14/CRDC-collected data file for Schools/11-3 Expulsions.xlsx', sheet_name=None).values()
# df10_1314, df10_desc_1314 = pd.read_excel('~/Desktop/discriminology/OCR School data sample/2013:14/CRDC-collected data file for Schools/12 Student Referrals and Arrests.xlsx', sheet_name=None).values()
# df11_1314, df11_desc_1314 = pd.read_excel('~/Desktop/discriminology/OCR School data sample/2013:14/CRDC-collected data file for Schools/16 School Expenditures.xlsx', sheet_name=None).values()
# df12_1314, df12_desc_1314 = pd.read_excel('~/Desktop/discriminology/OCR School data sample/2013:14/CRDC-collected data file for Schools/17 Justice Facilities.xlsx', sheet_name=None).values()

In [359]:
# ctr = 0
# for frame in desc_frames_1314:
#     frame.to_csv(f'~/Desktop/discriminology/2013_14/descriptions/file_{ctr}.csv')
#     ctr += 1


# ctr = 0
# for frame in frames_1314:
#     frame.to_csv(f'~/Desktop/discriminology/2013_14/data/file_{ctr}.csv')
#     ctr += 1


In [361]:
# Load the cached 2013-14 data files; identifier columns are read as text.
# NOTE(review): df1..df11 come from file_1..file_11 while df12 comes from
# file_0 — confirm this numbering matches how the files were written.
data_dir_1314 = '~/Desktop/discriminology/2013_14/data'
(df1_1314, df2_1314, df3_1314, df4_1314,
 df5_1314, df6_1314, df7_1314, df8_1314,
 df9_1314, df10_1314, df11_1314, df12_1314) = [
    pd.read_csv(f'{data_dir_1314}/file_{file_no}.csv', dtype=universal_types)
    for file_no in list(range(1, 12)) + [0]
]

In [363]:
# Load the cached 2013-14 field-description files (first CSV column is the index).
desc_dir_1314 = '~/Desktop/discriminology/2013_14/descriptions'
(df1_desc_1314, df2_desc_1314, df3_desc_1314, df4_desc_1314,
 df5_desc_1314, df6_desc_1314, df7_desc_1314, df8_desc_1314,
 df9_desc_1314, df10_desc_1314, df11_desc_1314, df12_desc_1314) = [
    pd.read_csv(f'{desc_dir_1314}/file_{file_no}.csv', dtype=universal_types, index_col=0)
    for file_no in list(range(1, 12)) + [0]
]


In [364]:
# All 2013-14 data frames, df1 through df12.
frames_1314 = [
    df1_1314, df2_1314, df3_1314, df4_1314,
    df5_1314, df6_1314, df7_1314, df8_1314,
    df9_1314, df10_1314, df11_1314, df12_1314,
]

# The field-description frames, in the same df1..df12 order.
desc_frames_1314 = [
    df1_desc_1314, df2_desc_1314, df3_desc_1314, df4_desc_1314,
    df5_desc_1314, df6_desc_1314, df7_desc_1314, df8_desc_1314,
    df9_desc_1314, df10_desc_1314, df11_desc_1314, df12_desc_1314,
]


In [365]:
# Merge the 2013-14 frames into one COMBOKEY-indexed table of the desired fields.
df_1314_raw = aggregate_data(frame_array=frames_1314, desired_fields=fields_1314)

In [569]:
# Work on a copy so the aggregated frame stays available for re-runs.
df_1314 = df_1314_raw.copy(deep=True)

In [570]:
# In the security-staff indicator the '-9' and '-5' codes mean 'No'.
df_1314['SCH_FTESECURITY_IND'] = (
    df_1314['SCH_FTESECURITY_IND'].str.replace('-9', 'No').str.replace('-5', 'No')
)

# Everywhere else those codes become missing values. An explicit NaN is used
# because on older pandas `replace(x, None)` forward-fills from the previous
# row (i.e. copies another school's value) instead of blanking the cell.
df_1314 = df_1314.replace(['-5', '-9'], float('nan'))

In [572]:
# Attach descriptions and the school year; the helper also writes the yearly CSV.
df_1314_final = join_col_descriptions(agg_data=df_1314, year='2013-14')

In [573]:
# Preview the first rows of the labelled 2013-14 table.
df_1314_final.head()

Unnamed: 0_level_0,JJ,LEAID,LEA_NAME,SCHID,SCH_APENR_AM_F,SCH_APENR_AM_M,SCH_APENR_AS_F,SCH_APENR_AS_M,SCH_APENR_BL_F,SCH_APENR_BL_M,...,TOT_DISCWODIS_REF_M,TOT_DISCWODIS_SINGOOS_F,TOT_DISCWODIS_SINGOOS_M,TOT_ENR_F,TOT_ENR_M,TOT_GTENR_F,TOT_GTENR_M,TOT_IDEAENR_F,TOT_IDEAENR_M,YEAR
description,"Juvenile Justice Facility: ""Yes"" indicates a long-term secure facility; ""No"" indicates not a JJ facility",7 Digit LEAID District Identification Code,District Name,5 Digit School Identification Code,AP Enrollment: American Indian/Alaska Native Female,AP Enrollment: American Indian/Alaska Native Male,AP Enrollment: Asian Female,AP Enrollment: Asian Male,AP Enrollment: Black Female,AP Enrollment: Black Male,...,Students without disabilities who were referred to a law enforcement agency or official: Calculated Male Total,Students without disabilities who received only one out-of-school suspension: Calculated Female Total,Students without disabilities who received only one out-of-school suspension: Calculated Male Total,Overall Student Enrollment: Calculated Female Total,Overall Student Enrollment: Calculated Male Total,Gifted and Talented Student Enrollment: Calculated Female Total,Gifted and Talented Student Enrollment: Calculated Male Total,Students with Disabilities Served Under IDEA Enrollment: Calculated Female Total,Students with Disabilities Served Under IDEA Enrollment: Calculated Male Total,School Year
10000201705,Yes,100002,ALABAMA YOUTH SERVICES,1705,0,0,0,0,0,0,...,0,0,0,0,1798,0,0,0,0,2013-14
10000201706,Yes,100002,ALABAMA YOUTH SERVICES,1706,0,0,0,0,0,0,...,0,0,0,0,994,0,0,0,38,2013-14
10000299995,Yes,100002,ALA YOUTH SER,99995,0,0,0,0,0,0,...,0,0,0,0,910,0,0,0,0,2013-14
10000500870,No,100005,ALBERTVILLE CITY,870,0,0,0,0,0,0,...,0,2,13,328,307,0,0,36,63,2013-14
10000500871,No,100005,ALBERTVILLE CITY,871,2,0,0,0,0,0,...,4,15,15,577,537,0,0,50,93,2013-14


In [574]:
# Save every 100th row as a small sample file.
sample_1314 = df_1314_final.iloc[::100]
sample_1314.to_csv('~/Desktop/discriminology/output/13_14_sample.csv')

In [575]:
# (rows, columns) of the labelled 2013-14 table.
df_1314_final.shape

(95507, 218)

### Get column descriptions for the 2015-16 data

In [385]:
# Record layout for 2015-16, indexed by field name.
col_descr_1516 = pd.read_excel(
    '/Users/cave/Desktop/discriminology/OCR School data sample/2015:16/CRDC 2015-16 School Data Record Layout copy.xlsx',
    index_col='Field_Name',
)

# Descriptions of the desired fields only, as a one-column frame named 'description'.
decoded_names = (
    col_descr_1516
    .loc[fields_1516, 'Field_Description']
    .to_frame(name='description')
)

In [386]:
# Export the 2015-16 field descriptions, with the field name as a regular column.
decoded_names_flat = decoded_names.reset_index()
decoded_names_flat.to_csv('/Users/cave/Desktop/2015_16_field_descriptions.csv')

### Load the 2015-16 data, build COMBOKEY and keep only the desired columns

In [592]:
# Read the single 2015-16 school file.
# NOTE(review): unlike the other years this read does not pass
# dtype=universal_types — confirm that is intended.
df1516_raw = pd.read_csv(
    '/Users/cave/Desktop/discriminology/OCR School data sample/2015:16/CRDC 2015-16 School Data copy.csv',
    encoding='iso-8859-1',
)

# COMBOKEY = district id followed by the school id padded to five characters.
lea_part = df1516_raw['LEAID'].astype(str)
school_part = df1516_raw['SCHID'].astype(str).str.zfill(5)
df1516_raw['COMBOKEY'] = lea_part + school_part

# Keep only the desired fields (column order is not fixed because of the set).
wanted_1516 = list(set(fields_1516))
df1516_raw = df1516_raw[wanted_1516]

  interactivity=interactivity, compiler=compiler, result=result)


### Replace negative values with zeroes.

In [593]:
# Replace negative numbers with 0 in the numeric columns. The assignment is
# explicit rather than going through the private `_get_numeric_data()`
# helper, whose result is not guaranteed to be a view of the frame.
numeric_cols_1516 = df1516_raw.select_dtypes(include='number').columns
if len(numeric_cols_1516):
    df1516_raw[numeric_cols_1516] = df1516_raw[numeric_cols_1516].clip(lower=0)

df1516_raw = df1516_raw.set_index(['COMBOKEY'])

In [594]:
# Cells holding the text code '-5' are rewritten to 'No'.
df1516_raw = df1516_raw.replace(to_replace='-5', value='No')

In [595]:
# Label each field code with its description (codes without one are dropped
# by the default inner join), then tag every row with the school year.
labelled_1516 = pd.merge(df1516_raw.T, descriptions, left_index=True, right_index=True)
df_1516_final = labelled_1516.set_index('description', append=True).T
df_1516_final[('YEAR', 'School Year')] = '2015-16'

In [596]:
# Save every 100th row as a small sample file.
# The labelled frame is named df_1516_final (with an underscore after "df").
df_1516_final[::100].to_csv('~/Desktop/discriminology/output/15_16_sample.csv')

In [597]:
# Write the full labelled 2015-16 table.
# The labelled frame is named df_1516_final (with an underscore after "df").
df_1516_final.to_csv('~/Desktop/discriminology/output/final_data_2015-16.csv')

### Concatenate all three years of data together

In [599]:
# Stack the three yearly tables row-wise, newest year first.
yearly_tables = [df_1516_final, df_1314_final, df_1112_final]
full_table = pd.concat(yearly_tables, axis=0)

In [600]:
# Cast the non-string fields to the types declared in the field-mapping workbook.
# NOTE(review): full_table has two-level (field code, description) column
# labels while numeric_type_map is keyed by the bare field code — confirm
# that these casts actually take effect.
full_table = full_table.astype(numeric_type_map)

In [601]:
# Preview the first rows of the combined table.
full_table.head()

Unnamed: 0_level_0,JJ,LEAID,LEA_NAME,LEA_STATE_NAME,SCHID,SCH_ADDRESS,SCH_APENR_AM_F,SCH_APENR_AM_M,SCH_APENR_AS_F,SCH_APENR_AS_M,...,TOT_DISCWODIS_REF_M,TOT_DISCWODIS_SINGOOS_F,TOT_DISCWODIS_SINGOOS_M,TOT_ENR_F,TOT_ENR_M,TOT_GTENR_F,TOT_GTENR_M,TOT_IDEAENR_F,TOT_IDEAENR_M,YEAR
description,"Juvenile Justice Facility: ""Yes"" indicates a long-term secure facility; ""No"" indicates not a JJ facility",7 Digit LEAID District Identification Code,District Name,District State Name,5 Digit School Identification Code,School address,AP Enrollment: American Indian/Alaska Native Female,AP Enrollment: American Indian/Alaska Native Male,AP Enrollment: Asian Female,AP Enrollment: Asian Male,...,Students without disabilities who were referred to a law enforcement agency or official: Calculated Male Total,Students without disabilities who received only one out-of-school suspension: Calculated Female Total,Students without disabilities who received only one out-of-school suspension: Calculated Male Total,Overall Student Enrollment: Calculated Female Total,Overall Student Enrollment: Calculated Male Total,Gifted and Talented Student Enrollment: Calculated Female Total,Gifted and Talented Student Enrollment: Calculated Male Total,Students with Disabilities Served Under IDEA Enrollment: Calculated Female Total,Students with Disabilities Served Under IDEA Enrollment: Calculated Male Total,School Year
10000201705,Yes,100002,Alabama Youth Services,ALABAMA,1705,,0,0,0,0,...,0,0,0,0,128,0,0,0,10,2015-16
10000201706,Yes,100002,Alabama Youth Services,ALABAMA,1706,,0,0,0,0,...,0,0,0,0,52,0,0,0,11,2015-16
10000201876,No,100002,Alabama Youth Services,ALABAMA,1876,,0,0,0,0,...,0,0,0,0,908,0,0,0,236,2015-16
10000299995,Yes,100002,Alabama Youth Services,ALABAMA,99995,,0,0,0,0,...,0,0,0,0,38,0,0,0,4,2015-16
10000500870,No,100005,Albertville City,ALABAMA,870,,0,0,0,0,...,0,12,19,346,358,0,0,14,26,2015-16


In [602]:
# Title-case the free-text name and address columns.
for text_column in (('LEA_NAME', 'District Name'),
                    ('SCH_NAME', 'School Name'),
                    ('SCH_ADDRESS', 'School address')):
    full_table[text_column] = full_table[text_column].str.title()

# School ids are stored as text, padded with leading zeroes to five characters.
school_id_column = ('SCHID', '5 Digit School Identification Code')
full_table[school_id_column] = full_table[school_id_column].astype(str).str.zfill(5)

In [603]:
# Write the combined table for all three years.
full_table_path = '/Users/cave/Desktop/discriminology/output/full_table_all_years.csv'
full_table.to_csv(full_table_path)

In [604]:
# Save every 100th row of the combined table as a sample file.
full_table_sample = full_table.iloc[::100]
full_table_sample.to_csv('/Users/cave/Desktop/discriminology/output/full_table_sample.csv')

### Group by districts and sum over fields

In [619]:
# Independent copy of the combined table with the COMBOKEY index discarded
# in favour of a plain 0..n-1 index.
district = full_table.copy().reset_index(drop=True)

In [620]:
# Keep only the field-code level of the column labels (drop the descriptions).
district.columns = district.columns.get_level_values(0)

In [621]:
# Preview the flattened table that feeds the district roll-up.
district.head()

Unnamed: 0,JJ,LEAID,LEA_NAME,LEA_STATE_NAME,SCHID,SCH_ADDRESS,SCH_APENR_AM_F,SCH_APENR_AM_M,SCH_APENR_AS_F,SCH_APENR_AS_M,...,TOT_DISCWODIS_REF_M,TOT_DISCWODIS_SINGOOS_F,TOT_DISCWODIS_SINGOOS_M,TOT_ENR_F,TOT_ENR_M,TOT_GTENR_F,TOT_GTENR_M,TOT_IDEAENR_F,TOT_IDEAENR_M,YEAR
0,Yes,100002,Alabama Youth Services,ALABAMA,1705,,0,0,0,0,...,0,0,0,0,128,0,0,0,10,2015-16
1,Yes,100002,Alabama Youth Services,ALABAMA,1706,,0,0,0,0,...,0,0,0,0,52,0,0,0,11,2015-16
2,No,100002,Alabama Youth Services,ALABAMA,1876,,0,0,0,0,...,0,0,0,0,908,0,0,0,236,2015-16
3,Yes,100002,Alabama Youth Services,ALABAMA,99995,,0,0,0,0,...,0,0,0,0,38,0,0,0,4,2015-16
4,No,100005,Albertville City,ALABAMA,870,,0,0,0,0,...,0,12,19,346,358,0,0,14,26,2015-16


In [622]:
# NOTE(review): this repeats the preview from the previous cell.
district.head()

Unnamed: 0,JJ,LEAID,LEA_NAME,LEA_STATE_NAME,SCHID,SCH_ADDRESS,SCH_APENR_AM_F,SCH_APENR_AM_M,SCH_APENR_AS_F,SCH_APENR_AS_M,...,TOT_DISCWODIS_REF_M,TOT_DISCWODIS_SINGOOS_F,TOT_DISCWODIS_SINGOOS_M,TOT_ENR_F,TOT_ENR_M,TOT_GTENR_F,TOT_GTENR_M,TOT_IDEAENR_F,TOT_IDEAENR_M,YEAR
0,Yes,100002,Alabama Youth Services,ALABAMA,1705,,0,0,0,0,...,0,0,0,0,128,0,0,0,10,2015-16
1,Yes,100002,Alabama Youth Services,ALABAMA,1706,,0,0,0,0,...,0,0,0,0,52,0,0,0,11,2015-16
2,No,100002,Alabama Youth Services,ALABAMA,1876,,0,0,0,0,...,0,0,0,0,908,0,0,0,236,2015-16
3,Yes,100002,Alabama Youth Services,ALABAMA,99995,,0,0,0,0,...,0,0,0,0,38,0,0,0,4,2015-16
4,No,100005,Albertville City,ALABAMA,870,,0,0,0,0,...,0,12,19,346,358,0,0,14,26,2015-16


In [623]:
# Race/ethnicity codes in the order the columns are listed for every measure.
_RACE_CODES = ('AM', 'AS', 'HI', 'BL', 'WH', 'HP', 'TR')


def _race_sex_columns(measure):
    """Return the column names of one measure: for male, then female, the
    seven per-race `SCH_<measure>_<race>_<sex>` columns followed by the
    `TOT_<measure>_<sex>` total."""
    columns = []
    for sex in ('M', 'F'):
        columns.extend(f'SCH_{measure}_{race}_{sex}' for race in _RACE_CODES)
        columns.append(f'TOT_{measure}_{sex}')
    return columns


# Columns to total per district. Each name appears exactly once (the
# SCH_GTENR_LEP_* / SCH_GTENR_IDEA_* columns were previously listed twice).
district_sum_cols = [
    'SCH_FTECOUNSELORS', 'SCH_FTESERVICES_PSY', 'SCH_FTESERVICES_SOC',
    'SCH_FTESECURITY_IND', 'SCH_FTESECURITY_LEO', 'SCH_FTESECURITY_GUA',
]
district_sum_cols += _race_sex_columns('ENR')
district_sum_cols += ['SCH_GT_IND']
district_sum_cols += _race_sex_columns('GTENR')
district_sum_cols += ['SCH_GTENR_LEP_M', 'SCH_GTENR_LEP_F',
                      'SCH_GTENR_IDEA_M', 'SCH_GTENR_IDEA_F']
for _measure in ('APENR', 'IDEAENR',
                 'DISCWODIS_SINGOOS', 'DISCWODIS_MULTOOS',
                 'DISCWDIS_SINGOOS_IDEA', 'DISCWDIS_MULTOOS_IDEA',
                 'DISCWODIS_REF', 'DISCWDIS_REF_IDEA',
                 'DISCWODIS_ARR', 'DISCWDIS_ARR_IDEA'):
    district_sum_cols += _race_sex_columns(_measure)

# Select the columns with a LIST: selecting several GroupBy columns with a
# bare tuple is deprecated and rejected by newer pandas.
# NOTE(review): the grouping key does not include YEAR, so the totals add up
# all three school years for each district — confirm that is intended.
district_totals = district.groupby(['LEAID', 'LEA_NAME'])[district_sum_cols].sum()
district_totals

  """Entry point for launching an IPython kernel.


Unnamed: 0_level_0,Unnamed: 1_level_0,SCH_FTECOUNSELORS,SCH_FTESERVICES_PSY,SCH_FTESERVICES_SOC,SCH_FTESECURITY_LEO,SCH_FTESECURITY_GUA,SCH_ENR_AM_M,SCH_ENR_AS_M,SCH_ENR_HI_M,SCH_ENR_BL_M,SCH_ENR_WH_M,...,SCH_DISCWDIS_ARR_IDEA_TR_M,TOT_DISCWDIS_ARR_IDEA_M,SCH_DISCWDIS_ARR_IDEA_AM_F,SCH_DISCWDIS_ARR_IDEA_AS_F,SCH_DISCWDIS_ARR_IDEA_HI_F,SCH_DISCWDIS_ARR_IDEA_BL_F,SCH_DISCWDIS_ARR_IDEA_WH_F,SCH_DISCWDIS_ARR_IDEA_HP_F,SCH_DISCWDIS_ARR_IDEA_TR_F,TOT_DISCWDIS_ARR_IDEA_F
LEAID,LEA_NAME,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
100002,Alabama Youth Services,4.0,3.0,0.0,0.00,6.0,14.0,0.0,7.0,680.0,401.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
100005,Albertville City,10.5,0.0,0.0,6.00,1.0,10.0,13.0,1194.0,90.0,1263.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
100006,Marshall County,14.5,0.0,0.0,4.17,0.0,16.0,12.0,613.0,33.0,2224.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
100007,Hoover City,27.5,0.0,2.0,0.00,0.0,10.0,443.0,782.0,1754.0,4148.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
100008,Madison City,17.5,0.0,0.2,5.90,1.1,40.0,460.0,230.0,1126.0,3286.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9999094,Metropolitan Arts And Technology High School,0.0,0.0,0.0,0.00,0.0,2.0,2.0,29.0,8.0,2.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9999095,Academy Of Academic Excellence,0.0,0.0,0.0,0.00,0.0,2.0,44.0,191.0,32.0,353.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9999096,New Millennium Institute Of Education Charter,0.0,0.0,0.0,0.00,0.0,0.0,0.0,68.0,11.0,2.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9999097,School Of Unlimited Learning,0.0,0.0,0.0,0.00,0.0,2.0,5.0,86.0,17.0,5.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [624]:
# NOTE(review): `_` is IPython's "most recent cell output" variable, so this binding
# captures whatever the previously executed cell displayed (presumably the per-district
# `.sum()` frame shown above -- confirm). It is not reproducible under
# Restart Kernel -> Run All unless that cell runs immediately before this one.
# TODO: assign the aggregation result to `grouped_by_district` directly in the cell
# that computes it.
grouped_by_district = _

In [645]:
# The previous form, `reset_index().set_index('LEAID', inplace=True)`, mutated the
# temporary frame returned by reset_index() and then discarded it, so the cell had
# no effect at all. Keep the LEAID-indexed frame under its own name instead.
# `grouped_by_district` is intentionally left unmodified (as it effectively was
# before), so later cells that use its existing index see the same data.
grouped_by_leaid = grouped_by_district.reset_index().set_index('LEAID')

In [647]:
# Transpose so each field name is a row label, join the field descriptions on that
# label, append `description` as an extra index level, then transpose back so the
# description travels with every column header.
district_merge = (
    grouped_by_district.T
    .merge(descriptions, left_index=True, right_index=True)
    .set_index('description', append=True)
    .T
)

In [650]:
# Write every 100th row of the merged frame as a small sample file for quick inspection.
district_merge.iloc[::100].to_csv(
    path_or_buf='~/Desktop/discriminology/output/district_level_sample.csv'
)

In [649]:
# Write the complete merged district-level frame to disk.
district_merge.to_csv(
    path_or_buf='~/Desktop/discriminology/output/district_level_aggregates.csv'
)

### Code Sandbox - everything below only needs to be run once

In [None]:
# descriptions = pd.concat(desc_frames_1112, axis=0)
# descriptions.drop_duplicates(inplace=True)
# descriptions.set_index('Field Name', inplace=True)
# descriptions = descriptions.loc[fields_1112]
# descriptions.columns = ['2011_12_description']
# descriptions.reset_index().to_csv('/Users/cave/Desktop/2011_12_field_descriptions.csv')

# descriptions = pd.concat(desc_frames_1314, axis=0)
# descriptions.drop_duplicates(inplace=True)
# descriptions.set_index('Field Name', inplace=True)
# descriptions = descriptions.loc[fields_1314]
# descriptions.columns = ['2013_14_description']
# descriptions.reset_index().to_csv('/Users/cave/Desktop/2013_14_field_descriptions.csv')