In [1]:
import pandas as pd
import numpy as np

# Print the library versions in use so the outputs below can be tied to a
# known environment.
print('Pandas version', pd.__version__)
print('Numpy version', np.__version__)

Pandas version 0.20.3
Numpy version 1.14.0


# Load Data

## Read in 2017-2018 Accountability Report (with 2016-2017 data)

#### Functions to load in data

In [2]:
def drop_first_two_rows(df):
    """
    Return a copy of ``df`` without its first two rows, re-indexed from 0.

    The input dataframe is left untouched, so calling the function again on
    the same object (e.g. when a notebook cell is re-run) always gives the
    same result.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose first two rows should be discarded.

    Returns
    -------
    pandas.DataFrame
        New frame holding every row of ``df`` after the second one, with a
        fresh 0-based index. Empty if ``df`` has two rows or fewer.
    """
    # Slice by position rather than dropping by label: a label-based drop
    # would remove extra rows if the index contained duplicate labels.
    return df.iloc[2:].reset_index(drop=True)
    

In [3]:
def find_relevant_attributes(file, avg_attend_col, grad_rate_col):
    """
    Build a four-column dataframe from a wider school-attributes frame.

    Columns are picked by position: column 0 is used as the school id and
    column 1 as the school name, while the caller supplies the positions of
    the average daily attendance and graduation rate columns.
    """
    # Output column label -> positional index in the source frame.
    column_positions = (
        ('school_id', 0),
        ('school_name', 1),
        ('avg_daily_attend', avg_attend_col),
        ('grad_rate', grad_rate_col),
    )

    selected = {}
    for label, position in column_positions:
        selected[label] = file.iloc[:, position]

    return pd.DataFrame(selected)
    

In [4]:
def fetch_school_data(filepath, sheetname, avg_attend_col, grad_rate_col):
    """
    Read one sheet of an Excel workbook and keep only the school attributes
    of interest.

    Parameters
    ----------
    filepath : str
        Workbook file name, resolved relative to ``../data/``.
    sheetname : str
        Name of the sheet to read.
    avg_attend_col : int
        Zero-based position of the average daily attendance column.
    grad_rate_col : int
        Zero-based position of the graduation rate column.

    Returns
    -------
    pandas.DataFrame
        Output of ``find_relevant_attributes`` with its first two rows
        removed by ``drop_first_two_rows``.

    Raises
    ------
    AssertionError
        If either column position is not a plain ``int``.
    """

    assert type(avg_attend_col) == int, 'Please enter col position as integer'
    assert type(grad_rate_col) == int, 'Please enter col position as integer'

    # The sheet is passed positionally on purpose: the keyword was renamed
    # from ``sheetname`` to ``sheet_name`` in later pandas releases, so the
    # positional form is the one that works across versions.
    file = pd.read_excel('../data/{}'.format(filepath), sheetname)
    relevant_attributes = find_relevant_attributes(file, avg_attend_col, grad_rate_col)
    # NOTE(review): the first two rows are presumably extra header rows in
    # the report sheets; confirm against the workbook.
    schools = drop_first_two_rows(relevant_attributes)

    return schools

#### Read in high school, combo, and option schools; add a type column and concatenate them

In [5]:
# Column positions are specific to the layout of this sheet.
high_schools = fetch_school_data(
    filepath='Accountability_SQRPratings_2017-2018_SchoolLevel.xls',
    sheetname='High Schools (grds 9-12 only)',
    avg_attend_col=56,
    grad_rate_col=41,
)

# Label every row with its school category so the source sheet stays
# identifiable after the frames are combined.
high_schools = high_schools.assign(type='high_school')

In [6]:
# Column positions are specific to the layout of this sheet.
combo_schools = fetch_school_data(
    filepath='Accountability_SQRPratings_2017-2018_SchoolLevel.xls',
    sheetname='Combo Schools (grds PreK-12)',
    avg_attend_col=129,
    grad_rate_col=114,
)

# Label every row with its school category so the source sheet stays
# identifiable after the frames are combined.
combo_schools = combo_schools.assign(type='combo_school')

In [11]:
# Column positions are specific to the layout of this sheet.
alt_schools = fetch_school_data(
    filepath='Accountability_SQRPratings_2017-2018_SchoolLevel.xls',
    sheetname='Option Schools',
    avg_attend_col=31,
    grad_rate_col=22,
)

# Label every row with its school category so the source sheet stays
# identifiable after the frames are combined.
alt_schools = alt_schools.assign(type='alt_school')

In [12]:
# Preview the first rows of the option-schools frame.
alt_schools.head()

Unnamed: 0,avg_daily_attend,grad_rate,school_id,school_name,type
0,56.3,93.0,400123,YCCS - SCHOLASTIC ACHIEVEMENT,alt_school
1,63.4,98.1,400124,YCCS - MCKINLEY,alt_school
2,60.2,88.5,400125,YCCS - ASPIRA PANTOJA,alt_school
3,63.6,91.2,400126,YCCS - ASSOCIATION HOUSE,alt_school
4,70.9,91.1,400127,YCCS - AUSTIN CAREER,alt_school


In [13]:
# Stack the three per-category frames into one; ignore_index=True gives the
# combined frame a fresh 0..n-1 index instead of repeating each frame's own.
schools = pd.concat(
    [high_schools, combo_schools, alt_schools],
    ignore_index=True,
)

In [14]:
# Spot-check a random selection of rows. A fixed random_state keeps the
# displayed rows the same on every Restart & Run All.
schools.sample(15, random_state=0)

Unnamed: 0,avg_daily_attend,grad_rate,school_id,school_name,type
1,90.5,71.4,400013,ASPIRA - EARLY COLLEGE HS,high_school
154,69.9,58.7,400136,YCCS - OLIVE HARVEY,alt_school
155,77.9,97.3,400137,LITTLE BLACK PEARL HS,alt_school
5,94.4,88.2,400051,NOBLE - NOBLE HS,high_school
111,91.1,86.9,610547,SOUTH SHORE INTL HS,high_school
151,48.3,99.3,400133,YCCS - INNOVATIONS,alt_school
49,87.2,54.3,609705,FENGER HS,high_school
67,87.4,58.8,609733,SULLIVAN HS,high_school
143,63.4,98.1,400124,YCCS - MCKINLEY,alt_school
57,89.7,72.2,609715,KELLY HS,high_school


In [26]:
# Number of rows in the combined frame.
print(schools.shape[0])

# Save the combined school accountability data as a pickle file.
schools.to_pickle('../data/accountability_data.pkl')

186


## Read in 2016-2017 School Profile Info to compare

In [27]:
# The encoding is given explicitly as ISO-8859-1 (presumably the file does
# not decode as UTF-8; confirm if the source export changes).
profiles = pd.read_csv(
    '../data/Chicago_Public_Schools_-_School_Profile_Information_SY1617.csv',
    encoding="ISO-8859-1",
)

In [28]:
# Preview the first rows of the school profile data.
profiles.head()

Unnamed: 0,School_ID,Legacy_Unit_ID,Finance_ID,Short_Name,Long_Name,School_Type,Primary_Category,Is_High_School,Is_Middle_School,Is_Elementary_School,...,Third_Contact_Name,Fourth_Contact_Title,Fourth_Contact_Name,Fifth_Contact_Title,Fifth_Contact_Name,Sixth_Contact_Title,Sixth_Contact_Name,Seventh_Contact_Title,Seventh_Contact_Name,Location
0,610158,5720,29271,SAYRE,Harriet E Sayre Elementary Language Academy,Neighborhood,ES,N,Y,Y,...,,,,,,,,,,"1850 N NEWLAND AVE\rChicago, Illinois 60707\r(..."
1,610282,7040,26301,MCNAIR,Ronald E McNair Elementary School,Neighborhood,ES,N,Y,Y,...,Tasia White,,,,,,,,,"4820 W WALTON ST\rChicago, Illinois 60651\r(41..."
2,609996,4020,23821,HOLDEN,Charles N Holden Elementary School,Neighborhood,ES,N,Y,Y,...,,,,,,,,,,"1104 W 31ST ST\rChicago, Illinois 60608\r(41.8..."
3,400079,3344,66395,ACERO - ZIZUMBO,Acero Charter Schools - SPC Daniel Zizumbo,Charter,ES,N,Y,Y,...,,,,,,,,,,"4248 W 47TH ST\rChicago, Illinois 60632\r(41.8..."
4,610089,5020,24621,MURPHY,John B Murphy Elementary School,Neighborhood,ES,N,Y,Y,...,Laura Aguirre,,,,,,,,,"3539 W GRACE ST\rChicago, Illinois 60618\r(41...."


In [19]:
# Left join: keep every row of `schools` and attach the profile columns
# where school_id equals School_ID; schools with no matching profile get
# missing values in the profile columns.
schools_pr = schools.merge(
    profiles,
    how='left',
    left_on='school_id',
    right_on='School_ID',
)

In [20]:
# Summarise columns, non-null counts and dtypes of the merged frame.
schools_pr.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 186 entries, 0 to 185
Data columns (total 96 columns):
avg_daily_attend                           186 non-null object
grad_rate                                  172 non-null object
school_id                                  186 non-null object
school_name                                186 non-null object
type                                       186 non-null object
School_ID                                  182 non-null float64
Legacy_Unit_ID                             182 non-null float64
Finance_ID                                 182 non-null float64
Short_Name                                 182 non-null object
Long_Name                                  182 non-null object
School_Type                                182 non-null object
Primary_Category                           182 non-null object
Is_High_School                             182 non-null object
Is_Middle_School                           182 non-null object
Is_Element

In [30]:
# Open question: the accountability report may not contain actual 2016-2017
# figures; it could instead be a goal for 2018 based on the previous year's
# actuals. Compare its graduation rate with the two profile columns.
(
    schools_pr
    .loc[:, ['grad_rate', 'Graduation_Rate_School', 'Graduation_Rate_Mean']]
    .mean()
)

grad_rate                 76.126744
Graduation_Rate_School    72.891736
Graduation_Rate_Mean      73.500000
dtype: float64