In [1]:
import pandas as pd
%matplotlib inline
import pylab as plt
import numpy as np
import scipy as sc
import scipy.stats

In [143]:
def read_multi_xls(prefix, start_year, end_year, skip_row_count=0):
    """Read one ``.xls`` workbook per year and return the frames as a list.

    Each workbook is expected at ``prefix + <two-digit year> + '.xls'``
    (e.g. year 2014 -> ``prefix + '14.xls'``).

    Parameters
    ----------
    prefix : str
        Path prefix shared by all yearly files.
    start_year, end_year : int
        First and last year to load (both inclusive).
    skip_row_count : int, optional
        Number of leading rows to skip in every workbook (default 0).

    Returns
    -------
    list of pandas.DataFrame
        One frame per year, in ascending year order, each with an added
        ``'year'`` column holding the full year.
    """
    def load_year(year):
        # Files are named after the last two digits of the year, zero padded.
        workbook = prefix + '{:02d}.xls'.format(year % 100)
        frame = pd.read_excel(workbook, skiprows=list(range(skip_row_count)))
        frame['year'] = year
        return frame

    return [load_year(year) for year in range(start_year, end_year + 1)]

def import_multi_xls(prefix, start_year, end_year, skip_row_count, columns_to_remove, must_have_columns=None):
    """Load one ``.xls`` workbook per year and stack them into one DataFrame.

    Workbooks are read from ``prefix + <two-digit year> + '.xls'``. Column
    names differ between years, so columns are aligned by *position*: the
    first year's column names (after removals) become the canonical names and
    the leading columns of every later frame are renamed to them in order.

    Parameters
    ----------
    prefix : str
        Path prefix shared by all yearly files.
    start_year, end_year : int
        First and last year to load (both inclusive).
    skip_row_count : mapping of int -> int
        For each year, the number of leading rows to skip in that workbook.
    columns_to_remove : mapping of int -> iterable of str
        For each year, column names to drop from that year's frame before
        the positional alignment.
    must_have_columns : iterable of str, optional
        Canonical column names that some years may lack. A frame that does
        not contain such a column (matched by exact name) gets it inserted,
        filled with NaN, at the position it has in the first year's layout.
        NOTE(review): the check is by exact name, so a year that carries the
        column under a differently spelled header would also get a NaN
        column inserted -- confirm against the actual files.

    Returns
    -------
    pandas.DataFrame
        All yearly frames concatenated, with an added ``'year'`` column.
        Row indices are not reset, so index labels repeat across years.

    Raises
    ------
    ValueError
        If a must-have column is not part of the first year's layout (its
        position would be unknown).
    IndexError
        If a later year's frame has fewer columns than the first year's.
    KeyError
        If a year is missing from ``skip_row_count`` / ``columns_to_remove``
        or a column listed for removal does not exist.
    """
    # None instead of a shared mutable default list.
    required = list(must_have_columns) if must_have_columns is not None else []

    data_frames = []
    column_names = None
    for year in range(start_year, end_year + 1):
        workbook = prefix + '{:02d}.xls'.format(year % 100)
        df = pd.read_excel(workbook, skiprows=list(range(skip_row_count[year])))
        df = df.drop(columns=list(columns_to_remove[year]))

        # There are differences between column names, so we use the names
        # from the first dataframe.
        if column_names is None:
            column_names = list(df)
            unknown = [col for col in required if col not in column_names]
            if unknown:
                raise ValueError(
                    'must_have_columns %r are not present in the first file (%s), '
                    'so their position in the layout is unknown' % (unknown, workbook))

        # Fill in columns this year lacks, at their canonical position, so
        # that the positional rename below stays aligned.
        for col in required:
            if col not in df.columns:
                df.insert(column_names.index(col), col, np.nan)

        current_names = list(df)
        if len(current_names) < len(column_names):
            raise IndexError(
                'file for year %d (%s) has %d columns, expected at least %d'
                % (year, workbook, len(current_names), len(column_names)))

        # zip stops at the canonical length: any surplus trailing columns
        # keep their own names.
        df = df.rename(columns=dict(zip(current_names, column_names)))
        df['year'] = year

        data_frames.append(df)
    return pd.concat(data_frames)

In [144]:
# AP results 1999-2013, aligned to the 1999 column layout.
ap_99_13 = import_multi_xls(
    'data/test_scores/ap/ap', 1999, 2013,
    # Leading rows to skip: 2 for 1999-2008, 3 for 2009-2013.
    skip_row_count={year: (2 if year <= 2008 else 3) for year in range(1999, 2014)},
    # Columns dropped before alignment; only 2004 and 2010 have any.
    columns_to_remove={
        **{year: [] for year in range(1999, 2014)},
        2004: ['Rate of Exams\nWith a Score of\n3 or Greater\nFor 12th Grade\nStudents', 'Rate of Exams\nWith a Score of\n3 or Greater\nFor 11th & 12th\nGrade Students'],
        2010: ['Year'],
    },
    # Columns that get filled with NaN for years whose file lacks them.
    must_have_columns=['Total\nNumber of\nExams Taken', 'Number\nof Exams\nWith a Score of\n3 or Greater'],
)

In [146]:
# AP results 2014-2016, stacked as-is (no column alignment between years).
# NOTE(review): the yearly files appear to disagree on column-name case -- the
# combined frame shows both 'Enroll12' and 'enroll12' -- confirm whether those
# should be merged before analysis.
ap_14_16 = pd.concat(
    read_multi_xls(prefix='data/test_scores/ap/ap', start_year=2014, end_year=2016)
)

In [152]:
# Preview the first rows instead of rendering the whole combined frame.
ap_99_13.head(10)

Unnamed: 0,County Number,District Number,School Number,County Name,District Name,School Name,Grade 11 Enrollment (October 1998 CBEDS),Grade 12 Enrollment (October 1998 CBEDS),Grade 11+12 Enrollment (October 1998 CBEDS),Number of AP Exam Takers,Total Number of Exams Taken,Number of Exams With a Score of 3 or Greater,Number of Exams With a Score of 1,Number of Exams With a Score of 2,Number of Exams With a Score of 3,Number of Exams With a Score of 4,Number of Exams With a Score of 5,year
0,1,10017,130401,Alameda,Alameda Co. Office Of Educatio,Juvenile Hall/Court,157.0,224.0,381.0,0.0,0,0,0,0,0,0,0,1999
1,1,10017,130419,Alameda,Alameda Co. Office Of Educatio,County Community,3.0,8.0,11.0,0.0,0,0,0,0,0,0,0,1999
2,1,10017,130427,Alameda,Alameda Co. Office Of Educatio,Alternative/Opportunity,35.0,67.0,102.0,0.0,0,0,0,0,0,0,0,1999
3,1,61119,130229,Alameda,Alameda City Unified,Alameda High,413.0,372.0,785.0,165.0,272,183,35,54,95,54,34,1999
4,1,61119,132878,Alameda,Alameda City Unified,Encinal High,350.0,240.0,590.0,138.0,267,77,86,104,53,22,2,1999
5,1,61119,134304,Alameda,Alameda City Unified,Island High (Cont.),67.0,119.0,186.0,0.0,0,0,0,0,0,0,0,1999
6,1,61127,130294,Alameda,Albany City Unified,Macgregor High (Cont.),9.0,23.0,32.0,0.0,0,0,0,0,0,0,0,1999
7,1,61127,130450,Alameda,Albany City Unified,Albany High,222.0,184.0,406.0,89.0,128,94,12,22,39,25,30,1999
8,1,61143,131177,Alameda,Berkeley Unified,Berkeley High,779.0,687.0,1466.0,325.0,514,443,25,46,90,130,223,1999
9,1,61143,134924,Alameda,Berkeley Unified,"East Campus, Berkeley Hig",61.0,44.0,105.0,0.0,0,0,0,0,0,0,0,1999


In [160]:
# The school part of a CDS code is its trailing 7 digits; the 1999-2013 data
# carries 7-digit School Numbers (e.g. 2430064), so slicing only 6 digits would
# truncate those and break matching between the two frames.
# pd.to_numeric drops a leading zero, e.g. '0133397' -> 133397.
ap_14_16['School Number'] = pd.to_numeric(ap_14_16['cds'].astype(str).str[-7:])

In [161]:
# Preview the first rows instead of rendering the whole combined frame.
ap_14_16.head(10)

Unnamed: 0,Enroll1012,Enroll12,NumScr1,NumScr2,NumScr3,NumScr4,NumScr5,NumTstTakr,cds,cname,dname,enroll1012,enroll12,rtype,sname,year,School Number
0,1476.0,467.0,38,100,220,197,115,394,1612000133397,Alameda,Livermore Valley Joint Unified ...,,,S,Granada High ...,2014,133397
1,1294.0,438.0,37,87,156,98,62,255,1612000134536,Alameda,Livermore Valley Joint Unified ...,,,S,Livermore High ...,2014,134536
2,1539.0,491.0,117,181,172,103,63,324,1612340000000,Alameda,Newark Unified ...,,,D,...,2014,0
3,1414.0,418.0,117,181,172,103,63,324,1612340130054,Alameda,Newark Unified ...,,,S,Newark Memorial High ...,2014,130054
4,33.0,14.0,,,,,,0,1612340130484,Alameda,Newark Unified ...,,,S,Crossroads High (Alternative) ...,2014,130484
5,3208.0,1170.0,195,366,422,255,176,701,1612420000000,Alameda,New Haven Unified ...,,,D,...,2014,0
6,125.0,73.0,,,,,,0,1612420126763,Alameda,New Haven Unified ...,,,S,Decoto School for Independent Study ...,2014,126763
7,2908.0,943.0,195,366,422,255,176,701,1612420134668,Alameda,New Haven Unified ...,,,S,James Logan High ...,2014,134668
8,9016.0,3024.0,848,623,503,392,326,1644,1612590000000,Alameda,Oakland Unified ...,,,D,...,2014,0
9,204.0,67.0,1,10,20,9,4,44,1612590100065,Alameda,Oakland Unified ...,,,S,Oakland Unity High ...,2014,100065


In [155]:
# Every row whose school name is exactly 'Granada High'. The name alone is not
# unique: the result includes schools from more than one county.
ap_99_13.loc[ap_99_13['School Name'].eq('Granada High')]

Unnamed: 0,County Number,District Number,School Number,County Name,District Name,School Name,Grade 11 Enrollment (October 1998 CBEDS),Grade 12 Enrollment (October 1998 CBEDS),Grade 11+12 Enrollment (October 1998 CBEDS),Number of AP Exam Takers,Total Number of Exams Taken,Number of Exams With a Score of 3 or Greater,Number of Exams With a Score of 1,Number of Exams With a Score of 2,Number of Exams With a Score of 3,Number of Exams With a Score of 4,Number of Exams With a Score of 5,year
28,1,61200,133397,Alameda,Livermore Valley Joint Unified,Granada High,385.0,318.0,703.0,40.0,48,32,4,12,15,11,6,1999
27,1,61200,133397,Alameda,Livermore Valley Joint Unified,Granada High,397.0,358.0,755.0,52.0,61,37,3,21,19,12,6,2000
29,1,61200,133397,Alameda,Livermore Valley Joint Unified,Granada High,446.0,389.0,835.0,75.0,101,78,3,20,34,28,16,2001
31,1,61200,133397,Alameda,Livermore Valley Joint Unified,Granada High,405.0,420.0,825.0,87.0,143,109,7,27,44,29,36,2002
31,1,61200,133397,Alameda,Livermore Valley Joint Unified,Granada High,478.0,384.0,862.0,92.0,122,89,11,22,34,37,18,2003
32,1,61200,133397,Alameda,Livermore Valley Joint Unified,Granada High,508.0,446.0,954.0,99.0,161,123,5,33,41,47,35,2004
910,24,65730,2430064,Merced,Le Grand Union High,Granada High,27.0,0.0,27.0,0.0,--,--,--,--,--,--,--,2004
32,1,61200,133397,Alameda,Livermore Valley Joint Unified,Granada High,467.0,480.0,947.0,151.0,254,180,30,44,66,65,49,2005
956,24,65730,2430064,Merced,Le Grand Union High,Granada High,20.0,17.0,37.0,0.0,--,--,--,--,--,--,--,2005
34,1,61200,133397,Alameda,Livermore Valley Joint Unified,Granada High,561.0,515.0,1076.0,182.0,,,14,43,91,106,85,2007
