In [19]:
import pandas as pd
import os
import fnmatch
import numpy as np
from library import start
from library import clean_for_merge

In [20]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

In [21]:
year = 'yr1213'

In [22]:
folder = 'certification_' + year + '/'
teacher_datapath = os.path.join(start.data_path, 'tea', 'teachers', folder)

In [23]:
def filter_and_rename_cols(df, dict):
    """
    Keep some original cols from a dataframe, rename them to new column names
    Return a new data frame

    Arguments:
    df = data frame
    dict keys = original column names you want to keep
    dict values = new column names
    """
    df = df[list(dict.keys())]
    new_df = df.rename(index=str, columns=dict)
    return new_df

# Certfication

## Append all certification datasets from 2016-17

In [24]:
pattern = "CERTIFICATION*.csv"
if year == 'yr1213' or year == 'yr1314':
    pattern = "CERTIFICATION*.TXT"
cert_files = []
for entry in os.listdir(teacher_datapath)  :  
    if fnmatch.fnmatch(entry, pattern):
            cert_files.append(entry)
cert_files.sort()
dirs_cert = [teacher_datapath + file for file in cert_files]
df_list = [pd.read_csv(file, sep=",", encoding = "ISO-8859-1", dtype = object) for file in dirs_cert]
certification = pd.concat(df_list)
print(certification.columns)

In [26]:
if year in ['yr1415', 'yr1516', 'yr1617', 'yr1718']:
    vars_to_keep = {'PERSONID_SCRAM': 'teacher_id', 'DISTRICT': 'district', 
                    'CREDENTIAL TYPE': 'cert_type', 'CERTIFICATE_PREPARATION_ROUTE': 'cert_route', 
                    'CERTIFICATE LIFE': 'cert_life', 
                    'CERTIFICATE EFFECTIVE DATE': 'cert_startdate', 'CERTIFICATE EXPIRATION DATE': 'cert_enddate',
                    'CERTIFICATION_LEVEL': 'cert_level','CREDENTIALED_GRADES': 'cert_grades',
                    'POPULATION_CREDENTIALED_FOR': 'cert_pop', 
                    'SUBJECT AREA': 'cert_area', 'SUBJECT': 'cert_subject'}
if year in ['yr1213', 'yr1314']:
    vars_to_keep = {'personid_SCRAM': 'teacher_id', 'DISTRICT': 'district', 
                    'CREDENTIAL TYPE': 'cert_type', 'CERTIFICATE PREPARATION ROUTE': 'cert_route', 
                    'CERTIFICATE LIFE': 'cert_life', 
                    'CERTIFICATE EFFECTIVE DATE': 'cert_startdate', 'CERTIFICATE EXPIRATION DATE': 'cert_enddate',
                    'CERTIFICATION LEVEL': 'cert_level','CREDENTIALED GRADES': 'cert_grades',
                    'POPULATION CREDENTIALED FOR': 'cert_pop', 
                    'SUBJECT AREA': 'cert_area', 'SUBJECT': 'cert_subject'}

certification = filter_and_rename_cols(certification, vars_to_keep)
cert_vars = list(vars_to_keep.values()).remove('teacher_id')
certification

Unnamed: 0,teacher_id,district,cert_type,cert_route,cert_life,cert_startdate,cert_enddate,cert_level,cert_grades,cert_pop,cert_area,cert_subject
0,03Q*5YV40,015910,Standard,Standard Program,5YR,01FEB2012:00:00:00,31JAN2018:00:00:00,ELM,EC-4,Regular Students,Bilingual Education,Bilingual Spanish
1,03Q*5YV40,015910,Standard,Certification by Exam,5YR,01FEB2012:00:00:00,31JAN2018:00:00:00,ALL,EC-12,Special Education,Special Education,Generic Special Education
2,03Q*5YV40,015910,Standard Professional,Standard Program,5YR,01FEB2012:00:00:00,31JAN2018:00:00:00,PRF,EC-12,Not Applicable,Professional,Principal
3,03Q*5YV40,015910,Standard,Certification by Exam,5YR,20MAR2012:00:00:00,31JAN2018:00:00:00,ELM,4-8,Regular Students,General Elementary (Self-Contained),Generalist
4,*3736Y046,031803,Standard,Standard Program,5YR,26JUN2010:00:00:00,31JUL2015:00:00:00,ELM,EC-6,Regular Students,Bilingual Education,Bilingual Spanish
5,*373VY248,031803,Standard Paraprofessional,Paraprofessional Program,5YR,16SEP2010:00:00:00,29FEB2016:00:00:00,,,Not Applicable,Other,Not Applicable
6,*373VY248,031803,Standard,Alternative Program,5YR,07AUG2012:00:00:00,28FEB2018:00:00:00,ALL,EC-12,Regular Students,Computer Science,Technology Applications
7,031*3Y*34,031803,Standard,Standard Program,5YR,13APR2011:00:00:00,30NOV2016:00:00:00,ELM,EC-6,Regular Students,Bilingual Education,Bilingual Spanish
8,03246D543,031803,Standard,Standard Program,5YR,16MAY2011:00:00:00,30SEP2016:00:00:00,ELM,EC-4,Regular Students,Bilingual Education,Bilingual Spanish
9,0341QF335,031803,Standard,Standard Program,5YR,16MAY2009:00:00:00,31AUG2014:00:00:00,ELM,EC-4,Regular Students,Bilingual Education,Bilingual Spanish


In [37]:
cert_vars = list(vars_to_keep.values())
cert_vars.remove('teacher_id')
cert_vars

['district',
 'cert_type',
 'cert_route',
 'cert_life',
 'cert_startdate',
 'cert_enddate',
 'cert_level',
 'cert_grades',
 'cert_pop',
 'cert_area',
 'cert_subject']

In [27]:
certification = certification.sort_values(by = 'teacher_id')
print('number of certifications: ', len(certification))
certification

number of certifications:  1059387


Unnamed: 0,teacher_id,district,cert_type,cert_route,cert_life,cert_startdate,cert_enddate,cert_level,cert_grades,cert_pop,cert_area,cert_subject
1722,*30*0*045,061901,Standard,Certification by Exam,5YR,01APR2009:00:00:00,31MAR2015:00:00:00,END,,Special Education,Special Education,Generic Special Education
1723,*30*0*045,061901,Standard,Standard Program,5YR,01APR2009:00:00:00,31MAR2015:00:00:00,PRF,EC-12,Not Applicable,Professional,Principal
1719,*30*0*045,061901,Provisional,Certification by Exam,LIFE,14JUN1999:00:00:00,.,END,,Bilingual Students,Bilingual Education,Bilingual Spanish
1720,*30*0*045,061901,Paraprofessional,Paraprofessional Program,LIFE,15AUG1994:00:00:00,.,,,Not Applicable,Other,Not Applicable
1721,*30*0*045,061901,Paraprofessional,Paraprofessional Program,LIFE,15AUG1994:00:00:00,.,,,Not Applicable,Other,Not Applicable
1716,*30*0*045,061901,Provisional,Standard Program,LIFE,20FEB1999:00:00:00,.,ELM,1-8,Regular Students,English Language Arts,English
1718,*30*0*045,061901,Provisional,Standard Program,LIFE,20FEB1999:00:00:00,.,ELM,1-8,Regular Students,Foreign Language,Spanish
1717,*30*0*045,061901,Provisional,Standard Program,LIFE,20FEB1999:00:00:00,.,ELM,1-8,Regular Students,General Elementary (Self-Contained),Self-Contained
2106,*30*01145,066005,Paraprofessional,Paraprofessional Program,LIFE,14OCT1982:00:00:00,.,,,Not Applicable,Other,Not Applicable
95974,*30*0F443,101912,Provisional,Alternative Program,LIFE,16APR1996:00:00:00,.,ELM,PK-6,Bilingual Students,Bilingual Education,Bilingual/ESL


# Keep only latest certification of duplicates

In [28]:
certification['cert_startdate'] = pd.to_datetime(certification.cert_startdate.str.slice(0, 9), errors = 'coerce')
certification['cert_enddate'] = np.where(certification['cert_life'] == 'LIFE', '02JAN2050:00:00:00', certification['cert_enddate'])
certification['cert_enddate'] = pd.to_datetime(certification.cert_enddate.str.slice(0, 9), errors = 'coerce')

In [29]:
certification

Unnamed: 0,teacher_id,district,cert_type,cert_route,cert_life,cert_startdate,cert_enddate,cert_level,cert_grades,cert_pop,cert_area,cert_subject
1722,*30*0*045,061901,Standard,Certification by Exam,5YR,2009-04-01,2015-03-31,END,,Special Education,Special Education,Generic Special Education
1723,*30*0*045,061901,Standard,Standard Program,5YR,2009-04-01,2015-03-31,PRF,EC-12,Not Applicable,Professional,Principal
1719,*30*0*045,061901,Provisional,Certification by Exam,LIFE,1999-06-14,2050-01-02,END,,Bilingual Students,Bilingual Education,Bilingual Spanish
1720,*30*0*045,061901,Paraprofessional,Paraprofessional Program,LIFE,1994-08-15,2050-01-02,,,Not Applicable,Other,Not Applicable
1721,*30*0*045,061901,Paraprofessional,Paraprofessional Program,LIFE,1994-08-15,2050-01-02,,,Not Applicable,Other,Not Applicable
1716,*30*0*045,061901,Provisional,Standard Program,LIFE,1999-02-20,2050-01-02,ELM,1-8,Regular Students,English Language Arts,English
1718,*30*0*045,061901,Provisional,Standard Program,LIFE,1999-02-20,2050-01-02,ELM,1-8,Regular Students,Foreign Language,Spanish
1717,*30*0*045,061901,Provisional,Standard Program,LIFE,1999-02-20,2050-01-02,ELM,1-8,Regular Students,General Elementary (Self-Contained),Self-Contained
2106,*30*01145,066005,Paraprofessional,Paraprofessional Program,LIFE,1982-10-14,2050-01-02,,,Not Applicable,Other,Not Applicable
95974,*30*0F443,101912,Provisional,Alternative Program,LIFE,1996-04-16,2050-01-02,ELM,PK-6,Bilingual Students,Bilingual Education,Bilingual/ESL


In [30]:
cert_vars_dup = []
for var in cert_vars:
    if var not in ['cert_startdate', 'cert_enddate', 'cert_route']:
        cert_vars_dup.append(var)
cert_vars_dup.append('teacher_id')
certification = certification.sort_values(by = cert_vars_dup, ascending = True)
certification = certification.drop_duplicates(subset = cert_vars_dup, keep = 'last')
certification

Unnamed: 0,teacher_id,district,cert_type,cert_route,cert_life,cert_startdate,cert_enddate,cert_level,cert_grades,cert_pop,cert_area,cert_subject
1721,*30*0*045,061901,Paraprofessional,Paraprofessional Program,LIFE,1994-08-15,2050-01-02,,,Not Applicable,Other,Not Applicable
1716,*30*0*045,061901,Provisional,Standard Program,LIFE,1999-02-20,2050-01-02,ELM,1-8,Regular Students,English Language Arts,English
1718,*30*0*045,061901,Provisional,Standard Program,LIFE,1999-02-20,2050-01-02,ELM,1-8,Regular Students,Foreign Language,Spanish
1717,*30*0*045,061901,Provisional,Standard Program,LIFE,1999-02-20,2050-01-02,ELM,1-8,Regular Students,General Elementary (Self-Contained),Self-Contained
1719,*30*0*045,061901,Provisional,Certification by Exam,LIFE,1999-06-14,2050-01-02,END,,Bilingual Students,Bilingual Education,Bilingual Spanish
1722,*30*0*045,061901,Standard,Certification by Exam,5YR,2009-04-01,2015-03-31,END,,Special Education,Special Education,Generic Special Education
1723,*30*0*045,061901,Standard,Standard Program,5YR,2009-04-01,2015-03-31,PRF,EC-12,Not Applicable,Professional,Principal
2106,*30*01145,066005,Paraprofessional,Paraprofessional Program,LIFE,1982-10-14,2050-01-02,,,Not Applicable,Other,Not Applicable
95974,*30*0F443,101912,Provisional,Alternative Program,LIFE,1996-04-16,2050-01-02,ELM,PK-6,Bilingual Students,Bilingual Education,Bilingual/ESL
12516,*30*0FQ45,056901,Paraprofessional,Paraprofessional Program,LIFE,1987-09-28,2050-01-02,,,Not Applicable,Other,Not Applicable


## Reshape
Need to reshape for merge so that each teacher is a single row. 

In [31]:
# create count within each scrabled id
certification['idx'] = certification.groupby('teacher_id').cumcount()
print('some teachers have as many as ', certification.idx.max(), 'current certifications')
print(certification[certification.idx > 15].teacher_id.nunique(),
      'have over 15. We will drop some of these certification for readbility.',
      'Since it is so small a number it shouldnt impact estimates.')
certification = certification[certification.idx <=15]

some teachers have as many as  47 current certifications
16 have over 15. We will drop some of these certification for readbility. Since it is so small a number it shouldnt impact estimates.


In [38]:
certification_wide = certification.pivot(index='teacher_id',columns='idx')[cert_vars]
cols = certification_wide.columns
ind = pd.Index([e[0] + str(e[1]) for e in cols.tolist()])
certification_wide.columns = ind
certification_wide.head()

Unnamed: 0_level_0,district0,district1,district2,district3,district4,district5,district6,district7,district8,district9,district10,district11,district12,district13,district14,district15,cert_type0,cert_type1,cert_type2,cert_type3,cert_type4,cert_type5,cert_type6,cert_type7,cert_type8,cert_type9,cert_type10,cert_type11,cert_type12,cert_type13,cert_type14,cert_type15,cert_route0,cert_route1,cert_route2,cert_route3,cert_route4,cert_route5,cert_route6,cert_route7,cert_route8,cert_route9,cert_route10,cert_route11,cert_route12,cert_route13,cert_route14,cert_route15,cert_life0,cert_life1,cert_life2,cert_life3,cert_life4,cert_life5,cert_life6,cert_life7,cert_life8,cert_life9,cert_life10,cert_life11,cert_life12,cert_life13,cert_life14,cert_life15,cert_startdate0,cert_startdate1,cert_startdate2,cert_startdate3,cert_startdate4,cert_startdate5,cert_startdate6,cert_startdate7,cert_startdate8,cert_startdate9,cert_startdate10,cert_startdate11,cert_startdate12,cert_startdate13,cert_startdate14,cert_startdate15,cert_enddate0,cert_enddate1,cert_enddate2,cert_enddate3,cert_enddate4,cert_enddate5,cert_enddate6,cert_enddate7,cert_enddate8,cert_enddate9,cert_enddate10,cert_enddate11,cert_enddate12,cert_enddate13,cert_enddate14,cert_enddate15,cert_level0,cert_level1,cert_level2,cert_level3,cert_level4,cert_level5,cert_level6,cert_level7,cert_level8,cert_level9,cert_level10,cert_level11,cert_level12,cert_level13,cert_level14,cert_level15,cert_grades0,cert_grades1,cert_grades2,cert_grades3,cert_grades4,cert_grades5,cert_grades6,cert_grades7,cert_grades8,cert_grades9,cert_grades10,cert_grades11,cert_grades12,cert_grades13,cert_grades14,cert_grades15,cert_pop0,cert_pop1,cert_pop2,cert_pop3,cert_pop4,cert_pop5,cert_pop6,cert_pop7,cert_pop8,cert_pop9,cert_pop10,cert_pop11,cert_pop12,cert_pop13,cert_pop14,cert_pop15,cert_area0,cert_area1,cert_area2,cert_area3,cert_area4,cert_area5,cert_area6,cert_area7,cert_area8,cert_area9,cert_area10,cert_area11,cert_area12,cert_area13,cert_area14,cert_area15,cert_subject0,cert_subject1,cert_subject2,cert_subject3,cert_subject4,cert_subject5,cert_subject6,cert_subject7,cert_subject8,cert_subject9,cert_subject10,cert_subject11,cert_subject12,cert_subject13,cert_subject14,cert_subject15
teacher_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1
*30*0*045,61901,61901.0,61901.0,61901.0,61901.0,61901.0,61901.0,,,,,,,,,,Paraprofessional,Provisional,Provisional,Provisional,Provisional,Standard,Standard,,,,,,,,,,Paraprofessional Program,Standard Program,Standard Program,Standard Program,Certification by Exam,Certification by Exam,Standard Program,,,,,,,,,,LIFE,LIFE,LIFE,LIFE,LIFE,5YR,5YR,,,,,,,,,,1994-08-15,1999-02-20,1999-02-20,1999-02-20,1999-06-14,2009-04-01,2009-04-01,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,2050-01-02,2050-01-02,2050-01-02,2050-01-02,2050-01-02,2015-03-31,2015-03-31,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,,ELM,ELM,ELM,END,END,PRF,,,,,,,,,,,1-8,1-8,1-8,,,EC-12,,,,,,,,,,Not Applicable,Regular Students,Regular Students,Regular Students,Bilingual Students,Special Education,Not Applicable,,,,,,,,,,Other,English Language Arts,Foreign Language,General Elementary (Self-Contained),Bilingual Education,Special Education,Professional,,,,,,,,,,Not Applicable,English,Spanish,Self-Contained,Bilingual Spanish,Generic Special Education,Principal,,,,,,,,,
*30*01145,66005,,,,,,,,,,,,,,,,Paraprofessional,,,,,,,,,,,,,,,,Paraprofessional Program,,,,,,,,,,,,,,,,LIFE,,,,,,,,,,,,,,,,1982-10-14,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,2050-01-02,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Not Applicable,,,,,,,,,,,,,,,,Other,,,,,,,,,,,,,,,,Not Applicable,,,,,,,,,,,,,,,
*30*0F443,101912,,,,,,,,,,,,,,,,Provisional,,,,,,,,,,,,,,,,Alternative Program,,,,,,,,,,,,,,,,LIFE,,,,,,,,,,,,,,,,1996-04-16,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,2050-01-02,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,ELM,,,,,,,,,,,,,,,,PK-6,,,,,,,,,,,,,,,,Bilingual Students,,,,,,,,,,,,,,,,Bilingual Education,,,,,,,,,,,,,,,,Bilingual/ESL,,,,,,,,,,,,,,,
*30*0FQ45,56901,,,,,,,,,,,,,,,,Paraprofessional,,,,,,,,,,,,,,,,Paraprofessional Program,,,,,,,,,,,,,,,,LIFE,,,,,,,,,,,,,,,,1987-09-28,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,2050-01-02,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Not Applicable,,,,,,,,,,,,,,,,Other,,,,,,,,,,,,,,,,Not Applicable,,,,,,,,,,,,,,,
*30*0L*44,80901,,,,,,,,,,,,,,,,Standard Paraprofessional,,,,,,,,,,,,,,,,Paraprofessional Program,,,,,,,,,,,,,,,,5YR,,,,,,,,,,,,,,,,2010-02-04,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,2015-11-30,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Not Applicable,,,,,,,,,,,,,,,,Other,,,,,,,,,,,,,,,,Not Applicable,,,,,,,,,,,,,,,


# Teachers

In [39]:
pattern = "TEACHER_MASTER*.TXT"
teacher_files = []
for entry in os.listdir(teacher_datapath)  :  
    if fnmatch.fnmatch(entry, pattern):
            teacher_files.append(entry)
teacher_files.sort()
dirs_teachers = [teacher_datapath + file for file in teacher_files]
df_list = [pd.read_csv(file, sep=",", encoding = "ISO-8859-1", dtype = object) for file in dirs_teachers]
teachers = pd.concat(df_list)
teachers.head()

Unnamed: 0,REGION NUMBER,COUNTY NUMBER,COUNTY NAME,DISTRICT NUMBER,DISTRICT NAME,DISTRICT TYPE CODE,DISTRICT TYPE NAME,DISTRICT CHARTER TYPE CODE,DISTRICT CHARTER TYPE NAME,DISTRICT CATEGORY CODE,DISTRICT CATEGORY NAME,DISTRICT PHONE,DISTRICT FAX,DISTRICT ADDRESS,DISTRICT CITY,DISTRICT STATE,DISTRICT ZIP,CAMPUS NAME,CAMPUS LOW GRADE,CAMPUS HIGH GRADE,CAMPUS TYPE CODE,CAMPUS TYPE NAME,CAMPUS CHARTER TYPE CODE,CAMPUS CHARTER TYPE NAME,CAMPUS GRADE GROUP CODE,CAMPUS GRADE GROUP NAME,CAMPUS ADDRESS,CAMPUS CITY,CAMPUS STATE,CAMPUS ZIP,CAMPUS PHONE,SCRAMBLED UNIQUE ID,FIRST NAME,MIDDLE NAME,LAST NAME,SEX CODE,SEX NAME,ETHNICITY CODE,ETHNICITY NAME,EXPERIENCE,DEGREE CODE,DEGREE NAME,FTE,BASE PAY,OTHER PAY,TOTAL PAY,ROLE CODE,ROLE NAME,ROLE FULL TIME EQUIVALENT,ROLE BASE PAY,SUBJECT AREA CODE 1,SUBJECT AREA NAME 1,SUBJECT AREA CODE 2,SUBJECT AREA NAME 2,SUBJECT AREA CODE 3,SUBJECT AREA NAME 3,SUBJECT AREA CODE 4,SUBJECT AREA NAME 4,SUBJECT AREA CODE 5,SUBJECT AREA NAME 5,SUBJECT AREA CODE 6,SUBJECT AREA NAME 6,SUBJECT AREA CODE 7,SUBJECT AREA NAME 7,SUBJECT AREA CODE 8,SUBJECT AREA NAME 8,SUBJECT AREA CODE 9,SUBJECT AREA NAME 9,SUBJECT AREA CODE 10,SUBJECT AREA NAME 10,PAY TYPE CODE 1,PAY TYPE NAME 1,PAY TYPE CODE 2,PAY TYPE NAME 2,PAY TYPE CODE 3,PAY TYPE NAME 3,PAY TYPE CODE 4,PAY TYPE NAME 4,PAY TYPE CODE 5,PAY TYPE NAME 5,CAMPUS NUMBER
0,1,31,CAMERON COUNTY,31803,HARMONY SCIENCE ACADEMY - BROWNSVI,R,OPEN ENROLL CHARTER SCHL DIST,1,OPEN ENROLLMENT CHARTER DIST,A,MAJOR URBAN,(713)343-3333,(713)777-8555,9321 W SAM HOUSTON PKWY S,HOUSTON,TX,77099,HARMONY SCIENCE ACADEMY - BROWNSVI,PK,12,1,INSTRUCTIONAL CAMPUS,1,MEMBER OF AN OPEN ENROLL DIST,5,ELEMENTARY / SECONDARY,1124 CENTRAL BLVD,BROWNSVILLE,TX,78520,(956)574-9555,*3736Y046,MAGDA,PATRICIA,APRESA,F,FEMALE,H,Hispanic/Latino,2,1,BACHELOR'S,1.0,37500,0,37500,87,TEACHER,1.0001,37504,2,ENGLISH LANGUAGE ARTS,3.0,MATHEMATICS,4.0,SCIENCE,5.0,SOCIAL STUDIES,,,,,,,,,,,,,80,BASE SALARY,,,,,,,,,31803001
1,1,31,CAMERON COUNTY,31803,HARMONY SCIENCE ACADEMY - BROWNSVI,R,OPEN ENROLL CHARTER SCHL DIST,1,OPEN ENROLLMENT CHARTER DIST,A,MAJOR URBAN,(713)343-3333,(713)777-8555,9321 W SAM HOUSTON PKWY S,HOUSTON,TX,77099,HARMONY SCIENCE ACADEMY - BROWNSVI,PK,12,1,INSTRUCTIONAL CAMPUS,1,MEMBER OF AN OPEN ENROLL DIST,5,ELEMENTARY / SECONDARY,1124 CENTRAL BLVD,BROWNSVILLE,TX,78520,(956)574-9555,*373VY248,JOEL,WILLIAM,ALAFFA,M,MALE,H,Hispanic/Latino,1,1,BACHELOR'S,1.0,40000,0,40000,87,TEACHER,1.0,40000,11,CAREER & TECHNOLOGY EDUCATION,,,,,,,,,,,,,,,,,,,80,BASE SALARY,,,,,,,,,31803001
2,1,31,CAMERON COUNTY,31803,HARMONY SCIENCE ACADEMY - BROWNSVI,R,OPEN ENROLL CHARTER SCHL DIST,1,OPEN ENROLLMENT CHARTER DIST,E,INDEPENDENT TOWN,(713)343-3333,(713)777-8555,9321 W SAM HOUSTON PKWY S,HOUSTON,TX,77099,HARMONY SCIENCE ACADEMY - BROWNSVI,PK,12,1,INSTRUCTIONAL CAMPUS,1,MEMBER OF AN OPEN ENROLL DIST,5,ELEMENTARY / SECONDARY,1124 CENTRAL BLVD,BROWNSVILLE,TX,78520,(956)574-9555,*3842D342,MARTIN,L,FELL,M,MALE,W,White,0,1,BACHELOR'S,1.0,36000,0,36000,87,TEACHER,1.0,36000,2,ENGLISH LANGUAGE ARTS,,,,,,,,,,,,,,,,,,,80,BASE SALARY,,,,,,,,,31803001
3,1,31,CAMERON COUNTY,31803,HARMONY SCIENCE ACADEMY - BROWNSVI,R,OPEN ENROLL CHARTER SCHL DIST,1,OPEN ENROLLMENT CHARTER DIST,A,MAJOR URBAN,(713)343-3333,(713)777-8555,9321 W SAM HOUSTON PKWY S,HOUSTON,TX,77099,HARMONY SCIENCE ACADEMY - BROWNSVI,PK,12,1,INSTRUCTIONAL CAMPUS,1,MEMBER OF AN OPEN ENROLL DIST,5,ELEMENTARY / SECONDARY,1124 CENTRAL BLVD,BROWNSVILLE,TX,78520,(956)574-9555,031*3Y*34,ANA,ABIGAIL,SOSA,F,FEMALE,H,Hispanic/Latino,2,1,BACHELOR'S,1.0,37500,0,37500,87,TEACHER,1.0001,37504,2,ENGLISH LANGUAGE ARTS,3.0,MATHEMATICS,4.0,SCIENCE,5.0,SOCIAL STUDIES,,,,,,,,,,,,,80,BASE SALARY,,,,,,,,,31803001
4,1,31,CAMERON COUNTY,31803,HARMONY SCIENCE ACADEMY - BROWNSVI,R,OPEN ENROLL CHARTER SCHL DIST,1,OPEN ENROLLMENT CHARTER DIST,A,MAJOR URBAN,(713)343-3333,(713)777-8555,9321 W SAM HOUSTON PKWY S,HOUSTON,TX,77099,HARMONY SCIENCE ACADEMY - BROWNSVI,PK,12,1,INSTRUCTIONAL CAMPUS,1,MEMBER OF AN OPEN ENROLL DIST,5,ELEMENTARY / SECONDARY,1124 CENTRAL BLVD,BROWNSVILLE,TX,78520,(956)574-9555,0320Q1148,GEORGETTE,,BETANCOURT,F,FEMALE,H,Hispanic/Latino,2,1,BACHELOR'S,1.0,41000,0,41000,87,TEACHER,1.0,41000,9,TECHNOLOGY APPLICATIONS,,,,,,,,,,,,,,,,,,,80,BASE SALARY,,,,,,,,,31803001


In [46]:
if year in ['yr1213', 'yr1314', 'yr1415', 'yr1516', 'yr1617', 'yr1718']:
    vars_to_keep = {'SCRAMBLED UNIQUE ID': 'teacher_id', 'DISTRICT NUMBER': 'district','DISTRICT NAME': 'distname',
                    'CAMPUS NUMBER': 'campus','CAMPUS NAME' : 'campname', 'FTE': 'fte',
                   'SUBJECT AREA NAME 1': 'sub_area1', 'SUBJECT AREA NAME 2': 'sub_area2', 'SUBJECT AREA NAME 3': 'sub_area3',
                   'SUBJECT AREA NAME 4': 'sub_area4','SUBJECT AREA NAME 5': 'sub_area5'}
teacher_vars = list(vars_to_keep.values())
teachers = filter_and_rename_cols(teachers, vars_to_keep)

KeyError: "['SCRAMBLED UNIQUE ID' 'DISTRICT NUMBER' 'DISTRICT NAME' 'CAMPUS NUMBER'\n 'CAMPUS NAME' 'FTE' 'SUBJECT AREA NAME 1' 'SUBJECT AREA NAME 2'\n 'SUBJECT AREA NAME 3' 'SUBJECT AREA NAME 4' 'SUBJECT AREA NAME 5'] not in index"

In [47]:
teachers['fte'] = teachers['fte'].apply(pd.to_numeric, errors='coerce')
teachers = teachers.set_index('teacher_id')
print(len(teachers)*teachers['fte'].mean(), 'teachers in dataset. TEA lists 352,756 FTEs')
teachers

330810.4337 teachers in dataset. TEA lists 352,756 FTEs


Unnamed: 0_level_0,district,distname,campus,campname,fte,sub_area1,sub_area2,sub_area3,sub_area4,sub_area5
teacher_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
*3736Y046,031803,HARMONY SCIENCE ACADEMY - BROWNSVI,031803001,HARMONY SCIENCE ACADEMY - BROWNSVI,1.0000,ENGLISH LANGUAGE ARTS,MATHEMATICS,SCIENCE,SOCIAL STUDIES,
*373VY248,031803,HARMONY SCIENCE ACADEMY - BROWNSVI,031803001,HARMONY SCIENCE ACADEMY - BROWNSVI,1.0000,CAREER & TECHNOLOGY EDUCATION,,,,
*3842D342,031803,HARMONY SCIENCE ACADEMY - BROWNSVI,031803001,HARMONY SCIENCE ACADEMY - BROWNSVI,1.0000,ENGLISH LANGUAGE ARTS,,,,
031*3Y*34,031803,HARMONY SCIENCE ACADEMY - BROWNSVI,031803001,HARMONY SCIENCE ACADEMY - BROWNSVI,1.0000,ENGLISH LANGUAGE ARTS,MATHEMATICS,SCIENCE,SOCIAL STUDIES,
0320Q1148,031803,HARMONY SCIENCE ACADEMY - BROWNSVI,031803001,HARMONY SCIENCE ACADEMY - BROWNSVI,1.0000,TECHNOLOGY APPLICATIONS,,,,
03246D543,031803,HARMONY SCIENCE ACADEMY - BROWNSVI,031803001,HARMONY SCIENCE ACADEMY - BROWNSVI,1.0000,ENGLISH LANGUAGE ARTS,,,,
033V7Y249,031803,HARMONY SCIENCE ACADEMY - BROWNSVI,031803001,HARMONY SCIENCE ACADEMY - BROWNSVI,1.0000,ENGLISH LANGUAGE ARTS,MATHEMATICS,,,
0341QF335,031803,HARMONY SCIENCE ACADEMY - BROWNSVI,031803001,HARMONY SCIENCE ACADEMY - BROWNSVI,1.0000,ENGLISH LANGUAGE ARTS,MATHEMATICS,SCIENCE,SOCIAL STUDIES,
03546F547,031803,HARMONY SCIENCE ACADEMY - BROWNSVI,031803001,HARMONY SCIENCE ACADEMY - BROWNSVI,1.0000,ENGLISH LANGUAGE ARTS,,,,
03617LP45,031803,HARMONY SCIENCE ACADEMY - BROWNSVI,031803001,HARMONY SCIENCE ACADEMY - BROWNSVI,0.7647,ENGLISH LANGUAGE ARTS,,,,


# Merge

In [48]:
data = teachers.merge(certification_wide, how = 'left', left_index = True,
                       right_index = True, indicator = True)
print(len(data))
data.head()

333243


Unnamed: 0_level_0,district,distname,campus,campname,fte,sub_area1,sub_area2,sub_area3,sub_area4,sub_area5,district0,district1,district2,district3,district4,district5,district6,district7,district8,district9,district10,district11,district12,district13,district14,district15,cert_type0,cert_type1,cert_type2,cert_type3,cert_type4,cert_type5,cert_type6,cert_type7,cert_type8,cert_type9,cert_type10,cert_type11,cert_type12,cert_type13,cert_type14,cert_type15,cert_route0,cert_route1,cert_route2,cert_route3,cert_route4,cert_route5,cert_route6,cert_route7,cert_route8,cert_route9,cert_route10,cert_route11,cert_route12,cert_route13,cert_route14,cert_route15,cert_life0,cert_life1,cert_life2,cert_life3,cert_life4,cert_life5,cert_life6,cert_life7,cert_life8,cert_life9,cert_life10,cert_life11,cert_life12,cert_life13,cert_life14,cert_life15,cert_startdate0,cert_startdate1,cert_startdate2,cert_startdate3,cert_startdate4,cert_startdate5,cert_startdate6,cert_startdate7,cert_startdate8,cert_startdate9,cert_startdate10,cert_startdate11,cert_startdate12,cert_startdate13,cert_startdate14,cert_startdate15,cert_enddate0,cert_enddate1,cert_enddate2,cert_enddate3,cert_enddate4,cert_enddate5,cert_enddate6,cert_enddate7,cert_enddate8,cert_enddate9,cert_enddate10,cert_enddate11,cert_enddate12,cert_enddate13,cert_enddate14,cert_enddate15,cert_level0,cert_level1,cert_level2,cert_level3,cert_level4,cert_level5,cert_level6,cert_level7,cert_level8,cert_level9,cert_level10,cert_level11,cert_level12,cert_level13,cert_level14,cert_level15,cert_grades0,cert_grades1,cert_grades2,cert_grades3,cert_grades4,cert_grades5,cert_grades6,cert_grades7,cert_grades8,cert_grades9,cert_grades10,cert_grades11,cert_grades12,cert_grades13,cert_grades14,cert_grades15,cert_pop0,cert_pop1,cert_pop2,cert_pop3,cert_pop4,cert_pop5,cert_pop6,cert_pop7,cert_pop8,cert_pop9,cert_pop10,cert_pop11,cert_pop12,cert_pop13,cert_pop14,cert_pop15,cert_area0,cert_area1,cert_area2,cert_area3,cert_area4,cert_area5,cert_area6,cert_area7,cert_area8,cert_area9,cert_area10,cert_area11,cert_area12,cert_area13,cert_area14,cert_area15,cert_subject0,cert_subject1,cert_subject2,cert_subject3,cert_subject4,cert_subject5,cert_subject6,cert_subject7,cert_subject8,cert_subject9,cert_subject10,cert_subject11,cert_subject12,cert_subject13,cert_subject14,cert_subject15,_merge
teacher_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1
*30*0F443,101912,HOUSTON ISD,101912149,EMERSON EL,1.0,SELF-CONTAINED,MATHEMATICS,SCIENCE,OTHER,,101912.0,,,,,,,,,,,,,,,,Provisional,,,,,,,,,,,,,,,,Alternative Program,,,,,,,,,,,,,,,,LIFE,,,,,,,,,,,,,,,,1996-04-16,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,2050-01-02,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,ELM,,,,,,,,,,,,,,,,PK-6,,,,,,,,,,,,,,,,Bilingual Students,,,,,,,,,,,,,,,,Bilingual Education,,,,,,,,,,,,,,,,Bilingual/ESL,,,,,,,,,,,,,,,,both
*30*0LP36,15910,NORTH EAST ISD,15910048,BRADLEY MIDDLE,1.0,ENGLISH LANGUAGE ARTS,OTHER,,,,15910.0,,,,,,,,,,,,,,,,Standard,,,,,,,,,,,,,,,,Alternative Program,,,,,,,,,,,,,,,,5YR,,,,,,,,,,,,,,,,2010-01-13,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,2015-04-30,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,ELM,,,,,,,,,,,,,,,,4-8,,,,,,,,,,,,,,,,Regular Students,,,,,,,,,,,,,,,,General Elementary (Self-Contained),,,,,,,,,,,,,,,,Generalist,,,,,,,,,,,,,,,,both
*30*11241,71905,YSLETA ISD,71905050,INDIAN RIDGE MIDDLE,1.0,PHYSICAL ED. & HEALTH,,,,,71905.0,71905.0,71905.0,,,,,,,,,,,,,,Standard,Standard,Standard Paraprofessional,,,,,,,,,,,,,,Standard Program,Certification by Exam,Paraprofessional Program,,,,,,,,,,,,,,5YR,5YR,5YR,,,,,,,,,,,,,,2009-12-01,2009-12-01,2009-12-01,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,2015-11-30,2015-11-30,2015-11-30,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,ALL,ELM,,,,,,,,,,,,,,,PK-12,4-8,,,,,,,,,,,,,,,Regular Students,Regular Students,Not Applicable,,,,,,,,,,,,,,Health and Physical Education,General Elementary (Self-Contained),Other,,,,,,,,,,,,,,Physical Education,Generalist,Not Applicable,,,,,,,,,,,,,,both
*30*11245,227901,AUSTIN ISD,227901058,MENDEZ M S,1.0,ENGLISH LANGUAGE ARTS,,,,,227901.0,227901.0,227901.0,227901.0,227901.0,,,,,,,,,,,,Standard,Standard,Standard,Standard,Standard,,,,,,,,,,,,Alternative Program,Alternative Program,Alternative Program,Alternative Program,Alternative Program,,,,,,,,,,,,5YR,5YR,5YR,5YR,5YR,,,,,,,,,,,,2010-07-07,2010-07-07,2010-07-07,2010-07-07,2010-07-07,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,2015-08-31,2015-08-31,2015-08-31,2015-08-31,2015-08-31,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,ALL,ELM,ELM,SEC,SPE,,,,,,,,,,,,EC-12,4-8,EC-4,8-12,EC-12,,,,,,,,,,,,Regular Students,Regular Students,Regular Students,Regular Students,Regular Students,,,,,,,,,,,,Health and Physical Education,General Elementary (Self-Contained),General Elementary (Self-Contained),English Language Arts,Special Education,,,,,,,,,,,,Health Education,Generalist,Generalist,English Language Arts and Reading,Generic Special Education,,,,,,,,,,,,both
*30*1FV49,108902,DONNA ISD,108902106,C STAINKE EL,1.0,ENGLISH LANGUAGE ARTS,FINE ARTS,TECHNOLOGY APPLICATIONS,,,108902.0,108902.0,,,,,,,,,,,,,,,Standard,Standard,,,,,,,,,,,,,,,Standard Program,Standard Program,,,,,,,,,,,,,,,5YR,5YR,,,,,,,,,,,,,,,2009-02-01,2009-02-01,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,2015-01-31,2015-01-31,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,ELM,ELM,,,,,,,,,,,,,,,1-8,1-8,,,,,,,,,,,,,,,Regular Students,Regular Students,,,,,,,,,,,,,,,English Language Arts,General Elementary (Self-Contained),,,,,,,,,,,,,,,Reading,Self-Contained,,,,,,,,,,,,,,,both
*30*1LP34,15910,NORTH EAST ISD,15910050,BUSH MIDDLE,1.0,FINE ARTS,OTHER,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
*30*1LP36,15910,NORTH EAST ISD,15910047,WOOD MIDDLE,1.0,ENGLISH LANGUAGE ARTS,,,,,15910.0,,,,,,,,,,,,,,,,Standard,,,,,,,,,,,,,,,,Alternative Program,,,,,,,,,,,,,,,,5YR,,,,,,,,,,,,,,,,2010-10-13,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,2016-07-31,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,ELM,,,,,,,,,,,,,,,,4-8,,,,,,,,,,,,,,,,Regular Students,,,,,,,,,,,,,,,,General Elementary (Self-Contained),,,,,,,,,,,,,,,,Generalist,,,,,,,,,,,,,,,,both
*30*2**40,214901,RIO GRANDE CITY CISD,214901104,NORTH GRAMMAR EL,1.0,ENGLISH LANGUAGE ARTS,MATHEMATICS,SCIENCE,SOCIAL STUDIES,OTHER,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only
*30*20149,220907,KELLER ISD,220907129,RIDGEVIEW EL,1.0,SELF-CONTAINED,,,,,220907.0,220907.0,220907.0,,,,,,,,,,,,,,Paraprofessional,Standard,Standard,,,,,,,,,,,,,,Paraprofessional Program,Alternative Program,Alternative Program,,,,,,,,,,,,,,LIFE,5YR,5YR,,,,,,,,,,,,,,1998-08-07,2008-11-01,2008-11-01,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,2050-01-02,2014-10-31,2014-10-31,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,,ALL,ELM,,,,,,,,,,,,,,,PK-12,EC-4,,,,,,,,,,,,,,Not Applicable,ESL Students,Regular Students,,,,,,,,,,,,,,Other,Bilingual Education,General Elementary (Self-Contained),,,,,,,,,,,,,,Not Applicable,English as a Second Language,Generalist,,,,,,,,,,,,,,both
*30*2F040,161914,WACO ISD,161914044,TENNYSON MIDDLE,0.7701,SPECIAL EDUCATION,,,,,161914.0,161914.0,,,,,,,,,,,,,,,Paraprofessional,Standard,,,,,,,,,,,,,,,Paraprofessional Program,Alternative Program,,,,,,,,,,,,,,,LIFE,5YR,,,,,,,,,,,,,,,1997-03-31,2009-06-05,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,2050-01-02,2015-02-28,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,,SPE,,,,,,,,,,,,,,,,EC-12,,,,,,,,,,,,,,,Not Applicable,Regular Students,,,,,,,,,,,,,,,Other,Special Education,,,,,,,,,,,,,,,Not Applicable,Generic Special Education,,,,,,,,,,,,,,,both


In [50]:
print(len(data[data._merge == 'left_only']), 'uncertified teachers')

60348 uncertified teachers
