In [328]:
import pandas as pd
import os
import fnmatch
import numpy as np
from library import start
from library import clean_tea
import datetime as dt
pd.options.display.max_columns = None
year = 'yr1213'
file = "CERTIFICATION_01.csv"
if year == 'yr1213' or year == 'yr1314':
    file = "CERTIFICATION_01.TXT"
folder = 'certification_' + year
teacher_datapath = os.path.join(start.data_path, 'tea', 'teachers', folder)
file = teacher_datapath + '/' + file
year

'yr1213'

# Import Certification, Rename Vars, Fix Grades, Expiration as Date Time

In [329]:
certification = pd.read_csv(file, sep=",", encoding="ISO-8859-1", dtype=object)
certification.columns

Index(['personid_SCRAM', 'DISTRICT', 'REGION', 'CERTIFICATE ID NUMBER',
       'CERTIFICATE LIFE', 'CREDENTIAL TYPE', 'CERTIFICATE PREPARATION ROUTE',
       'CERTIFICATE EFFECTIVE DATE', 'CERTIFICATE EXPIRATION DATE',
       'SUBJECT AREA', 'SUBJECT', 'FULLER CERTIFICATE DESCRIPTION',
       'NUMBER OF RENEWAL CYCLES', 'CREDENTIALED GRADES',
       'CERTIFICATION LEVEL', 'ROLE_CREDENTIALED FOR',
       'POPULATION CREDENTIALED FOR', 'CERTIFICATION BIRTH DATE'],
      dtype='object')

In [330]:
certification = pd.read_csv(file, sep=",", encoding="ISO-8859-1", dtype=object)
if year > 'yr1415':
    vars_to_keep = {'PERSONID_SCRAM': 'teacher_id', 'DISTRICT': 'district', 'ROLE_CREDENTIALED_FOR': 'role',
                    'CREDENTIAL_TYPE': 'cert_type', 'CERTIFICATE_PREPARATION_ROUTE': 'cert_route',
                    'CERTIFICATE_EXPIRATION_DATE': 'expiration',
                    'CERTIFICATION_LEVEL': 'cert_level', 'CREDENTIALED_GRADES': 'cert_grades',
                    'SUBJECT_AREA': 'cert_area', 'SUBJECT': 'cert_subject'}
else:
    vars_to_keep = {'personid_SCRAM': 'teacher_id', 'DISTRICT': 'district', 'ROLE_CREDENTIALED FOR': 'role',
                    'CERTIFICATE EXPIRATION DATE': 'expiration',
                    'CREDENTIAL TYPE': 'cert_type', 'CERTIFICATE PREPARATION ROUTE': 'cert_route',
                    'CERTIFICATION LEVEL': 'cert_level', 'CREDENTIALED GRADES': 'cert_grades',
                    'SUBJECT AREA': 'cert_area', 'SUBJECT': 'cert_subject'}
certification = clean_tea.filter_and_rename_cols(certification, vars_to_keep)

certification['cert_grades'] = certification['cert_grades'].replace({'Grades ':''}, regex = True)
grades = {'12-Aug': '8-12', '12-Jul': '7-12',
          '12-Jun': '6-12', '6-Jan': '1-6',
          '8-Apr': '4-8', '8-Jan': '1-8', 'EC-12': '0-12',
         'EC-4': '0-4', 'EC-6': '0-6', 'PK-12': '0-12',
         'PK-3': '0-3', 'PK-6': '0-6', 'PK-KG': '0-1'}
certification['cert_grades'] = certification['cert_grades'].replace(grades)
certification['cert_grade_low'],certification['cert_grade_high'] = certification['cert_grades'].str.split('-').str
certification['cert_grade_low'] = pd.to_numeric(certification.cert_grade_low, errors = 'coerce')
certification['cert_grade_high'] = pd.to_numeric(certification.cert_grade_high, errors = 'coerce')


certification['expiration'] = certification['expiration'].str[0:9]
certification['expiration']= pd.to_datetime(certification['expiration'], errors = 'coerce') 
certification.sample(5)

Unnamed: 0,teacher_id,district,role,expiration,cert_type,cert_route,cert_level,cert_grades,cert_area,cert_subject,cert_grade_low,cert_grade_high
60264,V3603DP45,108912,Teacher,NaT,Provisional,Standard Program,ELM,1-8,Mathematics,Mathematics,1.0,8.0
80488,Y3425QV40,240903,Educational Secretary,NaT,Paraprofessional,Paraprofessional Program,,,Other,Not Applicable,,
53912,V3603DP45,108912,Teacher,NaT,Provisional,Standard Program,ELM,1-8,Mathematics,Mathematics,1.0,8.0
1192,03561**49,31901,Librarian,2018-06-30,Standard,Standard Program,PRF,0-12,Professional,School Librarian,0.0,12.0
78651,V33V6QP41,240903,Teacher,NaT,Provisional,Standard Program,SEC,6-12,Vocational Education,Business - Administration,6.0,12.0


In [331]:
pd.to_datetime(certification.expiration.head())

0   2018-01-31
1   2018-01-31
2   2018-01-31
3   2018-01-31
4   2015-07-31
Name: expiration, dtype: datetime64[ns]

## Only keep if certification is for teaching and unexpired

In [332]:
certification = certification[certification.role == 'Teacher']
timestamps = {'yr1213': '2012-07-01', 'yr1314': '2013-07-01', 'yr1415': '2014-07-01', 'yr1516': '2015-07-01',
             'yr1617': '2016-07-01', 'yr1718': '2017-07-01', 'yr1819': '2017-0701'}
certification['expired'] = np.where(certification.expiration < pd.Timestamp(timestamps[year]), True, False)
certification = certification[certification.expired == False]
certification.sample(5)

Unnamed: 0,teacher_id,district,role,expiration,cert_type,cert_route,cert_level,cert_grades,cert_area,cert_subject,cert_grade_low,cert_grade_high,expired
70926,2324QF343,240901,Teacher,2015-04-30,Standard,Alternative Program,ELM,4-8,Social Studies,Social Studies - Composite,4.0,8.0,False
57212,V3603DP45,108912,Teacher,NaT,Provisional,Standard Program,ELM,1-8,Mathematics,Mathematics,1.0,8.0,False
40107,V34Q60442,108909,Teacher,2018-04-30,Standard,Certification by Exam,ELM,0-4,General Elementary (Self-Contained),Generalist,0.0,4.0,False
37314,13007*P42,108909,Teacher,NaT,Provisional,Standard Program,SEC,6-12,English Language Arts,English,6.0,12.0,False
34169,0353Q*448,108908,Teacher,2016-06-30,Standard,Standard Program,ELM,0-4,General Elementary (Self-Contained),Generalist,0.0,4.0,False


## Create certified variable

In [333]:
cert_types = {'Emergency Non-Certified': False, 'Emergency Certified': True,
                  'Emergency': False, 'Emergency Teaching': False,
                  'Temporary Exemption': True, 'Temporary Teaching Certificate': False,
                  'Unknown Permit': False, 'Unknown': False,
                  'Special Assignment': True,
                  'Paraprofessional': False, 'Standard Paraprofessional': False, 'Non-renewable': False,
                  'Standard': True, 'Provisional': True,
                  'Probationary': True, 'Probationary Extension': True, 'Probationary Second Extension': True,
                   'One Year': True,
                  'Visiting International Teacher': True,
                  'Professional': True, 'Standard Professional': True}
certification['certified'] = certification['cert_type'].map(cert_types)
certification['vocational'] = np.where((certification['cert_type'] == "Vocational"), True, False)
certification.sample(7)

Unnamed: 0,teacher_id,district,role,expiration,cert_type,cert_route,cert_level,cert_grades,cert_area,cert_subject,cert_grade_low,cert_grade_high,expired,certified,vocational
37152,03731L542,108909,Teacher,2013-10-31,Standard,Certification by Exam,SEC,6-12,Foreign Language,Spanish,6.0,12.0,False,True,False
15943,23805*V40,31912,Teacher,2015-06-30,Standard,Alternative Program,SEC,6-12,Mathematics,Mathematics,6.0,12.0,False,True,False
47707,V3603DP45,108912,Teacher,NaT,Provisional,Standard Program,ELM,1-8,General Elementary (Self-Contained),Self-Contained,1.0,8.0,False,True,False
134,*3462L*44,31901,Teacher,2016-05-31,Standard,Alternative Program,SPE,0-12,Special Education,Generic Special Education,0.0,12.0,False,True,False
78793,V3443QP43,240903,Teacher,2017-01-31,Standard,Standard Program,ELM,1-8,English Language Arts,Reading,1.0,8.0,False,True,False
23260,034Q3*P49,108904,Teacher,2014-05-31,Standard,Certification by Exam,END,,Bilingual Education,Bilingual Spanish,,,False,True,False
73431,V36531141,240901,Teacher,NaT,Provisional,Certification by Exam,END,,Bilingual Education,English as a Second Language,,,False,True,False


In [334]:
certification.cert_area.value_counts()

General Elementary (Self-Contained)    18115
Bilingual Education                    16115
Mathematics                             6001
English Language Arts                   5148
Health and Physical Education           4009
Special Education                       3681
Social Studies                          2823
Science                                 2197
Foreign Language                        1792
Vocational Education                    1670
Fine Arts                               1550
Computer Science                         398
Other                                    127
Professional                              11
Name: cert_area, dtype: int64

## Certification Areas

In [335]:
area = {'General Elementary (Self-Contained)': 'elem', 'Bilingual Education': 'biling', 'English Language Arts': 'ela',
       'Special Education': 'sped', 'Health and Physical Education': 'pe', 'Social Studies': 'ss','Mathematics': 'math',
       'Science': 'science', 'Vocational Education': 'voc', 'Fine Arts': 'art', 'Foreign Language': 'for',
       'Computer Science': 'cs', 'Other': 'other' }
certification['cert_area'] = certification['cert_area'].map(area)
certification.sample(10)

Unnamed: 0,teacher_id,district,role,expiration,cert_type,cert_route,cert_level,cert_grades,cert_area,cert_subject,cert_grade_low,cert_grade_high,expired,certified,vocational
5590,V32QQL044,31901,Teacher,NaT,Provisional,Standard Program,END,0-12,sped,Mentally Retarded,0.0,12.0,False,True,False
57189,V3603DP45,108912,Teacher,NaT,Provisional,Standard Program,END,,biling,Bilingual/ESL,,,False,True,False
21065,V3236Y045,108902,Teacher,2016-03-31,Standard,Certification by Exam,SEC,6-12,ela,English Language Arts,6.0,12.0,False,True,False
30406,Q3106VP44,108906,Teacher,2017-01-31,Standard,Certification by Exam,SUP,,biling,English as a Second Language,,,False,True,False
46025,Q32VVQQ44,108912,Teacher,2016-11-30,Standard,Alternative Program,ELM,0-4,elem,Generalist,0.0,4.0,False,True,False
61409,V3603DP45,108912,Teacher,NaT,Provisional,Standard Program,END,,biling,Bilingual/ESL,,,False,True,False
40749,V364QY047,108909,Teacher,NaT,Provisional,Standard Program,VOC,6-12,voc,Trades and Industries,6.0,12.0,False,True,False
25931,V3266Y146,108904,Teacher,NaT,Provisional,Standard Program,ELM,1-8,science,Biology,1.0,8.0,False,True,False
81519,V37VVL144,245903,Teacher,NaT,Provisional,Out of State,ELM,1-6,ela,Reading,1.0,6.0,False,True,False
5717,V33*VY442,31901,Teacher,NaT,Provisional,Standard Program,SEC,6-12,ss,History,6.0,12.0,False,True,False


### General Elementary

In [365]:
certification['cert_area_elem'] = np.where(certification['cert_area'] == "elem",
                                           True, False)
certification['cert_area_elem'] = np.where((certification['cert_area'] == "biling") & 
                                    (certification['cert_grade_low'] < 6), True, certification.cert_area_elem)
certification['cert_area_elem'] = np.where((certification['cert_area'] == "spend") & 
                                    (certification['cert_grade_low'] < 6),
                                    True, certification.cert_area_elem)
certification.sample(7)

Unnamed: 0,teacher_id,district,role,expiration,cert_type,cert_route,cert_level,cert_grades,cert_area,cert_subject,cert_grade_low,cert_grade_high,expired,certified,vocational,cert_area_elem,cert_area_high_math,cert_area_high_science
33826,V3710F344,108907,Teacher,NaT,Provisional,Alternative Program,SEC,6-12,ss,History,6.0,12.0,False,True,False,False,False,False
9546,13062P433,31903,Teacher,2015-07-31,Standard,Alternative Program,ELM,1-6,elem,Self-Contained,1.0,6.0,False,True,False,True,False,False
0,03Q*5YV40,15910,Teacher,2018-01-31,Standard,Standard Program,ELM,0-4,biling,Bilingual Spanish,0.0,4.0,False,True,False,True,False,False
62199,V36270031,108912,Teacher,2015-06-30,Standard,Alternative Program,SUP,0-4,biling,Bilingual Ed Supplemental-Spanish,0.0,4.0,False,True,False,True,False,False
1473,03Q16D*45,31901,Teacher,2014-07-31,Standard,Certification by Exam,SUP,0-4,biling,Bilingual Ed Supplemental-Spanish,0.0,4.0,False,True,False,True,False,False
54105,V3603DP45,108912,Teacher,NaT,Provisional,Standard Program,END,,biling,Bilingual/ESL,,,False,True,False,False,False,False
31817,V36021P47,108906,Teacher,NaT,Provisional,Standard Program,ELM,0-6,elem,Elementary Early Childhood Educ.,0.0,6.0,False,True,False,True,False,False


### High School Math

In [372]:
certification['cert_area_high_math'] = np.where(certification['cert_area'] == "math",
                                           True, False)
certification['cert_area_high_math'] = np.where(certification.cert_grade_high > 8,
                                           certification.cert_area_high_math, False)
certification.sample(5)

Unnamed: 0,teacher_id,district,role,expiration,cert_type,cert_route,cert_level,cert_grades,cert_area,cert_subject,cert_grade_low,cert_grade_high,expired,certified,vocational,cert_area_elem,cert_area_high_math,cert_area_high_science
79811,V36Q3*P47,240903,Teacher,NaT,Provisional,Standard Program,ELM,1-8,ss,Government,1.0,8.0,False,True,False,False,False,False
28240,23Q32*346,108905,Teacher,2015-09-30,Standard,Alternative Program,SEC,6-12,math,Mathematics,6.0,12.0,False,True,False,False,True,True
7109,V35Q7D045,31901,Teacher,NaT,Provisional,Standard Program,ELM,1-8,for,Spanish,1.0,8.0,False,True,False,False,False,False
58480,V3603DP45,108912,Teacher,NaT,Provisional,Standard Program,ELM,1-8,math,Mathematics,1.0,8.0,False,True,False,False,False,False
65467,V34QQ0*40,108914,Teacher,NaT,Provisional,Out of State,ELM,1-8,elem,Self-Contained,1.0,8.0,False,True,False,True,False,False


In [383]:
## Science
certification['cert_area_high_science'] = np.where(certification['cert_area'] == "science",
                                           True, False)
certification['cert_area_high_science'] = np.where(certification.cert_grade_high > 8,
                                           certification.cert_area_high_science, False)
certification.sample(5)

Unnamed: 0,teacher_id,district,role,expiration,cert_type,cert_route,cert_level,cert_grades,cert_area,cert_subject,cert_grade_low,cert_grade_high,expired,certified,vocational,cert_area_elem,cert_area_high_math,cert_area_high_science
59503,V3603DP45,108912,Teacher,NaT,Provisional,Standard Program,ELM,1-8,elem,Self-Contained,1.0,8.0,False,True,False,True,False,False
26182,V333QQ449,108904,Teacher,NaT,Provisional,Certification by Exam,SEC,6-12,ss,Geography,6.0,12.0,False,True,False,False,False,False
36456,*355QD539,108909,Teacher,2015-09-30,Standard,Alternative Program,SEC,8-12,math,Mathematics,8.0,12.0,False,True,False,False,True,False
20903,Q38120345,108902,Teacher,2013-12-31,Standard,Alternative Program,ELM,1-6,biling,Bilingual Spanish,1.0,6.0,False,True,False,True,False,False
24938,F364QD145,108904,Teacher,2015-07-31,Standard,Alternative Program,ELM,1-6,biling,Bilingual Spanish,1.0,6.0,False,True,False,True,False,False


In [384]:
certification[certification.teacher_id == 'P33Q1*040']

Unnamed: 0,teacher_id,district,role,expiration,cert_type,cert_route,cert_level,cert_grades,cert_area,cert_subject,cert_grade_low,cert_grade_high,expired,certified,vocational,cert_area_elem,cert_area_high_math,cert_area_high_science


## Any Certification

In [385]:
teacher_yesno = certification[['teacher_id', 'district', 'certified', 'vocational',
                              'cert_area_elem', 'cert_area_high_math']]
teacher_yesno = teacher_yesno.groupby(['teacher_id']).max()
teacher_yesno.sample()

Unnamed: 0_level_0,district,certified,vocational,cert_area_elem,cert_area_high_math
teacher_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
V3346L548,240903,True,False,True,False


## Reshape long to wide

In [386]:
df = certification[['teacher_id', 'district',
                    'cert_area', 'cert_subject',
                    'cert_grade_low', 'cert_grade_high']]
df['idx'] = df.groupby('teacher_id').cumcount()
certs = certification[['teacher_id','certified', 'vocational']].groupby('teacher_id').max()
df = df.merge(certs, how = 'left', on = 'teacher_id')
df['cert_area_idx'] = 'cert_area_' + df.idx.astype(str)
df['cert_subject_idx'] = 'cert_subject_' + df.idx.astype(str)
df['cert_grade_low_idx'] = 'cert_grade_low_' + df.idx.astype(str)
df['cert_grade_high_idx'] = 'cert_grade_high_' + df.idx.astype(str)
df.sample(5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,teacher_id,district,cert_area,cert_subject,cert_grade_low,cert_grade_high,idx,certified,vocational,cert_area_idx,cert_subject_idx,cert_grade_low_idx,cert_grade_high_idx
31775,V3557YP41,108909,pe,Health Education,0.0,12.0,0,True,False,cert_area_0,cert_subject_0,cert_grade_low_0,cert_grade_high_0
27517,P326V*444,108908,ss,History,6.0,12.0,0,True,False,cert_area_0,cert_subject_0,cert_grade_low_0,cert_grade_high_0
62994,V3465Q*45,245903,elem,Self-Contained,1.0,8.0,1,True,False,cert_area_1,cert_subject_1,cert_grade_low_1,cert_grade_high_1
37993,V3603DP45,108912,biling,Bilingual/ESL,,,527,True,False,cert_area_527,cert_subject_527,cert_grade_low_527,cert_grade_high_527
6416,V3665P543,31901,biling,Bilingual/ESL,1.0,8.0,0,True,False,cert_area_0,cert_subject_0,cert_grade_low_0,cert_grade_high_0


In [387]:
areas = df.pivot(index='teacher_id',columns='cert_area_idx', values='cert_area')
subjects = df.pivot(index='teacher_id',columns='cert_subject_idx', values='cert_subject')
low_grades = df.pivot(index='teacher_id',columns='cert_grade_low_idx', values='cert_grade_low')
high_grades = df.pivot(index='teacher_id',columns='cert_grade_high_idx', values='cert_grade_high')

teacher_cert_wide = pd.concat([areas, subjects, low_grades, high_grades], axis = 1)
max_certs = len(list(teacher_cert.filter(regex = ("cert_area"))))
variables = []
for num in range(0, max_certs):
    string = '_' + str(num) + '$'
    variables = variables + list(areas.filter(regex = (string)))
    variables = variables + list(subjects.filter(regex = (string)))
    variables = variables + list(low_grades.filter(regex = (string)))
    variables = variables + list(high_grades.filter(regex = (string)))
teacher_cert_wide = teacher_cert_wide[variables]
teacher_cert_wide

Unnamed: 0_level_0,cert_area_0,cert_subject_0,cert_grade_low_0,cert_grade_high_0,cert_area_1,cert_subject_1,cert_grade_low_1,cert_grade_high_1,cert_area_2,cert_subject_2,cert_grade_low_2,cert_grade_high_2,cert_area_3,cert_subject_3,cert_grade_low_3,cert_grade_high_3,cert_area_4,cert_subject_4,cert_grade_low_4,cert_grade_high_4,cert_area_5,cert_subject_5,cert_grade_low_5,cert_grade_high_5,cert_area_6,cert_subject_6,cert_grade_low_6,cert_grade_high_6,cert_area_7,cert_subject_7,cert_grade_low_7,cert_grade_high_7,cert_area_8,cert_subject_8,cert_grade_low_8,cert_grade_high_8,cert_area_9,cert_subject_9,cert_grade_low_9,cert_grade_high_9,cert_area_10,cert_subject_10,cert_grade_low_10,cert_grade_high_10,cert_area_11,cert_subject_11,cert_grade_low_11,cert_grade_high_11,cert_area_12,cert_subject_12,cert_grade_low_12,cert_grade_high_12,cert_area_13,cert_subject_13,cert_grade_low_13,cert_grade_high_13,cert_area_14,cert_subject_14,cert_grade_low_14,cert_grade_high_14,cert_area_15,cert_subject_15,cert_grade_low_15,cert_grade_high_15,cert_area_16,cert_subject_16,cert_grade_low_16,cert_grade_high_16,cert_area_17,cert_subject_17,cert_grade_low_17,cert_grade_high_17,cert_area_18,cert_subject_18,cert_grade_low_18,cert_grade_high_18,cert_area_19,cert_subject_19,cert_grade_low_19,cert_grade_high_19
teacher_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1
*30*1FV49,ela,Reading,1.0,8.0,elem,Self-Contained,1.0,8.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
*30*5P341,sped,Generic Special Education,0.0,12.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
*30*QF546,biling,Bilingual Spanish,1.0,8.0,elem,Self-Contained,1.0,8.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
*3001L344,biling,Bilingual Spanish,1.0,8.0,elem,Self-Contained,1.0,8.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
*3002PV46,science,Biology,1.0,8.0,elem,Self-Contained,1.0,8.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
*301QYP40,sped,Generic Special Education,0.0,12.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
*301VFV47,biling,Bilingual Spanish,0.0,4.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
*30220240,voc,Business Education,6.0,12.0,pe,Health Education,0.0,12.0,sped,Generic Special Education,0.0,12.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
*302Q0449,elem,Generalist,0.0,4.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
*302VY149,elem,Generalist,0.0,6.0,biling,Bilingual Ed Supplemental-Spanish,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


## Merge

In [343]:
teacher_cert = teacher_yesno.merge(teacher_cert_wide, left_index = True, right_index = True)
teacher_cert.sample(5)

Unnamed: 0_level_0,district,certified,vocational,cert_area_elem,cert_area_high_math,cert_area_0,cert_subject_0,cert_grade_low_0,cert_grade_high_0,cert_area_1,cert_subject_1,cert_grade_low_1,cert_grade_high_1,cert_area_2,cert_subject_2,cert_grade_low_2,cert_grade_high_2,cert_area_3,cert_subject_3,cert_grade_low_3,cert_grade_high_3,cert_area_4,cert_subject_4,cert_grade_low_4,cert_grade_high_4,cert_area_5,cert_subject_5,cert_grade_low_5,cert_grade_high_5,cert_area_6,cert_subject_6,cert_grade_low_6,cert_grade_high_6,cert_area_7,cert_subject_7,cert_grade_low_7,cert_grade_high_7,cert_area_8,cert_subject_8,cert_grade_low_8,cert_grade_high_8,cert_area_9,cert_subject_9,cert_grade_low_9,cert_grade_high_9,cert_area_10,cert_subject_10,cert_grade_low_10,cert_grade_high_10,cert_area_11,cert_subject_11,cert_grade_low_11,cert_grade_high_11,cert_area_12,cert_subject_12,cert_grade_low_12,cert_grade_high_12,cert_area_13,cert_subject_13,cert_grade_low_13,cert_grade_high_13,cert_area_14,cert_subject_14,cert_grade_low_14,cert_grade_high_14,cert_area_15,cert_subject_15,cert_grade_low_15,cert_grade_high_15,cert_area_16,cert_subject_16,cert_grade_low_16,cert_grade_high_16,cert_area_17,cert_subject_17,cert_grade_low_17,cert_grade_high_17
teacher_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1
131120249,108904,True,False,True,False,elem,Self-Contained,1.0,6.0,elem,Generalist,0.0,6.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
136*4*P47,31905,True,False,False,False,ela,English Language Arts and Reading,4.0,8.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
231441446,253901,True,False,False,True,math,Mathematics,8.0,12.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
V3664*241,31901,True,False,True,True,elem,Self-Contained,1.0,8.0,math,Mathematics,1.0,8.0,math,Mathematics,6.0,12.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
03303V443,108907,True,False,False,False,art,Music,0.0,12.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


# Import classes

In [344]:
file = teacher_datapath + '/' + 'TEACHER_CLASS_01.TXT'
courses = pd.read_csv(file, sep=",", encoding="ISO-8859-1", dtype=object)
courses.columns

Index(['REGION NUMBER', 'COUNTY NUMBER', 'COUNTY NAME', 'DISTRICT NUMBER',
       'DISTRICT NAME', 'DISTRICT CHARTER TYPE CODE',
       'DISTRICT CHARTER TYPE NAME', 'CAMPUS NUMBER', 'CAMPUS NAME',
       'CAMPUS CHARTER TYPE CODE', 'CAMPUS CHARTER TYPE NAME',
       'CAMPUS ADDRESS', 'CAMPUS CITY', 'CAMPUS STATE', 'CAMPUS ZIP',
       'SCRAMBLED UNIQUE ID', 'FIRST NAME', 'MIDDLE NAME', 'LAST NAME',
       'ROLE CODE', 'ROLE NAME', 'CLASS NUMBER', 'CLASS NAME',
       'CLASS TYPE CODE', 'CLASS TYPE NAME', 'SUBJECT AREA CODE',
       'SUBJECT AREA NAME', 'SUBJECT CODE', 'SUBJECT NAME', 'GRADE LEVEL CODE',
       'GRADE LEVEL NAME', 'ADVANCED COURSE', 'PARTIAL FULL TIME EQUIVALENT',
       ' '],
      dtype='object')

In [345]:
file = teacher_datapath + '/' + 'TEACHER_CLASS_01.TXT'
courses = pd.read_csv(file, sep=",", encoding="ISO-8859-1", dtype=object)
vars_to_keep = {'SCRAMBLED UNIQUE ID': 'teacher_id', 'ROLE NAME': 'role',
                'DISTRICT NUMBER': 'district', 'CAMPUS NUMBER': 'campus',
                'FIRST NAME': 'first', 'MIDDLE NAME': 'middle', 'LAST NAME': 'last',
               'SUBJECT AREA NAME': 'course_area', 'SUBJECT NAME': 'course_subject',
                'CLASS TYPE NAME': 'population', 'CLASS NAME': 'class',
               'GRADE LEVEL NAME': 'grades', 'PARTIAL FULL TIME EQUIVALENT': 'fte'}
courses = clean_tea.filter_and_rename_cols(courses, vars_to_keep)
courses['fte'] = pd.to_numeric(courses.fte, errors = 'coerce')
courses.sample(10)

Unnamed: 0,teacher_id,role,district,campus,first,middle,last,course_area,course_subject,population,class,grades,fte
26870,V35VQDP41,TEACHER,31912,31912110,JAVIER,,GOMEZ,PHYSICAL ED. & HEALTH,HEALTH,ACADEMIC ACHIEVEMENT COURSE,HEALTH GRADE 2,ELEMENTARY (GRADES 1-6),0.1111
90258,23247YQ48,TEACHER,240903,240903001,CHRISTOPHER,MICHAEL,BIRCH,CAREER & TECHNOLOGY EDUCATION,LAW PUBLIC SAFE CORR & SEC,ACADEMIC ACHIEVEMENT COURSE,FORENSIC SCIENCE,SECONDARY (GRADES 7-12),1.0
78227,230Q2Y*44,TEACHER,108914,108914001,NOE,J,GARCIA,SOCIAL STUDIES,ECONOMICS,ACADEMIC ACHIEVEMENT COURSE,ECONOMICS W/EMPH FREE ENTERPR,SECONDARY (GRADES 7-12),0.0724
89372,13133Q147,TEACHER,240901,240901121,BRENDA,GUADALUPE,YBARRA,ENGLISH LANGUAGE ARTS,READING,ACADEMIC ACHIEVEMENT COURSE,READING GRADE 1,ELEMENTARY (GRADES 1-6),0.3
19755,V3413V*48,TEACHER,31903,31903117,STACEY,LEE,SNAVELY,FINE ARTS,ART,ACADEMIC ACHIEVEMENT COURSE,ART GRADE 4,ELEMENTARY (GRADES 1-6),0.1429
69120,V3164F437,TEACHER,108912,108912007,DANIEL,,HAMMOND,MATHEMATICS,MATHEMATICS,ACADEMIC ACHIEVEMENT COURSE,ALGEBRA II,SECONDARY (GRADES 7-12),0.372
22647,03406VQ46,TEACHER,31906,31906105,GERALD,R,ALMEIDA,SOCIAL STUDIES,HISTORY,ACADEMIC ACHIEVEMENT COURSE,SOCIAL STUDIES GRADE 8,SECONDARY (GRADES 7-12),0.6119
81697,03662V245,TEACHER,214901,214901105,NORMA,L,GARCIA,OTHER,OTHER,ACADEMIC ACHIEVEMENT COURSE,LDC-OTHER ELEMENTARY,ELEMENTARY (GRADES 1-6),0.0652
83851,0311VP548,TEACHER,214903,214903103,MELINDA,,GONZALEZ,SCIENCE,GENERAL SCIENCE,ACADEMIC ACHIEVEMENT COURSE,SCIENCE GRADE 3,ELEMENTARY (GRADES 1-6),0.0435
1130,23452VQ32,TEACHER,31901,31901003,MARIA,MATA,HULL,OTHER,OTHER,ACADEMIC ACHIEVEMENT COURSE,OTHER LOCALLY DEVELOPED COURSE,ALL GRADE LEVELS,0.75


In [346]:
courses.role.value_counts()

TEACHER               102290
SUBSTITUTE TEACHER       130
Name: role, dtype: int64

In [347]:
courses.course_area.value_counts()

ENGLISH LANGUAGE ARTS            23186
MATHEMATICS                      12713
SOCIAL STUDIES                   11726
SCIENCE                          11366
FINE ARTS                        10123
PHYSICAL ED. & HEALTH             9720
OTHER                             7774
CAREER & TECHNOLOGY EDUCATION     3676
SELF-CONTAINED                    3481
FOREIGN LANGUAGE                  2966
NOT APPLICABLE                    2229
TECHNOLOGY APPLICATIONS           1983
SPECIAL EDUCATION                 1477
Name: course_area, dtype: int64

In [348]:
area = {'SELF-CONTAINED': 'elem', 'ENGLISH LANGUAGE ARTS': 'ela',
       'SPECIAL EDUCATION': 'sped', 'PHYSICAL ED. & HEALTH': 'pe', 'SOCIAL STUDIES': 'ss','MATHEMATICS': 'math',
       'SCIENCE': 'science', 'CAREER & TECHNOLOGY EDUCATION': 'voc', 'FINE ARTS': 'art', 'FOREIGN LANGUAGE': 'for',
       'TECHNOLOGY APPLICATIONS': 'cs', 'NOT APPLICABLE': 'other'}
courses['courses_area_short'] = courses['course_area'].map(area)
courses

Unnamed: 0,teacher_id,role,district,campus,first,middle,last,course_area,course_subject,population,class,grades,fte,courses_area_short
0,*3736Y046,TEACHER,031803,031803001,MAGDA,PATRICIA,APRESA,ENGLISH LANGUAGE ARTS,READING,ACADEMIC ACHIEVEMENT COURSE,READING GRADE K,KINDERGARTEN,0.1031,ela
1,*3736Y046,TEACHER,031803,031803001,MAGDA,PATRICIA,APRESA,ENGLISH LANGUAGE ARTS,ENGLISH,ACADEMIC ACHIEVEMENT COURSE,ENGLISH LANGUAGE ARTS GRADE K,KINDERGARTEN,0.4330,ela
2,*3736Y046,TEACHER,031803,031803001,MAGDA,PATRICIA,APRESA,MATHEMATICS,MATHEMATICS,ACADEMIC ACHIEVEMENT COURSE,MATHEMATICS GRADE K,KINDERGARTEN,0.3093,math
3,*3736Y046,TEACHER,031803,031803001,MAGDA,PATRICIA,APRESA,SCIENCE,GENERAL SCIENCE,ACADEMIC ACHIEVEMENT COURSE,SCIENCE KINDERGARTEN,KINDERGARTEN,0.0928,science
4,*3736Y046,TEACHER,031803,031803001,MAGDA,PATRICIA,APRESA,SOCIAL STUDIES,SOCIAL STUDIES,ACADEMIC ACHIEVEMENT COURSE,SOCIAL STUDIES KINDERGARTEN,KINDERGARTEN,0.0619,ss
5,*373VY248,TEACHER,031803,031803001,JOEL,WILLIAM,ALAFFA,CAREER & TECHNOLOGY EDUCATION,ARTS A/V TECH & COMM,ACADEMIC ACHIEVEMENT COURSE,PRINC ARTS/AUD VID TECH & COMM,SECONDARY (GRADES 7-12),0.5000,voc
6,*373VY248,TEACHER,031803,031803001,JOEL,WILLIAM,ALAFFA,CAREER & TECHNOLOGY EDUCATION,SCIENCE TECH ENG & MATH,ACADEMIC ACHIEVEMENT COURSE,ROBOTICS AND AUTOMATION,SECONDARY (GRADES 7-12),0.5000,voc
7,*3842D342,TEACHER,031803,031803001,MARTIN,L,FELL,ENGLISH LANGUAGE ARTS,ENGLISH,ACADEMIC ACHIEVEMENT COURSE,ENGLISH II (ENG 2),SECONDARY (GRADES 7-12),0.5000,ela
8,*3842D342,TEACHER,031803,031803001,MARTIN,L,FELL,ENGLISH LANGUAGE ARTS,ENGLISH,ACADEMIC ACHIEVEMENT COURSE,ENGLISH III (ENG 3),SECONDARY (GRADES 7-12),0.2500,ela
9,*3842D342,TEACHER,031803,031803001,MARTIN,L,FELL,ENGLISH LANGUAGE ARTS,ENGLISH,ACADEMIC ACHIEVEMENT COURSE,LDC-ENG LANG ARTS-GR 7-12,SECONDARY (GRADES 7-12),0.2500,ela


In [349]:
course_cert = courses.merge(teacher_cert, left_on = 'teacher_id', right_on = 'teacher_id', how = 'left')
course_cert

Unnamed: 0,teacher_id,role,district_x,campus,first,middle,last,course_area,course_subject,population,class,grades,fte,courses_area_short,district_y,certified,vocational,cert_area_elem,cert_area_high_math,cert_area_0,cert_subject_0,cert_grade_low_0,cert_grade_high_0,cert_area_1,cert_subject_1,cert_grade_low_1,cert_grade_high_1,cert_area_2,cert_subject_2,cert_grade_low_2,cert_grade_high_2,cert_area_3,cert_subject_3,cert_grade_low_3,cert_grade_high_3,cert_area_4,cert_subject_4,cert_grade_low_4,cert_grade_high_4,cert_area_5,cert_subject_5,cert_grade_low_5,cert_grade_high_5,cert_area_6,cert_subject_6,cert_grade_low_6,cert_grade_high_6,cert_area_7,cert_subject_7,cert_grade_low_7,cert_grade_high_7,cert_area_8,cert_subject_8,cert_grade_low_8,cert_grade_high_8,cert_area_9,cert_subject_9,cert_grade_low_9,cert_grade_high_9,cert_area_10,cert_subject_10,cert_grade_low_10,cert_grade_high_10,cert_area_11,cert_subject_11,cert_grade_low_11,cert_grade_high_11,cert_area_12,cert_subject_12,cert_grade_low_12,cert_grade_high_12,cert_area_13,cert_subject_13,cert_grade_low_13,cert_grade_high_13,cert_area_14,cert_subject_14,cert_grade_low_14,cert_grade_high_14,cert_area_15,cert_subject_15,cert_grade_low_15,cert_grade_high_15,cert_area_16,cert_subject_16,cert_grade_low_16,cert_grade_high_16,cert_area_17,cert_subject_17,cert_grade_low_17,cert_grade_high_17
0,*3736Y046,TEACHER,031803,031803001,MAGDA,PATRICIA,APRESA,ENGLISH LANGUAGE ARTS,READING,ACADEMIC ACHIEVEMENT COURSE,READING GRADE K,KINDERGARTEN,0.1031,ela,031803,True,False,False,False,biling,Bilingual Spanish,0.0,6.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,*3736Y046,TEACHER,031803,031803001,MAGDA,PATRICIA,APRESA,ENGLISH LANGUAGE ARTS,ENGLISH,ACADEMIC ACHIEVEMENT COURSE,ENGLISH LANGUAGE ARTS GRADE K,KINDERGARTEN,0.4330,ela,031803,True,False,False,False,biling,Bilingual Spanish,0.0,6.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,*3736Y046,TEACHER,031803,031803001,MAGDA,PATRICIA,APRESA,MATHEMATICS,MATHEMATICS,ACADEMIC ACHIEVEMENT COURSE,MATHEMATICS GRADE K,KINDERGARTEN,0.3093,math,031803,True,False,False,False,biling,Bilingual Spanish,0.0,6.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,*3736Y046,TEACHER,031803,031803001,MAGDA,PATRICIA,APRESA,SCIENCE,GENERAL SCIENCE,ACADEMIC ACHIEVEMENT COURSE,SCIENCE KINDERGARTEN,KINDERGARTEN,0.0928,science,031803,True,False,False,False,biling,Bilingual Spanish,0.0,6.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,*3736Y046,TEACHER,031803,031803001,MAGDA,PATRICIA,APRESA,SOCIAL STUDIES,SOCIAL STUDIES,ACADEMIC ACHIEVEMENT COURSE,SOCIAL STUDIES KINDERGARTEN,KINDERGARTEN,0.0619,ss,031803,True,False,False,False,biling,Bilingual Spanish,0.0,6.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,*373VY248,TEACHER,031803,031803001,JOEL,WILLIAM,ALAFFA,CAREER & TECHNOLOGY EDUCATION,ARTS A/V TECH & COMM,ACADEMIC ACHIEVEMENT COURSE,PRINC ARTS/AUD VID TECH & COMM,SECONDARY (GRADES 7-12),0.5000,voc,031803,True,False,False,False,cs,Technology Applications,0.0,12.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6,*373VY248,TEACHER,031803,031803001,JOEL,WILLIAM,ALAFFA,CAREER & TECHNOLOGY EDUCATION,SCIENCE TECH ENG & MATH,ACADEMIC ACHIEVEMENT COURSE,ROBOTICS AND AUTOMATION,SECONDARY (GRADES 7-12),0.5000,voc,031803,True,False,False,False,cs,Technology Applications,0.0,12.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,*3842D342,TEACHER,031803,031803001,MARTIN,L,FELL,ENGLISH LANGUAGE ARTS,ENGLISH,ACADEMIC ACHIEVEMENT COURSE,ENGLISH II (ENG 2),SECONDARY (GRADES 7-12),0.5000,ela,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8,*3842D342,TEACHER,031803,031803001,MARTIN,L,FELL,ENGLISH LANGUAGE ARTS,ENGLISH,ACADEMIC ACHIEVEMENT COURSE,ENGLISH III (ENG 3),SECONDARY (GRADES 7-12),0.2500,ela,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9,*3842D342,TEACHER,031803,031803001,MARTIN,L,FELL,ENGLISH LANGUAGE ARTS,ENGLISH,ACADEMIC ACHIEVEMENT COURSE,LDC-ENG LANG ARTS-GR 7-12,SECONDARY (GRADES 7-12),0.2500,ela,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [350]:
course_cert.grades.value_counts()

ELEMENTARY (GRADES 1-6)          57199
SECONDARY (GRADES 7-12)          30755
KINDERGARTEN                      7299
ALL GRADE LEVELS                  4445
MIDDLE SCHOOL (GRADES 6 - 8)      1045
PRE-KINDERGARTEN                   881
PRE-KINDERGARTEN/KINDERGARTEN      764
NOT APPLICABLE                      32
Name: grades, dtype: int64

In [359]:
if year == 'yr1213':
    high_school_math = course_cert[(course_cert.grades == "SECONDARY (GRADES 7-12)") & (course_cert.courses_area_short == "math")]
if year > 'yr1213':
    high_school_math = course_cert[(course_cert.grades == "GRADES 9-12") & (course_cert.courses_area_short == "math")]
high_school_math['infield_fte'] = high_school_math.cert_area_high_math* high_school_math.fte
high_school_math

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,teacher_id,role,district_x,campus,first,middle,last,course_area,course_subject,population,class,grades,fte,courses_area_short,district_y,certified,vocational,cert_area_elem,cert_area_high_math,cert_area_0,cert_subject_0,cert_grade_low_0,cert_grade_high_0,cert_area_1,cert_subject_1,cert_grade_low_1,cert_grade_high_1,cert_area_2,cert_subject_2,cert_grade_low_2,cert_grade_high_2,cert_area_3,cert_subject_3,cert_grade_low_3,cert_grade_high_3,cert_area_4,cert_subject_4,cert_grade_low_4,cert_grade_high_4,cert_area_5,cert_subject_5,cert_grade_low_5,cert_grade_high_5,cert_area_6,cert_subject_6,cert_grade_low_6,cert_grade_high_6,cert_area_7,cert_subject_7,cert_grade_low_7,cert_grade_high_7,cert_area_8,cert_subject_8,cert_grade_low_8,cert_grade_high_8,cert_area_9,cert_subject_9,cert_grade_low_9,cert_grade_high_9,cert_area_10,cert_subject_10,cert_grade_low_10,cert_grade_high_10,cert_area_11,cert_subject_11,cert_grade_low_11,cert_grade_high_11,cert_area_12,cert_subject_12,cert_grade_low_12,cert_grade_high_12,cert_area_13,cert_subject_13,cert_grade_low_13,cert_grade_high_13,cert_area_14,cert_subject_14,cert_grade_low_14,cert_grade_high_14,cert_area_15,cert_subject_15,cert_grade_low_15,cert_grade_high_15,cert_area_16,cert_subject_16,cert_grade_low_16,cert_grade_high_16,cert_area_17,cert_subject_17,cert_grade_low_17,cert_grade_high_17,infield_fte
25,033V7Y249,TEACHER,031803,031803001,RAYMUNDO,,MARTINEZ,MATHEMATICS,MATHEMATICS,ACADEMIC ACHIEVEMENT COURSE,GEOMETRY (GEOM),SECONDARY (GRADES 7-12),0.3948,math,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
26,033V7Y249,TEACHER,031803,031803001,RAYMUNDO,,MARTINEZ,MATHEMATICS,MATHEMATICS,ACADEMIC ACHIEVEMENT COURSE,PRECALCULUS (PRE CALC),SECONDARY (GRADES 7-12),0.1974,math,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
28,033V7Y249,TEACHER,031803,031803001,RAYMUNDO,,MARTINEZ,MATHEMATICS,MATHEMATICS,ACADEMIC ACHIEVEMENT COURSE,LDC-MATHEMATICS-GR 7-12,SECONDARY (GRADES 7-12),0.2763,math,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
43,13323Q448,TEACHER,031803,031803001,ANTHONY,,LEHMANN,MATHEMATICS,MATHEMATICS,ACADEMIC ACHIEVEMENT COURSE,ALGEBRA I (ALG 1),SECONDARY (GRADES 7-12),0.3750,math,031803,True,False,False,False,math,Mathematics,4.0,8.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0
44,13323Q448,TEACHER,031803,031803001,ANTHONY,,LEHMANN,MATHEMATICS,MATHEMATICS,ACADEMIC ACHIEVEMENT COURSE,ALGEBRA II (ALG2),SECONDARY (GRADES 7-12),0.1705,math,031803,True,False,False,False,math,Mathematics,4.0,8.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0
47,13323Q448,TEACHER,031803,031803001,ANTHONY,,LEHMANN,MATHEMATICS,MATHEMATICS,ACADEMIC ACHIEVEMENT COURSE,LDC-MATHEMATICS-GR 7-12,SECONDARY (GRADES 7-12),0.1705,math,031803,True,False,False,False,math,Mathematics,4.0,8.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0
56,D3217Q330,TEACHER,031803,031803001,MUSTAFA,,SIRINEL,MATHEMATICS,MATHEMATICS,ACADEMIC ACHIEVEMENT COURSE,MATHEMATICS GRADE 7,SECONDARY (GRADES 7-12),0.6666,math,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
57,D3217Q330,TEACHER,031803,031803001,MUSTAFA,,SIRINEL,MATHEMATICS,MATHEMATICS,ACADEMIC ACHIEVEMENT COURSE,MATHEMATICS GRADE 8,SECONDARY (GRADES 7-12),0.3333,math,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
160,03003Y145,TEACHER,031901,031901001,MARIA,I,PONCE,MATHEMATICS,MATHEMATICS,ACADEMIC ACHIEVEMENT COURSE,MATHEMATICAL MODELS W/APPLCTN,SECONDARY (GRADES 7-12),0.1667,math,031901,True,False,False,False,sped,Hearing Impaired,0.0,12.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0
171,031041445,TEACHER,031901,031901001,JUAN,,HUERTA,MATHEMATICS,MATHEMATICS,ACADEMIC ACHIEVEMENT COURSE,ALGEBRA II,SECONDARY (GRADES 7-12),0.0376,math,031901,True,False,False,False,sped,Generic Special Education,0.0,12.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0


In [356]:
high_school_math['infield_fte'] = high_school_math.cert_area_high_math* high_school_math.fte
high_school_math.sample(10)

ValueError: a must be greater than 0 unless no samples are taken

In [None]:
# fte not numeric
high_school_math_teachers = high_school_math[['campus', 'infield', 
                                              'fte', 'infield_fte']].groupby(by = 'campus').mean()
high_school_math_teachers['infield_percent'] = high_school_math_teachers.infield_fte/high_school_math_teachers.fte
high_school_math_teachers.infield_percent.mean()

In [None]:
high_school_math_teachers['infield_percent'] = high_school_math_teachers.infield_fte/high_school_math_teachers.fte
high_school_math_teachers.infield_percent.mean()