In [1]:
import pandas as pd
import os
import re
import spacy
import en_core_web_sm
from spacy.matcher import Matcher
from IPython.display import clear_output
import pickle

# Certification

In [2]:
# Directory holding the cleaned text files used by this notebook.
data_path = '/Users/kylieleblancKylie/domino/dofis/data/clean/text'

# Read the certification phrases CSV and preview its first rows.
certification_csv = os.path.join(data_path, 'phrases_certification.csv')
docs = pd.read_csv(certification_csv, sep=",")
docs.head()

Unnamed: 0.1,Unnamed: 0,district,link,text,variable,value,phrase
0,1,Abernathy ISD,https://1.cdn.edl.io/epGD4mZXjWUcPl8yA7IhlcDol...,Abernathy ISD District of Innovation Plan – Dr...,reg21_003,1,|21.003; TEC§ 21.057 The principal may submit ...
1,2,Abilene ISD,https://www.abileneisd.org/wp-content/uploads/...,0 ABILENE INDEPENDENT SCHOOL DISTRICT Local In...,reg21_003,1,|21.003 (a); T.E.C. Section §21.053 (a)-(b); C...
2,3,Academy ISD,https://4.files.edl.io/1a8f/06/29/18/204245-44...,District of Innovation Plan Developed in coope...,reg21_003,1,"|21.003, 21.053,|21.003, 21.053,"
3,4,Adrian ISD,http://www.adrianisd.net/UserFiles/Servers/Ser...,Adrian ISD District of Innovation Plan House B...,reg21_003,1,|21.003 DK LEGAL Current TEC §21.003 states a ...
4,5,Agua Dulce ISD,https://tx02206063.schoolwires.net/cms/lib/TX0...,Microsoft Word - DOI 2017-2022.docx Agua Dulce...,reg21_003,1,|21.003. CERTIFICATION REQUIRED. (a) A person ...


In [3]:
# Draw a reproducible sample of 50 rows to hand-code, keeping only the phrase
# and its link, renamed to `text` and `source` for the labelling cells.
docs_to_label = (
    docs.sample(n=50, random_state=45)
    .loc[:, ['phrase', 'link']]
    .rename(columns={'phrase': 'text', 'link': 'source'})
)

I have three hypotheses for how/why districts might use the certification exemption:

1. They might use the exemption for hard-to-staff subjects. This might mean they aren't looking as hard, or that they are doing the best they can to find a certified candidate and would have needed an emergency certification from the state anyway.

2. They might think the best applicant is not certified and so will use exemptions to hire that candidate.

3. They might use the exemption to avoid hiring new teachers and simply use teachers who are not certified in a specific area.

For the first hypothesis, I code mentions of "hard-to-staff" or "bureaucratic hurdles of emergency certification". I also code for specific mentions of CTE (as more of an FYI). For the second, I code phrases indicating that candidates will be considered based on other criteria. For the third, I code for whether the school district plans to shift certified candidates around to areas in which they are not certified.

If plans discuss alternative qualification without discussing difficulties in staffing, then these districts might be thinking like "No Excuses" charters.

In [4]:
# Set to False to skip the interactive labelling cells.
relabel_docs = True

# One answer list per coding question; the labelling cells append to these.
hard_to_staff, cte, alt_qual, out_of_subject = [], [], [], []

In [None]:
# Interactively code the first batch of sampled phrases (positions 0-9).
if relabel_docs:
    batch_start, batch_stop = 0, 10
    answer_lists = (hard_to_staff, cte, alt_qual, out_of_subject)

    # Drop anything stored from this batch onward by an earlier run of this cell,
    # so re-running replaces the answers instead of appending duplicates.
    for answers in answer_lists:
        del answers[batch_start:]

    # .iloc makes the positional slice explicit (the index holds the sampled row labels).
    batch = docs_to_label.iloc[batch_start:batch_stop]
    for text, source in zip(batch['text'], batch['source']):
        show = str(source) + str(text)

        # Ask all four questions before storing anything, so interrupting part-way
        # through a document cannot leave the lists with different lengths.
        responses = (
            input(show + "Mentions other hard-to-fill positions?"),
            input(show + "CTE specifically?"),
            input(show + "Mentions alternative qualifications?"),
            input(show + "Mentions using out-of-subject certified teachers"),
        )
        for answers, response in zip(answer_lists, responses):
            answers.append(response)

        clear_output()

In [16]:
# Keep at most the first 10 answers in every list (the first batch).
hard_to_staff, cte, alt_qual, out_of_subject = [
    answers[:10] for answers in (hard_to_staff, cte, alt_qual, out_of_subject)
]

In [14]:
# Preview the phrases at positions 10-19, i.e. the next batch to be labelled.
docs_to_label['text'].iloc[10:20]

216    |21.003 - A person may not be employed as a te...
64     |21.003) Currently In the event a district can...
48     |21.003) Currently In the event a district can...
63     |21.003 Current Law: In the event a district c...
18                                                   NaN
446    |21.003, 21.053 and 21.057) Sec. 21.003. CERTI...
565    |21.003, 21.053, and 21.057) DK (LEGAL) State ...
61     |21.003) Current Statute: A person may not be ...
102                                                  NaN
356    |21.003 Certification Required 2. TEC|21.003 r...
Name: text, dtype: object

In [17]:
# Interactively code the second batch of sampled phrases (positions 10-19).
if relabel_docs:
    batch_start, batch_stop = 10, 20
    answer_lists = (hard_to_staff, cte, alt_qual, out_of_subject)

    # Drop anything stored from this batch onward by an earlier run of this cell,
    # so re-running replaces the answers instead of appending duplicates.
    for answers in answer_lists:
        del answers[batch_start:]

    # .iloc makes the positional slice explicit (the index holds the sampled row labels).
    batch = docs_to_label.iloc[batch_start:batch_stop]
    for text, source in zip(batch['text'], batch['source']):
        show = str(source) + str(text)

        # Ask all four questions before storing anything, so interrupting part-way
        # through a document cannot leave the lists with different lengths.
        # Only the first prompt repeats the document; it stays on screen until clear_output().
        responses = (
            input(show + "Mentions hard-to-fill positions?"),
            input("CTE specifically?"),
            input("Mentions alternative qualifications?"),
            input("Mentions using out-of-subject certified teachers"),
        )
        for answers, response in zip(answer_lists, responses):
            answers.append(response)

        clear_output()

In [18]:
# Keep at most the first 20 answers in every list (the first two batches).
hard_to_staff, cte, alt_qual, out_of_subject = [
    answers[:20] for answers in (hard_to_staff, cte, alt_qual, out_of_subject)
]

In [19]:
# Interactively code the third batch of sampled phrases (positions 20-29).
if relabel_docs:
    batch_start, batch_stop = 20, 30
    answer_lists = (hard_to_staff, cte, alt_qual, out_of_subject)

    # Drop anything stored from this batch onward by an earlier run of this cell,
    # so re-running replaces the answers instead of appending duplicates.
    for answers in answer_lists:
        del answers[batch_start:]

    # .iloc makes the positional slice explicit (the index holds the sampled row labels).
    batch = docs_to_label.iloc[batch_start:batch_stop]
    for text, source in zip(batch['text'], batch['source']):
        show = str(source) + str(text)

        # Ask all four questions before storing anything, so interrupting part-way
        # through a document cannot leave the lists with different lengths.
        # Only the first prompt repeats the document; it stays on screen until clear_output().
        responses = (
            input(show + "Mentions hard-to-fill positions?"),
            input("CTE specifically?"),
            input("Mentions alternative qualifications?"),
            input("Mentions using out-of-subject certified teachers"),
        )
        for answers, response in zip(answer_lists, responses):
            answers.append(response)

        clear_output()

In [None]:
# Keep at most the first 30 answers in every list (the first three batches).
hard_to_staff, cte, alt_qual, out_of_subject = [
    answers[:30] for answers in (hard_to_staff, cte, alt_qual, out_of_subject)
]

In [35]:
# Bundle the sampled phrases, their sources and the four answer lists for saving.
to_pickle = [docs_to_label.text, docs_to_label.source, hard_to_staff, cte, alt_qual, out_of_subject]

# Build the path with os.path.join so the file is written inside data_path; plain
# string concatenation produced ".../textcoding_certification" beside that directory.
# NOTE(review): a pickle saved by an earlier run sits at the old concatenated path;
# move it here if those labels should be reused.
filename = os.path.join(data_path, 'coding_certification')

# Only overwrite the saved labels when this session actually re-labelled; otherwise
# the still-empty answer lists would replace the stored answers.
if relabel_docs:
    # The context manager closes the file even if pickling raises.
    with open(filename, 'wb') as outfile:
        pickle.dump(to_pickle, outfile)

In [37]:
# Read the saved labels back; the context manager closes the file even if unpickling fails.
# Only load pickles written by this notebook: unpickling an untrusted file can execute code.
with open(filename, 'rb') as infile:
    labelled_docs = pickle.load(infile)

In [50]:
# Unpack the saved bundle: phrases and sources first, then the four answer lists.
text, source = labelled_docs[0], labelled_docs[1]

# Keep only answers typed exactly as '0' or '1' and convert them to ints.
# Each list is filtered on its own, so the resulting lengths may differ.
hard_to_staff, cte, alt_qual, out_of_subject = [
    [int(answer) for answer in labelled_docs[position] if answer in ('0', '1')]
    for position in (2, 3, 4, 5)
]

In [25]:
# Count the documents coded 1 for hard-to-staff. Comparing via str() keeps the count
# correct whether the answers are still raw input strings or were already converted
# to ints by the preceding cell (an int never equals the string '1', which gave 0).
filtered = [x for x in hard_to_staff if str(x) == '1']
len(filtered)

23

In [59]:
# Print, for each coding question, the share of validly coded documents answered 1.
label_shares = (
    ("Mentions hard-to-fill positions: ", hard_to_staff),
    ("Mentions CTE: ", cte),
    ("Mentions alternative qualifications: ", alt_qual),
    ("Mentions out-of-subject certified teachers: ", out_of_subject),
)
for label, answers in label_shares:
    print(label, sum(answers)/len(answers))

Mentions hard-to-fill positions:  0.8214285714285714
Mentions CTE:  0.9655172413793104
Mentions alternative qualifications:  0.3103448275862069
Mentions out-of-subject certified teachers:  0.7586206896551724
