In [5]:
import sys
sys.path.append("../")

import pandas as pd
import os
import statsmodels.formula.api as sm
import numpy as np
import engarde.decorators as ed


from library import regulations
from library import characteristics
from library import analysis
from library import tables
from library import test_data

In [10]:
# Absolute, machine-specific locations of the project's data and results folders.
data_path = '/Users/kylieleblancKylie/domino/dofis/data/'
table_path = '/Users/kylieleblancKylie/domino/dofis/results/impact/'

# Read <data_path>/clean/cits.csv into the module-level frame.
data = pd.read_csv(
    os.path.join(data_path, 'clean', 'cits.csv'),
    sep=",",
    low_memory=False,
)


@ed.verify(test_data.math2018correct)
@ed.verify(test_data.allyearsandtpsd)
def load():
    """Return the module-level ``data`` frame.

    The function exists so the two ``ed.verify`` decorators can apply the
    ``test_data`` checks to the frame it returns (presumably raising if a
    check fails -- confirm against engarde).
    """
    return data


# Run the checks on the full frame first, then keep only the 2016 rows.
data = load()
data = data[data['year'] == 2016]
data.head()

Unnamed: 0.1,Unnamed: 0,district,distname,year,cntyname,distischarter,rating_academic,rating_financial,eligible,type,...,yearpost,treatyear,treatpost,treatpostyear,yearpost1,yearpost2,yearpost3,treatpostyear1,treatpostyear2,treatpostyear3
4104,4901,1902,CAYUGA ISD,2016,ANDERSON,N,M,Pass,True,H,...,-0.0,-1.0,0.0,-0.0,0,0,0,0,0,0
4105,4902,1903,ELKHART ISD,2016,ANDERSON,N,M,Pass,True,G,...,-0.0,-2.0,0.0,-0.0,0,0,0,0,0,0
4106,4903,1904,FRANKSTON ISD,2016,ANDERSON,N,M,Pass,True,H,...,-0.0,-2.0,0.0,-0.0,0,0,0,0,0,0
4107,4904,1906,NECHES ISD,2016,ANDERSON,N,M,Pass,True,H,...,-0.0,-1.0,0.0,-0.0,0,0,0,0,0,0
4108,4905,1907,PALESTINE ISD,2016,ANDERSON,N,M,Pass,True,E,...,0.0,0.0,1.0,0.0,0,0,0,0,0,0


In [6]:
# TODO: does -999 stand for "missing" or for NA? Decide whether the two
# cases need to be told apart.
data['doi_year'].value_counts().sort_index()

-999.0      49
 2016.0     64
 2017.0    473
 2018.0    135
 2019.0     24
Name: doi_year, dtype: int64

# TPS vs. Earliest Adopters (2016)

In [4]:
# Comparison sample: rows where `doi` is False, plus rows whose doi_year is
# 2016. Despite its name, `tps` therefore also holds the 2016 adopters.
tps = data[data['doi'].eq(False) | data['doi_year'].eq(2016)]
len(tps)

224

In [5]:
# Geography characteristics compared across `doi` within the `tps` sample.
district_16 = analysis.many_y_one_x(
    data=tps,
    y_list=characteristics.geography,
    y_labels=characteristics.labels,
    x='doi',
)
district_16

Unnamed: 0,Characteristic,Control,Difference,Std. Error,P-value
0,Urban,0.08,-0.0,0.04,0.97
1,Suburban,0.25,0.16,0.07,0.02
2,Town,0.26,0.02,0.07,0.71
3,Rural,0.41,-0.19,0.07,0.01


In [6]:
# Teacher characteristics compared across `doi` within the `tps` sample.
teacher_16 = analysis.many_y_one_x(
    data=tps,
    y_list=characteristics.teacher,
    y_labels=characteristics.labels,
    x='doi',
)
teacher_16

Unnamed: 0,Characteristic,Control,Difference,Std. Error,P-value
0,Ave. Experience Teaching,7.39,-0.21,0.3,0.47
1,Teacher Turnover Ratio,0.19,-0.02,0.01,0.15
2,Student-Teacher Ratio,6.41,0.7,0.18,0.0


In [7]:
# Student characteristics compared across `doi` within the `tps` sample.
student_16 = analysis.many_y_one_x(
    data=tps,
    y_list=characteristics.student,
    y_labels=characteristics.labels,
    x='doi',
)
student_16

Unnamed: 0,Characteristic,Control,Difference,Std. Error,P-value
0,Percent Hispanic,0.52,-0.1,0.04,0.02
1,Percent White,0.4,0.05,0.04,0.24
2,Percent Black,0.05,0.02,0.01,0.07
3,Percent Econ. Disadvantaged,0.62,-0.11,0.03,0.0
4,Average STAAR Performance (Std.),0.17,0.47,0.11,0.0


# TPS vs. 2017-18 Adopters

In [8]:
# Comparison sample: rows where `doi` is False, plus rows whose doi_year is 2017.
doi_17 = data[data['doi'].eq(False) | data['doi_year'].eq(2017)]
len(doi_17)

608

In [9]:
# Geography characteristics compared across `doi` within the `doi_17` sample.
district_17 = analysis.many_y_one_x(
    data=doi_17,
    y_list=characteristics.geography,
    y_labels=characteristics.labels,
    x='doi',
)
district_17

Unnamed: 0,Characteristic,Control,Difference,Std. Error,P-value
0,Urban,0.08,-0.03,0.02,0.11
1,Suburban,0.25,-0.04,0.04,0.24
2,Town,0.26,0.04,0.04,0.3
3,Rural,0.41,0.04,0.05,0.44


In [10]:
# Teacher characteristics compared across `doi` within the `doi_17` sample.
teacher_17 = analysis.many_y_one_x(
    data=doi_17,
    y_list=characteristics.teacher,
    y_labels=characteristics.labels,
    x='doi',
)
teacher_17

Unnamed: 0,Characteristic,Control,Difference,Std. Error,P-value
0,Ave. Experience Teaching,7.39,-0.14,0.17,0.43
1,Teacher Turnover Ratio,0.19,-0.0,0.01,0.73
2,Student-Teacher Ratio,6.41,0.18,0.11,0.1


In [11]:
# Student characteristics compared across `doi` within the `doi_17` sample.
student_17 = analysis.many_y_one_x(
    data=doi_17,
    y_list=characteristics.student,
    y_labels=characteristics.labels,
    x='doi',
)
student_17

Unnamed: 0,Characteristic,Control,Difference,Std. Error,P-value
0,Percent Hispanic,0.52,-0.17,0.02,0.0
1,Percent White,0.4,0.14,0.02,0.0
2,Percent Black,0.05,0.02,0.01,0.02
3,Percent Econ. Disadvantaged,0.62,-0.07,0.02,0.0
4,Average STAAR Performance (Std.),0.17,0.21,0.06,0.0


# TPS vs. 2018-19 Adopters

In [12]:
# Comparison sample: rows where `doi` is False, plus rows whose doi_year is 2018.
doi_18 = data[data['doi'].eq(False) | data['doi_year'].eq(2018)]
len(doi_18)

280

In [13]:
# Traditional public schools versus DOI: geography characteristics compared
# across `doi` within the `doi_18` sample.
district_18 = analysis.many_y_one_x(
    data=doi_18,
    y_list=characteristics.geography,
    y_labels=characteristics.labels,
    x='doi',
)
district_18

Unnamed: 0,Characteristic,Control,Difference,Std. Error,P-value
0,Urban,0.08,-0.05,0.03,0.1
1,Suburban,0.25,0.01,0.05,0.82
2,Town,0.26,0.02,0.05,0.76
3,Rural,0.41,0.02,0.06,0.76


In [14]:
# Teacher characteristics compared across `doi` within the `doi_18` sample.
teacher_18 = analysis.many_y_one_x(
    data=doi_18,
    y_list=characteristics.teacher,
    y_labels=characteristics.labels,
    x='doi',
)
teacher_18

Unnamed: 0,Characteristic,Control,Difference,Std. Error,P-value
0,Ave. Experience Teaching,7.39,-0.18,0.24,0.45
1,Teacher Turnover Ratio,0.19,-0.01,0.01,0.4
2,Student-Teacher Ratio,6.41,0.45,0.14,0.0


In [15]:
# Student characteristics compared across `doi` within the `doi_18` sample.
student_18 = analysis.many_y_one_x(
    data=doi_18,
    y_list=characteristics.student,
    y_labels=characteristics.labels,
    x='doi',
)
student_18

Unnamed: 0,Characteristic,Control,Difference,Std. Error,P-value
0,Percent Hispanic,0.52,-0.15,0.04,0.0
1,Percent White,0.4,0.12,0.03,0.0
2,Percent Black,0.05,0.01,0.01,0.17
3,Percent Econ. Disadvantaged,0.62,-0.08,0.02,0.0
4,Average STAAR Performance (Std.),0.17,0.24,0.08,0.0


# To Table

In [16]:
# Worksheet start rows used by the export cells; each entry is paired, in
# order, with the district, teacher and student tables.
rows = [
    6,   # district (geography) table
    15,  # teacher table
    22,  # student table
]

In [17]:
# Export the 2016 comparison to the balance workbook.
# The workbook path is built once with os.path.join so it stays correct even
# if `table_path` is later defined without a trailing separator.
balance_file = os.path.join(table_path, 'balance_by_adoption_year.xlsx')
dfs = [district_16, teacher_16, student_16]

# Sample sizes go in row 4: rows with doi == 0 in column 2, doi == 1 in column 3.
tables.n_to_excel(file=balance_file, col=2, row=4,
                  n=len(tps[tps.doi == 0]))
tables.n_to_excel(file=balance_file, col=3, row=4,
                  n=len(tps[tps.doi == 1]))

# Each table is written starting at its paired entry of `rows`.
for df, row in zip(dfs, rows):
    tables.var_diff_to_excel(
        file=balance_file,
        df=df,
        control_col='Control',
        diff_col='Difference',
        se_col='Std. Error',
        pvalue_col='P-value',
        start_col=2,
        start_row=row,
    )

In [18]:
# Export the 2017 comparison to the balance workbook.
# The workbook path is built once with os.path.join so it stays correct even
# if `table_path` is later defined without a trailing separator.
balance_file = os.path.join(table_path, 'balance_by_adoption_year.xlsx')
dfs = [district_17, teacher_17, student_17]

# Sample size of the doi == 1 rows goes in row 4, column 4.
tables.n_to_excel(file=balance_file, col=4, row=4,
                  n=len(doi_17[doi_17.doi == 1]))

# Each table is written starting at its paired entry of `rows`;
# change_diff_col=4 is passed through to the helper unchanged.
for df, row in zip(dfs, rows):
    tables.var_diff_to_excel(
        file=balance_file,
        df=df,
        control_col='Control',
        diff_col='Difference',
        se_col='Std. Error',
        pvalue_col='P-value',
        start_col=2,
        start_row=row,
        change_diff_col=4,
    )

In [19]:
# Export the 2018 comparison to the balance workbook.
# The workbook path is built once with os.path.join so it stays correct even
# if `table_path` is later defined without a trailing separator.
balance_file = os.path.join(table_path, 'balance_by_adoption_year.xlsx')
dfs = [district_18, teacher_18, student_18]

# Sample size of the doi == 1 rows goes in row 4, column 5.
tables.n_to_excel(file=balance_file, col=5, row=4,
                  n=len(doi_18[doi_18.doi == 1]))

# Each table is written starting at its paired entry of `rows`;
# change_diff_col=5 is passed through to the helper unchanged.
for df, row in zip(dfs, rows):
    tables.var_diff_to_excel(
        file=balance_file,
        df=df,
        control_col='Control',
        diff_col='Difference',
        se_col='Std. Error',
        pvalue_col='P-value',
        start_col=2,
        start_row=row,
        change_diff_col=5,
    )