In [3]:
import pandas as pd
import os
import statsmodels.formula.api as sm
import numpy as np

from library import regulations
from library import characteristics
from library import analysis
from library import tables

In [4]:
data_path = '/Users/kylieleblancKylie/domino/dofis/data/'
table_path = '/Users/kylieleblancKylie/domino/dofis/results/descriptive/'
data = pd.read_csv(os.path.join(data_path, 'clean', 'master_data.csv'),
                  sep=",")


  interactivity=interactivity, compiler=compiler, result=result)


Just keep eligible districts from 2014-15

In [5]:
data = data[data.year == 'yr1415']
data = data[data.eligible == 1]

Generate opt out variable

In [6]:
data['optout'] = np.where((data['district_status'] != 'doi'), True, False)
data.head()

Unnamed: 0.1,Unnamed: 0,district,distname,year,cntyname,distischarter,rating_academic,rating_financial,eligible,type,...,alg_std,bio_std,eng1_std,eng2_std,us_std,math,reading,avescores,students_teacher_ratio,optout
3682,3682,1902,CAYUGA ISD,yr1415,ANDERSON,N,M,Pass,1.0,H,...,0.661039,0.688869,0.593574,0.737335,0.109439,0.714925,0.756997,0.683635,5.362454,False
3683,3683,1903,ELKHART ISD,yr1415,ANDERSON,N,M,Pass,1.0,G,...,0.295768,-0.013381,0.677605,1.086656,1.25559,0.253594,0.52618,0.469464,7.222552,False
3684,3684,1904,FRANKSTON ISD,yr1415,ANDERSON,N,M,Pass,1.0,H,...,0.702653,0.719402,0.643004,-0.072455,-0.09342,0.315321,0.626085,0.443978,6.946367,False
3685,3685,1906,NECHES ISD,yr1415,ANDERSON,N,M,Pass,1.0,H,...,-0.222086,0.999284,-0.642171,-1.104541,-0.09342,0.074073,0.619741,0.18235,6.075534,False
3686,3686,1907,PALESTINE ISD,yr1415,ANDERSON,N,M,Pass,1.0,E,...,0.23566,0.327567,-0.716316,-0.702292,-0.478851,0.094497,-0.738283,-0.305703,6.91134,False


Some basic descriptives

In [7]:
num_dois = len(data[data.doi == 1])
num_tps = len(data[data.doi == 0])
print(str(len(data)) + ' eligible districts, ' + str(num_dois), 'are DOIs and ' + str(num_tps) + ' have opted  out.')


973 eligible districts, 762 are DOIs and 211 have opted  out.


## District Characteristics

In [8]:
# Traditional public schools versus DOI
district = analysis.many_y_one_x(data = data, 
                                 y_list = characteristics.geography,
                                 y_labels = characteristics.labels,
                                 x = 'optout')
district
# note, I don't have the geographic characteristics of charter schools

Unnamed: 0,Characteristic,Control,Difference,Std. Error,P-value
0,Urban,0.05,0.01,0.02,0.5
1,Suburban,0.25,0.0,0.03,0.98
2,Town,0.29,-0.05,0.03,0.16
3,Rural,0.42,0.04,0.04,0.35


## Teacher Characteristics

In [9]:
teacher = analysis.many_y_one_x(data = data, 
                                 y_list = characteristics.teacher,
                                 y_labels = characteristics.labels,
                                 x = 'optout')
teacher

Unnamed: 0,Characteristic,Control,Difference,Std. Error,P-value
0,Ave. Experience in District,12.53,-0.4,0.19,0.03
1,Ave. Experience Teaching,7.38,-0.07,0.15,0.63
2,Teacher Turnover Ratio,18.83,1.16,0.68,0.09
3,Student-Teacher Ratio,6.7,-0.3,0.09,0.0


## Student Characteristics

In [10]:
student = analysis.many_y_one_x(data = data, 
                                 y_list = characteristics.student,
                                 y_labels = characteristics.labels,
                                 x = 'optout')
student

Unnamed: 0,Characteristic,Control,Difference,Std. Error,P-value
0,Percent Hispanic,0.35,0.16,0.02,0.0
1,Percent White,0.55,-0.13,0.02,0.0
2,Percent Black,0.07,-0.02,0.01,0.04
3,Percent Econ. Disadvantaged,0.54,0.08,0.01,0.0
4,Average STAAR Performance (Std.),0.16,-0.29,0.05,0.0


## To table

In [11]:
dfs = [district, teacher, student]
rows = [6, 15, 24]
tables.n_to_excel(file = table_path + 'TPS v. DOI v. Charter.xlsx',
                 col = 2, row = 4, n = len(data[data.doi == 0]))
tables.n_to_excel(file = table_path + 'TPS v. DOI v. Charter.xlsx',
                 col = 3, row = 4, n = len(data[data.doi == 1]))
for df, row in zip(dfs,rows):
    tables.var_diff_to_excel(file = table_path + 'TPS v. DOI v. Charter.xlsx', 
                             df = df, 
                             control_col = 'Control', 
                             diff_col = 'Difference', 
                             se_col = 'Std. Error', pvalue_col = 'P-value',
                             start_col = 2, start_row = row)

# Add charter columns

In [12]:
data = pd.read_csv(os.path.join(data_path, 'clean', 'master_data.csv'),
                  sep=",")
data[(data['eligible'] == True) | (data['charter'] == True)]
data[(data['district_status'] == 'tps') | (data['charter'] == True)]

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0.1,Unnamed: 0,district,distname,year,cntyname,distischarter,rating_academic,rating_financial,eligible,type,...,m_8th_std,alg_std,bio_std,eng1_std,eng2_std,us_std,math,reading,avescores,students_teacher_ratio
7,7,2901,ANDREWS ISD,yr1112,ANDREWS,N,,,,D,...,0.978847,-0.670585,-1.458591,-0.844834,,,0.049887,-0.434597,-0.352151,7.585639
8,8,3801,PINEYWOODS COMMUNITY ACADEMY,yr1112,ANGELINA,Y,,,,I,...,-0.713670,-0.111118,-1.082022,-0.306049,,-2.187935,-0.492347,-0.685146,-0.672005,7.777622
12,12,3905,DIBOLL ISD,yr1112,ANGELINA,N,,,,G,...,0.142097,-0.226710,-1.646875,-1.294645,,,0.159871,-0.409356,-0.311009,7.160818
14,14,3907,CENTRAL ISD,yr1112,ANGELINA,N,,,,G,...,1.302136,0.342005,-0.898826,-0.246733,,,0.711367,0.052606,0.252019,7.301466
20,20,7901,CHARLOTTE ISD,yr1112,ATASCOSA,N,,,,H,...,-1.189095,-1.771024,-2.196462,-2.036093,,-2.309650,-1.584866,-1.776806,-1.780203,6.483667
23,23,7905,PLEASANTON ISD,yr1112,ATASCOSA,N,,,,E,...,0.522438,-1.109836,-1.621431,-1.151299,,,0.533409,-0.173119,-0.114722,6.604064
24,24,7906,POTEET ISD,yr1112,ATASCOSA,N,,,,G,...,-0.295295,-1.045104,-1.804627,-1.759286,,,-0.362876,-0.613177,-0.697689,7.012595
25,25,8901,BELLVILLE ISD,yr1112,AUSTIN,N,,,,E,...,0.674574,0.517705,0.037507,0.638061,,-1.183785,0.619735,0.424312,0.392111,6.827553
26,26,8902,SEALY ISD,yr1112,AUSTIN,N,,,,D,...,1.073932,0.087701,-0.919181,-0.508711,,,0.481484,-0.146172,0.044779,8.213319
29,29,10901,MEDINA ISD,yr1112,BANDERA,N,,,,H,...,-0.466448,-0.911017,-0.618944,0.494715,0.245110,,-0.084010,0.862496,0.242549,4.167873


In [14]:
teacher = analysis.many_y_one_x(data = data, 
                                 y_list = characteristics.teacher,
                                 y_labels = characteristics.labels,
                                 x = 'charter')
teacher

Unnamed: 0,Characteristic,Control,Difference,Std. Error,P-value
0,Ave. Experience in District,12.5,-7.02,0.08,0.0
1,Ave. Experience Teaching,7.39,-5.13,0.06,0.0
2,Teacher Turnover Ratio,18.4,16.46,0.32,0.0
3,Student-Teacher Ratio,6.6,37.9,13.91,0.01


In [16]:
student = analysis.many_y_one_x(data = data, 
                                 y_list = characteristics.student,
                                 y_labels = characteristics.labels,
                                 x = 'charter')
student

Unnamed: 0,Characteristic,Control,Difference,Std. Error,P-value
0,Percent Hispanic,0.39,0.11,0.01,0.0
1,Percent White,0.51,-0.31,0.01,0.0
2,Percent Black,0.07,0.18,0.0,0.0
3,Percent Econ. Disadvantaged,0.58,0.1,0.01,0.0
4,Average STAAR Performance (Std.),0.09,-0.53,0.03,0.0


In [17]:
dfs = [teacher, student]
rows = [15, 24]
tables.n_to_excel(file = table_path + 'DOI v. TPS v. Charter.xlsx',
                 col = 4, row = 4, n = len(data[data.distischarter == 1]))
for df, row in zip(dfs,rows):
    tables.var_diff_to_excel(file = table_path + 'DOI v. TPS v. Charter.xlsx', 
                             df = df, 
                             control_col = 'Control', 
                             diff_col = 'Difference', 
                             se_col = 'Std. Error', pvalue_col = 'P-value',
                             start_col = 2, start_row = row,
                             added_third_col = True)