In [1]:
import sys
sys.path.append("../")

import pandas as pd
import os
import statsmodels.formula.api as sm
import numpy as np
import engarde.decorators as ed


from library import regulations
from library import characteristics
from library import analysis
from library import tables
from library import test_data

In [2]:
# Input/output locations. The defaults are the original author's local
# directories; set DOFIS_DATA_PATH / DOFIS_TABLE_PATH in the environment to run
# the notebook on another machine without editing this cell.
# os.path.join(<dir>, '') guarantees a trailing separator, which matters because
# later cells build file names with `table_path + '<file>'`.
data_path = os.path.join(
    os.environ.get('DOFIS_DATA_PATH', '/Users/kylieleblancKylie/domino/dofis/data/'), '')
table_path = os.path.join(
    os.environ.get('DOFIS_TABLE_PATH',
                   '/Users/kylieleblancKylie/domino/dofis/results/Who Needs Rules/'), '')

# Read the district-level master file, then keep only the rows with year == 2016.
data = pd.read_csv(os.path.join(data_path, 'clean', 'master_data_district.csv'),
                   sep=",", low_memory=False)
data = data[data.year == 2016]
data.head()

Unnamed: 0.1,Unnamed: 0,year,district,distname,distischarter,rating_academic,rating_financial,type,type_description,cntyname,...,district_status,type_urban,type_suburban,type_town,type_rural,eligible,teachers_nodegree,teachers_badegree,teachers_msdegree,teachers_phddegree
4901,4901,2016,1902,CAYUGA ISD,N,M,Pass,H,RURAL,ANDERSON,...,doi,0,0,0,1,1,0.0,0.908088,0.091912,0.0
4902,4902,2016,1903,ELKHART ISD,N,M,Pass,G,NON-METROPOLITAN STABLE,ANDERSON,...,doi,0,0,1,0,1,0.0,0.850449,0.149551,0.0
4903,4903,2016,1904,FRANKSTON ISD,N,M,Pass,H,RURAL,ANDERSON,...,doi,0,0,0,1,1,0.001721,0.877797,0.120482,0.0
4904,4904,2016,1906,NECHES ISD,N,M,Pass,H,RURAL,ANDERSON,...,doi,0,0,0,1,1,0.0,1.0,0.0,0.0
4905,4905,2016,1907,PALESTINE ISD,N,M,Pass,E,INDEPENDENT TOWN,ANDERSON,...,doi,0,0,1,0,1,0.025157,0.868664,0.106178,0.0


In [3]:
# Show every column name available in the loaded frame.
data.columns.tolist()

['Unnamed: 0',
 'year',
 'district',
 'distname',
 'distischarter',
 'rating_academic',
 'rating_financial',
 'type',
 'type_description',
 'cntyname',
 'students_amind_num',
 'students_asian_num',
 'students_black_num',
 'students_frpl_num',
 'students_hisp_num',
 'students_num',
 'students_paci_num',
 'students_tworaces_num',
 'students_white_num',
 'teachers_badegree_num',
 'teachers_exp_ave',
 'teachers_msdegree_num',
 'teachers_new_num',
 'teachers_nodegree_num',
 'teachers_num',
 'teachers_phddegree_num',
 'teachers_tenure_ave',
 'teachers_turnover_denom',
 'teachers_turnover_num',
 'teachers_turnover_ratio',
 'alg_avescore',
 'bio_avescore',
 'eng1_avescore',
 'eng2_avescore',
 'm_3rd_avescore',
 'm_4th_avescore',
 'm_5th_avescore',
 'm_6th_avescore',
 'm_7th_avescore',
 'm_8th_avescore',
 'r_3rd_avescore',
 'r_4th_avescore',
 'r_5th_avescore',
 'r_6th_avescore',
 'r_7th_avescore',
 'r_8th_avescore',
 's_5th_avescore',
 's_8th_avescore',
 'us_avescore',
 'alg_numtakers',
 'bio_n

In [4]:
# TODO: is -999 missing or NA? Worth differentiating?
# Number of rows per doi_year value, displayed in ascending doi_year order.
doi_year_counts = data['doi_year'].value_counts()
doi_year_counts.sort_index()

2015.0      1
2016.0     69
2017.0    585
2018.0    133
2019.0     23
Name: doi_year, dtype: int64

# Geography

In [5]:
# Build the geography panel: for each characteristic, one row with its label
# (holding the mean) followed by a blank-labelled row (holding the bracketed
# standard deviation), with one column per doi_year cohort.
row_labels = [
    entry
    for geo_char in characteristics.geography
    for entry in (characteristics.labels[geo_char], '')
]
geo_table = pd.DataFrame({'Characteristics': row_labels})

for cohort_year in (2016, 2017, 2018):
    cohort = data.loc[data['doi_year'] == cohort_year]
    cohort_column = []
    for geo_char in characteristics.geography:
        avg = round(cohort[geo_char].mean(), 2)
        spread = round(cohort[geo_char].std(), 2)
        # The mean stays numeric; the SD is stored as text wrapped in brackets.
        cohort_column.extend([avg, '[' + str(spread) + ']'])
    geo_table[cohort_year] = cohort_column

geo_table

Unnamed: 0,Characteristics,2016,2017,2018
0,Urban,0.1,0.04,0.05
1,,[0.3],[0.2],[0.22]
2,Suburban,0.38,0.22,0.22
3,,[0.49],[0.41],[0.41]
4,Town,0.2,0.29,0.26
5,,[0.41],[0.45],[0.44]
6,Rural,0.32,0.45,0.47
7,,[0.47],[0.5],[0.5]


# Teacher characteristics

In [6]:
# Build the teacher panel: for each characteristic, one row with its label
# (holding the mean) followed by a blank-labelled row (holding the bracketed
# standard deviation), with one column per doi_year cohort.
row_labels = [
    entry
    for teacher_char in characteristics.teacher
    for entry in (characteristics.labels[teacher_char], '')
]
teacher_table = pd.DataFrame({'Characteristics': row_labels})

for cohort_year in (2016, 2017, 2018):
    cohort = data.loc[data['doi_year'] == cohort_year]
    cohort_column = []
    for teacher_char in characteristics.teacher:
        avg = round(cohort[teacher_char].mean(), 2)
        spread = round(cohort[teacher_char].std(), 2)
        # The mean stays numeric; the SD is stored as text wrapped in brackets.
        cohort_column.extend([avg, '[' + str(spread) + ']'])
    teacher_table[cohort_year] = cohort_column

teacher_table

Unnamed: 0,Characteristics,2016,2017,2018
0,Ave. Experience Teaching,6.99,7.2,7.18
1,,[1.74],[1.85],[1.95]
2,Teacher Turnover Ratio,18.21,18.62,18.78
3,,[7.39],[8.8],[8.34]
4,Student-Teacher Ratio,13.59,12.57,12.87
5,,[2.14],[2.39],[2.2]


# Student characteristics

In [7]:
# Build the student panel: for each characteristic, one row with its label
# (holding the mean) followed by a blank-labelled row (holding the bracketed
# standard deviation), with one column per doi_year cohort.
row_labels = [
    entry
    for student_char in characteristics.student
    for entry in (characteristics.labels[student_char], '')
]
student_table = pd.DataFrame({'Characteristics': row_labels})

for cohort_year in (2016, 2017, 2018):
    cohort = data.loc[data['doi_year'] == cohort_year]
    cohort_column = []
    for student_char in characteristics.student:
        avg = round(cohort[student_char].mean(), 2)
        spread = round(cohort[student_char].std(), 2)
        # The mean stays numeric; the SD is stored as text wrapped in brackets.
        cohort_column.extend([avg, '[' + str(spread) + ']'])
    student_table[cohort_year] = cohort_column

student_table

Unnamed: 0,Characteristics,2016,2017,2018
0,Percent Hispanic,0.42,0.34,0.38
1,,[0.25],[0.23],[0.27]
2,Percent White,0.47,0.55,0.51
3,,[0.26],[0.24],[0.27]
4,Percent Black,0.06,0.07,0.07
5,,[0.08],[0.11],[0.1]
6,Percent Econ. Disadvantaged,0.54,0.55,0.58
7,,[0.22],[0.18],[0.18]
8,Average STAAR Performance (Std.),0.46,0.33,0.19
9,,[0.88],[0.69],[0.7]


# To Table

In [8]:
# Inspect the column labels of geo_table; the year columns are the integers
# 2016, 2017, 2018 assigned in the loop above, the same labels the export
# cell passes as df_columns.
geo_table.columns

Index(['Characteristics', 2016, 2017, 2018], dtype='object')

In [9]:
# Write the three panels into the same workbook. Each panel's year columns go
# to start_col 2, beginning at the start_row paired with it below.
table2_file = table_path + 'table2_characteristics_by_year.xlsx'
panel_placements = [(geo_table, 4), (teacher_table, 13), (student_table, 20)]
for panel, first_row in panel_placements:
    tables.df_to_excel(file=table2_file,
                       df=panel,
                       df_columns=[2016, 2017, 2018],
                       start_col=2,
                       start_row=first_row)

In [14]:
# Write the number of rows in each doi_year cohort to row 31 of the workbook:
# 2016 -> col 2, 2017 -> col 3, 2018 -> col 4.
table2_file = table_path + 'table2_characteristics_by_year.xlsx'
for target_col, cohort_year in enumerate((2016, 2017, 2018), start=2):
    cohort_size = len(data[data.doi_year == cohort_year])
    tables.n_to_excel(file=table2_file, col=target_col, row=31, n=cohort_size)