In [53]:
import sys
sys.path.append("../")

import pandas as pd
import os
import statsmodels.formula.api as sm
import numpy as np
import engarde.decorators as ed


from library import regulations
from library import characteristics
from library import analysis
from library import tables
from library import test_data

In [54]:
# Input and output locations.
# NOTE(review): these are absolute, machine-specific paths, so the notebook
# only runs where they exist. table_path keeps its trailing slash because
# later cells build file names with plain string concatenation.
data_path = '/Users/kylieleblancKylie/domino/dofis/data/'
table_path = '/Users/kylieleblancKylie/domino/dofis/results/Who Needs Rules/'

# Read the district master file from the 'clean' sub-folder in a single pass
# (low_memory=False), then keep only the 2016 rows.
master_csv = os.path.join(data_path, 'clean', 'master_data_district.csv')
data = pd.read_csv(master_csv, sep=",", low_memory=False)
data = data.loc[data['year'] == 2016]
data.head()

Unnamed: 0.1,Unnamed: 0,year,district,distname,distischarter,rating_academic,rating_financial,type,type_description,cntyname,...,elem,middle_math,middle_reading,middle_science,algebra,biology,eng1,math,reading,avescores
4901,4901,2016,1902,CAYUGA ISD,N,M,Pass,H,RURAL,ANDERSON,...,1.282771,0.692503,0.844028,0.900302,1.881695,1.034905,1.097759,0.879946,1.17109,1.074875
4902,4902,2016,1903,ELKHART ISD,N,M,Pass,G,NON-METROPOLITAN STABLE,ANDERSON,...,0.8562,-0.172723,0.287515,1.239979,0.258778,0.14946,0.786351,0.401804,0.511792,0.475319
4903,4903,2016,1904,FRANKSTON ISD,N,M,Pass,H,RURAL,ANDERSON,...,0.539407,0.588836,0.881549,0.708117,0.628674,1.538693,0.489772,0.528941,0.745659,0.701539
4904,4904,2016,1906,NECHES ISD,N,M,Pass,H,RURAL,ANDERSON,...,0.38374,0.956068,0.58285,1.472389,0.180175,0.760112,-0.454338,0.824381,0.328818,0.622125
4905,4905,2016,1907,PALESTINE ISD,N,M,Pass,E,INDEPENDENT TOWN,ANDERSON,...,-0.523642,0.317496,-0.665815,0.256704,-0.203591,0.195259,-0.493882,-0.0051,-0.6927,-0.262562


In [55]:
# TODO: is -999 missing or NA? Worth differentiating?
# Number of districts per doi_year, listed in ascending year order.
# value_counts() leaves NaN out by default, so districts without a doi_year
# do not appear in this tally.
data['doi_year'].value_counts().sort_index()

2016.0      3
2017.0    177
2018.0    505
2019.0    112
2020.0     14
Name: doi_year, dtype: int64

# Geography

In [56]:
# doi_year cohorts that become the columns of every table below (each table
# filters `data.doi_year == yr` for yr in this list).
# NOTE(review): the doi_year tally above also shows 2016 (3 districts) and
# 2020 (14 districts); they are left out here, presumably because the cohorts
# are small -- confirm.
years = [2017, 2018, 2019]

In [57]:
# Two table rows per geography characteristic: the labelled row carries the
# mean and the blank-labelled row beneath it carries the standard deviation.
row_labels = [
    text
    for char in characteristics.geography
    for text in (characteristics.labels[char], '')
]
geo_table = pd.DataFrame({'Characteristics': row_labels})

# One column per doi_year cohort: mean rounded to two decimals, followed by
# the rounded standard deviation wrapped in square brackets.
for yr in years:
    cohort = data[data.doi_year == yr]
    column = []
    for char in characteristics.geography:
        center = round(cohort[char].mean(), 2)
        spread = round(cohort[char].std(), 2)
        column.extend([center, '[' + str(spread) + ']'])
    geo_table[yr] = column

geo_table

Unnamed: 0,Characteristics,2017,2018,2019
0,Urban,0.1,0.04,0.04
1,,[0.3],[0.19],[0.19]
2,Suburban,0.32,0.22,0.21
3,,[0.47],[0.41],[0.41]
4,Town,0.27,0.28,0.26
5,,[0.44],[0.45],[0.44]
6,Rural,0.32,0.47,0.49
7,,[0.47],[0.5],[0.5]


# Teacher characteristics

In [58]:
# Two table rows per teacher characteristic: the labelled row carries the
# mean and the blank-labelled row beneath it carries the standard deviation.
row_labels = [
    text
    for char in characteristics.teacher
    for text in (characteristics.labels[char], '')
]
teacher_table = pd.DataFrame({'Characteristics': row_labels})

# One column per doi_year cohort: mean rounded to two decimals, followed by
# the rounded standard deviation wrapped in square brackets.
for yr in years:
    cohort = data[data.doi_year == yr]
    column = []
    for char in characteristics.teacher:
        center = round(cohort[char].mean(), 2)
        spread = round(cohort[char].std(), 2)
        column.extend([center, '[' + str(spread) + ']'])
    teacher_table[yr] = column

teacher_table

Unnamed: 0,Characteristics,2017,2018,2019
0,Ave. Experience Teaching,7.3,7.18,7.04
1,,[1.65],[1.89],[1.98]
2,Teacher Turnover Ratio,17.78,18.75,19.11
3,,[6.94],[9.08],[8.53]
4,Student-Teacher Ratio,13.36,12.49,12.75
5,,[2.3],[2.35],[2.36]


# Student characteristics

In [59]:
# Two table rows per student characteristic: the labelled row carries the
# mean and the blank-labelled row beneath it carries the standard deviation.
row_labels = [
    text
    for char in characteristics.student
    for text in (characteristics.labels[char], '')
]
student_table = pd.DataFrame({'Characteristics': row_labels})

# One column per doi_year cohort: mean rounded to two decimals, followed by
# the rounded standard deviation wrapped in square brackets.
for yr in years:
    cohort = data[data.doi_year == yr]
    column = []
    for char in characteristics.student:
        center = round(cohort[char].mean(), 2)
        spread = round(cohort[char].std(), 2)
        column.extend([center, '[' + str(spread) + ']'])
    student_table[yr] = column

student_table

Unnamed: 0,Characteristics,2017,2018,2019
0,Percent Hispanic,0.37,0.34,0.39
1,,[0.24],[0.24],[0.28]
2,Percent White,0.51,0.55,0.51
3,,[0.24],[0.25],[0.28]
4,Percent Black,0.07,0.07,0.07
5,,[0.09],[0.11],[0.09]
6,Percent Econ. Disadvantaged,0.53,0.56,0.57
7,,[0.2],[0.18],[0.19]
8,Average STAAR Performance (Std.),0.44,0.31,0.19
9,,[0.78],[0.68],[0.73]


# Write tables to Excel

In [60]:
# Inspect the column labels before exporting: the year columns are keyed by
# the integers in `years` (not strings), alongside 'Characteristics'.
geo_table.columns

Index(['Characteristics', 2017, 2018, 2019], dtype='object')

In [61]:
# Every panel is written into the same workbook. Each entry pairs a panel with
# the row passed as start_row; only the year columns are handed over
# (df_columns = years), starting at column 2.
# NOTE(review): the start rows 4 / 13 / 20 are hard-coded, presumably to match
# a pre-formatted worksheet layout -- confirm if panel lengths change.
workbook = table_path + 'table2_district_characteristics_by_year.xlsx'
panels = [(geo_table, 4), (teacher_table, 13), (student_table, 20)]
for panel, first_row in panels:
    tables.df_to_excel(file=workbook, df=panel, df_columns=years,
                       start_col=2, start_row=first_row)

In [62]:
# Write the number of districts in each doi_year cohort into row 31, one
# column per year. Columns start at 2, the same start_col used when the year
# columns were exported, and iterating over `years` keeps this row in step
# with that list instead of three hand-indexed calls.
# NOTE(review): n counts every row in the cohort, while the means above skip
# missing values, so per-characteristic sample sizes may be smaller -- confirm
# this is the N that should be reported.
n_file = table_path + 'table2_district_characteristics_by_year.xlsx'
for n_col, yr in enumerate(years, start=2):
    tables.n_to_excel(file=n_file, col=n_col, row=31,
                      n=len(data[data.doi_year == yr]))