In [1]:
from functions import *
import statistics

In [2]:
# Load the regressors (X) and the outcome variables (y). In both files the
# first CSV column is used as the DataFrame index.
X, y = (
    pd.read_csv(csv_name, index_col=0)
    for csv_name in ("indep_x.csv", "dep_y.csv")
)

In [3]:
# Regression sample for the grade-goal outcome: every column of X plus
# y["grade_goal"], excluding rows whose gpa equals 17 (presumably the value
# assigned to observations without a GPA -- see the note below).
first_reg = (
    pd.concat([X, y["grade_goal"]], axis="columns")
    .loc[lambda frame: frame["gpa"] != 17]
)

# Note: using the observations without a GPA and setting their GPA to 17 reduced
# the coefficient of `treated` in the first regression to -1.159; it was still
# significant, though.

In [4]:
# Display the column names of X (the candidate regressors).
X.columns

Index(['age', 'gender', 'scholarship', '1st_year', 'gpa', '1st_time', 'taste',
       'importance', 'expected_grade', 'knowledge', 'treated'],
      dtype='object')

In [5]:
# Nested control sets passed to create_regression_table: each specification
# contains all controls of the previous one plus one additional block.
control_blocks = [
    ["taste"],
    ["age", "gender"],
    ["scholarship", "1st_year", "1st_time"],
    ["gpa", "importance"],
    ["expected_grade", "knowledge"],
]
progression = [
    [name for block in control_blocks[:depth] for name in block]
    for depth in range(1, len(control_blocks) + 1)
]

In [7]:
# Full specification: grade_goal regressed on a constant plus every other
# column of first_reg.
# The covariance estimator is selected with `cov_type`; the previous keyword
# `cov="HC3"` is not a parameter of statsmodels' `fit`, so the HC3
# heteroskedasticity-robust standard errors were not actually requested.
# NOTE(review): assumes `sm` is statsmodels.api (it is brought in by
# `from functions import *`) -- confirm.
model = sm.OLS(
    first_reg["grade_goal"],
    sm.add_constant(first_reg.drop(["grade_goal"], axis=1)),
).fit(cov_type="HC3")

In [60]:
# Print the LaTeX regression table for grade_goal, with `treated` as the
# variable of interest and the control sets in `progression`.
grade_goal_table = create_regression_table(first_reg, "grade_goal", ["treated"], progression)
print(grade_goal_table)

\begin{tabular}{llllll}
\hline
 Variables      & (1)          & (2)          & (3)          & (4)          & (5)          \\
\hline
 treated        & -0.899*
(0.541)              & -0.892
(0.547)              & -1.000*
(0.568)              & -1.222**
(0.496)              & -1.462***
(0.475)              \\
 taste          & 1.497***
(0.482)              & 1.538***
(0.474)              & 1.404***
(0.477)              & 1.175***
(0.447)              & 1.148***
(0.442)              \\
 const          & 20.611***
(1.619)              & 20.537***
(4.660)              & 17.928***
(5.403)              & 2.473
(5.391)              & -4.733
(5.501)              \\
 age            &              & -0.017
(0.221)              & 0.074
(0.239)              & 0.228
(0.192)              & 0.222
(0.184)              \\
 gender         &              & 0.575
(0.532)              & 0.373
(0.550)              & 0.679
(0.518)              & 0.399
(0.495)              \\
 scholarship    &              &   

In [61]:
# Randomization inference for `treated` on grade_goal.
# NOTE(review): robust="no" is passed here, while the grade_goal regression
# table is built with the default `robust` setting -- confirm this is intended.
# NOTE(review): no random seed is set in this notebook; verify that
# randomization_inference is reproducible across runs.
rand_inf1_coeff = randomization_inference(
    first_reg,
    y_col="grade_goal",
    treatment_col="treated",
    robust="no",
)

In [62]:
# Show the first two entries of the randomization-inference result --
# presumably the lower and upper bounds of the placebo-coefficient interval;
# TODO confirm against randomization_inference in functions.py.
(rand_inf1_coeff[0],rand_inf1_coeff[1])

(-0.7872717172637698, 0.7937867089502365)

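The estimated `treated` coefficients in the table above (-0.899 to -1.462) all lie below the lower of the two randomization-inference bounds shown above (-0.787), so our result seems to be robust at the 5% level.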

In [63]:
# Mean of the third entry of the randomization-inference result -- presumably
# the simulated placebo coefficients, which should average close to zero;
# TODO confirm against randomization_inference in functions.py.
statistics.mean(rand_inf1_coeff[2])

0.010242748482216768

In [64]:
# Regression sample for the confidence outcome: every column of X plus
# y["confidence"], excluding rows whose gpa equals 17.
second_reg = (
    pd.concat([X, y["confidence"]], axis="columns")
    .loc[lambda frame: frame["gpa"] != 17]
)



In [65]:
# Print the LaTeX regression table for confidence, with `treated` as the
# variable of interest, the control sets in `progression`, and robust="no".
confidence_table = create_regression_table(
    second_reg, "confidence", ["treated"], progression, robust="no"
)
print(confidence_table)

\begin{tabular}{llllll}
\hline
 Variables      & (1)          & (2)          & (3)          & (4)          & (5)          \\
\hline
 treated        & -0.157
(0.145)              & -0.158
(0.146)              & -0.167
(0.148)              & -0.174
(0.149)              & -0.214
(0.152)              \\
 taste          & 0.271**
(0.133)              & 0.276**
(0.138)              & 0.296**
(0.128)              & 0.309***
(0.119)              & 0.290**
(0.118)              \\
 const          & 2.662***
(0.454)              & 3.134***
(1.182)              & 3.139**
(1.552)              & 2.555
(1.923)              & 1.604
(2.448)              \\
 age            &              & -0.027
(0.060)              & -0.040
(0.059)              & -0.033
(0.061)              & -0.034
(0.064)              \\
 gender         &              & 0.080
(0.147)              & 0.103
(0.150)              & 0.113
(0.155)              & 0.065
(0.163)              \\
 scholarship    &              &              & 

In [66]:
# Randomization inference for `treated` on confidence.
# NOTE(review): the confidence regression table passes robust="no", but this
# call relies on the default `robust` setting -- confirm this is intended.
# NOTE(review): no random seed is set in this notebook; verify reproducibility.
rand_inf2_coeff = randomization_inference(
    second_reg,
    y_col="confidence",
    treatment_col="treated",
)

In [67]:
# Show the first two entries of the randomization-inference result for
# confidence -- presumably the lower and upper interval bounds; TODO confirm.
(rand_inf2_coeff[0],rand_inf2_coeff[1])

(-0.25461370011913614, 0.27613358656708104)

In [68]:
# Regression sample for the anxiety outcome: every column of X plus
# y["anxiety"], excluding rows whose gpa equals 17.
third_reg = (
    pd.concat([X, y["anxiety"]], axis="columns")
    .loc[lambda frame: frame["gpa"] != 17]
)

In [69]:
# Print the LaTeX regression table for anxiety, with `treated` as the
# variable of interest, the control sets in `progression`, and robust="no".
anxiety_table = create_regression_table(
    third_reg, "anxiety", ["treated"], progression, robust="no"
)
print(anxiety_table)

\begin{tabular}{llllll}
\hline
 Variables      & (1)          & (2)          & (3)          & (4)          & (5)          \\
\hline
 treated        & 0.089
(0.163)              & 0.087
(0.160)              & 0.118
(0.165)              & 0.104
(0.164)              & 0.135
(0.164)              \\
 taste          & -0.001
(0.145)              & 0.029
(0.138)              & 0.028
(0.137)              & -0.107
(0.141)              & -0.093
(0.140)              \\
 const          & 3.274***
(0.496)              & 4.903***
(1.306)              & 5.646***
(1.511)              & 4.962***
(1.422)              & 5.742***
(1.459)              \\
 age            &              & -0.097
(0.060)              & -0.097
(0.062)              & -0.093
(0.058)              & -0.093
(0.056)              \\
 gender         &              & 0.425**
(0.168)              & 0.423**
(0.177)              & 0.446**
(0.176)              & 0.485***
(0.178)              \\
 scholarship    &              &             

In [70]:
# Randomization inference for `treated` on anxiety.
# NOTE(review): the anxiety regression table passes robust="no", but this
# call relies on the default `robust` setting -- confirm this is intended.
# NOTE(review): no random seed is set in this notebook; verify reproducibility.
rand_inf3_coeff = randomization_inference(
    third_reg,
    y_col="anxiety",
    treatment_col="treated",
)

In [71]:
# Show the first two entries of the randomization-inference result for
# anxiety -- presumably the lower and upper interval bounds; TODO confirm.
(rand_inf3_coeff[0],rand_inf3_coeff[1])

(-0.25318757699452454, 0.2547702824972652)

In [72]:
# Regression sample for the time outcome: every column of X plus y["time"],
# excluding rows whose gpa equals 17.
fourth_reg = (
    pd.concat([X, y["time"]], axis="columns")
    .loc[lambda frame: frame["gpa"] != 17]
)

In [73]:
# Print the LaTeX regression table for time, with `treated` as the variable
# of interest and the control sets in `progression`.
time_table = create_regression_table(fourth_reg, "time", ["treated"], progression)
print(time_table)

\begin{tabular}{llllll}
\hline
 Variables      & (1)          & (2)          & (3)          & (4)          & (5)          \\
\hline
 treated        & 0.282
(0.181)              & 0.291
(0.182)              & 0.367**
(0.180)              & 0.380**
(0.181)              & 0.444**
(0.175)              \\
 taste          & 0.138
(0.187)              & 0.158
(0.191)              & 0.230
(0.191)              & 0.144
(0.170)              & 0.122
(0.172)              \\
 const          & 4.002***
(0.654)              & 2.468
(1.836)              & 4.618**
(2.033)              & 5.752**
(2.530)              & 8.104***
(2.346)              \\
 age            &              & 0.067
(0.090)              & 0.019
(0.087)              & 0.005
(0.085)              & 0.008
(0.080)              \\
 gender         &              & 0.268
(0.185)              & 0.364*
(0.192)              & 0.350*
(0.197)              & 0.419**
(0.195)              \\
 scholarship    &              &              & -0.136
(

In [74]:
# Randomization inference for `treated` on time.
# NOTE(review): no random seed is set in this notebook; verify reproducibility.
rand_inf4_coeff = randomization_inference(
    fourth_reg,
    y_col="time",
    treatment_col="treated",
)

In [75]:
# Show the first two entries of the randomization-inference result for
# time -- presumably the lower and upper interval bounds; TODO confirm.
(rand_inf4_coeff[0],rand_inf4_coeff[1])

(-0.3025775119917994, 0.2918436415354852)

In [76]:
# Regression sample for the exam_date outcome: every column of X plus
# y["exam_date"], excluding rows whose gpa equals 17.
fifth_reg = (
    pd.concat([X, y["exam_date"]], axis="columns")
    .loc[lambda frame: frame["gpa"] != 17]
)


In [77]:
# Print the LaTeX regression table for exam_date, with `treated` as the
# variable of interest and the control sets in `progression`.
exam_date_table = create_regression_table(fifth_reg, "exam_date", ["treated"], progression)
print(exam_date_table)

\begin{tabular}{llllll}
\hline
 Variables      & (1)          & (2)          & (3)          & (4)          & (5)          \\
\hline
 treated        & -0.050
(0.055)              & -0.045
(0.056)              & -0.031
(0.057)              & -0.030
(0.059)              & -0.043
(0.061)              \\
 taste          & -0.169
(0.109)              & -0.167
(0.112)              & -0.150
(0.097)              & -0.133*
(0.074)              & -0.127*
(0.076)              \\
 const          & 1.727***
(0.393)              & 0.644
(0.625)              & 1.025
(0.938)              & 1.010
(1.205)              & 0.505
(1.100)              \\
 age            &              & 0.054
(0.033)              & 0.042
(0.033)              & 0.043
(0.034)              & 0.042
(0.036)              \\
 gender         &              & 0.015
(0.072)              & 0.040
(0.063)              & 0.039
(0.066)              & 0.025
(0.078)              \\
 scholarship    &              &              & -0.104**
(0.0

In [78]:
# Randomization inference for `treated` on exam_date.
# NOTE(review): no random seed is set in this notebook; verify reproducibility.
rand_inf5_coeff = randomization_inference(
    fifth_reg,
    y_col="exam_date",
    treatment_col="treated",
)

In [79]:
# Show the first two entries of the randomization-inference result for
# exam_date -- presumably the lower and upper interval bounds; TODO confirm.
(rand_inf5_coeff[0],rand_inf5_coeff[1])

(-0.10621816348880939, 0.11056264063823683)