In [1]:
%cd ..
%matplotlib inline

D:\Projects\Python\PL-Heuristic


In [2]:
import numpy as np
import pandas as pd

from scipy import stats

import statsmodels.api as sm
import statsmodels.formula.api as smf

from analysis.caching import get_cached_results

In [3]:
pd.set_option('display.max_rows', 500)
pd.options.display.float_format = '{:0.2f}'.format

# Preparation

In [4]:
data = get_cached_results()
data.describe()

Unnamed: 0,heuristic_classroom_utilisation,heuristic_instruction_size,heuristic_objective,heuristic_percentage_instruction,heuristic_percentage_self_study,heuristic_self_study_size,heuristic_teacher_utilisation,ilp_classroom_utilisation,ilp_instruction_size,ilp_objective,ilp_percentage_instruction,ilp_percentage_self_study,ilp_self_study_size,ilp_teacher_utilisation,experiment
count,7200.0,7200.0,7200.0,7200.0,7200.0,7200.0,7200.0,7199.0,7199.0,7199.0,7199.0,7199.0,7199.0,7199.0,7200.0
mean,88.24,10.67,6610.21,59.66,40.34,49.52,66.92,88.03,11.67,6748.48,65.4,34.6,44.58,67.23,36.5
std,11.33,3.55,2351.19,21.67,21.67,16.22,14.76,11.95,4.47,2425.31,21.49,21.49,16.18,17.16,20.78
min,60.24,5.4,3469.55,15.0,5.62,15.13,43.75,60.24,5.19,3540.31,18.62,1.12,5.0,39.38,1.0
25%,78.31,7.93,4371.92,43.42,19.5,39.81,53.75,78.31,8.41,4417.57,49.62,15.38,31.25,53.75,18.75
50%,90.36,10.28,6256.42,61.62,38.38,46.26,58.12,89.16,11.43,7713.88,65.5,34.5,44.0,62.5,36.5
75%,100.0,12.22,9011.09,80.5,56.58,63.5,81.25,100.0,14.0,9152.75,84.62,50.38,55.06,83.75,54.25
max,100.0,20.99,9932.6,94.38,85.0,80.0,100.0,100.0,24.36,10080.38,98.88,81.38,80.0,100.0,72.0


In [5]:
# All performance measure columns. In the data set, these are split by 
# method - one for the ILP, and another for the heuristic.
MEASURES = [
    "objective",
    "instruction_size", "self_study_size",
    "percentage_instruction", "percentage_self_study",
    "classroom_utilisation", "teacher_utilisation"]

EXPERIMENTS = list(range(1, 73))

# Analysis

In [6]:
def results_per_experiment(experiment):
    """
    Computes a dictionary of results per experiment. This is composed of the 
    means of each performance measure, per solution method type (one of ilp,
    heuristic).    
    """
    results = {}
    
    for measure in MEASURES:
        ilp_data = data["ilp_" + measure][data.experiment == experiment]
        heuristic_data = data["heuristic_" + measure][data.experiment == experiment]

        results[measure, 'heuristic'] = heuristic_data.mean()
        results[measure, 'ilp'] = ilp_data.mean()

        results[measure, 'difference'] = (heuristic_data - ilp_data).mean()
        results[measure, 'percentage'] = 100 * results[measure, 'difference'] / results[measure, 'ilp']
        results[measure, 't_stat'] = stats.ttest_1samp(heuristic_data - ilp_data, 0, nan_policy='omit').statistic

    return results

In [7]:
result = pd.DataFrame([results_per_experiment(experiment) for experiment in EXPERIMENTS],
                      columns=[(measure, sub)
                               for measure in MEASURES
                               for sub in ['heuristic', 'ilp', 'difference', 'percentage', 't_stat']])

# This presents only the mean results per experiment
means = result.drop(columns=[column for column in result.columns
                             if column[1] not in ['heuristic', 'ilp']])

# This sets the index starting at 1 (for the experiment numbers), and creates
# a hierarchical column index grouped by performance measure and method
means.index = EXPERIMENTS
means.columns = pd.MultiIndex.from_tuples(means.columns,
                                          names=['Measure', 'Method'])

  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


In [8]:
# This presents the pairwise-differences approach
differences = result.drop(columns=[column for column in result.columns
                             if column[1] not in ['difference', 'percentage', 't_stat']])

differences.index = EXPERIMENTS
differences.columns = pd.MultiIndex.from_tuples(differences.columns,
                                                names=['Measure', 'Statistics'])

## Experiment results

In [9]:
# Used to render this table in the paper
with open('experiment_results.tex', 'w') as file:
    means.to_latex(buf=file, float_format="{:0.2f}".format)

## Means

Mean results per experiment, for each method and performance measure.

In [10]:
experiments = pd.read_excel('experiments/experiments.xlsx')

means = means.join(experiments[['experiment', 'learners']].set_index('experiment'))
means



Unnamed: 0,"(objective, heuristic)","(objective, ilp)","(instruction_size, heuristic)","(instruction_size, ilp)","(self_study_size, heuristic)","(self_study_size, ilp)","(percentage_instruction, heuristic)","(percentage_instruction, ilp)","(percentage_self_study, heuristic)","(percentage_self_study, ilp)","(classroom_utilisation, heuristic)","(classroom_utilisation, ilp)","(teacher_utilisation, heuristic)","(teacher_utilisation, ilp)",learners
1,4691.81,4726.15,14.65,15.14,70.1,64.8,73.1,75.7,26.9,24.3,100.0,100.0,53.75,53.75,800
2,4678.85,4717.15,14.48,15.05,70.22,66.06,72.03,75.23,27.96,24.77,99.98,100.0,53.74,53.75,800
3,4672.5,4720.07,14.51,15.15,65.33,69.74,69.28,73.85,30.72,26.15,97.67,97.67,52.5,52.5,800
4,4808.41,4845.05,10.77,12.17,54.39,38.78,85.82,88.5,14.18,11.5,79.43,73.55,82.41,76.31,800
5,4815.61,4853.04,10.8,12.1,54.79,34.51,85.23,88.06,14.77,11.94,78.83,74.69,81.79,77.49,800
6,4755.06,4800.44,11.83,12.83,62.41,36.33,79.3,83.1,20.7,16.9,67.98,67.95,70.53,70.5,800
7,9684.62,9773.15,18.23,22.5,62.06,47.34,89.72,92.19,10.28,7.81,94.8,79.92,50.96,42.96,1600
8,9656.9,9751.89,18.1,22.63,60.26,49.51,89.41,92.2,10.59,7.8,95.33,79.13,51.24,42.53,1600
9,9662.11,9751.2,18.17,22.68,63.32,49.33,87.72,90.41,12.28,9.59,93.6,78.85,50.31,42.38,1600
10,9569.82,9799.91,10.56,13.69,69.94,42.91,81.77,95.32,18.23,4.68,77.23,68.55,80.12,71.12,1600


### Averages by experiment size

In [11]:
means[means.learners == 800].mean()

(objective, heuristic)                4302.65
(objective, ilp)                      4366.55
(instruction_size, heuristic)            9.70
(instruction_size, ilp)                 10.18
(self_study_size, heuristic)            43.41
(self_study_size, ilp)                  42.09
(percentage_instruction, heuristic)     52.47
(percentage_instruction, ilp)           57.35
(percentage_self_study, heuristic)      47.53
(percentage_self_study, ilp)            42.65
(classroom_utilisation, heuristic)      88.67
(classroom_utilisation, ilp)            90.46
(teacher_utilisation, heuristic)        67.24
(teacher_utilisation, ilp)              68.99
learners                               800.00
dtype: float64

In [12]:
means[means.learners == 1600].mean()

(objective, heuristic)                8917.77
(objective, ilp)                      9129.74
(instruction_size, heuristic)           11.63
(instruction_size, ilp)                 13.15
(self_study_size, heuristic)            55.64
(self_study_size, ilp)                  47.08
(percentage_instruction, heuristic)     66.84
(percentage_instruction, ilp)           73.43
(percentage_self_study, heuristic)      33.16
(percentage_self_study, ilp)            26.57
(classroom_utilisation, heuristic)      87.80
(classroom_utilisation, ilp)            85.59
(teacher_utilisation, heuristic)        66.60
(teacher_utilisation, ilp)              65.47
learners                              1600.00
dtype: float64

# Pairwise differences

Pairwise differences between heuristic and ILP solutions, as linear models.

In [13]:
differences

Measure,objective,objective,objective,instruction_size,instruction_size,instruction_size,self_study_size,self_study_size,self_study_size,percentage_instruction,percentage_instruction,percentage_instruction,percentage_self_study,percentage_self_study,percentage_self_study,classroom_utilisation,classroom_utilisation,classroom_utilisation,teacher_utilisation,teacher_utilisation,teacher_utilisation
Statistics,difference,percentage,t_stat,difference,percentage,t_stat,difference,percentage,t_stat,difference,...,t_stat,difference,percentage,t_stat,difference,percentage,t_stat,difference,percentage,t_stat
1,-34.34,-0.73,-25.56,-0.49,-3.25,-17.46,5.31,8.19,11.66,-2.6,...,-15.15,2.6,10.69,15.15,0.0,0.0,,0.0,0.0,
2,-38.3,-0.81,-26.47,-0.56,-3.73,-20.22,4.17,6.31,6.9,-3.19,...,-17.9,3.19,12.89,17.9,-0.02,-0.02,-1.0,-0.01,-0.02,-1.0
3,-47.57,-1.01,-26.48,-0.64,-4.19,-20.09,-4.41,-6.33,-5.38,-4.56,...,-19.99,4.56,17.45,19.99,0.0,0.0,,0.0,0.0,
4,-36.64,-0.76,-43.83,-1.41,-11.56,-37.44,15.61,40.26,9.63,-2.67,...,-31.5,2.67,23.26,31.5,5.88,7.99,17.84,6.1,7.99,17.84
5,-37.42,-0.77,-40.61,-1.31,-10.79,-31.27,20.29,58.79,11.45,-2.83,...,-27.76,2.83,23.69,27.76,4.14,5.55,7.96,4.3,5.55,7.96
6,-45.37,-0.95,-40.3,-1.0,-7.77,-32.44,26.09,71.81,12.49,-3.81,...,-28.88,3.81,22.52,28.88,0.02,0.04,0.07,0.03,0.04,0.07
7,-88.53,-0.91,-71.69,-4.27,-18.99,-46.58,14.72,31.1,7.77,-2.47,...,-33.51,2.47,31.61,33.51,14.88,18.62,34.43,8.0,18.62,34.43
8,-94.99,-0.97,-68.34,-4.53,-20.02,-54.39,10.76,21.72,5.73,-2.79,...,-38.94,2.79,35.74,38.94,16.2,20.47,45.19,8.71,20.47,45.19
9,-89.1,-0.91,-71.64,-4.51,-19.89,-52.49,13.98,28.34,6.69,-2.68,...,-40.97,2.68,27.99,40.97,14.76,18.71,25.34,7.93,18.71,25.34
10,-230.08,-2.35,-166.27,-3.13,-22.86,-81.81,27.03,63.0,12.06,-13.55,...,-175.21,13.55,289.48,175.21,8.67,12.65,31.72,9.0,12.65,31.72


In [14]:
# We do not need the 'other', informative data here. Just the differences,
# and the experiment parameters will suffice.
for measure in MEASURES:
    data[measure + "_diff"] = data["heuristic_" + measure] - data["ilp_" + measure]

diffs = data[[measure + "_diff" for measure in MEASURES]].copy()
diffs['experiment'] = data['experiment']

experiments = pd.read_excel('experiments/experiments.xlsx')
diffs = diffs.join(experiments.set_index('experiment'), on='experiment')

diffs.describe()

Unnamed: 0,objective_diff,instruction_size_diff,self_study_size_diff,percentage_instruction_diff,percentage_self_study_diff,classroom_utilisation_diff,teacher_utilisation_diff,experiment,penalty,progress,learners,instruction_classrooms,instruction_size,self_study_classrooms,self_study_size,num_teachers,first_degree,second_degree,third_degree
count,7199.0,7199.0,7199.0,7199.0,7199.0,7199.0,7199.0,7200.0,7200.0,7200.0,7200.0,7200.0,7200.0,7200.0,7200.0,7200.0,7200.0,7200.0,7200.0
mean,-137.94,-1.0,4.94,-5.74,5.74,0.21,-0.31,36.5,1.5,1.0,1200.0,90.0,24.0,4.5,80.0,120.0,0.63,0.3,0.07
std,127.33,1.3,18.61,3.84,3.84,6.29,5.28,20.78,0.5,0.82,400.03,43.59,8.0,1.5,0.0,40.0,0.26,0.22,0.09
min,-717.51,-6.62,-44.5,-19.12,-1.0,-24.1,-25.0,1.0,1.0,0.0,800.0,40.0,16.0,3.0,80.0,80.0,0.4,0.0,0.0
25%,-179.01,-1.2,-6.38,-7.19,2.88,-2.33,-1.88,18.75,1.0,0.0,800.0,70.0,16.0,3.0,80.0,80.0,0.4,0.0,0.0
50%,-91.55,-0.62,-2.38,-4.75,4.75,0.0,0.0,36.5,1.5,1.0,1200.0,80.0,24.0,4.5,80.0,120.0,0.5,0.4,0.0
75%,-57.34,-0.16,12.0,-2.88,7.19,0.6,0.62,54.25,2.0,2.0,1600.0,100.0,32.0,6.0,80.0,160.0,1.0,0.5,0.2
max,-5.33,0.97,75.0,1.0,19.12,23.26,16.25,72.0,2.0,2.0,1600.0,160.0,32.0,6.0,80.0,160.0,1.0,0.5,0.2


In [15]:
# This works since each experiment has the same number of instances
print("Mean difference in objective values is {:.2f}%."
      .format(differences['objective', 'percentage'].mean()))

Mean difference in objective values is -1.89%.


## Linear models

This estimates linear models for all parameter coefficients.

In [16]:
EXOG = [
    "C(learners, Treatment(reference=800))",
    "C(instruction_size, Treatment(reference=32))",
    "C(first_degree, Treatment(reference=1.0))",
    "C(penalty, Treatment(reference=1.0))",
    "C(progress, Treatment(reference=0.0))",
    #"C(progress, Treatment(reference=0.0)) : C(penalty, Treatment(reference=1.0))"
]

RESULTS = {}

### Objective

In [17]:
model = smf.ols(formula="objective_diff ~ " + "+".join(EXOG), data=diffs)

result = model.fit(cov_type='HC3')
RESULTS['objective'] = [result.params, result.pvalues]

result.summary()

0,1,2,3
Dep. Variable:,objective_diff,R-squared:,0.695
Model:,OLS,Adj. R-squared:,0.695
Method:,Least Squares,F-statistic:,1475.0
Date:,"Mon, 14 Oct 2019",Prob (F-statistic):,0.0
Time:,14:24:20,Log-Likelihood:,-40828.0
No. Observations:,7199,AIC:,81670.0
Df Residuals:,7191,BIC:,81730.0
Df Model:,7,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-14.8294,2.000,-7.416,0.000,-18.749,-10.910
"C(learners, Treatment(reference=800))[T.1600]",-148.1118,1.659,-89.301,0.000,-151.363,-144.861
"C(instruction_size, Treatment(reference=32))[T.16]",-77.7891,1.659,-46.901,0.000,-81.040,-74.538
"C(first_degree, Treatment(reference=1.0))[T.0.4]",6.3072,2.046,3.082,0.002,2.296,10.318
"C(first_degree, Treatment(reference=1.0))[T.0.5]",-2.3062,1.944,-1.186,0.236,-6.117,1.504
"C(penalty, Treatment(reference=1.0))[T.2]",-113.0250,1.659,-68.145,0.000,-116.276,-109.774
"C(progress, Treatment(reference=0.0))[T.1]",58.1782,2.273,25.597,0.000,53.723,62.633
"C(progress, Treatment(reference=0.0))[T.2]",76.9564,2.297,33.496,0.000,72.453,81.459

0,1,2,3
Omnibus:,2519.365,Durbin-Watson:,0.155
Prob(Omnibus):,0.0,Jarque-Bera (JB):,9604.449
Skew:,-1.725,Prob(JB):,0.0
Kurtosis:,7.484,Cond. No.,5.89


### Instruction size

In [18]:
model = smf.ols(formula="instruction_size_diff ~ " + "+".join(EXOG), data=diffs)

result = model.fit(cov_type='HC3')
RESULTS['instruction_size'] = [result.params, result.pvalues]

result.summary()

0,1,2,3
Dep. Variable:,instruction_size_diff,R-squared:,0.607
Model:,OLS,Adj. R-squared:,0.607
Method:,Least Squares,F-statistic:,1110.0
Date:,"Mon, 14 Oct 2019",Prob (F-statistic):,0.0
Time:,14:24:21,Log-Likelihood:,-8712.6
No. Observations:,7199,AIC:,17440.0
Df Residuals:,7191,BIC:,17500.0
Df Model:,7,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-1.9457,0.036,-54.740,0.000,-2.015,-1.876
"C(learners, Treatment(reference=800))[T.1600]",-1.0421,0.019,-54.409,0.000,-1.080,-1.005
"C(instruction_size, Treatment(reference=32))[T.16]",0.6825,0.019,35.632,0.000,0.645,0.720
"C(first_degree, Treatment(reference=1.0))[T.0.4]",-0.0178,0.023,-0.764,0.445,-0.063,0.028
"C(first_degree, Treatment(reference=1.0))[T.0.5]",0.0020,0.024,0.085,0.932,-0.045,0.049
"C(penalty, Treatment(reference=1.0))[T.2]",0.0295,0.019,1.538,0.124,-0.008,0.067
"C(progress, Treatment(reference=0.0))[T.1]",1.5939,0.027,59.153,0.000,1.541,1.647
"C(progress, Treatment(reference=0.0))[T.2]",1.7608,0.026,68.751,0.000,1.711,1.811

0,1,2,3
Omnibus:,79.522,Durbin-Watson:,0.345
Prob(Omnibus):,0.0,Jarque-Bera (JB):,109.679
Skew:,-0.145,Prob(JB):,1.5299999999999999e-24
Kurtosis:,3.53,Cond. No.,5.89


### Self-study size

In [19]:
model = smf.ols(formula="self_study_size_diff ~ " + "+".join(EXOG), data=diffs)

result = model.fit(cov_type='HC3')
RESULTS['self_study_size'] = [result.params, result.pvalues]

result.summary()

0,1,2,3
Dep. Variable:,self_study_size_diff,R-squared:,0.442
Model:,OLS,Adj. R-squared:,0.441
Method:,Least Squares,F-statistic:,572.3
Date:,"Mon, 14 Oct 2019",Prob (F-statistic):,0.0
Time:,14:24:21,Log-Likelihood:,-29164.0
No. Observations:,7199,AIC:,58340.0
Df Residuals:,7191,BIC:,58400.0
Df Model:,7,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,9.0586,0.504,17.983,0.000,8.071,10.046
"C(learners, Treatment(reference=800))[T.1600]",7.2372,0.328,22.058,0.000,6.594,7.880
"C(instruction_size, Treatment(reference=32))[T.16]",10.1594,0.328,30.965,0.000,9.516,10.802
"C(first_degree, Treatment(reference=1.0))[T.0.4]",0.7850,0.407,1.927,0.054,-0.013,1.583
"C(first_degree, Treatment(reference=1.0))[T.0.5]",0.1610,0.405,0.397,0.691,-0.634,0.956
"C(penalty, Treatment(reference=1.0))[T.2]",3.3229,0.328,10.128,0.000,2.680,3.966
"C(progress, Treatment(reference=0.0))[T.1]",-20.6322,0.463,-44.605,0.000,-21.539,-19.726
"C(progress, Treatment(reference=0.0))[T.2]",-23.7535,0.430,-55.297,0.000,-24.595,-22.912

0,1,2,3
Omnibus:,197.813,Durbin-Watson:,1.334
Prob(Omnibus):,0.0,Jarque-Bera (JB):,497.492
Skew:,0.016,Prob(JB):,9.35e-109
Kurtosis:,4.287,Cond. No.,5.89


### Percentage instruction

In [20]:
model = smf.ols(formula="percentage_instruction_diff ~ " + "+".join(EXOG), data=diffs)

result = model.fit(cov_type='HC3')
RESULTS['percentage_instruction'] = [result.params, result.pvalues]

result.summary()

0,1,2,3
Dep. Variable:,percentage_instruction_diff,R-squared:,0.111
Model:,OLS,Adj. R-squared:,0.11
Method:,Least Squares,F-statistic:,148.7
Date:,"Mon, 14 Oct 2019",Prob (F-statistic):,1.04e-205
Time:,14:24:22,Log-Likelihood:,-19484.0
No. Observations:,7199,AIC:,38980.0
Df Residuals:,7191,BIC:,39040.0
Df Model:,7,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-4.4816,0.109,-41.268,0.000,-4.694,-4.269
"C(learners, Treatment(reference=800))[T.1600]",-1.7164,0.086,-20.069,0.000,-1.884,-1.549
"C(instruction_size, Treatment(reference=32))[T.16]",-1.5872,0.086,-18.562,0.000,-1.755,-1.420
"C(first_degree, Treatment(reference=1.0))[T.0.4]",0.2475,0.105,2.365,0.018,0.042,0.453
"C(first_degree, Treatment(reference=1.0))[T.0.5]",-0.1975,0.102,-1.931,0.054,-0.398,0.003
"C(penalty, Treatment(reference=1.0))[T.2]",0.8402,0.086,9.825,0.000,0.673,1.008
"C(progress, Treatment(reference=0.0))[T.1]",0.2625,0.106,2.484,0.013,0.055,0.470
"C(progress, Treatment(reference=0.0))[T.2]",-0.3793,0.114,-3.340,0.001,-0.602,-0.157

0,1,2,3
Omnibus:,798.408,Durbin-Watson:,0.261
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1095.763
Skew:,-0.954,Prob(JB):,1.14e-238
Kurtosis:,3.101,Cond. No.,5.89


### Percentage self-study

In [21]:
model = smf.ols(formula="percentage_self_study_diff ~ " + "+".join(EXOG), data=diffs)

result = model.fit(cov_type='HC3')
RESULTS['percentage_self_study'] = [result.params, result.pvalues]

result.summary()

0,1,2,3
Dep. Variable:,percentage_self_study_diff,R-squared:,0.111
Model:,OLS,Adj. R-squared:,0.11
Method:,Least Squares,F-statistic:,148.7
Date:,"Mon, 14 Oct 2019",Prob (F-statistic):,1.04e-205
Time:,14:24:22,Log-Likelihood:,-19484.0
No. Observations:,7199,AIC:,38980.0
Df Residuals:,7191,BIC:,39040.0
Df Model:,7,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,4.4816,0.109,41.268,0.000,4.269,4.694
"C(learners, Treatment(reference=800))[T.1600]",1.7164,0.086,20.069,0.000,1.549,1.884
"C(instruction_size, Treatment(reference=32))[T.16]",1.5872,0.086,18.562,0.000,1.420,1.755
"C(first_degree, Treatment(reference=1.0))[T.0.4]",-0.2475,0.105,-2.365,0.018,-0.453,-0.042
"C(first_degree, Treatment(reference=1.0))[T.0.5]",0.1975,0.102,1.931,0.054,-0.003,0.398
"C(penalty, Treatment(reference=1.0))[T.2]",-0.8402,0.086,-9.825,0.000,-1.008,-0.673
"C(progress, Treatment(reference=0.0))[T.1]",-0.2625,0.106,-2.484,0.013,-0.470,-0.055
"C(progress, Treatment(reference=0.0))[T.2]",0.3793,0.114,3.340,0.001,0.157,0.602

0,1,2,3
Omnibus:,798.408,Durbin-Watson:,0.261
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1095.763
Skew:,0.954,Prob(JB):,1.14e-238
Kurtosis:,3.101,Cond. No.,5.89


### Classroom pool utilisation

In [22]:
model = smf.ols(formula="classroom_utilisation_diff ~ " + "+".join(EXOG), data=diffs)

result = model.fit(cov_type='HC3')
RESULTS['classroom_utilisation'] = [result.params, result.pvalues]

result.summary()

0,1,2,3
Dep. Variable:,classroom_utilisation_diff,R-squared:,0.553
Model:,OLS,Adj. R-squared:,0.552
Method:,Least Squares,F-statistic:,742.5
Date:,"Mon, 14 Oct 2019",Prob (F-statistic):,0.0
Time:,14:24:23,Log-Likelihood:,-20554.0
No. Observations:,7199,AIC:,41120.0
Df Residuals:,7191,BIC:,41180.0
Df Model:,7,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,4.8828,0.160,30.570,0.000,4.570,5.196
"C(learners, Treatment(reference=800))[T.1600]",4.0057,0.099,40.368,0.000,3.811,4.200
"C(instruction_size, Treatment(reference=32))[T.16]",-3.7834,0.099,-38.136,0.000,-3.978,-3.589
"C(first_degree, Treatment(reference=1.0))[T.0.4]",0.0033,0.121,0.028,0.978,-0.233,0.240
"C(first_degree, Treatment(reference=1.0))[T.0.5]",-0.3072,0.122,-2.515,0.012,-0.547,-0.068
"C(penalty, Treatment(reference=1.0))[T.2]",0.9050,0.099,9.120,0.000,0.711,1.099
"C(progress, Treatment(reference=0.0))[T.1]",-6.5776,0.123,-53.674,0.000,-6.818,-6.337
"C(progress, Treatment(reference=0.0))[T.2]",-8.8360,0.135,-65.314,0.000,-9.101,-8.571

0,1,2,3
Omnibus:,221.946,Durbin-Watson:,0.677
Prob(Omnibus):,0.0,Jarque-Bera (JB):,481.232
Skew:,-0.182,Prob(JB):,3.17e-105
Kurtosis:,4.213,Cond. No.,5.89


### Teacher pool utilisation

In [23]:
model = smf.ols(formula="teacher_utilisation_diff ~ " + "+".join(EXOG), data=diffs)

result = model.fit(cov_type='HC3')
RESULTS['teacher_utilisation'] = [result.params, result.pvalues]

deresult.summary()

NameError: name 'deresult' is not defined

In [None]:
FIELDS = ['base', '1600', '16 size', '(0.4; 0.4; 0.2)', '(0.5; 0.5; 0)',
          'w2', 'sigma 1', 'sigma 2']

def significance(value):
    if value <= 0.01:
        return "**"
    
    if value <= 0.05:
        return "*"
    
    return ""

for idx in range(len(FIELDS)):
    print(FIELDS[idx], end=' & ')
    
    for measure in MEASURES:
        param = RESULTS[measure][0][idx]
        p_val = RESULTS[measure][1][idx]

        print(f'${param:0.2f}^{{{significance(p_val)}}}$', end=' & ')

    print()