In [1]:
# Switch the working directory to the parent folder.
# NOTE(review): presumably the project root, so that the `analysis` package and
# the relative `experiments/` path used further down resolve - confirm.
# Caveat: the path is relative, so re-running this cell climbs one more level
# each time; restart the kernel before running the notebook again.
%cd ..
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

D:\Projects\Python\PL-Heuristic


In [2]:
from analysis.caching import get_cached_results

import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Preparation

In [3]:
# Load the cached experiment results (used below as a pandas DataFrame).
cached_results = get_cached_results()

# Prune all heuristic-related columns, as only the ILP results are analysed here.
# The drop returns a new frame instead of using `inplace=True`, so the object
# handed out by `get_cached_results()` is never mutated behind the cache's back
# and this cell gives the same result every time it is re-run.
heuristic_columns = list(cached_results.filter(regex='heuristic'))
data = cached_results.drop(heuristic_columns, axis=1)

# Summary statistics as a sanity check. NOTE(review): in the recorded output the
# ILP columns count 7199 values against 7200 experiment rows, i.e. one run seems
# to have no ILP result - confirm this is expected.
data.describe()

Unnamed: 0,ilp_classroom_utilisation,ilp_instruction_size,ilp_objective,ilp_percentage_instruction,ilp_percentage_self_study,ilp_self_study_size,ilp_teacher_utilisation,experiment
count,7199.0,7199.0,7199.0,7199.0,7199.0,7199.0,7199.0,7200.0
mean,88.02564,11.666901,6747.593221,65.397746,34.602254,44.584185,67.231039,36.5
std,11.950004,4.471715,2425.311201,21.486019,21.486019,16.177522,17.161373,20.784048
min,60.240964,5.185185,3533.340676,18.625,1.125,5.0,39.375,1.0
25%,78.313253,8.408259,4417.163803,49.625,15.375,31.25,53.75,18.75
50%,89.156627,11.428571,5018.009891,65.5,34.5,44.0,62.5,36.5
75%,100.0,14.0,9152.171242,84.625,50.375,55.055556,83.75,54.25
max,100.0,24.360656,10080.377678,98.875,81.375,80.0,100.0,72.0


In [4]:
# Parameter settings of the experiments, read from the experiment workbook.
experiments = pd.read_excel('experiments/experiments.xlsx')

# Attach the parameter combination to every result row by looking up the row's
# `experiment` id in the experiment table (left join: all result rows are kept).
parameters = experiments.set_index('experiment')
df = data.join(parameters, on='experiment')

df.describe()

Unnamed: 0,ilp_classroom_utilisation,ilp_instruction_size,ilp_objective,ilp_percentage_instruction,ilp_percentage_self_study,ilp_self_study_size,ilp_teacher_utilisation,experiment,penalty,progress,learners,instruction_classrooms,instruction_size,self_study_classrooms,self_study_size,num_teachers,first_degree,second_degree,third_degree
count,7199.0,7199.0,7199.0,7199.0,7199.0,7199.0,7199.0,7200.0,7200.0,7200.0,7200.0,7200.0,7200.0,7200.0,7200.0,7200.0,7200.0,7200.0,7200.0
mean,88.02564,11.666901,6747.593221,65.397746,34.602254,44.584185,67.231039,36.5,1.5,1.0,1200.0,90.0,24.0,4.5,80.0,120.0,0.633333,0.3,0.066667
std,11.950004,4.471715,2425.311201,21.486019,21.486019,16.177522,17.161373,20.784048,0.500035,0.816553,400.027781,43.592017,8.000556,1.500104,0.0,40.002778,0.262485,0.21604,0.094287
min,60.240964,5.185185,3533.340676,18.625,1.125,5.0,39.375,1.0,1.0,0.0,800.0,40.0,16.0,3.0,80.0,80.0,0.4,0.0,0.0
25%,78.313253,8.408259,4417.163803,49.625,15.375,31.25,53.75,18.75,1.0,0.0,800.0,70.0,16.0,3.0,80.0,80.0,0.4,0.0,0.0
50%,89.156627,11.428571,5018.009891,65.5,34.5,44.0,62.5,36.5,1.5,1.0,1200.0,80.0,24.0,4.5,80.0,120.0,0.5,0.4,0.0
75%,100.0,14.0,9152.171242,84.625,50.375,55.055556,83.75,54.25,2.0,2.0,1600.0,100.0,32.0,6.0,80.0,160.0,1.0,0.5,0.2
max,100.0,24.360656,10080.377678,98.875,81.375,80.0,100.0,72.0,2.0,2.0,1600.0,160.0,32.0,6.0,80.0,160.0,1.0,0.5,0.2


# Analysis

In [5]:
def _treatment_term(factor, reference):
    """Return the formula term for `factor` as a treatment-coded categorical.

    `reference` is the level that the other levels of the factor are compared to.
    """
    return "C({}, Treatment(reference={}))".format(factor, reference)


# Explanatory terms shared by every regression below; each experiment factor is
# entered as a categorical variable with an explicit reference level.
EXOG = [
    _treatment_term("learners", 800),
    _treatment_term("instruction_size", 32),
    _treatment_term("first_degree", 1.0),
    _treatment_term("penalty", 1.0),
    _treatment_term("progress", 0.0),
    # Disabled interaction term (instruction_size x first_degree):
    # _treatment_term("instruction_size", 32) + " : " + _treatment_term("first_degree", 1.0)
]

## Classroom pool utilisation

In [6]:
# Log-linear OLS: log classroom pool utilisation explained by the factors in EXOG.
formula = "{} ~ {}".format("np.log(ilp_classroom_utilisation)", "+".join(EXOG))
model = smf.ols(formula=formula, data=df)

# Fit with the HC3 heteroskedasticity-robust covariance estimator.
result = model.fit(cov_type='HC3')
result.summary()

0,1,2,3
Dep. Variable:,np.log(ilp_classroom_utilisation),R-squared:,0.838
Model:,OLS,Adj. R-squared:,0.837
Method:,Least Squares,F-statistic:,7189.0
Date:,"Mon, 02 Sep 2019",Prob (F-statistic):,0.0
Time:,17:05:59,Log-Likelihood:,10368.0
No. Observations:,7199,AIC:,-20720.0
Df Residuals:,7191,BIC:,-20670.0
Df Model:,7,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,4.5212,0.002,1869.239,0.000,4.516,4.526
"C(learners, Treatment(reference=800))[T.1600]",-0.0573,0.001,-42.360,0.000,-0.060,-0.055
"C(instruction_size, Treatment(reference=32))[T.16]",-0.1947,0.001,-143.976,0.000,-0.197,-0.192
"C(first_degree, Treatment(reference=1.0))[T.0.4]",-0.0566,0.002,-33.782,0.000,-0.060,-0.053
"C(first_degree, Treatment(reference=1.0))[T.0.5]",-0.0041,0.002,-2.561,0.010,-0.007,-0.001
"C(penalty, Treatment(reference=1.0))[T.2]",-0.0250,0.001,-18.497,0.000,-0.028,-0.022
"C(progress, Treatment(reference=0.0))[T.1]",0.1384,0.002,80.791,0.000,0.135,0.142
"C(progress, Treatment(reference=0.0))[T.2]",0.1780,0.002,98.007,0.000,0.174,0.182

0,1,2,3
Omnibus:,95.829,Durbin-Watson:,0.299
Prob(Omnibus):,0.0,Jarque-Bera (JB):,94.16
Skew:,0.256,Prob(JB):,3.58e-21
Kurtosis:,2.773,Cond. No.,5.89


## Teacher pool utilisation

In [7]:
# Log-linear OLS: log teacher pool utilisation explained by the factors in EXOG.
formula = "{} ~ {}".format("np.log(ilp_teacher_utilisation)", "+".join(EXOG))
model = smf.ols(formula=formula, data=df)

# Fit with the HC3 heteroskedasticity-robust covariance estimator.
result = model.fit(cov_type='HC3')
result.summary()

0,1,2,3
Dep. Variable:,np.log(ilp_teacher_utilisation),R-squared:,0.949
Model:,OLS,Adj. R-squared:,0.949
Method:,Least Squares,F-statistic:,19280.0
Date:,"Mon, 02 Sep 2019",Prob (F-statistic):,0.0
Time:,17:05:59,Log-Likelihood:,10368.0
No. Observations:,7199,AIC:,-20720.0
Df Residuals:,7191,BIC:,-20670.0
Df Model:,7,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,3.9003,0.002,1612.563,0.000,3.896,3.905
"C(learners, Treatment(reference=800))[T.1600]",-0.0573,0.001,-42.360,0.000,-0.060,-0.055
"C(instruction_size, Treatment(reference=32))[T.16]",0.4629,0.001,342.236,0.000,0.460,0.466
"C(first_degree, Treatment(reference=1.0))[T.0.4]",-0.0566,0.002,-33.782,0.000,-0.060,-0.053
"C(first_degree, Treatment(reference=1.0))[T.0.5]",-0.0041,0.002,-2.561,0.010,-0.007,-0.001
"C(penalty, Treatment(reference=1.0))[T.2]",-0.0250,0.001,-18.497,0.000,-0.028,-0.022
"C(progress, Treatment(reference=0.0))[T.1]",0.1384,0.002,80.791,0.000,0.135,0.142
"C(progress, Treatment(reference=0.0))[T.2]",0.1780,0.002,98.007,0.000,0.174,0.182

0,1,2,3
Omnibus:,95.829,Durbin-Watson:,0.299
Prob(Omnibus):,0.0,Jarque-Bera (JB):,94.16
Skew:,0.256,Prob(JB):,3.58e-21
Kurtosis:,2.773,Cond. No.,5.89


## Instruction activity size

In [8]:
# Log-linear OLS: log instruction activity size explained by the factors in EXOG.
formula = "{} ~ {}".format("np.log(ilp_instruction_size)", "+".join(EXOG))
model = smf.ols(formula=formula, data=df)

# Fit with the HC3 heteroskedasticity-robust covariance estimator.
result = model.fit(cov_type='HC3')
result.summary()

0,1,2,3
Dep. Variable:,np.log(ilp_instruction_size),R-squared:,0.958
Model:,OLS,Adj. R-squared:,0.958
Method:,Least Squares,F-statistic:,30650.0
Date:,"Mon, 02 Sep 2019",Prob (F-statistic):,0.0
Time:,17:06:00,Log-Likelihood:,8424.8
No. Observations:,7199,AIC:,-16830.0
Df Residuals:,7191,BIC:,-16780.0
Df Model:,7,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,2.7100,0.003,1035.449,0.000,2.705,2.715
"C(learners, Treatment(reference=800))[T.1600]",0.2543,0.002,143.509,0.000,0.251,0.258
"C(instruction_size, Treatment(reference=32))[T.16]",-0.3104,0.002,-175.184,0.000,-0.314,-0.307
"C(first_degree, Treatment(reference=1.0))[T.0.4]",0.0355,0.002,16.164,0.000,0.031,0.040
"C(first_degree, Treatment(reference=1.0))[T.0.5]",0.0012,0.002,0.578,0.563,-0.003,0.005
"C(penalty, Treatment(reference=1.0))[T.2]",0.1504,0.002,84.855,0.000,0.147,0.154
"C(progress, Treatment(reference=0.0))[T.1]",-0.4481,0.002,-207.656,0.000,-0.452,-0.444
"C(progress, Treatment(reference=0.0))[T.2]",-0.6949,0.002,-285.124,0.000,-0.700,-0.690

0,1,2,3
Omnibus:,175.859,Durbin-Watson:,0.2
Prob(Omnibus):,0.0,Jarque-Bera (JB):,217.916
Skew:,-0.316,Prob(JB):,4.7899999999999994e-48
Kurtosis:,3.572,Cond. No.,5.89


## Self-study activity size

In [9]:
# Log-linear OLS: log self-study activity size explained by the factors in EXOG.
formula = "{} ~ {}".format("np.log(ilp_self_study_size)", "+".join(EXOG))
model = smf.ols(formula=formula, data=df)

# Fit with the HC3 heteroskedasticity-robust covariance estimator.
result = model.fit(cov_type='HC3')
result.summary()

0,1,2,3
Dep. Variable:,np.log(ilp_self_study_size),R-squared:,0.282
Model:,OLS,Adj. R-squared:,0.281
Method:,Least Squares,F-statistic:,498.8
Date:,"Mon, 02 Sep 2019",Prob (F-statistic):,0.0
Time:,17:06:00,Log-Likelihood:,-2839.3
No. Observations:,7199,AIC:,5695.0
Df Residuals:,7191,BIC:,5750.0
Df Model:,7,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,3.7955,0.013,288.126,0.000,3.770,3.821
"C(learners, Treatment(reference=800))[T.1600]",0.0854,0.008,10.078,0.000,0.069,0.102
"C(instruction_size, Treatment(reference=32))[T.16]",-0.3871,0.008,-45.702,0.000,-0.404,-0.371
"C(first_degree, Treatment(reference=1.0))[T.0.4]",-0.0372,0.010,-3.562,0.000,-0.058,-0.017
"C(first_degree, Treatment(reference=1.0))[T.0.5]",-0.0007,0.011,-0.069,0.945,-0.022,0.020
"C(penalty, Treatment(reference=1.0))[T.2]",-0.0216,0.008,-2.552,0.011,-0.038,-0.005
"C(progress, Treatment(reference=0.0))[T.1]",0.2422,0.012,19.747,0.000,0.218,0.266
"C(progress, Treatment(reference=0.0))[T.2]",0.0516,0.012,4.423,0.000,0.029,0.075

0,1,2,3
Omnibus:,1488.63,Durbin-Watson:,1.184
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5080.777
Skew:,-1.026,Prob(JB):,0.0
Kurtosis:,6.567,Cond. No.,5.89


## Objective value

In [10]:
# Log-linear OLS: log ILP objective value explained by the factors in EXOG.
formula = "{} ~ {}".format("np.log(ilp_objective)", "+".join(EXOG))
model = smf.ols(formula=formula, data=df)

# Fit with the HC3 heteroskedasticity-robust covariance estimator.
result = model.fit(cov_type='HC3')
result.summary()

0,1,2,3
Dep. Variable:,np.log(ilp_objective),R-squared:,0.994
Model:,OLS,Adj. R-squared:,0.994
Method:,Least Squares,F-statistic:,197000.0
Date:,"Mon, 02 Sep 2019",Prob (F-statistic):,0.0
Time:,17:06:00,Log-Likelihood:,15120.0
No. Observations:,7199,AIC:,-30220.0
Df Residuals:,7191,BIC:,-30170.0
Df Model:,7,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,8.4694,0.001,9910.331,0.000,8.468,8.471
"C(learners, Treatment(reference=800))[T.1600]",0.7384,0.001,1056.216,0.000,0.737,0.740
"C(instruction_size, Treatment(reference=32))[T.16]",0.0336,0.001,48.056,0.000,0.032,0.035
"C(first_degree, Treatment(reference=1.0))[T.0.4]",-0.0090,0.001,-11.467,0.000,-0.011,-0.007
"C(first_degree, Treatment(reference=1.0))[T.0.5]",-0.0015,0.001,-1.702,0.089,-0.003,0.000
"C(penalty, Treatment(reference=1.0))[T.2]",-0.0656,0.001,-93.851,0.000,-0.067,-0.064
"C(progress, Treatment(reference=0.0))[T.1]",-0.0857,0.001,-99.303,0.000,-0.087,-0.084
"C(progress, Treatment(reference=0.0))[T.2]",-0.1278,0.001,-133.590,0.000,-0.130,-0.126

0,1,2,3
Omnibus:,8752.2,Durbin-Watson:,0.759
Prob(Omnibus):,0.0,Jarque-Bera (JB):,10038544.719
Skew:,-5.691,Prob(JB):,0.0
Kurtosis:,185.584,Cond. No.,5.89


## Percentage of learners in instruction activities

In [11]:
# Log-linear OLS: log percentage of learners in instruction activities,
# explained by the factors in EXOG.
formula = "{} ~ {}".format("np.log(ilp_percentage_instruction)", "+".join(EXOG))
model = smf.ols(formula=formula, data=df)

# Fit with the HC3 heteroskedasticity-robust covariance estimator.
result = model.fit(cov_type='HC3')
result.summary()

0,1,2,3
Dep. Variable:,np.log(ilp_percentage_instruction),R-squared:,0.897
Model:,OLS,Adj. R-squared:,0.897
Method:,Least Squares,F-statistic:,5367.0
Date:,"Mon, 02 Sep 2019",Prob (F-statistic):,0.0
Time:,17:06:01,Log-Likelihood:,5049.9
No. Observations:,7199,AIC:,-10080.0
Df Residuals:,7191,BIC:,-10030.0
Df Model:,7,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,4.1668,0.004,937.862,0.000,4.158,4.176
"C(learners, Treatment(reference=800))[T.1600]",0.2895,0.003,102.233,0.000,0.284,0.295
"C(instruction_size, Treatment(reference=32))[T.16]",0.2175,0.003,76.787,0.000,0.212,0.223
"C(first_degree, Treatment(reference=1.0))[T.0.4]",-0.0503,0.003,-14.674,0.000,-0.057,-0.044
"C(first_degree, Treatment(reference=1.0))[T.0.5]",-0.0058,0.004,-1.635,0.102,-0.013,0.001
"C(penalty, Treatment(reference=1.0))[T.2]",0.1757,0.003,62.040,0.000,0.170,0.181
"C(progress, Treatment(reference=0.0))[T.1]",-0.4104,0.003,-130.436,0.000,-0.417,-0.404
"C(progress, Treatment(reference=0.0))[T.2]",-0.7067,0.004,-181.524,0.000,-0.714,-0.699

0,1,2,3
Omnibus:,815.444,Durbin-Watson:,0.099
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1118.56
Skew:,-0.919,Prob(JB):,1.28e-243
Kurtosis:,3.594,Cond. No.,5.89


## Percentage of learners in self-study activities

In [12]:
# Log-linear OLS: log percentage of learners in self-study activities,
# explained by the factors in EXOG.
formula = "{} ~ {}".format("np.log(ilp_percentage_self_study)", "+".join(EXOG))
model = smf.ols(formula=formula, data=df)

# Fit with the HC3 heteroskedasticity-robust covariance estimator.
result = model.fit(cov_type='HC3')
result.summary()

0,1,2,3
Dep. Variable:,np.log(ilp_percentage_self_study),R-squared:,0.933
Model:,OLS,Adj. R-squared:,0.933
Method:,Least Squares,F-statistic:,8855.0
Date:,"Mon, 02 Sep 2019",Prob (F-statistic):,0.0
Time:,17:06:01,Log-Likelihood:,-120.79
No. Observations:,7199,AIC:,257.6
Df Residuals:,7191,BIC:,312.6
Df Model:,7,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,2.7853,0.009,315.935,0.000,2.768,2.803
"C(learners, Treatment(reference=800))[T.1600]",-0.6822,0.006,-117.467,0.000,-0.694,-0.671
"C(instruction_size, Treatment(reference=32))[T.16]",-0.4213,0.006,-72.548,0.000,-0.433,-0.410
"C(first_degree, Treatment(reference=1.0))[T.0.4]",0.1667,0.007,23.811,0.000,0.153,0.180
"C(first_degree, Treatment(reference=1.0))[T.0.5]",0.0136,0.007,1.861,0.063,-0.001,0.028
"C(penalty, Treatment(reference=1.0))[T.2]",-0.3641,0.006,-62.691,0.000,-0.375,-0.353
"C(progress, Treatment(reference=0.0))[T.1]",1.4903,0.007,200.148,0.000,1.476,1.505
"C(progress, Treatment(reference=0.0))[T.2]",1.8464,0.008,220.768,0.000,1.830,1.863

0,1,2,3
Omnibus:,1340.56,Durbin-Watson:,0.137
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3191.081
Skew:,-1.046,Prob(JB):,0.0
Kurtosis:,5.502,Cond. No.,5.89
