In [1]:
# Step up to the parent directory; later cells import the `analysis` package
# and read 'experiments/experiments.xlsx' via relative paths, so this is
# presumably the project root (TODO confirm).
# NOTE(review): `%cd ..` is relative - re-running this cell without a kernel
# restart moves up one more level each time.
%cd ..
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

D:\Projects\Python\PL-Heuristic


In [2]:
from analysis.caching import get_cached_results

import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Preparation

In [3]:
# Load the cached results produced elsewhere in the project.
cached_results = get_cached_results()

# Prune all heuristic-related columns, as we will not be needing those.
# The drop builds a new frame instead of using inplace=True, so the object
# handed out by get_cached_results() is never mutated (relevant if the helper
# returns a shared/memoised frame - TODO confirm) and the cell stays
# idempotent on re-run.
heuristic_columns = list(cached_results.filter(regex='heuristic'))
data = cached_results.drop(columns=heuristic_columns)

data.describe()

Unnamed: 0,ilp_classroom_utilisation,ilp_instruction_size,ilp_objective,ilp_percentage_instruction,ilp_percentage_self_study,ilp_self_study_size,ilp_teacher_utilisation,experiment
count,7199.0,7199.0,7199.0,7199.0,7199.0,7199.0,7199.0,7200.0
mean,88.026393,11.666783,6748.484177,65.398423,34.601577,44.583444,67.23182,36.5
std,11.948423,4.471629,2425.308372,21.486972,21.486972,16.178022,17.161286,20.784048
min,60.240964,5.185185,3540.313588,18.625,1.125,5.0,39.375,1.0
25%,78.313253,8.408259,4417.573599,49.625,15.375,31.25,53.75,18.75
50%,89.156627,11.428571,7713.877651,65.5,34.5,44.0,62.5,36.5
75%,100.0,14.0,9152.745958,84.625,50.375,55.055556,83.75,54.25
max,100.0,24.360656,10080.377678,98.875,81.375,80.0,100.0,72.0


In [4]:
# Parameter settings of every experiment, read from the experiment workbook.
experiments = pd.read_excel('experiments/experiments.xlsx')

# Join with experimental data, so we know the parameter combinations: the
# parameter table is keyed on its 'experiment' column and matched against the
# 'experiment' column of the results.
parameters_by_experiment = experiments.set_index('experiment')
df = data.join(parameters_by_experiment, on='experiment')

df.describe()

Unnamed: 0,ilp_classroom_utilisation,ilp_instruction_size,ilp_objective,ilp_percentage_instruction,ilp_percentage_self_study,ilp_self_study_size,ilp_teacher_utilisation,experiment,penalty,progress,learners,instruction_classrooms,instruction_size,self_study_classrooms,self_study_size,num_teachers,first_degree,second_degree,third_degree
count,7199.0,7199.0,7199.0,7199.0,7199.0,7199.0,7199.0,7200.0,7200.0,7200.0,7200.0,7200.0,7200.0,7200.0,7200.0,7200.0,7200.0,7200.0,7200.0
mean,88.026393,11.666783,6748.484177,65.398423,34.601577,44.583444,67.23182,36.5,1.5,1.0,1200.0,90.0,24.0,4.5,80.0,120.0,0.633333,0.3,0.066667
std,11.948423,4.471629,2425.308372,21.486972,21.486972,16.178022,17.161286,20.784048,0.500035,0.816553,400.027781,43.592017,8.000556,1.500104,0.0,40.002778,0.262485,0.21604,0.094287
min,60.240964,5.185185,3540.313588,18.625,1.125,5.0,39.375,1.0,1.0,0.0,800.0,40.0,16.0,3.0,80.0,80.0,0.4,0.0,0.0
25%,78.313253,8.408259,4417.573599,49.625,15.375,31.25,53.75,18.75,1.0,0.0,800.0,70.0,16.0,3.0,80.0,80.0,0.4,0.0,0.0
50%,89.156627,11.428571,7713.877651,65.5,34.5,44.0,62.5,36.5,1.5,1.0,1200.0,80.0,24.0,4.5,80.0,120.0,0.5,0.4,0.0
75%,100.0,14.0,9152.745958,84.625,50.375,55.055556,83.75,54.25,2.0,2.0,1600.0,100.0,32.0,6.0,80.0,160.0,1.0,0.5,0.2
max,100.0,24.360656,10080.377678,98.875,81.375,80.0,100.0,72.0,2.0,2.0,1600.0,160.0,32.0,6.0,80.0,160.0,1.0,0.5,0.2


# Analysis

In [5]:
# Main-effect terms shared by every regression below. Each experimental factor
# enters the formula as a categorical with treatment (dummy) coding, so every
# coefficient is a contrast against the reference level listed here.
_FACTOR_REFERENCE_LEVELS = (
    ("learners", "800"),
    ("instruction_size", "32"),
    ("first_degree", "1.0"),
    ("penalty", "1.0"),
    ("progress", "0.0"),
)

EXOG = [
    "C({}, Treatment(reference={}))".format(factor, reference)
    for factor, reference in _FACTOR_REFERENCE_LEVELS
]

# Interaction term that is currently left out of the models:
#"C(instruction_size, Treatment(reference=32)) : C(first_degree, Treatment(reference=1.0))"

## Classroom pool utilisation

In [6]:
# OLS of classroom pool utilisation on the main-effect terms in EXOG, with
# HC3 heteroskedasticity-robust standard errors.
formula = "ilp_classroom_utilisation ~ " + "+".join(EXOG)
model = smf.ols(formula=formula, data=df)
result = model.fit(cov_type='HC3')

result.summary()

0,1,2,3
Dep. Variable:,ilp_classroom_utilisation,R-squared:,0.845
Model:,OLS,Adj. R-squared:,0.845
Method:,Least Squares,F-statistic:,9005.0
Date:,"Mon, 14 Oct 2019",Prob (F-statistic):,0.0
Time:,14:24:21,Log-Likelihood:,-21367.0
No. Observations:,7199,AIC:,42750.0
Df Residuals:,7191,BIC:,42800.0
Df Model:,7,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,92.8491,0.202,459.848,0.000,92.453,93.245
"C(learners, Treatment(reference=800))[T.1600]",-4.8688,0.111,-43.831,0.000,-5.086,-4.651
"C(instruction_size, Treatment(reference=32))[T.16]",-16.7393,0.111,-150.694,0.000,-16.957,-16.522
"C(first_degree, Treatment(reference=1.0))[T.0.4]",-4.5202,0.136,-33.184,0.000,-4.787,-4.253
"C(first_degree, Treatment(reference=1.0))[T.0.5]",-0.3459,0.134,-2.576,0.010,-0.609,-0.083
"C(penalty, Treatment(reference=1.0))[T.2]",-2.0974,0.111,-18.881,0.000,-2.315,-1.880
"C(progress, Treatment(reference=0.0))[T.1]",11.3051,0.141,80.106,0.000,11.029,11.582
"C(progress, Treatment(reference=0.0))[T.2]",14.6592,0.149,98.409,0.000,14.367,14.951

0,1,2,3
Omnibus:,76.438,Durbin-Watson:,0.281
Prob(Omnibus):,0.0,Jarque-Bera (JB):,77.403
Skew:,0.243,Prob(JB):,1.56e-17
Kurtosis:,2.852,Cond. No.,5.89


## Teacher pool utilisation

In [7]:
# OLS of teacher pool utilisation on the main-effect terms in EXOG, with
# HC3 heteroskedasticity-robust standard errors.
formula = "ilp_teacher_utilisation ~ " + "+".join(EXOG)
model = smf.ols(formula=formula, data=df)
result = model.fit(cov_type='HC3')

result.summary()

0,1,2,3
Dep. Variable:,ilp_teacher_utilisation,R-squared:,0.932
Model:,OLS,Adj. R-squared:,0.932
Method:,Least Squares,F-statistic:,14510.0
Date:,"Mon, 14 Oct 2019",Prob (F-statistic):,0.0
Time:,14:24:21,Log-Likelihood:,-21020.0
No. Observations:,7199,AIC:,42060.0
Df Residuals:,7191,BIC:,42110.0
Df Model:,7,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,49.0800,0.167,293.487,0.000,48.752,49.408
"C(learners, Treatment(reference=800))[T.1600]",-3.5242,0.106,-33.293,0.000,-3.732,-3.317
"C(instruction_size, Treatment(reference=32))[T.16]",30.8313,0.106,291.280,0.000,30.624,31.039
"C(first_degree, Treatment(reference=1.0))[T.0.4]",-4.5056,0.131,-34.415,0.000,-4.762,-4.249
"C(first_degree, Treatment(reference=1.0))[T.0.5]",-0.3369,0.128,-2.629,0.009,-0.588,-0.086
"C(penalty, Treatment(reference=1.0))[T.2]",-2.2288,0.106,-21.054,0.000,-2.436,-2.021
"C(progress, Treatment(reference=0.0))[T.1]",9.0992,0.123,74.101,0.000,8.859,9.340
"C(progress, Treatment(reference=0.0))[T.2]",12.5791,0.142,88.297,0.000,12.300,12.858

0,1,2,3
Omnibus:,227.299,Durbin-Watson:,0.235
Prob(Omnibus):,0.0,Jarque-Bera (JB):,183.711
Skew:,0.314,Prob(JB):,1.2799999999999998e-40
Kurtosis:,2.532,Cond. No.,5.89


## Instruction activity size

In [8]:
# OLS of the realised instruction activity size (ilp_instruction_size) on the
# main-effect terms in EXOG, with HC3 heteroskedasticity-robust standard
# errors. Note that the configured `instruction_size` factor is one of the
# regressors; the dependent variable is the separate `ilp_` result column.
formula = "ilp_instruction_size ~ " + "+".join(EXOG)
model = smf.ols(formula=formula, data=df)
result = model.fit(cov_type='HC3')

result.summary()

0,1,2,3
Dep. Variable:,ilp_instruction_size,R-squared:,0.914
Model:,OLS,Adj. R-squared:,0.914
Method:,Least Squares,F-statistic:,7490.0
Date:,"Mon, 14 Oct 2019",Prob (F-statistic):,0.0
Time:,14:24:22,Log-Likelihood:,-12180.0
No. Observations:,7199,AIC:,24380.0
Df Residuals:,7191,BIC:,24430.0
Df Model:,7,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,15.9261,0.049,326.352,0.000,15.830,16.022
"C(learners, Treatment(reference=800))[T.1600]",2.9712,0.031,95.830,0.000,2.910,3.032
"C(instruction_size, Treatment(reference=32))[T.16]",-3.8006,0.031,-122.580,0.000,-3.861,-3.740
"C(first_degree, Treatment(reference=1.0))[T.0.4]",0.3635,0.038,9.653,0.000,0.290,0.437
"C(first_degree, Treatment(reference=1.0))[T.0.5]",0.0059,0.038,0.153,0.878,-0.069,0.081
"C(penalty, Treatment(reference=1.0))[T.2]",1.4662,0.031,47.285,0.000,1.405,1.527
"C(progress, Treatment(reference=0.0))[T.1]",-5.9360,0.042,-142.098,0.000,-6.018,-5.854
"C(progress, Treatment(reference=0.0))[T.2]",-8.1680,0.044,-184.277,0.000,-8.255,-8.081

0,1,2,3
Omnibus:,879.133,Durbin-Watson:,0.108
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1494.995
Skew:,0.832,Prob(JB):,0.0
Kurtosis:,4.489,Cond. No.,5.89


## Self-study activity size

In [9]:
# OLS of the self-study activity size on the main-effect terms in EXOG, with
# HC3 heteroskedasticity-robust standard errors.
formula = "ilp_self_study_size ~ " + "+".join(EXOG)
model = smf.ols(formula=formula, data=df)
result = model.fit(cov_type='HC3')

result.summary()

0,1,2,3
Dep. Variable:,ilp_self_study_size,R-squared:,0.318
Model:,OLS,Adj. R-squared:,0.317
Method:,Least Squares,F-statistic:,566.5
Date:,"Mon, 14 Oct 2019",Prob (F-statistic):,0.0
Time:,14:24:22,Log-Likelihood:,-28877.0
No. Observations:,7199,AIC:,57770.0
Df Residuals:,7191,BIC:,57830.0
Df Model:,7,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,48.7595,0.507,96.232,0.000,47.766,49.753
"C(learners, Treatment(reference=800))[T.1600]",4.9947,0.315,15.841,0.000,4.377,5.613
"C(instruction_size, Treatment(reference=32))[T.16]",-15.4783,0.315,-49.092,0.000,-16.096,-14.860
"C(first_degree, Treatment(reference=1.0))[T.0.4]",-1.8876,0.388,-4.871,0.000,-2.647,-1.128
"C(first_degree, Treatment(reference=1.0))[T.0.5]",-0.3096,0.396,-0.783,0.434,-1.085,0.466
"C(penalty, Treatment(reference=1.0))[T.2]",0.7852,0.315,2.490,0.013,0.167,1.403
"C(progress, Treatment(reference=0.0))[T.1]",6.8828,0.457,15.075,0.000,5.988,7.778
"C(progress, Treatment(reference=0.0))[T.2]",-2.6657,0.411,-6.487,0.000,-3.471,-1.860

0,1,2,3
Omnibus:,96.741,Durbin-Watson:,1.012
Prob(Omnibus):,0.0,Jarque-Bera (JB):,174.572
Skew:,0.044,Prob(JB):,1.24e-38
Kurtosis:,3.758,Cond. No.,5.89


## Objective value

In [10]:
# OLS of the ILP objective value on the main-effect terms in EXOG, with
# HC3 heteroskedasticity-robust standard errors.
formula = "ilp_objective ~ " + "+".join(EXOG)
model = smf.ols(formula=formula, data=df)
result = model.fit(cov_type='HC3')

result.summary()

0,1,2,3
Dep. Variable:,ilp_objective,R-squared:,0.994
Model:,OLS,Adj. R-squared:,0.994
Method:,Least Squares,F-statistic:,320000.0
Date:,"Mon, 14 Oct 2019",Prob (F-statistic):,0.0
Time:,14:24:23,Log-Likelihood:,-48112.0
No. Observations:,7199,AIC:,96240.0
Df Residuals:,7191,BIC:,96290.0
Df Model:,7,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,4944.8798,6.661,742.345,0.000,4931.824,4957.935
"C(learners, Treatment(reference=800))[T.1600]",4763.2473,4.561,1044.314,0.000,4754.308,4772.187
"C(instruction_size, Treatment(reference=32))[T.16]",211.2680,4.561,46.317,0.000,202.328,220.208
"C(first_degree, Treatment(reference=1.0))[T.0.4]",-60.2046,5.619,-10.715,0.000,-71.217,-49.192
"C(first_degree, Treatment(reference=1.0))[T.0.5]",-8.0458,5.573,-1.444,0.149,-18.969,2.877
"C(penalty, Treatment(reference=1.0))[T.2]",-386.7903,4.561,-84.801,0.000,-395.730,-377.851
"C(progress, Treatment(reference=0.0))[T.1]",-560.8556,5.057,-110.905,0.000,-570.767,-550.944
"C(progress, Treatment(reference=0.0))[T.2]",-842.7708,6.433,-131.014,0.000,-855.379,-830.163

0,1,2,3
Omnibus:,304.963,Durbin-Watson:,0.395
Prob(Omnibus):,0.0,Jarque-Bera (JB):,396.173
Skew:,-0.442,Prob(JB):,9.38e-87
Kurtosis:,3.735,Cond. No.,5.89


## Percentage of learners in instruction activities

In [11]:
# OLS of the percentage of learners in instruction activities on the
# main-effect terms in EXOG, with HC3 heteroskedasticity-robust standard
# errors.
formula = "ilp_percentage_instruction ~ " + "+".join(EXOG)
model = smf.ols(formula=formula, data=df)
result = model.fit(cov_type='HC3')

result.summary()

0,1,2,3
Dep. Variable:,ilp_percentage_instruction,R-squared:,0.959
Model:,OLS,Adj. R-squared:,0.959
Method:,Least Squares,F-statistic:,17800.0
Date:,"Mon, 14 Oct 2019",Prob (F-statistic):,0.0
Time:,14:24:23,Log-Likelihood:,-20793.0
No. Observations:,7199,AIC:,41600.0
Df Residuals:,7191,BIC:,41660.0
Df Model:,7,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,72.3306,0.142,510.989,0.000,72.053,72.608
"C(learners, Treatment(reference=800))[T.1600]",16.0806,0.103,156.761,0.000,15.880,16.282
"C(instruction_size, Treatment(reference=32))[T.16]",11.1967,0.103,109.146,0.000,10.996,11.398
"C(first_degree, Treatment(reference=1.0))[T.0.4]",-3.1202,0.124,-25.112,0.000,-3.364,-2.877
"C(first_degree, Treatment(reference=1.0))[T.0.5]",-0.3396,0.130,-2.617,0.009,-0.594,-0.085
"C(penalty, Treatment(reference=1.0))[T.2]",9.1164,0.103,88.865,0.000,8.915,9.317
"C(progress, Treatment(reference=0.0))[T.1]",-28.5619,0.129,-222.100,0.000,-28.814,-28.310
"C(progress, Treatment(reference=0.0))[T.2]",-43.3804,0.130,-333.442,0.000,-43.635,-43.125

0,1,2,3
Omnibus:,381.387,Durbin-Watson:,0.154
Prob(Omnibus):,0.0,Jarque-Bera (JB):,433.755
Skew:,-0.59,Prob(JB):,6.479999999999999e-95
Kurtosis:,2.769,Cond. No.,5.89


## Percentage of learners in self-study activities

In [12]:
# OLS of the percentage of learners in self-study activities on the
# main-effect terms in EXOG, with HC3 heteroskedasticity-robust standard
# errors.
# NOTE(review): the instruction and self-study percentages appear to sum to
# 100 (their means in describe() do), so this fit should mirror the
# instruction-percentage model with the signs of the slopes flipped.
formula = "ilp_percentage_self_study ~ " + "+".join(EXOG)
model = smf.ols(formula=formula, data=df)
result = model.fit(cov_type='HC3')

result.summary()

0,1,2,3
Dep. Variable:,ilp_percentage_self_study,R-squared:,0.959
Model:,OLS,Adj. R-squared:,0.959
Method:,Least Squares,F-statistic:,17800.0
Date:,"Mon, 14 Oct 2019",Prob (F-statistic):,0.0
Time:,14:24:24,Log-Likelihood:,-20793.0
No. Observations:,7199,AIC:,41600.0
Df Residuals:,7191,BIC:,41660.0
Df Model:,7,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,27.6694,0.142,195.474,0.000,27.392,27.947
"C(learners, Treatment(reference=800))[T.1600]",-16.0806,0.103,-156.761,0.000,-16.282,-15.880
"C(instruction_size, Treatment(reference=32))[T.16]",-11.1967,0.103,-109.146,0.000,-11.398,-10.996
"C(first_degree, Treatment(reference=1.0))[T.0.4]",3.1202,0.124,25.112,0.000,2.877,3.364
"C(first_degree, Treatment(reference=1.0))[T.0.5]",0.3396,0.130,2.617,0.009,0.085,0.594
"C(penalty, Treatment(reference=1.0))[T.2]",-9.1164,0.103,-88.865,0.000,-9.317,-8.915
"C(progress, Treatment(reference=0.0))[T.1]",28.5619,0.129,222.100,0.000,28.310,28.814
"C(progress, Treatment(reference=0.0))[T.2]",43.3804,0.130,333.442,0.000,43.125,43.635

0,1,2,3
Omnibus:,381.387,Durbin-Watson:,0.154
Prob(Omnibus):,0.0,Jarque-Bera (JB):,433.755
Skew:,0.59,Prob(JB):,6.479999999999999e-95
Kurtosis:,2.769,Cond. No.,5.89
