In [1]:
# Move the working directory up one level; presumably this makes the project
# root current so that the `analysis` package import resolves — TODO confirm.
# NOTE(review): `%cd ..` is relative, so re-running this cell without
# restarting the kernel moves up one more directory each time.
%cd ..
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

D:\Projects\Python\PL-Heuristic


In [2]:
import pandas as pd
from scipy import stats

from analysis.caching import get_cached_results

# Preparation

In [3]:
# Load the experiment results through the project's caching helper.
# `data` is used below as a pandas-style frame with an `experiment` column and
# one `ilp_*` / `heuristic_*` column per performance measure.
data = get_cached_results()
# Summary statistics per column; displayed as the cell's output.
data.describe()

Unnamed: 0,heuristic_classroom_utilisation,heuristic_instruction_size,heuristic_objective,heuristic_percentage_instruction,heuristic_percentage_self_study,heuristic_self_study_size,heuristic_teacher_utilisation,ilp_classroom_utilisation,ilp_instruction_size,ilp_objective,ilp_percentage_instruction,ilp_percentage_self_study,ilp_self_study_size,ilp_teacher_utilisation,experiment
count,7200.0,7200.0,7200.0,7200.0,7200.0,7200.0,7200.0,7199.0,7199.0,7199.0,7199.0,7199.0,7199.0,7199.0,7200.0
mean,88.235918,10.668209,6610.209687,59.657344,40.342656,49.523178,66.920573,88.02564,11.666901,6747.593221,65.397746,34.602254,44.584185,67.231039,36.5
std,11.331711,3.545793,2351.192867,21.667173,21.667173,16.217169,14.763535,11.950004,4.471715,2425.311201,21.486019,21.486019,16.177522,17.161373,20.784048
min,60.240964,5.4,3469.548383,15.0,5.625,15.133333,43.75,60.240964,5.185185,3533.340676,18.625,1.125,5.0,39.375,1.0
25%,78.313253,7.931034,4371.918796,43.421875,19.5,39.8125,53.75,78.313253,8.408259,4417.163803,49.625,15.375,31.25,53.75,18.75
50%,90.361446,10.279151,6256.421236,61.625,38.375,46.263158,58.125,89.156627,11.428571,5018.009891,65.5,34.5,44.0,62.5,36.5
75%,100.0,12.220543,9011.085109,80.5,56.578125,63.5,81.25,100.0,14.0,9152.171242,84.625,50.375,55.055556,83.75,54.25
max,100.0,20.985075,9932.599786,94.375,85.0,80.0,100.0,100.0,24.360656,10080.377678,98.875,81.375,80.0,100.0,72.0


In [4]:
# Performance measures under comparison. Each measure occurs twice in the
# data set: once prefixed for the ILP and once prefixed for the heuristic.
MEASURES = [
    "objective",
    "instruction_size",
    "self_study_size",
    "percentage_instruction",
    "percentage_self_study",
    "classroom_utilisation",
    "teacher_utilisation",
]

# Column-name prefixes that identify the solution method.
ILP, HEURISTIC = "ilp_", "heuristic_"

# Experiment identifiers: 1 through 72, inclusive.
EXPERIMENTS = [*range(1, 73)]

# Analysis

In [7]:
def results_per_experiment(experiment):
    """
    Compares the heuristic against the ILP for a single experiment.

    For each performance measure in MEASURES, the rows of ``data`` that belong
    to the given experiment are selected and two statistics are computed:

    - the mean relative difference (heuristic - ILP) / ILP, as a percentage;
    - the p-value of a t-test between the ILP and heuristic samples.

    We use Welch's t-test as the variances cannot be assumed equal. Missing
    values are omitted from the test (``nan_policy='omit'``).

    Parameters
    ----------
    experiment
        Value matched against ``data.experiment`` to select the rows.

    Returns
    -------
    dict
        Maps each measure name to a ``(percentage difference, p-value)`` tuple.
    """
    # The experiment filter does not depend on the measure, so it is applied
    # once here instead of rebuilding the boolean mask twice per measure.
    experiment_data = data.loc[data.experiment == experiment]

    results = {}

    for measure in MEASURES:
        ilp_data = experiment_data[ILP + measure]
        heuristic_data = experiment_data[HEURISTIC + measure]

        # Difference relative to the ILP value. Rows with a missing value
        # produce NaN, which Series.mean() skips by default.
        percentage_diff = 100 * ((heuristic_data - ilp_data) / ilp_data).mean()

        p_value = stats.ttest_ind(ilp_data, heuristic_data,
                                  equal_var=False, nan_policy='omit').pvalue

        results[measure] = percentage_diff, p_value

    return results

# One row per experiment (in EXPERIMENTS order), one column per measure.
# Every cell holds a (percentage change, t-test p-value) tuple.
comparisons = pd.DataFrame(list(map(results_per_experiment, EXPERIMENTS)))

In [8]:
def significance(p_value):
    """
    Returns the LaTeX superscript marking how significant a p-value is.

    A p-value of at most 0.01 yields a double star, one of at most 0.05 a
    single star, and anything else (including NaN) an empty string.
    """
    # Ordered from strictest to loosest, so the first match is the strongest.
    markers = ((0.01, "^{**}"), (0.05, "^*"))

    for threshold, marker in markers:
        if p_value <= threshold:
            return marker

    return ""

# Print the comparison as LaTeX table rows: one row per experiment, followed
# by a row of per-measure averages.
#
# The LaTeX fragments are raw strings so that `\%` and `\mathbf` reach the
# output verbatim; in ordinary string literals they are invalid escape
# sequences, which Python warns about.
#
# NOTE(review): every cell, including the last one in a row, is followed by
# ' & ', so each row ends in ' & \\' and the averages row ends in a trailing
# ' & ' without a row terminator. Kept as is to preserve the emitted output;
# confirm whether the target table expects this.
for idx, row in comparisons.iterrows():
    # `comparisons` is built from a list, so its index starts at 0 while the
    # experiments are numbered from 1.
    print(f"${idx + 1}$", end=' & ')

    for measure in MEASURES:
        pct_change, p_value = row[measure]
        print(r"${0:.2f}\%{1}$".format(pct_change, significance(p_value)), end=' & ')
    print('\\\\')

print('Averages', end=' & ')
for measure in MEASURES:
    # Average the percentage changes only (first tuple element of each cell).
    mean_pct_change = comparisons[measure].apply(lambda x: x[0]).mean()
    print(r"$\mathbf{{{0:.2f}\%}}$".format(mean_pct_change), end=' & ')

$1$ & $-0.73\%^{**}$ & $-3.24\%^{**}$ & $8.33\%^{**}$ & $-3.43\%^{**}$ & $10.75\%^{**}$ & $0.00\%$ & $0.00\%$ & \\
$2$ & $-0.81\%^{**}$ & $-3.72\%^{**}$ & $6.54\%^{**}$ & $-4.24\%^{**}$ & $12.97\%^{**}$ & $-0.02\%$ & $-0.02\%$ & \\
$3$ & $-1.01\%^{**}$ & $-4.18\%^{**}$ & $-5.98\%^{**}$ & $-6.18\%^{**}$ & $17.51\%^{**}$ & $0.00\%$ & $0.00\%$ & \\
$4$ & $-0.76\%^{**}$ & $-11.51\%^{**}$ & $67.71\%^{**}$ & $-3.02\%^{**}$ & $23.53\%^{**}$ & $8.17\%^{**}$ & $8.17\%^{**}$ & \\
$5$ & $-0.77\%^{**}$ & $-10.74\%^{**}$ & $117.77\%^{**}$ & $-3.21\%^{**}$ & $23.88\%^{**}$ & $5.91\%^{**}$ & $5.91\%^{**}$ & \\
$6$ & $-0.95\%^{**}$ & $-7.73\%^{**}$ & $110.32\%^{**}$ & $-4.58\%^{**}$ & $22.63\%^{**}$ & $0.26\%$ & $0.26\%$ & \\
$7$ & $-0.91\%^{**}$ & $-18.88\%^{**}$ & $54.63\%^{**}$ & $-2.68\%^{**}$ & $31.78\%^{**}$ & $18.87\%^{**}$ & $18.87\%^{**}$ & \\
$8$ & $-0.97\%^{**}$ & $-19.92\%^{**}$ & $35.77\%^{**}$ & $-3.02\%^{**}$ & $36.02\%^{**}$ & $20.65\%^{**}$ & $20.65\%^{**}$ & \\
$9$ & $-0.91\%^{**}$ &