In [1]:
from OfflineEvaluator import OfflineEvaluator
from Preprocessor import Preprocessor
import utils
from UserProfiler import UserProfiler

import pickle
import logging
import operator
import sys
import os
from collections import Counter
import itertools
from itertools import combinations
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import spacy
import re
import networkx as nx

In [2]:
dbnums = ##[...]
dbnum_lookup = ##{..:..} 

# Read in offline evaluation results

In [3]:
results = {}

In [4]:
for dbnum in dbnums:
    results[dbnum] = {}
    os.chdir('results/db%d'%dbnum)
    for met in ['RND', 'PPL', 'SCB']:
        with open('%d-%s-results_eval1.pickle'%(dbnum,met),'rb') as handle:
            res = pickle.load(handle)
            results[dbnum][met] = res
    for met in ['csclrec', 'coppr', 'pure_ppr', 'kcb', 'wmf']:
        with open('%d-%s-eval1_cv.pickle'%(dbnum,met),'rb') as handle:
            res = pickle.load(handle)
            results[dbnum][met] = res
    os.chdir('../..')

In [5]:
dfs_res = []
for dbnum in dbnums:
    lst = []
    for met,res in results[dbnum].items():
        for wk,res_all_user in res.items():
            for usr, res_usr in res_all_user[0].items():
                _, precision, recall, miuf, semd, sd ,mr = res_usr
                lst.append([met, wk,usr,precision, recall, miuf, semd, sd ,mr])
    df_res = pd.DataFrame(lst, columns=['method','week','user','p@10','r@10','miuf','ild','#adjacent posts','max possible recall'])
    df_res['course'] = dbnum
    dfs_res.append(df_res)
df_res = pd.concat(dfs_res)

In [6]:
df_res['method'] = df_res['method'].replace({'ucf':'UCF','wmf':'MCF','kcb':'KCB',
                          'csclrec':'CSCLRec','coppr':'CoPPR','pure_ppr':'PPR'})

df_res['course'] = df_res['course'].replace(dbnum_lookup)

In [7]:
df_res = df_res.groupby(['course','method','week']).mean()

In [8]:
df_res.reset_index(inplace=True)

In [9]:
df_res.head(20)

Unnamed: 0,course,method,week,user,p@10,r@10,miuf,ild,#adjacent posts,max possible recall
0,LA,CSCLRec,2,1021.576923,0.857021,0.127597,0.370622,0.246306,4.807692,0.195408
1,LA,CSCLRec,3,1021.576923,0.844017,0.200668,0.480176,0.274166,3.5,0.223878
2,LA,CSCLRec,4,1021.576923,0.723077,0.212459,0.591936,0.240066,2.384615,0.291228
3,LA,CSCLRec,5,1021.576923,0.738782,0.16337,0.504277,0.241945,2.653846,0.253654
4,LA,CSCLRec,6,1021.576923,0.807265,0.205244,0.508824,0.18394,4.423077,0.305226
5,LA,CSCLRec,7,1021.576923,0.930769,0.202886,0.335557,0.249469,7.461538,0.214291
6,LA,CSCLRec,8,1021.576923,0.923077,0.21133,0.315804,0.217581,10.115385,0.234798
7,LA,CSCLRec,9,1021.576923,0.863248,0.237635,0.409324,0.178914,3.692308,0.312966
8,LA,CSCLRec,10,1021.576923,0.684982,0.241202,0.546676,0.181996,3.230769,0.41708
9,LA,CSCLRec,11,1021.576923,0.840598,0.185361,0.399405,0.148047,2.423077,0.221699


# Two-way ANOVA

In [10]:
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import scipy.stats as stats

## Long courses

In [11]:
df_long = df_res[df_res.course.isin(['LA','LB','LC'])][['method','week','p@10','r@10','ild','miuf']]

In [12]:
df_long.head(15)

Unnamed: 0,method,week,p@10,r@10,ild,miuf
0,CSCLRec,2,0.857021,0.127597,0.246306,0.370622
1,CSCLRec,3,0.844017,0.200668,0.274166,0.480176
2,CSCLRec,4,0.723077,0.212459,0.240066,0.591936
3,CSCLRec,5,0.738782,0.16337,0.241945,0.504277
4,CSCLRec,6,0.807265,0.205244,0.18394,0.508824
5,CSCLRec,7,0.930769,0.202886,0.249469,0.335557
6,CSCLRec,8,0.923077,0.21133,0.217581,0.315804
7,CSCLRec,9,0.863248,0.237635,0.178914,0.409324
8,CSCLRec,10,0.684982,0.241202,0.181996,0.546676
9,CSCLRec,11,0.840598,0.185361,0.148047,0.399405


In [13]:
model = ols('miuf ~ C(method) + C(week) + C(method):C(week)', data=df_long).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
anova_table

Unnamed: 0,sum_sq,df,F,PR(>F)
C(method),22.646893,7.0,27.760998,1.743921e-25
C(week),1.954258,10.0,1.676897,0.08934387
C(method):C(week),1.838966,70.0,0.225424,1.0
Residual,20.511063,176.0,,


In [14]:
# Assumption checking
w, pvalue = stats.shapiro(model.resid)
print(w, pvalue)
w, pvalue = stats.bartlett(*[x['miuf'].to_list() for _, x in df_long.groupby(['method','week'])])
print(w, pvalue)

0.9792996048927307 0.0006917573628015816
53.96043230344946 0.9979210693038086


In [18]:
print(pairwise_tukeyhsd(endog=df_long['miuf'], groups=df_long['method'], alpha=0.05))

 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
 group1 group2 meandiff p-adj   lower   upper  reject
-----------------------------------------------------
CSCLRec  CoPPR   0.0305    0.9 -0.2013  0.2624  False
CSCLRec    KCB   0.4166  0.001  0.1848  0.6485   True
CSCLRec    MCF   0.2247 0.0652 -0.0071  0.4565  False
CSCLRec    PPL  -0.2106 0.1056 -0.4424  0.0212  False
CSCLRec    PPR  -0.1668 0.3569 -0.3986  0.0651  False
CSCLRec    RND   0.5543  0.001  0.3225  0.7861   True
CSCLRec    SCB   0.5808  0.001  0.3489  0.8126   True
  CoPPR    KCB   0.3861  0.001  0.1543  0.6179   True
  CoPPR    MCF   0.1941 0.1757 -0.0377   0.426  False
  CoPPR    PPL  -0.2412  0.035  -0.473 -0.0093   True
  CoPPR    PPR  -0.1973 0.1604 -0.4291  0.0345  False
  CoPPR    RND   0.5238  0.001   0.292  0.7556   True
  CoPPR    SCB   0.5502  0.001  0.3184  0.7821   True
    KCB    MCF   -0.192 0.1872 -0.4238  0.0399  False
    KCB    PPL  -0.6273  0.001 -0.8591 -0.3954   True
    KCB    PPR  -0.5834  0.0

In [15]:
model = ols('Q("p@10") ~ C(method) + C(week) + C(method):C(week)', data=df_long).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
anova_table

Unnamed: 0,sum_sq,df,F,PR(>F)
C(method),7.742919,7.0,85.166564,3.717954e-53
C(week),1.169078,10.0,9.001317,6.553305e-12
C(method):C(week),0.671175,70.0,0.738244,0.9267552
Residual,2.285863,176.0,,


In [17]:
# Assumption checking
w, pvalue = stats.shapiro(model.resid)
print(w, pvalue)
w, pvalue = stats.bartlett(*[x['p@10'].to_list() for _, x in df_long.groupby(['method','week'])])
print(w, pvalue)

0.9955922961235046 0.6592274904251099
96.21961970757901 0.23397867225469468


In [18]:
print(pairwise_tukeyhsd(endog=df_long['p@10'], groups=df_long['method'], alpha=0.05))

 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
 group1 group2 meandiff p-adj   lower   upper  reject
-----------------------------------------------------
CSCLRec  CoPPR  -0.0125    0.9  -0.108   0.083  False
CSCLRec    KCB  -0.4498  0.001 -0.5453 -0.3543   True
CSCLRec    MCF  -0.2422  0.001 -0.3378 -0.1467   True
CSCLRec    PPL  -0.3324  0.001 -0.4279 -0.2368   True
CSCLRec    PPR  -0.1841  0.001 -0.2796 -0.0885   True
CSCLRec    RND  -0.4264  0.001 -0.5219 -0.3309   True
CSCLRec    SCB  -0.4354  0.001 -0.5309 -0.3399   True
  CoPPR    KCB  -0.4373  0.001 -0.5328 -0.3418   True
  CoPPR    MCF  -0.2297  0.001 -0.3252 -0.1342   True
  CoPPR    PPL  -0.3198  0.001 -0.4153 -0.2243   True
  CoPPR    PPR  -0.1715  0.001  -0.267  -0.076   True
  CoPPR    RND  -0.4139  0.001 -0.5094 -0.3183   True
  CoPPR    SCB  -0.4228  0.001 -0.5184 -0.3273   True
    KCB    MCF   0.2076  0.001  0.1121  0.3031   True
    KCB    PPL   0.1175 0.0051   0.022   0.213   True
    KCB    PPR   0.2658  0.0

In [19]:
model = ols('Q("r@10") ~ C(method) + C(week) + C(method):C(week)', data=df_long).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
anova_table

Unnamed: 0,sum_sq,df,F,PR(>F)
C(method),0.147477,7.0,1.119126,0.353076
C(week),0.366263,10.0,1.94557,0.041985
C(method):C(week),0.081996,70.0,0.062223,1.0
Residual,3.313281,176.0,,


In [20]:
# Assumption checking
w, pvalue = stats.shapiro(model.resid)
print(w, pvalue)
w, pvalue = stats.bartlett(*[x['r@10'].to_list() for _, x in df_long.groupby(['method','week'])])
print(w, pvalue)

0.9525035619735718 1.407042589107732e-07
35.30309243467295 0.9999998475781801


In [21]:
print(pairwise_tukeyhsd(endog=df_long['r@10'], groups=df_long['method'], alpha=0.05))

Multiple Comparison of Means - Tukey HSD, FWER=0.05 
 group1 group2 meandiff p-adj   lower  upper  reject
----------------------------------------------------
CSCLRec  CoPPR   0.0023    0.9 -0.0889 0.0935  False
CSCLRec    KCB  -0.0654 0.3622 -0.1566 0.0258  False
CSCLRec    MCF  -0.0368    0.9  -0.128 0.0544  False
CSCLRec    PPL  -0.0375    0.9 -0.1287 0.0537  False
CSCLRec    PPR  -0.0364    0.9 -0.1276 0.0548  False
CSCLRec    RND  -0.0571 0.5366 -0.1483 0.0341  False
CSCLRec    SCB  -0.0559 0.5602 -0.1471 0.0353  False
  CoPPR    KCB  -0.0676 0.3162 -0.1588 0.0236  False
  CoPPR    MCF  -0.0391 0.8943 -0.1303 0.0521  False
  CoPPR    PPL  -0.0398 0.8799  -0.131 0.0514  False
  CoPPR    PPR  -0.0387    0.9 -0.1299 0.0525  False
  CoPPR    RND  -0.0594 0.4908 -0.1506 0.0318  False
  CoPPR    SCB  -0.0582 0.5148 -0.1494  0.033  False
    KCB    MCF   0.0285    0.9 -0.0627 0.1197  False
    KCB    PPL   0.0278    0.9 -0.0634  0.119  False
    KCB    PPR   0.0289    0.9 -0.0623 0.1201 

In [29]:
model = ols('ild ~ C(method) + C(week) + C(method):C(week)', data=df_long).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
anova_table

Unnamed: 0,sum_sq,df,F,PR(>F)
C(method),4.255422,7.0,43.304774,5.617084e-35
C(week),0.140167,10.0,0.998472,0.446687
C(method):C(week),0.209742,70.0,0.213442,1.0
Residual,2.442632,174.0,,


In [30]:
print(pairwise_tukeyhsd(endog=df_long['ild'], groups=df_long['method'], alpha=0.05))
# This arise some NaN's because some recommenders did not generate recommendations 
#    which makes the ILD measure uncomputable for some special cases 
# (e.g early stage of a class where a student did not have any interactions 
#      so that KCB cannot construct user profile feature vector to infer this user's preference)

Multiple Comparison of Means - Tukey HSD, FWER=0.05
 group1 group2 meandiff p-adj  lower upper reject
-------------------------------------------------
CSCLRec  CoPPR   -0.059 0.5566   nan   nan  False
CSCLRec    KCB      nan 0.5566   nan   nan  False
CSCLRec    MCF   0.1789 0.5566   nan   nan  False
CSCLRec    PPL    0.147 0.5566   nan   nan  False
CSCLRec    PPR   0.1198 0.5566   nan   nan  False
CSCLRec    RND   0.1389 0.5566   nan   nan  False
CSCLRec    SCB  -0.2096 0.5566   nan   nan  False
  CoPPR    KCB      nan 0.5566   nan   nan  False
  CoPPR    MCF   0.2378 0.5566   nan   nan  False
  CoPPR    PPL    0.206 0.5566   nan   nan  False
  CoPPR    PPR   0.1788 0.5566   nan   nan  False
  CoPPR    RND   0.1978 0.5566   nan   nan  False
  CoPPR    SCB  -0.1506 0.5566   nan   nan  False
    KCB    MCF      nan 0.5566   nan   nan  False
    KCB    PPL      nan 0.5566   nan   nan  False
    KCB    PPR      nan 0.5566   nan   nan  False
    KCB    RND      nan 0.5566   nan   nan  Fals

  reject = st_range > q_crit
  reject2 = np.abs(meandiffs) > crit_int


## Short courses

In [27]:
df_short = df_res[df_res.course.isin(['SA','SB','SC'])][['method','week','p@10','r@10','ild','miuf']]

### ILD

In [28]:
model = ols('ild ~ C(method) + C(week) + C(method):C(week)', data=df_short).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
anova_table

Unnamed: 0,sum_sq,df,F,PR(>F)
C(method),0.835791,7.0,31.67704,1.73706e-18
C(week),0.002836,3.0,0.250819,0.8604658
C(method):C(week),0.025441,21.0,0.321409,0.997379
Residual,0.241232,64.0,,


In [29]:
print(pairwise_tukeyhsd(endog=df_short['ild'], groups=df_short['method'], alpha=0.05))

 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
 group1 group2 meandiff p-adj   lower   upper  reject
-----------------------------------------------------
CSCLRec  CoPPR   -0.035 0.7522 -0.1051  0.0351  False
CSCLRec    KCB   -0.003    0.9 -0.0732  0.0671  False
CSCLRec    MCF   0.1686  0.001  0.0985  0.2388   True
CSCLRec    PPL   0.1192  0.001   0.049  0.1893   True
CSCLRec    PPR   0.0514 0.3193 -0.0188  0.1215  False
CSCLRec    RND     0.16  0.001  0.0898  0.2301   True
CSCLRec    SCB  -0.1128  0.001 -0.1829 -0.0426   True
  CoPPR    KCB    0.032 0.8302 -0.0382  0.1021  False
  CoPPR    MCF   0.2036  0.001  0.1335  0.2738   True
  CoPPR    PPL   0.1542  0.001   0.084  0.2243   True
  CoPPR    PPR   0.0864 0.0058  0.0162  0.1565   True
  CoPPR    RND    0.195  0.001  0.1248  0.2651   True
  CoPPR    SCB  -0.0778 0.0191 -0.1479 -0.0076   True
    KCB    MCF   0.1716  0.001  0.1015  0.2418   True
    KCB    PPL   0.1222  0.001   0.052  0.1923   True
    KCB    PPR   0.0544 0.25

### MIUF

In [30]:
model = ols('miuf ~ C(method) + C(week) + C(method):C(week)', data=df_short).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
anova_table

Unnamed: 0,sum_sq,df,F,PR(>F)
C(method),8.479608,7.0,50.912878,9.254983e-24
C(week),0.145002,3.0,2.031436,0.1183052
C(method):C(week),0.160213,21.0,0.320648,0.9974215
Residual,1.522755,64.0,,


In [31]:
print(pairwise_tukeyhsd(endog=df_short['miuf'], groups=df_short['method'], alpha=0.05))

 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
 group1 group2 meandiff p-adj   lower   upper  reject
-----------------------------------------------------
CSCLRec  CoPPR   0.0191    0.9 -0.1636  0.2018  False
CSCLRec    KCB    0.548  0.001  0.3653  0.7307   True
CSCLRec    MCF   0.3207  0.001   0.138  0.5034   True
CSCLRec    PPL  -0.1321 0.3355 -0.3148  0.0505  False
CSCLRec    PPR  -0.0763 0.8999  -0.259  0.1064  False
CSCLRec    RND   0.7109  0.001  0.5282  0.8936   True
CSCLRec    SCB    0.439  0.001  0.2563  0.6217   True
  CoPPR    KCB   0.5289  0.001  0.3462  0.7116   True
  CoPPR    MCF   0.3016  0.001  0.1189  0.4843   True
  CoPPR    PPL  -0.1513 0.1806  -0.334  0.0314  False
  CoPPR    PPR  -0.0954   0.71 -0.2781  0.0873  False
  CoPPR    RND   0.6918  0.001  0.5091  0.8745   True
  CoPPR    SCB   0.4199  0.001  0.2372  0.6026   True
    KCB    MCF  -0.2273 0.0051 -0.4099 -0.0446   True
    KCB    PPL  -0.6801  0.001 -0.8628 -0.4974   True
    KCB    PPR  -0.6243  0.0

### p@10

In [32]:
model = ols('Q("p@10") ~ C(method) + C(week) + C(method):C(week)', data=df_long).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
anova_table

Unnamed: 0,sum_sq,df,F,PR(>F)
C(method),7.742919,7.0,85.166564,3.717954e-53
C(week),1.169078,10.0,9.001317,6.553305e-12
C(method):C(week),0.671175,70.0,0.738244,0.9267552
Residual,2.285863,176.0,,


In [33]:
print(pairwise_tukeyhsd(endog=df_short['p@10'], groups=df_short['method'], alpha=0.05))

 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
 group1 group2 meandiff p-adj   lower   upper  reject
-----------------------------------------------------
CSCLRec  CoPPR  -0.0192    0.9 -0.1159  0.0775  False
CSCLRec    KCB  -0.3523  0.001  -0.449 -0.2556   True
CSCLRec    MCF  -0.3018  0.001 -0.3986 -0.2051   True
CSCLRec    PPL  -0.2727  0.001 -0.3694  -0.176   True
CSCLRec    PPR  -0.1849  0.001 -0.2817 -0.0882   True
CSCLRec    RND  -0.4013  0.001  -0.498 -0.3046   True
CSCLRec    SCB  -0.3497  0.001 -0.4464  -0.253   True
  CoPPR    KCB  -0.3332  0.001 -0.4299 -0.2364   True
  CoPPR    MCF  -0.2827  0.001 -0.3794 -0.1859   True
  CoPPR    PPL  -0.2535  0.001 -0.3502 -0.1568   True
  CoPPR    PPR  -0.1658  0.001 -0.2625  -0.069   True
  CoPPR    RND  -0.3821  0.001 -0.4789 -0.2854   True
  CoPPR    SCB  -0.3305  0.001 -0.4272 -0.2338   True
    KCB    MCF   0.0505 0.7102 -0.0462  0.1472  False
    KCB    PPL   0.0796 0.1859 -0.0171  0.1764  False
    KCB    PPR   0.1674  0.0

### r@10

In [34]:
model = ols('Q("r@10") ~ C(method) + C(week) + C(method):C(week)', data=df_long).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
anova_table

Unnamed: 0,sum_sq,df,F,PR(>F)
C(method),0.147477,7.0,1.119126,0.353076
C(week),0.366263,10.0,1.94557,0.041985
C(method):C(week),0.081996,70.0,0.062223,1.0
Residual,3.313281,176.0,,


In [35]:
print(pairwise_tukeyhsd(endog=df_short['r@10'], groups=df_short['method'], alpha=0.05))

 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
 group1 group2 meandiff p-adj   lower   upper  reject
-----------------------------------------------------
CSCLRec  CoPPR  -0.0113    0.9 -0.0537  0.0311  False
CSCLRec    KCB  -0.0717  0.001 -0.1141 -0.0294   True
CSCLRec    MCF  -0.0578 0.0014 -0.1001 -0.0154   True
CSCLRec    PPL  -0.0476 0.0165   -0.09 -0.0053   True
CSCLRec    PPR   -0.046 0.0239 -0.0883 -0.0036   True
CSCLRec    RND  -0.0737  0.001 -0.1161 -0.0313   True
CSCLRec    SCB  -0.0697  0.001 -0.1121 -0.0273   True
  CoPPR    KCB  -0.0604  0.001 -0.1028 -0.0181   True
  CoPPR    MCF  -0.0465 0.0214 -0.0888 -0.0041   True
  CoPPR    PPL  -0.0364 0.1478 -0.0787   0.006  False
  CoPPR    PPR  -0.0347 0.1927  -0.077  0.0077  False
  CoPPR    RND  -0.0624  0.001 -0.1048   -0.02   True
  CoPPR    SCB  -0.0584 0.0012 -0.1008  -0.016   True
    KCB    MCF    0.014    0.9 -0.0284  0.0563  False
    KCB    PPL   0.0241 0.6256 -0.0183  0.0665  False
    KCB    PPR   0.0258 0.55