# Libraries

In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import matplotlib.pyplot as plt
import random as rd
import pandas as pd
import numpy as np
import pickle as pkl
from tqdm import tqdm
import os
import time
from collections import defaultdict
from scipy import stats
from itertools import permutations, product
from numpy.linalg import norm
import seaborn as sns
sns.set(style="darkgrid")
pd.set_option("display.precision", 4)

# Summary

#### Evaluation strategies
We have identified three evaluation strategies:
1. For every user in the **train set** rank **all items**. | Book paper, *eva_one*
2. For every user in the **test set** rank **only the items this user has rated in the test set**. | Music paper, *eva_two*
3. For every user in the **test set** rank **only the items this user has NOT rated in the train set**. | Movie(?) paper, *eva_three*

#### Algorithms

In [2]:
algo_names = ["UserKNN",
              "ItemKNN", 
              "UserKNN with means", 
              "BPR",
              "MF",
              "PMF",
              "NMF",
              "WMF",
              "HPF",
              "NeuMF",
              "VAECF"
              ] # in the order that they are in the model list

# Import results

### Additions

In [3]:
data = ["movies", "music","books"]
evaluation = ["eva_one", "eva_two", "eva_three"]
pop_notion = ["pop_one", "pop_two", "pop_three"]

In [4]:
aspects = [data, evaluation, pop_notion]

In [5]:
some_aspects = [data, evaluation]

In [6]:
results_location = "data/results/"

In [7]:
# pkl.load(open(results_location+"df_item_dist_cornac_"+data[2]+"_"+evaluation[0]+".csv","rb"))

In [8]:
c = pd.read_csv(results_location+"df_item_dist_cornac_"+data[2]+"_"+evaluation[0]+".csv", index_col=0)

In [9]:
c2 = pd.read_csv(results_location+"df_item_dist_cornac_"+data[2]+"_"+evaluation[1]+".csv", index_col=0)

In [30]:
stats.pearsonr(c2["count"], c2.HPF)

PearsonRResult(statistic=0.9173785262794707, pvalue=0.0)

In [31]:
stats.pearsonr(c2["count"], c2.NMF)

PearsonRResult(statistic=0.9124281116550262, pvalue=0.0)

In [32]:
stats.pearsonr(c["count"], c.NMF)

PearsonRResult(statistic=-0.03643678357239864, pvalue=0.0024315820834472663)

### Define aspect combinations

In [13]:
all_combinations = list(product(*aspects))
all_combinations = [x for x in all_combinations if not ("movies" in x)&("pop_three" in x)]
all_combinations = [x for x in all_combinations if not ("books" in x)&("pop_three" in x)]
# remove combinations of "pop_three" that dont include music.
some_combinations = list(product(*some_aspects))

In [14]:
sum_exists = 0
for combi in some_combinations:
    path = results_location+"df_item_dist_cornac_"+combi[0]+"_"+combi[1]+".csv"
    exists = os.path.exists(path)
    print(combi, exists)
    sum_exists+=exists
print(sum_exists, "out of", len(some_combinations))

('movies', 'eva_one') True
('movies', 'eva_two') True
('movies', 'eva_three') True
('music', 'eva_one') True
('music', 'eva_two') True
('music', 'eva_three') True
('books', 'eva_one') True
('books', 'eva_two') True
('books', 'eva_three') True
9 out of 9


In [18]:
new_results_location = 'results/'


In [21]:
sum_exists = 0

for combi in all_combinations:
    path = new_results_location+"low_gap_vals_"+combi[0]+"_"+combi[1]+"_"+combi[2]+".pickle"
    exists = os.path.exists(path)
    sum_exists+=exists
    print(combi, "|",exists)
print(sum_exists, "out of", len(all_combinations))

('movies', 'eva_one', 'pop_one') | True
('movies', 'eva_one', 'pop_two') | True
('movies', 'eva_two', 'pop_one') | True
('movies', 'eva_two', 'pop_two') | False
('movies', 'eva_three', 'pop_one') | False
('movies', 'eva_three', 'pop_two') | False
('music', 'eva_one', 'pop_one') | True
('music', 'eva_one', 'pop_two') | True
('music', 'eva_one', 'pop_three') | True
('music', 'eva_two', 'pop_one') | True
('music', 'eva_two', 'pop_two') | True
('music', 'eva_two', 'pop_three') | True
('music', 'eva_three', 'pop_one') | True
('music', 'eva_three', 'pop_two') | True
('music', 'eva_three', 'pop_three') | True
('books', 'eva_one', 'pop_one') | True
('books', 'eva_one', 'pop_two') | True
('books', 'eva_two', 'pop_one') | True
('books', 'eva_two', 'pop_two') | True
('books', 'eva_three', 'pop_one') | True
('books', 'eva_three', 'pop_two') | True
18 out of 21


# GAP

## TTESTS

In [77]:
music_GAPs_eva1

Unnamed: 0,pop_one_low,pop_one_med,pop_one_high,pop_two_low,pop_two_med,pop_two_high,pop_three_low,pop_three_med,pop_three_high
UserKNN,-32.5565,-32.623,-47.84,-27.6148,-30.8748,-52.7355,-53.2343,-28.1602,-30.3992
ItemKNN,-20.8436,-13.3886,-28.6179,-16.9566,-11.5815,-33.323,-17.4293,-13.5857,-24.155
UserKNN with means,6.3282,-6.3729,-25.7099,10.914,-4.6747,-30.0298,-34.6581,-4.5828,6.0123
BPR,596.572,352.6279,239.9962,611.0406,358.1945,230.8102,379.8273,355.3834,319.3504
MF,-49.0138,-68.9102,-78.2648,-47.1346,-68.5696,-79.0581,-67.6446,-68.0914,-71.4476
PMF,-69.7458,-81.5445,-87.0964,-68.6307,-81.3423,-87.5674,-80.7946,-81.0568,-83.0543
NMF,-78.6297,-87.0898,-90.8664,-77.8386,-86.979,-91.1399,-86.0877,-86.7619,-88.4016
WMF,-2.8335,14.1982,1.2421,0.6154,17.7609,-7.0215,-30.4233,19.9834,32.4637
HPF,150.4338,143.7847,125.212,166.0245,148.6245,110.1365,93.1937,153.1862,167.6119
NeuMF,399.0671,242.3132,157.2812,399.6447,243.9151,159.2425,262.203,239.021,213.3555


In [111]:
new_results_location = 'results/'
for combi in all_combinations:
    
    if 'music' in combi: 
        eva_str = combi[1]
        pop_n = combi[2]
        print(eva_str, pop_n)
        print('----------------')
        #path_ttest = 'gap_ttests_books_eva_one_pop_one.pickle'
        path_ttest = new_results_location+"gap_ttests_music_"+eva_str+"_"+pop_n+".pickle"
        pvalues = pd.DataFrame(pkl.load(open(path_ttest,"rb") )[2:], index=algo_names, columns = ['low-med','low-high','med-high'])
        print(pvalues)
        print((pvalues<0.005) & (pvalues>0.0) & (pvalues<1.0))

eva_one pop_one
----------------
                        low-med     low-high     med-high
UserKNN              9.7777e-01   2.1284e-09   2.5915e-19
ItemKNN              1.8642e-02   2.5603e-02   6.3821e-10
UserKNN with means   1.7743e-05   7.6560e-24   6.3051e-25
BPR                 5.7753e-213  1.3936e-293   0.0000e+00
MF                   0.0000e+00   0.0000e+00   0.0000e+00
PMF                  0.0000e+00   0.0000e+00   0.0000e+00
NMF                 6.6688e-286   0.0000e+00  8.6434e-288
WMF                  5.7264e-04   4.2891e-01   8.1892e-05
HPF                  2.6764e-01   3.5523e-05   4.6652e-07
NeuMF                4.6191e-93  3.9264e-165  7.6198e-239
VAECF                1.5856e-16   5.8740e-07   6.6730e-15
                    low-med  low-high  med-high
UserKNN               False      True      True
ItemKNN               False     False      True
UserKNN with means     True      True      True
BPR                    True      True     False
MF                    False    

In [100]:
combi = all_combinations[0]
path = new_results_location+"low_gap_vals_"+combi[0]+"_"+combi[1]+"_"+combi[2]+".pickle"


In [101]:
pop_gaps = []
for p in pop_notion:
    for v in ["low","med","high"]:
        pop_gaps.append(p+"_"+v)

In [102]:
pop_gaps

['pop_one_low',
 'pop_one_med',
 'pop_one_high',
 'pop_two_low',
 'pop_two_med',
 'pop_two_high',
 'pop_three_low',
 'pop_three_med',
 'pop_three_high']

### Music

In [105]:
music_GAPs_eva1 = pd.DataFrame(columns=pop_gaps, index = algo_names).fillna(0)
music_GAPs_eva2 = pd.DataFrame(columns=pop_gaps, index = algo_names).fillna(0)
music_GAPs_eva3 = pd.DataFrame(columns=pop_gaps, index = algo_names).fillna(0)

In [107]:
for combi in all_combinations:
    if "music" in combi:
        eva_str = combi[1]
        pop_n = combi[2]
        path_low = results_location+"low_gap_vals_music_"+eva_str+"_"+pop_n+".pickle"
        path_med = results_location+"med_gap_vals_music_"+eva_str+"_"+pop_n+".pickle"
        path_high = results_location+"high_gap_vals_music_"+eva_str+"_"+pop_n+".pickle"
        low_gap_vals = pd.DataFrame([pkl.load(open(path_low,"rb") )[2:]], columns=algo_names)
        med_gap_vals = pd.DataFrame([pkl.load(open(path_med,"rb") )[2:]], columns=algo_names)
        high_gap_vals = pd.DataFrame([pkl.load(open(path_high,"rb") )[2:]], columns=algo_names)
        
        if eva_str=="eva_one":
            for algorithm in algo_names:
                music_GAPs_eva1.at[algorithm, pop_n+"_low"] = low_gap_vals[algorithm][0]
                music_GAPs_eva1.at[algorithm, pop_n+"_med"] = med_gap_vals[algorithm][0]
                music_GAPs_eva1.at[algorithm, pop_n+"_high"] = high_gap_vals[algorithm][0]
                        
        elif eva_str=="eva_two":
            for algorithm in algo_names:
                music_GAPs_eva2.at[algorithm, pop_n+"_low"] = low_gap_vals[algorithm][0]
                music_GAPs_eva2.at[algorithm, pop_n+"_med"] = med_gap_vals[algorithm][0]
                music_GAPs_eva2.at[algorithm, pop_n+"_high"] = high_gap_vals[algorithm][0]
            
        else:
            for algorithm in algo_names:
                music_GAPs_eva3.at[algorithm, pop_n+"_low"] = low_gap_vals[algorithm][0]
                music_GAPs_eva3.at[algorithm, pop_n+"_med"] = med_gap_vals[algorithm][0]
                music_GAPs_eva3.at[algorithm, pop_n+"_high"] = high_gap_vals[algorithm][0]

In [109]:
np.round(music_GAPs_eva3,1)

Unnamed: 0,pop_one_low,pop_one_med,pop_one_high,pop_two_low,pop_two_med,pop_two_high,pop_three_low,pop_three_med,pop_three_high
UserKNN,-57.9,-82.0,-87.3,-56.1,-81.8,-87.9,-80.2,-81.6,-80.1
ItemKNN,-57.6,-70.0,-77.8,-56.8,-70.6,-76.5,-49.4,-75.6,-85.2
UserKNN with means,-51.8,-81.1,-87.3,-49.4,-81.1,-87.7,-77.5,-81.2,-79.3
BPR,580.6,323.6,214.0,596.4,328.1,206.5,362.1,325.0,288.2
MF,-53.6,-70.4,-79.8,-51.7,-70.1,-80.7,-70.5,-69.5,-72.9
PMF,-70.1,-81.8,-87.2,-69.0,-81.6,-87.6,-81.0,-81.3,-83.2
NMF,-78.6,-87.1,-90.9,-77.8,-87.0,-91.1,-86.1,-86.8,-88.4
WMF,-8.2,-5.0,-19.5,-5.6,-2.9,-24.3,-35.7,-0.8,5.8
HPF,120.4,100.1,86.8,132.1,103.4,76.4,65.5,106.1,121.7
NeuMF,382.6,215.3,138.1,384.5,216.5,140.3,244.7,211.8,187.9


In [62]:
music_GAPs_eva1.columns =["eva_one_"+x for x in music_GAPs_eva1.columns]
music_GAPs_eva2.columns =["eva_two_"+x for x in music_GAPs_eva2.columns]
music_GAPs_eva3.columns =["eva_three_"+x for x in music_GAPs_eva3.columns]

In [63]:
combined_music_GAPs = pd.concat([music_GAPs_eva1, music_GAPs_eva2, music_GAPs_eva3], axis=1)

In [65]:
combined_music_GAPs = np.round(combined_music_GAPs,1)

In [67]:
with open("latex_tables/music_GAPs_combined.tex", "w") as tf:
    tf.write(combined_music_GAPs.to_latex())

In [69]:
with open("latex_tables/music_GAPs_eva1.tex", "w") as tf:
    tf.write(np.round(music_GAPs_eva1,1).to_latex())
with open("latex_tables/music_GAPs_eva2.tex", "w") as tf:
    tf.write(np.round(music_GAPs_eva2,1).to_latex())
with open("latex_tables/music_GAPs_eva3.tex", "w") as tf:
    tf.write(np.round(music_GAPs_eva3,1).to_latex())

### Books

#### ttest?

In [142]:
new_results_location = 'results/'
for combi in all_combinations:
    
    if 'books' in combi: 
        eva_str = combi[1]
        pop_n = combi[2]
        print(eva_str, pop_n)
        print('----------------')
        #path_ttest = 'gap_ttests_books_eva_one_pop_one.pickle'
        path_ttest = new_results_location+"gap_ttests_books_"+eva_str+"_"+pop_n+".pickle"
        pvalues = pd.DataFrame(pkl.load(open(path_ttest,"rb") )[2:], index=algo_names, columns = ['low-med','low-high','med-high'])
        print(pvalues)

eva_one pop_one
----------------
                        low-med     low-high     med-high
UserKNN             7.9210e-253   0.0000e+00  2.5660e-270
ItemKNN             1.0265e-164  3.8971e-241  9.2192e-114
UserKNN with means  8.9939e-265   0.0000e+00  6.2246e-267
BPR                  0.0000e+00   0.0000e+00   0.0000e+00
MF                   1.3921e-65  1.8760e-162  2.4769e-159
PMF                 2.6136e-162  5.5610e-253  2.1764e-133
NMF                  0.0000e+00   0.0000e+00   0.0000e+00
WMF                  4.7120e-64   3.7146e-24   2.7075e-20
HPF                 2.3977e-119  6.8786e-223  5.6330e-138
NeuMF                0.0000e+00   0.0000e+00   0.0000e+00
VAECF                7.7775e-49  1.8195e-114  1.4313e-143
eva_one pop_two
----------------
                        low-med     low-high     med-high
UserKNN             2.6466e-304   0.0000e+00   0.0000e+00
ItemKNN             4.9315e-211  1.7961e-306   0.0000e+00
UserKNN with means   0.0000e+00   0.0000e+00   0.0000e+00
BPR   

#### gap

In [134]:
books_GAPs_eva1 = pd.DataFrame(columns=pop_gaps[:-3], index = algo_names).fillna(0)
books_GAPs_eva2 = pd.DataFrame(columns=pop_gaps[:-3], index = algo_names).fillna(0)
books_GAPs_eva3 = pd.DataFrame(columns=pop_gaps[:-3], index = algo_names).fillna(0)
for combi in all_combinations:
    if "books" in combi:
        eva_str = combi[1]
        pop_n = combi[2]
        path_low = new_results_location+"low_gap_vals_books_"+eva_str+"_"+pop_n+".pickle"
        path_med = new_results_location+"med_gap_vals_books_"+eva_str+"_"+pop_n+".pickle"
        path_high = new_results_location+"high_gap_vals_books_"+eva_str+"_"+pop_n+".pickle"
        low_gap_vals = pd.DataFrame([pkl.load(open(path_low,"rb") )[2:]], columns=algo_names)
        med_gap_vals = pd.DataFrame([pkl.load(open(path_med,"rb") )[2:]], columns=algo_names)
        high_gap_vals = pd.DataFrame([pkl.load(open(path_high,"rb") )[2:]], columns=algo_names)
        
        if eva_str=="eva_one":
            for algorithm in algo_names:
                books_GAPs_eva1.at[algorithm, pop_n+"_low"] = low_gap_vals[algorithm][0]
                books_GAPs_eva1.at[algorithm, pop_n+"_med"] = med_gap_vals[algorithm][0]
                books_GAPs_eva1.at[algorithm, pop_n+"_high"] = high_gap_vals[algorithm][0]
                        
        elif eva_str=="eva_two":
            for algorithm in algo_names:
                books_GAPs_eva2.at[algorithm, pop_n+"_low"] = low_gap_vals[algorithm][0]
                books_GAPs_eva2.at[algorithm, pop_n+"_med"] = med_gap_vals[algorithm][0]
                books_GAPs_eva2.at[algorithm, pop_n+"_high"] = high_gap_vals[algorithm][0]
            
        else:
            for algorithm in algo_names:
                books_GAPs_eva3.at[algorithm, pop_n+"_low"] = low_gap_vals[algorithm][0]
                books_GAPs_eva3.at[algorithm, pop_n+"_med"] = med_gap_vals[algorithm][0]
                books_GAPs_eva3.at[algorithm, pop_n+"_high"] = high_gap_vals[algorithm][0]

In [135]:
books_GAPs_eva1

Unnamed: 0,pop_one_low,pop_one_med,pop_one_high,pop_two_low,pop_two_med,pop_two_high
UserKNN,25.2487,-54.5058,-73.2624,70.9276,-51.1751,-80.3198
ItemKNN,56.1647,-34.4437,-61.308,109.4333,-28.1203,-72.4008
UserKNN with means,22.1253,-54.9305,-73.4937,67.214,-51.4353,-80.8482
BPR,1118.9694,523.8554,295.2193,1434.7379,572.27,228.3943
MF,22.9898,-23.8985,-57.7958,44.6767,-18.2631,-62.3604
PMF,186.1511,44.9508,-5.2485,252.9929,53.0784,-15.1389
NMF,-47.1272,-72.8368,-82.7174,-32.9785,-70.8432,-85.5686
WMF,39.0438,109.9239,81.4392,49.089,118.902,67.0285
HPF,283.5865,155.3489,91.2869,369.6912,168.3932,71.7007
NeuMF,1030.6557,478.6574,266.5858,1323.5469,523.5644,204.6023


In [136]:
books_GAPs_eva1.columns =["eva_one_"+x for x in books_GAPs_eva1.columns]
books_GAPs_eva2.columns =["eva_two_"+x for x in books_GAPs_eva2.columns]
books_GAPs_eva3.columns =["eva_three_"+x for x in books_GAPs_eva3.columns]

In [137]:
books_GAPs_eva1

Unnamed: 0,eva_one_pop_one_low,eva_one_pop_one_med,eva_one_pop_one_high,eva_one_pop_two_low,eva_one_pop_two_med,eva_one_pop_two_high
UserKNN,25.2487,-54.5058,-73.2624,70.9276,-51.1751,-80.3198
ItemKNN,56.1647,-34.4437,-61.308,109.4333,-28.1203,-72.4008
UserKNN with means,22.1253,-54.9305,-73.4937,67.214,-51.4353,-80.8482
BPR,1118.9694,523.8554,295.2193,1434.7379,572.27,228.3943
MF,22.9898,-23.8985,-57.7958,44.6767,-18.2631,-62.3604
PMF,186.1511,44.9508,-5.2485,252.9929,53.0784,-15.1389
NMF,-47.1272,-72.8368,-82.7174,-32.9785,-70.8432,-85.5686
WMF,39.0438,109.9239,81.4392,49.089,118.902,67.0285
HPF,283.5865,155.3489,91.2869,369.6912,168.3932,71.7007
NeuMF,1030.6557,478.6574,266.5858,1323.5469,523.5644,204.6023


In [140]:
print(np.round(books_GAPs_eva1,1).to_latex())

\begin{tabular}{lrrrrrr}
\toprule
{} &  eva\_one\_pop\_one\_low &  eva\_one\_pop\_one\_med &  eva\_one\_pop\_one\_high &  eva\_one\_pop\_two\_low &  eva\_one\_pop\_two\_med &  eva\_one\_pop\_two\_high \\
\midrule
UserKNN            &                 25.2 &                -54.5 &                 -73.3 &                 70.9 &                -51.2 &                 -80.3 \\
ItemKNN            &                 56.2 &                -34.4 &                 -61.3 &                109.4 &                -28.1 &                 -72.4 \\
UserKNN with means &                 22.1 &                -54.9 &                 -73.5 &                 67.2 &                -51.4 &                 -80.8 \\
BPR                &               1119.0 &                523.9 &                 295.2 &               1434.7 &                572.3 &                 228.4 \\
MF                 &                 23.0 &                -23.9 &                 -57.8 &                 44.7 &                -18.3 &   

In [138]:
with open("latex_tables/books_GAPs_eva1.tex", "w") as tf:
    tf.write(np.round(books_GAPs_eva1,1).to_latex())
with open("latex_tables/books_GAPs_eva2.tex", "w") as tf:
    tf.write(np.round(books_GAPs_eva2,1).to_latex())
with open("latex_tables/books_GAPs_eva3.tex", "w") as tf:
    tf.write(np.round(books_GAPs_eva3,1).to_latex())

### Movies

#### ttests

In [130]:
for combi in all_combinations:
    
    if 'movies' in combi: 
        eva_str = combi[1]
        pop_n = combi[2]
        print(eva_str, pop_n)
        print('----------------')
        #path_ttest = 'gap_ttests_books_eva_one_pop_one.pickle'
        path_ttest = new_results_location+"gap_ttests_books_"+eva_str+"_"+pop_n+".pickle"
        print(pd.DataFrame(pkl.load(open(path_ttest,"rb") )))
        # pvalues = pd.DataFrame(pkl.load(open(path_ttest,"rb") ), index=algo_names, columns = ['low-med','low-high','med-high'])
        # print(pvalues)
        break

eva_one pop_one
----------------
              0            1            2
0   1.4977e-219   0.0000e+00  2.6645e-256
1    0.0000e+00   0.0000e+00   0.0000e+00
2   7.9210e-253   0.0000e+00  2.5660e-270
3   1.0265e-164  3.8971e-241  9.2192e-114
4   8.9939e-265   0.0000e+00  6.2246e-267
5    0.0000e+00   0.0000e+00   0.0000e+00
6    1.3921e-65  1.8760e-162  2.4769e-159
7   2.6136e-162  5.5610e-253  2.1764e-133
8    0.0000e+00   0.0000e+00   0.0000e+00
9    4.7120e-64   3.7146e-24   2.7075e-20
10  2.3977e-119  6.8786e-223  5.6330e-138
11   0.0000e+00   0.0000e+00   0.0000e+00
12   7.7775e-49  1.8195e-114  1.4313e-143


In [126]:
new_results_location = 'results/'
for combi in all_combinations:
    
    if 'movies' in combi: 
        eva_str = combi[1]
        pop_n = combi[2]
        print(eva_str, pop_n)
        print('----------------')
        #path_ttest = 'gap_ttests_books_eva_one_pop_one.pickle'
        path_ttest = new_results_location+"gap_ttests_books_"+eva_str+"_"+pop_n+".pickle"
        pvalues = pd.DataFrame(pkl.load(open(path_ttest,"rb") )[2:], index=algo_names, columns = ['low-med','low-high','med-high'])
        print(pvalues)
        

eva_one pop_one
----------------
                        low-med     low-high     med-high
UserKNN             7.9210e-253   0.0000e+00  2.5660e-270
ItemKNN             1.0265e-164  3.8971e-241  9.2192e-114
UserKNN with means  8.9939e-265   0.0000e+00  6.2246e-267
BPR                  0.0000e+00   0.0000e+00   0.0000e+00
MF                   1.3921e-65  1.8760e-162  2.4769e-159
PMF                 2.6136e-162  5.5610e-253  2.1764e-133
NMF                  0.0000e+00   0.0000e+00   0.0000e+00
WMF                  4.7120e-64   3.7146e-24   2.7075e-20
HPF                 2.3977e-119  6.8786e-223  5.6330e-138
NeuMF                0.0000e+00   0.0000e+00   0.0000e+00
VAECF                7.7775e-49  1.8195e-114  1.4313e-143
eva_one pop_two
----------------
                        low-med     low-high     med-high
UserKNN             2.6466e-304   0.0000e+00   0.0000e+00
ItemKNN             4.9315e-211  1.7961e-306   0.0000e+00
UserKNN with means   0.0000e+00   0.0000e+00   0.0000e+00
BPR   

#### gaps

In [122]:
movies_GAPs_eva1 = pd.DataFrame(columns=pop_gaps[:-3], index = algo_names).fillna(0)
movies_GAPs_eva2 = pd.DataFrame(columns=pop_gaps[:-3], index = algo_names).fillna(0)
movies_GAPs_eva3 = pd.DataFrame(columns=pop_gaps[:-3], index = algo_names).fillna(0)
for combi in all_combinations:
    if "movies" in combi:
        eva_str = combi[1]
        pop_n = combi[2]
        path_low = new_results_location+"low_gap_vals_movies_"+eva_str+"_"+pop_n+".pickle"
        path_med = new_results_location+"med_gap_vals_movies_"+eva_str+"_"+pop_n+".pickle"
        path_high = new_results_location+"high_gap_vals_movies_"+eva_str+"_"+pop_n+".pickle"
        low_gap_vals = pd.DataFrame([pkl.load(open(path_low,"rb") )[2:]], columns=algo_names)
        med_gap_vals = pd.DataFrame([pkl.load(open(path_med,"rb") )[2:]], columns=algo_names)
        high_gap_vals = pd.DataFrame([pkl.load(open(path_high,"rb") )[2:]], columns=algo_names)
        
        if eva_str=="eva_one":
            for algorithm in algo_names:
                movies_GAPs_eva1.at[algorithm, pop_n+"_low"] = low_gap_vals[algorithm][0]
                movies_GAPs_eva1.at[algorithm, pop_n+"_med"] = med_gap_vals[algorithm][0]
                movies_GAPs_eva1.at[algorithm, pop_n+"_high"] = high_gap_vals[algorithm][0]
                        
        elif eva_str=="eva_two":
            for algorithm in algo_names:
                movies_GAPs_eva2.at[algorithm, pop_n+"_low"] = low_gap_vals[algorithm][0]
                movies_GAPs_eva2.at[algorithm, pop_n+"_med"] = med_gap_vals[algorithm][0]
                movies_GAPs_eva2.at[algorithm, pop_n+"_high"] = high_gap_vals[algorithm][0]
            
        else:
            for algorithm in algo_names:
                movies_GAPs_eva3.at[algorithm, pop_n+"_low"] = low_gap_vals[algorithm][0]
                movies_GAPs_eva3.at[algorithm, pop_n+"_med"] = med_gap_vals[algorithm][0]
                movies_GAPs_eva3.at[algorithm, pop_n+"_high"] = high_gap_vals[algorithm][0]

In [123]:
movies_GAPs_eva1.columns =["eva_one_"+x for x in movies_GAPs_eva1.columns]
movies_GAPs_eva2.columns =["eva_two_"+x for x in movies_GAPs_eva2.columns]
movies_GAPs_eva3.columns =["eva_three_"+x for x in movies_GAPs_eva3.columns]

In [95]:
with open("latex_tables/movies_GAPs_eva1.tex", "w") as tf:
    tf.write(np.round(movies_GAPs_eva1,1).to_latex())
with open("latex_tables/movies_GAPs_eva2.tex", "w") as tf:
    tf.write(np.round(movies_GAPs_eva2,1).to_latex())
with open("latex_tables/movies_GAPs_eva3.tex", "w") as tf:
    tf.write(np.round(movies_GAPs_eva3,1).to_latex())

## NDCGs

### Music

In [61]:
music_NDCGs_eva1 = pd.DataFrame(columns=pop_gaps, index = algo_names).fillna(0)
music_NDCGs_eva2 = pd.DataFrame(columns=pop_gaps, index = algo_names).fillna(0)
music_NDCGs_eva3 = pd.DataFrame(columns=pop_gaps, index = algo_names).fillna(0)

In [62]:
for combi in all_combinations:
    if "music" in combi:
        eva_str = combi[1]
        pop_n = combi[2]
        #print(eva_str, pop_n)
        path_ttest = new_results_location+"NDCGs_music_"+eva_str+"_"+pop_n+".csv"
        ndcg_df = pd.read_csv(path_ttest, index_col=0)
        #print(ndcg_df)
        
        if eva_str=="eva_one":
            for algorithm in algo_names:
                music_NDCGs_eva1.at[algorithm, pop_n+"_low"] = ndcg_df.loc[algorithm]['low']
                music_NDCGs_eva1.at[algorithm, pop_n+"_med"] = ndcg_df.loc[algorithm]['med']
                music_NDCGs_eva1.at[algorithm, pop_n+"_high"] =ndcg_df.loc[algorithm]['high']
                        
        elif eva_str=="eva_two":
            for algorithm in algo_names:
                music_NDCGs_eva2.at[algorithm, pop_n+"_low"] = ndcg_df.loc[algorithm]['low']
                music_NDCGs_eva2.at[algorithm, pop_n+"_med"] = ndcg_df.loc[algorithm]['med']
                music_NDCGs_eva2.at[algorithm, pop_n+"_high"] =ndcg_df.loc[algorithm]['high']
            
        else:
            for algorithm in algo_names:
                music_NDCGs_eva3.at[algorithm, pop_n+"_low"] = ndcg_df.loc[algorithm]['low']
                music_NDCGs_eva3.at[algorithm, pop_n+"_med"] = ndcg_df.loc[algorithm]['med']
                music_NDCGs_eva3.at[algorithm, pop_n+"_high"] =ndcg_df.loc[algorithm]['high']

In [66]:
with open("latex_tables/music_NDCGs_eva1.tex", "w") as tf:
    tf.write(np.round(music_NDCGs_eva1,3).to_latex())
with open("latex_tables/music_NDCGs_eva2.tex", "w") as tf:
    tf.write(np.round(music_NDCGs_eva2,3).to_latex())
with open("latex_tables/music_NDCGs_eva3.tex", "w") as tf:
    tf.write(np.round(music_NDCGs_eva3,3).to_latex())

#### ttest

In [91]:
new_results_location = 'results/'
for combi in all_combinations:
    
    if 'music' in combi: 
        eva_str = combi[1]
        pop_n = combi[2]
        print(eva_str, pop_n)
        print('----------------')
        path_ttest = new_results_location+"NDCG_ttests_music_"+eva_str+"_"+pop_n+".csv"
        pvalues = pd.read_csv(path_ttest, index_col=0).drop(['Random','MostPop'])
        print((pvalues<0.005)&(pvalues>0.0)&(pvalues!=1.0))

eva_one pop_one
----------------
                    low-med  low-high  med-high
UserKNN               False     False      True
ItemKNN               False     False     False
UserKNN with means    False     False      True
BPR                    True      True     False
MF                    False     False     False
PMF                   False      True      True
NMF                   False     False     False
WMF                   False     False     False
HPF                    True      True     False
NeuMF                  True      True      True
VAECF                  True      True     False
eva_one pop_two
----------------
                    low-med  low-high  med-high
UserKNN               False     False     False
ItemKNN               False     False     False
UserKNN with means    False     False     False
BPR                    True      True     False
MF                    False     False     False
PMF                   False     False      True
NMF                   

### Books

In [145]:
books_NDCGs_eva1 = pd.DataFrame(columns=pop_gaps[:-3], index = algo_names).fillna(0)
books_NDCGs_eva2 = pd.DataFrame(columns=pop_gaps[:-3], index = algo_names).fillna(0)
books_NDCGs_eva3 = pd.DataFrame(columns=pop_gaps[:-3], index = algo_names).fillna(0)

In [146]:
for combi in all_combinations:
    if "books" in combi:
        eva_str = combi[1]
        pop_n = combi[2]
        #print(eva_str, pop_n)
        path_ttest = new_results_location+"NDCGs_books_"+eva_str+"_"+pop_n+".csv"
        ndcg_df = pd.read_csv(path_ttest, index_col=0)
        #print(ndcg_df)
        
        if eva_str=="eva_one":
            for algorithm in algo_names:
                books_NDCGs_eva1.at[algorithm, pop_n+"_low"] = ndcg_df.loc[algorithm]['low']
                books_NDCGs_eva1.at[algorithm, pop_n+"_med"] = ndcg_df.loc[algorithm]['med']
                books_NDCGs_eva1.at[algorithm, pop_n+"_high"] =ndcg_df.loc[algorithm]['high']
                        
        elif eva_str=="eva_two":
            for algorithm in algo_names:
                books_NDCGs_eva2.at[algorithm, pop_n+"_low"] = ndcg_df.loc[algorithm]['low']
                books_NDCGs_eva2.at[algorithm, pop_n+"_med"] = ndcg_df.loc[algorithm]['med']
                books_NDCGs_eva2.at[algorithm, pop_n+"_high"] =ndcg_df.loc[algorithm]['high']
            
        else:
            for algorithm in algo_names:
                books_NDCGs_eva3.at[algorithm, pop_n+"_low"] = ndcg_df.loc[algorithm]['low']
                books_NDCGs_eva3.at[algorithm, pop_n+"_med"] = ndcg_df.loc[algorithm]['med']
                books_NDCGs_eva3.at[algorithm, pop_n+"_high"] =ndcg_df.loc[algorithm]['high']

In [147]:
with open("latex_tables/books_NDCGs_eva1.tex", "w") as tf:
    tf.write(np.round(books_NDCGs_eva1,3).to_latex())
with open("latex_tables/books_NDCGs_eva2.tex", "w") as tf:
    tf.write(np.round(books_NDCGs_eva2,3).to_latex())
with open("latex_tables/books_NDCGs_eva3.tex", "w") as tf:
    tf.write(np.round(books_NDCGs_eva3,3).to_latex())

In [149]:
new_results_location = 'results/'
for combi in all_combinations:
    
    if 'books' in combi: 
        eva_str = combi[1]
        pop_n = combi[2]
        print(eva_str, pop_n)
        print('----------------')
        path_ttest = new_results_location+"NDCG_ttests_books_"+eva_str+"_"+pop_n+".csv"
        pvalues = pd.read_csv(path_ttest, index_col=0).drop(['Random','MostPop'])
        print((pvalues<0.005)&(pvalues>0.0)&(pvalues!=1.0))
        print(pvalues)

eva_one pop_one
----------------
                    low-med  low-high  med-high
UserKNN               False     False     False
ItemKNN               False      True     False
UserKNN with means    False     False     False
BPR                    True      True      True
MF                    False     False     False
PMF                    True      True     False
NMF                   False     False     False
WMF                    True      True      True
HPF                    True      True      True
NeuMF                  True      True      True
VAECF                  True      True      True
                       low-med    low-high    med-high
UserKNN             8.1640e-01  8.2310e-01  6.3015e-01
ItemKNN             4.0664e-02  2.1307e-03  7.0850e-02
UserKNN with means  5.8428e-01  6.7744e-01  4.0846e-01
BPR                 3.3069e-24  2.5890e-19  5.6601e-05
MF                  6.9806e-02  9.0507e-01  8.8826e-02
PMF                 3.5086e-04  1.2844e-03  4.8505e-01
NMF   

### Movies

In [116]:
movies_NDCGs_eva1 = pd.DataFrame(columns=pop_gaps[:-3], index = algo_names).fillna(0)
movies_NDCGs_eva2 = pd.DataFrame(columns=pop_gaps[:-3], index = algo_names).fillna(0)
movies_NDCGs_eva3 = pd.DataFrame(columns=pop_gaps[:-3], index = algo_names).fillna(0)

In [117]:
for combi in all_combinations:
    if "movies" in combi:
        eva_str = combi[1]
        pop_n = combi[2]
        #print(eva_str, pop_n)
        path_ttest = new_results_location+"NDCGs_movies_"+eva_str+"_"+pop_n+".csv"
        ndcg_df = pd.read_csv(path_ttest, index_col=0)
        #print(ndcg_df)
        
        if eva_str=="eva_one":
            for algorithm in algo_names:
                movies_NDCGs_eva1.at[algorithm, pop_n+"_low"] = ndcg_df.loc[algorithm]['low']
                movies_NDCGs_eva1.at[algorithm, pop_n+"_med"] = ndcg_df.loc[algorithm]['med']
                movies_NDCGs_eva1.at[algorithm, pop_n+"_high"] =ndcg_df.loc[algorithm]['high']
                        
        elif eva_str=="eva_two":
            for algorithm in algo_names:
                movies_NDCGs_eva2.at[algorithm, pop_n+"_low"] = ndcg_df.loc[algorithm]['low']
                movies_NDCGs_eva2.at[algorithm, pop_n+"_med"] = ndcg_df.loc[algorithm]['med']
                movies_NDCGs_eva2.at[algorithm, pop_n+"_high"] =ndcg_df.loc[algorithm]['high']
            
        else:
            for algorithm in algo_names:
                movies_NDCGs_eva3.at[algorithm, pop_n+"_low"] = ndcg_df.loc[algorithm]['low']
                movies_NDCGs_eva3.at[algorithm, pop_n+"_med"] = ndcg_df.loc[algorithm]['med']
                movies_NDCGs_eva3.at[algorithm, pop_n+"_high"] =ndcg_df.loc[algorithm]['high']

In [121]:
with open("latex_tables/movies_NDCGs_eva1.tex", "w") as tf:
    tf.write(np.round(movies_NDCGs_eva1,3).to_latex())
with open("latex_tables/movies_NDCGs_eva2.tex", "w") as tf:
    tf.write(np.round(movies_NDCGs_eva2,3).to_latex())
with open("latex_tables/movies_NDCGs_eva3.tex", "w") as tf:
    tf.write(np.round(movies_NDCGs_eva3,3).to_latex())

In [144]:
new_results_location = 'results/'
for combi in all_combinations:
    
    if 'movies' in combi: 
        eva_str = combi[1]
        pop_n = combi[2]
        print(eva_str, pop_n)
        print('----------------')
        path_ttest = new_results_location+"NDCG_ttests_movies_"+eva_str+"_"+pop_n+".csv"
        pvalues = pd.read_csv(path_ttest, index_col=0).drop(['MostPop'])
        print((pvalues<0.005)&(pvalues>0.0)&(pvalues!=1.0))
        print(pvalues)

eva_one pop_one
----------------
                    low-med  low-high  med-high
UserKNN               False     False     False
ItemKNN               False      True      True
UserKNN with means    False     False     False
BPR                   False     False     False
MF                     True      True      True
PMF                   False     False     False
NMF                    True      True     False
WMF                   False     False     False
HPF                   False     False     False
NeuMF                 False     False     False
VAECF                 False     False     False
                       low-med    low-high    med-high
UserKNN             1.7660e-02  1.0422e-02  8.3786e-02
ItemKNN             3.3837e-02  3.7760e-23  3.5101e-42
UserKNN with means  2.3507e-02  2.3507e-02         NaN
BPR                 5.2238e-02  7.2814e-01  1.1437e-01
MF                  2.1499e-08  6.1920e-27  3.9179e-17
PMF                 7.4911e-01  5.6431e-01  3.0936e-01
NMF   