# Libraries

In [2]:
import pickle as pkl
import pandas as pd
import numpy as np
from modelling_mf import calculate_average_PL, calculate_ARP
from scipy.stats import mannwhitneyu

In [79]:
# Algorithm whose result folder is read first (reassigned in the Cornac section below).
algo_name = "MF"
data_strategy = "epinion"  # change depending on which dataset we want to examine

# Analysis

## LKPY

In [80]:
# Configurations evaluated per algorithm; str() of each dict is part of the result file name.
algo_versions = {"MF": [{"bias": True}, {"bias": False}]}

In [81]:
# Configurations of the currently selected algorithm.
versions = algo_versions[algo_name]

In [82]:
# Folder holding the pickled results of the selected algorithm.
file_location = f"experimental_results/{algo_name}/"

In [83]:
# Load the summary metrics of every configuration for the selected dataset.
results = []
for args in versions:
    # One pickle per configuration: <dataset>_<str(config dict)>.pkl
    results_path = file_location + data_strategy + "_" + str(args) + ".pkl"
    # The context manager closes the handle even if unpickling raises.
    # NOTE: unpickling can execute arbitrary code; only load files produced by this project.
    with open(results_path, "rb") as file:
        result = pkl.load(file)
    results.append(result)

In [84]:
# String form of each configuration dict, e.g. "{'bias': True}".
stringed_versions = list(map(str, versions))

In [85]:
# Label each configuration with the string form of its bias flag ("True" / "False").
# The flag is read straight from the configuration dict; parsing the dict's repr
# would break as soon as a configuration had more than one key.
bias = [str(args["bias"]) for args in versions]

In [86]:
# Library label used as the second index level for these rows.
library = ["Lenskit"]

In [87]:
# Row labels: every bias setting paired with the library label, duplicates removed.
index = pd.MultiIndex.from_product([bias, library], names=["Bias", "Library"])
index = index.drop_duplicates()

In [88]:
# One row per loaded result, labelled by the (Bias, Library) index built above.
results_lkpy = pd.DataFrame(results, index=index)

In [89]:
# Show the Lenskit summary table.
results_lkpy

Unnamed: 0_level_0,Unnamed: 1_level_0,pop_corr,RMSE,NDCG,ARP,ave_PL,ACLT,AggDiv
Bias,Library,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
True,Lenskit,"(0.12584777417168197, 0.0)",1.03,0.001115,0.003262,402.394492,0.211626,0.033192
False,Lenskit,"(0.01957999723391162, 1.5892081148164622e-26)",1.24,0.000467,0.000513,-54.10702,0.887888,0.041994


In [90]:
# Load the detailed ("correct_detailed_per_item_") results of every Lenskit configuration.
lkpy_detailed_results = []
for args in versions:
    detailed_path = file_location + 'correct_detailed_per_item_' + data_strategy + "_" + str(args) + ".pkl"
    # The context manager closes the handle even if unpickling raises.
    # NOTE: unpickling can execute arbitrary code; only load files produced by this project.
    with open(detailed_path, "rb") as file:
        result = pkl.load(file)
    lkpy_detailed_results.append(result)

## Cornac

In [91]:
# Switch to the Cornac runs. NOTE: this rebinds algo_versions and algo_name, so
# re-running cells of the LKPY section after this point no longer targets the "MF" results.
algo_versions = {"CornacMF": [{"bias": True}, {"bias": False}]}
algo_name = "CornacMF"

In [92]:
# Configurations of the currently selected algorithm.
versions = algo_versions[algo_name]

In [93]:
# Folder holding the pickled results of the selected algorithm.
file_location = f"experimental_results/{algo_name}/"

In [94]:
# Load the summary metrics of every configuration for the selected dataset.
results = []
for args in versions:
    # One pickle per configuration: <dataset>_<str(config dict)>.pkl
    results_path = file_location + data_strategy + "_" + str(args) + ".pkl"
    # The context manager closes the handle even if unpickling raises.
    # NOTE: unpickling can execute arbitrary code; only load files produced by this project.
    with open(results_path, "rb") as file:
        result = pkl.load(file)
    results.append(result)

In [95]:
# String form of each configuration dict, e.g. "{'bias': True}".
stringed_versions = list(map(str, versions))

In [96]:
# Label each configuration with the string form of its bias flag ("True" / "False").
# The flag is read straight from the configuration dict; parsing the dict's repr
# would break as soon as a configuration had more than one key.
bias = [str(args["bias"]) for args in versions]

In [97]:
# Library label used as the second index level for these rows.
library = ["Cornac"]

In [98]:
# Row labels: every bias setting paired with the library label, duplicates removed.
index = pd.MultiIndex.from_product([bias, library], names=["Bias", "Library"])
index = index.drop_duplicates()

In [99]:
# One row per loaded result, labelled by the (Bias, Library) index built above.
results_cornac = pd.DataFrame(results, index=index)

In [100]:
# Show the Cornac summary table.
results_cornac

Unnamed: 0_level_0,Unnamed: 1_level_0,pop_corr,RMSE,NDCG,ARP,ave_PL,ACLT,AggDiv
Bias,Library,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
True,Cornac,"(0.009046154316153961, 8.478315356882231e-07)",1.029231,0.000197,0.000848,19.814337,0.0,7.8e-05
False,Cornac,"(0.0013471572948617603, 0.4633919937605722)",1.152307,6.2e-05,0.000232,-53.845347,0.002121,0.00081


In [101]:
# Load the detailed ("correct_detailed_per_item_") results of every Cornac configuration.
cornac_detailed_results = []
for args in versions:
    detailed_path = file_location + 'correct_detailed_per_item_' + data_strategy + "_" + str(args) + ".pkl"
    # The context manager closes the handle even if unpickling raises.
    # NOTE: unpickling can execute arbitrary code; only load files produced by this project.
    with open(detailed_path, "rb") as file:
        result = pkl.load(file)
    cornac_detailed_results.append(result)

# Merge

In [102]:
# Stack both libraries' summary tables, then order and re-key the rows by (Library, Bias).
mf_metrics = pd.concat([results_lkpy, results_cornac]).reset_index()
mf_metrics = mf_metrics.sort_values(["Library", "Bias"])
mf_metrics = mf_metrics.set_index(["Library", "Bias"])

In [103]:
# Column order used for the combined table from here on.
metrics_order = ["pop_corr", "ARP", "ave_PL", "ACLT", "AggDiv", "RMSE", "NDCG"]
mf_metrics = mf_metrics[metrics_order]

In [104]:
# Replace the internal metric keys with the column headings used in the report.
display_names = {
    "pop_corr": "PopCorr",
    "ave_PL": "PL",
    "ACLT": "APLT",
    "NDCG": "NDCG@10",
}
mf_metrics = mf_metrics.rename(columns=display_names)

In [105]:
# Each PopCorr cell holds a pair (presumably correlation and p-value - confirm against
# the code that wrote the pickles). Element 1 is compared with 0.005 to flag significance;
# element 0 then replaces the pair as the PopCorr value.
pop_corr_pairs = mf_metrics.PopCorr
mf_metrics['Significance'] = pop_corr_pairs.apply(lambda pair: bool(pair[1] < 0.005))
mf_metrics['PopCorr'] = pop_corr_pairs.apply(lambda pair: pair[0])

In [106]:
# Show the combined table for both libraries.
mf_metrics

Unnamed: 0_level_0,Unnamed: 1_level_0,PopCorr,ARP,PL,APLT,AggDiv,RMSE,NDCG@10,Significance
Library,Bias,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Cornac,False,0.001347,0.000232,-53.845347,0.002121,0.00081,1.152307,6.2e-05,False
Cornac,True,0.009046,0.000848,19.814337,0.0,7.8e-05,1.029231,0.000197,True
Lenskit,False,0.01958,0.000513,-54.10702,0.887888,0.041994,1.24,0.000467,True
Lenskit,True,0.125848,0.003262,402.394492,0.211626,0.033192,1.03,0.001115,True


In [107]:
# Emit the table as LaTeX, rounded to 3 decimals, without the APLT and Significance columns.
print(mf_metrics.drop(['APLT','Significance'], axis=1).round(3).to_latex())

\begin{tabular}{llrrrrrr}
\toprule
        &      &  PopCorr &    ARP &       PL &  AggDiv &   RMSE &  NDCG@10 \\
Library & Bias &          &        &          &         &        &          \\
\midrule
Cornac & False &    0.001 &  0.000 &  -53.845 &   0.001 &  1.152 &    0.000 \\
        & True &    0.009 &  0.001 &   19.814 &   0.000 &  1.029 &    0.000 \\
Lenskit & False &    0.020 &  0.001 &  -54.107 &   0.042 &  1.240 &    0.000 \\
        & True &    0.126 &  0.003 &  402.394 &   0.033 &  1.030 &    0.001 \\
\bottomrule
\end{tabular}



In [108]:
import pickle

# Persist the combined table (rounded to 3 decimals) for the selected dataset.
combined_path = "metrics_combined/" + data_strategy + "_all_mf.pkl"
with open(combined_path, "wb") as f:
    pickle.dump(mf_metrics.round(3), f)

# Significance tests

## 1. Average Recommendation Popularity
The tests in this section are run on the `recommendation` values of each run.

In [109]:
# Order matters for the indices printed by the tests below: Cornac runs come first,
# then Lenskit, each in the order of `versions` (bias=True, then bias=False).
mf_results = cornac_detailed_results+lkpy_detailed_results

In [110]:
def highest_average(df_list, column_name = 'recommendation'):
    """Find the DataFrame whose `column_name` column has the largest mean.

    Parameters
    ----------
    df_list : list of DataFrame
        Frames to compare; each must contain `column_name`.
    column_name : str
        Column whose mean is compared.

    Returns
    -------
    tuple
        ``(best_mean, best_index)``. For an empty list (or when no mean compares
        greater, e.g. all NaN) this is ``(-inf, -1)``.

    The result is also printed.
    """
    # Start from -inf so any real mean wins. The previous start value `-10^6`
    # is a bitwise XOR in Python and evaluates to -16, so inputs whose means
    # were all <= -16 wrongly produced (-16, -1).
    best_mean = float("-inf")
    best_index = -1
    for i, df in enumerate(df_list):
        mean = np.mean(df[column_name].values)
        if mean > best_mean:
            best_mean = mean
            best_index = i
    print(best_mean, best_index)
    return best_mean, best_index

In [111]:
def mannwhitneyu_test(df_list, alt = 'greater', column_name = 'recommendation'):
    """Mann-Whitney U test of the highest-mean frame against every other frame.

    The frame with the largest mean of `column_name` (as chosen by
    `highest_average`) is the first sample; each remaining frame is the second
    sample in turn. `alt` is passed to scipy as the `alternative` argument.

    Returns the list of p-values, ordered by the remaining frames' positions
    in `df_list`. The positions being tested are printed.
    """
    _, top_idx = highest_average(df_list, column_name)
    all_idxs = list(range(len(df_list)))
    # Every position except the one holding the highest mean.
    other_idxs = all_idxs[:top_idx] + all_idxs[top_idx+1:]
    print(other_idxs)
    reference = df_list[top_idx][column_name].values
    return [
        mannwhitneyu(reference, df_list[idx][column_name].values, alternative = alt)[1]
        for idx in other_idxs
    ]

In [112]:
# Test the run with the highest mean 'recommendation' value against each other run.
mannwhitneyu_test(mf_results) 

0.003262157336492948 2
[0, 1, 3]


[0.0, 0.0, 0.0]

## 2. Popularity Lift

In [113]:
# Popularity lift in percent: relative change from the 'profile' value to the
# 'recommendation' value, added as a new column to every detailed result frame.
for detail_df in mf_results:
    rec, prof = detail_df['recommendation'], detail_df['profile']
    detail_df['popularity_lift'] = (rec - prof) / prof * 100

In [114]:
# Same test on the 'popularity_lift' column.
mannwhitneyu_test(mf_results, column_name = 'popularity_lift') 

402.3951463607632 2
[0, 1, 3]


[0.0, 0.0, 0.0]