# Libraries

In [1]:
import pickle as pkl
import pandas as pd

In [2]:
# Dataset whose stored experiment results are analysed; edit this to examine another dataset.
data_strategy = "epinion"
# Algorithm key: selects the variant list and the results sub-folder further down.
algo_name = "MF"

# Analysis

## LKPY

In [3]:
# Argument variants per algorithm: the "bias" flag switched on and off.
algo_versions = {"MF": [{"bias": flag} for flag in (True, False)]}

In [4]:
# Argument variants to analyse for the currently selected algorithm.
versions = algo_versions[algo_name]

In [5]:
# Folder holding the pickled results of the selected algorithm.
file_location = f"experimental_results/{algo_name}/"

In [6]:
# Load one pickled result per argument variant, in the same order as `versions`.
# File names follow "<data_strategy>_<str(args)>.pkl" inside `file_location`.
# NOTE(security): pickle.load can execute arbitrary code; only read result files
# produced by a trusted run of the experiments.
results = []
for args in versions:
    result_path = file_location + data_strategy + "_" + str(args) + ".pkl"
    # The context manager closes the handle even if unpickling raises.
    with open(result_path, "rb") as file:
        result = pkl.load(file)
    results.append(result)

In [7]:
# String form of every argument variant (the same text that appears in the result file names).
stringed_versions = list(map(str, versions))

In [8]:
# String label ("True" / "False") of each variant's "bias" flag, used as the
# "Bias" level of the row index. The flag is read straight from the variant
# dict rather than recovered by splitting the dict's repr, which would break
# as soon as a variant carried more than one argument.
bias = [str(args["bias"]) for args in versions]

In [9]:
# Library label for this section; becomes the "Library" level of the row index.
library = ["Lenskit"]

In [10]:
# Row labels for the results table: every (Bias, Library) combination, duplicates removed.
index_levels = [bias, library]
index = pd.MultiIndex.from_product(index_levels, names=["Bias", "Library"])
index = index.drop_duplicates()

In [11]:
# Tabulate the loaded results, labelled by the (Bias, Library) index.
results_lkpy = pd.DataFrame(results, index=index)

In [12]:
# Display the Lenskit results table.
results_lkpy

Unnamed: 0_level_0,Unnamed: 1_level_0,pop_corr,RMSE,NDCG,ARP,ave_PL,ACLT,AggDiv
Bias,Library,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
True,Lenskit,"(0.12566312145843653, 0.0)",1.03,0.001079,0.003251,397.916272,0.204315,0.033334
False,Lenskit,"(0.019999590040108053, 1.328219081818236e-27)",1.236,0.000516,0.000508,-54.150247,0.79866,0.042389


## Cornac

In [13]:
# Switch the analysis to the Cornac results: new algorithm key and its variant list.
# Note that this overwrites the `algo_name` / `algo_versions` used in the section above.
algo_name = "CornacMF"
algo_versions = {algo_name: [{"bias": flag} for flag in (True, False)]}

In [14]:
# Argument variants to analyse for the currently selected algorithm.
versions = algo_versions[algo_name]

In [15]:
# Folder holding the pickled results of the selected algorithm.
file_location = f"experimental_results/{algo_name}/"

In [16]:
# Load one pickled result per argument variant, in the same order as `versions`.
# File names follow "<data_strategy>_<str(args)>.pkl" inside `file_location`.
# NOTE(security): pickle.load can execute arbitrary code; only read result files
# produced by a trusted run of the experiments.
results = []
for args in versions:
    result_path = file_location + data_strategy + "_" + str(args) + ".pkl"
    # The context manager closes the handle even if unpickling raises.
    with open(result_path, "rb") as file:
        result = pkl.load(file)
    results.append(result)

In [17]:
# String form of every argument variant (the same text that appears in the result file names).
stringed_versions = list(map(str, versions))

In [18]:
# String label ("True" / "False") of each variant's "bias" flag, used as the
# "Bias" level of the row index. The flag is read straight from the variant
# dict rather than recovered by splitting the dict's repr, which would break
# as soon as a variant carried more than one argument.
bias = [str(args["bias"]) for args in versions]

In [19]:
# Library label for this section; becomes the "Library" level of the row index.
library = ["Cornac"]

In [20]:
# Row labels for the results table: every (Bias, Library) combination, duplicates removed.
index_levels = [bias, library]
index = pd.MultiIndex.from_product(index_levels, names=["Bias", "Library"])
index = index.drop_duplicates()

In [21]:
# Tabulate the loaded results, labelled by the (Bias, Library) index.
results_cornac = pd.DataFrame(results, index=index)

In [22]:
# Display the Cornac results table.
results_cornac

Unnamed: 0_level_0,Unnamed: 1_level_0,pop_corr,RMSE,NDCG,ARP,ave_PL,ACLT,AggDiv
Bias,Library,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
True,Cornac,"(0.008696644921763967, 2.2038823362318165e-06)",1.029269,0.000202,0.000826,16.711729,0.0,7.8e-05
False,Cornac,"(0.001288100608227683, 0.48322334822010893)",1.153752,5.2e-05,0.000226,-54.312121,0.002528,0.000753


# Merge

In [23]:
# Stack both libraries' tables, then sort and re-index the rows by (Library, Bias).
combined = pd.concat([results_lkpy, results_cornac])
combined = combined.reset_index().sort_values(["Library", "Bias"])
mf_metrics = combined.set_index(["Library", "Bias"])

In [24]:
# Left-to-right column order wanted in the merged table.
metrics_order = [
    "pop_corr",
    "ARP",
    "ave_PL",
    "ACLT",
    "AggDiv",
    "RMSE",
    "NDCG",
]
mf_metrics = mf_metrics.loc[:, metrics_order]

In [25]:
# Presentation names for the columns whose labels change in the final table.
column_renames = {
    "pop_corr": "PopCorr",
    "ave_PL": "PL",
    "ACLT": "APLT",
    "NDCG": "NDCG@10",
}
mf_metrics = mf_metrics.rename(columns=column_renames)

In [26]:
# Each PopCorr cell holds a pair (presumably (coefficient, p-value) -- confirm against
# the code that produced the result files). Keep the first element in PopCorr and record
# in Significance whether the second element is below the threshold.
# NOTE: this cell cannot be re-run on its own, because PopCorr no longer holds pairs
# afterwards; re-run from the merge cell instead.
P_VALUE_THRESHOLD = 0.005
pop_corr_pairs = mf_metrics["PopCorr"]
mf_metrics = mf_metrics.assign(
    PopCorr=pop_corr_pairs.apply(lambda pair: pair[0]),
    Significance=pop_corr_pairs.apply(lambda pair: pair[1] < P_VALUE_THRESHOLD),
)

In [27]:
# Display the merged metrics table.
mf_metrics

Unnamed: 0_level_0,Unnamed: 1_level_0,PopCorr,ARP,PL,APLT,AggDiv,RMSE,NDCG@10,Significance
Library,Bias,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Cornac,False,0.001288,0.000226,-54.312121,0.002528,0.000753,1.153752,5.2e-05,False
Cornac,True,0.008697,0.000826,16.711729,0.0,7.8e-05,1.029269,0.000202,True
Lenskit,False,0.02,0.000508,-54.150247,0.79866,0.042389,1.236,0.000516,True
Lenskit,True,0.125663,0.003251,397.916272,0.204315,0.033334,1.03,0.001079,True


In [28]:
# Print the LaTeX source of the table without the APLT and Significance columns,
# with values rounded to 3 decimals.
latex_table = mf_metrics.drop(columns=['APLT', 'Significance']).round(3)
print(latex_table.to_latex())

\begin{tabular}{llrrrrrr}
\toprule
        &      &  PopCorr &    ARP &       PL &  AggDiv &   RMSE &  NDCG@10 \\
Library & Bias &          &        &          &         &        &          \\
\midrule
Cornac & False &    0.001 &  0.000 &  -54.312 &   0.001 &  1.154 &    0.000 \\
        & True &    0.009 &  0.001 &   16.712 &   0.000 &  1.029 &    0.000 \\
Lenskit & False &    0.020 &  0.001 &  -54.150 &   0.042 &  1.236 &    0.001 \\
        & True &    0.126 &  0.003 &  397.916 &   0.033 &  1.030 &    0.001 \\
\bottomrule
\end{tabular}



In [29]:
import pickle

# Persist the merged table, rounded to 3 decimals, under a dataset-specific file name.
output_path = f"metrics_combined/{data_strategy}_all_mf.pkl"
with open(output_path, "wb") as f:
    pickle.dump(mf_metrics.round(3), f)