# Libraries

In [1]:
import pickle as pkl
import pandas as pd

In [2]:
# Notebook configuration.
# `algo_name` selects the entry of `algo_versions` to analyse and the
# sub-directory of "experimental_results/" the pickles are read from.
algo_name = "MF"
# `data_strategy` is the dataset prefix of every result file name and of the
# combined output file written at the end of the notebook.
data_strategy = "ml1m"  # change depending on which dataset we want to examine

# Analysis

## LKPY

In [3]:
# Configurations evaluated per algorithm: one keyword dict per run, here the
# "bias" flag switched on and then off. The order matters because the str()
# of each dict is part of the result file name and of the table index.
algo_versions = {"MF": [{"bias": use_bias} for use_bias in (True, False)]}

In [4]:
# Configurations to load for the currently selected algorithm.
versions = algo_versions[algo_name]

In [5]:
# Directory (with trailing slash) holding the pickled results of `algo_name`.
file_location = f"experimental_results/{algo_name}/"

In [6]:
# Load one pickled result per configuration in `versions`, in that order.
# File names are "<data_strategy>_<str(args)>.pkl" inside `file_location`.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only point this at result files produced by trusted runs.
results = []
for args in versions:
    result_path = file_location + data_strategy + "_" + str(args) + ".pkl"
    # The context manager closes the handle even when unpickling raises.
    with open(result_path, "rb") as result_file:
        result = pkl.load(result_file)
    results.append(result)

In [7]:
# Text form of every configuration dict, e.g. "{'bias': True}".
stringed_versions = list(map(str, versions))

In [8]:
# Pull the value of the last key out of each stringified configuration:
# keep what follows the final ": " and cut it at the closing brace,
# so "{'bias': True}" becomes "True". The values stay strings.
bias = [
    version_text.rsplit(": ", 1)[-1].split("}")[0]
    for version_text in stringed_versions
]

In [9]:
# Library label used as the second index level of this section's table.
library = ["Lenskit"]

In [10]:
# Row labels for the results table: every (Bias, Library) combination,
# with repeated combinations removed.
index_levels = [bias, library]
index = pd.MultiIndex.from_product(index_levels, names=["Bias", "Library"])
index = index.drop_duplicates()

In [11]:
# Table of the loaded results, one row per entry of `results`,
# labelled with the (Bias, Library) index built above.
results_lkpy = pd.DataFrame(data=results, index=index)

In [12]:
# Show the LensKit results table.
results_lkpy

Unnamed: 0_level_0,Unnamed: 1_level_0,pop_corr,RMSE,NDCG,ARP,ave_PL,ACLT,AggDiv
Bias,Library,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
True,Lenskit,"(0.18281201180408185, 3.254366339717149e-29)",0.866,0.039624,0.154846,8.191902,3.311589,0.135456
False,Lenskit,"(0.1530897593020129, 7.131834200649564e-21)",0.86,0.037897,0.125644,-14.354416,5.217053,0.189692


## Cornac

In [13]:
# Re-point the notebook at the Cornac results. This rebinds `algo_name` and
# `algo_versions`, so the cells below read from the "CornacMF" directory.
algo_name = "CornacMF"
algo_versions = {algo_name: [{"bias": use_bias} for use_bias in (True, False)]}

In [14]:
# Configurations to load for the currently selected algorithm.
versions = algo_versions[algo_name]

In [15]:
# Directory (with trailing slash) holding the pickled results of `algo_name`.
file_location = f"experimental_results/{algo_name}/"

In [16]:
# Load one pickled result per configuration in `versions`, in that order.
# File names are "<data_strategy>_<str(args)>.pkl" inside `file_location`.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only point this at result files produced by trusted runs.
results = []
for args in versions:
    result_path = file_location + data_strategy + "_" + str(args) + ".pkl"
    # The context manager closes the handle even when unpickling raises.
    with open(result_path, "rb") as result_file:
        result = pkl.load(result_file)
    results.append(result)

In [17]:
# Text form of every configuration dict, e.g. "{'bias': True}".
stringed_versions = list(map(str, versions))

In [18]:
# Pull the value of the last key out of each stringified configuration:
# keep what follows the final ": " and cut it at the closing brace,
# so "{'bias': True}" becomes "True". The values stay strings.
bias = [
    version_text.rsplit(": ", 1)[-1].split("}")[0]
    for version_text in stringed_versions
]

In [19]:
# Library label used as the second index level of this section's table.
library = ["Cornac"]

In [20]:
# Row labels for the results table: every (Bias, Library) combination,
# with repeated combinations removed.
index_levels = [bias, library]
index = pd.MultiIndex.from_product(index_levels, names=["Bias", "Library"])
index = index.drop_duplicates()

In [21]:
# Table of the loaded results, one row per entry of `results`,
# labelled with the (Bias, Library) index built above.
results_cornac = pd.DataFrame(data=results, index=index)

In [22]:
# Show the Cornac results table.
results_cornac

Unnamed: 0_level_0,Unnamed: 1_level_0,pop_corr,RMSE,NDCG,ARP,ave_PL,ACLT,AggDiv
Bias,Library,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
True,Cornac,"(0.23667686838048066, 2.3769418029025756e-48)",0.856338,0.0589,0.191816,35.601525,2.269205,0.104965
False,Cornac,"(0.27121214539452443, 1.7056375151033963e-63)",0.856538,0.064022,0.191648,34.398847,2.581291,0.137615


# Merge

In [23]:
# Stack the two per-library tables, order the rows by library and then by
# bias flag, and rebuild the index as (Library, Bias).
combined_results = pd.concat([results_lkpy, results_cornac]).reset_index()
combined_sorted = combined_results.sort_values(["Library", "Bias"])
mf_metrics = combined_sorted.set_index(["Library", "Bias"])

In [24]:
# Column order used for the merged table from here on.
metrics_order = ["pop_corr", "ARP", "ave_PL", "ACLT", "AggDiv", "RMSE", "NDCG"]
mf_metrics = mf_metrics.loc[:, metrics_order]

In [25]:
# Map the raw metric keys to the column headings used in the final table.
display_names = {
    "pop_corr": "PopCorr",
    "ave_PL": "PL",
    "ACLT": "APLT",
    "NDCG": "NDCG@10",
}
mf_metrics = mf_metrics.rename(columns=display_names)

In [26]:
# Split the PopCorr column, whose cells are 2-tuples, into two columns:
#   * PopCorr      <- first tuple element
#   * Significance <- whether the second element is below the threshold
# NOTE(review): the pairs look like (correlation, p-value) -- confirm
# against the code that produced the pickles.
# This cell consumes the tuples: afterwards PopCorr holds scalars, so
# re-running it on its own fails; re-run from the merge cell instead.
SIGNIFICANCE_THRESHOLD = 0.005

pop_corr_pairs = mf_metrics['PopCorr']
# Derive both columns before assigning, so neither depends on the overwrite.
pop_corr_values = pop_corr_pairs.apply(lambda pair: pair[0])
is_significant = pop_corr_pairs.apply(
    lambda pair: bool(pair[1] < SIGNIFICANCE_THRESHOLD)
)

mf_metrics['Significance'] = is_significant
mf_metrics['PopCorr'] = pop_corr_values

In [27]:
# Show the merged metrics table.
mf_metrics

Unnamed: 0_level_0,Unnamed: 1_level_0,PopCorr,ARP,PL,APLT,AggDiv,RMSE,NDCG@10,Significance
Library,Bias,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Cornac,False,0.271212,0.191648,34.398847,2.581291,0.137615,0.856538,0.064022,True
Cornac,True,0.236677,0.191816,35.601525,2.269205,0.104965,0.856338,0.0589,True
Lenskit,False,0.15309,0.125644,-14.354416,5.217053,0.189692,0.86,0.037897,True
Lenskit,True,0.182812,0.154846,8.191902,3.311589,0.135456,0.866,0.039624,True


In [28]:
# Print the table as LaTeX source, rounded to three decimals and without
# the APLT and Significance columns.
latex_table = mf_metrics.drop(columns=['APLT', 'Significance']).round(3)
print(latex_table.to_latex())

\begin{tabular}{llrrrrrr}
\toprule
        &      &  PopCorr &    ARP &      PL &  AggDiv &   RMSE &  NDCG@10 \\
Library & Bias &          &        &         &         &        &          \\
\midrule
Cornac & False &    0.271 &  0.192 &  34.399 &   0.138 &  0.857 &    0.064 \\
        & True &    0.237 &  0.192 &  35.602 &   0.105 &  0.856 &    0.059 \\
Lenskit & False &    0.153 &  0.126 & -14.354 &   0.190 &  0.860 &    0.038 \\
        & True &    0.183 &  0.155 &   8.192 &   0.135 &  0.866 &    0.040 \\
\bottomrule
\end{tabular}



In [29]:
import pickle

# Persist the merged table, rounded to three decimals, as a pickle named
# after the dataset inside "metrics_combined/".
output_path = f"metrics_combined/{data_strategy}_all_mf.pkl"
with open(output_path, "wb") as output_file:
    pickle.dump(mf_metrics.round(3), output_file)