# Libraries

In [1]:
import pickle as pkl
import pandas as pd

In [2]:
# Data-generation scenarios to aggregate. These strings are used as file-name
# prefixes when loading results, as the first index level of the tables, and
# as the row order of the final merged table.
data_strategies = [
    "uniformly_random",
    "popularity_good",
    "popularity_bad",
    "popularity_good_for_bp_ur",
    "popularity_bad_for_bp_ur",
]

# Algorithm analysed first; used as the key into the version table and as the
# name of the results sub-directory.
algo_name = "MF"

# Analysis

## LKPY

In [3]:
# Hyper-parameter variants evaluated per algorithm. The str() of each dict is
# part of the result file name, so key order inside the dicts must not change.
algo_versions = {
    "UserKNN": [
        {"center": True, "min_sim": 0},
        {"center": False, "min_sim": 0},
        {"center": True, "min_sim": -1},
    ],
    # Both factorisation models are run with and without the bias term.
    "MF": [{"bias": use_bias} for use_bias in (True, False)],
    "FunkSVD": [{"bias": use_bias} for use_bias in (True, False)],
}

In [4]:
# Select the hyper-parameter variants registered for the current algo_name.
versions = algo_versions[algo_name]

In [5]:
# Directory (with trailing slash) holding the pickled results for algo_name.
file_location = f"experimental_results/{algo_name}/"

In [6]:
# Load one pickled result per (data strategy, version) pair. Strategies are
# the outer loop and versions the inner loop; the index built later relies on
# this ordering.
results = []
for data_strategy in data_strategies:
    for args in versions:
        # File names embed the str() of the version dict,
        # e.g. "uniformly_random_{'bias': True}.pkl".
        result_path = file_location + data_strategy + "_" + str(args) + ".pkl"
        # NOTE(review): pickle.load can execute arbitrary code; only load
        # files produced by this project's own experiment runs.
        # The context manager closes the handle even if unpickling fails.
        with open(result_path, "rb") as file:
            result = pkl.load(file)
        results.append(result)

In [7]:
# String form of every version dict, in the same order as `versions`.
stringed_versions = list(map(str, versions))

In [8]:
# Labels for the "Bias" index level: the string form of each version's bias
# flag ("True" / "False"), in the same order as `versions`.
# The flag is read straight from the version dict rather than being parsed
# back out of the dict's str() representation.
bias = [str(args["bias"]) for args in versions]

In [9]:
# Single-element list so it can act as the "Library" level in the index product.
library = ["Lenskit"]

In [10]:
# Row labels: every (DataStrategy, Bias, Library) combination. The product
# iterates strategies outermost and bias values next, which matches the order
# in which the result files were loaded.
index = pd.MultiIndex.from_product(
    iterables=[data_strategies, bias, library],
    names=["DataStrategy", "Bias", "Library"],
).drop_duplicates()

In [11]:
# One row per loaded Lenskit result, labelled by the index built above.
# Presumably each result is a dict of metric name -> value, which would make
# the metric names the columns -- TODO confirm against the experiment script.
results_lkpy = pd.DataFrame(data=results, index=index)

## Cornac

In [12]:
# Switch to the Cornac results. Both names overwrite the values used in the
# Lenskit section, so re-running those earlier cells after this one would
# pick up the Cornac settings instead.
algo_name = "CornacMF"
algo_versions = {"CornacMF": [{"bias": True}, {"bias": False}]}

In [13]:
# Select the hyper-parameter variants registered for the current algo_name.
versions = algo_versions[algo_name]

In [14]:
# Directory (with trailing slash) holding the pickled results for algo_name.
file_location = f"experimental_results/{algo_name}/"

In [15]:
# Load one pickled result per (data strategy, version) pair. Strategies are
# the outer loop and versions the inner loop; the index built later relies on
# this ordering.
results = []
for data_strategy in data_strategies:
    for args in versions:
        # File names embed the str() of the version dict,
        # e.g. "uniformly_random_{'bias': True}.pkl".
        result_path = file_location + data_strategy + "_" + str(args) + ".pkl"
        # NOTE(review): pickle.load can execute arbitrary code; only load
        # files produced by this project's own experiment runs.
        # The context manager closes the handle even if unpickling fails.
        with open(result_path, "rb") as file:
            result = pkl.load(file)
        results.append(result)

In [16]:
# String form of every version dict, in the same order as `versions`.
stringed_versions = list(map(str, versions))

In [17]:
# Labels for the "Bias" index level: the string form of each version's bias
# flag ("True" / "False"), in the same order as `versions`.
# The flag is read straight from the version dict rather than being parsed
# back out of the dict's str() representation.
bias = [str(args["bias"]) for args in versions]

In [18]:
# Single-element list so it can act as the "Library" level in the index product.
library = ["Cornac"]

In [19]:
# Row labels: every (DataStrategy, Bias, Library) combination. The product
# iterates strategies outermost and bias values next, which matches the order
# in which the result files were loaded.
index = pd.MultiIndex.from_product(
    iterables=[data_strategies, bias, library],
    names=["DataStrategy", "Bias", "Library"],
).drop_duplicates()

In [20]:
# One row per loaded Cornac result, labelled by the index built above.
# Presumably each result is a dict of metric name -> value, which would make
# the metric names the columns -- TODO confirm against the experiment script.
results_cornac = pd.DataFrame(data=results, index=index)

# Merge

In [21]:
# Stack the Lenskit and Cornac tables and rebuild the row index as
# (DataStrategy, Library, Bias), with rows sorted on those keys.
index_order = ["DataStrategy", "Library", "Bias"]
mf_metrics = pd.concat([results_lkpy, results_cornac]).reset_index()
mf_metrics = mf_metrics.sort_values(index_order).set_index(index_order)
# Sorting ordered the strategy names alphabetically; put the first level back
# into the order given by data_strategies.
mf_metrics = mf_metrics.reindex(data_strategies, level=0)

In [22]:
# Column order for the final table; selecting with this list also drops any
# column that is not named here.
metrics_order = ["pop_corr", "ARP", "ave_PL", "ACLT", "AggDiv", "RMSE", "NDCG"]
mf_metrics = mf_metrics.loc[:, metrics_order]

In [23]:
# Replace internal metric keys with the names shown in the table header;
# columns not listed here keep their names.
mf_metrics = mf_metrics.rename(
    {"pop_corr": "PopCorr", "ave_PL": "PL", "ACLT": "APLT", "NDCG": "NDCG@10"},
    axis="columns",
)

In [24]:
# Replace the data-strategy identifiers in the row index with the scenario
# labels used in the table.
mf_metrics = mf_metrics.rename(
    {
        "uniformly_random": "Scenario 1",
        "popularity_good": "Scenario 2",
        "popularity_bad": "Scenario 3",
        "popularity_good_for_bp_ur": "Scenario 4",
        "popularity_bad_for_bp_ur": "Scenario 5",
    },
    axis="index",
)

In [25]:
# Emit the table as LaTeX source. round(3) only touches numeric columns, so the
# tuple-valued PopCorr column is printed at full precision (see the output).
latex_table = mf_metrics.round(3).to_latex()
print(latex_table)

\begin{tabular}{llllrrrrrr}
\toprule
           &         &      &                                         PopCorr &    ARP &       PL &   APLT &  AggDiv &   RMSE &  NDCG@10 \\
DataStrategy & Library & Bias &                                                 &        &          &        &         &        &          \\
\midrule
Scenario 1 & Cornac & False &     (-0.019817197523989312, 0.0992486268830372) &  0.002 &  -57.028 &  6.020 &   0.423 &  2.901 &    0.001 \\
           &         & True &     (0.0496886036050218, 3.541373345741842e-05) &  0.004 &    4.208 &  2.246 &   0.007 &  2.844 &    0.001 \\
           & Lenskit & False &     (-0.011601554411600401, 0.3345342882304186) &  0.002 &  -50.758 &  4.918 &   0.544 &  3.402 &    0.002 \\
           &         & True &      (-0.00803288291031341, 0.5040275932506005) &  0.002 &  -49.614 &  4.213 &   0.383 &  3.034 &    0.001 \\
Scenario 2 & Cornac & False &                      (0.48922599776186715, 0.0) &  0.030 &  742.078 &  0.010 &   

In [None]:
import pickle

# Persist the rounded comparison table. open() does not create the
# "metrics_combined" directory, so it has to exist before this cell runs.
with open("metrics_combined/all_mf.pkl", mode="wb") as out_file:
    pickle.dump(mf_metrics.round(3), out_file)