# Libraries

In [1]:
import pickle as pkl
import pandas as pd

In [2]:
# Algorithm whose results are analysed first; it selects the entry of
# `algo_versions` and the results sub-directory in the cells below.
algo_name = "UserKNN"
# Data-strategy identifiers. Each one is a prefix of a result file name and
# becomes the first level ("DataStrategy") of the result index.
data_strategies = [
    "uniformly_random",
    "popularity_good",
    "popularity_bad",
    "popularity_good_for_bp_ur",
    "popularity_bad_for_bp_ur",
]

# Analysis

## LKPY

In [13]:
# Hyper-parameter grid evaluated for each algorithm: every combination of
# min_sim in (0, -1) and min_nbrs in (1, 2). min_nbrs varies fastest, and the
# key order (min_nbrs, then min_sim) is kept because str(args) is part of the
# result file names.
algo_versions = {
    "UserKNN": [
        {"min_nbrs": nbrs, "min_sim": sim}
        for sim in (0, -1)
        for nbrs in (1, 2)
    ],
}

In [14]:
# Hyper-parameter settings for the currently selected algorithm.
versions = algo_versions[algo_name]

In [15]:
# Directory (relative to the notebook) holding the pickled results of the
# selected algorithm.
file_location = f"experimental_results/{algo_name}/"

In [16]:
# Load one pickled result per (data strategy, version) pair. The nesting order
# (strategy outer, version inner) defines the row order of `results`, which the
# index built further down must match.
# NOTE: unpickling can execute arbitrary code; only load trusted result files.
results = []
for data_strategy in data_strategies:
    for args in versions:
        result_path = file_location + data_strategy + "_" + str(args) + ".pkl"
        # The context manager closes the handle even if unpickling fails
        # (the previous version never closed the files it opened).
        with open(result_path, "rb") as file:
            result = pkl.load(file)
        results.append(result)

In [17]:
# String form of every version dict (the same text used in the file names).
stringed_versions = list(map(str, versions))

In [18]:
# Per-version hyper-parameter labels (as strings) used to build the result
# index. The values are read directly from each version dict instead of being
# recovered by splitting its string representation, so they no longer depend
# on key order or on how dicts are printed.
# NOTE: despite its name, `center` holds the "min_nbrs" values here; the name
# is kept because later cells read it.
center = [str(args["min_nbrs"]) for args in versions]
min_sim = [str(args["min_sim"]) for args in versions]

In [19]:
# Single label used for the "OverCommon" index level of the LKPY results.
over_common = ["False"]

In [20]:
# Build the row index in exactly the order the results were loaded: data
# strategy outer, version inner. Pairing min_sim/center per version (zip)
# keeps each label attached to the result it describes; the previous
# from_product(...).drop_duplicates() only matched the load order for one
# particular ordering of `versions` and would silently mislabel rows otherwise.
index = pd.MultiIndex.from_tuples(
    [
        (data_strategy, sim, nbrs, common)
        for data_strategy in data_strategies
        for sim, nbrs in zip(min_sim, center)
        for common in over_common
    ],
    names=["DataStrategy", "MinimumSimilarity", "MinimumNeighbours", "OverCommon"],
)

In [21]:
# One row per loaded LKPY result, labelled by the MultiIndex built above;
# `results` and `index` must have the same length and order.
# Assumes each entry of `results` maps metric name -> value — TODO confirm.
results_lkpy = pd.DataFrame(results, index=index)

## Cornac

In [23]:
# Cornac configuration. NOTE: this rebinds `algo_versions` and `algo_name`
# from the LKPY section, so the LKPY cells cannot be re-run after this one
# without re-running the setup cells first.
algo_versions = {
    "CornacUserKNN": [{"center": True}],
}
algo_name = "CornacUserKNN"

In [24]:
# Hyper-parameter settings for the Cornac algorithm selected above.
versions = algo_versions[algo_name]

In [25]:
# Directory (relative to the notebook) holding the pickled Cornac results.
file_location = f"experimental_results/{algo_name}/"

In [26]:
# Load one pickled Cornac result per (data strategy, version) pair, strategy
# outer and version inner; the index built further down relies on this order.
# NOTE: unpickling can execute arbitrary code; only load trusted result files.
results = []
for data_strategy in data_strategies:
    for args in versions:
        result_path = file_location + data_strategy + "_" + str(args) + ".pkl"
        # The context manager closes the handle even if unpickling fails
        # (the previous version never closed the files it opened).
        with open(result_path, "rb") as file:
            result = pkl.load(file)
        results.append(result)

In [27]:
# String form of every Cornac version dict (same text as in the file names).
stringed_versions = list(map(str, versions))

In [28]:
# Per-version labels (as strings) for the Cornac result index. The "center"
# flag is read directly from each version dict instead of being recovered by
# splitting its string representation.
center = [str(args["center"]) for args in versions]
# Every Cornac version is labelled with a minimum similarity of "-1".
min_sim = ["-1"] * len(versions)

In [29]:
# Single label used for the "OverCommon" index level of the Cornac results.
over_common = ["True"]

In [30]:
# Build the row index in exactly the order the results were loaded: data
# strategy outer, version inner. Pairing min_sim/center per version (zip)
# keeps each label attached to the result it describes, instead of relying on
# from_product(...).drop_duplicates() happening to produce the same order.
# NOTE: the level that receives `center` is named "MinimumNeighbours".
index = pd.MultiIndex.from_tuples(
    [
        (data_strategy, sim, flag, common)
        for data_strategy in data_strategies
        for sim, flag in zip(min_sim, center)
        for common in over_common
    ],
    names=["DataStrategy", "MinimumSimilarity", "MinimumNeighbours", "OverCommon"],
)

In [31]:
# One row per loaded Cornac result, labelled by the MultiIndex built above;
# `results` and `index` must have the same length and order.
# Assumes each entry of `results` maps metric name -> value — TODO confirm.
results_cornac = pd.DataFrame(results, index=index)

In [32]:
# Relabel level 2 ("MinimumNeighbours") of the Cornac index.
# NOTE(review): the new labels are derived from level 1 ("MinimumSimilarity",
# which holds only "-1" here) by turning "-1" into "1", and are then written
# into level 2. The net effect is that the Cornac rows get
# MinimumNeighbours == "1" while MinimumSimilarity stays "-1".
neighbour_labels = results_cornac.index.levels[1].str.replace("-1", "1")
results_cornac.index = results_cornac.index.set_levels(neighbour_labels, level=2)

# Merge

In [33]:
# Stack the LKPY and Cornac tables, sort the rows by the index fields in the
# order below, and use those fields as the new MultiIndex.
merged_index_levels = [
    "DataStrategy",
    "MinimumSimilarity",
    "OverCommon",
    "MinimumNeighbours",
]
merged_results = pd.concat([results_lkpy, results_cornac]).reset_index()
merged_results = merged_results.sort_values(merged_index_levels)
merged_results = merged_results.set_index(merged_index_levels)
# Restore the original (non-alphabetical) order of the data strategies.
user_knn_metrics = merged_results.reindex(data_strategies, level=0)

In [34]:
# Column order for the final table; selecting with this list also drops any
# metric column that is not named here.
metrics_order = ["pop_corr", "ARP", "ave_PL", "ACLT", "AggDiv", "RMSE", "NDCG"]
user_knn_metrics = user_knn_metrics[metrics_order]

In [35]:
# Presentation names for the metric columns; columns not listed keep their name.
column_labels = {
    "pop_corr": "PopCorr",
    "ave_PL": "PL",
    "ACLT": "APLT",
    "NDCG": "NDCG@10",
}
user_knn_metrics = user_knn_metrics.rename(columns=column_labels)

In [36]:
# Replace the internal data-strategy identifiers with the scenario labels
# shown in the final table.
scenario_labels = {
    "uniformly_random": "Scenario 1",
    "popularity_good": "Scenario 2",
    "popularity_bad": "Scenario 3",
    "popularity_good_for_bp_ur": "Scenario 4",
    "popularity_bad_for_bp_ur": "Scenario 5",
}
user_knn_metrics = user_knn_metrics.rename(index=scenario_labels)

In [37]:
# Order index level 3 ("MinimumNeighbours") by this explicit label list.
# NOTE(review): the cells above only produce the labels "1" and "2"; "5" and
# "10" presumably cover other configurations — confirm.
user_knn_metrics = user_knn_metrics.reindex(["1", "2", "5", "10"], level=3)

In [38]:
# Print the table as LaTeX source, without the APLT column and rounded to
# three decimals.
print(user_knn_metrics.drop("APLT", axis=1).round(3).to_latex())

\begin{tabular}{lllllrrrrr}
\toprule
           &   &       &   &                                        PopCorr &    ARP &       PL &  AggDiv &   RMSE &  NDCG@10 \\
DataStrategy & MinimumSimilarity & OverCommon & MinimumNeighbours &                                                &        &          &         &        &          \\
\midrule
Scenario 1 & -1 & False & 1 &    (0.018078800576386483, 0.13261414434946503) &  0.002 &  -32.285 &   0.400 &  3.502 &    0.001 \\
           &   &       & 2 &  (0.41822601518091973, 3.097865376849624e-291) &  0.004 &   21.252 &   0.681 &  3.352 &    0.003 \\
           &   & True & 1 &    (0.0035126658997710024, 0.7701522568823012) &  0.002 &  -35.746 &   0.400 &  3.337 &    0.001 \\
           & 0 & False & 1 &   (0.10110466641885223, 3.420017408632659e-17) &  0.003 &  -12.827 &   0.340 &  3.624 &    0.002 \\
           &   &       & 2 &                      (0.6149052740925817, 0.0) &  0.005 &   65.440 &   0.595 &  3.464 &    0.005 \\
Scenario 2 &

In [39]:
# Persist the final table (APLT column dropped, rounded to three decimals).
# Uses the `pkl` alias imported at the top of the notebook rather than
# re-importing pickle in the middle of the notebook.
# The "metrics_combined/" directory must already exist.
with open("metrics_combined/all_user_knn.pkl", "wb") as f:
    pkl.dump(user_knn_metrics.drop("APLT", axis=1).round(3), f)