# Libraries

In [None]:
import pickle as pkl
import pandas as pd

In [None]:
# Data-sampling strategies whose results are analysed; the list order is
# reused later as the row order of the combined table.
data_strategies = [
    "uniformly_random",
    "popularity_good",
    "popularity_bad",
    "popularity_good_for_bp_ur",
    "popularity_bad_for_bp_ur",
]

# Key into `algo_versions` and name of the results sub-directory.
algo_name = "UserKNN"

# Analysis

## LKPY

In [None]:
# Parameter grid per algorithm: every min_nbrs value is combined with each
# min_sim value, with min_sim varying slowest. Key order inside each dict
# matters because the dict repr is embedded in the result file names.
algo_versions = {
    "UserKNN": [
        {"min_nbrs": min_nbrs, "min_sim": min_sim}
        for min_sim in (0, -1)
        for min_nbrs in (1, 2, 5, 10)
    ],
}

In [None]:
# Parameter sets to analyse for the algorithm selected by `algo_name`.
versions = algo_versions[algo_name]

In [None]:
# Directory (with trailing slash) holding the pickled results of `algo_name`.
file_location = f"experimental_results/{algo_name}/"

In [None]:
# Load one pickled result per (data strategy, parameter set), strategies in
# the outer loop and parameter sets in the inner loop; the index built later
# relies on this order.
# NOTE: unpickling can execute arbitrary code, so only read files written by
# trusted experiment runs.
results = []
for data_strategy in data_strategies:
    for args in versions:
        # File names embed the dict repr of the parameter set. The context
        # manager closes each file as soon as it has been read.
        with open(f"{file_location}{data_strategy}_{args}.pkl", "rb") as file:
            result = pkl.load(file)
        results.append(result)

In [None]:
# Dict repr of every parameter set, in the same order as `versions`.
stringed_versions = list(map(str, versions))

In [None]:
# String labels for the index levels, one entry per parameter set and in the
# same order as `versions`. The values are read directly from the parameter
# dicts rather than sliced out of their repr, so the result does not depend
# on key order or on how the dict happens to be printed.
# NOTE: `center` holds the min_nbrs values; the name is kept because the
# index-building cell refers to it.
center = [str(args["min_nbrs"]) for args in versions]
min_sim = [str(args["min_sim"]) for args in versions]

In [None]:
# Single label used for the OverCommon index level of the LKPY results;
# kept as a string like the other index labels.
over_common = [str(False)]

In [None]:
# Row index for the LKPY results. `min_sim` and `center` hold one entry per
# parameter set and therefore repeat values, so the Cartesian product contains
# repeated tuples; drop_duplicates keeps the first occurrence of each.
index = pd.MultiIndex.from_product(
    names=["DataStrategy", "MinimumSimilarity", "MinimumNeighbours", "OverCommon"],
    iterables=[data_strategies, min_sim, center, over_common],
).drop_duplicates()

In [None]:
# One row per loaded LKPY result, labelled positionally by `index`
# (presumably each result is a mapping of metric name -> value; verify
# against the experiment script that wrote the pickles).
results_lkpy = pd.DataFrame(data=results, index=index)

## Cornac

In [None]:
# Switch the analysis to the Cornac implementation, which has a single
# parameter set.
algo_name = "CornacUserKNN"
algo_versions = {algo_name: [{"center": True}]}

In [None]:
# Parameter sets to analyse for the algorithm selected by `algo_name`.
versions = algo_versions[algo_name]

In [None]:
# Directory (with trailing slash) holding the pickled results of `algo_name`.
file_location = f"experimental_results/{algo_name}/"

In [None]:
# Load one pickled result per (data strategy, parameter set), strategies in
# the outer loop and parameter sets in the inner loop; the index built later
# relies on this order.
# NOTE: unpickling can execute arbitrary code, so only read files written by
# trusted experiment runs.
results = []
for data_strategy in data_strategies:
    for args in versions:
        # File names embed the dict repr of the parameter set. The context
        # manager closes each file as soon as it has been read.
        with open(f"{file_location}{data_strategy}_{args}.pkl", "rb") as file:
            result = pkl.load(file)
        results.append(result)

In [None]:
# Dict repr of every parameter set, in the same order as `versions`.
stringed_versions = list(map(str, versions))

In [None]:
# String labels for the index levels, one entry per parameter set and in the
# same order as `versions`. The `center` flag is read directly from each
# parameter dict rather than sliced out of its repr.
center = [str(args["center"]) for args in versions]
# The Cornac parameter sets carry no similarity threshold; every row is
# labelled with the fixed string "-1".
min_sim = ["-1"] * len(versions)

In [None]:
# Single label used for the OverCommon index level of the Cornac results;
# kept as a string like the other index labels.
over_common = [str(True)]

In [None]:
# Row index for the Cornac results, built with the same level names as the
# LKPY index so the two frames can be concatenated. drop_duplicates removes
# any repeated tuples produced by the Cartesian product.
index = pd.MultiIndex.from_product(
    names=["DataStrategy", "MinimumSimilarity", "MinimumNeighbours", "OverCommon"],
    iterables=[data_strategies, min_sim, center, over_common],
).drop_duplicates()

In [None]:
# One row per loaded Cornac result, labelled positionally by `index`
# (presumably each result is a mapping of metric name -> value; verify
# against the experiment script that wrote the pickles).
results_cornac = pd.DataFrame(data=results, index=index)

In [None]:
# Replace the labels of index level 2 (MinimumNeighbours) with the labels of
# level 1 (MinimumSimilarity) after substituting "-1" by "1".
# NOTE(review): the source level (1) and the target level (2) differ. This
# looks like a deliberate way to relabel the Cornac rows' MinimumNeighbours
# from the `center` value to "1", so that they are kept by the later reindex
# on ["1", "2", "5", "10"] -- confirm the intent before changing it.
results_cornac.index = results_cornac.index.set_levels(
    results_cornac.index.levels[1].str.replace("-1", "1"), level=2
)

# Merge

In [None]:
# Stack the LKPY and Cornac results and turn the index levels into columns.
user_knn_metrics = pd.concat([results_lkpy, results_cornac]).reset_index()
# Order the rows by the four key columns (all of them hold strings).
user_knn_metrics = user_knn_metrics.sort_values(
    by=["DataStrategy", "MinimumSimilarity", "OverCommon", "MinimumNeighbours"]
)
# Rebuild the index with OverCommon placed before MinimumNeighbours.
user_knn_metrics = user_knn_metrics.set_index(
    keys=["DataStrategy", "MinimumSimilarity", "OverCommon", "MinimumNeighbours"]
)
# Put the outermost level back into the order given by `data_strategies`.
user_knn_metrics = user_knn_metrics.reindex(data_strategies, level=0)

In [None]:
# Keep only these metric columns, in this presentation order.
metrics_order = ["pop_corr", "ARP", "ave_PL", "ACLT", "AggDiv", "RMSE", "NDCG"]
user_knn_metrics = user_knn_metrics.loc[:, metrics_order]

In [None]:
# Give the metric columns their display names; columns not listed keep
# their current name.
user_knn_metrics = user_knn_metrics.rename(
    {"pop_corr": "PopCorr", "ave_PL": "PL", "ACLT": "APLT", "NDCG": "NDCG@10"},
    axis="columns",
)

In [None]:
# Replace the data-strategy identifiers in the row index by numbered
# scenario labels.
user_knn_metrics = user_knn_metrics.rename(
    {
        "uniformly_random": "Scenario 1",
        "popularity_good": "Scenario 2",
        "popularity_bad": "Scenario 3",
        "popularity_good_for_bp_ur": "Scenario 4",
        "popularity_bad_for_bp_ur": "Scenario 5",
    },
    axis="index",
)

In [None]:
# Order the innermost index level numerically; its labels are strings, so
# the earlier sort placed "10" before "2".
user_knn_metrics = user_knn_metrics.reindex(index=["1", "2", "5", "10"], level=3)

In [None]:
# Emit the table as LaTeX without the APLT column, rounded to three decimals.
print(user_knn_metrics.drop(columns="APLT").round(decimals=3).to_latex())

In [None]:
import pickle

# Persist the table without the APLT column, rounded to three decimals.
with open("metrics_combined/all_user_knn.pkl", mode="wb") as f:
    pickle.dump(user_knn_metrics.drop(columns="APLT").round(decimals=3), f)