# Libraries

In [1]:
import pickle as pkl
import pandas as pd

In [2]:
# Name of the algorithm whose results are analysed first; it selects both the
# hyperparameter versions and the results sub-directory further down.
algo_name = "UserKNN"
# Data strategy identifiers. Each one is a file-name prefix of a pickled
# result and becomes the first level ("DataStrategy") of the result index.
data_strategies = [
    "uniformly_random",
    "popularity_good",
    "popularity_bad",
    "popularity_good_for_bp_ur",
    "popularity_bad_for_bp_ur",
]

# Analysis

## LKPY

In [3]:
# Hyperparameter grid for the LKPY UserKNN runs: every min_sim in (0, -1)
# combined with every min_nbrs in (1, 2). min_sim varies slowest, and
# min_nbrs is the first key of each dict (the str() of these dicts is part of
# the result file names, so key order matters).
algo_versions = {
    "UserKNN": [
        {"min_nbrs": nbrs, "min_sim": sim}
        for sim in (0, -1)
        for nbrs in (1, 2)
    ],
}

In [4]:
# Hyperparameter versions of the currently selected algorithm.
versions = algo_versions[algo_name]

In [5]:
# Directory holding the pickled results of the selected algorithm.
file_location = f"experimental_results/{algo_name}/"

In [6]:
# Load one pickled result per (data strategy, hyperparameter version) pair.
# The nesting order (strategy outer, version inner) fixes the row order of
# `results`, which is later paired positionally with the index.
# NOTE(review): pickle.load can execute arbitrary code; only load result files
# produced by this project.
results = []
for data_strategy in data_strategies:
    for args in versions:
        result_path = file_location + data_strategy + "_" + str(args) + ".pkl"
        # The context manager closes the handle; the original leaked one open
        # file per result.
        with open(result_path, "rb") as file:
            result = pkl.load(file)
        results.append(result)

In [7]:
# String form of every hyperparameter dict (same text as used in file names).
stringed_versions = list(map(str, versions))

In [8]:
# Per-version index labels, read by key from the hyperparameter dicts rather
# than by re-parsing their string repr (which silently depended on key order
# and on the exact ", " / " " / "}" layout produced by str(dict)).
# NOTE: `center` holds the min_nbrs values here; the name is kept because the
# index-building cell reads it.
center = [str(args["min_nbrs"]) for args in versions]
min_sim = [str(args["min_sim"]) for args in versions]

In [9]:
# Single label used for the "OverCommon" index level of all LKPY rows.
over_common = ["False"]

In [10]:
# Row labels for the LKPY results, generated in the same nesting order as the
# loading loop (data strategy outer, hyperparameter version inner). zip keeps
# each (min_sim, min_nbrs) pair attached to its own version; the previous
# from_product(...).drop_duplicates() only matched the row order of `results`
# for one particular ordering of the versions.
index = pd.MultiIndex.from_tuples(
    [
        (data_strategy, sim, nbrs, common)
        for data_strategy in data_strategies
        for sim, nbrs in zip(min_sim, center)
        for common in over_common
    ],
    names=["DataStrategy", "MinimumSimilarity", "MinimumNeighbours", "OverCommon"],
)

In [11]:
# One row per loaded result, labelled positionally by `index`.
# presumably each loaded result is a dict of metric name -> value; verify
# against the code that writes the pickles.
results_lkpy = pd.DataFrame(results, index=index)

## Cornac

In [12]:
# Switch the analysis to the Cornac implementation. This rebinds `algo_name`
# and `algo_versions` from the LKPY section; there is a single version.
algo_name = "CornacUserKNN"
algo_versions = {algo_name: [{"center": True}]}

In [13]:
# Hyperparameter versions of the currently selected (Cornac) algorithm.
versions = algo_versions[algo_name]

In [14]:
# Directory holding the pickled results of the selected algorithm.
file_location = f"experimental_results/{algo_name}/"

In [15]:
# Load one pickled result per (data strategy, hyperparameter version) pair.
# The nesting order (strategy outer, version inner) fixes the row order of
# `results`, which is later paired positionally with the index.
# NOTE(review): pickle.load can execute arbitrary code; only load result files
# produced by this project.
results = []
for data_strategy in data_strategies:
    for args in versions:
        result_path = file_location + data_strategy + "_" + str(args) + ".pkl"
        # The context manager closes the handle; the original leaked one open
        # file per result.
        with open(result_path, "rb") as file:
            result = pkl.load(file)
        results.append(result)

In [16]:
# String form of every hyperparameter dict (same text as used in file names).
stringed_versions = list(map(str, versions))

In [17]:
# Index labels for the Cornac versions. `center` is the text after the last
# ": " of each version string, up to the closing brace; every version gets the
# fixed MinimumSimilarity label "-1".
center = [text.split(": ")[-1].split("}")[0] for text in stringed_versions]
min_sim = ["-1"] * len(stringed_versions)

In [18]:
# Single label used for the "OverCommon" index level of all Cornac rows.
over_common = ["True"]

In [19]:
# Row labels for the Cornac results: the cartesian product of the label lists
# with repeated combinations removed. Note that the `center` values land in
# the level named "MinimumNeighbours".
level_values = [data_strategies, min_sim, center, over_common]
level_names = ["DataStrategy", "MinimumSimilarity", "MinimumNeighbours", "OverCommon"]
index = pd.MultiIndex.from_product(level_values, names=level_names)
index = index.drop_duplicates()

In [20]:
# One row per loaded result, labelled positionally by `index`.
# presumably each loaded result is a dict of metric name -> value; verify
# against the code that writes the pickles.
results_cornac = pd.DataFrame(results, index=index)

In [21]:
# Relabel the MinimumNeighbours level (position 2) of every Cornac row as "1".
# It held the `center` text until now; "1" is the value obtained from the
# single MinimumSimilarity label "-1" with "-1" replaced by "1".
results_cornac.index = results_cornac.index.set_levels(
    ["1"], level="MinimumNeighbours"
)

# Merge

In [22]:
# Stack the LKPY and Cornac rows, sort them by the four label columns, index
# the frame by those columns, then restore the original data-strategy order on
# the first level.
index_order = ["DataStrategy", "MinimumSimilarity", "OverCommon", "MinimumNeighbours"]
combined = pd.concat([results_lkpy, results_cornac]).reset_index()
combined = combined.sort_values(index_order).set_index(index_order)
user_knn_metrics = combined.reindex(data_strategies, level=0)

In [23]:
# Keep only these metric columns, in this presentation order.
metrics_order = ["pop_corr", "ARP", "ave_PL", "ACLT", "AggDiv", "RMSE", "NDCG"]
user_knn_metrics = user_knn_metrics.loc[:, metrics_order]

In [24]:
# Presentation names for the metric columns; columns not listed keep theirs.
column_labels = {
    "pop_corr": "PopCorr",
    "ave_PL": "PL",
    "ACLT": "APLT",
    "NDCG": "NDCG@10",
}
user_knn_metrics = user_knn_metrics.rename(columns=column_labels)

In [25]:
# Replace the data strategy identifiers in the index with scenario labels.
scenario_labels = {
    "uniformly_random": "Scenario 1",
    "popularity_good": "Scenario 2",
    "popularity_bad": "Scenario 3",
    "popularity_good_for_bp_ur": "Scenario 4",
    "popularity_bad_for_bp_ur": "Scenario 5",
}
user_knn_metrics = user_knn_metrics.rename(index=scenario_labels)

In [26]:
# Order the MinimumNeighbours level (position 3 of the row index) numerically;
# the labels are strings, so the order is spelled out explicitly.
neighbour_order = ["1", "2", "5", "10"]
user_knn_metrics = user_knn_metrics.reindex(neighbour_order, level="MinimumNeighbours")

In [27]:
# Each PopCorr entry is indexable: element 0 is kept as the PopCorr value and
# element 1 is compared with 0.005 to fill the boolean Significance column.
# presumably the entries are (correlation, p-value) pairs; verify against the
# code that produces the pickles.
pop_corr_pairs = user_knn_metrics["PopCorr"]
significance = pop_corr_pairs.apply(lambda pair: pair[1] < 0.005)
pop_corr_values = pop_corr_pairs.apply(lambda pair: pair[0])
# PopCorr is overwritten in place; Significance is appended as the last column.
user_knn_metrics = user_knn_metrics.assign(
    Significance=significance,
    PopCorr=pop_corr_values,
)

In [30]:
# Show the combined LKPY + Cornac metrics table.
user_knn_metrics

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,PopCorr,ARP,PL,APLT,AggDiv,RMSE,NDCG@10,Significance
DataStrategy,MinimumSimilarity,OverCommon,MinimumNeighbours,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Scenario 1,-1,False,1,0.018079,0.002059,-32.284513,3.762652,0.400231,3.502,0.000794,False
Scenario 1,-1,False,2,0.418226,0.003528,21.251888,1.935323,0.680826,3.352,0.003087,True
Scenario 1,-1,True,1,0.003513,0.001959,-35.745549,3.923726,0.400376,3.337384,0.000798,False
Scenario 1,0,False,1,0.101105,0.002723,-12.827119,2.745972,0.340124,3.624,0.001672,True
Scenario 1,0,False,2,0.614905,0.005088,65.440196,1.071823,0.595145,3.464,0.00482,True
Scenario 2,-1,False,1,0.595504,0.020677,426.621401,0.59665,0.196648,1.188,0.01902,True
Scenario 2,-1,False,2,0.614367,0.022179,447.618276,0.196923,0.339257,1.19,0.020719,True
Scenario 2,-1,True,1,0.604307,0.015046,305.197289,1.600833,0.238405,1.149931,0.012895,True
Scenario 2,0,False,1,0.55196,0.027255,632.300469,0.03901,0.080046,1.04,0.023409,True
Scenario 2,0,False,2,0.562015,0.027052,591.965761,0.033348,0.172518,1.026,0.024699,True


In [28]:
# LaTeX source of the table, without the APLT and Significance columns and
# rounded to three decimals.
latex_frame = user_knn_metrics.drop(columns=["APLT", "Significance"]).round(3)
print(latex_frame.to_latex())

\begin{tabular}{llllrrrrrr}
\toprule
           &   &       &   &  PopCorr &    ARP &       PL &  AggDiv &   RMSE &  NDCG@10 \\
DataStrategy & MinimumSimilarity & OverCommon & MinimumNeighbours &          &        &          &         &        &          \\
\midrule
Scenario 1 & -1 & False & 1 &    0.018 &  0.002 &  -32.285 &   0.400 &  3.502 &    0.001 \\
           &   &       & 2 &    0.418 &  0.004 &   21.252 &   0.681 &  3.352 &    0.003 \\
           &   & True & 1 &    0.004 &  0.002 &  -35.746 &   0.400 &  3.337 &    0.001 \\
           & 0 & False & 1 &    0.101 &  0.003 &  -12.827 &   0.340 &  3.624 &    0.002 \\
           &   &       & 2 &    0.615 &  0.005 &   65.440 &   0.595 &  3.464 &    0.005 \\
Scenario 2 & -1 & False & 1 &    0.596 &  0.021 &  426.621 &   0.197 &  1.188 &    0.019 \\
           &   &       & 2 &    0.614 &  0.022 &  447.618 &   0.339 &  1.190 &    0.021 \\
           &   & True & 1 &    0.604 &  0.015 &  305.197 &   0.238 &  1.150 &    0.013 \\
     

In [29]:
import pickle

# Persist the table (APLT dropped, values rounded to three decimals). The
# Significance column is kept in the pickle.
with open("metrics_combined/all_user_knn.pkl", "wb") as sink:
    rounded_metrics = user_knn_metrics.drop(columns="APLT").round(3)
    pickle.dump(rounded_metrics, sink)