# Libraries

In [1]:
import pickle as pkl
import pandas as pd

In [2]:
# Dataset identifier. It is embedded in the names of the result files this
# notebook reads and writes; change it to examine a different dataset.
data_strategy = "fairbook"

# Analysis

## LKPY

In [33]:
# LKPY UserKNN hyper-parameter grid: min_nbrs in (1, 2) for each min_sim in (0, -1).
# Key order (min_nbrs, then min_sim) matters because str(args) of every dict is
# used to build the result file names further down.
# The min_nbrs values 5 and 10 are currently left out of the grid.
algo_versions = {
    "UserKNN": [
        {"min_nbrs": nbrs, "min_sim": sim}
        for sim in (0, -1)
        for nbrs in (1, 2)
    ],
}

In [34]:
# Select the LKPY algorithm and the list of configurations to analyse.
algo_name = "UserKNN"
versions = algo_versions[algo_name]

In [35]:
# Directory (with trailing slash) holding the pickled results of this algorithm.
file_location = f"experimental_results/{algo_name}/"

In [36]:
# Load the pickled summary metrics of every LKPY configuration, in the order
# given by `versions`. Each file name embeds str(args), i.e. the dict repr.
# NOTE(review): pickle.load can execute arbitrary code; only read result files
# that were produced by this project.
results = []
for args in versions:
    result_path = file_location + data_strategy + "_" + str(args) + ".pkl"
    # The context manager closes the handle; previously one handle per file
    # stayed open for the lifetime of the kernel.
    with open(result_path, "rb") as file:
        result = pkl.load(file)
    results.append(result)

In [37]:
# String form (dict repr) of every configuration.
stringed_versions = list(map(str, versions))

In [38]:
# Initialize empty lists for the two halves
min_nbrs = []
min_sim = []

# Split the strings and populate the lists
for s in stringed_versions:
    parts = s.split(", ")

    min_nbrs.append(parts[0].split(" ")[-1])
    min_sim.append(parts[1].split(" ")[-1].split("}")[0])

In [39]:
# Single label used for the "OverCommon" index level of the LKPY rows.
over_common = ["False"]

In [40]:
# One index entry per configuration, positionally aligned with `results`.
# from_arrays pairs the i-th min_sim with the i-th min_nbrs. The previous
# from_product(...).drop_duplicates() built a Cartesian product, which only
# matches the row order when the configurations form a complete grid listed in
# grid order; otherwise rows are mislabelled or the lengths no longer match.
index = pd.MultiIndex.from_arrays(
    [min_sim, min_nbrs, over_common * len(min_sim)],
    names=["MinimumSimilarity", "MinimumNeighbours", "OverCommon"],
)

In [41]:
# One row per LKPY configuration, labelled by the MultiIndex built above.
# Assumes each loaded result maps metric name -> value (TODO confirm).
results_lkpy = pd.DataFrame(results, index=index)

In [42]:
# Display the LKPY summary table.
results_lkpy

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,pop_corr,RMSE,NDCG,ARP,ave_PL,ACLT,AggDiv
MinimumSimilarity,MinimumNeighbours,OverCommon,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,1,False,"(0.0709518452253593, 3.4409373799052483e-09)",1.816,0.001008,0.002605,-17.74011,3.028568,0.318018
0,2,False,"(0.4464601671014467, 0.0)",1.704,0.005847,0.005293,67.356267,1.223127,0.504407
-1,1,False,"(-0.0003114874374997293, 0.9793301180788686)",1.86,0.000529,0.001931,-38.582994,3.947709,0.338535
-1,2,False,"(0.2824592921360292, 4.056414194840436e-127)",1.758,0.003233,0.003457,15.01827,2.019144,0.575928


In [43]:
# Load the per-item detailed results of every LKPY configuration, in the order
# given by `versions`.
# NOTE(review): pickle.load can execute arbitrary code; only read result files
# that were produced by this project.
lkpy_detailed_results = []
for args in versions:
    detailed_path = (
        file_location + "detailed_per_item_" + data_strategy + "_" + str(args) + ".pkl"
    )
    # The context manager closes the handle; previously it was never closed.
    with open(detailed_path, "rb") as file:
        result = pkl.load(file)
    lkpy_detailed_results.append(result)

## Cornac

In [44]:
# Cornac UserKNN configuration.
# NOTE: this rebinds `algo_versions` and `algo_name` from the LKPY section, so
# LKPY cells re-run after this point will see the Cornac values.
algo_versions = {
    "CornacUserKNN": [{"center": True}],
}
algo_name = "CornacUserKNN"

In [45]:
# Configurations of the Cornac algorithm selected above (rebinds `versions`).
versions = algo_versions[algo_name]

In [46]:
# Directory (with trailing slash) holding the pickled results of this algorithm.
file_location = f"experimental_results/{algo_name}/"

In [47]:
# Load the pickled summary metrics of every Cornac configuration, in the order
# given by `versions`. Each file name embeds str(args), i.e. the dict repr.
# NOTE(review): pickle.load can execute arbitrary code; only read result files
# that were produced by this project.
results = []
for args in versions:
    result_path = file_location + data_strategy + "_" + str(args) + ".pkl"
    # The context manager closes the handle; previously it was never closed.
    with open(result_path, "rb") as file:
        result = pkl.load(file)
    results.append(result)

In [48]:
# String form (dict repr) of every configuration.
stringed_versions = list(map(str, versions))

In [49]:
# Initialize empty lists for the two halves
min_nbrs = []
min_sim = []
# Split the strings and populate the lists
for s in stringed_versions:
    parts = s.split(": ")
    min_nbrs.append(parts[-1].split("}")[0])
    min_sim.append("-1")

In [50]:
# Single label used for the "OverCommon" index level of the Cornac rows.
over_common = ["True"]

In [51]:
# One index entry per configuration, positionally aligned with `results`.
# from_arrays keeps the labels paired per configuration; the previous
# from_product(...).drop_duplicates() only produced the right rows when the
# configurations happened to form a complete grid listed in grid order.
index = pd.MultiIndex.from_arrays(
    [min_sim, min_nbrs, over_common * len(min_sim)],
    names=["MinimumSimilarity", "MinimumNeighbours", "OverCommon"],
)

In [52]:
# One row per Cornac configuration, labelled by the MultiIndex built above.
# Assumes each loaded result maps metric name -> value (TODO confirm).
results_cornac = pd.DataFrame(results, index=index)

In [53]:
# Relabel the MinimumNeighbours level (level 1): the placeholder "True" taken
# from the `center` flag becomes "1".
neighbour_labels = results_cornac.index.levels[1].str.replace("True", "1")
results_cornac.index = results_cornac.index.set_levels(neighbour_labels, level=1)

In [54]:
# Display the Cornac summary table.
results_cornac

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,pop_corr,RMSE,NDCG,ARP,ave_PL,ACLT,AggDiv
MinimumSimilarity,MinimumNeighbours,OverCommon,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
-1,1,True,"(0.009609258756313608, 0.4241206672736523)",1.738937,0.000609,0.002024,-32.84345,3.938783,0.342292


In [55]:
# Load the per-item detailed results of every Cornac configuration, in the
# order given by `versions`.
# NOTE(review): pickle.load can execute arbitrary code; only read result files
# that were produced by this project.
cornac_detailed_results = []
for args in versions:
    detailed_path = (
        file_location + "detailed_per_item_" + data_strategy + "_" + str(args) + ".pkl"
    )
    # The context manager closes the handle; previously it was never closed.
    with open(detailed_path, "rb") as file:
        result = pkl.load(file)
    cornac_detailed_results.append(result)

# Merge

In [56]:
# Stack the LKPY and Cornac tables, then sort by and re-index on
# (MinimumSimilarity, OverCommon, MinimumNeighbours).
merged_index_levels = ["MinimumSimilarity", "OverCommon", "MinimumNeighbours"]
stacked_metrics = pd.concat([results_lkpy, results_cornac]).reset_index()
user_knn_metrics = stacked_metrics.sort_values(merged_index_levels).set_index(
    merged_index_levels
)

In [57]:
# Column order used for the merged metrics table.
metrics_order = ["pop_corr", "ARP", "ave_PL", "ACLT", "AggDiv", "RMSE", "NDCG"]
user_knn_metrics = user_knn_metrics.loc[:, metrics_order]

In [58]:
# Rename the metric columns for presentation, then order the
# MinimumNeighbours level (index level 2) as 1, 2, 5, 10.
display_names = {"pop_corr": "PopCorr", "ave_PL": "PL", "ACLT": "APLT", "NDCG": "NDCG@10"}
neighbour_order = ["1", "2", "5", "10"]
user_knn_metrics = user_knn_metrics.rename(columns=display_names).reindex(
    neighbour_order, level=2
)

In [59]:
# Emit the table (without APLT) as LaTeX, rounded to three decimals.
# PopCorr holds tuples, so rounding leaves that column as-is (see the output).
latex_table = user_knn_metrics.drop(columns="APLT").round(3).to_latex()
print(latex_table)

\begin{tabular}{llllrrrrr}
\toprule
  &       &   &                                       PopCorr &    ARP &      PL &  AggDiv &   RMSE &  NDCG@10 \\
MinimumSimilarity & OverCommon & MinimumNeighbours &                                               &        &         &         &        &          \\
\midrule
-1 & False & 1 &  (-0.0003114874374997293, 0.9793301180788686) &  0.002 & -38.583 &   0.339 &  1.860 &    0.001 \\
  &       & 2 &  (0.2824592921360292, 4.056414194840436e-127) &  0.003 &  15.018 &   0.576 &  1.758 &    0.003 \\
  & True & 1 &    (0.009609258756313608, 0.4241206672736523) &  0.002 & -32.843 &   0.342 &  1.739 &    0.001 \\
0 & False & 1 &  (0.0709518452253593, 3.4409373799052483e-09) &  0.003 & -17.740 &   0.318 &  1.816 &    0.001 \\
  &       & 2 &                     (0.4464601671014467, 0.0) &  0.005 &  67.356 &   0.504 &  1.704 &    0.006 \\
\bottomrule
\end{tabular}



In [60]:
import pickle

# Persist the rounded table (without APLT) for the current dataset.
metrics_path = "metrics_combined/" + data_strategy + "_all_user_knn.pkl"
metrics_to_save = user_knn_metrics.drop("APLT", axis=1).round(3)
with open(metrics_path, "wb") as f:
    pickle.dump(metrics_to_save, f)

# Significance tests

## 1. Average Recommendation Popularity

In [61]:
# Inspect the per-item detailed LKPY results (one entry per configuration).
lkpy_detailed_results

[[       profile  recommendation  average_rating
  0     0.003775             0.0       44.750000
  1     0.004561             0.0       50.000000
  2     0.003617             0.0       50.000000
  3     0.003932             0.0       50.000000
  4     0.003775             0.0       50.000000
  ...        ...             ...             ...
  6916  0.003617             5.0       44.050000
  6917  0.003460             0.0       38.200000
  6918  0.003460             0.0       39.583333
  6919  0.003460             0.0       47.033333
  6920  0.003775             0.0       42.900000
  
  [6921 rows x 3 columns]],
 [       profile  recommendation  average_rating
  0     0.003775             2.0       44.750000
  1     0.004561             0.0       50.000000
  2     0.003617             0.0       50.000000
  3     0.003932             0.0       50.000000
  4     0.003775             0.0       50.000000
  ...        ...             ...             ...
  6916  0.003617             6.0      