# Libraries

In [1]:
import pickle as pkl
import pandas as pd

In [2]:
# Dataset identifier. It is embedded in the names of the result files loaded below
# and in the name of the combined-metrics pickle written by this notebook.
data_strategy = "ml1m"  # change depending on which dataset we want to examine

# Analysis

## LKPY

In [3]:
# Hyper-parameter settings per algorithm. str() of each dict is used to build the
# name of its pickled result file, so key order and values must stay as they are.
# min_nbrs=5 is currently disabled; add 5 to the inner tuple to re-enable it.
algo_versions = {
    "UserKNN": [
        {"min_nbrs": nbrs, "min_sim": sim}
        for sim in (0, -1)
        for nbrs in (1, 2, 10)
    ],
}

In [4]:
# Pick the algorithm to analyse and look up its list of hyper-parameter settings.
algo_name = "UserKNN"
versions = algo_versions[algo_name]

In [5]:
# Directory prefix for the pickled results of the selected algorithm.
file_location = "experimental_results/" + algo_name + "/"

In [6]:
# Load one pickled result per hyper-parameter setting, in the order of `versions`.
# The file name is "<data_strategy>_<str(args)>.pkl" inside `file_location`.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
results = []
for args in versions:
    # `with` closes the handle even if unpickling raises (it was never closed before).
    with open(file_location + data_strategy + "_" + str(args) + ".pkl", "rb") as file:
        result = pkl.load(file)
    results.append(result)

In [7]:
# Text form of every hyper-parameter setting, in the order of `versions`.
stringed_versions = list(map(str, versions))

In [8]:
# Index labels for the results table, one per hyper-parameter setting.
# The values are read straight from the setting dicts rather than recovered by
# splitting their str() form, which breaks as soon as key order or formatting
# changes. Labels stay strings, exactly as the string-splitting produced them.
min_nbrs = [str(args["min_nbrs"]) for args in versions]
min_sim = [str(args["min_sim"]) for args in versions]

In [9]:
# Single label that fills the "OverCommon" index level built from this list.
over_common = ["False"]

In [10]:
# Build one index entry per loaded result, aligned position-by-position with the
# label lists. from_product(...).drop_duplicates() only yielded the right labels
# when the settings formed a complete grid listed in product order; from_arrays
# pairs the labels row by row, so any list of settings is labelled correctly.
# `over_common` holds a single label, repeated here to match the number of rows.
index = pd.MultiIndex.from_arrays(
    [min_sim, min_nbrs, over_common * len(min_sim)],
    names=["MinimumSimilarity", "MinimumNeighbours", "OverCommon"],
)

In [11]:
# Tabulate the loaded results, one row per entry of `index`.
results_lkpy = pd.DataFrame(results, index=index)

In [12]:
# Display the LKPY metrics table.
results_lkpy

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,pop_corr,RMSE,NDCG,ARP,ave_PL,ACLT,AggDiv
MinimumSimilarity,MinimumNeighbours,OverCommon,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,1,False,"(-0.04977147729479633, 0.002439142361278052)",0.9,0.002671,0.009937,-91.917624,9.753207,0.074204
0,2,False,"(-0.0030202702476565956, 0.8541678777978053)",0.894,0.012751,0.041055,-67.793998,8.754033,0.109552
0,10,False,"(0.17569851551417628, 4.454130623006229e-27)",0.898,0.047951,0.169928,21.997285,3.378492,0.109822
-1,1,False,"(-0.08195448226452781, 5.857438034683565e-07)",0.906,1.2e-05,0.000304,-99.782118,9.999834,0.06422
-1,2,False,"(-0.05280191665496846, 0.0013018250786169837)",0.904,0.004332,0.017443,-86.269501,9.537164,0.103076
-1,10,False,"(0.2473077503173118, 9.145543648196975e-53)",0.894,0.055352,0.175437,26.134287,2.757018,0.174582


In [13]:
# Load the "detailed_per_item_" pickle of every hyper-parameter setting, in the
# order of `versions`.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
lkpy_detailed_results = []
for args in versions:
    detail_path = file_location + "detailed_per_item_" + data_strategy + "_" + str(args) + ".pkl"
    # `with` closes the handle even if unpickling raises (it was never closed before).
    with open(detail_path, "rb") as file:
        result = pkl.load(file)
    lkpy_detailed_results.append(result)

## Cornac

In [13]:
# Switch to the Cornac implementation; this rebinds `algo_versions` and `algo_name`.
algo_name = "CornacUserKNN"
algo_versions = {algo_name: [{"center": True}]}

In [14]:
# Hyper-parameter settings of the currently selected algorithm.
versions = algo_versions[algo_name]

In [15]:
# Directory prefix for the pickled results of the selected algorithm.
file_location = "experimental_results/" + algo_name + "/"

In [16]:
# Load one pickled result per hyper-parameter setting, in the order of `versions`.
# The file name is "<data_strategy>_<str(args)>.pkl" inside `file_location`.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
results = []
for args in versions:
    # `with` closes the handle even if unpickling raises (it was never closed before).
    with open(file_location + data_strategy + "_" + str(args) + ".pkl", "rb") as file:
        result = pkl.load(file)
    results.append(result)

In [17]:
# Text form of every hyper-parameter setting, in the order of `versions`.
stringed_versions = list(map(str, versions))

In [18]:
# Derive index labels from the string form of each setting.
# `min_nbrs` receives the text between the last ": " and the closing brace;
# `min_sim` receives the fixed label "-1" for every setting.
min_nbrs = [text.split(": ")[-1].split("}")[0] for text in stringed_versions]
min_sim = ["-1"] * len(stringed_versions)

In [19]:
# Single label that fills the "OverCommon" index level built from this list.
over_common = ["True"]

In [20]:
# Build one index entry per loaded result, aligned position-by-position with the
# label lists. from_product(...).drop_duplicates() only yielded the right labels
# when the settings formed a complete grid listed in product order; from_arrays
# pairs the labels row by row, so any list of settings is labelled correctly.
# `over_common` holds a single label, repeated here to match the number of rows.
index = pd.MultiIndex.from_arrays(
    [min_sim, min_nbrs, over_common * len(min_sim)],
    names=["MinimumSimilarity", "MinimumNeighbours", "OverCommon"],
)

In [21]:
# Tabulate the loaded results, one row per entry of `index`.
results_cornac = pd.DataFrame(results, index=index)

In [22]:
# Relabel the second index level, replacing the text "True" with "1".
neighbour_labels = results_cornac.index.levels[1].str.replace("True", "1")
results_cornac.index = results_cornac.index.set_levels(neighbour_labels, level=1)

In [23]:
# Display the Cornac metrics table.
results_cornac

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,pop_corr,RMSE,NDCG,ARP,ave_PL,ACLT,AggDiv
MinimumSimilarity,MinimumNeighbours,OverCommon,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
-1,1,True,"(-0.09327498569759275, 1.2794716660567971e-08)",0.909644,4.2e-05,0.000375,-99.722348,9.998344,0.066919


In [25]:
# Load the "detailed_per_item_" pickle of every hyper-parameter setting, in the
# order of `versions`.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
cornac_detailed_results = []
for args in versions:
    detail_path = file_location + "detailed_per_item_" + data_strategy + "_" + str(args) + ".pkl"
    # `with` closes the handle even if unpickling raises (it was never closed before).
    with open(detail_path, "rb") as file:
        result = pkl.load(file)
    cornac_detailed_results.append(result)

# Merge

In [24]:
# Stack both result tables, sort the rows, and rebuild the index in the order
# (MinimumSimilarity, OverCommon, MinimumNeighbours).
index_order = ["MinimumSimilarity", "OverCommon", "MinimumNeighbours"]
combined_results = pd.concat([results_lkpy, results_cornac]).reset_index()
user_knn_metrics = combined_results.sort_values(index_order).set_index(index_order)

In [25]:
# Column order used for the combined table.
metrics_order = ["pop_corr", "ARP", "ave_PL", "ACLT", "AggDiv", "RMSE", "NDCG"]
user_knn_metrics = user_knn_metrics.loc[:, metrics_order]

In [26]:
# Give the metric columns their presentation names, then reindex the third index
# level (MinimumNeighbours) against the listed labels.
display_names = {"pop_corr": "PopCorr", "ave_PL": "PL", "ACLT": "APLT", "NDCG": "NDCG@10"}
neighbour_order = ["1", "2", "5", "10"]
user_knn_metrics = user_knn_metrics.rename(columns=display_names).reindex(
    neighbour_order, level=2
)

In [27]:
# Each PopCorr entry is a pair (presumably correlation and p-value; verify against
# the code that produced the pickles). Element 1 compared with 0.005 becomes the
# Significance flag; element 0 replaces the pair in the PopCorr column.
# Not re-runnable: after this cell PopCorr no longer holds pairs.
pop_corr_pairs = user_knn_metrics['PopCorr']
user_knn_metrics['Significance'] = pop_corr_pairs.apply(lambda pair: pair[1] < 0.005)
user_knn_metrics['PopCorr'] = pop_corr_pairs.apply(lambda pair: pair[0])

In [28]:
# Display the combined metrics table.
user_knn_metrics

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,PopCorr,ARP,PL,APLT,AggDiv,RMSE,NDCG@10,Significance
MinimumSimilarity,OverCommon,MinimumNeighbours,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
-1,False,1,-0.081954,0.000304,-99.782118,9.999834,0.06422,0.906,1.2e-05,True
-1,False,2,-0.052802,0.017443,-86.269501,9.537164,0.103076,0.904,0.004332,True
-1,False,10,0.247308,0.175437,26.134287,2.757018,0.174582,0.894,0.055352,True
-1,True,1,-0.093275,0.000375,-99.722348,9.998344,0.066919,0.909644,4.2e-05,True
0,False,1,-0.049771,0.009937,-91.917624,9.753207,0.074204,0.9,0.002671,True
0,False,2,-0.00302,0.041055,-67.793998,8.754033,0.109552,0.894,0.012751,False
0,False,10,0.175699,0.169928,21.997285,3.378492,0.109822,0.898,0.047951,True


In [29]:
# Emit the table as LaTeX, without the APLT and Significance columns, rounded to
# three decimals.
latex_columns = user_knn_metrics.drop(columns=['APLT', 'Significance'])
latex_table = latex_columns.round(3).to_latex()
print(latex_table)

\begin{tabular}{lllrrrrrr}
\toprule
  &       &    &  PopCorr &    ARP &      PL &  AggDiv &   RMSE &  NDCG@10 \\
MinimumSimilarity & OverCommon & MinimumNeighbours &          &        &         &         &        &          \\
\midrule
-1 & False & 1 &   -0.082 &  0.000 & -99.782 &   0.064 &  0.906 &    0.000 \\
  &       & 2 &   -0.053 &  0.017 & -86.270 &   0.103 &  0.904 &    0.004 \\
  &       & 10 &    0.247 &  0.175 &  26.134 &   0.175 &  0.894 &    0.055 \\
  & True & 1 &   -0.093 &  0.000 & -99.722 &   0.067 &  0.910 &    0.000 \\
0 & False & 1 &   -0.050 &  0.010 & -91.918 &   0.074 &  0.900 &    0.003 \\
  &       & 2 &   -0.003 &  0.041 & -67.794 &   0.110 &  0.894 &    0.013 \\
  &       & 10 &    0.176 &  0.170 &  21.997 &   0.110 &  0.898 &    0.048 \\
\bottomrule
\end{tabular}



In [30]:
import pickle

# Persist the combined table, rounded to three decimals, under metrics_combined/.
combined_path = "metrics_combined/" + data_strategy + "_all_user_knn.pkl"
with open(combined_path, "wb") as f:
    pickle.dump(user_knn_metrics.round(3), f)

# Significance tests

## 1. Average Recommendation Popularity

In [31]:
# Display the full list of loaded per-item detail objects for the LKPY settings.
lkpy_detailed_results

[[         profile  recommendation  average_rating
  1       0.060549             6.0       17.228033
  2       0.036726             0.0       23.704094
  3       0.008663             0.0       12.551282
  4       0.012859             0.0       16.756289
  5       0.016784            13.0       15.536464
  ...          ...             ...             ...
  296273  0.000226             0.0       25.000000
  296274  0.000226             0.0       25.000000
  296275  0.000226             0.0       25.000000
  296276  0.000180             0.0       12.000000
  296277  0.000226             0.0       25.000000
  
  [296277 rows x 3 columns]],
 [         profile  recommendation  average_rating
  1       0.060549            12.0       17.228033
  2       0.036726             0.0       23.704094
  3       0.008663             0.0       12.551282
  4       0.012859             1.0       16.756289
  5       0.016784            12.0       15.536464
  ...          ...             ...             ..