# Libraries

In [1]:
import pickle as pkl
import numpy as np
import pandas as pd
from scipy.stats import mannwhitneyu

In [2]:
# Dataset/strategy identifier; it is spliced into the metrics directory path and
# into every pickle file name read or written further down.
data_strategy = "fairbook"  # change depending on which dataset we want to examine

# Analysis

In [3]:
# Each entry is used both as a file-name suffix when loading the pickled metrics
# and as a label in the "Network layers" index of the results table.
mlp_values = ['64-32', '64-64'] # the different versions of the algorithm tested

In [4]:
# Algorithm name; used as the sub-directory under "metrics/" that holds the pickles.
algo_name = "DMF"

In [7]:
# Directory (with trailing slash) holding the metric pickles for this algorithm/dataset pair.
file_location = f"metrics/{algo_name}/{data_strategy}/"

In [8]:
# Load one pickled summary-metrics object per network configuration, in the
# same order as `mlp_values`.
# NOTE(review): pickle.load can execute arbitrary code; only load files that
# were produced by this project.
results = []
for mlp in mlp_values:
    # Use a context manager so each file handle is closed right after reading
    # instead of being left open for the lifetime of the kernel.
    with open(file_location + data_strategy + "_" + mlp + ".pkl", "rb") as file:
        result = pkl.load(file)
    results.append(result)

In [14]:
# One-level MultiIndex labelling each loaded result with its network-layer
# configuration; repeated labels are removed.
index = pd.MultiIndex.from_product(iterables=[mlp_values], names=["Network layers"])
index = index.drop_duplicates()

In [15]:
index

MultiIndex([('64-32',),
            ('64-64',)],
           names=['Network layers'])

In [16]:
# Rebinds `results` from the list of loaded objects to a DataFrame indexed by
# network-layer configuration (row order follows `mlp_values`).
# NOTE(review): the loaded objects are presumably dicts keyed by metric name — confirm.
results = pd.DataFrame(results, index=index)

In [17]:
results

Unnamed: 0_level_0,pop_corr,RMSE,NDCG,ARP,ave_PL,ACLT,AggDiv
Network layers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
64-32,"(0.01196435454992095, 0.31963853024271466)",0,0.002005,0.002197,-38.609755,4.124874,0.059529
64-64,"(0.124834440162723, 1.922346687769655e-25)",0,0.003344,0.004812,35.581927,2.139445,0.063719


In [18]:
# Load the per-item detail object for every network configuration, in the same
# order as `mlp_values`. These are used as DataFrames by the significance tests below.
# NOTE(review): pickle.load can execute arbitrary code; only load files that
# were produced by this project.
detailed_results = []
for mlp in mlp_values:
    # Context manager guarantees the handle is closed once the pickle is read.
    with open(file_location + 'detailed_per_item_'+data_strategy + "_" + mlp + ".pkl", "rb") as file:
        result = pkl.load(file)
    detailed_results.append(result)

In [20]:
# Column order for the reported table.
metrics_order = [
    "pop_corr",
    "ARP",
    "ave_PL",
    "ACLT",
    "AggDiv",
    "RMSE",
    "NDCG",
]
# Select (and reorder) the metric columns from the loaded results.
metrics = results.loc[:, metrics_order]

In [21]:
# Switch from the internal metric keys to the names used in the reported table.
metrics = metrics.rename(
    columns=dict(pop_corr="PopCorr", ave_PL="PL", ACLT="APLT", NDCG="NDCG@10")
)

In [22]:
# Each PopCorr cell holds a pair; element 0 is kept as the PopCorr value and
# element 1 is compared against the 0.005 threshold to produce the flag.
# `Significance` is appended as the last column and `PopCorr` is replaced in place.
pop_corr_pairs = metrics.PopCorr
metrics = metrics.assign(
    Significance=pop_corr_pairs.apply(lambda pair: bool(pair[1] < 0.005)),
    PopCorr=pop_corr_pairs.apply(lambda pair: pair[0]),
)

In [23]:
metrics

Unnamed: 0_level_0,PopCorr,ARP,PL,APLT,AggDiv,RMSE,NDCG@10,Significance
Network layers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
64-32,0.011964,0.002197,-38.609755,4.124874,0.059529,0,0.002005,False
64-64,0.124834,0.004812,35.581927,2.139445,0.063719,0,0.003344,True


In [24]:
# Emit the rounded table (without APLT and the significance flag) as LaTeX source.
print(
    metrics
    .drop(columns=['APLT', 'Significance'])
    .round(3)
    .to_latex()
)

\begin{tabular}{lrrrrrr}
\toprule
      &  PopCorr &    ARP &      PL &  AggDiv &  RMSE &  NDCG@10 \\
Network layers &          &        &         &         &       &          \\
\midrule
64-32 &    0.012 &  0.002 & -38.610 &   0.060 &     0 &    0.002 \\
64-64 &    0.125 &  0.005 &  35.582 &   0.064 &     0 &    0.003 \\
\bottomrule
\end{tabular}



In [25]:
import pickle

# Persist the rounded metrics table for this dataset.
# RMSE is irrelevant for DMF, so that column is left out of the saved frame.
with open(f"metrics/{data_strategy}_final_metrics.pkl", "wb") as f:
    pickle.dump(metrics.round(3).drop(columns='RMSE'), f)

# Significance tests

## 1. Average Recommendation Popularity

In [26]:
def highest_average(df_list, column_name = 'recommendation'):
    """Find the DataFrame whose `column_name` column has the largest mean.

    Prints every mean as it is computed, then the winning mean and its index.

    Parameters
    ----------
    df_list : sequence of DataFrames, each containing `column_name`.
    column_name : name of the column to average.

    Returns
    -------
    (best_mean, best_index) : the largest mean and the position in `df_list`
        of the frame that produced it. On ties the first frame wins. If no
        mean compares greater than -inf (empty list, or all means NaN) the
        result is ``(-inf, -1)``.
    """
    # Seed with -inf so any finite mean wins. The previous seed `-10^6` is a
    # bitwise XOR in Python and evaluates to -16, which silently broke the
    # search whenever every mean was <= -16.
    best_mean = -np.inf
    best_index = -1
    for i, df in enumerate(df_list):
        mean = np.mean(df[column_name].values)
        print('mean', mean)
        if mean > best_mean:
            best_mean = mean
            best_index = i
    print(best_mean, best_index)
    return best_mean, best_index

In [35]:
def mannwhitneyu_test(df_list, alt = 'greater', column_name = 'recommendation'):
    """Run Mann-Whitney U tests of the highest-mean frame against every other frame.

    The frame with the largest mean of `column_name` (as chosen by
    `highest_average`) is the first sample; each remaining frame in turn is
    the second sample. The indices of the frames being compared are printed.

    Parameters
    ----------
    df_list : sequence of DataFrames, each containing `column_name`.
    alt : value passed to `mannwhitneyu` as `alternative`.
    column_name : column whose values are compared.

    Returns
    -------
    list of p-values, one per compared frame, in `df_list` order.
    """
    _, best_idx = highest_average(df_list, column_name)
    positions = list(range(len(df_list)))
    # Everything except the reference frame.
    others = positions[:best_idx] + positions[best_idx + 1:]
    print(others)
    reference_values = df_list[best_idx][column_name].values
    return [
        mannwhitneyu(
            reference_values,
            df_list[pos][column_name].values,
            alternative=alt,
        ).pvalue
        for pos in others
    ]

In [36]:
mannwhitneyu_test(detailed_results) 

mean 0.0021974430936902
mean 0.004811993143506939
0.004811993143506939 1
[0]


[0.0]

## 2. Popularity Lift

In [37]:
# Add a per-row popularity lift column to every detailed frame (in place):
# the relative change from 'profile' to 'recommendation', in percent.
for df in detailed_results:
    df['popularity_lift'] = (
        df['recommendation']
        .sub(df['profile'])
        .div(df['profile'])
        .mul(100)
    )

In [38]:
mannwhitneyu_test(detailed_results, column_name = 'popularity_lift') 

mean -38.60905436487472
mean 35.58080051232812
35.58080051232812 1
[0]


[0.0]