# Libraries

In [1]:
import pickle as pkl
import numpy as np
import pandas as pd
from scipy.stats import mannwhitneyu

In [2]:
data_strategy = "ml1m"  # dataset to examine; used below in the metrics directory and in every metrics file name

# Analysis

In [3]:
mlp_values = ['64-32', '64-64'] # the algorithm versions tested; each value is a file-name suffix and a row label of the results table

In [4]:
algo_name = "DMF"  # algorithm under analysis; names the sub-directory of "metrics/" that is read and written

In [5]:
# Directory holding every metrics file for this algorithm/dataset pair.
file_location = f"metrics/{algo_name}/{data_strategy}/"

In [6]:
# Load the pickled aggregate metrics for each tested configuration, in the
# same order as `mlp_values`.
# NOTE: unpickling can execute arbitrary code, so only load files produced by
# this project.
results = []
for mlp in mlp_values:
    # The context manager closes the file even if unpickling raises.
    with open(file_location + data_strategy + "_" + mlp + ".pkl", "rb") as file:
        results.append(pkl.load(file))

In [7]:
# Row labels for the results table: one entry per distinct tested configuration.
index = pd.MultiIndex.from_product(
    iterables=[mlp_values], names=["Network layers"]
).drop_duplicates()

In [8]:
index

MultiIndex([('64-32',),
            ('64-64',)],
           names=['Network layers'])

In [9]:
# Turn the list of loaded metrics into a table with one row per configuration.
# NOTE: this rebinds `results` from a list to a DataFrame, so re-running this
# cell without first re-running the loading cell operates on the DataFrame.
results = pd.DataFrame(results, index=index)

In [10]:
results

Unnamed: 0_level_0,pop_corr,RMSE,NDCG,ARP,ave_PL,ACLT,AggDiv
Network layers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
64-32,"(0.046193715274107074, 0.004912988697202006)",0,0.01009,0.064205,-54.776855,6.484934,0.041015
64-64,"(0.032165067612558704, 0.05023512431301087)",0,0.008296,0.057347,-59.654331,6.541225,0.043173


In [11]:
# Load the pickled per-item results for each tested configuration, in the same
# order as `mlp_values`.
# NOTE: unpickling can execute arbitrary code, so only load files produced by
# this project.
detailed_results = []
for mlp in mlp_values:
    # The context manager closes the file even if unpickling raises.
    with open(file_location + 'detailed_per_item_'+data_strategy + "_" + mlp + ".pkl", "rb") as file:
        detailed_results.append(pkl.load(file))

In [12]:
# Column order used for the reported metrics table.
metrics_order = [
    "pop_corr",
    "ARP",
    "ave_PL",
    "ACLT",
    "AggDiv",
    "RMSE",
    "NDCG",
]
metrics = results.loc[:, metrics_order]

In [13]:
# Switch from the stored metric keys to the names used in the reported table.
metrics = metrics.rename(
    columns=dict(pop_corr="PopCorr", ave_PL="PL", ACLT="APLT", NDCG="NDCG@10")
)

In [14]:
# Each "pop_corr" entry is a pair: element 0 becomes the PopCorr value and
# element 1 is compared against the significance threshold.
# The pairs are read from `results` rather than from `metrics.PopCorr` so this
# cell can be re-run: after the first run `metrics.PopCorr` holds plain floats,
# which can no longer be indexed.
pop_corr_pairs = results["pop_corr"]
metrics = metrics.assign(
    PopCorr=pop_corr_pairs.apply(lambda pair: pair[0]),
    # NOTE(review): 0.005 is the threshold used by the original cell; confirm
    # that it is the intended significance level.
    Significance=pop_corr_pairs.apply(lambda pair: pair[1] < 0.005),
)

In [15]:
metrics

Unnamed: 0_level_0,PopCorr,ARP,PL,APLT,AggDiv,RMSE,NDCG@10,Significance
Network layers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
64-32,0.046194,0.064205,-54.776855,6.484934,0.041015,0,0.01009,True
64-64,0.032165,0.057347,-59.654331,6.541225,0.043173,0,0.008296,False


In [16]:
# Emit the LaTeX table, leaving out the columns that are not reported.
print(metrics.drop(columns=['APLT', 'Significance', 'RMSE']).round(3).to_latex())

\begin{tabular}{lrrrrr}
\toprule
      &  PopCorr &    ARP &      PL &  AggDiv &  NDCG@10 \\
Network layers &          &        &         &         &          \\
\midrule
64-32 &    0.046 &  0.064 & -54.777 &   0.041 &    0.010 \\
64-64 &    0.032 &  0.057 & -59.654 &   0.043 &    0.008 \\
\bottomrule
\end{tabular}



In [17]:
import pickle

# Store the rounded metrics table (without RMSE, which is irrelevant for DMF)
# in the metrics directory of this algorithm/dataset pair.
with open(f"metrics/{algo_name}/{data_strategy}/{data_strategy}_final_metrics.pkl", "wb") as f:
    pickle.dump(metrics.drop(columns='RMSE').round(3), f)

# Significance tests

## 1. Average Recommendation Popularity

In [23]:
def highest_average(df_list, column_name = 'recommendation'):
    """Find which data frame has the highest mean in ``column_name``.

    Prints every mean as it is computed, then the winning mean and its index.

    Parameters
    ----------
    df_list : sequence
        Objects supporting ``df[column_name].values`` (pandas DataFrames in
        this notebook).
    column_name : str
        Column whose mean is compared across the frames.

    Returns
    -------
    tuple
        ``(best_mean, best_index)``. The first frame wins a tie. If
        ``df_list`` is empty, or no mean compares greater than ``-inf``
        (for example when every mean is NaN), the result is ``(-inf, -1)``.
    """
    # Start from -inf instead of a finite sentinel, so that arbitrarily
    # negative means can still be selected.
    best_mean = float('-inf')
    best_index = -1
    for i, df in enumerate(df_list):
        mean = np.mean(df[column_name].values)
        print('mean', mean)
        if mean > best_mean:
            best_mean = mean
            best_index = i
    print(best_mean, best_index)
    return best_mean, best_index

In [24]:
def mannwhitneyu_test(df_list, alt = 'greater', column_name = 'recommendation'):
    """Compare the frame with the highest mean against every other frame.

    The frame whose ``column_name`` has the highest mean (as chosen by
    ``highest_average``) is tested against each remaining frame with a
    Mann-Whitney U test, using ``alt`` as the alternative hypothesis.
    The indices of the frames it is compared with are printed.

    Returns the list of p-values, one per comparison, ordered by frame index.
    """
    _, best_idx = highest_average(df_list, column_name)
    positions = list(range(len(df_list)))
    # Every position except the one with the highest mean.
    others = positions[:best_idx] + positions[best_idx + 1:]
    print(others)
    best_frame = df_list[best_idx]
    return [
        mannwhitneyu(
            best_frame[column_name].values,
            df_list[other][column_name].values,
            alternative=alt,
        ).pvalue
        for other in others
    ]

In [25]:
mannwhitneyu_test(detailed_results) 

mean 0.06420547618525503
mean 0.05734654730055699
0.06420547618525503 0
[1]


[4.95191751122115e-63]

## 2. Popularity Lift

In [26]:
# Popularity lift per row: relative change, in percent, from the 'profile'
# value to the 'recommendation' value. Stored as a new column on each frame.
for df in detailed_results:
    df['popularity_lift'] = (
        df['recommendation'].sub(df['profile']).div(df['profile']).mul(100)
    )

In [27]:
mannwhitneyu_test(detailed_results, column_name = 'popularity_lift') 

mean -54.77685509511659
mean -59.65433129769365
-54.77685509511659 0
[1]


[6.316069150958552e-39]