# Libraries

In [36]:
import pickle as pkl
import pandas as pd
import numpy as np
from scipy.stats import mannwhitneyu

In [2]:
# Algorithm analysed in the LKPY section; it selects the entry of `algo_versions`
# and the sub-directory of "experimental_results/" that is read.
algo_name = "MF"
# Data-strategy names. Each name is part of the result file names and becomes a
# label on the "DataStrategy" index level.
data_strategies = [
    "uniformly_random",
    "popularity_good",
    "popularity_bad",
    "popularity_good_for_bp_ur",
    "popularity_bad_for_bp_ur",
]

# Analysis

## LKPY

In [3]:
# Hyperparameter variants per algorithm. str() of each dict is embedded in the
# result file names, so the dicts must match what was used when saving.
algo_versions = {
    "UserKNN": [
        {"center": True, "min_sim": 0},
        {"center": False, "min_sim": 0},
        {"center": True, "min_sim": -1},
    ],
    "MF": [{"bias": True}, {"bias": False}],
    "FunkSVD": [{"bias": True}, {"bias": False}],
}

In [4]:
# Variants of the algorithm selected by `algo_name`.
versions = algo_versions[algo_name]

In [5]:
# Directory (with trailing slash) holding the pickled results of this algorithm.
file_location = "".join(["experimental_results/", algo_name, "/"])

In [6]:
# Load one pickled result per (data strategy, version) pair. The nesting order
# (strategy outer, version inner) must match the order of the MultiIndex that
# later labels these rows.
results = []
for data_strategy in data_strategies:
    for args in versions:
        # File names embed str(args), e.g. "uniformly_random_{'bias': True}.pkl".
        # The context manager closes each handle as soon as it has been read.
        with open(file_location + data_strategy + "_" + str(args) + ".pkl", "rb") as file:
            result = pkl.load(file)
        results.append(result)

In [7]:
# Textual form of every version dict, e.g. "{'bias': True}".
stringed_versions = list(map(str, versions))

In [8]:
# Pull the textual value of the last key out of each version string:
# take what follows the final ": " and cut it at the closing brace,
# e.g. "{'bias': True}" -> "True".
bias = [
    version_text.split(": ")[-1].split("}")[0]
    for version_text in stringed_versions
]

In [9]:
# Single-element level used to tag every row of this section with its library.
library = ["Lenskit"]

In [10]:
# Row labels as the cartesian product strategy x bias x library, enumerated in
# the same nested order in which the result files were loaded.
index_levels = [data_strategies, bias, library]
index_names = ["DataStrategy", "Bias", "Library"]
index = pd.MultiIndex.from_product(index_levels, names=index_names)
# Remove any repeated label tuples.
index = index.drop_duplicates()

In [11]:
# One row per loaded result, labelled positionally by `index`; this relies on
# `results` and `index` being in the same order.
results_lkpy = pd.DataFrame(results, index=index)

In [12]:
# Display the Lenskit summary table.
results_lkpy

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,pop_corr,RMSE,NDCG,ARP,ave_PL,ACLT,AggDiv
DataStrategy,Bias,Library,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
uniformly_random,True,Lenskit,"(-0.0084113327575704, 0.4841482929966509)",3.028,0.001334,0.001807,-49.724579,4.215338,0.382026
uniformly_random,False,Lenskit,"(-0.014868944844468091, 0.21615007565767605)",3.4,0.00227,0.00174,-52.394803,5.209546,0.540962
popularity_good,True,Lenskit,"(0.4981708413296651, 0.0)",0.818,0.024783,0.029964,742.365701,0.0,0.003612
popularity_good,False,Lenskit,"(0.507238102015917, 0.0)",0.94,0.024854,0.029899,740.064312,0.000157,0.006502
popularity_bad,True,Lenskit,"(0.01719862360454119, 0.1525320747804469)",0.81,0.001452,0.00222,-37.387105,0.575497,0.396763
popularity_bad,False,Lenskit,"(-0.09129381107025494, 2.7518548375828272e-14)",1.032,0.001383,0.00099,-73.688405,7.662996,0.390984
popularity_good_for_bp_ur,True,Lenskit,"(0.2189312101908367, 6.981832841594372e-76)",2.39,0.007704,0.008316,129.270877,4.841846,0.239127
popularity_good_for_bp_ur,False,Lenskit,"(0.4616894083582897, 0.0)",2.746,0.011716,0.01038,149.806771,4.285501,0.52478
popularity_bad_for_bp_ur,True,Lenskit,"(-0.01312049158509899, 0.2751070558435274)",2.582,0.001482,0.001782,-50.113408,3.050236,0.45976
popularity_bad_for_bp_ur,False,Lenskit,"(-0.04479021928309672, 0.0001934576279506944)",2.89,0.002164,0.001499,-59.393055,5.304493,0.621153


In [13]:
# Map each data strategy to the list of its detailed per-item results, one list
# entry per version and in the order of `versions`.
lkpy_dict_detailed = {}
for data_strategy in data_strategies:
    lkpy_detailed_results = []
    for args in versions:
        # The context manager closes each handle as soon as it has been read.
        with open(file_location + 'correct_detailed_per_item_'+data_strategy + "_" + str(args) + ".pkl", "rb") as file:
            result = pkl.load(file)
        lkpy_detailed_results.append(result)
    lkpy_dict_detailed[data_strategy] = lkpy_detailed_results

## Cornac

In [14]:
# Switch the analysis to Cornac. Both names are rebound here, replacing the
# values that the LKPY section used.
algo_versions = {"CornacMF": [{"bias": True}, {"bias": False}]}
algo_name = "CornacMF"

In [15]:
# Variants of the algorithm selected by `algo_name`.
versions = algo_versions[algo_name]

In [16]:
# Directory (with trailing slash) holding the pickled results of this algorithm.
file_location = "".join(["experimental_results/", algo_name, "/"])

In [17]:
# Load one pickled result per (data strategy, version) pair. The nesting order
# (strategy outer, version inner) must match the order of the MultiIndex that
# later labels these rows.
results = []
for data_strategy in data_strategies:
    for args in versions:
        # File names embed str(args), e.g. "uniformly_random_{'bias': True}.pkl".
        # The context manager closes each handle as soon as it has been read.
        with open(file_location + data_strategy + "_" + str(args) + ".pkl", "rb") as file:
            result = pkl.load(file)
        results.append(result)

In [18]:
# Textual form of every version dict, e.g. "{'bias': True}".
stringed_versions = list(map(str, versions))

In [19]:
# Pull the textual value of the last key out of each version string:
# take what follows the final ": " and cut it at the closing brace,
# e.g. "{'bias': True}" -> "True".
bias = [
    version_text.split(": ")[-1].split("}")[0]
    for version_text in stringed_versions
]

In [20]:
# Single-element level used to tag every row of this section with its library.
library = ["Cornac"]

In [21]:
# Row labels as the cartesian product strategy x bias x library, enumerated in
# the same nested order in which the result files were loaded.
index_levels = [data_strategies, bias, library]
index_names = ["DataStrategy", "Bias", "Library"]
index = pd.MultiIndex.from_product(index_levels, names=index_names)
# Remove any repeated label tuples.
index = index.drop_duplicates()

In [22]:
# One row per loaded result, labelled positionally by `index`; this relies on
# `results` and `index` being in the same order.
results_cornac = pd.DataFrame(results, index=index)

In [23]:
# Display the Cornac summary table.
results_cornac

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,pop_corr,RMSE,NDCG,ARP,ave_PL,ACLT,AggDiv
DataStrategy,Bias,Library,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
uniformly_random,True,Cornac,"(0.07863243050744975, 5.730837387909516e-11)",2.871573,0.002812,0.006229,74.547644,0.0,0.003323
uniformly_random,False,Cornac,"(-0.01268121076724073, 0.29150121207439417)",3.190945,0.000669,0.001277,-67.526611,9.224062,0.070077
popularity_good,True,Cornac,"(0.4673158053085428, 0.0)",0.802046,0.024955,0.030007,743.445627,0.0,0.002601
popularity_good,False,Cornac,"(0.47544579552522975, 0.0)",0.937581,0.025701,0.030003,743.38764,0.0,0.002601
popularity_bad,True,Cornac,"(0.009875877002987922, 0.4113780367227908)",0.7959,0.000855,0.002449,-31.47276,0.0,0.00289
popularity_bad,False,Cornac,"(-0.02170099684454945, 0.07103593084113649)",0.849619,0.000743,0.00085,-76.209743,9.024816,0.00838
popularity_good_for_bp_ur,True,Cornac,"(0.42369300111121155, 1.203854057246853e-299)",2.297497,0.018671,0.026529,647.073679,0.985524,0.003179
popularity_good_for_bp_ur,False,Cornac,"(0.035582635946755106, 0.003070275317710374)",2.509736,0.004534,0.003534,-20.285699,4.881852,0.031065
popularity_bad_for_bp_ur,True,Cornac,"(-0.00555074323531515, 0.6442960270931561)",2.499912,0.000606,0.001668,-53.319605,1.78915,0.003179
popularity_bad_for_bp_ur,False,Cornac,"(-0.018058599812239288, 0.13304751704817472)",2.670848,0.000609,0.001041,-73.47387,8.838885,0.030487


In [24]:
# Map each data strategy to the list of its detailed per-item results, one list
# entry per version and in the order of `versions`.
cornac_dict_detailed = {}
for data_strategy in data_strategies:
    cornac_detailed_results = []
    for args in versions:
        # The context manager closes each handle as soon as it has been read.
        with open(file_location + 'correct_detailed_per_item_'+data_strategy + "_" + str(args) + ".pkl", "rb") as file:
            result = pkl.load(file)
        cornac_detailed_results.append(result)
    cornac_dict_detailed[data_strategy] = cornac_detailed_results

# Merge

In [25]:
# Stack both libraries' tables and flatten the index into ordinary columns.
stacked_results = pd.concat([results_lkpy, results_cornac]).reset_index()

# Sort by, and re-index on, (DataStrategy, Library, Bias). The final reindex
# puts the strategies back into the order of `data_strategies`, which the
# sort would otherwise leave alphabetical.
index_columns = ["DataStrategy", "Library", "Bias"]
mf_metrics = (
    stacked_results.sort_values(index_columns)
    .set_index(index_columns)
    .reindex(data_strategies, level=0)
)

In [26]:
# Column order for the final table.
metrics_order = ["pop_corr", "ARP", "ave_PL", "ACLT", "AggDiv", "RMSE", "NDCG"]
# NOTE: rebinds mf_metrics to itself using the pre-rename column names, so this
# cell cannot be re-run once the columns have been renamed.
mf_metrics = mf_metrics[metrics_order]

In [27]:
# Give four metric columns their presentation names; the remaining columns keep their names.
mf_metrics = mf_metrics.rename(
    columns={"pop_corr": "PopCorr", "ave_PL": "PL", "ACLT": "APLT", "NDCG": "NDCG@10"}
)

In [28]:
# Replace the data-strategy labels in the index with numbered scenario names.
mf_metrics = mf_metrics.rename(
    index={
        "uniformly_random": "Scenario 1",
        "popularity_good": "Scenario 2",
        "popularity_bad": "Scenario 3",
        "popularity_good_for_bp_ur": "Scenario 4",
        "popularity_bad_for_bp_ur": "Scenario 5",
    }
)

In [29]:
# Each PopCorr entry is a two-element pair (presumably correlation and p-value):
# element 0 becomes the new PopCorr value, element 1 is compared with the
# threshold to fill the boolean Significance column.
# NOTE(review): the threshold is 0.005, not the conventional 0.05 - confirm.
# NOTE: not re-runnable; after this cell PopCorr no longer holds pairs.
popcorr_pairs = mf_metrics["PopCorr"]
mf_metrics["Significance"] = popcorr_pairs.apply(lambda pair: pair[1] < 0.005)
mf_metrics["PopCorr"] = popcorr_pairs.apply(lambda pair: pair[0])

In [30]:
# Display the combined metrics table.
mf_metrics

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,PopCorr,ARP,PL,APLT,AggDiv,RMSE,NDCG@10,Significance
DataStrategy,Library,Bias,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Scenario 1,Cornac,False,-0.012681,0.001277,-67.526611,9.224062,0.070077,3.190945,0.000669,False
Scenario 1,Cornac,True,0.078632,0.006229,74.547644,0.0,0.003323,2.871573,0.002812,True
Scenario 1,Lenskit,False,-0.014869,0.00174,-52.394803,5.209546,0.540962,3.4,0.00227,False
Scenario 1,Lenskit,True,-0.008411,0.001807,-49.724579,4.215338,0.382026,3.028,0.001334,False
Scenario 2,Cornac,False,0.475446,0.030003,743.38764,0.0,0.002601,0.937581,0.025701,True
Scenario 2,Cornac,True,0.467316,0.030007,743.445627,0.0,0.002601,0.802046,0.024955,True
Scenario 2,Lenskit,False,0.507238,0.029899,740.064312,0.000157,0.006502,0.94,0.024854,True
Scenario 2,Lenskit,True,0.498171,0.029964,742.365701,0.0,0.003612,0.818,0.024783,True
Scenario 3,Cornac,False,-0.021701,0.00085,-76.209743,9.024816,0.00838,0.849619,0.000743,False
Scenario 3,Cornac,True,0.009876,0.002449,-31.47276,0.0,0.00289,0.7959,0.000855,False


In [31]:
# Print the table as LaTeX, without the APLT and Significance columns and
# rounded to three decimals.
print(mf_metrics.drop(['APLT','Significance'], axis=1).round(3).to_latex())

\begin{tabular}{lllrrrrrr}
\toprule
           &         &      &  PopCorr &    ARP &       PL &  AggDiv &   RMSE &  NDCG@10 \\
DataStrategy & Library & Bias &          &        &          &         &        &          \\
\midrule
Scenario 1 & Cornac & False &   -0.013 &  0.001 &  -67.527 &   0.070 &  3.191 &    0.001 \\
           &         & True &    0.079 &  0.006 &   74.548 &   0.003 &  2.872 &    0.003 \\
           & Lenskit & False &   -0.015 &  0.002 &  -52.395 &   0.541 &  3.400 &    0.002 \\
           &         & True &   -0.008 &  0.002 &  -49.725 &   0.382 &  3.028 &    0.001 \\
Scenario 2 & Cornac & False &    0.475 &  0.030 &  743.388 &   0.003 &  0.938 &    0.026 \\
           &         & True &    0.467 &  0.030 &  743.446 &   0.003 &  0.802 &    0.025 \\
           & Lenskit & False &    0.507 &  0.030 &  740.064 &   0.007 &  0.940 &    0.025 \\
           &         & True &    0.498 &  0.030 &  742.366 &   0.004 &  0.818 &    0.025 \\
Scenario 3 & Cornac & False &  

In [32]:
# NOTE(review): pickle is already imported as `pkl` in the first cell; this
# second import is redundant and would normally live in the imports cell.
import pickle

# Persist the rounded table. open() does not create "metrics_combined/", so the
# directory must already exist.
with open("metrics_combined/all_mf.pkl", "wb") as f:
    pickle.dump(mf_metrics.round(3), f)

# Significance tests

## 1. Average Recommendation Popularity
Mann-Whitney U tests on the `recommendation` values of the detailed results.

In [54]:
def highest_average(df_list, column_name = 'recommendation'):
    """Find the entry of ``df_list`` whose ``column_name`` column has the largest mean.

    Parameters
    ----------
    df_list : sequence
        Objects supporting ``df[column_name].values`` (e.g. DataFrames).
    column_name : str
        Name of the column whose mean is compared.

    Returns
    -------
    tuple
        ``(best_mean, best_index)``. On ties the earliest index wins. If
        ``df_list`` is empty, or no mean compares greater than ``-inf``
        (e.g. all means are NaN), the result is ``(-inf, -1)``.

    The result is also printed.
    """
    # Start from -inf rather than a finite sentinel so that arbitrarily
    # negative means are still recognised as the maximum.
    best_mean = -np.inf
    best_index = -1
    for i, df in enumerate(df_list):
        mean = np.mean(df[column_name].values)
        if mean > best_mean:
            best_mean = mean
            best_index = i
    print(best_mean, best_index)
    return best_mean, best_index

In [45]:
def mannwhitneyu_test(df_list, alt = 'greater', column_name = 'recommendation'):
    """Compare the highest-mean entry of ``df_list`` against every other entry.

    The reference entry is the one selected by ``highest_average``. For each
    remaining entry a Mann-Whitney U test is run on the ``column_name`` values,
    with the reference as the first sample and ``alt`` as the alternative.

    Returns the list of p-values, ordered by the indices of the remaining
    entries (those indices are printed).
    """
    _, best_index = highest_average(df_list, column_name)

    # Every position except the reference one.
    all_indices = list(range(len(df_list)))
    other_indices = all_indices[:best_index] + all_indices[best_index + 1:]
    print(other_indices)

    reference_df = df_list[best_index]
    # mannwhitneyu(...)[1] is the p-value of the test.
    return [
        mannwhitneyu(
            reference_df[column_name].values,
            df_list[other][column_name].values,
            alternative = alt,
        )[1]
        for other in other_indices
    ]

In [55]:
# Per data strategy, pool the detailed results of both libraries (Cornac
# entries first, then Lenskit, so printed indices follow that order) and test
# the default `recommendation` column.
for data_strategy in data_strategies:
    mf_results = [
        *cornac_dict_detailed[data_strategy],
        *lkpy_dict_detailed[data_strategy],
    ]
    print(data_strategy)
    pvalues = mannwhitneyu_test(mf_results)
    print(pvalues)

uniformly_random
0.0062289698804902935 0
[1, 2, 3]
[0.0, 0.0, 0.0]
popularity_good
0.030006683626159835 0
[1, 2, 3]
[0.8534853308427657, 4.288410014633355e-28, 2.727805375449378e-55]
popularity_bad
0.0024489683942505278 0
[1, 2, 3]
[0.0, 0.0, 0.0]
popularity_good_for_bp_ur
0.026528862786129606 0
[1, 2, 3]
[0.0, 0.0, 0.0]
popularity_bad_for_bp_ur
0.0017818500835292475 2
[0, 1, 3]
[3.2593782881832063e-24, 0.0, 0.0]


## 2. Popularity Lift

In [56]:
# Per data strategy, pool both libraries' detailed results (Cornac first, then
# Lenskit), add a `popularity_lift` column to each frame in place, and test
# that column.
for data_strategy in data_strategies:
    print(data_strategy)
    mf_results = [
        *cornac_dict_detailed[data_strategy],
        *lkpy_dict_detailed[data_strategy],
    ]
    for detailed in mf_results:
        # Relative change of recommendation over profile, in percent.
        difference = detailed['recommendation'] - detailed['profile']
        detailed['popularity_lift'] = difference / detailed['profile'] * 100
    pvalues = mannwhitneyu_test(mf_results, column_name = 'popularity_lift')
    print(pvalues)

uniformly_random
74.54856930010556 0
[1, 2, 3]
[0.0, 0.0, 0.0]
popularity_good
743.4447209493031 0
[1, 2, 3]
[0.49763789819401555, 0.4449623897845607, 0.3703877704580408]
popularity_bad
-31.472504998943098 0
[1, 2, 3]
[0.0, 2.3220355214609353e-21, 0.0]
popularity_good_for_bp_ur
647.0730669387227 0
[1, 2, 3]
[0.0, 0.0, 0.0]
popularity_bad_for_bp_ur
-50.113341166560296 2
[0, 1, 3]
[6.039310291090798e-05, 0.0, 4.333732102116825e-70]
