### setup

In [198]:
import datetime
import matplotlib
import numpy as np
import pandas as pd
import pyreadr
import pickle
import re
import os

In [2]:
os.chdir('C:\\Users\\Simon\\Desktop\\MA\\session-rec')

In [272]:
# datatypes = ['app-level', 'seq-level']
datatypes = ['app-level']
windows = [1,2,3,4,5]

In [4]:
USER_KEY = 'userID'
TIME_KEY = 'timestamp'
if datatype == 'app-level':    
    ITEM_KEY = 'appID'
    SESSION_KEY = 'sessionID'
else:
    ITEM_KEY = 'usID'
    SESSION_KEY = 'sentenceID'

### helper functions

In [5]:
# for multiple windows (incl. min20)
# get average performance across all windows for a given algorithm
def get_av_perf(files, key):
    res = pd.DataFrame()
    for file in files:
        window = file.strip('.csv').split('_')[-1]
        df = pd.read_csv(folder_res + file, sep = ';')
        df.drop(['Metrics', 'Saver@50: '], axis=1, inplace=True)
        df.drop(df.filter(regex='Unnamed'), axis=1, inplace=True) # drop 'Unnamed: 24' column containing only NaNs
        df.rename(columns = lambda x : str(x)[:-2], inplace=True) # remove colon and whitespace from all column names
        df.insert(0, 'model', key)
        df.insert(1, 'window', window)
        res = res.append(df)
    res = res.groupby('model').mean().reset_index(level=0)
    return(res)

In [6]:
# for single window
# get performance for a given algorithm
def get_perf(file, key):
    df = pd.read_csv(folder_res + file, sep = ';')
    df.drop(['Metrics', 'Saver@50: '], axis=1, inplace=True)
    df.drop(df.filter(regex='Unnamed'), axis=1, inplace=True) # drop 'Unnamed: 24' column containing only NaNs
    df.rename(columns = lambda x : str(x)[:-2], inplace=True) # remove colon and whitespace from all column names
    df.insert(0, 'model', key)
    return(df)

In [7]:
# extract ground truth from test data (test_data) for a single item (position) in a single session (sessionId)
def extract_ground_truth(ID, position, test_data):
    relevant_df = test_data[test_data[SESSION_KEY]==ID]
    index = relevant_df.index[position+1]
    ground_truth = relevant_df[ITEM_KEY][index]
    return ground_truth

In [118]:
# generate a df containing the ground truth as well as predictions for all available algorithms
def generate_predictions(predictions_files, test_data, mapping_id2name, multiple=True):
    predictions = pd.DataFrame()
    for file in predictions_files:
        if multiple:
            model = "_".join(file.split('_')[2:-2])
        else:
            model = "_".join(file.split('_')[2:-1])
        df = pd.read_csv(folder_res + file, sep = ';')
        if 'sessionID' not in predictions.columns:
            predictions['sessionID'] = df['SessionId']
        if 'position' not in predictions.columns:
            predictions['position'] = df['Position']
        if 'ground_truth' not in predictions.columns:
            predictions['ground_truth'] = predictions.apply(lambda x: extract_ground_truth(x['sessionID'], x['position'], test_data), axis=1)
            predictions['ground_truth_name'] = predictions['ground_truth'].apply(lambda x: mapping_reverse[x])
        predictions['recs-' + model] = df['Recommendations'].apply(lambda x: [int(i) for i in x.split(',')])
        predictions['recs_names-' + model] = predictions['recs-' + model].apply(lambda x: [mapping_reverse[i] for i in x])
        predictions['scores-' + model] = df['Scores'].apply(lambda x: x.split(','))
    return predictions

In [94]:
# helper function outputting whether ground truth is in recommendation list of length k for a single algorithm and item
def calc_hr_k(ground_truth, rec_list, k):
    return ground_truth in rec_list[:k]

In [9]:
def print_predictions(predictions, sessionID, num_recs, positions, models):
    # predictions must contain columns named 'sessionID' and 'position', containing the respective values
    predictions_dict = {}
    for pos in positions:
        row = predictions[(predictions.sessionID == sessionID) & (predictions.position == pos)]
        ground_truth = row.ground_truth_name.to_string(index=False)
#         print('sessionID: ' + str(sessionID) + ', position: ' + str(pos))
#         print('ground truth: ' + str(row.ground_truth_name.to_string(index=False)))
        df = pd.DataFrame()
        for model in models:
            df[model] = [row['recs_names-' + model].tolist()[0][i] for i in range(num_recs)]
        name = str(sessionID) + '_' + str(pos)
        predictions_dict[name] = (sessionID, pos, ground_truth, df)
    return predictions_dict

In [124]:
def capitalize_names(df):
    name_dict = {
        'ar': 'AR',
        'ct-pre': 'CT',
        'ctpre': 'CT',
        'gru4rec': 'GRU4Rec',
        'gru4rec_Reminder': 'GRU4Rec_Reminder',
        'hgru4rec': 'HGRU4Rec',
        'shan': 'SHAN',
        'sknn': 'SKNN',
        'sr': 'SR',
        'sr_BR': 'SR_BR',
        'stan': 'STAN',
        'vsknn': 'VSKNN',
        'vsknn_EBR': 'VSKNN_EBR',
        'vstan': 'VSTAN',
        'vstan_EBR': 'VSTAN_EBR'
    }
    df['model'] = df['model'].apply(lambda x: name_dict[x])
    return df

### multiple windows

##### overall

In [39]:
folder_res = 'results/testing/' + str(datatype) + '/multiple/'
algos = set([f.split('_window')[0].split('test_single_')[1] for f in os.listdir(folder_res)])
results_app = pd.DataFrame()
for key in algos:
    files = [f for f in os.listdir(folder_res) 
             if ('Saver' not in f) and (f.startswith('test_single_' + str(key) + '_window'))]
    res = get_av_perf(files, key)
    results_app = results_app.append(res)
results_app = capitalize_names(results_app)
results_app = results_app.round(4)
results_app = results_app.sort_values('model')

In [40]:
results_app

Unnamed: 0,model,HitRate@1,HitRate@5,HitRate@10,HitRate@20,MRR@5,MRR@10,MRR@20,Coverage@20,Popularity@20
0,AR,0.2059,0.543,0.6631,0.8069,0.3329,0.3491,0.3593,0.3557,0.2172
0,CT,0.3738,0.6712,0.7811,0.8618,0.4931,0.5078,0.5136,0.3799,0.2333
0,SKNN,0.0111,0.3911,0.5991,0.6993,0.1272,0.1546,0.1617,0.0494,0.2331
0,SR,0.2513,0.5827,0.7291,0.827,0.3775,0.3969,0.4038,0.3888,0.1781
0,SR_BR,0.268,0.6296,0.7806,0.8271,0.4056,0.4263,0.4298,0.3967,0.1798
0,STAN,0.2184,0.5283,0.5459,0.5466,0.3433,0.346,0.346,0.1816,0.085
0,VSKNN,0.1601,0.4394,0.5459,0.6366,0.2644,0.2784,0.2848,0.1978,0.1709
0,VSKNN_EBR,0.1242,0.4796,0.6831,0.8535,0.2443,0.2716,0.2837,0.389,0.1721
0,VSTAN,0.1609,0.502,0.6087,0.6606,0.2908,0.3055,0.3092,0.3517,0.1407
0,VSTAN_EBR,0.2004,0.5531,0.7519,0.9027,0.3225,0.3494,0.3604,0.4722,0.1855


In [26]:
with open('../tables/results_app_multiple.tex','w') as tf:
    tf.write(results_app.to_latex(index=False))

##### min20

Prediction on long sessions (20+) if trained on long sessions only (no extra tuning), implying a minimum sequence length of 20 for both training and test data:

In [27]:
folder_res = 'results/testing/' + str(datatype) + '/multiple/'
algos = set([f.split('_window')[0].split('test_single_')[1] for f in os.listdir(folder_res)])
results_app_min20 = pd.DataFrame()
for key in algos:
    files = [f for f in os.listdir(folder_res) 
             if ('Saver' not in f) and (f.startswith('test_single_' + str(key) + '_window'))
             and ('min20' in f) and ('min20_test' not in f)]
    res = get_av_perf(files, key)
    results_app_min20 = results_app_min20.append(res)
results_app_min20 = capitalize_names(results_app_min20)
results_app_min20 = results_app_min20.round(4)
results_app_min20 = results_app_min20.sort_values('model')

In [28]:
results_app_min20

Unnamed: 0,model,HitRate@1,HitRate@5,HitRate@10,HitRate@20,MRR@5,MRR@10,MRR@20,Coverage@20,Popularity@20
0,AR,0.2265,0.5717,0.6721,0.7905,0.3648,0.3784,0.3868,0.6149,0.184
0,CT,0.4257,0.7222,0.7925,0.8523,0.5443,0.5539,0.558,0.6521,0.2048
0,SKNN,0.013,0.348,0.5366,0.6849,0.1117,0.1379,0.1483,0.0609,0.207
0,SR,0.314,0.6303,0.739,0.8256,0.4337,0.4482,0.4544,0.6447,0.1618
0,SR_BR,0.3419,0.6804,0.7858,0.828,0.4696,0.4841,0.4872,0.6688,0.1668
0,STAN,0.257,0.635,0.6595,0.661,0.4085,0.4121,0.4122,0.3316,0.0707
0,VSKNN,0.079,0.2267,0.3095,0.3848,0.1323,0.1431,0.1484,0.3535,0.045
0,VSKNN_EBR,0.0741,0.3297,0.5137,0.724,0.162,0.1864,0.2014,0.6615,0.0902
0,VSTAN,0.0977,0.3368,0.3992,0.4438,0.1836,0.1922,0.1954,0.4884,0.0373
0,VSTAN_EBR,0.2232,0.5737,0.7242,0.8788,0.3466,0.3665,0.3775,0.7121,0.1295


In [29]:
with open('../tables/results_app_multiple_min20.tex','w') as tf:
    tf.write(results_app_min20.to_latex(index=False))

Prediction on long sessions (20+) if trained on all sessions, implying a minimum sequence length of 20 for test data only:

In [30]:
algos = set([f.split('_window')[0].split('test_single_')[1] for f in os.listdir(folder_res)])
results_app_min20_test = pd.DataFrame()
for key in algos:
    files = [f for f in os.listdir(folder_res) 
             if ('Saver' not in f) and (f.startswith('test_single_' + str(key) + '_window')) and ('min20_test' in f)]
    res = get_av_perf(files, key)
    results_app_min20_test = results_app_min20_test.append(res)
results_app_min20_test = capitalize_names(results_app_min20_test)
results_app_min20_test = results_app_min20_test.round(4)
results_app_min20_test = results_app_min20_test.sort_values('model')

In [31]:
results_app_min20_test

Unnamed: 0,model,HitRate@1,HitRate@5,HitRate@10,HitRate@20,MRR@5,MRR@10,MRR@20,Coverage@20,Popularity@20
0,AR,0.189,0.4647,0.5921,0.7895,0.2964,0.3135,0.3274,0.1315,0.2349
0,CT,0.3123,0.5908,0.7314,0.8492,0.4221,0.4405,0.4492,0.1608,0.246
0,SKNN,0.0042,0.2962,0.5733,0.6433,0.0791,0.114,0.119,0.0384,0.2462
0,SR,0.181,0.4747,0.6646,0.7987,0.2932,0.3176,0.3272,0.1665,0.1832
0,SR_BR,0.1903,0.5183,0.7357,0.7974,0.3158,0.3453,0.3498,0.1633,0.1833
0,STAN,0.244,0.6113,0.633,0.633,0.3908,0.394,0.394,0.0738,0.0959
0,VSKNN,0.2242,0.5611,0.6308,0.7256,0.3549,0.3639,0.3703,0.0815,0.2328
0,VSKNN_EBR,0.0992,0.4531,0.7025,0.9101,0.2161,0.2489,0.2636,0.0982,0.2052
0,VSTAN,0.2307,0.6056,0.7499,0.8178,0.3803,0.3999,0.4048,0.2012,0.201
0,VSTAN_EBR,0.1345,0.4392,0.7024,0.9016,0.2385,0.274,0.2886,0.1467,0.2141


In [32]:
with open('../tables/results_app_multiple_min20_test.tex','w') as tf:
    tf.write(results_app_min20_test.to_latex(index=False))

app-level minlength 20, compare performance on positions 20+ to performance on positions 20+ without minsessionlength
hypothesis: extra damage of including short sequences?
if no performance difference: maybe no extra +20 tuning necessary


### single window

##### overall

In [36]:
folder_res = 'results/testing/' + str(datatype) + '/single/'
algos = set([f.split('test_single_')[1].split('_single')[0] for f in os.listdir(folder_res)])
results_app_single = pd.DataFrame()
for key in algos:
    file = [f for f in os.listdir(folder_res) 
             if ('Saver' not in f) and (f.startswith('test_single_' + str(key) + '_single'))][0] # list is of length 1 actually
    res = get_perf(file, key)
    results_app_single = results_app_single.append(res)
results_app_single = capitalize_names(results_app_single)
results_app_single = results_app_single.round(4)
results_app_single = results_app_single.sort_values('model')

In [37]:
results_app_single

Unnamed: 0,model,HitRate@1,HitRate@5,HitRate@10,HitRate@20,MRR@5,MRR@10,MRR@20,Coverage@20,Popularity@20
0,AR,0.1928,0.5752,0.7256,0.8432,0.3227,0.3431,0.3514,0.1794,0.2318
0,CT,0.3608,0.6968,0.8104,0.8848,0.5045,0.5201,0.5255,0.199,0.247
0,SKNN,0.0032,0.532,0.6736,0.7608,0.1909,0.2112,0.2171,0.0201,0.2395
0,SR,0.24,0.6264,0.7944,0.8616,0.3953,0.4185,0.4234,0.2053,0.1879
0,SR_BR,0.2616,0.6848,0.8312,0.86,0.4249,0.4457,0.4479,0.2076,0.1895
0,STAN,0.14,0.2896,0.292,0.292,0.2025,0.2029,0.2029,0.0656,0.0876
0,VSKNN,0.1864,0.5112,0.64,0.7848,0.3061,0.3225,0.3337,0.0776,0.2326
0,VSKNN_EBR,0.1928,0.6344,0.8216,0.8968,0.3407,0.3677,0.3729,0.2289,0.2195
0,VSTAN,0.1464,0.556,0.6408,0.68,0.3012,0.313,0.3157,0.142,0.1826
0,VSTAN_EBR,0.244,0.6176,0.8056,0.8936,0.3777,0.4051,0.4115,0.3726,0.2123


In [38]:
with open('../tables/results_app_single.tex','w') as tf:
    tf.write(results_app_single.to_latex(index=False))

### performance by position

##### cutoffs

In [273]:
cutoffs = [2, 5, 10]
# cutoffs = [2]
k = 20 # HR@k
datatype = 'app-level'

In [275]:
folder_res = 'results/testing/' + str(datatype) + '/multiple/'
folder_data = folder_res.replace('results', 'data')
data = pd.read_csv('../data/app-level/data_app_nodrop.csv') # create app and user mappings
mapping_reverse = dict((v,k) for k,v in mapping.items())

results_app_cutoff = pd.DataFrame()

for cutoff in cutoffs:
    for window in windows:
        test_data = pd.read_hdf(str(folder_data) + 'window_' + str(window) + '.hdf', 'test') 
        predictions_files = [f for f in os.listdir(folder_res) if ('min20' not in f) 
                             and f.endswith('window_' + str(window) + '-Saver@50.csv')]
        predictions = generate_predictions(predictions_files, test_data, mapping_reverse)
        algorithms = [i for i in predictions.columns if i.startswith('recs-')]

        # for  single cutoff and single window, create 'performance-by-position' df containing two rows and |algorithms| columns
        perf_by_pos = pd.DataFrame()
        positions = ['position <= ' + str(cutoff), 'position > ' + str(cutoff)]
        for pos in positions:
            if pos==('position <= ' + str(cutoff)):
                pred_pos = predictions[predictions['position']<=cutoff-1] # -1 b/c the first position has index 0
            else:
                pred_pos = predictions[predictions['position']>cutoff-1]
            df = pd.DataFrame()
            df['position'] = [pos]
            df['window'] = [window]
            for algo in algorithms:
                algo_name = ''.join(algo.split('-')[1:])
                value = pred_pos.apply(lambda x: calc_hr_k(x['ground_truth'], x[algo], k), axis=1).sum()/len(pred_pos)
                df[algo_name] = [value]
            perf_by_pos = perf_by_pos.append(df).reset_index(drop=True)
        results_app_cutoff = results_app_cutoff.append(perf_by_pos)

results_app_cutoff = results_app_cutoff.groupby('position').mean() # average across positions (e.g., "<= 2", "> 10")
results_app_cutoff.drop(['window'], axis=1, inplace=True)
results_app_cutoff = results_app_cutoff.transpose() # transpose to have algorithms as rows, positions as columns
columns_reordered = results_app_cutoff.columns.tolist()
columns_reordered.sort(key=lambda x: int(re.search(r'\d+$',x).group()))
results_app_cutoff = results_app_cutoff[columns_reordered]
results_app_cutoff.reset_index(inplace=True) # convert index to column named "index"
results_app_cutoff.rename(columns={'index': 'model'}, inplace=True) # rename column "index" to "model"
results_app_cutoff.rename_axis(None, axis=1, inplace=True) # unname new index
results_app_cutoff = capitalize_names(results_app_cutoff) # adjust model names
results_app_cutoff = results_app_cutoff.round(4)
results_app_cutoff = results_app_cutoff.sort_values('model')

In [264]:
results_app_cutoff

Unnamed: 0,model,position <= 2,position > 2,position <= 5,position > 5,position <= 10,position > 10
0,AR,0.8834,0.8199,0.8589,0.822,0.857,0.8139
1,CT,0.8853,0.885,0.8833,0.8911,0.8894,0.887
2,SKNN,0.8659,0.7202,0.8222,0.695,0.8025,0.6738
4,SR,0.8864,0.8434,0.8702,0.8488,0.8717,0.8403
3,SR_BR,0.8877,0.8413,0.8704,0.8456,0.8713,0.8389
5,STAN,0.0624,0.5018,0.1682,0.6295,0.2484,0.7009
7,VSKNN,0.8287,0.7869,0.8135,0.7869,0.8142,0.775
6,VSKNN_EBR,0.9651,0.9069,0.9419,0.9102,0.9361,0.9087
9,VSTAN,0.502,0.8417,0.6246,0.8792,0.6765,0.8995
8,VSTAN_EBR,0.9676,0.9079,0.9481,0.9066,0.9422,0.8988


In [265]:
with open('../tables/results_app_multiple_cutoff_HR@' + str(k) + '.tex','w') as tf:
    tf.write(results_app_cutoff.to_latex(index=False))

##### individual positions

In [298]:
k = 1 # HR@k
datatype = 'app-level'

In [299]:
# folder_res = 'results/testing/' + str(datatype) + '/multiple/'
# folder_data = folder_res.replace('results', 'data')
# data = pd.read_csv('../data/app-level/data_app_nodrop.csv') # create app and user mappings
# mapping_reverse = dict((v,k) for k,v in mapping.items())

results_app_pos = pd.DataFrame()

for window in windows:
    test_data = pd.read_hdf(str(folder_data) + 'window_' + str(window) + '.hdf', 'test') 
    predictions_files = [f for f in os.listdir(folder_res) if ('min20' not in f) 
                         and f.endswith('window_' + str(window) + '-Saver@50.csv')]
    predictions = generate_predictions(predictions_files, test_data, mapping_reverse)
    algorithms = [i for i in predictions.columns if i.startswith('recs-')]

    # for  single cutoff and single window, create 'performance-by-position' df containing two rows and |algorithms| columns
    perf_by_pos = pd.DataFrame()
    positions = range(1,11)
    for pos in positions:
        pred_pos = predictions[predictions['position']==pos-1]
        df = pd.DataFrame()
        df['position'] = ['position = ' + str(pos)]
        df['window'] = [window]
        for algo in algorithms:
            algo_name = ''.join(algo.split('-')[1:])
            value = pred_pos.apply(lambda x: calc_hr_k(x['ground_truth'], x[algo], k), axis=1).sum()/len(pred_pos)
            df[algo_name] = [value]
        perf_by_pos = perf_by_pos.append(df).reset_index(drop=True)
    results_app_pos = results_app_pos.append(perf_by_pos)

results_app_pos = results_app_pos.groupby('position').mean() # average across positions
results_app_pos.drop(['window'], axis=1, inplace=True)
results_app_pos = results_app_pos.transpose() # transpose to have algorithms as rows, positions as columns
columns_reordered = results_app_pos.columns.tolist()
columns_reordered.sort(key=lambda x: int(re.search(r'\d+$',x).group()))
results_app_pos = results_app_pos[columns_reordered]
results_app_pos.reset_index(inplace=True) # convert index to column named "index"
results_app_pos.rename(columns={'index': 'model'}, inplace=True) # rename column "index" to "model"
results_app_pos.rename_axis(None, axis=1, inplace=True) # unname new index
results_app_pos = capitalize_names(results_app_pos) # adjust model names
results_app_pos = results_app_pos.round(4)
results_app_pos = results_app_pos.sort_values('model')

In [300]:
results_app_pos

Unnamed: 0,model,position = 1,position = 2,position = 3,position = 4,position = 5,position = 6,position = 7,position = 8,position = 9,position = 10
0,AR,0.311,0.0974,0.2257,0.1605,0.1361,0.1957,0.2116,0.1745,0.1817,0.1858
1,CT,0.311,0.5042,0.355,0.3323,0.3936,0.4759,0.4512,0.3949,0.4425,0.4355
2,SKNN,0.0072,0.0256,0.0143,0.0264,0.044,0.0226,0.016,0.0195,0.0294,0.0069
4,SR,0.311,0.1075,0.2657,0.2404,0.3389,0.3658,0.3363,0.2793,0.3273,0.2923
3,SR_BR,0.3218,0.1445,0.2678,0.2389,0.3566,0.3647,0.3384,0.3108,0.3814,0.3496
5,STAN,0.0072,0.0254,0.1456,0.2254,0.1734,0.2959,0.2531,0.2784,0.2879,0.2704
7,VSKNN,0.1581,0.0428,0.1468,0.1682,0.1639,0.2179,0.234,0.2613,0.2084,0.2592
6,VSKNN_EBR,0.3176,0.2353,0.1249,0.2344,0.1539,0.2076,0.1476,0.1872,0.1752,0.1271
9,VSTAN,0.0294,0.0295,0.1488,0.2244,0.1876,0.236,0.2039,0.253,0.2452,0.3085
8,VSTAN_EBR,0.3571,0.2593,0.182,0.2877,0.2211,0.2665,0.1969,0.2469,0.2728,0.1863


In [301]:
with open('../tables/results_app_multiple_pos_HR@' + str(k) + '.tex','w') as tf:
    tf.write(results_app_cutoff.to_latex(index=False))

### extracting predictions

In [96]:
# sessionID = predictions.sessionID[1]
# num_recs = 5
# positions = predictions.position[predictions.sessionID==sessionID]
# models = results.model.tolist()
# print_predictions(predictions, sessionID, num_recs, positions, models)

In [82]:
# calculate HR@1
# predictions.apply(lambda x: x['recs-sr_BR'][0] == x['ground_truth'], axis=1).sum()/len(predictions)

### performance by category

In [100]:
# need to calculate performance by category for each window separately, then average across all five windows

First, we add a new column containing the ground truth name

In [29]:
# category_mapping = {}
# for app in data.app_name.value_counts().index:
#     if app not in category_mapping:
#         cat = data.category[data.app_name==app].iloc[0]
#         category_mapping[app] = cat
        
# with open('../../data/app-level/category_mapping.pickle', 'wb') as handle:
#     pickle.dump(category_mapping, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [139]:
with open('../../data/app-level/category_mapping.pickle', 'rb') as handle:
    category_mapping = pickle.load(handle)

In [140]:
predictions['ground_truth_category_name'] = predictions['ground_truth_name'].apply(lambda x: category_mapping[x])

In [71]:
# alternative (faster)
# aux_df = data[['category', 'app_name']].drop_duplicates(subset=['app_name']).rename(columns={'category':'ground_truth_category_name'})
# predictions = predictions.merge(aux_df[['ground_truth_category_name', 'app_name']], left_on='ground_truth_name', right_on='app_name').drop(['app_name'], 1)

In [None]:
algorithms = [i for i in predictions.columns if i.startswith('recs-')]

In [161]:
# helper function outputting whether ground truth is in recommendation list of length k for a single algorithm and item
def calc_hr_k(ground_truth, rec_list, k):
    return ground_truth in rec_list[:k]

In [237]:
cat_counts = predictions['ground_truth_category_name'].value_counts()

##### exact prediction

Now, we create a new dataframe containing HR@k (for fixed k) for each algorithm as column

In [None]:
k=1

In [339]:
perf_by_cat_exact = pd.DataFrame()
for cat in cat_counts.index:
    pred_cat = predictions[predictions['ground_truth_category_name'] == cat]
    df = pd.DataFrame()
    df['category'] = [cat]
    count = cat_counts[cat]
    df['count'] = [count]
    for algo in algorithms:
        algo_name = ''.join(algo.split('-')[1:])
        value = pred_cat.apply(lambda x: calc_hr_k(x['ground_truth'], x[algo], k), axis=1).sum()/len(pred_cat)
        df[algo_name] = [value]
    perf_by_cat_exact = perf_by_cat_exact.append(df).reset_index(drop=True)

In [343]:
perf_by_cat_exact.head(3)

Unnamed: 0,category,count,ar,ctpre,gru4rec_Reminder,gru4rec,sknn,sr_BR,sr,stan_ER,stan,vsknn_EBR,vsknn,vstan_EBR,vstan
0,Launcher,205,0.321951,0.536585,0.390244,0.146341,0.492683,0.453659,0.312195,0.307317,0.287805,0.317073,0.102439,0.341463,0.419512
1,Messaging,147,0.428571,0.802721,0.251701,0.020408,0.435374,0.755102,0.782313,0.482993,0.489796,0.510204,0.0,0.0,0.353741
2,Social_Networks,72,0.180556,0.152778,0.319444,0.152778,0.166667,0.138889,0.152778,0.194444,0.180556,0.194444,0.388889,0.375,0.222222


In [341]:
for algo in perf_by_cat_exact.columns[2:]:
    row = perf_by_cat_exact[algo].argmax()
    cat = perf_by_cat_exact['category'][row]
    perf = perf_by_cat_exact[algo][row]
    count = perf_by_cat_exact['count'][row]
    print('algo ' + str(algo) + ': best performance for category ' + str(cat) + '(freq: ' + str(count) + '): HR@' + str(k) + ' ' + str(perf))

algo ar: best performance for category PHOTO(freq: 13): HR@1 0.9230769230769231
algo ctpre: best performance for category OUTGOING(freq: 5): HR@1 1.0
algo gru4rec_Reminder: best performance for category OUTGOING(freq: 5): HR@1 1.0
algo gru4rec: best performance for category OUTGOING(freq: 5): HR@1 1.0
algo sknn: best performance for category CONNECTED(freq: 16): HR@1 0.625
algo sr_BR: best performance for category OUTGOING(freq: 5): HR@1 1.0
algo sr: best performance for category OUTGOING(freq: 5): HR@1 1.0
algo stan_ER: best performance for category PHOTO(freq: 13): HR@1 0.7692307692307693
algo stan: best performance for category Workout(freq: 2): HR@1 1.0
algo vsknn_EBR: best performance for category PHOTO(freq: 13): HR@1 0.5384615384615384
algo vsknn: best performance for category Workout(freq: 2): HR@1 1.0
algo vstan_EBR: best performance for category PHOTO(freq: 13): HR@1 0.7692307692307693
algo vstan: best performance for category Workout(freq: 2): HR@1 1.0


##### category-level prediction

Now, we also have to convert the recommendations to category-level. Furthermore, we now have to match based on names as we cannot use token IDs anymore.

In [335]:
k=1

In [285]:
algorithms_names = [i for i in predictions.columns if i.startswith('recs_names-')]

In [303]:
for algo in algorithms_names:
    col_name = 'recs_names_cat-' + ''.join(algo.split('-')[1:])
    predictions[col_name] = predictions[algo].apply(lambda x: [category_mapping[i] for i in x])

General performance:

In [307]:
algorithms_names

['recs_names-ar',
 'recs_names-ct-pre',
 'recs_names-gru4rec_Reminder',
 'recs_names-gru4rec',
 'recs_names-sknn',
 'recs_names-sr_BR',
 'recs_names-sr',
 'recs_names-stan_ER',
 'recs_names-stan',
 'recs_names-vsknn_EBR',
 'recs_names-vsknn',
 'recs_names-vstan_EBR',
 'recs_names-vstan']

In [326]:
perf_by_cat_cat = pd.DataFrame()
df = pd.DataFrame()
for algo in algorithms_names:
    col_name = 'recs_names_cat-' + ''.join(algo.split('-')[1:])
    algo_name = ''.join(algo.split('-')[1:])
    value = predictions.apply(lambda x: calc_hr_k(x['ground_truth_category_name'], x[col_name], k), axis=1).sum()/len(predictions)
    df[algo_name] = [value]
perf_by_cat_cat = perf_by_cat_cat.append(df).reset_index(drop=True)

In [324]:
results[['model', 'HitRate@1', 'HitRate@20']]

Unnamed: 0,model,HitRate@1,HitRate@20
0,ar,0.274929,0.861823
0,ct-pre,0.444444,0.901709
0,gru4rec_Reminder,0.292023,0.810541
0,gru4rec,0.192308,0.683761
0,sknn,0.290598,0.824786
0,sr_BR,0.373219,0.947293
0,sr,0.350427,0.896011
0,stan_ER,0.27208,0.950142
0,stan,0.253561,0.887464
0,vsknn_EBR,0.25641,0.907407


In [329]:
perf_by_cat_cat

Unnamed: 0,ar,ctpre,gru4rec_Reminder,gru4rec,sknn,sr_BR,sr,stan_ER,stan,vsknn_EBR,vsknn,vstan_EBR,vstan
0,0.913105,0.933048,0.887464,0.824786,0.911681,0.952991,0.935897,0.954416,0.928775,0.933048,0.712251,0.858974,0.931624


Category-level performance:

In [336]:
perf_by_cat_cat = pd.DataFrame()
for cat in cat_counts.index:
    pred_cat = predictions[predictions['ground_truth_category_name'] == cat]
    df = pd.DataFrame()
    df['category'] = [cat]
    count = cat_counts[cat]
    df['count'] = [count]
    for algo in algorithms_names:
        col_name = 'recs_names_cat-' + ''.join(algo.split('-')[1:])
        algo_name = ''.join(algo.split('-')[1:])
        value = pred_cat.apply(lambda x: calc_hr_k(x['ground_truth_category_name'], x[col_name], k), axis=1).sum()/len(pred_cat)
        df[algo_name] = [value]
    perf_by_cat_cat = perf_by_cat_cat.append(df).reset_index(drop=True)

In [344]:
perf_by_cat_cat.head(3)

Unnamed: 0,category,count,ar,ctpre,gru4rec_Reminder,gru4rec,sknn,sr_BR,sr,stan_ER,stan,vsknn_EBR,vsknn,vstan_EBR,vstan
0,Launcher,205,0.478049,0.62439,0.526829,0.365854,0.492683,0.468293,0.458537,0.307317,0.287805,0.317073,0.102439,0.390244,0.419512
1,Messaging,147,0.428571,0.816327,0.258503,0.027211,0.44898,0.77551,0.795918,0.489796,0.503401,0.517007,0.013605,0.006803,0.367347
2,Social_Networks,72,0.222222,0.166667,0.430556,0.236111,0.236111,0.166667,0.180556,0.263889,0.236111,0.263889,0.555556,0.555556,0.277778
