### setup

In [1]:
import datetime
import matplotlib
import numpy as np
import pandas as pd
import pyreadr
import pickle
import re
import os

In [2]:
# Switch to the session-rec project root so that the relative 'results/...' and '../data/...' paths
# used below resolve. Set the SESSION_REC_DIR environment variable to run the notebook from another
# checkout; without it the original hard-coded location is used.
os.chdir(os.environ.get('SESSION_REC_DIR', 'C:\\Users\\Simon\\Desktop\\MA\\session-rec'))

In [3]:
# Data granularity evaluated by this notebook; supported values: 'app-level', 'sequence-level'.
datatype = 'app-level'
# Numbers of the evaluation windows that the "multiple windows" sections iterate over.
windows = list(range(1, 6))

In [4]:
# Column names of the data frames; the item and session columns depend on the data granularity.
USER_KEY, TIME_KEY = 'userID', 'timestamp'
ITEM_KEY, SESSION_KEY = ('appID', 'sessionID') if datatype == 'app-level' else ('usID', 'sentenceID')

### helper functions

In [5]:
# for multiple windows (incl. min20)
# get average performance across all windows for a given algorithm
def get_av_perf(files, key, folder=None):
    """Average the evaluation metrics of one algorithm over several result files.

    Parameters
    ----------
    files : iterable of str
        Names of the ';'-separated result CSV files (one per window), relative to ``folder``.
    key : str
        Algorithm name; written to the 'model' column of the result.
    folder : str, optional
        Prefix that is prepended to every file name. Defaults to the notebook-level
        ``folder_res`` so existing two-argument calls keep working.

    Returns
    -------
    pandas.DataFrame
        A single row with the 'model' column followed by the mean of every numeric metric column.

    Raises
    ------
    ValueError
        If ``files`` is empty.
    """
    if folder is None:
        folder = folder_res
    frames = []
    for file in files:
        # Last '_'-separated token of the name without its extension. os.path.splitext removes
        # the suffix only, whereas str.strip('.csv') removes any of the characters '.', 'c', 's', 'v'.
        window = os.path.splitext(file)[0].split('_')[-1]
        df = pd.read_csv(folder + file, sep=';')
        df = df.drop(columns=['Metrics', 'Saver@50: '])
        df = df.drop(columns=df.filter(regex='Unnamed').columns)  # drop 'Unnamed: N' columns caused by the trailing separator
        df = df.rename(columns=lambda x: str(x)[:-2])  # remove colon and whitespace from all column names
        df.insert(0, 'model', key)
        df.insert(1, 'window', window)
        frames.append(df)
    if not frames:
        raise ValueError('no result files found for model ' + repr(key))
    # DataFrame.append was removed in pandas 2.0; concatenate once instead of growing a frame in the loop.
    res = pd.concat(frames)
    # numeric_only=True leaves the string-valued 'window' column out of the average.
    res = res.groupby('model').mean(numeric_only=True).reset_index(level=0)
    return res

In [6]:
# for single window
# get performance for a given algorithm
def get_perf(file, key, folder=None):
    """Load the evaluation metrics of one algorithm from a single result file.

    Parameters
    ----------
    file : str
        Name of the ';'-separated result CSV file, relative to ``folder``.
    key : str
        Algorithm name; written to the 'model' column of the result.
    folder : str, optional
        Prefix that is prepended to ``file``. Defaults to the notebook-level ``folder_res``
        so existing two-argument calls keep working.

    Returns
    -------
    pandas.DataFrame
        The metric columns of the file (names without the trailing ': '), preceded by 'model'.
    """
    if folder is None:
        folder = folder_res
    df = pd.read_csv(folder + file, sep=';')
    df = df.drop(columns=['Metrics', 'Saver@50: '])
    df = df.drop(columns=df.filter(regex='Unnamed').columns)  # drop 'Unnamed: N' columns caused by the trailing separator
    df = df.rename(columns=lambda x: str(x)[:-2])  # remove colon and whitespace from all column names
    df.insert(0, 'model', key)
    return df

In [7]:
# look up the ground truth in the test data for one prediction, identified by session ID and position
def extract_ground_truth(ID, position, test_data):
    """Return the item that follows ``position`` within session ``ID``.

    The rows of ``test_data`` whose SESSION_KEY column equals ``ID`` are selected, and the
    ITEM_KEY value of the row at offset ``position + 1`` within that selection is returned.
    """
    session_rows = test_data[test_data[SESSION_KEY] == ID]
    next_label = session_rows.index[position + 1]
    return session_rows[ITEM_KEY][next_label]

In [8]:
# generate a df containing the ground truth as well as predictions for all available algorithms
def generate_predictions(predictions_files, test_data, mapping_id2name, multiple=True):
    """Collect the recommendations of all algorithms next to the ground truth.

    Parameters
    ----------
    predictions_files : iterable of str
        Names of the ';'-separated prediction CSV files inside ``folder_res``. Each file must
        provide the columns 'SessionId', 'Position', 'Recommendations' and 'Scores'.
    test_data : pandas.DataFrame
        Test set from which the ground-truth item of every prediction is looked up.
    mapping_id2name : mapping
        Maps an integer item ID to its name.
    multiple : bool, default True
        If True, the last two '_'-separated tokens of a file name are not part of the model
        name; if False, only the last one is dropped.

    Returns
    -------
    pandas.DataFrame
        Columns 'sessionID', 'position', 'ground_truth', 'ground_truth_name' and, per model,
        'recs-<model>', 'recs_names-<model>' and 'scores-<model>'.
    """
    predictions = pd.DataFrame()
    trailing_tokens = 2 if multiple else 1
    for file in predictions_files:
        # the model name starts at the third '_'-separated token of the file name
        model = "_".join(file.split('_')[2:-trailing_tokens])
        df = pd.read_csv(folder_res + file, sep = ';')
        # Session, position and ground truth are taken from the first file only.
        # NOTE(review): this assumes every file lists the same predictions in the same row order - confirm.
        if 'sessionID' not in predictions.columns:
            predictions['sessionID'] = df['SessionId']
        if 'position' not in predictions.columns:
            predictions['position'] = df['Position']
        if 'ground_truth' not in predictions.columns:
            predictions['ground_truth'] = predictions.apply(lambda x: extract_ground_truth(x['sessionID'], x['position'], test_data), axis=1)
            # use the mapping that was passed in instead of the notebook-level `mapping_reverse`
            predictions['ground_truth_name'] = predictions['ground_truth'].apply(lambda x: mapping_id2name[x])
        predictions['recs-' + model] = df['Recommendations'].apply(lambda x: [int(i) for i in x.split(',')])
        predictions['recs_names-' + model] = predictions['recs-' + model].apply(lambda x: [mapping_id2name[i] for i in x])
        predictions['scores-' + model] = df['Scores'].apply(lambda x: x.split(','))  # scores stay strings
    return predictions

In [9]:
# hit indicator for one prediction: is the ground truth among the first k recommendations?
def calc_hr_k(ground_truth, rec_list, k):
    """Return True if ``ground_truth`` occurs in the first ``k`` entries of ``rec_list``."""
    top_k = rec_list[:k]
    return ground_truth in top_k

In [10]:
# reciprocal rank of the ground truth for one prediction, cut off after k recommendations
def calc_mrr_k(ground_truth, rec_list, k):
    """Return 1/rank of ``ground_truth`` in ``rec_list`` if it is within the first ``k`` entries, else 0."""
    if ground_truth in rec_list[:k]:
        rank = rec_list.index(ground_truth) + 1  # ranks are 1-based
        return 1/rank
    return 0

In [11]:
def print_predictions(predictions, sessionID, num_recs, positions, models):
    """Collect the top recommendations of several models for selected positions of one session.

    ``predictions`` must contain the columns 'sessionID', 'position', 'ground_truth_name' and
    one 'recs_names-<model>' column per entry of ``models``.

    Returns a dict keyed by '<sessionID>_<position>'; each value is the tuple
    (sessionID, position, ground-truth name, DataFrame with one column per model holding its
    first ``num_recs`` recommended names).
    """
    collected = {}
    for pos in positions:
        selected = (predictions.sessionID == sessionID) & (predictions.position == pos)
        row = predictions[selected]
        truth_name = row.ground_truth_name.to_string(index=False)
        top_recs = pd.DataFrame()
        for model in models:
            top_recs[model] = [row['recs_names-' + model].tolist()[0][i] for i in range(num_recs)]
        collected[str(sessionID) + '_' + str(pos)] = (sessionID, pos, truth_name, top_recs)
    return collected

In [12]:
def capitalize_names(df):
    """Replace the raw algorithm keys in ``df['model']`` with their display names.

    The frame is modified in place and also returned. A key without a display name
    raises KeyError.
    """
    display_names = {
        'ar': 'AR', 'ct-pre': 'CT', 'ctpre': 'CT',
        'gru4rec': 'GRU4Rec', 'gru4rec_Reminder': 'GRU4Rec_Reminder', 'hgru4rec': 'HGRU4Rec',
        'shan': 'SHAN', 'sknn': 'SKNN',
        'sr': 'SR', 'sr_BR': 'SR_BR',
        'stan': 'STAN',
        'vsknn': 'VSKNN', 'vsknn_EBR': 'VSKNN_EBR',
        'vstan': 'VSTAN', 'vstan_EBR': 'VSTAN_EBR',
    }

    def to_display_name(raw_name):
        return display_names[raw_name]

    df['model'] = df['model'].apply(to_display_name)
    return df

### multiple windows

##### overall

In [23]:
# Average the test metrics of every algorithm over all windows (runs without the min20 restriction).
folder_res = 'results/testing/' + str(datatype) + '/multiple/'
result_files = os.listdir(folder_res)  # list the directory once instead of once per algorithm
# The algorithm name is the part between 'test_single_' and '_window'; other files are ignored.
algos = set([f.split('_window')[0].split('test_single_')[1] for f in result_files
             if f.startswith('test_single_') and '_window' in f])
algos -= {'vsknn', 'vsknn_EBR'}
per_algo = []
for key in sorted(algos):
    files = [f for f in result_files
             if ('Saver' not in f)
             and (f.startswith('test_single_' + str(key) + '_window'))
             and ('min20' not in f)]
    per_algo.append(get_av_perf(files, key))
# DataFrame.append was removed in pandas 2.0; concatenate the per-algorithm rows once.
results_app = pd.concat(per_algo)
results_app = capitalize_names(results_app)
results_app = results_app.round(4)
results_app = results_app.sort_values('model')

In [24]:
# Show the per-model metrics averaged over the windows (rounded to 4 decimals, sorted by model).
results_app

Unnamed: 0,model,HitRate@1,HitRate@5,HitRate@10,HitRate@20,MRR@5,MRR@10,MRR@20,Coverage@20,Popularity@20
0,AR,0.2024,0.5926,0.725,0.8407,0.3374,0.3555,0.3637,0.3208,0.2328
0,CT,0.3833,0.7006,0.8193,0.884,0.513,0.529,0.5336,0.3267,0.2491
0,GRU4Rec,0.319,0.5829,0.6701,0.7503,0.4218,0.4338,0.4394,0.8002,0.0895
0,GRU4Rec_Reminder,0.3105,0.628,0.7241,0.7949,0.4285,0.4417,0.4467,0.82,0.1438
0,HGRU4Rec,0.2978,0.4247,0.4706,0.5182,0.3486,0.3548,0.358,0.6824,0.0249
0,SKNN,0.0159,0.529,0.6876,0.7697,0.1909,0.2119,0.2178,0.0489,0.2463
0,SR,0.2591,0.6431,0.7836,0.8568,0.4056,0.4248,0.43,0.3552,0.1892
0,SR_BR,0.2718,0.6902,0.8203,0.8559,0.4314,0.4496,0.4523,0.3578,0.1892
0,STAN,0.1543,0.3386,0.3451,0.3459,0.2307,0.2317,0.2317,0.1395,0.0883
0,VSTAN,0.1543,0.5635,0.6769,0.7202,0.3086,0.3244,0.3276,0.3654,0.1838


In [25]:
# Export the averaged multi-window results as a LaTeX table.
with open('../tables/results_app_multiple.tex','w') as tex_file:
    tex_file.write(results_app.to_latex(index=False))

##### min20

Prediction on long sessions (20+) if trained on long sessions only (no extra tuning), implying a minimum sequence length of 20 for both training and test data:

In [26]:
# Average the test metrics of every algorithm over all windows for the 'min20' runs
# (file names containing 'min20' but not 'min20_test').
folder_res = 'results/testing/' + str(datatype) + '/multiple/'
result_files = os.listdir(folder_res)  # list the directory once instead of once per algorithm
# The algorithm name is the part between 'test_single_' and '_window'; other files are ignored.
algos = set([f.split('_window')[0].split('test_single_')[1] for f in result_files
             if f.startswith('test_single_') and '_window' in f])
algos -= {'vsknn', 'vsknn_EBR'}
per_algo = []
for key in sorted(algos):
    files = [f for f in result_files
             if ('Saver' not in f) and (f.startswith('test_single_' + str(key) + '_window'))
             and ('min20' in f) and ('min20_test' not in f)]
    per_algo.append(get_av_perf(files, key))
# DataFrame.append was removed in pandas 2.0; concatenate the per-algorithm rows once.
results_app_min20 = pd.concat(per_algo)
results_app_min20 = capitalize_names(results_app_min20)
results_app_min20 = results_app_min20.round(4)
results_app_min20 = results_app_min20.sort_values('model')

In [27]:
# Show the averaged metrics of the 'min20' runs (rounded to 4 decimals, sorted by model).
results_app_min20

Unnamed: 0,model,HitRate@1,HitRate@5,HitRate@10,HitRate@20,MRR@5,MRR@10,MRR@20,Coverage@20,Popularity@20
0,AR,0.2265,0.5717,0.6721,0.7905,0.3648,0.3784,0.3868,0.6149,0.184
0,CT,0.4257,0.7222,0.7925,0.8523,0.5443,0.5539,0.558,0.6521,0.2048
0,GRU4Rec,0.3693,0.6093,0.684,0.7616,0.4651,0.4752,0.4806,0.8887,0.061
0,GRU4Rec_Reminder,0.3516,0.6612,0.7673,0.8451,0.4673,0.4817,0.4873,0.9722,0.105
0,HGRU4Rec,0.3067,0.4602,0.5073,0.5595,0.3666,0.3729,0.3765,0.7059,0.0224
0,SKNN,0.013,0.348,0.5366,0.6849,0.1117,0.1379,0.1483,0.0609,0.207
0,SR,0.314,0.6303,0.739,0.8256,0.4337,0.4482,0.4544,0.6447,0.1618
0,SR_BR,0.3419,0.6804,0.7858,0.828,0.4696,0.4841,0.4872,0.6688,0.1668
0,STAN,0.257,0.635,0.6595,0.661,0.4085,0.4121,0.4122,0.3316,0.0707
0,VSTAN,0.0977,0.3368,0.3992,0.4438,0.1836,0.1922,0.1954,0.4884,0.0373


In [28]:
# Export the averaged 'min20' results as a LaTeX table.
with open('../tables/results_app_multiple_min20.tex','w') as tex_file:
    tex_file.write(results_app_min20.to_latex(index=False))

Prediction on long sessions (20+) if trained on all sessions, implying a minimum sequence length of 20 for test data only:

In [29]:
# Average the test metrics of every algorithm over all windows for the 'min20_test' runs.
# The result folder is set here explicitly so the cell does not depend on an earlier cell's state.
folder_res = 'results/testing/' + str(datatype) + '/multiple/'
result_files = os.listdir(folder_res)  # list the directory once instead of once per algorithm
# The algorithm name is the part between 'test_single_' and '_window'; other files are ignored.
algos = set([f.split('_window')[0].split('test_single_')[1] for f in result_files
             if f.startswith('test_single_') and '_window' in f])
algos -= {'vsknn', 'vsknn_EBR'}
per_algo = []
for key in sorted(algos):
    files = [f for f in result_files
             if ('Saver' not in f) and (f.startswith('test_single_' + str(key) + '_window')) and ('min20_test' in f)]
    per_algo.append(get_av_perf(files, key))
# DataFrame.append was removed in pandas 2.0; concatenate the per-algorithm rows once.
results_app_min20_test = pd.concat(per_algo)
results_app_min20_test = capitalize_names(results_app_min20_test)
results_app_min20_test = results_app_min20_test.round(4)
results_app_min20_test = results_app_min20_test.sort_values('model')

In [30]:
# Show the averaged metrics of the 'min20_test' runs (rounded to 4 decimals, sorted by model).
results_app_min20_test

Unnamed: 0,model,HitRate@1,HitRate@5,HitRate@10,HitRate@20,MRR@5,MRR@10,MRR@20,Coverage@20,Popularity@20
0,AR,0.189,0.4647,0.5921,0.7895,0.2964,0.3135,0.3274,0.1315,0.2349
0,CT,0.3123,0.5908,0.7314,0.8492,0.4221,0.4405,0.4492,0.1608,0.246
0,GRU4Rec,0.2846,0.5013,0.5874,0.6798,0.3683,0.3797,0.386,0.438,0.0477
0,GRU4Rec_Reminder,0.2091,0.4544,0.5679,0.6695,0.2963,0.3116,0.3186,0.458,0.1213
0,HGRU4Rec,0.2716,0.4096,0.4696,0.5225,0.3251,0.333,0.3366,0.4225,0.0165
0,SKNN,0.0042,0.2962,0.5733,0.6433,0.0791,0.114,0.119,0.0384,0.2462
0,SR,0.181,0.4747,0.6646,0.7987,0.2932,0.3176,0.3272,0.1665,0.1832
0,SR_BR,0.1903,0.5183,0.7357,0.7974,0.3158,0.3453,0.3498,0.1633,0.1833
0,STAN,0.244,0.6113,0.633,0.633,0.3908,0.394,0.394,0.0738,0.0959
0,VSTAN,0.2307,0.6056,0.7499,0.8178,0.3803,0.3999,0.4048,0.2012,0.201


In [31]:
# Export the averaged 'min20_test' results as a LaTeX table.
with open('../tables/results_app_multiple_min20_test.tex','w') as tex_file:
    tex_file.write(results_app_min20_test.to_latex(index=False))

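**TODO (app-level, minimum session length 20):** compare the performance on positions 20+ when a minimum session length of 20 is applied with the performance on positions 20+ without a minimum session length.
- Hypothesis to check: does including short sequences cause extra damage?
- If there is no performance difference, extra tuning for sessions of length 20+ may not be necessary.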


### single window

##### overall

In [32]:
# Collect the test metrics of every algorithm for the single-window setup.
folder_res = 'results/testing/' + str(datatype) + '/single/'
result_files = os.listdir(folder_res)  # list the directory once instead of once per algorithm
# The algorithm name is the part between 'test_single_' and '_single'.
algos = set([f.split('test_single_')[1].split('_single')[0] for f in result_files if f.startswith('test_single')])
algos -= {'vsknn', 'vsknn_EBR'}
per_algo = []
for key in sorted(algos):
    file = [f for f in result_files
             if ('Saver' not in f)
             and (f.startswith('test_single_' + str(key) + '_single'))
             and ('min20' not in f)
             and ('embedding' not in f)][0] # list is of length 1 actually
    per_algo.append(get_perf(file, key))
# DataFrame.append was removed in pandas 2.0; concatenate the per-algorithm rows once.
results_app_single = pd.concat(per_algo)
results_app_single = capitalize_names(results_app_single)
results_app_single = results_app_single.round(4)
results_app_single = results_app_single.sort_values('model')

In [33]:
# Show the single-window metrics (rounded to 4 decimals, sorted by model).
results_app_single

Unnamed: 0,model,HitRate@1,HitRate@5,HitRate@10,HitRate@20,MRR@5,MRR@10,MRR@20,Coverage@20,Popularity@20
0,AR,0.1928,0.5752,0.7256,0.8432,0.3227,0.3431,0.3514,0.1794,0.2318
0,CT,0.3608,0.6968,0.8104,0.8848,0.5045,0.5201,0.5255,0.199,0.247
0,GRU4Rec,0.2784,0.5208,0.6432,0.732,0.375,0.3923,0.3985,0.6504,0.0897
0,GRU4Rec_Reminder,0.2464,0.528,0.6184,0.6944,0.3539,0.3663,0.3718,0.6446,0.1007
0,HGRU4Rec,0.2992,0.4344,0.4728,0.5056,0.352,0.3574,0.3596,0.5446,0.022
0,SKNN,0.0032,0.532,0.6736,0.7608,0.1909,0.2112,0.2171,0.0201,0.2395
0,SR,0.24,0.6264,0.7944,0.8616,0.3953,0.4185,0.4234,0.2053,0.1879
0,SR_BR,0.2616,0.6848,0.8312,0.86,0.4249,0.4457,0.4479,0.2076,0.1895
0,STAN,0.14,0.2896,0.292,0.292,0.2025,0.2029,0.2029,0.0656,0.0876
0,VSTAN,0.1464,0.556,0.6408,0.68,0.3012,0.313,0.3157,0.142,0.1826


In [34]:
# Export the single-window results as a LaTeX table.
with open('../tables/results_app_single.tex','w') as tex_file:
    tex_file.write(results_app_single.to_latex(index=False))

### performance by position

##### create mapping dicts

In [35]:
# Result folder of the multi-window runs and the matching data folder ('results' -> 'data').
folder_res = 'results/testing/' + str(datatype) + '/multiple/'
folder_data = folder_res.replace('results', 'data')
# App name <-> integer ID mappings: IDs are 1-based positions in the alphabetically sorted app names.
data = pd.read_csv('../data/app-level/data_app_nodrop.csv')
mapping = {app_name: app_id for app_id, app_name in enumerate(sorted(set(data['app_name'])), start=1)}
mapping_reverse = {app_id: app_name for app_name, app_id in mapping.items()}

##### individual positions

In [42]:
# Length of the recommendation list used for the hit rate in the per-position evaluation below.
k = 20 # HR@k

In [43]:
# HR@k for each of the first ten positions of a session, computed per window and then averaged over the windows.
excluded_algorithms = {'recs-vsknn', 'recs-vsknn_EBR'}
positions = range(1,11)
window_frames = []

for window in windows:
    test_data = pd.read_hdf(str(folder_data) + 'window_' + str(window) + '.hdf', 'test')
    predictions_files = [f for f in os.listdir(folder_res) if ('min20' not in f)
                         and f.endswith('window_' + str(window) + '-Saver@50.csv')]
    predictions = generate_predictions(predictions_files, test_data, mapping_reverse)
    # filtering (instead of list.remove) does not fail when an excluded algorithm has no prediction file
    algorithms = [i for i in predictions.columns if i.startswith('recs-') and i not in excluded_algorithms]

    rows = []
    for pos in positions:
        pred_pos = predictions[predictions['position']==pos-1] # -1 b/c the first position has index 0
        row = {'position': 'position = ' + str(pos), 'window': window}
        for algo in algorithms:
            # joining without a separator turns e.g. 'recs-ct-pre' into 'ctpre', a key capitalize_names knows
            algo_name = ''.join(algo.split('-')[1:])
            if len(pred_pos) == 0:
                row[algo_name] = float('nan') # no prediction at this position in this window
            else:
                row[algo_name] = pred_pos.apply(lambda x: calc_hr_k(x['ground_truth'], x[algo], k), axis=1).sum()/len(pred_pos)
        rows.append(row)
    perf_by_pos = pd.DataFrame(rows)
    window_frames.append(perf_by_pos)

# DataFrame.append was removed in pandas 2.0; concatenate the per-window frames once.
results_app_pos = pd.concat(window_frames, ignore_index=True)
results_app_pos = results_app_pos.groupby('position').mean() # average across windows
results_app_pos = results_app_pos.drop(columns=['window'])
results_app_pos = results_app_pos.transpose() # transpose to have algorithms as rows, positions as columns
# order the position columns numerically ('position = 10' would otherwise sort before 'position = 2')
columns_reordered = sorted(results_app_pos.columns.tolist(), key=lambda x: int(re.search(r'\d+$',x).group()))
results_app_pos = results_app_pos[columns_reordered]
results_app_pos = results_app_pos.reset_index() # convert index to column named "index"
results_app_pos = results_app_pos.rename(columns={'index': 'model'}) # rename column "index" to "model"
results_app_pos = results_app_pos.rename_axis(None, axis=1) # unname the column axis
results_app_pos = capitalize_names(results_app_pos) # adjust model names
results_app_pos = results_app_pos.round(4)
results_app_pos = results_app_pos.sort_values('model')

In [44]:
# Show HR@k per model (rows) and session position (columns), averaged over the windows.
results_app_pos

Unnamed: 0,model,position = 1,position = 2,position = 3,position = 4,position = 5,position = 6,position = 7,position = 8,position = 9,position = 10
0,AR,0.8732,0.9,0.7872,0.8518,0.8482,0.8423,0.8773,0.8385,0.8518,0.8226
1,CT,0.8766,0.8991,0.8502,0.9001,0.8989,0.9099,0.942,0.8994,0.9234,0.8526
3,GRU4Rec,0.8176,0.8326,0.7382,0.7319,0.7555,0.755,0.7172,0.6976,0.6795,0.7529
2,GRU4Rec_Reminder,0.8785,0.8192,0.7685,0.8013,0.8273,0.7853,0.7879,0.7699,0.8335,0.7564
4,HGRU4Rec,0.5141,0.6028,0.4378,0.4822,0.4898,0.5814,0.5453,0.5424,0.5694,0.5625
5,SKNN,0.864,0.8691,0.762,0.7677,0.7478,0.6953,0.7515,0.7724,0.7631,0.6823
7,SR,0.8808,0.8959,0.8109,0.868,0.8743,0.8774,0.9095,0.8616,0.8736,0.8426
6,SR_BR,0.8801,0.9004,0.8093,0.8709,0.8679,0.8774,0.9001,0.8568,0.8736,0.8426
8,STAN,0.0796,0.0324,0.2109,0.3839,0.3659,0.5137,0.508,0.5366,0.6019,0.5941
10,VSTAN,0.3558,0.7253,0.7668,0.8032,0.8076,0.8421,0.871,0.8885,0.9016,0.8057


In [45]:
# Export the per-position hit rates as a LaTeX table; the file name carries the cutoff k.
with open('../tables/results_app_multiple_pos_HR@' + str(k) + '.tex','w') as tex_file:
    tex_file.write(results_app_pos.to_latex(index=False))

In [43]:
# sessionID = predictions.sessionID[1]
# num_recs = 5
# positions = predictions.position[predictions.sessionID==sessionID]
# models = results.model.tolist()
# print_predictions(predictions, sessionID, num_recs, positions, models)

##### cutoffs

In [50]:
# Position thresholds: predictions are split into 'position <= cutoff' and 'position > cutoff'.
cutoffs = [2, 5, 10]
# Length of the recommendation list used for the hit rate.
k = 20 # HR@k

In [51]:
# HR@k for early vs. late session positions, computed per window and cutoff and then averaged over the windows.
excluded_algorithms = {'recs-vsknn', 'recs-vsknn_EBR'}
rows = []

# The predictions depend only on the window, so they are generated once per window
# instead of once per (cutoff, window) pair.
for window in windows:
    test_data = pd.read_hdf(str(folder_data) + 'window_' + str(window) + '.hdf', 'test')
    predictions_files = [f for f in os.listdir(folder_res) if ('min20' not in f)
                         and f.endswith('window_' + str(window) + '-Saver@50.csv')]
    predictions = generate_predictions(predictions_files, test_data, mapping_reverse)
    # filtering (instead of list.remove) does not fail when an excluded algorithm has no prediction file
    algorithms = [i for i in predictions.columns if i.startswith('recs-') and i not in excluded_algorithms]

    for cutoff in cutoffs:
        # for a single cutoff and a single window: one row per side of the cutoff, one column per algorithm
        subsets = [
            ('position <= ' + str(cutoff), predictions[predictions['position']<=cutoff-1]), # -1 b/c the first position has index 0
            ('position > ' + str(cutoff), predictions[predictions['position']>cutoff-1]),
        ]
        for pos, pred_pos in subsets:
            row = {'position': pos, 'window': window}
            for algo in algorithms:
                # joining without a separator turns e.g. 'recs-ct-pre' into 'ctpre', a key capitalize_names knows
                algo_name = ''.join(algo.split('-')[1:])
                if len(pred_pos) == 0:
                    row[algo_name] = float('nan') # no prediction on this side of the cutoff
                else:
                    row[algo_name] = pred_pos.apply(lambda x: calc_hr_k(x['ground_truth'], x[algo], k), axis=1).sum()/len(pred_pos)
            rows.append(row)

# DataFrame.append was removed in pandas 2.0; build the frame once from the collected rows.
results_app_cutoff = pd.DataFrame(rows)
results_app_cutoff = results_app_cutoff.groupby('position').mean() # average across windows for each group (e.g., "<= 2", "> 10")
results_app_cutoff = results_app_cutoff.drop(columns=['window'])
results_app_cutoff = results_app_cutoff.transpose() # transpose to have algorithms as rows, positions as columns
# order the columns by cutoff; the sort is stable, so '<=' stays before '>' for the same cutoff
columns_reordered = sorted(results_app_cutoff.columns.tolist(), key=lambda x: int(re.search(r'\d+$',x).group()))
results_app_cutoff = results_app_cutoff[columns_reordered]
results_app_cutoff = results_app_cutoff.reset_index() # convert index to column named "index"
results_app_cutoff = results_app_cutoff.rename(columns={'index': 'model'}) # rename column "index" to "model"
results_app_cutoff = results_app_cutoff.rename_axis(None, axis=1) # unname the column axis
results_app_cutoff = capitalize_names(results_app_cutoff) # adjust model names
results_app_cutoff = results_app_cutoff.round(4)
results_app_cutoff = results_app_cutoff.sort_values('model')

In [52]:
# Show HR@k per model for positions up to and beyond each cutoff, averaged over the windows.
results_app_cutoff

Unnamed: 0,model,position <= 2,position > 2,position <= 5,position > 5,position <= 10,position > 10
0,AR,0.8834,0.8199,0.8589,0.822,0.857,0.8139
1,CT,0.8853,0.885,0.8833,0.8911,0.8894,0.887
3,GRU4Rec,0.8236,0.7122,0.7897,0.6921,0.7744,0.6649
2,GRU4Rec_Reminder,0.8561,0.7618,0.8299,0.7473,0.8206,0.7254
4,HGRU4Rec,0.5485,0.5,0.5114,0.531,0.5224,0.5146
5,SKNN,0.8659,0.7202,0.8222,0.695,0.8025,0.6738
7,SR,0.8864,0.8434,0.8702,0.8488,0.8717,0.8403
6,SR_BR,0.8877,0.8413,0.8704,0.8456,0.8713,0.8389
8,STAN,0.0624,0.5018,0.1682,0.6295,0.2484,0.7009
10,VSTAN,0.502,0.8417,0.6246,0.8792,0.6765,0.8995


In [53]:
# Export the cutoff-based hit rates as a LaTeX table; the file name carries the cutoff k.
with open('../tables/results_app_multiple_cutoff_HR@' + str(k) + '.tex','w') as tex_file:
    tex_file.write(results_app_cutoff.to_latex(index=False))

### performance by category

##### create mapping dicts

In [54]:
# Result folder of the multi-window runs and the matching data folder ('results' -> 'data').
folder_res = 'results/testing/' + str(datatype) + '/multiple/'
folder_data = folder_res.replace('results', 'data')
# App name <-> integer ID mappings: IDs are 1-based positions in the alphabetically sorted app names.
data = pd.read_csv('../data/app-level/data_app_nodrop.csv')
mapping = {app_name: app_id for app_id, app_name in enumerate(sorted(set(data['app_name'])), start=1)}
mapping_reverse = {app_id: app_name for app_name, app_id in mapping.items()}

# The pickled app -> category mapping was created once with the following code:
# category_mapping = {}
# for app in data.app_name.value_counts().index:
#     if app not in category_mapping:
#         cat = data.category[data.app_name==app].iloc[0]
#         category_mapping[app] = cat
#
# with open('../data/app-level/category_mapping.pickle', 'wb') as handle:
#     pickle.dump(category_mapping, handle)

# Load the stored mapping. pickle.load executes arbitrary code, so only load files you created yourself.
with open('../data/app-level/category_mapping.pickle', 'rb') as pickle_file:
    category_mapping = pickle.load(pickle_file)

##### category-level prediction

Now, we also have to convert the recommendations to category-level. Furthermore, we now have to match based on names as we cannot use token IDs anymore.

In [58]:
# Evaluation windows to average over.
windows = [1,2,3,4,5]
# Recommendation list lengths k for HR@k and MRR@k on category level.
ks = [1,5,10,20]

In [59]:
# Category-level HR@k and MRR@k for every algorithm, averaged over the windows.
excluded_algorithms = {'recs_names-vsknn', 'recs_names-vsknn_EBR'}
scorers = {'HR': calc_hr_k, 'MRR': calc_mrr_k} # insertion order defines the column order of the result
records = []

# The predictions depend only on the window, so they are generated once per window
# instead of once per (metric, k, window) combination.
for window in windows:
    test_data = pd.read_hdf(str(folder_data) + 'window_' + str(window) + '.hdf', 'test')
    predictions_files = [f for f in os.listdir(folder_res) if ('min20' not in f)
                         and f.endswith('window_' + str(window) + '-Saver@50.csv')]
    predictions = generate_predictions(predictions_files, test_data, mapping_reverse)
    predictions['ground_truth_category_name'] = predictions['ground_truth_name'].apply(lambda x: category_mapping[x])
    # filtering (instead of list.remove) does not fail when an excluded algorithm has no prediction file
    algorithms_names = [i for i in predictions.columns
                        if i.startswith('recs_names-') and i not in excluded_algorithms]

    for algo in algorithms_names:
        col_name = 'recs_names_cat-' + algo.split('recs_names-')[1]
        # joining without a separator turns e.g. 'recs_names-ct-pre' into 'ctpre', a key capitalize_names knows
        algo_name = ''.join(algo.split('-')[1:])
        # translate the recommended app names into their categories (duplicates are kept)
        predictions[col_name] = predictions[algo].apply(lambda x: [category_mapping[i] for i in x])
        for metric, scorer in scorers.items():
            for k in ks:
                value = predictions.apply(
                    lambda x: scorer(x['ground_truth_category_name'], x[col_name], k), axis=1).sum()/len(predictions)
                records.append({'model': algo_name, 'metric': str(metric) + '@' + str(k),
                                'window': window, 'value': value})

# DataFrame.append was removed in pandas 2.0; build the long-format frame once and pivot it.
metric_columns = [str(metric) + '@' + str(k) for metric in scorers for k in ks]
results_app_cat_combined = (
    pd.DataFrame(records)
    .groupby(['model', 'metric'])['value'].mean() # average across windows
    .unstack('metric') # one row per model, one column per metric@k
    .reindex(columns=metric_columns) # HR columns first, then MRR, each ordered by k
    .rename_axis(None, axis=1) # unname the column axis
    .reset_index() # convert the model index to a column
)
results_app_cat_combined = capitalize_names(results_app_cat_combined) # adjust model names
results_app_cat_combined = results_app_cat_combined.round(4)
results_app_cat_combined = results_app_cat_combined.sort_values('model').reset_index(drop=True)

# MRR@1 equals HR@1 (the reciprocal rank is 1 exactly when the first recommendation hits), so it is dropped.
if 1 in ks:
    results_app_cat_combined = results_app_cat_combined.drop(columns=['MRR@1'])

In [60]:
# Show the category-level HR@k and MRR@k per model, averaged over the windows.
results_app_cat_combined

Unnamed: 0,model,HR@1,HR@5,HR@10,HR@20,MRR@5,MRR@10,MRR@20
0,AR,0.2224,0.6972,0.8147,0.9037,0.389,0.4052,0.4113
1,CT,0.3981,0.7706,0.8615,0.9208,0.5464,0.5588,0.563
2,GRU4Rec,0.3425,0.6566,0.7523,0.8316,0.4663,0.4795,0.4851
3,GRU4Rec_Reminder,0.337,0.6675,0.7777,0.8528,0.4606,0.4756,0.481
4,HGRU4Rec,0.3285,0.4934,0.5668,0.6451,0.3923,0.402,0.4075
5,SKNN,0.0159,0.6319,0.7813,0.8483,0.212,0.2327,0.2372
6,SR,0.2818,0.7532,0.8463,0.907,0.4623,0.4748,0.4791
7,SR_BR,0.2831,0.759,0.8657,0.9077,0.4577,0.4728,0.4759
8,STAN,0.1564,0.3694,0.4162,0.46,0.244,0.2498,0.253
9,VSTAN,0.1672,0.6063,0.7456,0.8004,0.3303,0.3498,0.3538


In [61]:
# Export the category-level results as a LaTeX table.
with open('../tables/results_app_multiple_cat' + '.tex','w') as tex_file:
    tex_file.write(results_app_cat_combined.to_latex(index=False))

### removing on and off

##### multiple windows

In [62]:
# Average the test metrics of every algorithm over all windows for the runs stored under
# 'results/testing_onoff' (runs without the min20 restriction).
folder_res = 'results/testing_onoff/' + str(datatype) + '/multiple/'
result_files = os.listdir(folder_res)  # list the directory once instead of once per algorithm
# The algorithm name is the part between 'test_single_' and '_window'; other files are ignored.
algos = set([f.split('_window')[0].split('test_single_')[1] for f in result_files
             if f.startswith('test_single_') and '_window' in f])
algos -= {'vsknn', 'vsknn_EBR'}
per_algo = []
for key in sorted(algos):
    files = [f for f in result_files
             if ('Saver' not in f)
             and (f.startswith('test_single_' + str(key) + '_window'))
             and ('min20' not in f)]
    per_algo.append(get_av_perf(files, key))
# DataFrame.append was removed in pandas 2.0; concatenate the per-algorithm rows once.
results_app_droponoff = pd.concat(per_algo)
results_app_droponoff = capitalize_names(results_app_droponoff)
results_app_droponoff = results_app_droponoff.round(4)
results_app_droponoff = results_app_droponoff.sort_values('model')

In [63]:
# Show the averaged metrics of the 'testing_onoff' runs (rounded to 4 decimals, sorted by model).
results_app_droponoff

Unnamed: 0,model,HitRate@1,HitRate@5,HitRate@10,HitRate@20,MRR@5,MRR@10,MRR@20,Coverage@20,Popularity@20
0,AR,0.2367,0.6064,0.7282,0.8315,0.3834,0.3997,0.4071,0.4224,0.1373
0,CT,0.4244,0.7419,0.8223,0.8844,0.5502,0.5614,0.5656,0.3711,0.1557
0,GRU4Rec,0.35,0.5875,0.6722,0.746,0.441,0.4524,0.4575,0.8776,0.0514
0,GRU4Rec_Reminder,0.3673,0.6811,0.763,0.8283,0.4889,0.5001,0.5047,0.9279,0.0766
0,HGRU4Rec,0.3118,0.4656,0.522,0.5863,0.3712,0.3788,0.3831,0.6391,0.0218
0,SKNN,0.2443,0.6201,0.7291,0.8057,0.3896,0.4042,0.4095,0.307,0.1423
0,SR,0.3274,0.6538,0.768,0.8583,0.4485,0.4642,0.4705,0.4482,0.1301
0,SR_BR,0.3555,0.7156,0.8203,0.8613,0.4904,0.505,0.5081,0.4581,0.1309
0,STAN,0.3049,0.567,0.5721,0.5739,0.4147,0.4154,0.4156,0.19,0.0523
0,VSTAN,0.2156,0.5093,0.5836,0.6338,0.331,0.3411,0.3446,0.5333,0.0515


In [64]:
# Export the averaged 'testing_onoff' results as a LaTeX table.
with open('../tables/results_app_multiple_droponoff.tex','w') as tex_file:
    tex_file.write(results_app_droponoff.to_latex(index=False))