### setup

In [1]:
import datetime
import matplotlib
import numpy as np
import pandas as pd
import pyreadr
import pickle

import os

In [2]:
os.chdir('C:\\Users\\Simon\\Desktop\\MA\\session-rec')

In [3]:
datatypes = ['app-level', 'seq-level']
datatype = datatypes[0]
windows = [1,2,3,4,5]

In [4]:
USER_KEY = 'userID'
TIME_KEY = 'timestamp'
if datatype == 'app-level':    
    ITEM_KEY = 'appID'
    SESSION_KEY = 'sessionID'
else:
    ITEM_KEY = 'usID'
    SESSION_KEY = 'sentenceID'

### helper functions

In [18]:
# for multiple windows (incl. min20)
# get average performance across all windows for a given algorithm
def get_av_perf(files, key):
    res = pd.DataFrame()
    for file in files:
        window = file.strip('.csv').split('_')[-1]
        df = pd.read_csv(folder_res + file, sep = ';')
        df.drop(['Metrics', 'Saver@50: '], axis=1, inplace=True)
        df.drop(df.filter(regex='Unnamed'), axis=1, inplace=True) # drop 'Unnamed: 24' column containing only NaNs
        df.rename(columns = lambda x : str(x)[:-2], inplace=True) # remove colon and whitespace from all column names
        df.insert(0, 'model', key)
        df.insert(1, 'window', window)
        res = res.append(df)
    res = res.groupby('model').mean().reset_index(level=0)
    return(res)

In [35]:
# for single window
# get performance for a given algorithm
def get_perf(file, key):
    df = pd.read_csv(folder_res + file, sep = ';')
    df.drop(['Metrics', 'Saver@50: '], axis=1, inplace=True)
    df.drop(df.filter(regex='Unnamed'), axis=1, inplace=True) # drop 'Unnamed: 24' column containing only NaNs
    df.rename(columns = lambda x : str(x)[:-2], inplace=True) # remove colon and whitespace from all column names
    df.insert(0, 'model', key)
    return(df)

In [61]:
# extract ground truth from test data (test_data) for a single item (position) in a single session (sessionId)
def extract_ground_truth(ID, position, test_data):
    relevant_df = test_data[test_data[SESSION_KEY]==ID]
    index = relevant_df.index[position+1]
    ground_truth = relevant_df[ITEM_KEY][index]
    return ground_truth

In [84]:
# generate a df containing the ground truth as well as predictions for all available algorithms
def generate_predictions(predictions_files, test_data, mapping_id2name):
    predictions = pd.DataFrame()
    for file in predictions_files:
        model = "_".join(file.split('_')[2:-1])
        df = pd.read_csv(folder_res + file, sep = ';')
        if 'sessionID' not in predictions.columns:
            predictions['sessionID'] = df['SessionId']
        if 'position' not in predictions.columns:
            predictions['position'] = df['Position']
        if 'ground_truth' not in predictions.columns:
            predictions['ground_truth'] = predictions.apply(lambda x: extract_ground_truth(x['sessionID'], x['position'], test_data), axis=1)
            predictions['ground_truth_name'] = predictions['ground_truth'].apply(lambda x: mapping_reverse[x])
        predictions['recs-' + model] = df['Recommendations'].apply(lambda x: [int(i) for i in x.split(',')])
        predictions['recs_names-' + model] = predictions['recs-' + model].apply(lambda x: [mapping_reverse[i] for i in x])
        predictions['scores-' + model] = df['Scores'].apply(lambda x: x.split(','))
    return predictions

In [92]:
def print_predictions(predictions, sessionID, num_recs, positions, models):
    # predictions must contain columns named 'sessionID' and 'position', containing the respective values
    predictions_dict = {}
    for pos in positions:
        row = predictions[(predictions.sessionID == sessionID) & (predictions.position == pos)]
        ground_truth = row.ground_truth_name.to_string(index=False)
#         print('sessionID: ' + str(sessionID) + ', position: ' + str(pos))
#         print('ground truth: ' + str(row.ground_truth_name.to_string(index=False)))
        df = pd.DataFrame()
        for model in models:
            df[model] = [row['recs_names-' + model].tolist()[0][i] for i in range(num_recs)]
        name = str(sessionID) + '_' + str(pos)
        predictions_dict[name] = (sessionID, pos, ground_truth, df)
    return predictions_dict

### multiple windows

##### overall

In [19]:
folder_res = 'results/testing/' + str(datatype) + '/multiple/'
algos = set([f.split('_window')[0].split('test_single_')[1] for f in os.listdir(folder_res)])
results_app = pd.DataFrame()
for key in algos:
    files = [f for f in os.listdir(folder_res) 
             if ('Saver' not in f) and (f.startswith('test_single_' + str(key) + '_window'))]
    res = get_av_perf(files, key)
    results_app = results_app.append(res)

In [8]:
results_app

Unnamed: 0,model,HitRate@1,HitRate@5,HitRate@10,HitRate@20,MRR@5,MRR@10,MRR@20,Coverage@20,Popularity@20
0,sr_BR,0.306843,0.685328,0.803019,0.841957,0.45052,0.466845,0.469779,0.513318,0.178041
0,vsknn_EBR,0.136702,0.492882,0.673422,0.825247,0.258416,0.282923,0.293738,0.534436,0.155521
0,vstan_EBR,0.233412,0.609971,0.776638,0.9033,0.364414,0.387128,0.396222,0.634862,0.171264
0,vstan,0.125963,0.450165,0.53806,0.581977,0.246096,0.258339,0.261473,0.426917,0.110566
0,vsknn,0.128089,0.378551,0.503412,0.592008,0.219181,0.235693,0.242092,0.256023,0.139892
0,ct-pre,0.404463,0.711361,0.805924,0.868126,0.528676,0.541424,0.545792,0.489416,0.22694
0,stan,0.205645,0.486762,0.502303,0.503452,0.319574,0.321904,0.321987,0.235537,0.079495
0,sr,0.286516,0.636669,0.761281,0.841191,0.419638,0.436498,0.442165,0.499956,0.175482
0,sknn,0.01447,0.438507,0.612069,0.727302,0.151303,0.174901,0.183023,0.054911,0.226626
0,ar,0.21441,0.582134,0.698555,0.815625,0.351087,0.36694,0.375228,0.46782,0.208424


In [46]:
with open('../tables/results_multiple.tex','w') as tf:
    tf.write(results_app.to_latex(index=False))

##### min20

Prediction on long sessions (20+) if trained on long sessions only (no extra tuning), implying a minimum sequence length of 20 for both training and test data:

In [127]:
folder_res = 'results/testing/' + str(datatype) + '/multiple/'
algos = set([f.split('_window')[0].split('test_single_')[1] for f in os.listdir(folder_res)])
results_app_min20 = pd.DataFrame()
for key in algos:
    files = [f for f in os.listdir(folder_res) 
             if ('Saver' not in f) and (f.startswith('test_single_' + str(key) + '_window'))
             and ('min20' in f) and ('min20_test' not in f)]
    res = get_av_perf(files, key)
    results_app_min20 = results_app_min20.append(res)

In [123]:
results_app_min20

Unnamed: 0,model,HitRate@1,HitRate@5,HitRate@10,HitRate@20,MRR@5,MRR@10,MRR@20,Coverage@20,Popularity@20
0,sr_BR,0.341889,0.68041,0.785769,0.827994,0.469606,0.48408,0.487222,0.668832,0.166845
0,vsknn_EBR,0.074059,0.329651,0.513735,0.724032,0.161978,0.186408,0.20136,0.661464,0.090154
0,vstan_EBR,0.223215,0.573742,0.724226,0.878761,0.346622,0.36655,0.377486,0.712126,0.129514
0,vstan,0.097653,0.336831,0.399226,0.443798,0.183638,0.192234,0.195386,0.488433,0.037339
0,vsknn,0.078995,0.226729,0.309466,0.384813,0.132269,0.143144,0.14838,0.353464,0.044974
0,ct-pre,0.425663,0.722168,0.792544,0.852277,0.544338,0.553866,0.558032,0.652148,0.204826
0,stan,0.256956,0.634958,0.659476,0.661044,0.408467,0.412143,0.41225,0.331568,0.070702
0,sr,0.313969,0.630259,0.738955,0.825583,0.433679,0.448234,0.454366,0.64471,0.161785
0,sknn,0.01303,0.348018,0.536553,0.684919,0.111743,0.137858,0.148289,0.060887,0.206955
0,ar,0.226454,0.571704,0.672101,0.790538,0.364786,0.378363,0.386757,0.614864,0.184026


Prediction on long sessions (20+) if trained on all sessions, implying a minimum sequence length of 20 for test data only:

In [129]:
algos = set([f.split('_window')[0].split('test_single_')[1] for f in os.listdir(folder_res)])
results_app_min20_test = pd.DataFrame()
for key in algos:
    files = [f for f in os.listdir(folder_res) 
             if ('Saver' not in f) and (f.startswith('test_single_' + str(key) + '_window')) and ('min20_test' in f)]
    res = get_av_perf(files, key)
    results_app_min20_test = results_app_min20_test.append(res)

In [131]:
results_app_min20_test

Unnamed: 0,model,HitRate@1,HitRate@5,HitRate@10,HitRate@20,MRR@5,MRR@10,MRR@20,Coverage@20,Popularity@20
0,sr_BR,0.190311,0.51827,0.735668,0.797411,0.315831,0.345293,0.349794,0.163333,0.183325
0,vsknn_EBR,0.099224,0.453127,0.702537,0.910098,0.216053,0.248928,0.263555,0.098197,0.205198
0,vstan_EBR,0.134525,0.439234,0.702419,0.901583,0.238527,0.273995,0.288606,0.14674,0.214118
0,vstan,0.230714,0.605628,0.749873,0.817848,0.380282,0.399934,0.404801,0.201245,0.201026
0,vsknn,0.224245,0.561114,0.630824,0.725641,0.354935,0.363913,0.370316,0.081503,0.232841
0,ct-pre,0.312345,0.590821,0.731366,0.849249,0.422057,0.440521,0.449204,0.160768,0.246044
0,stan,0.243995,0.611254,0.632984,0.632984,0.390766,0.394045,0.394045,0.073839,0.095866
0,sr,0.180957,0.474725,0.664619,0.798725,0.293224,0.317611,0.327185,0.166508,0.183233
0,sknn,0.004219,0.296237,0.573306,0.643284,0.079066,0.114025,0.118978,0.038394,0.24616
0,ar,0.188979,0.464734,0.592135,0.789479,0.296414,0.313525,0.327418,0.131541,0.234878


app-level minlength 20, compare performance on positions 20+ to performance on positions 20+ without minsessionlength
hypothesis: extra damage of including short sequences?
if no performance difference: maybe no extra +20 tuning necessary


### single window

##### overall

In [103]:
folder_res = 'results/testing/' + str(datatype) + '/single/'
algos = set([f.split('test_single_')[1].split('_single')[0] for f in os.listdir(folder_res)])
results_seq = pd.DataFrame()
for key in algos:
    file = [f for f in os.listdir(folder_res) 
             if ('Saver' not in f) and (f.startswith('test_single_' + str(key) + '_single'))][0] # list is of length 1 actually
    res = get_perf(file, key)b
    results_seq = results_seq.append(res)

In [44]:
results_seq

Unnamed: 0,model,HitRate@1,HitRate@5,HitRate@10,HitRate@20,MRR@5,MRR@10,MRR@20,Coverage@20,Popularity@20
0,sr_BR,0.2616,0.6848,0.8312,0.86,0.424933,0.445704,0.447944,0.207591,0.18946
0,vsknn_EBR,0.1928,0.6344,0.8216,0.8968,0.340693,0.367704,0.372909,0.228867,0.219461
0,vstan_EBR,0.244,0.6176,0.8056,0.8936,0.377653,0.405148,0.41151,0.372628,0.21228
0,vstan,0.1464,0.556,0.6408,0.68,0.30116,0.312951,0.315748,0.142036,0.182576
0,vsknn,0.1864,0.5112,0.64,0.7848,0.306133,0.322507,0.333712,0.077631,0.232581
0,ct-pre,0.3608,0.6968,0.8104,0.8848,0.504547,0.520107,0.525493,0.198965,0.246952
0,stan,0.14,0.2896,0.292,0.292,0.202533,0.202933,0.202933,0.065555,0.087643
0,sr,0.24,0.6264,0.7944,0.8616,0.39532,0.418539,0.42344,0.20529,0.187877
0,sknn,0.0032,0.532,0.6736,0.7608,0.19092,0.211221,0.217112,0.020127,0.239488
0,ar,0.1928,0.5752,0.7256,0.8432,0.322747,0.343113,0.351387,0.179413,0.231806


In [45]:
with open('../tables/results_single.tex','w') as tf:
    tf.write(results_seq.to_latex(index=False))

### extracting predictions

##### extracting prediction for single

In [57]:
folder_res = 'results/testing/' + str(datatype) + '/single/'
folder_data = folder_res.replace('results', 'data')
test_data = pd.read_hdf(str(folder_data) + 'single.hdf', 'test') # make sure this aligns with folder_res
data = pd.read_csv('../data/app-level/data_app_nodrop.csv') # create app and user mappings
mapping = dict([(y,x+1) for x,y in enumerate(sorted(set(data['app_name'])))]) # not necessary
mapping_reverse = dict((v,k) for k,v in mapping.items())

In [87]:
predictions_files = [f for f in os.listdir(folder_res) if 'Saver' in f]
predictions = generate_predictions(predictions_files, test_data, mapping_reverse)

In [96]:
# sessionID = predictions.sessionID[1]
# num_recs = 5
# positions = predictions.position[predictions.sessionID==sessionID]
# models = results.model.tolist()
# print_predictions(predictions, sessionID, num_recs, positions, models)

In [82]:
# calculate HR@1
# predictions.apply(lambda x: x['recs-sr_BR'][0] == x['ground_truth'], axis=1).sum()/len(predictions)

### performance by category

In [100]:
# need to calculate performance by category for each window separately, then average across all five windows

First, we add a new column containing the ground truth name

In [29]:
# category_mapping = {}
# for app in data.app_name.value_counts().index:
#     if app not in category_mapping:
#         cat = data.category[data.app_name==app].iloc[0]
#         category_mapping[app] = cat
        
# with open('../../data/app-level/category_mapping.pickle', 'wb') as handle:
#     pickle.dump(category_mapping, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [139]:
with open('../../data/app-level/category_mapping.pickle', 'rb') as handle:
    category_mapping = pickle.load(handle)

In [140]:
predictions['ground_truth_category_name'] = predictions['ground_truth_name'].apply(lambda x: category_mapping[x])

In [71]:
# alternative (faster)
# aux_df = data[['category', 'app_name']].drop_duplicates(subset=['app_name']).rename(columns={'category':'ground_truth_category_name'})
# predictions = predictions.merge(aux_df[['ground_truth_category_name', 'app_name']], left_on='ground_truth_name', right_on='app_name').drop(['app_name'], 1)

In [None]:
algorithms = [i for i in predictions.columns if i.startswith('recs-')]

In [161]:
# helper function outputting whether ground truth is in recommendation list of length k for a single algorithm and item
def calc_hr_k(ground_truth, rec_list, k):
    return ground_truth in rec_list[:k]

In [237]:
cat_counts = predictions['ground_truth_category_name'].value_counts()

##### exact prediction

Now, we create a new dataframe containing HR@k (for fixed k) for each algorithm as column

In [None]:
k=1

In [339]:
perf_by_cat_exact = pd.DataFrame()
for cat in cat_counts.index:
    pred_cat = predictions[predictions['ground_truth_category_name'] == cat]
    df = pd.DataFrame()
    df['category'] = [cat]
    count = cat_counts[cat]
    df['count'] = [count]
    for algo in algorithms:
        algo_name = ''.join(algo.split('-')[1:])
        value = pred_cat.apply(lambda x: calc_hr_k(x['ground_truth'], x[algo], k), axis=1).sum()/len(pred_cat)
        df[algo_name] = [value]
    perf_by_cat_exact = perf_by_cat_exact.append(df).reset_index(drop=True)

In [343]:
perf_by_cat_exact.head(3)

Unnamed: 0,category,count,ar,ctpre,gru4rec_Reminder,gru4rec,sknn,sr_BR,sr,stan_ER,stan,vsknn_EBR,vsknn,vstan_EBR,vstan
0,Launcher,205,0.321951,0.536585,0.390244,0.146341,0.492683,0.453659,0.312195,0.307317,0.287805,0.317073,0.102439,0.341463,0.419512
1,Messaging,147,0.428571,0.802721,0.251701,0.020408,0.435374,0.755102,0.782313,0.482993,0.489796,0.510204,0.0,0.0,0.353741
2,Social_Networks,72,0.180556,0.152778,0.319444,0.152778,0.166667,0.138889,0.152778,0.194444,0.180556,0.194444,0.388889,0.375,0.222222


In [341]:
for algo in perf_by_cat_exact.columns[2:]:
    row = perf_by_cat_exact[algo].argmax()
    cat = perf_by_cat_exact['category'][row]
    perf = perf_by_cat_exact[algo][row]
    count = perf_by_cat_exact['count'][row]
    print('algo ' + str(algo) + ': best performance for category ' + str(cat) + '(freq: ' + str(count) + '): HR@' + str(k) + ' ' + str(perf))

algo ar: best performance for category PHOTO(freq: 13): HR@1 0.9230769230769231
algo ctpre: best performance for category OUTGOING(freq: 5): HR@1 1.0
algo gru4rec_Reminder: best performance for category OUTGOING(freq: 5): HR@1 1.0
algo gru4rec: best performance for category OUTGOING(freq: 5): HR@1 1.0
algo sknn: best performance for category CONNECTED(freq: 16): HR@1 0.625
algo sr_BR: best performance for category OUTGOING(freq: 5): HR@1 1.0
algo sr: best performance for category OUTGOING(freq: 5): HR@1 1.0
algo stan_ER: best performance for category PHOTO(freq: 13): HR@1 0.7692307692307693
algo stan: best performance for category Workout(freq: 2): HR@1 1.0
algo vsknn_EBR: best performance for category PHOTO(freq: 13): HR@1 0.5384615384615384
algo vsknn: best performance for category Workout(freq: 2): HR@1 1.0
algo vstan_EBR: best performance for category PHOTO(freq: 13): HR@1 0.7692307692307693
algo vstan: best performance for category Workout(freq: 2): HR@1 1.0


##### category-level prediction

Now, we also have to convert the recommendations to category-level. Furthermore, we now have to match based on names as we cannot use token IDs anymore.

In [335]:
k=1

In [285]:
algorithms_names = [i for i in predictions.columns if i.startswith('recs_names-')]

In [303]:
for algo in algorithms_names:
    col_name = 'recs_names_cat-' + ''.join(algo.split('-')[1:])
    predictions[col_name] = predictions[algo].apply(lambda x: [category_mapping[i] for i in x])

General performance:

In [307]:
algorithms_names

['recs_names-ar',
 'recs_names-ct-pre',
 'recs_names-gru4rec_Reminder',
 'recs_names-gru4rec',
 'recs_names-sknn',
 'recs_names-sr_BR',
 'recs_names-sr',
 'recs_names-stan_ER',
 'recs_names-stan',
 'recs_names-vsknn_EBR',
 'recs_names-vsknn',
 'recs_names-vstan_EBR',
 'recs_names-vstan']

In [326]:
perf_by_cat_cat = pd.DataFrame()
df = pd.DataFrame()
for algo in algorithms_names:
    col_name = 'recs_names_cat-' + ''.join(algo.split('-')[1:])
    algo_name = ''.join(algo.split('-')[1:])
    value = predictions.apply(lambda x: calc_hr_k(x['ground_truth_category_name'], x[col_name], k), axis=1).sum()/len(predictions)
    df[algo_name] = [value]
perf_by_cat_cat = perf_by_cat_cat.append(df).reset_index(drop=True)

In [324]:
results[['model', 'HitRate@1', 'HitRate@20']]

Unnamed: 0,model,HitRate@1,HitRate@20
0,ar,0.274929,0.861823
0,ct-pre,0.444444,0.901709
0,gru4rec_Reminder,0.292023,0.810541
0,gru4rec,0.192308,0.683761
0,sknn,0.290598,0.824786
0,sr_BR,0.373219,0.947293
0,sr,0.350427,0.896011
0,stan_ER,0.27208,0.950142
0,stan,0.253561,0.887464
0,vsknn_EBR,0.25641,0.907407


In [329]:
perf_by_cat_cat

Unnamed: 0,ar,ctpre,gru4rec_Reminder,gru4rec,sknn,sr_BR,sr,stan_ER,stan,vsknn_EBR,vsknn,vstan_EBR,vstan
0,0.913105,0.933048,0.887464,0.824786,0.911681,0.952991,0.935897,0.954416,0.928775,0.933048,0.712251,0.858974,0.931624


Category-level performance:

In [336]:
perf_by_cat_cat = pd.DataFrame()
for cat in cat_counts.index:
    pred_cat = predictions[predictions['ground_truth_category_name'] == cat]
    df = pd.DataFrame()
    df['category'] = [cat]
    count = cat_counts[cat]
    df['count'] = [count]
    for algo in algorithms_names:
        col_name = 'recs_names_cat-' + ''.join(algo.split('-')[1:])
        algo_name = ''.join(algo.split('-')[1:])
        value = pred_cat.apply(lambda x: calc_hr_k(x['ground_truth_category_name'], x[col_name], k), axis=1).sum()/len(pred_cat)
        df[algo_name] = [value]
    perf_by_cat_cat = perf_by_cat_cat.append(df).reset_index(drop=True)

In [344]:
perf_by_cat_cat.head(3)

Unnamed: 0,category,count,ar,ctpre,gru4rec_Reminder,gru4rec,sknn,sr_BR,sr,stan_ER,stan,vsknn_EBR,vsknn,vstan_EBR,vstan
0,Launcher,205,0.478049,0.62439,0.526829,0.365854,0.492683,0.468293,0.458537,0.307317,0.287805,0.317073,0.102439,0.390244,0.419512
1,Messaging,147,0.428571,0.816327,0.258503,0.027211,0.44898,0.77551,0.795918,0.489796,0.503401,0.517007,0.013605,0.006803,0.367347
2,Social_Networks,72,0.222222,0.166667,0.430556,0.236111,0.236111,0.166667,0.180556,0.263889,0.236111,0.263889,0.555556,0.555556,0.277778


### performance by position

In [None]:
# TBD: need to compute performance by position for all windows separately, then average across them

We expect performance of neural algorithms to increase as the position increases

In [38]:
algorithms = [i for i in predictions.columns if i.startswith('recs-')]

In [39]:
# helper function outputting whether ground truth is in recommendation list of length k for a single algorithm and item
def calc_hr_k(ground_truth, rec_list, k):
    return ground_truth in rec_list[:k]

In [40]:
pos_counts = predictions['position'].value_counts()

In [41]:
k=1

In [42]:
perf_by_pos = pd.DataFrame()
for pos in pos_counts.index:
    pred_pos = predictions[predictions['position']==pos]
    df = pd.DataFrame()
    df['position'] = [pos]
    count = pos_counts[pos]
    df['count'] = [count]
    for algo in algorithms:
        algo_name = ''.join(algo.split('-')[1:])
        value = pred_pos.apply(lambda x: calc_hr_k(x['ground_truth'], x[algo], k), axis=1).sum()/len(pred_pos)
        df[algo_name] = [value]
    perf_by_pos = perf_by_pos.append(df).reset_index(drop=True)

In [43]:
perf_by_pos.head(20)

Unnamed: 0,position,count,ar,ctpre,gru4rec_Reminder,gru4rec,hgru4rec,shan,sknn,sr_BR,sr,stan_ER,stan,vsknn_EBR,vsknn,vstan_EBR,vstan
0,0,147,0.353741,0.47619,0.244898,0.088435,0.129252,0.251701,0.231293,0.455782,0.44898,0.285714,0.244898,0.340136,0.088435,0.122449,0.244898
1,1,111,0.225225,0.36036,0.306306,0.162162,0.324324,0.351351,0.297297,0.279279,0.306306,0.234234,0.234234,0.243243,0.117117,0.261261,0.333333
2,2,75,0.24,0.426667,0.253333,0.266667,0.4,0.333333,0.32,0.373333,0.373333,0.213333,0.2,0.2,0.2,0.24,0.213333
3,3,57,0.245614,0.421053,0.315789,0.280702,0.315789,0.245614,0.298246,0.350877,0.333333,0.22807,0.263158,0.192982,0.122807,0.192982,0.280702
4,4,47,0.319149,0.510638,0.361702,0.234043,0.404255,0.340426,0.361702,0.446809,0.404255,0.361702,0.297872,0.319149,0.191489,0.297872,0.361702
5,5,35,0.171429,0.571429,0.371429,0.285714,0.285714,0.314286,0.285714,0.371429,0.342857,0.257143,0.285714,0.228571,0.114286,0.142857,0.228571
6,6,29,0.344828,0.413793,0.413793,0.310345,0.413793,0.413793,0.413793,0.413793,0.275862,0.275862,0.206897,0.172414,0.206897,0.241379,0.37931
7,7,27,0.296296,0.518519,0.37037,0.185185,0.333333,0.259259,0.296296,0.37037,0.37037,0.296296,0.296296,0.259259,0.185185,0.185185,0.333333
8,8,25,0.16,0.44,0.36,0.28,0.36,0.28,0.2,0.24,0.2,0.24,0.08,0.16,0.2,0.2,0.28
9,9,22,0.272727,0.545455,0.409091,0.181818,0.227273,0.272727,0.363636,0.318182,0.363636,0.272727,0.363636,0.272727,0.090909,0.136364,0.227273


In [74]:
# considering only positions <= vs. >
perf_by_pos = pd.DataFrame()
cutoff = 2
for pos in ['<=', '>']:
    if pos=='<=':
        pred_pos = predictions[predictions['position']<=cutoff]
    else:
        pred_pos = predictions[predictions['position']>cutoff]
    df = pd.DataFrame()
    df['position'] = [pos]
    for algo in algorithms:
        algo_name = ''.join(algo.split('-')[1:])
        value = pred_pos.apply(lambda x: calc_hr_k(x['ground_truth'], x[algo], k), axis=1).sum()/len(pred_pos)
        df[algo_name] = [value]
    perf_by_pos = perf_by_pos.append(df).reset_index(drop=True)

In [75]:
perf_by_pos

Unnamed: 0,position,ar,ctpre,gru4rec_Reminder,gru4rec,hgru4rec,shan,sknn,sr_BR,sr,stan_ER,stan,vsknn_EBR,vsknn,vstan_EBR,vstan
0,<=,0.285285,0.426426,0.267267,0.153153,0.255255,0.303303,0.273273,0.378378,0.384384,0.252252,0.231231,0.276276,0.123123,0.195195,0.267267
1,>,0.265583,0.460705,0.314363,0.227642,0.317073,0.292683,0.306233,0.368564,0.319783,0.289973,0.273713,0.238482,0.159892,0.230352,0.292683
