In [1]:
import os

import numpy as np
import pandas as pd

from src.config import DATA_DIR, MODEL_DIR, PARAM_DIR, OUTPUT_DIR
from src.recommendation.all_evaluation import model_results
from src.util.eval_measures import eval_lst
from src.util.io import load_pickle


In [2]:
# Both directories are the 'day' sub-folder of their configured root.
data_dir, results_dir = os.path.join(DATA_DIR, 'day'), os.path.join(MODEL_DIR, 'day')
# Every entry found in data_dir is handed to model_results as one dataset.
datasets = os.listdir(data_dir)
# Filled by the cells below: model_type -> object returned by model_results.
all_result = dict()


In [3]:
from src.recommendation.Mixture import train_mixture_model

# Run model_results for the mixture model; unlike the cells that load a
# parameter pickle, no n_components is passed here.
model_type = 'mixture_model'
all_result[model_type] = df1 = model_results(
    datasets, train_mixture_model, model_type, data_dir, results_dir, eval_lst,
    save_multinomials=False, overwrite=False,
)


In [4]:
from src.recommendation.Mixture import train_mixture_model

# Same training function as the plain mixture model, but registered under its
# own model_type and given the pickled value stored at PARAM_DIR/<model_type>
# as n_components.
model_type = 'mixture_decay_model'
param = load_pickle(os.path.join(PARAM_DIR, model_type))
all_result[model_type] = df2 = model_results(
    datasets, train_mixture_model, model_type, data_dir, results_dir, eval_lst,
    n_components=param, save_multinomials=False, overwrite=False,
)


In [5]:
from src.recommendation.FPMC import train_fpmc_model, model_type

# model_type comes from the FPMC module import above; it names both the
# parameter pickle under PARAM_DIR and the key used in all_result.
param = load_pickle(os.path.join(PARAM_DIR, model_type))
all_result[model_type] = df3 = model_results(
    datasets, train_fpmc_model, model_type, data_dir, results_dir, eval_lst,
    n_components=param, save_multinomials=False, overwrite=False,
)


In [6]:
from src.recommendation.NMF import train_nmf_model, model_type

# model_type comes from the NMF module import above; it names both the
# parameter pickle under PARAM_DIR and the key used in all_result.
param = load_pickle(os.path.join(PARAM_DIR, model_type))
all_result[model_type] = df4 = model_results(
    datasets, train_nmf_model, model_type, data_dir, results_dir, eval_lst,
    n_components=param, save_multinomials=False, overwrite=False,
)


In [7]:
from src.recommendation.HPF import train_hpf_model, model_type

# model_type comes from the HPF module import above; it names both the
# parameter pickle under PARAM_DIR and the key used in all_result.
param = load_pickle(os.path.join(PARAM_DIR, model_type))
all_result[model_type] = df5 = model_results(
    datasets, train_hpf_model, model_type, data_dir, results_dir, eval_lst,
    n_components=param, save_multinomials=False, overwrite=False,
)


In [8]:
from src.recommendation.LDA import train_lda_model, model_type

# model_type comes from the LDA module import above; it names both the
# parameter pickle under PARAM_DIR and the key used in all_result.
param = load_pickle(os.path.join(PARAM_DIR, model_type))
all_result[model_type] = df6 = model_results(
    datasets, train_lda_model, model_type, data_dir, results_dir, eval_lst,
    n_components=param, save_multinomials=False, overwrite=False,
)


In [9]:
from src.recommendation.Global import train_global_model, model_type

# model_type comes from the Global module import above; this model is run
# without an n_components argument.
all_result[model_type] = df7 = model_results(
    datasets, train_global_model, model_type, data_dir, results_dir, eval_lst,
    save_multinomials=False, overwrite=False,
)


In [None]:
from src.recommendation.Personal import train_favourite_model, model_type

# model_type comes from the Personal module import above; this model is run
# without an n_components argument.
all_result[model_type] = df8 = model_results(
    datasets, train_favourite_model, model_type, data_dir, results_dir, eval_lst,
    save_multinomials=False, overwrite=False,
)


# Display results

In [12]:
def display_results(metrics=False, decimal=3, decimal_sd=3, bold=True, std=True, idx=None):
    """Build a table of formatted result strings from the global ``all_result``.

    For every model in ``all_result`` the column-wise mean (and, optionally,
    standard deviation) of its result frame is computed, rounded and rendered
    with a fixed number of decimals.  The table has one row per metric and one
    column per model; when ``metrics`` is given it is transposed to one row
    per model and restricted to the requested metrics.

    Parameters
    ----------
    metrics : list of str or False
        Metric labels to keep (also triggers the transpose).  A falsy value
        returns the untransposed table with every metric.
    decimal : int
        Number of decimals shown for the means.
    decimal_sd : int
        Number of decimals shown for the standard deviations.
    bold : bool
        Wrap the largest mean of each metric in ``\\textbf{...}``.
    std : bool
        Append ``' $\\pm$ <std>'`` to every cell.
    idx : list or None
        Metric labels, positionally matching the columns of each result
        frame.  ``None`` (default) takes the column labels of the first frame
        in ``all_result`` at call time.

    Returns
    -------
    pandas.DataFrame
        Table of strings containing LaTeX markup.

    Raises
    ------
    ValueError
        If ``idx`` is omitted while ``all_result`` is empty.
    KeyError
        If one of the eight expected models is missing from ``all_result`` or
        a requested metric is not among the labels.
    """
    col_ref = {'global_model': 'Global', 'personal_model': 'Personal', 'fpmc_model': 'FPMC',
               'mixture_model': 'Mixture', 'mixture_decay_model': 'MixtureTW',
               'nmf_model': 'NMF', 'hpf_model': 'HPF', 'lda_model': 'LDA', }
    cols = ['MixtureTW', 'Mixture', 'FPMC', 'NMF', 'HPF', 'LDA', 'Global', 'Personal']

    # Resolve the labels when the function is called, not when it is defined,
    # so the definition does not depend on a previously executed cell.
    if idx is None:
        if not all_result:
            raise ValueError('all_result is empty and no idx was given')
        idx = next(iter(all_result.values())).columns.tolist()

    def _summarise(reduce, ndigits):
        """Return (rounded numeric frame, fixed-decimal string frame) for one statistic."""
        stats = {m: reduce(df.astype('float64')).values for m, df in all_result.items()}
        numeric = pd.DataFrame(stats, index=pd.Index(idx, name='index'))
        numeric = numeric.round(ndigits).rename(index=str, columns=col_ref)[cols]
        # Real fixed-point formatting instead of right-padding with '0', which
        # only yields the right number of decimals for values in [0, 10).
        template = '{:.%df}' % ndigits
        text = pd.DataFrame({c: [template.format(v) for v in numeric[c]] for c in cols},
                            index=numeric.index, columns=cols)
        return numeric, text

    means, result = _summarise(pd.DataFrame.mean, decimal)
    if bold:
        # Pick the winner from the numbers; comparing the formatted strings
        # would be lexicographic.
        for row, col in enumerate(np.argmax(means.values, axis=1)):
            # Raw string: in a normal literal "\t" is a TAB, not backslash + t.
            result.iat[row, col] = r'\textbf{' + result.iat[row, col] + '}'
    if std:
        _, spread = _summarise(pd.DataFrame.std, decimal_sd)
        for col in cols:
            result[col] = result[col] + r' $\pm$ ' + spread[col]
    if metrics:
        result.index.names = ['Method']
        result = result.T[metrics]
    return result


In [13]:
# Metrics to keep in the summary table, in the order they should appear.
metrics = [u'recall@5', u'precision@5', u'nDCG@5']
result = display_results(metrics=metrics)
result


Method,recall@5,precision@5,nDCG@5
MixtureTW,\textbf{0.389} $\pm$ 0.029,\textbf{0.352} $\pm$ 0.039,\textbf{0.465} $\pm$ 0.038
Mixture,0.370 $\pm$ 0.026,0.337 $\pm$ 0.035,0.446 $\pm$ 0.034
FPMC,0.355 $\pm$ 0.026,0.321 $\pm$ 0.034,0.414 $\pm$ 0.034
NMF,0.184 $\pm$ 0.010,0.175 $\pm$ 0.014,0.267 $\pm$ 0.012
HPF,0.099 $\pm$ 0.005,0.102 $\pm$ 0.010,0.143 $\pm$ 0.009
LDA,0.071 $\pm$ 0.013,0.060 $\pm$ 0.005,0.083 $\pm$ 0.009
Global,0.053 $\pm$ 0.004,0.052 $\pm$ 0.004,0.073 $\pm$ 0.004
Personal,0.366 $\pm$ 0.026,0.333 $\pm$ 0.035,0.441 $\pm$ 0.034


In [14]:
# Write the table as LaTeX; escape=False leaves the markup already present in
# the cell strings untouched.
filename = os.path.join(OUTPUT_DIR, 'table 3_all_prediction.txt')
result.to_latex(buf=filename, escape=False)
