Table of contents:
1. Settings
    1. data settings
    2. specify hyperparameter grid
2. Data preparation
3. Hyperparameter tuning
    1. CF
    2. CF+SR
4. Experiment
    1. CF
    2. CF+SR
5. Results
6. Write results to .txt file

In [157]:
import pandas as pd
import numpy as np
from myFunctions import read_data, prep
from myFunctions import metrics_sparse, df_to_piv, piv_to_R, matrix_factorization, hyperpara_tuning, spearman, kendall, sim
from myFunctions import results_to_txt
from myFunctions import print_results
from datetime import datetime

import warnings
warnings.filterwarnings("ignore")

# 1. Settings

1.1 Data settings. Selection and split.

In [207]:
#Select input data (set exactly one flag to True):
toy_data = False     #contents make no sense - just to test code
dataset_1 = False   #not publicly available
dataset_2 = False   #not publicly available
dataset_3 = True

#Order matters: load_SM/calc_SM index this list positionally.
datasets = [toy_data,dataset_1,dataset_2,dataset_3]

#Fail fast on an ambiguous selection. With no flag set, load_SM has nothing to
#return; with several flags set, the last one silently wins there.
if sum(datasets) != 1:
    raise ValueError(f'select exactly one dataset, but {sum(datasets)} are selected')

#read_data returns the event log plus the covariate lists (all / numerical / categorical).
df, cov_array_personal, cov_array_personal_num, cov_array_personal_cat = read_data(datasets)

In [208]:
#Train/val/test split:
#Nominal share of observations held out for testing. The effective share can end up
#smaller when one_in_train_val moves observations from test to train_val.
test_size = 0.25
out_of_time = 1         #1 for out of time split. 0 otherwise (passed to prep)
one_in_train_val = 1    #1 if each case needs at least 1 observation in train_val set. 0 otherwise (passed to prep)

1.2 Hyperparameter settings: choose whether to tune, and specify the search grid.

In [228]:
#Tune hyperparameters. True: run full grid search (takes multiple hours). False: use optimal hyperparameters for dataset_3.
tune_hyperpara_CF = False    #if True: takes multiple hours
tune_hyperpara_CF_SR = False  #if True: takes multiple hours

#Calculate or load similarity matrix. Calculating might take >1hr, depending on dataset used.
load_similarity_matrix = True #True: load matrix. False: calculate and save matrix.

#Define Hyperpara grid (one list of candidate values per hyperparameter):
lambda_array = [0.01, 0.02, 0.05]
beta_array = [0, 0.05, 0.1, 0.2, 0.5]
alpha_array = [0.0001, 0.001]
L_array = [5, 10, 15]
steps_array = [500, 1000, 2000, 3000]

#Default values of the hyperparameters, used when they are not tuned.
#One assignment per line so a single value can be changed without counting
#positions in a five-way tuple.

#Optimal hyperparas for CF on dataset_3:
lambda_optimal_cf = 0.05
beta_optimal_cf = 0
alpha_optimal_cf = 0.001
L_optimal_cf = 10
steps_optimal_cf = 1000

#Optimal hyperparas for CF_SR on dataset_3:
lambda_optimal_cf_sr = 0.02
beta_optimal_cf_sr = 0.1
alpha_optimal_cf_sr = 0.001
L_optimal_cf_sr = 5
steps_optimal_cf_sr = 3000






In [210]:





"""Toy data"""
#The per-dataset selection that used to live in this cell (entirely commented out)
#has been removed: the event log and the covariate lists are obtained from
#read_data(datasets) in the data-settings cell, so nothing needs to be configured here.

'Cegeka data'

# 2. Data preparation

In [211]:
#start time experiment (later passed to results_to_txt when the results are written):
start_time_experiment = datetime.now()

In [212]:
#From eventlog, select target columns
#NOTE(review): these three names are not referenced again in the visible part of
#this notebook - confirm whether they are still needed.
values_log = 'objective'
index_log = ['case:concept:name']
columns_log = ['concept:name']

#train-test split
#(disabled duplicates; test_size is defined in the settings section)
#test_size = 0.25 #This will be smaller in reality because of the extra samples in test set
#random_state = 42

In [213]:
#Read eventlog as df
#df = pd.read_csv(path, sep=str(separator))
#df = pd.read_csv(path, sep=str(separator), dtype={'case:concept:name': str,'Afdeling Code':str,'Dienstverbandtype Code':str}) #for cegeka

In [214]:
#Split the event log into train_val / train / val / test.
#test_size now comes from the split settings; it was hard-coded to 0.25 here, so
#changing the setting had no effect on the split.
#NOTE: the first return value overwrites df, so re-running this cell feeds the
#already-prepared frame back into prep.
df, df_train_val, df_train, df_val, df_test = prep(df,test_size=test_size,out_of_time=out_of_time,one_in_train_val=one_in_train_val)

employee 385 has an observation moved from test to train_val
employee 1090 has an observation moved from test to train_val
employee 1457 has an observation moved from test to train_val
employee 44 has an observation moved from test to train_val
employee 527 has an observation moved from test to train_val
employee 48 has an observation moved from test to train_val
employee 463 has an observation moved from test to train_val
employee 228 has an observation moved from test to train_val
employee 631 has an observation moved from test to train_val
employee 1245 has an observation moved from test to train_val
employee 412 has an observation moved from test to train_val
employee 913 has an observation moved from test to train_val
employee 1212 has an observation moved from test to train_val
employee 578 has an observation moved from test to train_val
employee 518 has an observation moved from test to train_val
employee 738 has an observation moved from test to train_val
employee 249 has an ob

In [215]:
df

Unnamed: 0,case:concept:name,time_start,time_end,concept:name,prev_act,next_act,contract_start,contract_end,V01,V02,V03,V04,V05,V06,V07,V08,V09,V10,V11,objective
854,644,1/01/2001,31/01/2012,before_data_capture,none,func_002,1/01/2012,31/12/2099,cat_7,cat_04,cat_181,cat_2,cat_01,cat_4,cat_4,0.36,0.8,0.58,0.243421,0.666175
414,408,1/01/2001,31/01/2012,before_data_capture,none,func_010,15/02/2008,5/10/2014,cat_5,cat_02,cat_135,cat_2,cat_01,cat_4,cat_3,0.16,1.0,0.42,0.151123,0.204548
528,464,1/01/2001,31/01/2012,before_data_capture,none,func_166,19/12/2011,31/12/2099,cat_3,cat_16,cat_302,cat_2,cat_01,cat_4,cat_3,0.08,1.0,0.33,0.152436,0.412646
534,467,1/01/2001,31/01/2012,before_data_capture,none,func_101,1/01/2012,31/12/2099,cat_7,cat_04,cat_164,cat_1,cat_01,cat_1,cat_2,0.20,1.0,0.58,0.229951,0.444580
537,469,1/01/2001,31/01/2012,before_data_capture,none,func_177,29/10/2007,31/12/2099,cat_2,cat_11,cat_093,cat_2,cat_01,cat_4,cat_4,0.28,1.0,0.33,0.162006,0.493371
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1114,777,8/01/2001,31/01/2012,before_data_capture,none,func_216,1/01/2001,31/12/2099,cat_2,cat_06,cat_105,cat_2,cat_01,cat_4,cat_4,0.60,0.8,0.42,0.197233,0.666513
2135,1277,8/08/2011,31/01/2012,before_data_capture,none,func_262,19/08/2003,31/12/2099,cat_5,cat_02,cat_159,cat_1,cat_01,cat_4,cat_2,0.36,1.0,0.58,0.214737,0.664544
1803,1109,8/12/2008,31/01/2012,before_data_capture,none,func_002,1/01/2012,9/12/2013,cat_7,cat_02,cat_132,cat_1,cat_01,cat_2,cat_3,0.04,1.0,0.58,0.206963,0.293212
1119,779,9/01/2001,31/01/2012,before_data_capture,none,func_189,30/08/2002,31/12/2099,cat_2,cat_04,cat_163,cat_2,cat_01,cat_3,cat_4,0.76,0.8,0.42,0.216891,0.499508


In [216]:
#Row counts of the full log and of each split, in the order:
#df, train_val, train, val, test.
#NOTE(review): in the recorded output train + val = 1455 + 727 = 2182, one row short
#of train_val = 2183 - presumably a row is dropped inside prep; verify.
print(len(df),len(df_train_val),len(df_train),len(df_val),len(df_test))


2680 2183 1455 727 497


In [217]:
df

Unnamed: 0,case:concept:name,time_start,time_end,concept:name,prev_act,next_act,contract_start,contract_end,V01,V02,V03,V04,V05,V06,V07,V08,V09,V10,V11,objective
854,644,1/01/2001,31/01/2012,before_data_capture,none,func_002,1/01/2012,31/12/2099,cat_7,cat_04,cat_181,cat_2,cat_01,cat_4,cat_4,0.36,0.8,0.58,0.243421,0.666175
414,408,1/01/2001,31/01/2012,before_data_capture,none,func_010,15/02/2008,5/10/2014,cat_5,cat_02,cat_135,cat_2,cat_01,cat_4,cat_3,0.16,1.0,0.42,0.151123,0.204548
528,464,1/01/2001,31/01/2012,before_data_capture,none,func_166,19/12/2011,31/12/2099,cat_3,cat_16,cat_302,cat_2,cat_01,cat_4,cat_3,0.08,1.0,0.33,0.152436,0.412646
534,467,1/01/2001,31/01/2012,before_data_capture,none,func_101,1/01/2012,31/12/2099,cat_7,cat_04,cat_164,cat_1,cat_01,cat_1,cat_2,0.20,1.0,0.58,0.229951,0.444580
537,469,1/01/2001,31/01/2012,before_data_capture,none,func_177,29/10/2007,31/12/2099,cat_2,cat_11,cat_093,cat_2,cat_01,cat_4,cat_4,0.28,1.0,0.33,0.162006,0.493371
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1114,777,8/01/2001,31/01/2012,before_data_capture,none,func_216,1/01/2001,31/12/2099,cat_2,cat_06,cat_105,cat_2,cat_01,cat_4,cat_4,0.60,0.8,0.42,0.197233,0.666513
2135,1277,8/08/2011,31/01/2012,before_data_capture,none,func_262,19/08/2003,31/12/2099,cat_5,cat_02,cat_159,cat_1,cat_01,cat_4,cat_2,0.36,1.0,0.58,0.214737,0.664544
1803,1109,8/12/2008,31/01/2012,before_data_capture,none,func_002,1/01/2012,9/12/2013,cat_7,cat_02,cat_132,cat_1,cat_01,cat_2,cat_3,0.04,1.0,0.58,0.206963,0.293212
1119,779,9/01/2001,31/01/2012,before_data_capture,none,func_189,30/08/2002,31/12/2099,cat_2,cat_04,cat_163,cat_2,cat_01,cat_3,cat_4,0.76,0.8,0.42,0.216891,0.499508


In [218]:
"""

df is original data (format event log),
piv_df is a dataframe
    piv_df_train
    piv_df_test
R is an array
    R_train is an array
    R_test is an array
nR_train is the predicted matrix R, based on R_train as input
"""

'\n\ndf is original data (format event log),\npiv_df is a dataframe\n    piv_df_train\n    piv_df_test\nR is an array\n    R_train is an array\n    R_test is an array\nnR_train is the predicted matrix R, based on R_train as input\n'

In [219]:
#Turn the full event log and each split into a pivot table via df_to_piv
#(the displayed piv has one row per case:concept:name and one column per concept:name).
piv, piv_train_val,piv_train,piv_val,piv_test = df_to_piv(df,df_train_val,df_train,df_val,df_test)

In [220]:
df

Unnamed: 0,case:concept:name,time_start,time_end,concept:name,prev_act,next_act,contract_start,contract_end,V01,V02,V03,V04,V05,V06,V07,V08,V09,V10,V11,objective
854,644,1/01/2001,31/01/2012,before_data_capture,none,func_002,1/01/2012,31/12/2099,cat_7,cat_04,cat_181,cat_2,cat_01,cat_4,cat_4,0.36,0.8,0.58,0.243421,0.666175
414,408,1/01/2001,31/01/2012,before_data_capture,none,func_010,15/02/2008,5/10/2014,cat_5,cat_02,cat_135,cat_2,cat_01,cat_4,cat_3,0.16,1.0,0.42,0.151123,0.204548
528,464,1/01/2001,31/01/2012,before_data_capture,none,func_166,19/12/2011,31/12/2099,cat_3,cat_16,cat_302,cat_2,cat_01,cat_4,cat_3,0.08,1.0,0.33,0.152436,0.412646
534,467,1/01/2001,31/01/2012,before_data_capture,none,func_101,1/01/2012,31/12/2099,cat_7,cat_04,cat_164,cat_1,cat_01,cat_1,cat_2,0.20,1.0,0.58,0.229951,0.444580
537,469,1/01/2001,31/01/2012,before_data_capture,none,func_177,29/10/2007,31/12/2099,cat_2,cat_11,cat_093,cat_2,cat_01,cat_4,cat_4,0.28,1.0,0.33,0.162006,0.493371
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1114,777,8/01/2001,31/01/2012,before_data_capture,none,func_216,1/01/2001,31/12/2099,cat_2,cat_06,cat_105,cat_2,cat_01,cat_4,cat_4,0.60,0.8,0.42,0.197233,0.666513
2135,1277,8/08/2011,31/01/2012,before_data_capture,none,func_262,19/08/2003,31/12/2099,cat_5,cat_02,cat_159,cat_1,cat_01,cat_4,cat_2,0.36,1.0,0.58,0.214737,0.664544
1803,1109,8/12/2008,31/01/2012,before_data_capture,none,func_002,1/01/2012,9/12/2013,cat_7,cat_02,cat_132,cat_1,cat_01,cat_2,cat_3,0.04,1.0,0.58,0.206963,0.293212
1119,779,9/01/2001,31/01/2012,before_data_capture,none,func_189,30/08/2002,31/12/2099,cat_2,cat_04,cat_163,cat_2,cat_01,cat_3,cat_4,0.76,0.8,0.42,0.216891,0.499508


In [221]:
piv

concept:name,before_data_capture,func_001,func_002,func_004,func_005,func_006,func_007,func_008,func_010,func_011,...,func_252,func_253,func_254,func_255,func_256,func_257,func_258,func_262,func_263,function_265
case:concept:name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,0.535013,,,,,,
2,,0.541067,,,,,,,,,...,,,,0.535013,,,,,,
5,,,,,,,,,,,...,,,,0.358973,,,,,,
8,,,,,,,,,,,...,,,,,,,,,,
9,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1571,0.064053,,,,,,,,,,...,,,0.467571,,,,,,,
1572,0.499846,,,,,,,,0.467654,,...,,,,,,,,,,
1573,,,0.452518,,,,,,,,...,,,,,,,,,,
1574,0.499846,,,,,,,,,,...,,,,,,,,,,


In [222]:
#Convert every pivot table into its array counterpart R* via piv_to_R.
#NOTE(review): R_train_val is not used again in the visible notebook - the models
#below are fitted on R_train only; confirm this is intended.
R,R_train_val,R_train,R_val,R_test = piv_to_R(piv, piv_train_val,piv_train,piv_val,piv_test)

In [223]:
#Details on dataset:

#Row and column labels of the pivot table (to later reconstruct the ratings matrix)
empl = piv.index.values.tolist()
jobs = piv.columns.values.tolist()

#df.size counts cells (rows x columns), not rows.
print(
    f'the size of the dataframe is: {df.size}',
    f'amount of employees: {len(empl)}',
    f'amount of jobs: {len(jobs)}',
    sep='\n',
)


the size of the dataframe is: 53600
amount of employees: 1238
amount of jobs: 209


In [224]:
#The vector on which similarities between employees are calculated
cov_array_personal

['V06', 'V08']

# 3. Hyperparameter tuning

In [229]:
#initialization for matrix factorization
#Dimensions are read off the full array R: first axis length and length of its first row.
N = len(R) # N: num of employees
M = len(R[0]) # M: num of jobs
#No global seed is set here (the call below is disabled).
#np.random.seed(0)

In [230]:
def load_SM(datasets):
    """Load the cached similarity matrix of the selected dataset.

    Parameters
    ----------
    datasets : sequence of bool
        Selection flags in the order [toy_data, dataset_1, dataset_2, dataset_3].

    Returns
    -------
    The array stored in ``similarity_matrix/SM_<name>.npy`` for the selected
    dataset. If several flags are set, the last selected dataset wins.

    Raises
    ------
    ValueError
        If no flag is set (previously this surfaced as an UnboundLocalError
        on the return statement).
    """
    names = ('toy_data', 'dataset_1', 'dataset_2', 'dataset_3')
    selected = [name for flag, name in zip(datasets, names) if flag]
    if not selected:
        raise ValueError('no dataset selected: cannot choose a similarity matrix to load')
    #Keep the historical precedence: a later flag overrides an earlier one.
    return np.load(f'similarity_matrix/SM_{selected[-1]}.npy')

In [231]:
def calc_SM(datasets,df,N,cov_array_personal,cov_array_personal_cat,cov_array_personal_num):
    """Compute the similarity matrix with ``sim`` and cache it on disk.

    Parameters
    ----------
    datasets : sequence of bool
        Selection flags in the order [toy_data, dataset_1, dataset_2, dataset_3].
        The matrix is saved once for every flag that is set.
    df, N, cov_array_personal, cov_array_personal_cat, cov_array_personal_num
        Forwarded unchanged to ``sim``.

    Returns
    -------
    The value returned by ``sim``. It is also written to
    ``similarity_matrix/SM_<name>.npy`` for each selected dataset; nothing is
    written when no flag is set.
    """
    import os  #local import: the notebook's import cell does not provide os

    SM = sim(df,N,cov_array_personal,cov_array_personal_cat,cov_array_personal_num)

    names = ('toy_data', 'dataset_1', 'dataset_2', 'dataset_3')
    targets = [f'similarity_matrix/SM_{name}.npy' for flag, name in zip(datasets, names) if flag]
    if targets:
        #np.save does not create missing folders; without this a long computation
        #would be lost to a FileNotFoundError on a fresh checkout.
        os.makedirs('similarity_matrix', exist_ok=True)
    for target in targets:
        np.save(target, SM)
    return SM

In [232]:
#Reuse the similarity matrix cached on disk when requested; otherwise compute it
#(calc_SM also saves the result).
similarity_matrix = (
    load_SM(datasets)
    if load_similarity_matrix
    else calc_SM(datasets,df,N,cov_array_personal,cov_array_personal_cat,cov_array_personal_num)
)


In [233]:
#Grid search for plain CF (sr=0). When enabled, the result replaces the defaults
#assigned in the settings section.
#NOTE(review): with sr=0 the beta grid presumably has no effect, so the search may
#repeat identical fits once per beta value - confirm in hyperpara_tuning.
if tune_hyperpara_CF:
    (lambda_optimal_cf,
     beta_optimal_cf,
     alpha_optimal_cf,
     L_optimal_cf,
     steps_optimal_cf) = hyperpara_tuning(
        R_train=R_train,
        R_val=R_val,
        sr=0,
        objective=1,
        sm=similarity_matrix,
        lambda_array=lambda_array,
        beta_array=beta_array,
        alpha_array=alpha_array,
        L_array=L_array,
        steps_array=steps_array,
    )

In [234]:
#Grid search for CF+SR (sr=1). When enabled, the result replaces the defaults
#assigned in the settings section.
if tune_hyperpara_CF_SR:
    (lambda_optimal_cf_sr,
     beta_optimal_cf_sr,
     alpha_optimal_cf_sr,
     L_optimal_cf_sr,
     steps_optimal_cf_sr) = hyperpara_tuning(
        R_train=R_train,
        R_val=R_val,
        sr=1,
        objective=1,
        sm=similarity_matrix,
        lambda_array=lambda_array,
        beta_array=beta_array,
        alpha_array=alpha_array,
        L_array=L_array,
        steps_array=steps_array,
    )

In [235]:
#Report the hyperparameters the experiments below will use (tuned or default).
#The previous '{:.3f}' format hid small values (alpha=0.0001 printed as 0.000) and
#rendered the integer-valued L and steps as 10.000 / 1000.000; 'g' avoids both.
for label, values in (
    ("CF", (lambda_optimal_cf, beta_optimal_cf, alpha_optimal_cf, L_optimal_cf, steps_optimal_cf)),
    ("CF_SR", (lambda_optimal_cf_sr, beta_optimal_cf_sr, alpha_optimal_cf_sr, L_optimal_cf_sr, steps_optimal_cf_sr)),
):
    print("optimal hyperparameters for {}:".format(label))
    print("lambda: {:g} beta: {:g} alpha: {:g} L: {:g} steps: {:g}".format(*values))

optimal hyperparameters for CF:
lambda: 0.050 beta: 0.000 alpha: 0.001 L: 10.000 steps: 1000.000
optimal hyperparameters for CF_SR:
lambda: 0.020 beta: 0.100 alpha: 0.001 L: 5.000 steps: 3000.000


# 4. Experiment

In [236]:
def create_initial_matrix(N, M, L, fill_value=0.5):
    """Build the constant starting factor matrices for matrix factorization.

    Parameters
    ----------
    N : int
        Number of rows of the first factor matrix.
    M : int
        Number of rows of the second factor matrix.
    L : int
        Number of columns of both factor matrices.
    fill_value : float, optional
        Value every entry is initialised with. Defaults to 0.5, the value that
        was previously hard-coded, so existing calls are unaffected.

    Returns
    -------
    tuple of numpy.ndarray
        ``(P, Q)`` with shapes ``(N, L)`` and ``(M, L)``, both filled with
        ``fill_value``.
    """
    #NOTE(review): every column starts out identical. If matrix_factorization
    #applies the same update rule to each latent dimension, the L factors may
    #never diverge - consider a seeded random initialisation. TODO confirm.
    return np.full((N, L), fill_value), np.full((M, L), fill_value)

#Separate starting matrices per method: the second dimension follows each method's
#own L, and the two L values can differ.
P_ini_cf, Q_ini_cf = create_initial_matrix(N, M, L_optimal_cf)
P_ini_cf_sr, Q_ini_cf_sr = create_initial_matrix(N, M, L_optimal_cf_sr)

4.1 Method 1: CF

In [237]:
#runtime: easily >1hr
#Plain CF: SR is switched off, so no similarity matrix is supplied.
#NOTE(review): the model is fitted on R_train, not on R_train_val - confirm that
#leaving the validation observations out of the final fit is intended.
nP_cf, nQ_cf = matrix_factorization(
    R=R_train, P=P_ini_cf, Q=Q_ini_cf,
    K=L_optimal_cf, steps=steps_optimal_cf,
    alpha=alpha_optimal_cf, lambda1=lambda_optimal_cf, beta=beta_optimal_cf,
    SR=False, similarity_matrix=None,
)
#Predicted matrix: product of the two learned factor matrices.
nR_cf = nP_cf @ nQ_cf.T

  0%|          | 0/1000 [00:00<?, ?it/s]

4.2 Method 2: CF with SR

In [202]:
#runtime: easily >1hr
#CF with SR: same call as for plain CF, but with SR enabled and the similarity
#matrix supplied. Keyword arguments throughout, mirroring the CF call.
nP_cf_sr, nQ_cf_sr = matrix_factorization(
    R=R_train, P=P_ini_cf_sr, Q=Q_ini_cf_sr,
    K=L_optimal_cf_sr, steps=steps_optimal_cf_sr,
    alpha=alpha_optimal_cf_sr, lambda1=lambda_optimal_cf_sr, beta=beta_optimal_cf_sr,
    SR=True, similarity_matrix=similarity_matrix,
)
#Predicted matrix: product of the two learned factor matrices.
nR_cf_sr = nP_cf_sr @ nQ_cf_sr.T

  0%|          | 0/3000 [00:00<?, ?it/s]

# 5. Results

In [204]:
"""
def print_results(R_pred,R_test):
    rmse,mae = metrics_sparse(R_pred,R_test)

    print('mae: '+ str(mae))
    print('rmse: '+ str(rmse))
    print('spearman: ' + str(spearman(R_pred,R_test)))
    print('kendall: ' + str(kendall(R_pred,R_test)))

    return mae,rmse,spearman(R_pred,R_test),kendall(R_pred,R_test)
"""

"\ndef print_results(R_pred,R_test):\n    rmse,mae = metrics_sparse(R_pred,R_test)\n\n    print('mae: '+ str(mae))\n    print('rmse: '+ str(rmse))\n    print('spearman: ' + str(spearman(R_pred,R_test)))\n    print('kendall: ' + str(kendall(R_pred,R_test)))\n\n    return mae,rmse,spearman(R_pred,R_test),kendall(R_pred,R_test)\n"

In [205]:
"""
def calc_results(R_pred,R_test):
    rmse,mae = metrics_sparse(R_pred,R_test)
    return
"""

'\ndef calc_results(R_pred,R_test):\n    rmse,mae = metrics_sparse(R_pred,R_test)\n    return\n'

In [240]:


#Score both models and two naive baselines against the held-out test matrix.
print('\n** pred CF  ** ')
print_results(nR_cf,R_test)

#NOTE(review): the recorded run failed on the next call with "Found array with 0
#sample(s)". The CF+SR training cell carries an older execution count than the
#surrounding cells, so nR_cf_sr was presumably stale or incomplete - re-run the
#notebook top to bottom before trusting this section.
print('\n** pred CF+SR ** ')
print_results(nR_cf_sr,R_test)

print('\n** pred random ** ')
#Seeded generator: the random baseline (uniform on [0, 1), as before) now yields
#the same metrics on every run.
R_rand = np.random.default_rng(0).random((N,M))
print_results(R_rand,R_test)

print('\n** pred 0.5 ** ')
R_05 = np.full([N,M],0.5)
print_results(R_05,R_test)



** pred CF  ** 
mae: 0.1541
rmse: 0.2829
spearman: 0.2344
kendall: 0.2138

** pred CF+SR ** 


ValueError: Found array with 0 sample(s) (shape=(0,)) while a minimum of 1 is required.

# 6. Write results to text file

In [102]:

#Run cell to write a summary of results to Results/results_YYYY-mm-dd HH-MM-SS.txt
#All arguments are positional, so their order must match results_to_txt's signature.
#NOTE(review): the CF values are passed as (lambda, beta, L, alpha, steps) while the
#CF_SR values are passed as (lambda, beta, alpha, L, steps) - verify against the
#signature that L and alpha are not swapped for CF.
#NOTE(review): the recorded run ended in "TypeError: cannot unpack non-iterable
#NoneType object", presumably raised inside results_to_txt - confirm there.
results_to_txt(
    nR_cf, nR_cf_sr, R_test, start_time_experiment, toy_data, dataset_1, dataset_2, dataset_3, cov_array_personal, test_size, out_of_time, one_in_train_val, lambda_array, beta_array, alpha_array, L_array, steps_array, lambda_optimal_cf, beta_optimal_cf, L_optimal_cf, alpha_optimal_cf, steps_optimal_cf, lambda_optimal_cf_sr, beta_optimal_cf_sr, alpha_optimal_cf_sr, L_optimal_cf_sr, steps_optimal_cf_sr)

TypeError: cannot unpack non-iterable NoneType object