Notebook for the pre-selection of models. For this purpose, a shallow train/test evaluation protocol is applied:

In [1]:
import pandas as pd
import numpy as np
from dataloading import DataLoader
from eda_py import EDA
from eALS_adaptor import eALSAdaptor
from implicit.evaluation import train_test_split, ranking_metrics_at_k
from cv_py import CrossValidation

%cd C:\Users\781110104\OneDrive - Genpact\Documents\VSCode

In [2]:
# Single DataLoader instance; the import, filtering, log-scaling and CSR
# conversion cells that follow all go through it.
dl = DataLoader()

In [4]:
# Import the 'CO' data for each client, AGCO first and then TEREX.
# Names carrying the `_t` suffix hold the TEREX variant.
# The trailing 'df' argument presumably selects a DataFrame result — confirm in dataloading.
user_item_co, user_item_co_t = (
    dl.import_data(client, 'CO', 'df') for client in ('AGCO', 'TEREX')
)

In [5]:
# Run remove_low_interact_items with argument 1 on the AGCO frame, then the TEREX frame.
# NOTE(review): the exact meaning of the threshold 1 is defined in dataloading — confirm there.
user_item_filtered, user_item_filtered_t = [
    dl.remove_low_interact_items(frame, 1)
    for frame in (user_item_co, user_item_co_t)
]

In [6]:
# Log-scale both filtered frames with the same second argument (0.01).
# NOTE(review): the role of 0.01 inside log_scale_df is not visible here — confirm in dataloading.
log_scale_arg = 0.01
user_item_filtered_log = dl.log_scale_df(user_item_filtered, log_scale_arg)
user_item_filtered_log_t = dl.log_scale_df(user_item_filtered_t, log_scale_arg)

In [7]:
# Convert the log-scaled frames via dl.to_csr, AGCO first and then TEREX.
user_item_csr, user_item_csr_t = map(
    dl.to_csr, (user_item_filtered_log, user_item_filtered_log_t)
)

In [8]:
# One seeded train/test split per client: 80% of the interactions go to the
# training matrix, and random_state 22 keeps the split reproducible.
split_kwargs = dict(train_percentage=0.8, random_state=22)
train, test = train_test_split(user_item_csr, **split_kwargs)
train_t, test_t = train_test_split(user_item_csr_t, **split_kwargs)

In [9]:
# One CrossValidation helper per client matrix (AGCO -> cv, TEREX -> cv_t).
# The second positional argument 5 is presumably the fold count — confirm in cv_py.
cv, cv_t = (
    CrossValidation(matrix, 5) for matrix in (user_item_csr, user_item_csr_t)
)

In [11]:
# Search grids for model_class 'iALS'. The two clients differ only in the
# alpha range; key order is kept as in the original dicts.
space_iALS = {
    'factors': [64],
    'regularization': [60, 80, 100, 120],
    'alpha': [0.1, 0.2, 0.3, 0.4, 0.5],
    'iterations': [15],
}
space_iALS_t = {
    'factors': [64],
    'regularization': [60, 80, 100, 120],
    'alpha': [0.3, 0.4, 0.5, 0.6, 0.7],
    'iterations': [15],
}

# Arguments shared by both tuning runs.
ials_common = {'seed': 22, 'model_class': 'iALS'}

# Evaluate each grid on the single train/test split of its client.
hyper_ials = cv.hyperp_tuning_simple(
    test=test, train=train, param_space=space_iALS, **ials_common
)
hyper_ials_t = cv_t.hyperp_tuning_simple(
    test=test_t, train=train_t, param_space=space_iALS_t, **ials_common
)



In [20]:
# TEREX iALS results ranked by precision, best configuration first.
hyper_ials_t.sort_values(by='precision', ascending=False)

Unnamed: 0,factors,regularization,alpha,iterations,precision,map,ndcg,auc,mpr
3,64,60,0.6,15,0.6,0.492189,0.615407,0.5137,0.105059
4,64,60,0.7,15,0.595,0.485532,0.609501,0.513683,0.104252
9,64,80,0.7,15,0.5875,0.482107,0.609961,0.513854,0.105117
2,64,60,0.5,15,0.585,0.48642,0.61088,0.513374,0.106705
8,64,80,0.6,15,0.57,0.466846,0.591967,0.512733,0.107891
14,64,100,0.7,15,0.5525,0.447601,0.569747,0.512344,0.109149
1,64,60,0.4,15,0.5425,0.441904,0.568991,0.511896,0.108931
7,64,80,0.5,15,0.525,0.424213,0.546535,0.511503,0.11069
19,64,120,0.7,15,0.5225,0.417505,0.536843,0.511433,0.116346
13,64,100,0.6,15,0.52,0.416385,0.536267,0.511481,0.113558


In [18]:
# Search grids for model_class 'eALS' (AGCO, then TEREX).
# NOTE(review): in the recorded run this cell ended with
# "TypeError: ufunc 'isnan' not supported for the input types ...", so
# hyper_eals / hyper_eals_t may never have been assigned. Unlike the other
# grids, these have no 'iterations' key and add 'w0' — check whether
# hyperp_tuning_simple handles that combination.
space_eALS = dict(
    factors=[64],
    regularization=[5, 10, 20],
    alpha=[0.5, 1, 2],
    w0=[20, 30, 40, 50, 60],
)
space_eALS_t = dict(
    factors=[64],
    regularization=[1, 5, 10],
    alpha=[0.3, 0.5, 1, 2],
    w0=[20, 30, 40, 50, 60],
)

hyper_eals = cv.hyperp_tuning_simple(
    model_class='eALS', param_space=space_eALS, train=train, test=test, seed=22
)
hyper_eals_t = cv_t.hyperp_tuning_simple(
    model_class='eALS', param_space=space_eALS_t, train=train_t, test=test_t, seed=22
)

TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

In [15]:
# Search grids for model_class 'BPR'. The TEREX grid adds regularization 0.07
# and uses a different set of learning rates.
space_BPR = {
    'factors': [64],
    'regularization': [0.01, 0.05, 0.1],
    'learning_rate': [0.005, 0.01, 0.03, 0.05],
    'iterations': [15],
}
space_BPR_t = {
    'factors': [64],
    'regularization': [0.01, 0.05, 0.07, 0.1],
    'learning_rate': [0.01, 0.03, 0.04, 0.05],
    'iterations': [15],
}

bpr_seed = 22
hyper_bpr = cv.hyperp_tuning_simple(
    test=test,
    train=train,
    seed=bpr_seed,
    param_space=space_BPR,
    model_class='BPR',
)
hyper_bpr_t = cv_t.hyperp_tuning_simple(
    test=test_t,
    train=train_t,
    seed=bpr_seed,
    param_space=space_BPR_t,
    model_class='BPR',
)

In [22]:
# TEREX BPR results ranked by precision, best configuration first.
hyper_bpr_t.sort_values(by='precision', ascending=False)

Unnamed: 0,factors,regularization,learning_rate,iterations,precision,map,ndcg,auc,mpr
6,64,0.05,0.04,15,0.39,0.256441,0.403107,0.508503,0.116047
11,64,0.07,0.05,15,0.3875,0.248703,0.397436,0.508647,0.118413
5,64,0.05,0.03,15,0.3775,0.26183,0.405805,0.508523,0.132994
7,64,0.05,0.05,15,0.375,0.238101,0.383687,0.507968,0.112722
10,64,0.07,0.04,15,0.3675,0.251628,0.393997,0.508289,0.130743
1,64,0.01,0.03,15,0.365,0.234539,0.37751,0.507725,0.1119
9,64,0.07,0.03,15,0.355,0.242092,0.375757,0.507113,0.15716
15,64,0.1,0.05,15,0.355,0.241122,0.374548,0.507521,0.144177
14,64,0.1,0.04,15,0.3475,0.238117,0.361746,0.507167,0.178801
2,64,0.01,0.04,15,0.3425,0.214839,0.360217,0.507562,0.110371


In [16]:
# Search grids for model_class 'LMF'. Both clients share every range except 'neg_prop'.
# NOTE(review): if 'LMF' maps to implicit's LogisticMatrixFactorization, its
# neg_prop is documented as an int — confirm how the fractional value 0.5 in
# the TEREX grid is handled.
space_LMF = {
    'factors': [64],
    'regularization': [10, 20, 30, 40, 50],
    'learning_rate': [0.3, 0.5, 0.7, 1.0, 2.0],
    'iterations': [15],
    'neg_prop': [10, 20, 30],
}
space_LMF_t = {
    'factors': [64],
    'regularization': [10, 20, 30, 40, 50],
    'learning_rate': [0.3, 0.5, 0.7, 1.0, 2.0],
    'iterations': [15],
    'neg_prop': [0.5, 1, 2, 5, 10],
}

lmf_common = dict(seed=22, model_class='LMF')
hyper_lmf = cv.hyperp_tuning_simple(
    test=test, train=train, param_space=space_LMF, **lmf_common
)
hyper_lmf_t = cv_t.hyperp_tuning_simple(
    test=test_t, train=train_t, param_space=space_LMF_t, **lmf_common
)

In [24]:
# TEREX LMF results ranked by precision, best configuration first.
hyper_lmf_t.sort_values(by='precision', ascending=False)

Unnamed: 0,factors,regularization,learning_rate,iterations,neg_prop,precision,map,ndcg,auc,mpr
65,64,30,1.0,15,0.5,0.4600,0.346414,0.469798,0.510447,0.211272
40,64,20,1.0,15,0.5,0.3875,0.265012,0.391519,0.508147,0.168905
45,64,20,2.0,15,0.5,0.2725,0.136900,0.256487,0.506580,0.171952
35,64,20,0.7,15,0.5,0.2550,0.136568,0.248284,0.505231,0.172220
90,64,40,1.0,15,0.5,0.2550,0.121686,0.225077,0.505348,0.255528
...,...,...,...,...,...,...,...,...,...,...
93,64,40,1.0,15,5.0,0.0225,0.004964,0.018138,0.500073,0.442797
106,64,50,0.5,15,1.0,0.0200,0.010670,0.027186,0.500089,0.297611
113,64,50,0.7,15,5.0,0.0200,0.004625,0.016424,0.499974,0.396915
48,64,20,2.0,15,5.0,0.0200,0.005214,0.017547,0.499952,0.470872
