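Notebook for pre-tuning the models' hyperparameters. For this purpose, a shallow train/test evaluation protocol is applied (the training data is split only once into a calibration and a validation part instead of running a full cross-validation):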

In [14]:
import pandas as pd
import numpy as np
from dataloading import DataLoader
from eALS_adaptor import eALSAdaptor
from implicit.evaluation import train_test_split, ranking_metrics_at_k
from cv_py import CrossValidation

%cd C:\Users\781110104\OneDrive - Genpact\Documents\VSCode

In [15]:
# Shared loader instance; the cells below use it to import the data, drop
# low-interaction items, log-scale the interactions and build sparse matrices.
dl = DataLoader()

In [16]:
# loading the data: the 'AGCO' frame goes to user_item_co, the 'TEREX' frame to user_item_co_t
# NOTE(review): the saved output of this cell is a FileNotFoundError for
# 'loc_agco_new.csv' — presumably the loader reads files relative to the current
# working directory; confirm the %cd in the first cell has run before this one.
user_item_co = dl.import_data('AGCO', 'CO', 'df')
user_item_co_t = dl.import_data('TEREX', 'CO', 'df')

FileNotFoundError: [Errno 2] No such file or directory: 'loc_agco_new.csv'

In [5]:
# Drop items that have only one interaction (threshold argument 1),
# first for the AGCO frame, then for the TEREX frame.
user_item_filtered, user_item_filtered_t = (
    dl.remove_low_interact_items(frame, 1)
    for frame in (user_item_co, user_item_co_t)
)

In [6]:
# Log-scale the filtered interaction data of both OEMs.
# The second argument (0.01) is handed to log_scale_df unchanged; its exact
# meaning is defined in the dataloading module.
user_item_filtered_log, user_item_filtered_log_t = (
    dl.log_scale_df(user_item_filtered, 0.01),
    dl.log_scale_df(user_item_filtered_t, 0.01),
)

In [7]:
# Turn both log-scaled frames into sparse matrices via dl.to_csr
# (AGCO first, then TEREX).
user_item_csr, user_item_csr_t = map(
    dl.to_csr, (user_item_filtered_log, user_item_filtered_log_t)
)

In [18]:
# Outer 80/20 train/test split per OEM; the fixed random state (22) keeps the
# split repeatable between runs.
train, test = train_test_split(user_item_csr, train_percentage=0.8, random_state=22)
train_t, test_t = train_test_split(user_item_csr_t, train_percentage=0.8, random_state=22)

In [19]:
# Tuning helper; the tuning cells below call its hyperp_tuning_simple method.
# NOTE(review): the argument 5 is presumably the number of CV folds — confirm in cv_py.
cv = CrossValidation(5)

In [20]:
# Inner split used for pre-tuning: the training part is divided one more time
# (80/20, same random state) into a calibration set and a validation set.
cal, val = train_test_split(train, train_percentage=0.8, random_state=22)
cal_t, val_t = train_test_split(train_t, train_percentage=0.8, random_state=22)

In [52]:
# iALS grid search, one grid per OEM (no suffix = AGCO data, suffix _t = TEREX data).
space_iALS = {
    'factors': [64],
    'regularization': [100, 120, 140],
    'alpha': [0.3, 0.4, 0.5],
    'iterations': [15],
}
space_iALS_t = {
    'factors': [64],
    'regularization': [40, 60, 80],
    'alpha': [0.6, 0.7, 0.8],
    'iterations': [15],
}

# The calibration split is passed as `train`, the validation split as `test`,
# and the outer test split is handed over through `exclude`.
hyper_ials = cv.hyperp_tuning_simple(
    test=val,
    train=cal,
    seed=22,
    param_space=space_iALS,
    model_class='iALS',
    exclude=test,
)
hyper_ials_t = cv.hyperp_tuning_simple(
    test=val_t,
    train=cal_t,
    seed=22,
    param_space=space_iALS_t,
    model_class='iALS',
    exclude=test_t,
)

In [53]:
# Show the TEREX iALS results ordered from highest to lowest precision.
hyper_ials_t.sort_values('precision', ascending=False)

Unnamed: 0,factors,regularization,alpha,iterations,precision,map,ndcg,auc,mpr
4,64,60,0.7,15,0.5475,0.431271,0.572166,0.51607,0.122
3,64,60,0.6,15,0.54,0.424648,0.564252,0.515573,0.121715
5,64,60,0.8,15,0.54,0.423,0.564975,0.5158,0.121559
8,64,80,0.8,15,0.5375,0.412792,0.556747,0.515777,0.122598
1,64,40,0.7,15,0.53,0.416848,0.559729,0.515636,0.128754
7,64,80,0.7,15,0.53,0.406903,0.551875,0.5154,0.123062
2,64,40,0.8,15,0.5225,0.410191,0.553879,0.515314,0.130178
6,64,80,0.6,15,0.5225,0.40279,0.551472,0.515196,0.125526
0,64,40,0.6,15,0.5175,0.417861,0.553184,0.515231,0.127861


In [85]:
# eALS grid search for both OEMs. The AGCO grid varies regularization, alpha
# and w0; the TEREX grid (suffix _t) holds a single fixed configuration.
space_eALS = {
    'factors': [64],
    'regularization': [200, 250, 300],
    'alpha': [0.1, 0.2, 0.3],
    'w0': [25000, 30000, 35000],
    'iterations': [15],
}
space_eALS_t = {
    'factors': [64],
    'regularization': [10],
    'alpha': [0.2],
    'w0': [1500],
    'iterations': [15],
}

# Calibration split as `train`, validation split as `test`, outer test split as `exclude`.
hyper_eals = cv.hyperp_tuning_simple(
    test=val,
    train=cal,
    seed=22,
    param_space=space_eALS,
    model_class='eALS',
    exclude=test,
)
hyper_eals_t = cv.hyperp_tuning_simple(
    test=val_t,
    train=cal_t,
    seed=22,
    param_space=space_eALS_t,
    model_class='eALS',
    exclude=test_t,
)

converting type of user_items to <class 'numpy.float32'>
converting type of user_items to <class 'numpy.float32'>
converting type of user_items to <class 'numpy.float32'>
converting type of user_items to <class 'numpy.float32'>
converting type of user_items to <class 'numpy.float32'>
converting type of user_items to <class 'numpy.float32'>
converting type of user_items to <class 'numpy.float32'>
converting type of user_items to <class 'numpy.float32'>
converting type of user_items to <class 'numpy.float32'>
converting type of user_items to <class 'numpy.float32'>
converting type of user_items to <class 'numpy.float32'>
converting type of user_items to <class 'numpy.float32'>
converting type of user_items to <class 'numpy.float32'>
converting type of user_items to <class 'numpy.float32'>
converting type of user_items to <class 'numpy.float32'>
converting type of user_items to <class 'numpy.float32'>
converting type of user_items to <class 'numpy.float32'>
converting type of user_items t

In [86]:
# Show the AGCO eALS results ordered from highest to lowest precision.
hyper_eals.sort_values('precision', ascending=False)

Unnamed: 0,factors,regularization,alpha,w0,iterations,precision,map,ndcg,auc,mpr
11,64,250,0.1,35000,15,0.856766,0.802814,0.867464,0.505434,0.083007
13,64,250,0.2,30000,15,0.856106,0.802382,0.866741,0.505371,0.082499
10,64,250,0.1,30000,15,0.856106,0.802082,0.867305,0.505427,0.083066
14,64,250,0.2,35000,15,0.856106,0.803282,0.867282,0.505361,0.082594
2,64,200,0.1,35000,15,0.855446,0.801305,0.867282,0.505389,0.083742
1,64,200,0.1,30000,15,0.853465,0.800688,0.866775,0.50536,0.082946
12,64,250,0.2,25000,15,0.853465,0.798655,0.865144,0.505388,0.082631
0,64,200,0.1,25000,15,0.852475,0.799589,0.866262,0.50535,0.08238
17,64,250,0.3,35000,15,0.852145,0.798158,0.864031,0.505385,0.082622
5,64,200,0.2,35000,15,0.851815,0.799207,0.864901,0.505332,0.084177


In [15]:
# BPR grid search for both OEMs (no suffix = AGCO data, suffix _t = TEREX data).
space_BPR = {
    'factors': [64],
    'regularization': [0.01, 0.05, 0.1],
    'learning_rate': [0.005, 0.01, 0.03, 0.05],
    'iterations': [15],
}
space_BPR_t = {
    'factors': [64],
    'regularization': [0.03, 0.05, 0.07],
    'learning_rate': [0.03, 0.04, 0.05],
    'iterations': [15],
}

# Calibration split as `train`, validation split as `test`, outer test split as `exclude`.
hyper_bpr = cv.hyperp_tuning_simple(
    test=val,
    train=cal,
    seed=22,
    param_space=space_BPR,
    model_class='BPR',
    exclude=test,
)
hyper_bpr_t = cv.hyperp_tuning_simple(
    test=val_t,
    train=cal_t,
    seed=22,
    param_space=space_BPR_t,
    model_class='BPR',
    exclude=test_t,
)

In [22]:
# Show the TEREX BPR results ordered from highest to lowest precision.
# NOTE(review): the saved output of this cell lists row indices up to 15 and
# regularization values 0.01 and 0.1, which the 3x3 grid in space_BPR_t cannot
# produce — the output appears to stem from an earlier grid; re-run to refresh.
hyper_bpr_t.sort_values(by=['precision'], ascending=False)

Unnamed: 0,factors,regularization,learning_rate,iterations,precision,map,ndcg,auc,mpr
6,64,0.05,0.04,15,0.39,0.256441,0.403107,0.508503,0.116047
11,64,0.07,0.05,15,0.3875,0.248703,0.397436,0.508647,0.118413
5,64,0.05,0.03,15,0.3775,0.26183,0.405805,0.508523,0.132994
7,64,0.05,0.05,15,0.375,0.238101,0.383687,0.507968,0.112722
10,64,0.07,0.04,15,0.3675,0.251628,0.393997,0.508289,0.130743
1,64,0.01,0.03,15,0.365,0.234539,0.37751,0.507725,0.1119
9,64,0.07,0.03,15,0.355,0.242092,0.375757,0.507113,0.15716
15,64,0.1,0.05,15,0.355,0.241122,0.374548,0.507521,0.144177
14,64,0.1,0.04,15,0.3475,0.238117,0.361746,0.507167,0.178801
2,64,0.01,0.04,15,0.3425,0.214839,0.360217,0.507562,0.110371


In [16]:
# LMF grid search for both OEMs (no suffix = AGCO data, suffix _t = TEREX data).
# The two grids differ only in the neg_prop candidates.
# NOTE(review): the TEREX grid contains a fractional neg_prop (0.5). If 'LMF'
# maps to implicit's LogisticMatrixFactorization, neg_prop is documented as an
# int there — confirm how the tuning helper handles non-integer values.
space_LMF = {
    'factors': [64],
    'regularization': [10, 20, 30, 40, 50],
    'learning_rate': [0.3, 0.5, 0.7, 1.0, 2.0],
    'iterations': [15],
    'neg_prop': [10, 20, 30],
}
space_LMF_t = {
    'factors': [64],
    'regularization': [10, 20, 30, 40, 50],
    'learning_rate': [0.3, 0.5, 0.7, 1.0, 2.0],
    'iterations': [15],
    'neg_prop': [0.5, 1, 2, 5, 10],
}

# Calibration split as `train`, validation split as `test`, outer test split as `exclude`.
hyper_lmf = cv.hyperp_tuning_simple(
    test=val,
    train=cal,
    seed=22,
    param_space=space_LMF,
    model_class='LMF',
    exclude=test,
)
hyper_lmf_t = cv.hyperp_tuning_simple(
    test=val_t,
    train=cal_t,
    seed=22,
    param_space=space_LMF_t,
    model_class='LMF',
    exclude=test_t,
)

In [24]:
# Show the TEREX LMF results ordered from highest to lowest precision.
hyper_lmf_t.sort_values('precision', ascending=False)

Unnamed: 0,factors,regularization,learning_rate,iterations,neg_prop,precision,map,ndcg,auc,mpr
65,64,30,1.0,15,0.5,0.4600,0.346414,0.469798,0.510447,0.211272
40,64,20,1.0,15,0.5,0.3875,0.265012,0.391519,0.508147,0.168905
45,64,20,2.0,15,0.5,0.2725,0.136900,0.256487,0.506580,0.171952
35,64,20,0.7,15,0.5,0.2550,0.136568,0.248284,0.505231,0.172220
90,64,40,1.0,15,0.5,0.2550,0.121686,0.225077,0.505348,0.255528
...,...,...,...,...,...,...,...,...,...,...
93,64,40,1.0,15,5.0,0.0225,0.004964,0.018138,0.500073,0.442797
106,64,50,0.5,15,1.0,0.0200,0.010670,0.027186,0.500089,0.297611
113,64,50,0.7,15,5.0,0.0200,0.004625,0.016424,0.499974,0.396915
48,64,20,2.0,15,5.0,0.0200,0.005214,0.017547,0.499952,0.470872
