In [27]:
import pandas as pd
import numpy as np
from ast import literal_eval
import scipy.sparse

from lightfm import LightFM
from lightfm.data import Dataset
from lightfm.evaluation import precision_at_k, recall_at_k, reciprocal_rank, auc_score
# To reimport:
# import tools.lightfm_tools
from tools.lightfm_tools import prepare_interactions, evaluate_model, evaluate_diversity_and_coverage, random_search
# import tools.lightfm_tools
# import importlib
# importlib.reload(tools.lightfm_tools)
# from tools.lightfm_tools import prepare_interactions, evaluate_model, evaluate_diversity_and_coverage, random_search


## Data loading

In [28]:
# Ratings from which the least active users and least popular games were already removed
df = pd.read_csv('data/ratings.csv.gz')
# Game metadata, read as JSON lines (one record per line)
games_df = pd.read_json('./data/bgg_GameItem.jl', lines=True)
# Names of the most popular features
features_names = pd.read_csv('data/LightFM_item_features_names.csv.gz')
# Per-game features; the 'features' column holds Python literals serialised as text,
# so it is parsed back into Python objects right after loading
game_features = (
    pd.read_csv('data/LightFM_item_features.csv.gz')
    .assign(features=lambda frame: frame['features'].map(literal_eval))
)

In [29]:
# Features are only useful for games that occur in the ratings, so drop the rest
features_list = list(features_names['0'])
games_list = list(df['bgg_id'])
game_features = game_features[game_features['bgg_id'].isin(games_list)]

In [30]:
# A positive interaction is either a rating above 6,
# or a game that is in the user's collection but was left unrated
df_positive = df.loc[
    (df['bgg_user_owned'].eq(1.0) & df['bgg_user_rating'].isna())
    | df['bgg_user_rating'].gt(6)
].copy()
df_positive['value'] = 1

In [31]:
# A negative interaction is a rating below 4
df_negative = df.loc[df['bgg_user_rating'].lt(4)].copy()
df_negative['value'] = -1
# Positive and negative interactions stacked together (used for the logistic-loss model)
df_logistic = pd.concat([df_positive, df_negative])

In [32]:
# Best configuration found so far: number of training epochs and the LightFM
# constructor keyword arguments (the loss itself is chosen per experiment)
best_epochs_so_far = 49
best_params_so_far = dict(
    no_components=71,
    learning_schedule='adadelta',
    learning_rate=0.05652168855133591,
    item_alpha=2.816672985617472e-06,
    max_sampled=9,
    rho=0.9369181237831744,
    epsilon=2.0854895195545605e-06,
)

## Preparation of dataset, interactions and item features matrices

In [33]:
# Register the users and games of the positive interactions, plus the selected
# feature names, with a fresh LightFM Dataset
dataset = Dataset()
dataset.fit(
    iter(df_positive['bgg_user_name']),
    iter(df_positive['bgg_id']),
    item_features=iter(features_list),
)

In [34]:
# Interactions for all loss functions except logistic.
# The three outputs of prepare_interactions are cached on disk: if every cache file
# is present they are loaded, otherwise they are rebuilt from df_positive and saved.
try:
    interactions = scipy.sparse.load_npz('data/interactions.npz')
    test_interactions = scipy.sparse.load_npz('data/test_interactions.npz')
    known = pd.read_csv('data/known.csv.gz')
    print('Loaded interactions')
except FileNotFoundError:
    # Only a missing cache file triggers the rebuild. Any other failure
    # (corrupt file, interrupt, typo in a name) is surfaced instead of being
    # silently swallowed as a bare `except:` would do.
    interactions, test_interactions, known = prepare_interactions(df_positive, dataset)
    scipy.sparse.save_npz('data/interactions.npz', interactions)
    scipy.sparse.save_npz('data/test_interactions.npz', test_interactions)
    known.to_csv('data/known.csv.gz', compression='gzip', index=False)

Loaded interactions


In [35]:
# Separate Dataset for the logistic-loss experiments: it is fitted on the users and
# games of df_logistic (positive and negative interactions) and the same feature names
dataset_logistic = Dataset()
dataset_logistic.fit(
    iter(df_logistic['bgg_user_name']),
    iter(df_logistic['bgg_id']),
    item_features=iter(features_list),
)

In [12]:
# Interactions for logistic loss (positive and negative interactions from df_logistic).
# Same caching scheme as for the other losses: load the three cache files if they
# exist, otherwise rebuild and save them.
try:
    interactions_logistic = scipy.sparse.load_npz('data/interactions_logistic.npz')
    test_interactions_logistic = scipy.sparse.load_npz('data/test_interactions_logistic.npz')
    known_logistic = pd.read_csv('data/known_logistic.csv.gz')
    print("Loaded interactions")
except FileNotFoundError:
    # Build with dataset_logistic, not dataset: the matrices must use the same
    # user/item mappings as id_item_features_logistic, which is built from
    # dataset_logistic. `dataset` was fitted on df_positive only.
    # NOTE(review): cache files produced with a different Dataset mapping must be
    # deleted by hand so that they are rebuilt here.
    interactions_logistic, test_interactions_logistic, known_logistic = prepare_interactions(df_logistic, dataset_logistic)
    scipy.sparse.save_npz('data/interactions_logistic.npz', interactions_logistic)
    scipy.sparse.save_npz('data/test_interactions_logistic.npz', test_interactions_logistic)
    known_logistic.to_csv('data/known_logistic.csv.gz', compression='gzip', index=False)

Loaded interactions


In [13]:
# Item-feature matrix: for every game keep only the features named in features_list.
# A set gives constant-time membership tests (the list was scanned once per feature),
# and zipping the two columns avoids creating one Series per row as iterrows() does.
allowed_features = set(features_list)
item_features = dataset.build_item_features(
    (game_id, [feature for feature in features if feature in allowed_features])
    for game_id, features in zip(game_features['bgg_id'], game_features['features'])
)

In [37]:
# Item-feature matrix with the selected features plus the game's own id as a feature.
# A set gives constant-time membership tests (the list was scanned once per feature),
# and zipping the two columns avoids creating one Series per row as iterrows() does.
# NOTE(review): Dataset() is created with default arguments; LightFM's default is
# presumably item_identity_features=True, in which case an identity feature is already
# added automatically and appending bgg_id here increases its weight - confirm intended.
allowed_features = set(features_list)
id_item_features = dataset.build_item_features(
    (game_id, [feature for feature in features if feature in allowed_features] + [game_id])
    for game_id, features in zip(game_features['bgg_id'], game_features['features'])
)

In [None]:
# Same as id_item_features, but built with dataset_logistic so that the matrix uses
# the mappings of the logistic-loss Dataset.
# A set gives constant-time membership tests (the list was scanned once per feature),
# and zipping the two columns avoids creating one Series per row as iterrows() does.
# NOTE(review): with LightFM's default item_identity_features=True (presumed, the
# Dataset is created with defaults) appending bgg_id increases the identity weight -
# confirm intended.
allowed_features = set(features_list)
id_item_features_logistic = dataset_logistic.build_item_features(
    (game_id, [feature for feature in features if feature in allowed_features] + [game_id])
    for game_id, features in zip(game_features['bgg_id'], game_features['features'])
)

In [15]:
# Control experiment: give every game the feature list of a randomly chosen game.
# The shuffle is seeded so that the control matrix is the same on every run, and the
# column is addressed by name rather than by position (the cell previously relied on
# 'features' being the second column).
shuffle_rng = np.random.default_rng(42)
random_game_features = game_features.copy()
random_game_features['features'] = shuffle_rng.permutation(game_features['features'].values)
# Set membership and zip() replace the per-feature list scan and iterrows()
allowed_features = set(features_list)
random_item_features = dataset.build_item_features(
    (game_id, [feature for feature in features if feature in allowed_features])
    for game_id, features in zip(random_game_features['bgg_id'], random_game_features['features'])
)

## Comparison of results achieved using different parameters

### WARP model

In [10]:
# Baseline: WARP loss with default hyperparameters, trained on interactions only
model = LightFM(loss='warp')
model.fit(
    interactions,
    epochs=20,
    num_threads=8,
    verbose=True,
)

Epoch: 100%|██████████| 20/20 [02:20<00:00,  7.02s/it]


<lightfm.lightfm.LightFM at 0x7f3fa8fd4b80>

In [17]:
# Train/test metrics of the baseline model, with k=5
evaluate_model(
    model,
    interactions,
    test_interactions,
    num_threads=8,
    k=5,
)

Precision: train 0.50
Precision: test 0.20
AUC: train 0.94
AUC: test 0.93


In [18]:
# WARP with default hyperparameters, this time using the item-feature matrix
features_model = LightFM(loss='warp')
features_model.fit(
    interactions,
    item_features=item_features,
    epochs=20,
    num_threads=8,
    verbose=True,
)

Epoch: 100%|██████████| 20/20 [10:27<00:00, 31.35s/it]


<lightfm.lightfm.LightFM at 0x7fa19c2e66d0>

In [19]:
# Train/test metrics of the feature-aware WARP model, with k=5
evaluate_model(
    features_model,
    interactions,
    test_interactions,
    item_features=item_features,
    num_threads=8,
    k=5,
)

Precision: train 0.49
Precision: test 0.19
AUC: train 0.93
AUC: test 0.92


In [20]:
# Control: identical WARP setup, but trained with the shuffled item features
random_features_model = LightFM(loss='warp')
random_features_model.fit(
    interactions,
    item_features=random_item_features,
    epochs=20,
    num_threads=8,
    verbose=True,
)

Epoch: 100%|██████████| 20/20 [09:29<00:00, 28.45s/it]


<lightfm.lightfm.LightFM at 0x7fa19c2e68b0>

In [21]:
# Train/test metrics of the shuffled-features control model, with k=5
evaluate_model(
    random_features_model,
    interactions,
    test_interactions,
    item_features=random_item_features,
    num_threads=8,
    k=5,
)

Precision: train 0.48
Precision: test 0.18
AUC: train 0.93
AUC: test 0.92


#### WARP hyperparameters search

In [8]:
# Random hyperparameter search with 20 sampled configurations, using the
# id-augmented item features; the return value is displayed as the cell output
random_search(
    interactions,
    test_interactions,
    id_item_features,
    num_threads=8,
    num_samples=20,
)

Params:
{'no_components': 72, 'learning_schedule': 'adagrad', 'loss': 'warp', 'learning_rate': 0.06247712728655479, 'item_alpha': 9.490407498434895e-06, 'max_sampled': 11, 'num_epochs': 45, 'rho': 0.9151663627867668, 'epsilon': 9.171970898190447e-06}


Epoch: 100%|██████████| 45/45 [47:09<00:00, 62.87s/it]
Epoch:   0%|          | 0/32 [00:00<?, ?it/s]

Train precision: 0.6410, test precision: 0.2078
Params:
{'no_components': 65, 'learning_schedule': 'adadelta', 'loss': 'warp', 'learning_rate': 0.022383884192652345, 'item_alpha': 9.557891502289009e-06, 'max_sampled': 13, 'num_epochs': 32, 'rho': 0.8292936663732287, 'epsilon': 3.4102368089271628e-06}


Epoch: 100%|██████████| 32/32 [38:40<00:00, 72.52s/it]


Train precision: 0.6099, test precision: 0.2007
Params:
{'no_components': 72, 'learning_schedule': 'adadelta', 'loss': 'warp', 'learning_rate': 0.018302438875387395, 'item_alpha': 7.1322848289047725e-06, 'max_sampled': 11, 'num_epochs': 45, 'rho': 0.8338214431327815, 'epsilon': 6.922122716155008e-06}


Epoch: 100%|██████████| 45/45 [42:50<00:00, 57.13s/it]


Train precision: 0.6302, test precision: 0.2020
Params:
{'no_components': 95, 'learning_schedule': 'adagrad', 'loss': 'warp', 'learning_rate': 0.03275109799535048, 'item_alpha': 8.997690018348368e-06, 'max_sampled': 9, 'num_epochs': 37, 'rho': 0.8470038142101286, 'epsilon': 6.723899132056612e-06}


Epoch: 100%|██████████| 37/37 [43:19<00:00, 70.26s/it]


Train precision: 0.5706, test precision: 0.2047
Params:
{'no_components': 60, 'learning_schedule': 'adagrad', 'loss': 'warp', 'learning_rate': 0.05116948414096397, 'item_alpha': 8.106676395442752e-06, 'max_sampled': 13, 'num_epochs': 41, 'rho': 0.8146936110149877, 'epsilon': 5.842940014384907e-06}


Epoch: 100%|██████████| 41/41 [43:33<00:00, 63.74s/it]


Train precision: 0.6047, test precision: 0.2075
Params:
{'no_components': 85, 'learning_schedule': 'adagrad', 'loss': 'warp', 'learning_rate': 0.04262816075760235, 'item_alpha': 3.1237271824180853e-07, 'max_sampled': 13, 'num_epochs': 37, 'rho': 0.8602124452303144, 'epsilon': 6.140861149682677e-06}


Epoch: 100%|██████████| 37/37 [45:50<00:00, 74.34s/it]


Train precision: 0.6059, test precision: 0.2058
Params:
{'no_components': 77, 'learning_schedule': 'adadelta', 'loss': 'warp', 'learning_rate': 0.04377413507543912, 'item_alpha': 3.126015423135039e-07, 'max_sampled': 11, 'num_epochs': 20, 'rho': 0.8611403781364557, 'epsilon': 6.2553941379274414e-06}


Epoch: 100%|██████████| 20/20 [19:33<00:00, 58.67s/it]


Train precision: 0.6327, test precision: 0.2098
Params:
{'no_components': 91, 'learning_schedule': 'adagrad', 'loss': 'warp', 'learning_rate': 0.09729632295647288, 'item_alpha': 8.144824123836496e-06, 'max_sampled': 9, 'num_epochs': 40, 'rho': 0.8616924952225455, 'epsilon': 7.861745844687e-06}


Epoch: 100%|██████████| 40/40 [41:24<00:00, 62.11s/it]


Train precision: 0.6930, test precision: 0.2068
Params:
{'no_components': 85, 'learning_schedule': 'adagrad', 'loss': 'warp', 'learning_rate': 0.03549996157764155, 'item_alpha': 2.6835927799481706e-06, 'max_sampled': 10, 'num_epochs': 34, 'rho': 0.8325177677034052, 'epsilon': 3.508904686877298e-06}


Epoch: 100%|██████████| 34/34 [40:04<00:00, 70.72s/it]


Train precision: 0.5719, test precision: 0.2044
Params:
{'no_components': 81, 'learning_schedule': 'adagrad', 'loss': 'warp', 'learning_rate': 0.08656903263726906, 'item_alpha': 1.7856021088623879e-06, 'max_sampled': 10, 'num_epochs': 32, 'rho': 0.9630560572956857, 'epsilon': 8.990180435447093e-06}


Epoch: 100%|██████████| 32/32 [32:47<00:00, 61.49s/it]
Epoch:   0%|          | 0/25 [00:00<?, ?it/s]

Train precision: 0.6640, test precision: 0.2042
Params:
{'no_components': 54, 'learning_schedule': 'adagrad', 'loss': 'warp', 'learning_rate': 0.037437120904102554, 'item_alpha': 7.980738372484048e-06, 'max_sampled': 8, 'num_epochs': 25, 'rho': 0.8550638666667969, 'epsilon': 1.8913667865100771e-06}


Epoch: 100%|██████████| 25/25 [21:19<00:00, 51.17s/it]
Epoch:   0%|          | 0/31 [00:00<?, ?it/s]

Train precision: 0.5432, test precision: 0.2025
Params:
{'no_components': 67, 'learning_schedule': 'adagrad', 'loss': 'warp', 'learning_rate': 0.08382698343628706, 'item_alpha': 4.145770676536075e-06, 'max_sampled': 11, 'num_epochs': 31, 'rho': 0.8033974617916476, 'epsilon': 5.203170667059746e-06}


Epoch: 100%|██████████| 31/31 [31:06<00:00, 60.21s/it]


Train precision: 0.6374, test precision: 0.2075
Params:
{'no_components': 83, 'learning_schedule': 'adadelta', 'loss': 'warp', 'learning_rate': 0.015183142440779484, 'item_alpha': 8.732869830247295e-06, 'max_sampled': 7, 'num_epochs': 43, 'rho': 0.9588474781980215, 'epsilon': 1.9536966762432584e-06}


Epoch: 100%|██████████| 43/43 [37:07<00:00, 51.81s/it]


Train precision: 0.6137, test precision: 0.2025
Params:
{'no_components': 94, 'learning_schedule': 'adagrad', 'loss': 'warp', 'learning_rate': 0.014523540323062419, 'item_alpha': 9.657214018614694e-06, 'max_sampled': 12, 'num_epochs': 34, 'rho': 0.9123118632428795, 'epsilon': 9.605724589529243e-07}


Epoch: 100%|██████████| 34/34 [46:37<00:00, 82.27s/it]
Epoch:   0%|          | 0/46 [00:00<?, ?it/s]

Train precision: 0.5230, test precision: 0.1967
Params:
{'no_components': 55, 'learning_schedule': 'adagrad', 'loss': 'warp', 'learning_rate': 0.03650632219355546, 'item_alpha': 2.198154797434897e-06, 'max_sampled': 12, 'num_epochs': 46, 'rho': 0.8301526394536567, 'epsilon': 8.976219271375456e-06}


Epoch: 100%|██████████| 46/46 [43:07<00:00, 56.24s/it]


Train precision: 0.5722, test precision: 0.2051
Params:
{'no_components': 92, 'learning_schedule': 'adadelta', 'loss': 'warp', 'learning_rate': 0.05078388997894149, 'item_alpha': 7.707576601518201e-06, 'max_sampled': 13, 'num_epochs': 27, 'rho': 0.8106288913966373, 'epsilon': 8.056768770513088e-06}


Epoch: 100%|██████████| 27/27 [28:41<00:00, 63.75s/it]


Train precision: 0.6448, test precision: 0.1906
Params:
{'no_components': 78, 'learning_schedule': 'adagrad', 'loss': 'warp', 'learning_rate': 0.011373711641591558, 'item_alpha': 6.949738056546539e-06, 'max_sampled': 11, 'num_epochs': 30, 'rho': 0.9079395431188544, 'epsilon': 2.026452427714593e-06}


Epoch: 100%|██████████| 30/30 [38:41<00:00, 77.38s/it]
Epoch:   0%|          | 0/46 [00:00<?, ?it/s]

Train precision: 0.4997, test precision: 0.1885
Params:
{'no_components': 73, 'learning_schedule': 'adadelta', 'loss': 'warp', 'learning_rate': 0.07228259832299408, 'item_alpha': 9.382342146190704e-06, 'max_sampled': 10, 'num_epochs': 46, 'rho': 0.9118148139245935, 'epsilon': 6.945673886249597e-06}


Epoch: 100%|██████████| 46/46 [41:51<00:00, 54.60s/it]
Epoch:   0%|          | 0/49 [00:00<?, ?it/s]

Train precision: 0.6116, test precision: 0.1898
Params:
{'no_components': 71, 'learning_schedule': 'adadelta', 'loss': 'warp', 'learning_rate': 0.05652168855133591, 'item_alpha': 2.816672985617472e-06, 'max_sampled': 9, 'num_epochs': 49, 'rho': 0.9369181237831744, 'epsilon': 2.0854895195545605e-06}


Epoch: 100%|██████████| 49/49 [43:03<00:00, 52.73s/it]


Train precision: 0.6203, test precision: 0.2108
Params:
{'no_components': 83, 'learning_schedule': 'adadelta', 'loss': 'warp', 'learning_rate': 0.04271551982778008, 'item_alpha': 7.013647542683806e-06, 'max_sampled': 13, 'num_epochs': 43, 'rho': 0.8022863069967255, 'epsilon': 9.261414654082415e-06}


Epoch: 100%|██████████| 43/43 [44:41<00:00, 62.37s/it]


Train precision: 0.6593, test precision: 0.1976


(0.21082604,
 {'no_components': 71,
  'learning_schedule': 'adadelta',
  'loss': 'warp',
  'learning_rate': 0.05652168855133591,
  'item_alpha': 2.816672985617472e-06,
  'max_sampled': 9,
  'rho': 0.9369181237831744,
  'epsilon': 2.0854895195545605e-06,
  'num_epochs': 49})

#### WARP with best hyperparameters

In [38]:
# WARP trained with the best hyperparameters and epoch count found so far
model = LightFM(loss='warp', **best_params_so_far)
model.fit(
    interactions,
    item_features=id_item_features,
    epochs=best_epochs_so_far,
    num_threads=8,
    verbose=True,
)

Epoch: 100%|██████████| 49/49 [43:37<00:00, 53.42s/it]


<lightfm.lightfm.LightFM at 0x7f3fc5785160>

In [39]:
# Train/test metrics of the tuned WARP model, with k=5
evaluate_model(
    model,
    interactions,
    test_interactions,
    item_features=id_item_features,
    num_threads=8,
    k=5,
)

Precision: train 0.62
Precision: test 0.21
AUC: train 0.98
AUC: test 0.93


In [40]:
# One tenth of the number of distinct user names in the ratings; its integer part
# is passed on as the user count
no_users = len(df['bgg_user_name'].unique()) / 10
# Diversity and coverage of the tuned WARP model with N=10
evaluate_diversity_and_coverage(
    model,
    int(no_users),
    dataset,
    games_df,
    known,
    'data/top10_warp.csv.gz',
    N=10,
    item_features=id_item_features,
)

100%|██████████| 24757/24757 [02:19<00:00, 177.48it/s]


Diversity: {'category': 0.23476236639180456, 'mechanic': 0.20414428242517269}
Coverage: 3101


### k-OS WARP model

In [24]:
# Baseline: k-OS WARP loss with default hyperparameters, interactions only
model = LightFM(loss='warp-kos')
model.fit(
    interactions,
    epochs=20,
    num_threads=8,
    verbose=True,
)

Epoch: 100%|██████████| 20/20 [03:19<00:00,  9.95s/it]


<lightfm.lightfm.LightFM at 0x7fa19c7c5ee0>

In [25]:
# Train/test metrics of the baseline k-OS WARP model, with k=5
evaluate_model(
    model,
    interactions,
    test_interactions,
    num_threads=8,
    k=5,
)

Precision: train 0.46
Precision: test 0.17
AUC: train 0.92
AUC: test 0.91


In [26]:
# k-OS WARP with default hyperparameters and the id-augmented item features
id_features_model = LightFM(loss='warp-kos')
id_features_model.fit(
    interactions,
    item_features=id_item_features,
    epochs=20,
    num_threads=8,
    verbose=True,
)

Epoch: 100%|██████████| 20/20 [11:35<00:00, 34.78s/it]


<lightfm.lightfm.LightFM at 0x7fa19c7c53d0>

In [27]:
# Train/test metrics of the feature-aware k-OS WARP model, with k=5
evaluate_model(
    id_features_model,
    interactions,
    test_interactions,
    item_features=id_item_features,
    num_threads=8,
    k=5,
)

Precision: train 0.46
Precision: test 0.17
AUC: train 0.91
AUC: test 0.90


#### k-OS WARP with best hyperparameters (for WARP)

In [44]:
# k-OS WARP trained with the best hyperparameters and epoch count found so far
model = LightFM(loss='warp-kos', **best_params_so_far)
model.fit(
    interactions,
    item_features=id_item_features,
    epochs=best_epochs_so_far,
    num_threads=8,
    verbose=True,
)

Epoch: 100%|██████████| 49/49 [42:32<00:00, 52.09s/it] 


<lightfm.lightfm.LightFM at 0x7f3fc57851c0>

In [45]:
# Train/test metrics of the tuned k-OS WARP model, with k=5
evaluate_model(
    model,
    interactions,
    test_interactions,
    item_features=id_item_features,
    num_threads=8,
    k=5,
)

Precision: train 0.69
Precision: test 0.19
AUC: train 0.94
AUC: test 0.91


In [46]:
# One tenth of the number of distinct user names in the ratings; its integer part
# is passed on as the user count
no_users = len(df['bgg_user_name'].unique()) / 10
# Diversity and coverage of the tuned k-OS WARP model with N=10
evaluate_diversity_and_coverage(
    model,
    int(no_users),
    dataset,
    games_df,
    known,
    'data/top10_warp-kos.csv.gz',
    N=10,
    item_features=id_item_features,
)

100%|██████████| 24757/24757 [02:19<00:00, 177.56it/s]


Diversity: {'category': 0.2360823393864554, 'mechanic': 0.20801886645890363}
Coverage: 2724


### BPR model

In [28]:
# Baseline: BPR loss with default hyperparameters, interactions only
model = LightFM(loss='bpr')
model.fit(
    interactions,
    epochs=20,
    num_threads=8,
    verbose=True,
)

Epoch: 100%|██████████| 20/20 [01:40<00:00,  5.01s/it]


<lightfm.lightfm.LightFM at 0x7fa19c88c220>

In [29]:
# Train/test metrics of the baseline BPR model, with k=5
evaluate_model(
    model,
    interactions,
    test_interactions,
    num_threads=8,
    k=5,
)

Precision: train 0.45
Precision: test 0.13
AUC: train 0.90
AUC: test 0.87


In [30]:
# BPR with default hyperparameters and the id-augmented item features
id_features_model = LightFM(loss='bpr')
id_features_model.fit(
    interactions,
    item_features=id_item_features,
    epochs=20,
    num_threads=8,
    verbose=True,
)

Epoch: 100%|██████████| 20/20 [14:18<00:00, 42.94s/it]


<lightfm.lightfm.LightFM at 0x7fa19c88c4f0>

In [31]:
# Train/test metrics of the feature-aware BPR model, with k=5
evaluate_model(
    id_features_model,
    interactions,
    test_interactions,
    item_features=id_item_features,
    num_threads=8,
    k=5,
)

Precision: train 0.41
Precision: test 0.12
AUC: train 0.90
AUC: test 0.86


#### BPR with best hyperparameters (for WARP)

In [None]:
# BPR with only no_components taken from the best configuration, 5 epochs.
# NOTE(review): this cell has no execution count and repeats the first cell of the
# "What causes precision to be 0?" section; `model` is reassigned by the next cell.
model = LightFM(
    loss='bpr',
    no_components=best_params_so_far['no_components'],
)
model.fit(
    interactions,
    item_features=id_item_features,
    epochs=5,
    num_threads=8,
    verbose=True,
)
evaluate_model(
    model,
    interactions,
    test_interactions,
    item_features=id_item_features,
    num_threads=8,
    k=5,
)

In [41]:
# BPR trained with the hyperparameters and epoch count that were best for WARP
model = LightFM(loss='bpr', **best_params_so_far)
model.fit(
    interactions,
    item_features=id_item_features,
    epochs=best_epochs_so_far,
    num_threads=8,
    verbose=True,
)

Epoch: 100%|██████████| 49/49 [1:25:20<00:00, 104.51s/it]


<lightfm.lightfm.LightFM at 0x7f3fbbe4e9a0>

In [42]:
# Train/test metrics of BPR with the WARP-tuned hyperparameters, with k=5
evaluate_model(
    model,
    interactions,
    test_interactions,
    item_features=id_item_features,
    num_threads=8,
    k=5,
)

Precision: train 0.00
Precision: test 0.00
AUC: train 0.18
AUC: test 0.21


In [43]:
# One tenth of the number of distinct user names in the ratings; its integer part
# is passed on as the user count
no_users = len(df['bgg_user_name'].unique()) / 10
# Diversity and coverage of the BPR model with N=10
evaluate_diversity_and_coverage(
    model,
    int(no_users),
    dataset,
    games_df,
    known,
    'data/top10_bpr.csv.gz',
    N=10,
    item_features=id_item_features,
)

100%|██████████| 24757/24757 [02:39<00:00, 155.21it/s]


Diversity: {'category': 0.2438473389921465, 'mechanic': 0.1611749723798841}
Coverage: 1828


#### What causes precision to be 0?

In [47]:
# Display the hyperparameters that the following cells add to the BPR model step by step
best_params_so_far

{'no_components': 71,
 'learning_schedule': 'adadelta',
 'learning_rate': 0.05652168855133591,
 'item_alpha': 2.816672985617472e-06,
 'max_sampled': 9,
 'rho': 0.9369181237831744,
 'epsilon': 2.0854895195545605e-06}

In [50]:
# Diagnosis step: BPR with only no_components taken from the best configuration
model = LightFM(
    loss='bpr',
    no_components=best_params_so_far['no_components'],
)
model.fit(
    interactions,
    item_features=id_item_features,
    epochs=5,
    num_threads=8,
    verbose=True,
)
evaluate_model(
    model,
    interactions,
    test_interactions,
    item_features=id_item_features,
    num_threads=8,
    k=5,
)

Epoch: 100%|██████████| 5/5 [08:26<00:00, 101.29s/it]


Precision: train 0.44
Precision: test 0.12
AUC: train 0.90
AUC: test 0.86


In [51]:
# Diagnosis step: no_components and learning_schedule from the best configuration
model = LightFM(
    loss='bpr',
    no_components=best_params_so_far['no_components'],
    learning_schedule=best_params_so_far['learning_schedule'],
)
model.fit(
    interactions,
    item_features=id_item_features,
    epochs=5,
    num_threads=8,
    verbose=True,
)
evaluate_model(
    model,
    interactions,
    test_interactions,
    item_features=id_item_features,
    num_threads=8,
    k=5,
)

Epoch: 100%|██████████| 5/5 [08:35<00:00, 103.06s/it]


Precision: train 0.57
Precision: test 0.12
AUC: train 0.94
AUC: test 0.87


#### Is item_alpha too big for BPR?

In [52]:
# Diagnosis step: additionally apply item_alpha from the best configuration
model = LightFM(
    loss='bpr',
    no_components=best_params_so_far['no_components'],
    learning_schedule=best_params_so_far['learning_schedule'],
    item_alpha=best_params_so_far['item_alpha'],
)
model.fit(
    interactions,
    item_features=id_item_features,
    epochs=5,
    num_threads=8,
    verbose=True,
)
evaluate_model(
    model,
    interactions,
    test_interactions,
    item_features=id_item_features,
    num_threads=8,
    k=5,
)

Epoch: 100%|██████████| 5/5 [08:35<00:00, 103.06s/it]


Precision: train 0.15
Precision: test 0.04
AUC: train 0.84
AUC: test 0.83


In [54]:
# Diagnosis step: item_alpha left at its default, max_sampled added instead
model = LightFM(
    loss='bpr',
    no_components=best_params_so_far['no_components'],
    learning_schedule=best_params_so_far['learning_schedule'],
    max_sampled=best_params_so_far['max_sampled'],
)
model.fit(
    interactions,
    item_features=id_item_features,
    epochs=5,
    num_threads=8,
    verbose=True,
)
evaluate_model(
    model,
    interactions,
    test_interactions,
    item_features=id_item_features,
    num_threads=8,
    k=5,
)

Epoch: 100%|██████████| 5/5 [08:39<00:00, 103.98s/it]


Precision: train 0.57
Precision: test 0.12
AUC: train 0.94
AUC: test 0.87


In [55]:
# Diagnosis step: rho added on top of the previous settings
model = LightFM(
    loss='bpr',
    no_components=best_params_so_far['no_components'],
    learning_schedule=best_params_so_far['learning_schedule'],
    max_sampled=best_params_so_far['max_sampled'],
    rho=best_params_so_far['rho'],
)
model.fit(
    interactions,
    item_features=id_item_features,
    epochs=5,
    num_threads=8,
    verbose=True,
)
evaluate_model(
    model,
    interactions,
    test_interactions,
    item_features=id_item_features,
    num_threads=8,
    k=5,
)

Epoch: 100%|██████████| 5/5 [08:41<00:00, 104.23s/it]


Precision: train 0.55
Precision: test 0.12
AUC: train 0.93
AUC: test 0.87


In [56]:
# Diagnosis step: epsilon added on top of the previous settings
model = LightFM(
    loss='bpr',
    no_components=best_params_so_far['no_components'],
    learning_schedule=best_params_so_far['learning_schedule'],
    max_sampled=best_params_so_far['max_sampled'],
    rho=best_params_so_far['rho'],
    epsilon=best_params_so_far['epsilon'],
)
model.fit(
    interactions,
    item_features=id_item_features,
    epochs=5,
    num_threads=8,
    verbose=True,
)
evaluate_model(
    model,
    interactions,
    test_interactions,
    item_features=id_item_features,
    num_threads=8,
    k=5,
)

Epoch: 100%|██████████| 5/5 [08:40<00:00, 104.18s/it]


Precision: train 0.60
Precision: test 0.12
AUC: train 0.94
AUC: test 0.87


In [57]:
# Diagnosis step: same settings as with epsilon added, trained for 20 epochs instead of 5
model = LightFM(
    loss='bpr',
    no_components=best_params_so_far['no_components'],
    learning_schedule=best_params_so_far['learning_schedule'],
    max_sampled=best_params_so_far['max_sampled'],
    rho=best_params_so_far['rho'],
    epsilon=best_params_so_far['epsilon'],
)
model.fit(
    interactions,
    item_features=id_item_features,
    epochs=20,
    num_threads=8,
    verbose=True,
)
evaluate_model(
    model,
    interactions,
    test_interactions,
    item_features=id_item_features,
    num_threads=8,
    k=5,
)

Epoch: 100%|██████████| 20/20 [34:45<00:00, 104.27s/it]


Precision: train 0.78
Precision: test 0.12
AUC: train 0.96
AUC: test 0.83


In [58]:
# Diagnosis step: every best-configuration value except item_alpha, 5 epochs
model = LightFM(
    loss='bpr',
    no_components=best_params_so_far['no_components'],
    learning_schedule=best_params_so_far['learning_schedule'],
    learning_rate=best_params_so_far['learning_rate'],
    max_sampled=best_params_so_far['max_sampled'],
    rho=best_params_so_far['rho'],
    epsilon=best_params_so_far['epsilon'],
)
model.fit(
    interactions,
    item_features=id_item_features,
    epochs=5,
    num_threads=8,
    verbose=True,
)
evaluate_model(
    model,
    interactions,
    test_interactions,
    item_features=id_item_features,
    num_threads=8,
    k=5,
)

Epoch: 100%|██████████| 5/5 [08:37<00:00, 103.41s/it]


Precision: train 0.59
Precision: test 0.12
AUC: train 0.94
AUC: test 0.87


### Logistic model

In [32]:
# Baseline: logistic loss with default hyperparameters, trained on the
# positive-and-negative interaction matrix without item features
model = LightFM(loss='logistic')
model.fit(
    interactions_logistic,
    epochs=20,
    num_threads=8,
    verbose=True,
)

Epoch: 100%|██████████| 20/20 [01:08<00:00,  3.43s/it]


<lightfm.lightfm.LightFM at 0x7fa19c738b50>

In [33]:
# Train/test metrics of the baseline logistic model, with k=5
evaluate_model(
    model,
    interactions_logistic,
    test_interactions_logistic,
    num_threads=8,
    k=5,
)

Precision: train 0.24
Precision: test 0.08
AUC: train 0.80
AUC: test 0.79


In [34]:
# Logistic loss with default hyperparameters and the id-augmented item features
# built from dataset_logistic
id_features_model = LightFM(loss='logistic')
id_features_model.fit(
    interactions_logistic,
    item_features=id_item_features_logistic,
    epochs=20,
    num_threads=8,
    verbose=True,
)

Epoch: 100%|██████████| 20/20 [06:49<00:00, 20.47s/it]


<lightfm.lightfm.LightFM at 0x7fa19c7c50a0>

In [35]:
# Train/test metrics of the feature-aware logistic model, with k=5
evaluate_model(
    id_features_model,
    interactions_logistic,
    test_interactions_logistic,
    item_features=id_item_features_logistic,
    num_threads=8,
    k=5,
)

Precision: train 0.06
Precision: test 0.01
AUC: train 0.67
AUC: test 0.67
