In [3]:
import pickle

import pandas as pd
import scipy.sparse

from lightfm import LightFM

In [4]:
# Number of worker threads; passed to `model.fit` as `num_threads`.
THREADS = 8
# Relative path to the directory every input file below is read from.
DATA_ROOT = '../../data'

## Data loading

In [5]:
# Rating tables (judging by the file names: the full implicit-feedback set
# and its training split).
full_df = pd.read_csv(f'{DATA_ROOT}/ratings_all_implicit.csv.gz')
train_df = pd.read_csv(f'{DATA_ROOT}/ratings_train_implicit.csv.gz')

# Game metadata is stored as JSON Lines; read it, then keep only the columns
# listed here.
games_df = pd.read_json(f'{DATA_ROOT}/bgg_GameItem.jl', lines=True)
games_df = games_df[[
    'name',
    'bgg_id',
    'mechanic',
    'category',
    'complexity',
    'max_players_best',
    'min_players_best',
    'max_players_rec',
    'min_players_rec',
]]

# Feature names are flattened from the CSV's 2-D values into a 1-D array.
features_names = (
    pd.read_csv(f'{DATA_ROOT}/game_features_names.csv.gz')
    .values
    .flatten()
)
game_features = pd.read_csv(f'{DATA_ROOT}/game_features.csv.gz')

# NOTE: unpickling can execute arbitrary code -- only load pickles that were
# produced by a trusted pipeline.
dataset = pd.read_pickle(f'{DATA_ROOT}/dataset.pkl')
item_features = pd.read_pickle(f'{DATA_ROOT}/item_features.pkl')

# Sparse train/test interaction matrices saved in SciPy's .npz format.
train_interactions = scipy.sparse.load_npz(
    f'{DATA_ROOT}/train_interactions.npz'
)
test_interactions = scipy.sparse.load_npz(
    f'{DATA_ROOT}/test_interactions.npz'
)

## Model fitting

In [6]:
# Hyperparameters, unpacked as keyword arguments into the LightFM constructor.
# `rho` and `epsilon` presumably tune the 'adadelta' learning schedule --
# confirm against the LightFM documentation.
PARAMS = dict(
    no_components=70,
    learning_schedule='adadelta',
    loss='warp',
    item_alpha=3e-06,
    max_sampled=10,
    rho=0.95,
    epsilon=1.0e-06,
    random_state=42,
)

model = LightFM(**PARAMS)

In [7]:
# Train on the sparse training interactions together with the item features.
# Kept as the cell's last expression so the fitted model's repr is displayed.
model.fit(
    train_interactions,
    item_features=item_features,
    epochs=104,
    num_threads=THREADS,
    verbose=True,
)

Epoch: 100%|██████████| 104/104 [1:33:14<00:00, 53.80s/it]


<lightfm.lightfm.LightFM at 0x7f96d4059460>

In [8]:
# Persist the fitted model to disk. The `with` block guarantees the file
# handle is flushed and closed as soon as pickling finishes (or fails),
# instead of leaving an open handle for the garbage collector to clean up.
with open('../notebooks_data/warp_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)