In [1]:
import pickle

import numpy as np
import pandas as pd
import scipy.sparse

from lightfm import LightFM

In [2]:
# Directory (relative to this notebook) that every input file below is read from.
DATA_ROOT = '../../data'
# Passed as `num_threads` to the model-fitting call.
THREADS = 8

## Data loading

In [3]:
# Ratings tables (judging by the file names: the full set and the training split).
full_df = pd.read_csv(f'{DATA_ROOT}/ratings_all_implicit.csv.gz')
train_df = pd.read_csv(f'{DATA_ROOT}/ratings_train_implicit.csv.gz')

# Game metadata: parse the JSON-lines file, then keep only the columns listed here.
game_columns = [
    'name', 'bgg_id', 'mechanic', 'category', 'complexity',
    'max_players_best', 'min_players_best', 'max_players_rec', 'min_players_rec',
]
games_df = pd.read_json(f'{DATA_ROOT}/bgg_GameItem.jl', lines=True)[game_columns]

# Feature names are flattened into a 1-D array; the feature table is kept as a frame.
features_names = (
    pd.read_csv(f'{DATA_ROOT}/game_features_names.csv.gz')
    .values
    .flatten()
)
game_features = pd.read_csv(f'{DATA_ROOT}/game_features.csv.gz')

# Pickled objects produced elsewhere; `dataset` must expose `.mapping()` (used below).
dataset = pd.read_pickle(f'{DATA_ROOT}/dataset.pkl')
item_features = pd.read_pickle(f'{DATA_ROOT}/item_features.pkl')

# Sparse train/test interaction matrices stored in SciPy's .npz format.
train_interactions = scipy.sparse.load_npz(f'{DATA_ROOT}/train_interactions.npz')
test_interactions = scipy.sparse.load_npz(f'{DATA_ROOT}/test_interactions.npz')

In [4]:
# Load the pickled user and game profile tables (users first, then games).
users_profiles, games_profiles = map(
    pd.read_pickle,
    (f'{DATA_ROOT}/users_profiles.pkl', f'{DATA_ROOT}/games_profiles.pkl'),
)

## Data processing

In [5]:
# Split the flat feature-name array: entries 0-19 and entries 20-39.
# NOTE(review): presumably the file lists 20 mechanics followed by 20 categories — confirm.
mechanics_names, categories_names = features_names[:20], features_names[20:40]

In [6]:
# Translate the profile indices through the dataset's mappings, then order rows
# by the translated index. Element 0 of `dataset.mapping()` is applied to users
# and element 2 to games.
# NOTE(review): presumably these are the raw-id -> internal-id dicts of a LightFM
# Dataset — confirm. Re-running this cell maps already-translated indices again.
id_maps = dataset.mapping()
user_id_map, item_id_map = id_maps[0], id_maps[2]

users_profiles.index = users_profiles.index.map(user_id_map)
users_profiles = users_profiles.sort_index()

games_profiles.index = games_profiles.index.map(item_id_map)
games_profiles = games_profiles.sort_index()

In [7]:
# Multiply the first 40 user-profile columns by 3.
# The assignment goes through `.iloc` instead of mutating `users_profiles.values`:
# `.values` can hand back a copy (mixed column dtypes) or a read-only array
# (pandas Copy-on-Write), so an in-place write to it is not guaranteed to reach
# the DataFrame — it may silently do nothing or raise.
# NOTE(review): the first 40 columns presumably hold the mechanic/category
# features — confirm. This cell is not idempotent: each re-run scales again.
users_profiles.iloc[:, :40] = users_profiles.iloc[:, :40] * 3

In [8]:
# Convert both profile tables to C-contiguous float32 arrays.
# NOTE(review): presumably the layout required by the model's compiled fitting code — confirm.
cython_users_profiles, cython_games_profiles = (
    np.ascontiguousarray(profiles.values, dtype=np.float32)
    for profiles in (users_profiles, games_profiles)
)

## Model fitting

In [9]:
# Hyperparameters forwarded verbatim to the LightFM constructor.
# NOTE(review): 'custom' is not one of upstream LightFM's documented losses —
# presumably this notebook runs against a modified LightFM build; confirm.
PARAMS = {
    'no_components': 70,
    'learning_schedule': 'adadelta',
    'loss': 'custom',
    'item_alpha': 3e-06,
    'max_sampled': 10,
    'rho': 0.95,
    'epsilon': 1.0e-06,
    'random_state': 42,
}

model = LightFM(**PARAMS)

In [10]:
# Train for 104 epochs on the training interactions, passing the item features
# and the dense user/game profile arrays.
# NOTE(review): `user_profiles` / `item_profiles` are not arguments of upstream
# LightFM's `fit_partial` — presumably a modified build; confirm. Because this is
# `fit_partial`, re-running the cell presumably continues training on the same
# model rather than starting over — confirm.
model.fit_partial(
    train_interactions,
    item_features=item_features,
    user_profiles=cython_users_profiles,
    item_profiles=cython_games_profiles,
    epochs=104,
    num_threads=THREADS,
    verbose=True,
)

Epoch: 100%|██████████| 104/104 [1:22:15<00:00, 47.45s/it]


<lightfm.lightfm.LightFM at 0x7fd2e4f967c0>

In [11]:
# Persist the fitted model. The context manager closes (and thereby flushes) the
# file handle even if pickling raises, instead of leaving it to garbage collection.
with open('../notebooks_data/custom_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)