In [1]:
import pickle

import pandas as pd
import scipy.sparse
from lightfm import LightFM
from lightfm.data import Dataset

In [2]:
# Number of worker threads; passed as `num_threads` to `model.fit` in the training cell.
THREADS = 8

## Preparing dataset

In [3]:
# Ratings table. Column names are supplied explicitly through `names`,
# so pandas treats the first line of the file as data, not as a header.
df = pd.read_csv(
    '/pio/scratch/1/i313924/data/train_data/slim_ratings.csv',
    names=['asin', 'reviewerID', 'overall', 'unixReviewTime'],
)

# Item category metadata, stored as one JSON object per line (`lines=True`).
categories = pd.read_json(
    '/pio/scratch/1/recommender_systems/interim/Amazon/meta_Clothing_Shoes_and_Jewelry_categories.json',
    lines=True,
)

In [4]:
# Distinct item ids that actually occur in the ratings table.
items_list = df['asin'].unique()

# Keep only the metadata rows whose item appears in the ratings.
categories = categories.loc[categories['asin'].isin(items_list)]

# Distinct `category_1` values among the remaining items; these are the
# feature names registered with the LightFM dataset.
categories_names = categories['category_1'].unique()

In [5]:
# Register every user id, item id and category name with the LightFM
# dataset so that it can assign internal indices to them.
dataset = Dataset()
dataset.fit(
    iter(df['reviewerID']),
    iter(df['asin']),
    item_features=iter(categories_names),
)

# Persist the fitted dataset (id/feature mappings) with the highest
# pickle protocol available.
with open('/pio/scratch/1/i313924/data/lightfm_data/slim_dataset_features.pkl', 'wb') as f:
    pickle.dump(dataset, f, protocol=pickle.HIGHEST_PROTOCOL)

In [6]:
# Build the item-feature matrix: each item is described by its own id
# plus its `category_1` value.
# NOTE(review): `Dataset()` is created with default arguments; if identity
# features are enabled by default, listing `asin` again here may double the
# weight of the identity feature -- confirm this is intended.
item_features = dataset.build_item_features(
    (asin, [asin, category])
    for asin, category in zip(categories['asin'], categories['category_1'])
)

# Persist the feature matrix with the highest pickle protocol available.
with open('/pio/scratch/1/i313924/data/lightfm_data/slim_item_features.pkl', 'wb') as f:
    pickle.dump(item_features, f, protocol=pickle.HIGHEST_PROTOCOL)

## Interactions

In [7]:
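# The interactions matrix is built the same way as in the normal training run (not repeated here);
# the pre-built matrix is loaded from disk in the model-training section below.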

## Model training

In [10]:
# Load the previously saved sparse interactions matrix from its .npz file.
interactions = scipy.sparse.load_npz(
    '/pio/scratch/1/i313924/data/lightfm_data/slim_interactions.npz'
)

In [11]:
# Model configuration: WARP loss, adadelta learning schedule,
# 100 latent components.
model = LightFM(
    loss='warp',
    learning_schedule='adadelta',
    no_components=100,
)

In [13]:
# Train for 1000 epochs on the interactions, using the item-feature matrix
# and THREADS worker threads; `verbose=True` shows the epoch progress bar.
model.fit(
    interactions,
    item_features=item_features,
    epochs=1000,
    num_threads=THREADS,
    verbose=True,
)

Epoch: 100%|██████████| 1000/1000 [37:00<00:00,  2.22s/it]


<lightfm.lightfm.LightFM at 0x7f99dc4cbd68>

In [14]:
# Persist the trained model with pickle protocol 4.
# The file is opened in a `with` block so the handle is flushed and closed
# deterministically, instead of being left open as with an inline `open(...)`.
with open('/pio/scratch/1/i313924/data/lightfm_data/warp_model_1000_epochs_slim_features.pkl', 'wb') as f:
    pickle.dump(model, f, protocol=4)