# LightFM Model Training and Evaluation

LightFM is a hybrid content-collaborative model that can learn from both user–item interactions and user/item features.

In [1]:
%load_ext autoreload
%autoreload 2

In [1]:
import sys
import os

# Resolve the project root so that project-relative paths (e.g. configs/lightfm.yaml)
# resolve and the project package can be imported.
#
# The first run assumes the kernel starts one directory below the root, so the root
# is the parent of the working directory.  Because this cell also changes the working
# directory, re-running it used to climb one level higher each time; if the working
# directory already contains the config file we are already at the root and stay put.
_cwd = os.getcwd()
if os.path.isfile(os.path.join(_cwd, 'configs', 'lightfm.yaml')):
    project_root = _cwd
else:
    project_root = os.path.abspath(os.path.join(_cwd, '..'))
if project_root not in sys.path:
    sys.path.insert(0, project_root)
os.chdir(project_root)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yaml

import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

def seed_everything(seed):
    """Seed the global random number generators used in this notebook.

    Seeds Python's built-in ``random`` module and NumPy's legacy global
    generator (``np.random``) with the same value.

    Args:
        seed: Value forwarded unchanged to ``random.seed`` and ``np.random.seed``.
    """
    import random as _stdlib_random

    # Same order as before: stdlib generator first, then NumPy.
    for apply_seed in (_stdlib_random.seed, np.random.seed):
        apply_seed(seed)

from rectools import Columns
from rectools.dataset import Dataset, SparseFeatures
from rectools.models import LightFMWrapperModel
from lightfm import LightFM

from tecd_retail_recsys.data import DataPreprocessor
from tecd_retail_recsys.data.bert4rec_dataset import BERT4RecDatasetBuilder
from tecd_retail_recsys.metrics import calculate_metrics

# Record the interpreter and core library versions alongside the results.
print(
    f"System version: {sys.version}",
    f"Pandas version: {pd.__version__}",
    f"Numpy version: {np.__version__}",
    sep="\n",
)



System version: 3.11.14 (main, Oct 28 2025, 12:11:54) [Clang 20.1.4 ]
Pandas version: 2.3.3
Numpy version: 1.26.4


## Load Configuration

In [None]:
# Load every hyperparameter and run setting from the YAML config so that a run is
# described by the config file alone.
with open('configs/lightfm.yaml', 'r') as f:
    config = yaml.safe_load(f)

# --- LightFM estimator hyperparameters -------------------------------------
NO_COMPONENTS = config['model']['no_components']
K = config['model']['k']
N = config['model']['n']
LEARNING_SCHEDULE = config['model']['learning_schedule']
LOSS = config['model']['loss']
LEARNING_RATE = config['model']['learning_rate']
RHO = config['model']['rho']
# Normalise to float once, here.  The model cell casts this value with float(),
# which suggests it may come out of YAML as a string (PyYAML is known to read
# exponent literals without a decimal point, e.g. 1e-6, as str -- TODO confirm
# against the config file).
EPSILON = float(config['model']['epsilon'])
ITEM_ALPHA = config['model']['item_alpha']
USER_ALPHA = config['model']['user_alpha']
MAX_SAMPLED = config['model']['max_sampled']
RANDOM_STATE = config['model']['random_state']

# --- Training / inference settings -----------------------------------------
EPOCHS = config['train']['epochs']
NUM_THREADS = config['train']['num_threads']
VERBOSE = config['train']['verbose']
TOP_K = config['train']['top_k']
RECOMMEND_N_THREADS = config['train']['recommend_n_threads']
RECOMMEND_USE_GPU_RANKING = config['train']['recommend_use_gpu_ranking']

# --- Output settings --------------------------------------------------------
MODEL_DIR = config['info']['MODEL_DIR']
METRICS = config['info']['metrics']
SAVE_MODEL = config['info']['save_model']

# Use the configured random_state for the global RNGs as well, so changing the
# seed in the config changes every source of randomness together.  42 (the
# previous hard-coded value) remains the fallback when the config leaves it unset.
SEED = RANDOM_STATE if RANDOM_STATE is not None else 42
seed_everything(SEED)

os.makedirs(MODEL_DIR, exist_ok=True)

print("Model: LightFM")
print(f"Number of components: {NO_COMPONENTS}")
print(f"Loss: {LOSS}")
print(f"Learning schedule: {LEARNING_SCHEDULE}")
print(f"Learning rate: {LEARNING_RATE}")
# NOTE(review): the LightFM docs describe learning_rate as the adagrad setting and
# rho/epsilon as the adadelta settings, so report those too -- confirm which of
# them actually matter for the configured schedule.
print(f"Rho: {RHO}")
print(f"Epsilon: {EPSILON}")
print(f"Epochs: {EPOCHS}")
print(f"Number of threads: {NUM_THREADS}")
print(f"K: {K}")
print(f"N: {N}")
print(f"Item alpha: {ITEM_ALPHA}")
print(f"User alpha: {USER_ALPHA}")
print(f"Max sampled: {MAX_SAMPLED}")
print(f"Random state: {RANDOM_STATE}")

Model: LightFM
Number of components: 512
Loss: warp-kos
Learning schedule: adadelta
Learning rate: 0.03
Epochs: 200
Number of threads: 8
K: 10
N: 20
Item alpha: 0.0001
User alpha: 0.0001
Max sampled: 10
Random state: 42


## Data Preparation

In [3]:
# Settings for the preprocessor: the day window, the sizes of the validation and
# test periods, and the minimum interaction counts used for filtering.
preprocessing_params = dict(
    day_begin=1082,
    day_end=1308,
    val_days=20,
    test_days=20,
    min_user_interactions=1,
    min_item_interactions=20,
)
dp = DataPreprocessor(**preprocessing_params)
train_df, val_df, test_df = dp.preprocess()

# Snapshot copies of the train/validation frames; the evaluation cell groups
# these copies rather than the working frames.
train_orig, val_orig = train_df.copy(), val_df.copy()

print(
    f"Train shape: {train_df.shape}",
    f"Val shape: {val_df.shape}",
    f"Test shape: {test_df.shape}",
    f"Number of users: {train_df['user_id'].nunique()}",
    f"Number of items: {train_df['item_id'].nunique()}",
    sep="\n",
)

Starting data preprocessing...
Loading events from t_ecd_small_partial/dataset/small/retail/events
Loaded 236,479,226 total events
Loading items data from t_ecd_small_partial/dataset/small/retail/items.pq
Loaded 250,171 items with features: ['item_id', 'item_brand_id', 'item_category', 'item_subcategory', 'item_price', 'item_embedding']
Merged item features. Data shape: (236479226, 12)
Filtered to 3,758,762 events with action_type='added-to-cart'
After filtering (min_user_interactions=1, min_item_interactions=20): 3,249,972 events, 84,944 users, 30,954 items
Created mappings: 84944 users, 30954 items
Temporal split - Train: days < 1269 (902,543 events), Val: days 1269-1288 (228,339 events), Test: days >= 1289 (223,395 events)
Users in each part (train, val, test) - 7425
Train shape: (902543, 12)
Val shape: (228339, 12)
Test shape: (223395, 12)
Number of users: 7425
Number of items: 30751


## Prepare RecTools Dataset with Item Features

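The RecTools dataset is built with the project's `BERT4RecDatasetBuilder`, reused here for LightFM: categorical item features (brand, category, subcategory) and user features are included, while item embeddings, price features and temporal features are switched off.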

In [45]:
# Build the RecTools dataset from the training interactions only.
# BERT4RecDatasetBuilder is imported in the notebook's import cell, together with
# the other project imports, so this cell no longer carries its own import.
builder = BERT4RecDatasetBuilder(train_df)

# Item embeddings, price features and temporal features are disabled; user
# features are enabled.  The second return value is kept under its original
# name but is not used by the LightFM cells shown in this notebook.
dataset, item_net_config = builder.build_dataset(
    use_item_embeddings=False,
    use_price_features=False,
    use_temporal_features=False,
    use_user_features=True,
    n_factors=NO_COMPONENTS
)



üèóÔ∏è  BERT4Rec Dataset Builder
‚úÖ Interactions: 902543 —Å—Ç—Ä–æ–∫
üì¶ –î–æ–±–∞–≤–ª–µ–Ω–∏–µ –±–∞–∑–æ–≤—ã—Ö item features...
  ‚úÖ Brand: 30751 items
  ‚úÖ Category: 30199 items
  ‚úÖ Subcategory: 30199 items
üë§ –î–æ–±–∞–≤–ª–µ–Ω–∏–µ user features...
  ‚úÖ Interaction level: 7425 users
  ‚úÖ User price segment: 7425 users
  ‚úÖ Favorite brand: 7425 users
  ‚úÖ Favorite category: 7423 users
  ‚úÖ User diversity: 7423 users
  ‚úÖ User recency: 7425 users

üì¶ –ò—Ç–æ–≥–æ item features: 91149 —Å—Ç—Ä–æ–∫
   –§–∏—á–∏: ['brand', 'category', 'subcategory']
   –£–Ω–∏–∫–∞–ª—å–Ω—ã—Ö —Ç–æ–≤–∞—Ä–æ–≤: 30751

üë§ –ò—Ç–æ–≥–æ user features: 44546 —Å—Ç—Ä–æ–∫
   –§–∏—á–∏: ['interaction_level', 'user_price_segment', 'favorite_brand', 'favorite_category', 'user_diversity', 'user_recency']
   –£–Ω–∏–∫–∞–ª—å–Ω—ã—Ö –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª–µ–π: 7425

üî® –°–æ–∑–¥–∞–Ω–∏–µ RecTools Dataset...
‚úÖ Dataset: 7425 users, 30751 items

‚úÖ ItemNet: ID + Categorical
‚úÖ Dataset –≥–æ—Ç–æ–≤ –∫ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞

## Model Training

In [None]:
# Hyperparameters of the underlying LightFM estimator, all taken from the
# constants defined in the configuration cell.
lightfm_params = {
    'no_components': NO_COMPONENTS,
    'k': K,
    'n': N,
    'learning_schedule': LEARNING_SCHEDULE,
    'loss': LOSS,
    'learning_rate': LEARNING_RATE,
    'rho': RHO,
    'epsilon': float(EPSILON),  # explicit cast: the estimator is always handed a float
    'item_alpha': ITEM_ALPHA,
    'user_alpha': USER_ALPHA,
    'max_sampled': MAX_SAMPLED,
    'random_state': RANDOM_STATE,
}
base_model = LightFM(**lightfm_params)

# Settings of the RecTools wrapper: training length, threading and ranking options.
wrapper_params = {
    'epochs': EPOCHS,
    'num_threads': NUM_THREADS,
    'recommend_n_threads': RECOMMEND_N_THREADS,
    'recommend_use_gpu_ranking': RECOMMEND_USE_GPU_RANKING,
    'verbose': VERBOSE,
}
model = LightFMWrapperModel(model=base_model, **wrapper_params)

print("LightFM model created successfully")

LightFM model created successfully


In [None]:
%%time
# Train the wrapped LightFM model on the RecTools dataset.  The number of epochs
# and training threads were fixed when the wrapper was constructed, so nothing
# else is passed here.  The recorded run of this cell took about 26 minutes.
model.fit(dataset)

print("Training completed!")

Epoch: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [26:26<00:00,  7.93s/it]

Training completed!
CPU times: user 26min 18s, sys: 5.29 s, total: 26min 23s
Wall time: 26min 26s





## Generate Recommendations

In [None]:
%%time
# Recommend for every user that appears in the validation split.
val_users = val_df['user_id'].unique()

# filter_viewed=False: items the user already interacted with in training are
# not excluded from the recommendations.
recommendations = model.recommend(
    users=val_users, dataset=dataset, k=TOP_K, filter_viewed=False
)

# Collapse the long recommendation table into one row per user holding the
# list of recommended item ids.
recs_grouped = (
    recommendations
    .groupby('user_id', as_index=False)['item_id']
    .agg(list)
    .rename(columns={'item_id': 'lightfm_recs'})
)
recs_grouped.head()

CPU times: user 13.6 s, sys: 2.47 s, total: 16 s
Wall time: 1.47 s


Unnamed: 0,user_id,lightfm_recs
0,11,"[29877, 14941, 7952, 12608, 18081, 14848, 3038..."
1,14,"[6207, 14972, 19756, 1809, 2175, 19040, 9654, ..."
2,21,"[486, 16067, 17934, 7258, 23428, 18328, 6680, ..."
3,29,"[24532, 17678, 1245, 9342, 29837, 8231, 10394,..."
4,39,"[5117, 15184, 28191, 13391, 14580, 18875, 4194..."


## Evaluation

In [None]:
def _recs_or_empty(value):
    """Return ``value`` unchanged when it is a list, otherwise an empty list."""
    return value if isinstance(value, list) else []


# Grouped interactions for the train / validation / test splits.
joined = dp.get_grouped_data(train_orig, val_orig, test_df)
joined['train_val_interactions'] = joined['train_interactions'] + joined['val_interactions']

# Left join keeps every row of `joined`; rows without recommendations come out of
# the merge with a non-list placeholder, which is replaced by an empty list.
evaluation_df = joined.merge(recs_grouped, how='left', on='user_id')
evaluation_df['lightfm_recs'] = evaluation_df['lightfm_recs'].apply(_recs_or_empty)

print(
    f"Evaluation dataframe shape: {evaluation_df.shape}",
    f"Users with recommendations: {(evaluation_df['lightfm_recs'].str.len() > 0).sum()}",
    sep="\n",
)

Evaluation dataframe shape: (7425, 6)
Users with recommendations: 7425


In [None]:
# Score the LightFM recommendations against the validation interactions.
# `train_col` names the column with each user's training interactions, `gt_col`
# the ground-truth column and `model_preds` the column of recommendation lists.
# With verbose=True the recorded run printed per-user debug lines followed by
# the metric report.
metrics_result = calculate_metrics(
    evaluation_df,
    train_col='train_interactions',
    gt_col='val_interactions',
    model_preds='lightfm_recs',
    verbose=True
)

[Metrics debug] resolved gt_col='val_interactions' item_id_index=0
[Metrics debug] ratings_true shape: (228339, 3) ratings_pred shape: (742500, 3)
  ratings_true dtypes: {'user_id': dtype('int64'), 'item_id': dtype('int64')}
  ratings_pred dtypes: {'user_id': dtype('int64'), 'item_id': dtype('int64')}
  user_id=11 gt_count=22 pred_count=100 overlap=7
  user_id=14 gt_count=5 pred_count=100 overlap=0
    [ID spaces] gt sample=[9341, 16732, 17585, 28024, 30789] range=[9341, 30789] | rec sample=[311, 560, 958, 1809, 2175] range=[311, 30251]
  user_id=21 gt_count=47 pred_count=100 overlap=14

At k=10:
  MAP@10       = 0.0840
  NDCG@10      = 0.2315
  Precision@10 = 0.1242
  Recall@10    = 0.0430

At k=100:
  MAP@100       = 0.0542
  NDCG@100      = 0.2218
  Precision@100 = 0.0683
  Recall@100    = 0.2001

Other Metrics:
  MRR                 = 0.2199
  Catalog Coverage    = 0.9439
  Diversity     = 0.9966  [0=same recs for all, 1=unique recs]
  Novelty             = 0.9130
  Serendipity    

## Save Model and Recommendations

In [None]:
if SAVE_MODEL:
    # All three artifacts are written into MODEL_DIR: the fitted model, the
    # per-user recommendation lists, and the full (ungrouped) recommendation table.
    model_path, recs_path, recs_full_path = (
        os.path.join(MODEL_DIR, artifact_name)
        for artifact_name in (
            "lightfm_model.pkl",
            "recommendations.parquet",
            "recommendations_full.parquet",
        )
    )

    model.save(model_path)
    print(f"Model saved to {model_path}")

    recs_grouped.to_parquet(recs_path, index=False)
    print(f"Recommendations saved to {recs_path}")

    recommendations.to_parquet(recs_full_path, index=False)
    print(f"Full recommendations saved to {recs_full_path}")

Model saved to ./models/lightfm/lightfm_model.pkl
Recommendations saved to ./models/lightfm/recommendations.parquet
Full recommendations saved to ./models/lightfm/recommendations_full.parquet


In [None]:
# Extract the learned user and item representations with bias terms included.
# In the recorded run this gave 514 columns for a 512-component model.
user_vectors, item_vectors = model.get_vectors(dataset, add_biases=True)

print(
    f"User vectors shape: {user_vectors.shape}",
    f"Item vectors shape: {item_vectors.shape}",
    sep="\n",
)

User vectors shape: (7425, 514)
Item vectors shape: (30751, 514)


<!DOCTYPE html>
<html>
<head>
    <style>
        table {
            border-collapse: collapse;
            width: 100%;
            font-family: Arial, sans-serif;
            margin: 20px 0;
        }
        th {
            background-color: #2196F3;
            color: white;
            padding: 12px;
            text-align: left;
            border: 1px solid #ddd;
            font-size: 13px;
        }
        td {
            padding: 10px;
            border: 1px solid #ddd;
            text-align: left;
            font-size: 12px;
        }
        tr:nth-child(even) {
            background-color: #f2f2f2;
        }
        tr:hover {
            background-color: #ddd;
        }
        .best {
            background-color: #c8e6c9 !important;
            font-weight: bold;
        }
        .worst {
            background-color: #ffcdd2 !important;
        }
        .good {
            background-color: #e8f5e9 !important;
        }
    </style>
</head>
<body>
    <h2>LightFM: эксперименты</h2>
    <table>
        <thead>
            <tr>
                <th>№</th>
                <th>no_components</th>
                <th>loss</th>
                <th>k</th>
                <th>n</th>
                <th>epochs</th>
                <th>learning_schedule</th>
                <th>learning_rate</th>
                <th>item_alpha</th>
                <th>user_alpha</th>
                <th>max_sampled</th>
                <th>user_features</th>
                <th>NDCG@100</th>
            </tr>
        </thead>
        <tbody>
            <tr class="worst">
                <td>1</td>
                <td>256</td>
                <td>warp</td>
                <td>5</td>
                <td>10</td>
                <td>30</td>
                <td>adagrad</td>
                <td>0.05</td>
                <td>0.0</td>
                <td>0.0</td>
                <td>10</td>
                <td>❌</td>
                <td>0.1986</td>
            </tr>
            <tr>
                <td>2</td>
                <td>512</td>
                <td>warp</td>
                <td>5</td>
                <td>10</td>
                <td>30</td>
                <td>adagrad</td>
                <td>0.05</td>
                <td>0.0</td>
                <td>0.0</td>
                <td>10</td>
                <td>❌</td>
                <td>0.2068</td>
            </tr>
            <tr>
                <td>3</td>
                <td>512</td>
                <td>warp</td>
                <td>5</td>
                <td>10</td>
                <td>50</td>
                <td>adagrad</td>
                <td>0.05</td>
                <td>0.0</td>
                <td>0.0</td>
                <td>10</td>
                <td>❌</td>
                <td>0.2076</td>
            </tr>
            <tr>
                <td>4</td>
                <td>512</td>
                <td>warp-kos</td>
                <td>5</td>
                <td>10</td>
                <td>50</td>
                <td>adagrad</td>
                <td>0.05</td>
                <td>0.0</td>
                <td>0.0</td>
                <td>10</td>
                <td>❌</td>
                <td>0.2150</td>
            </tr>
            <tr>
                <td>5</td>
                <td>512</td>
                <td>warp-kos</td>
                <td>100</td>
                <td>100</td>
                <td>50</td>
                <td>adagrad</td>
                <td>0.05</td>
                <td>0.0</td>
                <td>0.0</td>
                <td>10</td>
                <td>❌</td>
                <td>0.1983</td>
            </tr>
            <tr>
                <td>6</td>
                <td>512</td>
                <td>warp-kos</td>
                <td>10</td>
                <td>20</td>
                <td>50</td>
                <td>adagrad</td>
                <td>0.05</td>
                <td>0.0</td>
                <td>0.0</td>
                <td>10</td>
                <td>❌</td>
                <td>0.2151</td>
            </tr>
            <tr>
                <td>7</td>
                <td>512</td>
                <td>warp-kos</td>
                <td>10</td>
                <td>20</td>
                <td>50</td>
                <td>adagrad</td>
                <td>0.05</td>
                <td>0.0</td>
                <td>0.0</td>
                <td>10</td>
                <td>✅</td>
                <td>0.2105</td>
            </tr>
            <tr>
                <td>8</td>
                <td>512</td>
                <td>warp-kos</td>
                <td>10</td>
                <td>20</td>
                <td>70</td>
                <td>adagrad</td>
                <td>0.05</td>
                <td>0.001</td>
                <td>0.001</td>
                <td>10</td>
                <td>✅</td>
                <td>0.2048</td>
            </tr>
            <tr class="good">
                <td>9</td>
                <td>512</td>
                <td>warp-kos</td>
                <td>10</td>
                <td>20</td>
                <td>100</td>
                <td>adagrad</td>
                <td>0.05</td>
                <td>0.0001</td>
                <td>0.0001</td>
                <td>10</td>
                <td>✅</td>
                <td>0.2166</td>
            </tr>
            <tr>
                <td>10</td>
                <td>512</td>
                <td>warp-kos</td>
                <td>10</td>
                <td>20</td>
                <td>100</td>
                <td>adagrad</td>
                <td>0.05</td>
                <td>0.0001</td>
                <td>0.0001</td>
                <td>50</td>
                <td>✅</td>
                <td>0.2157</td>
            </tr>
            <tr class="good">
                <td>11</td>
                <td>512</td>
                <td>warp-kos</td>
                <td>10</td>
                <td>20</td>
                <td>100</td>
                <td>adadelta</td>
                <td>0.05</td>
                <td>0.0001</td>
                <td>0.0001</td>
                <td>10</td>
                <td>✅</td>
                <td>0.2195</td>
            </tr>
            <tr class="best">
                <td>12</td>
                <td>512</td>
                <td>warp-kos</td>
                <td>10</td>
                <td>20</td>
                <td>200</td>
                <td>adadelta</td>
                <td>0.03</td>
                <td>0.0001</td>
                <td>0.0001</td>
                <td>10</td>
                <td>✅</td>
                <td><strong>0.2218</strong></td>
            </tr>
        </tbody>
    </table>   
</body>
</html>

`Наилучшая конфигурация достигла NDCG@100 = 0.2218`