In [1]:
import sys
import os

# Resolve the project root (the parent of the directory the notebook starts in)
# exactly once. The os.chdir call below changes the working directory, so
# recomputing the root from os.getcwd() on a re-run of this cell would climb
# one directory higher each time; guarding on the existing name keeps the
# cell idempotent within a kernel session.
if 'project_root' not in globals():
    project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
# Make project-local packages importable without installing them.
if project_root not in sys.path:
    sys.path.insert(0, project_root)
# Relative paths used later in the notebook are resolved from the project root.
os.chdir(project_root)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

import warnings
# Suppress FutureWarning messages for the rest of the session to keep cell output readable.
warnings.filterwarnings('ignore', category=FutureWarning)

from recommenders.utils.timer import Timer
from recommenders.models.deeprec.models.graphrec.lightgcn import LightGCN
from recommenders.models.deeprec.DataModel.ImplicitCF import ImplicitCF
from recommenders.datasets import movielens
from recommenders.datasets.python_splitters import python_stratified_split
from recommenders.evaluation.python_evaluation import map, ndcg_at_k, precision_at_k, recall_at_k
from recommenders.utils.constants import SEED as DEFAULT_SEED
from recommenders.models.deeprec.deeprec_utils import prepare_hparams
from recommenders.utils.notebook_utils import store_metadata

from tecd_retail_recsys.data import DataPreprocessor
from tecd_retail_recsys.metrics import calculate_metrics

# Record the interpreter and key library versions alongside the results
# so the run can be reproduced in a matching environment.
print(f"System version: {sys.version}")
print(f"Pandas version: {pd.__version__}")
print(f"Numpy version: {np.__version__}")
print(f"Tensorflow version: {tf.__version__}")

System version: 3.11.14 (main, Oct 28 2025, 12:11:54) [Clang 20.1.4 ]
Pandas version: 2.3.3
Numpy version: 1.26.4
Tensorflow version: 2.15.1


In [None]:
# Cut-off used for recommendation lists and for the @K evaluation metrics.
TOP_K = 100
# NOTE(review): in the visible cells EPOCHS and BATCH_SIZE are referenced only
# by commented-out overrides in the prepare_hparams call, so the values from
# `yaml_file` are the ones that take effect — confirm before relying on these.
EPOCHS = 10
BATCH_SIZE = 1024

# Seed passed to the data model and the LightGCN model.
SEED = DEFAULT_SEED

# Paths are relative to the project root (the working directory set in the first cell).
yaml_file = "configs/lightgcn.yaml"
user_file = "models/lightgcn/user_embeddings.csv"
# Fixed: the path previously contained a stray double slash ("models//lightgcn").
item_file = "models/lightgcn/item_embeddings.csv"

In [None]:
# Build the train/validation/test splits with the project preprocessor.
dp = DataPreprocessor(day_begin=1082, day_end=1308, val_days=20, test_days=20, min_user_interactions=1, min_item_interactions=20)
train_df, val_df, test_df = dp.preprocess()

# Rename the id columns to userID/itemID and attach a constant rating of 1.0
# to the train and validation frames (test_df is left as returned).
recommenders_columns = {'user_id': 'userID', 'item_id': 'itemID'}
for frame in (train_df, val_df):
    frame.rename(columns=recommenders_columns, inplace=True)
    frame['rating'] = 1.0

In [26]:
# Wrap the splits in the recommenders data model; the validation split is
# supplied as the `test` argument, so in-training evaluation runs against it.
data = ImplicitCF(train=train_df, test=val_df, seed=SEED)

In [39]:
# Load hyper-parameters from the YAML config. Only top_k is overridden here;
# the commented-out keyword arguments are optional overrides that are
# currently disabled, so those values come from `yaml_file`.
hparams = prepare_hparams(yaml_file,
                        #   n_layers=3,
                        #   batch_size=BATCH_SIZE,
                        #   epochs=EPOCHS,
                        #   learning_rate=0.005,
                        #   eval_epoch=5,
                          top_k=TOP_K
                         )

In [40]:
# Echo the effective hyper-parameters so the run configuration is visible in the output.
reported_params = ('n_layers', 'embed_size', 'batch_size', 'epochs', 'learning_rate', 'decay', 'eval_epoch', 'top_k')
hparam_values = vars(hparams)
for param in reported_params:
    print(param, hparam_values[param])

n_layers 3
embed_size 256
batch_size 1024
epochs 30
learning_rate 0.002
decay 0.0001
eval_epoch 5
top_k 100


In [41]:
# Instantiate LightGCN from the hyper-parameters and the data model, using the shared seed.
model = LightGCN(hparams, data, seed=SEED)

Already create adjacency matrix.
Already normalize adjacency matrix.
Using xavier initialization.


In [42]:
# Train the model and measure the wall-clock time of the whole fit.
with Timer() as train_time:
    model.fit()

elapsed_seconds = train_time.interval
print("Took {} seconds for training.".format(elapsed_seconds))

Epoch 1 (train)332.3s: train loss = 0.40599 = (mf)0.40533 + (embed)0.00065
Epoch 2 (train)325.9s: train loss = 0.27121 = (mf)0.26966 + (embed)0.00155
Epoch 3 (train)328.0s: train loss = 0.23212 = (mf)0.22990 + (embed)0.00222
Epoch 4 (train)304.4s: train loss = 0.21020 = (mf)0.20740 + (embed)0.00280
Epoch 5 (train)304.1s + (eval)6.6s: train loss = 0.19037 = (mf)0.18698 + (embed)0.00338, recall = 0.06718, ndcg = 0.06722, precision = 0.01961, map = 0.01297
Epoch 6 (train)301.7s: train loss = 0.17092 = (mf)0.16690 + (embed)0.00403
Epoch 7 (train)323.2s: train loss = 0.15151 = (mf)0.14677 + (embed)0.00474
Epoch 8 (train)323.8s: train loss = 0.13338 = (mf)0.12787 + (embed)0.00551
Epoch 9 (train)323.4s: train loss = 0.11699 = (mf)0.11069 + (embed)0.00630
Epoch 10 (train)321.8s + (eval)6.8s: train loss = 0.10316 = (mf)0.09606 + (embed)0.00710, recall = 0.06708, ndcg = 0.06680, precision = 0.01995, map = 0.01252
Epoch 11 (train)322.8s: train loss = 0.09101 = (mf)0.08312 + (embed)0.00788
Epoch 1

In [52]:
# Persist the trained session as a checkpoint under the project root.
checkpoint_path = "models/lightgcn/model"
# Create the target directory up front so saving does not depend on it
# already existing (e.g. on a fresh checkout).
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
model.saver.save(model.sess, checkpoint_path)

'models/lightgcn/model'

In [43]:
# Score the validation frame and keep TOP_K recommendations per user.
# NOTE(review): remove_seen=False presumably leaves items seen in training
# in the candidate set — confirm this is intended for this dataset.
topk_scores = model.recommend_k_items(val_df, top_k=TOP_K, remove_seen=False)
topk_scores.head()

Unnamed: 0,userID,itemID,prediction
0,40764,11984,8.961967
1,40764,11651,8.746389
2,40764,11413,8.726232
3,40764,2639,8.249516
4,40764,26596,8.207556


In [44]:
# Order each user's recommendations by descending prediction score, then
# collapse them into one list of item ids per user.
ranked_scores = topk_scores.sort_values(by=['userID', 'prediction'], ascending=[True, False])
recs = ranked_scores.groupby('userID', as_index=False)['itemID'].apply(list)
recs.head()

Unnamed: 0,userID,itemID
0,11,"[1725, 18846, 26666, 19213, 20405, 27077, 1836..."
1,14,"[17934, 5631, 7982, 8502, 29228, 25997, 11364,..."
2,21,"[3184, 17934, 29244, 20245, 13369, 17396, 2903..."
3,29,"[696, 1245, 12571, 17934, 16892, 683, 16749, 5..."
4,39,"[20562, 23218, 27827, 17750, 11753, 29623, 123..."


In [45]:
# Ranking metrics on the validation split at cut-off TOP_K.
# `map` here is the metric imported from recommenders.evaluation.python_evaluation,
# not the Python builtin.
eval_map = map(val_df, topk_scores, k=TOP_K)
eval_ndcg = ndcg_at_k(val_df, topk_scores, k=TOP_K)
eval_precision = precision_at_k(val_df, topk_scores, k=TOP_K)
eval_recall = recall_at_k(val_df, topk_scores, k=TOP_K)

metric_lines = [
    "MAP:\t%f" % eval_map,
    "NDCG:\t%f" % eval_ndcg,
    "Precision@K:\t%f" % eval_precision,
    "Recall@K:\t%f" % eval_recall,
]
print(*metric_lines, sep='\n')

MAP:	0.051785
NDCG:	0.218634
Precision@K:	0.063160
Recall@K:	0.196344


In [50]:
# Export the learned user and item embeddings to the CSV paths set in the config cell.
model.infer_embedding(user_file, item_file)

In [46]:
# Restore the project's column names (user_id/item_id) on the train and validation frames.
project_columns = {'userID': 'user_id', 'itemID': 'item_id'}
for frame in (train_df, val_df):
    frame.rename(columns=project_columns, inplace=True)

# Group the splits into one frame and add the combined train+validation history.
joined = dp.get_grouped_data(train_df, val_df, test_df)
joined['train_val_interactions'] = joined['train_interactions'] + joined['val_interactions']
print(joined.shape)

# Attach each user's LightGCN recommendation list and score it with the project metrics.
joined = (
    joined
    .merge(recs, left_on='user_id', right_on='userID')
    .rename(columns={'itemID': 'lightgcn_recs'})
)
calculate_metrics(joined, train_col='train_interactions', gt_col='val_interactions', model_preds='lightgcn_recs', verbose=True)

(7425, 5)
[Metrics debug] resolved gt_col='val_interactions' item_id_index=0
[Metrics debug] ratings_true shape: (228339, 3) ratings_pred shape: (742500, 3)
  ratings_true dtypes: {'user_id': dtype('int64'), 'item_id': dtype('int64')}
  ratings_pred dtypes: {'user_id': dtype('int64'), 'item_id': dtype('int64')}
  user_id=11 gt_count=22 pred_count=100 overlap=6
  user_id=14 gt_count=5 pred_count=100 overlap=0
    [ID spaces] gt sample=[9341, 16732, 17585, 28024, 30789] range=[9341, 30789] | rec sample=[21, 83, 394, 1245, 1331] range=[21, 30434]
  user_id=21 gt_count=47 pred_count=100 overlap=14

At k=10:
  MAP@10       = 0.1050
  NDCG@10      = 0.2720
  Precision@10 = 0.1412
  Recall@10    = 0.0470

At k=100:
  MAP@100       = 0.0531
  NDCG@100      = 0.2186
  Precision@100 = 0.0632
  Recall@100    = 0.1963

Other Metrics:
  MRR                 = 0.2506
  Catalog Coverage    = 0.9917
  Diversity     = 0.9967  [0=same recs for all, 1=unique recs]
  Novelty             = 0.7926
  Serendip

{'MAP@10': 0.10499715770707833,
 'NDCG@10': 0.27195746357033646,
 'Precision@10': 0.14117171717171717,
 'Recall@10': 0.046980288350610584,
 'MAP@100': 0.05307955355564169,
 'NDCG@100': 0.21863417717459313,
 'Precision@100': 0.06315959595959597,
 'Recall@100': 0.19634418710425167,
 'MRR': 0.25063472556046146,
 'Catalog_Coverage': 0.9916750674774804,
 'Diversity': 0.9967207738973602,
 'Novelty': 0.7925676902772032,
 'Serendipity': 0.027445904820672617}

<!DOCTYPE html>
<html>
<head>
    <style>
        table {
            border-collapse: collapse;
            width: 100%;
            font-family: Arial, sans-serif;
            margin: 20px 0;
        }
        th {
            background-color: #4CAF50;
            color: white;
            padding: 12px;
            text-align: left;
            border: 1px solid #ddd;
        }
        td {
            padding: 10px;
            border: 1px solid #ddd;
            text-align: left;
        }
        tr:nth-child(even) {
            background-color: #f2f2f2;
        }
        tr:hover {
            background-color: #ddd;
        }
        .best {
            background-color: #c8e6c9 !important;
            font-weight: bold;
        }
        .worst {
            background-color: #ffcdd2 !important;
        }
    </style>
</head>
<body>
    <h2>LightGCN: эксперименты</h2>
    <table>
        <thead>
            <tr>
                <th>Номер эксперимента</th>
                <th>embed_size</th>
                <th>n_layers</th>
                <th>decay</th>
                <th>epochs</th>
                <th>learning_rate</th>
                <th>batch_size</th>
                <th>NDCG@100</th>
            </tr>
        </thead>
        <tbody>
            <tr class="worst">
                <td>1</td>
                <td>64</td>
                <td>3</td>
                <td>0.0001</td>
                <td>10</td>
                <td>0.005</td>
                <td>1024</td>
                <td>0.161162</td>
            </tr>
            <tr>
                <td>2</td>
                <td>256</td>
                <td>3</td>
                <td>0.001</td>
                <td>10</td>
                <td>0.005</td>
                <td>1024</td>
                <td>0.188174</td>
            </tr>
            <tr>
                <td>3</td>
                <td>512</td>
                <td>4</td>
                <td>0.00005</td>
                <td>10</td>
                <td>0.002</td>
                <td>1024</td>
                <td>0.173303</td>
            </tr>
            <tr>
                <td>4</td>
                <td>384</td>
                <td>3</td>
                <td>0.0002</td>
                <td>10</td>
                <td>0.002</td>
                <td>1024</td>
                <td>0.176823</td>
            </tr>
            <tr class="best">
                <td>5</td>
                <td>256</td>
                <td>3</td>
                <td>0.0001</td>
                <td>30</td>
                <td>0.002</td>
                <td>1024</td>
                <td><strong>0.218634</strong></td>
            </tr>
        </tbody>
    </table>
</body>
</html>


`Наилучшая конфигурация позволила добиться NDCG@100 = 0.2186`