# BiVAE Model Training and Evaluation

Bilateral Variational Autoencoder for Collaborative Filtering

In [1]:
import sys
import os

project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.insert(0, project_root)
os.chdir(project_root)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yaml

import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

# Fix for scipy sparse matrix .A attribute removed in scipy >= 1.13
from scipy import sparse
for sparse_class in [sparse.csr_matrix, sparse.csc_matrix, sparse.bsr_matrix, sparse.lil_matrix, sparse.dok_matrix, sparse.coo_matrix]:
    if not hasattr(sparse_class, 'A'):
        sparse_class.A = property(lambda self: self.toarray())

import cornac
from recommenders.utils.timer import Timer
from recommenders.evaluation.python_evaluation import map, ndcg_at_k, precision_at_k, recall_at_k
from recommenders.utils.constants import SEED as DEFAULT_SEED
from recommenders.datasets.python_splitters import python_stratified_split

from tecd_retail_recsys.data import DataPreprocessor
from tecd_retail_recsys.metrics import calculate_metrics

print(f"System version: {sys.version}")
print(f"Pandas version: {pd.__version__}")
print(f"Numpy version: {np.__version__}")
print(f"Cornac version: {cornac.__version__}")

System version: 3.11.14 (main, Oct 28 2025, 12:11:54) [Clang 20.1.4 ]
Pandas version: 2.3.3
Numpy version: 1.26.4
Cornac version: 2.2.1


In [2]:
with open('configs/bivae.yaml', 'r') as f:
    config = yaml.safe_load(f)


TOP_K = config['train']['top_k']
EPOCHS = config['train']['epochs']
BATCH_SIZE = config['train']['batch_size']
LEARNING_RATE = config['train']['learning_rate']
BETA_KL = config['train']['beta_kl']
EVAL_EPOCH = config['train']['eval_epoch']

K = config['model']['k']
ENCODER_STRUCTURE = config['model']['encoder_structure']
ACT_FN = config['model']['act_fn']
LIKELIHOOD = config['model']['likelihood']

MODEL_DIR = config['info']['MODEL_DIR']
SEED = DEFAULT_SEED

os.makedirs(MODEL_DIR, exist_ok=True)

print(f"Model: BiVAE")
print(f"Latent factors (k): {K}")
print(f"Encoder structure: {ENCODER_STRUCTURE}")
print(f"Activation function: {ACT_FN}")
print(f"Likelihood: {LIKELIHOOD}")
print(f"Epochs: {EPOCHS}")
print(f"Batch size: {BATCH_SIZE}")
print(f"Learning rate: {LEARNING_RATE}")
print(f"Beta KL (KL weight): {BETA_KL}")

Model: BiVAE
Latent factors (k): 100
Encoder structure: [200, 100]
Activation function: relu
Likelihood: pois
Epochs: 200
Batch size: 256
Learning rate: 0.001
Beta KL (KL weight): 0.05


## Data Preparation

In [3]:
dp = DataPreprocessor(day_begin=1082, day_end=1308, val_days=20, test_days=20, min_user_interactions=1, min_item_interactions=20)
train_df, val_df, test_df = dp.preprocess()

train_orig = train_df.copy()
val_orig = val_df.copy()

train_df.rename(columns={'user_id': 'userID', 'item_id': 'itemID'}, inplace=True)
val_df.rename(columns={'user_id': 'userID', 'item_id': 'itemID'}, inplace=True)
val_df['rating'] = 1.0

test_df.rename(columns={'user_id': 'userID', 'item_id': 'itemID'}, inplace=True)
test_df['rating'] = 1.0

print(f"Train shape: {train_df.shape}")
print(f"Val shape: {val_df.shape}")
print(f"Test shape: {test_df.shape}")
print(f"Number of users: {train_df['userID'].nunique()}")
print(f"Number of items: {train_df['itemID'].nunique()}")

Starting data preprocessing...
Loading events from t_ecd_small_partial/dataset/small/retail/events
Loaded 236,479,226 total events
Loading items data from t_ecd_small_partial/dataset/small/retail/items.pq
Loaded 250,171 items with features: ['item_id', 'item_brand_id', 'item_category', 'item_subcategory', 'item_price', 'item_embedding']
Merged item features. Data shape: (236479226, 12)
Filtered to 3,758,762 events with action_type='added-to-cart'
After filtering (min_user_interactions=1, min_item_interactions=20): 3,249,972 events, 84,944 users, 30,954 items
Created mappings: 84944 users, 30954 items
Temporal split - Train: days < 1269 (902,543 events), Val: days 1269-1288 (228,339 events), Test: days >= 1289 (223,395 events)
Users in each part (train, val, test) - 7425
Train shape: (902543, 12)
Val shape: (228339, 13)
Test shape: (223395, 13)
Number of users: 7425
Number of items: 30751


In [None]:
# Prepare data for Cornac

# train_data = [(str(row['userID']), str(row['itemID']), 1.0) for _, row in train_df.iterrows()]
train_agg = train_df.groupby(['userID', 'itemID']).size().reset_index(name='count')
train_data = [(str(row['userID']), str(row['itemID']), float(row['count'])) 
              for _, row in train_agg.iterrows()]


val_data = [(str(row['userID']), str(row['itemID']), 1.0) for _, row in val_df.iterrows()]

train_set = cornac.data.Dataset.from_uir(train_data, seed=SEED)
print(f"Train set: {train_set.num_users} users, {train_set.num_items} items, {train_set.num_ratings} interactions")

val_set = cornac.data.Dataset.from_uir(val_data, seed=SEED)
print(f"Val set: {val_set.num_users} users, {val_set.num_items} items, {val_set.num_ratings} interactions")

Train set: 7425 users, 30751 items, 630733 interactions
Val set: 7425 users, 26716 items, 198681 interactions


## Model Training

In [None]:
model = cornac.models.BiVAECF(
    k=K,
    encoder_structure=ENCODER_STRUCTURE,
    act_fn=ACT_FN,
    likelihood=LIKELIHOOD,
    n_epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=LEARNING_RATE,
    beta_kl=BETA_KL,
    seed=SEED,
    verbose=True
)

print("Model initialized successfully")

Model initialized successfully


In [None]:
with Timer() as train_time:
    model.fit(train_set, val_set=val_set)

print(f"Training took {train_time.interval:.2f} seconds")

  0%|          | 0/200 [00:00<?, ?it/s]

Training took 719.53 seconds


## Generate Recommendations

In [28]:
from recommenders.models.cornac.cornac_utils import predict_ranking
with Timer() as t:
    all_predictions = predict_ranking(model, train_set, usercol='userID', itemcol='itemID', remove_seen=False)

In [29]:
all_predictions.head()

Unnamed: 0,userID,itemID,prediction
0,11,502,0.183386
1,11,750,0.001353
2,11,1725,0.063581
3,11,2101,0.035567
4,11,2104,0.028233


In [30]:
top100 = (all_predictions
          .sort_values(['userID', 'prediction'], ascending=[True, False])
          .groupby('userID', as_index=False)
          .head(100)
         )
print(top100.shape)
top100['userID'] = top100['userID'].astype(np.int64)
top100['itemID'] = top100['itemID'].astype(np.int64)
top100.head()

(742500, 3)


Unnamed: 0,userID,itemID,prediction
26098546,10000,6343,0.221295
26098547,10000,10210,0.16605
26083057,10000,21177,0.05912
26105366,10000,439,0.046461
26091892,10000,22720,0.036045


## Evaluation

In [31]:
eval_map = map(val_df, top100, k=TOP_K)
eval_ndcg = ndcg_at_k(val_df, top100, k=TOP_K)
eval_precision = precision_at_k(val_df, top100, k=TOP_K)
eval_recall = recall_at_k(val_df, top100, k=TOP_K)

print("MAP:\t%f" % eval_map,
      "NDCG:\t%f" % eval_ndcg,
      "Precision@K:\t%f" % eval_precision,
      "Recall@K:\t%f" % eval_recall, sep='\n')

MAP:	0.035134
NDCG:	0.162419
Precision@K:	0.045623
Recall@K:	0.138938


In [41]:
joined = dp.get_grouped_data(train_orig, val_orig, test_df)
joined['train_val_interactions'] = joined['train_interactions'] + joined['val_interactions']
print(f"Joined shape: {joined.shape}")

recs = top100.groupby('userID', as_index=False).itemID.agg(list)
joined = joined.merge(recs, left_on='user_id', right_on='userID').rename(columns={'itemID': 'bivae_recs'})
metrics = calculate_metrics(joined, train_col='train_interactions', gt_col='val_interactions', model_preds='bivae_recs', verbose=True)

Joined shape: (7425, 5)
[Metrics debug] resolved gt_col='val_interactions' item_id_index=0
[Metrics debug] ratings_true shape: (228339, 3) ratings_pred shape: (742500, 3)
  ratings_true dtypes: {'user_id': dtype('int64'), 'item_id': dtype('int64')}
  ratings_pred dtypes: {'user_id': dtype('int64'), 'item_id': dtype('int64')}
  user_id=11 gt_count=22 pred_count=100 overlap=4
  user_id=14 gt_count=5 pred_count=100 overlap=0
    [ID spaces] gt sample=[9341, 16732, 17585, 28024, 30789] range=[9341, 30789] | rec sample=[83, 182, 1052, 1245, 1331] range=[83, 30825]
  user_id=21 gt_count=47 pred_count=100 overlap=8

At k=10:
  MAP@10       = 0.0878
  NDCG@10      = 0.2281
  Precision@10 = 0.1202
  Recall@10    = 0.0374

At k=100:
  MAP@100       = 0.0360
  NDCG@100      = 0.1624
  Precision@100 = 0.0456
  Recall@100    = 0.1389

Other Metrics:
  MRR                 = 0.2323
  Catalog Coverage    = 0.6973
  Diversity     = 0.9953  [0=same recs for all, 1=unique recs]
  Novelty             = 0.

## Save Model

In [33]:
model_path = os.path.join(MODEL_DIR, "bivae_model.pkl")
model.save(model_path)
print(f"Model saved to {model_path}")

BiVAECF model is saved to ./models/bivae/bivae_model.pkl/BiVAECF/2026-02-23_23-29-23-451228.pkl
Model saved to ./models/bivae/bivae_model.pkl


In [42]:
recs_path = os.path.join(MODEL_DIR, "recommendations.parquet")
recs.to_parquet(recs_path)
print(f"Recommendations saved to {recs_path}")

Recommendations saved to ./models/bivae/recommendations.parquet


<!DOCTYPE html>
<html>
<head>
    <style>
        table {
            border-collapse: collapse;
            width: 100%;
            font-family: Arial, sans-serif;
            margin: 20px 0;
        }
        th {
            background-color: #4CAF50;
            color: white;
            padding: 12px;
            text-align: left;
            border: 1px solid #ddd;
        }
        td {
            padding: 10px;
            border: 1px solid #ddd;
            text-align: left;
        }
        tr:nth-child(even) {
            background-color: #f2f2f2;
        }
        tr:hover {
            background-color: #ddd;
        }
        .best {
            background-color: #c8e6c9 !important;
            font-weight: bold;
        }
        .worst {
            background-color: #ffcdd2 !important;
        }
    </style>
</head>
<body>
    <h2>BiVAE: эксперименты</h2>
    <table>
        <thead>
            <tr>
                <th>№</th>
                <th>k</th>
                <th>Encoder</th>
                <th>Activation</th>
                <th>Likelihood</th>
                <th>beta_kl</th>
                <th>Batch_size</th>
                <th>LR</th>
                <th>Epochs</th>
                <th>Count Agg</th>
                <th>Loss_i</th>
                <th>Loss_u</th>
                <th>NDCG@100</th>
            </tr>
        </thead>
        <tbody>
            <tr class="worst">
                <td>1</td>
                <td>10</td>
                <td>[20]</td>
                <td>tanh</td>
                <td>pois</td>
                <td>1.0</td>
                <td>128</td>
                <td>0.001</td>
                <td>100</td>
                <td>❌</td>
                <td>0.939</td>
                <td>3.82</td>
                <td>0.123</td>
            </tr>
            <tr>
                <td>2</td>
                <td>100</td>
                <td>[200, 100]</td>
                <td>relu</td>
                <td>pois</td>
                <td>0.5</td>
                <td>256</td>
                <td>0.001</td>
                <td>100</td>
                <td>❌</td>
                <td>0.47</td>
                <td>2.16</td>
                <td>0.147</td>
            </tr>
            <tr class="worst">
                <td>3</td>
                <td>200</td>
                <td>[400, 200]</td>
                <td>relu</td>
                <td>gaus</td>
                <td>0.2</td>
                <td>256</td>
                <td>0.001</td>
                <td>100</td>
                <td>❌</td>
                <td>0.090</td>
                <td>0.391</td>
                <td>0.005</td>
            </tr>
            <tr>
                <td>4</td>
                <td>200</td>
                <td>[400, 200]</td>
                <td>relu</td>
                <td>bern</td>
                <td>0.3</td>
                <td>256</td>
                <td>0.0005</td>
                <td>100</td>
                <td>❌</td>
                <td>0.244</td>
                <td>1.19</td>
                <td>0.115</td>
            </tr>
            <tr>
                <td>5</td>
                <td>100</td>
                <td>[200, 100]</td>
                <td>relu</td>
                <td>bern</td>
                <td>0.1</td>
                <td>256</td>
                <td>0.001</td>
                <td>100</td>
                <td>❌</td>
                <td>0.434</td>
                <td>1.87</td>
                <td>0.150</td>
            </tr>
            <tr>
                <td>6</td>
                <td>100</td>
                <td>[200, 100]</td>
                <td>relu</td>
                <td>pois</td>
                <td>0.1</td>
                <td>256</td>
                <td>0.001</td>
                <td>100</td>
                <td>❌</td>
                <td>0.418</td>
                <td>1.76</td>
                <td>0.150</td>
            </tr>
            <tr>
                <td>7</td>
                <td>100</td>
                <td>[200, 100]</td>
                <td>relu</td>
                <td>pois</td>
                <td>0.1</td>
                <td>256</td>
                <td>0.001</td>
                <td>100</td>
                <td>✅</td>
                <td>0.425</td>
                <td>1.80</td>
                <td>0.153</td>
            </tr>
            <tr>
                <td>8</td>
                <td>100</td>
                <td>[200, 100]</td>
                <td>elu</td>
                <td>pois</td>
                <td>0.1</td>
                <td>256</td>
                <td>0.001</td>
                <td>100</td>
                <td>✅</td>
                <td>0.424</td>
                <td>1.77</td>
                <td>0.131</td>
            </tr>
            <tr>
                <td>9</td>
                <td>150</td>
                <td>[300, 200, 100]</td>
                <td>relu</td>
                <td>pois</td>
                <td>0.1</td>
                <td>256</td>
                <td>0.0005</td>
                <td>150</td>
                <td>✅</td>
                <td>0.437</td>
                <td>1.94</td>
                <td>0.147</td>
            </tr>
            <tr>
                <td>10</td>
                <td>100</td>
                <td>[200, 100]</td>
                <td>sigmoid</td>
                <td>pois</td>
                <td>0.1</td>
                <td>256</td>
                <td>0.001</td>
                <td>100</td>
                <td>✅</td>
                <td>-</td>
                <td>-</td>
                <td>0.134</td>
            </tr>
            <tr>
                <td>11</td>
                <td>100</td>
                <td>[200, 100]</td>
                <td>tanh</td>
                <td>pois</td>
                <td>0.1</td>
                <td>256</td>
                <td>0.001</td>
                <td>100</td>
                <td>✅</td>
                <td>-</td>
                <td>-</td>
                <td>0.132</td>
            </tr>
            <tr>
                <td>12</td>
                <td>100</td>
                <td>[200, 100]</td>
                <td>relu</td>
                <td>pois</td>
                <td>0.1</td>
                <td>256</td>
                <td>0.001</td>
                <td>100</td>
                <td>✅</td>
                <td>-</td>
                <td>-</td>
                <td>0.153</td>
            </tr>
            <tr class="best">
                <td>13</td>
                <td>100</td>
                <td>[200, 100]</td>
                <td>relu</td>
                <td>pois</td>
                <td>0.05</td>
                <td>256</td>
                <td>0.001</td>
                <td>200</td>
                <td>✅</td>
                <td>0.38</td>
                <td>1.58</td>
                <td><strong>0.162</strong></td>
            </tr>
        </tbody>
    </table>
</body>
</html>

`Наилучшая конфигурация смогли добиться NDCG@100 = 0.162`

**Обучение для предсказания на тестовую выборку**

In [4]:
train_val_df = pd.concat([train_df, val_df], ignore_index=True)
trainval_agg = train_val_df.groupby(['userID', 'itemID']).size().reset_index(name='count')
trainval_data = [(str(row['userID']), str(row['itemID']), float(row['count'])) 
              for _, row in trainval_agg.iterrows()]

test_data = [(str(row['userID']), str(row['itemID']), 1.0) for _, row in test_df.iterrows()]

trainval_set = cornac.data.Dataset.from_uir(trainval_data, seed=SEED)
print(f"Train set: {trainval_set.num_users} users, {trainval_set.num_items} items, {trainval_set.num_ratings} interactions")

test_set = cornac.data.Dataset.from_uir(test_data, seed=SEED)
print(f"Test set: {test_set.num_users} users, {test_set.num_items} items, {test_set.num_ratings} interactions")

Train set: 7425 users, 30923 items, 782025 interactions
Test set: 7425 users, 26040 items, 193867 interactions


In [5]:
model = cornac.models.BiVAECF(
    k=K,
    encoder_structure=ENCODER_STRUCTURE,
    act_fn=ACT_FN,
    likelihood=LIKELIHOOD,
    n_epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=LEARNING_RATE,
    beta_kl=BETA_KL,
    seed=SEED,
    verbose=True
)

In [6]:
with Timer() as train_time:
    model.fit(trainval_set)

print(f"Training took {train_time.interval:.2f} seconds")

  0%|          | 0/200 [00:00<?, ?it/s]

Training took 663.17 seconds


In [7]:
from recommenders.models.cornac.cornac_utils import predict_ranking
with Timer() as t:
    all_predictions = predict_ranking(model, trainval_set, usercol='userID', itemcol='itemID', remove_seen=False)

In [8]:
top100 = (all_predictions
          .sort_values(['userID', 'prediction'], ascending=[True, False])
          .groupby('userID', as_index=False)
          .head(100)
         )
print(top100.shape)
top100['userID'] = top100['userID'].astype(np.int64)
top100['itemID'] = top100['itemID'].astype(np.int64)
top100.head()

(742500, 3)


Unnamed: 0,userID,itemID,prediction
26234533,10000,22671,0.554279
26239647,10000,13768,0.51579
26245878,10000,4943,0.484612
26246298,10000,6343,0.460553
26232574,10000,20394,0.36843


In [9]:
eval_map = map(val_df, top100, k=TOP_K)
eval_ndcg = ndcg_at_k(val_df, top100, k=TOP_K)
eval_precision = precision_at_k(val_df, top100, k=TOP_K)
eval_recall = recall_at_k(val_df, top100, k=TOP_K)

print("MAP:\t%f" % eval_map,
      "NDCG:\t%f" % eval_ndcg,
      "Precision@K:\t%f" % eval_precision,
      "Recall@K:\t%f" % eval_recall, sep='\n')

MAP:	0.071724
NDCG:	0.259036
Precision@K:	0.071981
Recall@K:	0.256842


In [10]:
model_path_trainval = "./models_test/bivae_model.pkl"
model.save(model_path_trainval)

BiVAECF model is saved to ./models_test/bivae_model.pkl/BiVAECF/2026-02-27_16-55-09-286650.pkl


'./models_test/bivae_model.pkl/BiVAECF/2026-02-27_16-55-09-286650.pkl'