In [1]:

import time
# Wall-clock start timestamp; the last cell subtracts it from its own `time.time()` reading to log the total run time.
notebook_start_time = time.time()

In [2]:
import sys
from pathlib import Path

# Project root = the parent of the directory the notebook is executed from.
notebook_dir = Path().absolute()
root_dir = str(notebook_dir.parent)

# Put the project root on the module search path (only once) so that the `recsys`
# package can be imported from this notebook.
already_on_path = root_dir in sys.path
if not already_on_path:
    print(f"Adding the following directory to the PYTHONPATH: {root_dir}")
    sys.path.append(root_dir)

Adding the following directory to the PYTHONPATH: /home/massyl/projects/personalized-recommender-project


In [63]:
%load_ext autoreload
%autoreload 2

import warnings

# NOTE(review): with no category or module argument this filter hides every Python warning
# (deprecations included) unless a later filter overrides it — confirm a blanket ignore is
# intended rather than silencing one specific noisy warning.
warnings.filterwarnings("ignore")

from loguru import logger
from recsys.hopsworks_integration import feature_store, ranking_serving
from recsys.training import ranking
from recsys.config import settings

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Training pipeline: Training the ranking model

## Connecting to the Hopsworks Feature Store

In [33]:
# Open the Hopsworks connection and keep both handles: `project` is used later to reach the
# model registry, and `fs` is the feature store handle passed to the feature-view helper.
# (The helper's log line reports that it logs in using the HOPSWORKS_API_KEY env var.)
project, fs = feature_store.get_feature_store()

[32m2025-08-16 20:13:07.743[0m | [1mINFO    [0m | [36mrecsys.hopsworks_integration.feature_store[0m:[36mget_feature_store[0m:[36m10[0m - [1mLoging to Hopsworks using HOPSWORKS_API_KEY env var.[0m


2025-08-16 20:13:07,745 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-08-16 20:13:07,748 INFO: Initializing external client
2025-08-16 20:13:07,749 INFO: Base URL: https://c.app.hopsworks.ai:443






2025-08-16 20:13:09,013 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/90261


## Getting the training data

In [45]:
# Obtain the ranking feature view from the feature store handle; the train/validation
# split used for training is read from this object.
feature_view_ranking = feature_store.create_ranking_feature_views(fs)

In [46]:
# Read the ranking dataset from the feature view and split it into training and validation
# parts; the validation share comes from the project settings.
X_train, X_val, y_train, y_val = feature_view_ranking.train_test_split(
    test_size=settings.RANKING_DATASET_VALIDATON_SPLIT_SIZE,
    description="Ranking training dataset",
)

# Sanity check: a feature that is null in every training row cannot contribute to the model
# (the saved run shows `month_sin`/`month_cos` empty in this preview and with 0.0 importance).
# Warn instead of raising so the notebook still runs end to end.
# Assumes `X_train` is a pandas DataFrame (the saved output renders as one) — TODO confirm.
all_null_features = [
    column for column in X_train.columns if X_train[column].isna().all()
]
if all_null_features:
    logger.warning(
        f"Features with no values in the training split: {all_null_features}"
    )

# Last expression of the cell: preview of the training features.
X_train.head(3)

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (7.11s) 




Unnamed: 0,age,product_type_name,product_group_name,graphical_appearance_name,colour_group_name,perceived_colour_value_name,perceived_colour_master_name,department_name,index_name,index_group_name,section_name,garment_group_name,month_sin,month_cos
0,32.0,Blouse,Garment Upper body,Solid,Greenish Khaki,Dusty Light,Khaki green,Blouse,Ladieswear,Ladieswear,Womens Tailoring,Blouses,,
1,25.0,Blouse,Garment Upper body,Stripe,Dark Blue,Dark,Blue,Blouse,Ladieswear,Ladieswear,Womens Casual,Blouses,,
2,31.0,Shirt,Garment Upper body,Solid,Pink,Medium,Pink,Blouse,Ladieswear,Ladieswear,Womens Everyday Collection,Blouses,,


In [10]:
# Preview the first training labels (the `label` column that accompanies `X_train`).
y_train.head(3)

Unnamed: 0,label
0,0
1,0
2,0


## Training the ranking model

In [65]:
# Fresh, untrained ranking model from the project's factory.
model = ranking.RankingModelFactory.build()

# Pair features with labels so the trainer receives ready-made (X, y) tuples.
train_split = (X_train, y_train)
validation_split = (X_val, y_val)

trainer = ranking.RankingModelTrainer(
    model=model,
    train_dataset=train_split,
    eval_dataset=validation_split,
)

In [66]:
# Train the model held by the trainer; the per-iteration log reports learn/test loss, and
# the value returned by `fit()` is displayed as the cell output.
trainer.fit()

0:	learn: 0.6895725	test: 0.6898138	best: 0.6898138 (0)	total: 125ms	remaining: 12.4s
1:	learn: 0.6874909	test: 0.6879330	best: 0.6879330 (1)	total: 171ms	remaining: 8.36s
2:	learn: 0.6871920	test: 0.6877391	best: 0.6877391 (2)	total: 178ms	remaining: 5.76s
3:	learn: 0.6848421	test: 0.6855559	best: 0.6855559 (3)	total: 215ms	remaining: 5.15s
4:	learn: 0.6831561	test: 0.6848221	best: 0.6848221 (4)	total: 253ms	remaining: 4.81s
5:	learn: 0.6811348	test: 0.6831224	best: 0.6831224 (5)	total: 288ms	remaining: 4.52s
6:	learn: 0.6801642	test: 0.6825381	best: 0.6825381 (6)	total: 322ms	remaining: 4.28s
7:	learn: 0.6786971	test: 0.6816055	best: 0.6816055 (7)	total: 358ms	remaining: 4.12s
8:	learn: 0.6778101	test: 0.6808690	best: 0.6808690 (8)	total: 396ms	remaining: 4s
9:	learn: 0.6777569	test: 0.6808715	best: 0.6808690 (8)	total: 407ms	remaining: 3.66s
10:	learn: 0.6768264	test: 0.6802775	best: 0.6802775 (10)	total: 444ms	remaining: 3.59s
11:	learn: 0.6767669	test: 0.6802510	best: 0.6802510 (1

<catboost.core.CatBoostClassifier at 0x767fac5cd820>

## Evaluating the ranking model

In [67]:
# Evaluate the trained model (presumably on the eval dataset given to the trainer — confirm);
# with `log=True` the classification report is also written to the log.
# The returned `metrics` are passed along when the model is registered further down.
metrics = trainer.evaluate(log=True)

[32m2025-08-16 20:46:37.325[0m | [1mINFO    [0m | [36mrecsys.training.ranking[0m:[36mevaluate[0m:[36m62[0m - [1m              precision    recall  f1-score   support

           0       0.93      0.61      0.74     19878
           1       0.12      0.54      0.20      1966

    accuracy                           0.61     21844
   macro avg       0.53      0.58      0.47     21844
weighted avg       0.86      0.61      0.69     21844
[0m


In [68]:
# Per-feature importance scores of the trained model, shown as the cell output.
# NOTE(review): in the saved run `month_sin` and `month_cos` score 0.0 and are empty in the
# `X_train` preview — check the upstream feature pipeline before relying on them.
trainer.get_feature_importance()

{'age': 28.51168383484209,
 'index_name': 10.433638975796548,
 'section_name': 9.250007002901508,
 'department_name': 8.785307236168105,
 'product_group_name': 8.126771866184086,
 'index_group_name': 6.777395035822253,
 'product_type_name': 6.591389949822486,
 'garment_group_name': 6.565098393222948,
 'graphical_appearance_name': 5.311707555007342,
 'perceived_colour_master_name': 3.9039446933111943,
 'colour_group_name': 3.337645871100222,
 'perceived_colour_value_name': 2.405409585821184,
 'month_sin': 0.0,
 'month_cos': 0.0}

## Uploading the model to the Hopsworks model registry

In [62]:
# Handle to the project's model registry; needed to register the trained model.
mr = project.get_model_registry()

In [None]:
# Wrap the trained model for Hopsworks and register it in the model registry `mr`, passing
# the feature view, the training features and the evaluation metrics along with it.
# NOTE(review): how `register` uses `X_train` is not visible here — the saved output shows an
# `input_example.json` and a `model_schema.json` being uploaded, so presumably for those; confirm.
ranking_module = ranking_serving.HopsworksRankingModel(model=model)
ranking_module.register(mr, feature_view_ranking, X_train, metrics)

Uploading /home/massyl/projects/personalized-recommender-project/notebooks/ranking_model.pkl: 100.000%|██████████| 1510787/1510787 elapsed<00:03 remaining<00:00
Uploading /home/massyl/projects/personalized-recommender-project/notebooks/input_example.json: 100.000%|██████████| 434/434 elapsed<00:01 remaining<00:00
Uploading /home/massyl/projects/personalized-recommender-project/notebooks/model_schema.json: 100.000%|██████████| 1269/1269 elapsed<00:01 remaining<00:00
Model export complete: 100%|██████████| 6/6 [00:13<00:00,  2.27s/it]                   

Model created, explore it at https://c.app.hopsworks.ai:443/p/90261/models/ranking_model/1





In [70]:
# Wall-clock duration measured from the `notebook_start_time` captured in the first cell.
notebook_end_time = time.time()
notebook_execution_time = notebook_end_time - notebook_start_time

# Build the message first, then hand it to the logger as one pre-formatted string.
execution_time_message = f"⌛️ Notebook Execution time: {notebook_execution_time:.2f} seconds ~ {notebook_execution_time / 60:.2f} minutes"
logger.info(execution_time_message)

[32m2025-08-16 20:47:09.987[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1m⌛️ Notebook Execution time: 3885.72 seconds ~ 64.76 minutes[0m
