In [1]:
import os
import pandas as pd
from tempfile import TemporaryDirectory
from recommenders.datasets.mind import download_mind
from recommenders.datasets.download_utils import unzip_file
from models.hybrid import HybridRecommender
from utils.evaluation import evaluate_model



In [2]:
# Scratch location for the downloaded dataset. Keep `tmpdir` referenced: the
# directory is removed once the TemporaryDirectory object is cleaned up.
tmpdir = TemporaryDirectory()
data_path = tmpdir.name

# One extraction folder per split
train_dir = os.path.join(data_path, "train")
valid_dir = os.path.join(data_path, "valid")

# Fetch the "small" train/validation archives and extract each into its folder
# (clean_zip_file=False — presumably leaves the downloaded archives in place)
train_zip, valid_zip = download_mind(dest_path=data_path, size="small")
unzip_file(train_zip, train_dir, clean_zip_file=False)
unzip_file(valid_zip, valid_dir, clean_zip_file=False)

# Behavior logs (TSV format), one per split
train_behaviors_path = os.path.join(train_dir, "behaviors.tsv")
valid_behaviors_path = os.path.join(valid_dir, "behaviors.tsv")

100%|██████████| 51.8k/51.8k [00:05<00:00, 10.1kKB/s]
100%|██████████| 30.2k/30.2k [00:03<00:00, 9.07kKB/s]


In [3]:
# Build the recommender, then fit it on the training behaviors file
model = HybridRecommender(batch_size=1000)
model.train(
    use_timestamps=True,
    train_file=train_behaviors_path,
)

Processing interactions...


Extracting interactions: 100%|██████████| 156965/156965 [00:05<00:00, 31327.78it/s]


Creating matrices...
Computing item similarity matrix...
Training completed.


In [6]:
# Single source of truth for the list length, so the printed header cannot
# drift from the number of items actually requested from the model.
top_n = 5

# NOTE(review): placeholder-looking id — confirm it occurs in the training data
sample_user_id = 'user123'

recommendations = model.recommend(user_id=sample_user_id, N=top_n)
print(f"Top {top_n} recommendations for user {sample_user_id}:")
print(recommendations)

Top 5 recommendations for user user123:
['N4607', 'N24298', 'N28257', 'N306', 'N5579']


In [7]:
# Cutoff shared by the evaluation call and the metric labels, so the printed
# "@k" can never disagree with the value actually evaluated.
# (Passed as the third argument to evaluate_model; assumed to be the @k cutoff —
# confirm against utils.evaluation.)
eval_k = 5

# The behaviors file is read with header=None, so column names are supplied here
columns = ["id", "user_id", "timestamp", "history", "impressions"]

valid_behaviors_df = pd.read_csv(valid_behaviors_path, sep="\t", header=None, names=columns)

ndcg, auc, mrr = evaluate_model(model, valid_behaviors_df, eval_k)
print(f"Hybrid Model - NDCG@{eval_k}: {ndcg:.4f}, AUC@{eval_k}: {auc:.4f}, MRR@{eval_k}: {mrr:.4f}")

Hybrid Model - NDCG@5: 0.0410, AUC@5: 0.0415, MRR@5: 0.0453
