In [26]:
import sys
import os

parent_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
sys.path.append(parent_dir)
from parquet_data_reader import ParquetDataReader
from models.ring_buffer_baseline import RingBufferBaseline

import polars as pl
pl.Config.set_tbl_cols(-1)
import numpy as np
parquet_reader = ParquetDataReader()

In [27]:
import polars as pl
from utils.ring_buffer_processing import process_behavior_data, random_split, time_based_split

train_behavior_df = parquet_reader.read_data("../../data/train/behaviors.parquet")
test_behaviours_df = parquet_reader.read_data('../../data/validation/behaviors.parquet')

# Processes the data
combined_df = process_behavior_data(train_behavior_df, test_behaviours_df)

# ----- Method 1: Random Split -----
train_random, test_random = random_split(combined_df, test_ratio=0.30)
print("Random Split:")
print("Train shape:", train_random.shape)
print("Test shape:", test_random.shape)

# ----- Method 2: Time-based Split -----
train_time, test_time = time_based_split(combined_df, test_ratio=0.30)
print("\nTime-based Split:")
print("Train shape:", train_time.shape)
print("Test shape:", test_time.shape)


Random Split:
Train shape: (99195, 17)
Test shape: (42961, 17)

Time-based Split:
Train shape: (99510, 17)
Test shape: (42646, 17)


In [28]:
# Creates a recommender and fits it to the training data split using the random split method
recommender = RingBufferBaseline(behaviors=train_random)
recommender.fit()

user_id_test = 151570
recommendations = recommender.recommend(user_id=user_id_test, n=5)

print(f"Recommendations for user {user_id_test}:")
print(recommendations)


Recommendations for user 151570:
[9770989, 9770538, 9771042, 9771042, 9770882]


In [29]:
# Creates a recommender and fits it to the training data split using the time-based split method
recommender2 = RingBufferBaseline(behaviors=train_time)
recommender2.fit()

user_id_test2 = 151570
recommendations2 = recommender.recommend(user_id=user_id_test2, n=5)

print(f"Recommendations for user {user_id_test2}:")
print(recommendations2)

Recommendations for user 151570:
[9770989, 9770538, 9771042, 9771042, 9770882]


In [30]:
# Evaluates the recommender using the same data as test data
metrics = recommender.evaluate(test_data=test_random, k=5)
print("\nEvaluation metrics (precision and recall at k):")
print(metrics)


# Evaluates the recommender using the same data as test data
metrics2 = recommender2.evaluate(test_data=test_time, k=5)
print("\nEvaluation metrics (precision and recall at k):")
print(metrics2)



Evaluation metrics (precision and recall at k):
{'precision': np.float64(0.001473175021987687), 'recall': np.float64(0.0014315117181903467), 'fpr': np.float64(0.002196693848986072)}

Evaluation metrics (precision and recall at k):
{'precision': np.float64(0.014185981569394025), 'recall': np.float64(0.02262336024255056), 'fpr': np.float64(0.004302749816280949)}
