## Demonstration of the user-item bias baseline model


In [9]:
import sys
import os

# Put the directory above the current working directory on the import path;
# presumably this is what lets the project-local imports that follow resolve
# when the notebook is launched from its own folder (confirm against repo layout).
parent_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
# Guard the append so re-running this cell does not accumulate duplicate entries.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)
from utils.process_data import user_item_interaction_scores
from parquet_data_reader import ParquetDataReader
from models.baseline import UserItemBiasRecommender

import polars as pl
import numpy as np
# Single reader instance reused for every read_data call in the data-loading cell.
parquet_reader = ParquetDataReader()

### Reading data

In [10]:
# One root for all dataset paths, so relocating the data means editing one line.
DATA_DIR = "../../data"

# Training interactions; converted into per-row scores at the end of this cell.
train_behavior_df = parquet_reader.read_data(f"{DATA_DIR}/train/behaviors.parquet")
# NOTE(review): embeddings_df is not referenced by any later cell visible here —
# confirm it is still needed before keeping the (potentially large) load.
embeddings_df = parquet_reader.read_data(f"{DATA_DIR}/document_vector.parquet")
article_df = parquet_reader.read_data(f"{DATA_DIR}/articles.parquet")
# The validation split is what gets passed as the held-out data during evaluation.
test_behavior_df = parquet_reader.read_data(f"{DATA_DIR}/validation/behaviors.parquet")

# Produces the frame displayed below with columns
# user_id, article_id, impression_time, score.
processed_behavior_df = user_item_interaction_scores(train_behavior_df, article=article_df)


In [11]:
# Inspect the column names and dtypes of the processed interactions.
processed_behavior_df.schema

Schema([('user_id', UInt32),
        ('article_id', Int32),
        ('impression_time', Datetime(time_unit='us', time_zone=None)),
        ('score', Float64)])

In [12]:
# Preview the first five processed interaction rows.
processed_behavior_df.head(5)

user_id,article_id,impression_time,score
u32,i32,datetime[μs],f64
333882,9773697,2023-05-20 16:06:25,0.697158
178179,9757574,2023-05-21 12:37:44,0.763139
1036465,9777034,2023-05-24 06:27:18,0.534629
358303,9770450,2023-05-21 05:01:00,0.791902
1770038,9777492,2023-05-24 08:26:50,0.593415


### Create and fit the model

In [13]:
# Build the recommender on the processed interaction scores, then fit it.
# NOTE(review): presumably fit() estimates the per-user and per-item biases —
# confirm against models/baseline.
model = UserItemBiasRecommender(processed_behavior_df)
model.fit()


### Recommendations

In [14]:
# Recommendations for one example user (argument is presumably a user_id — confirm);
# the call returns a list of five ids in the captured output.
model.recommend(2423448)

[9514727, 9667501, 9714376, 9419945, 9761391]

In [15]:
# Predicted score for the same example user and one of the ids recommended above
# (arguments are presumably (user_id, article_id) — confirm against the model API).
model.predict(2423448, 9714376)

0.841934084892273

### Evaluation

In [17]:
# Evaluate against the validation behaviors loaded earlier.
# NOTE(review): user_sample presumably restricts evaluation to a sample of 1000
# users and n_jobs presumably sets the number of parallel workers — confirm in the
# model implementation; if the sample is random, results vary between runs.
results = model.evaluate_recommender(
    test_data=test_behavior_df,
    k=5,
    n_jobs=4,
    user_sample=1000,
)
print("Results")
results

Results


{'Precision@K': np.float64(0.0002),
 'NDCG@K': np.float64(0.0003391602052736161)}