## Demonstration of a simple cosine similarity content-based recommendation system

In [5]:
import sys
import os

# Put the parent of the current working directory on the import path; the
# project-local imports that follow (`utils`, `models`, `parquet_data_reader`)
# are presumably resolved from there.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
# Guard the append so that re-running this cell does not keep adding
# duplicate entries to sys.path.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

from utils.process_data import user_item_interaction_scores
from parquet_data_reader import ParquetDataReader
from models.content_based import CosineSimilarityContentBased, evaluate_recommender

import polars as pl
import numpy as np
# Single reader instance reused by the data-loading cell for every parquet file.
parquet_reader = ParquetDataReader()

### Reading data and preprocessing it

In [6]:
# Root of the dataset folder, relative to the notebook's working directory.
# Defined once so that relocating the data only requires changing this line.
DATA_DIR = "../../data"

# Load the raw parquet inputs.
train_behavior_df = parquet_reader.read_data(f"{DATA_DIR}/train/behaviors.parquet")
embeddings_df = parquet_reader.read_data(f"{DATA_DIR}/document_vector.parquet")
article_df = parquet_reader.read_data(f"{DATA_DIR}/articles.parquet")
# NOTE: despite its name, the "test" frame is read from the validation split.
test_behavior_df = parquet_reader.read_data(f"{DATA_DIR}/validation/behaviors.parquet")

# Derive per (user_id, article_id) interaction scores from the training behaviour.
processed_behavior_df = user_item_interaction_scores(train_behavior_df, article=article_df)

# NOTE(review): the saved summary reports a non-zero null_count for `score`;
# confirm that the recommender handles null scores as intended.
processed_behavior_df.describe()

statistic,user_id,article_id,score
str,f64,f64,f64
"""count""",53349.0,53349.0,52004.0
"""null_count""",0.0,0.0,1345.0
"""mean""",1323900.0,9764700.0,0.627594
"""std""",733898.681736,135439.146612,0.092245
"""min""",10068.0,3198681.0,0.192546
"""25%""",697684.0,9772038.0,0.576248
"""50%""",1341414.0,9774352.0,0.633611
"""75%""",1962928.0,9776882.0,0.68502
"""max""",2590571.0,9780271.0,0.998828


In [7]:
# Example user / item for the demo. Defined once so that the printed labels
# always match the ids that were actually queried.
demo_user_id = 14241
demo_item_id = 6044108
top_n = 10  # number of recommendations requested for the user

# Build the recommender from the interaction scores and the item vectors.
content_based = CosineSimilarityContentBased(behavior_data=processed_behavior_df, item_data=embeddings_df)

recommendations = content_based.recommend(demo_user_id, top_n)
score = content_based.score(demo_user_id, demo_item_id)

print(f"Recommendations for user {demo_user_id}")
print(recommendations)
print(f"Score for user {demo_user_id} and item {demo_item_id}")
print(score)

Recommendations for user 14241
[7160857, 7789111, 6044108, 4925298, 7124272, 7592637, 5574958, 5622577, 7141651, 4340954]
Score for user 14241 and item 6044108
0.9761224341910408


In [None]:
# Ranking cut-off shared by the evaluation call and the metric labels, so the
# printed "@k" can never disagree with the k that was actually evaluated.
eval_k = 5

# NOTE(review): the saved run of this cell ended in KeyboardInterrupt, so no
# metrics were recorded; this evaluation appears to be long-running.
results = evaluate_recommender(content_based, test_behavior_df, k=eval_k, n_jobs=10)

print(f"MAP@{eval_k}: {results['MAP@K']:.4f}")
print(f"NDCG@{eval_k}: {results['NDCG@K']:.4f}")

KeyboardInterrupt: 