In [1]:
import sys
import os

# Put the parent of the current working directory on the import path; the
# project-local imports below (utils, parquet_data_reader, models) presumably
# live there — TODO confirm against the repository layout.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
# Guard the append so re-running this cell does not stack duplicate sys.path entries.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

from utils.process_data import user_item_interaction_scores
from parquet_data_reader import ParquetDataReader
from models.content_based.cosine_model import CosineModel

import polars as pl
import numpy as np
# Single reader instance; the data-loading cell below calls its read_data() for every parquet input.
parquet_reader = ParquetDataReader()

In [2]:
# All inputs go through the same reader method; alias it once for brevity.
read_parquet = parquet_reader.read_data

# Training-split behaviours, used to build the model.
train_behavior_df = read_parquet("../../data/train/behaviors.parquet")

# Document vectors and article table handed to the model as embeddings / articles.
embeddings_df = read_parquet("../../data/document_vector.parquet")
article_df = read_parquet("../../data/articles.parquet")

# Training-split history; the diversity and Gini cells take their user ids from it.
history_df = read_parquet("../../data/train/history.parquet")

# Held-out behaviours for evaluation — note these come from the *validation* split.
test_behavior_df = read_parquet("../../data/validation/behaviors.parquet")

In [3]:
# Build the content-based recommender from the training behaviours, the
# document-vector frame and the article table.
cosine_model = CosineModel(
    train_behavior=train_behavior_df,
    embeddings=embeddings_df,
    articles=article_df,
)

# Smoke test on one user. The positional arguments are presumably
# (user_id, number of recommendations) — TODO confirm against CosineModel.recommend.
recommendations = cosine_model.recommend(14241, 10)
print(recommendations)

[9773282, 9780193, 9729516, 9739783, 9724453, 9761255, 9591069, 9752312, 9764420, 9774352]


In [4]:
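# Disabled: evaluation through the model's own evaluate_recommender method.
# The metrics cell below calls perform_model_evaluation instead.
# TODO: either re-enable this call or delete the cell.
#print(cosine_model.evaluate_recommender(test_behavior_df, 10))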

### Metrics for evaluation comparison

In [5]:
from utils.evaluation import perform_model_evaluation, append_model_metrics

# Score the model on the held-out behaviours (loaded from the validation split) with k=5.
metrics = perform_model_evaluation(cosine_model, test_data=test_behavior_df, k=5)

# The returned dict carries 'precision@k', 'recall@k' and 'fpr@k', so the header
# names all three instead of only precision and recall.
print("\nEvaluation metrics (precision, recall and fpr at k):")
print(metrics)

# NOTE(review): the label here is "Cosine_model", while the diversity, Gini and
# emissions cells use "cosine_model". Confirm whether the append helpers key on the
# exact string before unifying the casing; left unchanged to avoid orphaning stored rows.
append_model_metrics(metrics, "Cosine_model")



Evaluation metrics (precision and recall at k):
{'precision@k': np.float64(0.00043019163081736407), 'recall@k': np.float64(0.0003361322811210372), 'fpr@k': np.float64(0.0027589804779780654)}


### Diversity

In [6]:
from utils.evaluation import aggregate_diversity, append_aggregate_diversity

# Distinct user ids taken from the training history; also reused by the Gini cell.
users_ids = history_df["user_id"].unique()

# Aggregate diversity of the cosine model's recommendations, computed with
# user_sample=1000 (presumably the number of users sampled — TODO confirm in utils.evaluation).
diversity = aggregate_diversity(
    cosine_model,
    item_df=article_df,
    users_df=users_ids,
    user_sample=1000,
)

print("Diversity Cosine Model")
print(diversity)

# Record the score under the model's label.
append_aggregate_diversity(diversity, "cosine_model")


Diversity Cosine Model
0.07059504291638538


### Gini

In [7]:
from utils.evaluation import gini_coefficient, append_gini_coefficient

# Gini coefficient for the cosine model, using the user ids collected in the
# diversity cell (that cell must have been run first).
# NOTE(review): the captured output of this call contains a very long list of
# article ids, apparently printed inside gini_coefficient — consider silencing it there.
gini = gini_coefficient(
    cosine_model,
    articles_ids_df=article_df,
    users_ids_df=users_ids,
    user_sample=1000,
)

print("Gini Coefficient Cosine Model")
print(gini)

# Record the score under the model's label.
append_gini_coefficient(gini, "cosine_model")

Sampling users
Computing Gini coefficient
[9773364, 9769630, 9772858, 9771352, 9774013, 9778669, 9723019, 9757876, 9716766, 9757537, 9771855, 8309907, 9440974, 9784044, 9555457, 9775763, 9714210, 9560129, 9796924, 7623107, 9586142, 9620842, 9518957, 6458569, 9596415, 3001353, 3003065, 3012771, 3023463, 3032577, 9297027, 9407345, 7067172, 7086478, 9775964, 9740236, 9793510, 9739634, 9734758, 9620842, 3001353, 3003065, 3012771, 3023463, 3032577, 3001353, 3003065, 3012771, 3023463, 3032577, 3001353, 3003065, 3012771, 3023463, 3032577, 9778769, 9779641, 9714305, 9775716, 9789491, 9739634, 9746483, 9740236, 9789616, 9746361, 3001353, 3003065, 3012771, 3023463, 3032577, 9769497, 9769531, 9793776, 9759612, 9723383, 3001353, 3003065, 3012771, 3023463, 3032577, 3001353, 3003065, 3012771, 3023463, 3032577, 3001353, 3003065, 3012771, 3023463, 3032577, 3001353, 3003065, 3012771, 3023463, 3032577, 3001353, 3003065, 3012771, 3023463, 3032577, 9340369, 8907606, 8332015, 9316521, 8899217, 9779289, 977

### Emissions

In [8]:
from utils.evaluation import track_model_energy

# User whose recommendations are generated while energy use is tracked.
user_id_test = 151570

print("\nCarbon footprint of the recommender:")

# Track the model's carbon footprint. Judging by the captured output, the result
# is a dict with 'fit' and 'recommend' entries, each a (result, emissions) pair —
# TODO confirm against track_model_energy.
footprint = track_model_energy(
    cosine_model,
    "cosine_model",
    user_id=user_id_test,
    n=5,
)

# Bare last expression so the notebook renders the result.
footprint

[codecarbon INFO @ 14:45:38] [setup] RAM Tracking...
[codecarbon INFO @ 14:45:38] [setup] CPU Tracking...
 Windows OS detected: Please install Intel Power Gadget to measure CPU




Carbon footprint of the recommender:


[codecarbon INFO @ 14:45:40] CPU Model on constant consumption mode: 13th Gen Intel(R) Core(TM) i7-13700H
[codecarbon INFO @ 14:45:40] [setup] GPU Tracking...
[codecarbon INFO @ 14:45:40] No GPU found.
[codecarbon INFO @ 14:45:40] >>> Tracker's metadata:
[codecarbon INFO @ 14:45:40]   Platform system: Windows-10-10.0.26100-SP0
[codecarbon INFO @ 14:45:40]   Python version: 3.11.9
[codecarbon INFO @ 14:45:40]   CodeCarbon version: 2.8.3
[codecarbon INFO @ 14:45:40]   Available RAM : 15.731 GB
[codecarbon INFO @ 14:45:40]   CPU count: 20
[codecarbon INFO @ 14:45:40]   CPU model: 13th Gen Intel(R) Core(TM) i7-13700H
[codecarbon INFO @ 14:45:40]   GPU count: None
[codecarbon INFO @ 14:45:40]   GPU model: None
[codecarbon INFO @ 14:45:43] Saving emissions data to file c:\Users\magnu\NewDesk\An.sys\TDT4215\recommender_system\demostrations\output\cosine_model_fit_emission.csv
[codecarbon INFO @ 14:45:58] Energy consumed for RAM : 0.000025 kWh. RAM Power : 5.899243354797363 W
[codecarbon INFO 

{'fit': (None, 7.108579502315465e-05),
 'recommend': ([9472893, 9312829, 9087328, 9484790, 8220538],
  4.778467287549869e-09)}