In [None]:
import os
import pandas as pd
from surprise import KNNBasic
from surprise.accuracy import rmse, mae
from codecarbon import EmissionsTracker

In [None]:
# Name of the dataset this notebook runs on. Listed options:
# coveo, diginetica, rees46, retailrocket, yoochoose
dataset = "coveo"

# Directory (relative path) that holds the selected dataset's files.
dataset_path = f"../datasets/{dataset}"

In [None]:
# Locations of the TSV splits inside the dataset directory:
# the full training split first, then the test split.
train_path, test_path = (
    os.path.join(dataset_path, f"{dataset}_processed_view_train_full.tsv"),
    os.path.join(dataset_path, f"{dataset}_processed_view_test.tsv"),
)

In [None]:
def load_data(train_path, test_path):
    """Read the train and test splits from tab-separated files.

    Parameters
    ----------
    train_path : str or path-like
        Location of the training TSV file.
    test_path : str or path-like
        Location of the test TSV file.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        The training frame followed by the test frame.
    """
    # Training file is read first, then the test file.
    train_frame, test_frame = (
        pd.read_csv(split_path, sep='\t')
        for split_path in (train_path, test_path)
    )
    return train_frame, test_frame

def _first_three_columns(frame, role):
    """Return the first three columns of ``frame``, used as (user, item, rating)."""
    if frame.shape[1] < 3:
        raise ValueError(
            f"{role} DataFrame needs at least 3 columns (user, item, rating); "
            f"got {frame.shape[1]}"
        )
    return frame.iloc[:, :3]


def train_evaluate_knn(trainset, testset):
    """Fit an item-based ``KNNBasic`` model, report training emissions, and score it.

    Only the ``fit`` call is wrapped by the CodeCarbon tracker; evaluation on
    the test data happens after the tracker has been stopped.

    Parameters
    ----------
    trainset : surprise.Trainset or pandas.DataFrame
        Training interactions. A DataFrame is converted with
        ``Dataset.load_from_df``; a non-DataFrame value is passed to ``fit``
        unchanged.
    testset : list of (uid, iid, rating) tuples or pandas.DataFrame
        Held-out interactions. A DataFrame is converted to a list of tuples;
        a non-DataFrame value is passed to ``test`` unchanged.

    Returns
    -------
    KNNBasic
        The fitted model.

    Raises
    ------
    ValueError
        If a DataFrame input has fewer than three columns.
    """
    # Local import so this cell stays runnable on its own; surprise is already
    # a dependency of this notebook.
    from surprise import Dataset, Reader

    # NOTE(review): DataFrames are read positionally — the first three columns
    # are taken as (user id, item id, rating), the order load_from_df expects.
    # Confirm that matches the processed TSV layout; reorder/select columns
    # before calling if it does not.
    if isinstance(trainset, pd.DataFrame):
        train_triples = _first_three_columns(trainset, "train")
        train_ratings = train_triples.iloc[:, 2]
        # Rating scale is taken from the observed training ratings rather than
        # Reader's default, which may not fit this data — TODO confirm.
        reader = Reader(
            rating_scale=(float(train_ratings.min()), float(train_ratings.max()))
        )
        trainset = Dataset.load_from_df(train_triples, reader).build_full_trainset()

    if isinstance(testset, pd.DataFrame):
        test_triples = _first_three_columns(testset, "test")
        testset = list(test_triples.itertuples(index=False, name=None))

    sim_options = {
        # NOTE(review): the Surprise documentation lists 'cosine', 'msd',
        # 'pearson' and 'pearson_baseline' as similarity names; confirm the
        # installed build provides 'manhattan', otherwise fit() will reject it.
        'name': 'manhattan',  # Use Manhattan similarity
        'user_based': False  # Item-based filtering
    }
    model = KNNBasic(sim_options=sim_options)

    codecarbon_tracker = EmissionsTracker()
    codecarbon_tracker.start()
    try:
        model.fit(trainset)
    finally:
        # Stop the tracker even when fit() fails so it does not keep running.
        emissions = codecarbon_tracker.stop()
    print(f"CO2 emissions: {emissions} kg")

    predictions = model.test(testset)
    # verbose=False: the accuracy helpers print their own line by default,
    # which would duplicate the explicit prints below.
    rmse_score = rmse(predictions, verbose=False)
    mae_score = mae(predictions, verbose=False)

    print(f"RMSE: {rmse_score}")
    print(f"MAE: {mae_score}")

    return model

In [None]:
# Read both splits from disk, then fit and score the KNN model; the fitted
# model is kept in `trained_model`.
# NOTE(review): load_data returns pandas DataFrames, while Surprise's
# fit()/test() are documented to take a Trainset and a list of
# (uid, iid, rating) tuples — confirm train_evaluate_knn handles the conversion.
trainset, testset = load_data(train_path, test_path)
trained_model = train_evaluate_knn(trainset, testset)