In [1]:
import pandas as pd
import numpy as np

# Leer el dataset original
cols = ['user', 'item', 'rating', 'timestamp']
df = pd.read_csv('datasets/ml-100k/u.data', sep='\t', names=cols)

# Agregar hora del día
df['hour'] = pd.to_datetime(df['timestamp'], unit='s').dt.hour
df['hora_dia'] = df['hour'].apply(
    lambda h: 'mañana' if 5 <= h < 12 else ('tarde' if 12 <= h < 18 else 'noche')
)

# Simular dispositivo
np.random.seed(42)
df['dispositivo'] = np.random.choice(['pc', 'mobile'], size=len(df))

# Crear ID artificial de usuario contextual
df['user_ctx'] = df['user'].astype(str) + '@' + df['hora_dia'] + '#' + df['dispositivo']

# Mostrar ejemplo
print(df[['user', 'hora_dia', 'dispositivo', 'user_ctx']].head())

# Guardar dataset con usuario contextual
df[['user_ctx', 'item', 'rating']].to_csv('datasets/ml-100k/u_context.csv', index=False, header=False)


   user hora_dia dispositivo          user_ctx
0   196    tarde          pc      196@tarde#pc
1   186    noche      mobile  186@noche#mobile
2    22   mañana          pc      22@mañana#pc
3   244   mañana          pc     244@mañana#pc
4   166   mañana          pc     166@mañana#pc


In [3]:
from cornac.data import Reader
from cornac.eval_methods import RatioSplit
from cornac.models import MF
from cornac.metrics import RMSE, MAE, Precision, NDCG

# Leer el dataset con usuario contextual
reader = Reader()
data_ctx = reader.read(fpath='datasets/ml-100k/u_context.csv', fmt='UIR', sep=',')

# Método de evaluación (igual que antes)
eval_method_ctx = RatioSplit(
    data=data_ctx,
    test_size=0.2,
    rating_threshold=0.0,
    exclude_unknowns=True,
    verbose=True,
    seed=42
)

# Entrenar el modelo con usuario contextual
model_ctx = MF(k=10, max_iter=50, learning_rate=0.01, verbose=True)
model_ctx.fit(eval_method_ctx.train_set)

# Evaluar el modelo con usuario contextual
metrics_ctx = eval_method_ctx.evaluate(
    model_ctx,
    metrics=[RMSE(), MAE(), Precision(k=5), NDCG(k=5)],
    user_based=True
)

# Mostrar resultados
for metric, value in metrics_ctx[0].metric_avg_results.items():
    print(f"{metric}: {value:.4f}")

rating_threshold = 0.0
exclude_unknowns = True
---
Training data:
Number of users = 2526
Number of items = 1651
Number of ratings = 80000
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 2526
Number of items = 1651
Number of ratings = 19947
Number of unknown users = 0
Number of unknown items = 0
---
Total users = 2526
Total items = 1651


  0%|          | 0/50 [00:00<?, ?it/s]

Optimization finished!

[MF] Training started!


  0%|          | 0/50 [00:00<?, ?it/s]

Optimization finished!

[MF] Evaluation started!


Rating:   0%|          | 0/19947 [00:00<?, ?it/s]

Ranking:   0%|          | 0/2300 [00:00<?, ?it/s]

MAE: 0.8305
RMSE: 0.9813
NDCG@5: 0.0159
Precision@5: 0.0139
Train (s): 0.2671
Test (s): 1.6925


MAE: 0.8305
RMSE: 0.9813
NDCG@5: 0.0159
Precision@5: 0.0139
Train (s): 0.2671
Test (s): 1.6925