# Evaluación del modelo

In [7]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader
import pickle
import os
import json

from src.data.load_data import load_training_data
from src.data.preprocessing import create_dt_dataset, validate_preprocessing
from src.data.dataset import RecommendationDataset
from src.models.decision_transformer import DecisionTransformer
from src.training.trainer import train_decision_transformer
from src.evaluation.evaluate import evaluate_model


# === DATASET CONFIGURATION ===
# Change DATASET to pick which dataset the notebook runs on.
DATASET = 'netflix'  # or 'goodreads'

# Per-dataset settings: number of items plus the train and test file paths.
DATASET_CONFIGS = {
    'netflix': {
        'num_items': 752,
        'train_path': 'data/train/netflix8_train.df',
        'test_path': 'data/test_users/netflix8_test.json',
    },
    'goodreads': {
        'num_items': 472,
        'train_path': 'data/train/goodreads8_train.df',
        'test_path': 'data/test_users/goodreads8_test.json',
    },
}


def get_dataset_config(name):
    """Return the settings dict registered for dataset ``name``.

    Args:
        name: Dataset identifier; must be a key of ``DATASET_CONFIGS``.

    Returns:
        The dict with the ``num_items``, ``train_path`` and ``test_path`` entries.

    Raises:
        ValueError: If ``name`` is not a known dataset. Failing loudly here
            keeps a typo from silently selecting the wrong dataset.
    """
    try:
        return DATASET_CONFIGS[name]
    except KeyError:
        raise ValueError(
            f"Unknown DATASET {name!r}; expected one of {sorted(DATASET_CONFIGS)}"
        ) from None


_dataset_config = get_dataset_config(DATASET)
NUM_ITEMS = _dataset_config['num_items']
train_path = _dataset_config['train_path']
test_path = _dataset_config['test_path']

# Number of groups; the same value is used for both datasets.
NUM_GROUPS = 8

In [8]:
# Load the training data from the path chosen in the configuration cell.
# pandas (`pd`) is imported in the notebook's first cell.
# NOTE: pd.read_pickle unpickles the file, which can execute arbitrary code;
# only load training files that come from a trusted source.
df_train = pd.read_pickle(train_path)

In [9]:
# Build the trajectories from the training DataFrame, then run the
# preprocessing validation on the result.
# (create_dt_dataset and validate_preprocessing are imported in the first cell.)
trajectories = create_dt_dataset(df_train)
validate_preprocessing(trajectories)

In [10]:
# Wrap the trajectories in a Dataset and a shuffling DataLoader.
# (RecommendationDataset and DataLoader are imported in the first cell.)
CONTEXT_LENGTH = 20  # value passed as RecommendationDataset's context_length
BATCH_SIZE = 64      # number of samples per DataLoader batch

dataset = RecommendationDataset(trajectories, context_length=CONTEXT_LENGTH)
loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

# Sanity-check a single batch.
batch = next(iter(loader))
print(f"Keys: {batch.keys()}")
print(f"States shape: {batch['states'].shape}")  # expected (BATCH_SIZE, CONTEXT_LENGTH)

Keys: dict_keys(['states', 'actions', 'rtg', 'timesteps', 'groups', 'targets'])
States shape: torch.Size([64, 20])


In [11]:
# Build the model.
# The item and group counts come from the dataset configuration cell so the
# model matches whichever dataset is selected there; hard-coding 752 / 8 would
# size the model for Netflix even when DATASET is 'goodreads'.
# (DecisionTransformer is imported in the first cell.)
model = DecisionTransformer(
    num_items=NUM_ITEMS,
    num_groups=NUM_GROUPS,
    hidden_dim=128,
    n_layers=3,
    n_heads=4
)

print(f"Parámetros totales: {sum(p.numel() for p in model.parameters())}")
# With the Netflix configuration the recorded count is 831,728 (~0.8M parameters).

Parámetros totales: 831728


In [29]:
# Basic training run.
# (train_decision_transformer is imported in the first cell.)
LEARNING_RATE = 1e-4
NUM_EPOCHS = 50

# Device selection: use the GPU when one is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

# NOTE(review): in the recorded run the train loss stays flat at ~6.62, which
# is ~ln(752). If the loss is a cross-entropy over the 752 items, that is the
# value a uniform prediction gives, i.e. the model is presumably not learning.
# Check the learning rate, the targets and the trainer before trusting results.
train, history = train_decision_transformer(
    model=model,
    train_loader=loader,
    optimizer=optimizer,
    device=device,
    num_epochs=NUM_EPOCHS
)

Using device: cuda
Epoch 1/50:
  Train Loss: 6.6255
Epoch 2/50:
  Train Loss: 6.6240
Epoch 3/50:
  Train Loss: 6.6240
Epoch 4/50:
  Train Loss: 6.6239
Epoch 5/50:
  Train Loss: 6.6237
Epoch 6/50:
  Train Loss: 6.6238
Epoch 7/50:
  Train Loss: 6.6236
Epoch 8/50:
  Train Loss: 6.6235
Epoch 9/50:
  Train Loss: 6.6235
Epoch 10/50:
  Train Loss: 6.6234
Epoch 11/50:
  Train Loss: 6.6232
Epoch 12/50:
  Train Loss: 6.6232
Epoch 13/50:
  Train Loss: 6.6231
Epoch 14/50:
  Train Loss: 6.6232
Epoch 15/50:
  Train Loss: 6.6230
Epoch 16/50:
  Train Loss: 6.6231
Epoch 17/50:
  Train Loss: 6.6231
Epoch 18/50:
  Train Loss: 6.6230
Epoch 19/50:
  Train Loss: 6.6230
Epoch 20/50:
  Train Loss: 6.6229
Epoch 21/50:
  Train Loss: 6.6229
Epoch 22/50:
  Train Loss: 6.6229
Epoch 23/50:
  Train Loss: 6.6228
Epoch 24/50:
  Train Loss: 6.6228
Epoch 25/50:
  Train Loss: 6.6229
Epoch 26/50:
  Train Loss: 6.6228
Epoch 27/50:
  Train Loss: 6.6227
Epoch 28/50:
  Train Loss: 6.6227
Epoch 29/50:
  Train Loss: 6.6227
Epoc

In [None]:
# Evaluate the model on the test users.
# (evaluate_model and json are imported in the first cell.)
print("\nEvaluando en cold-start...")

# JSON text is UTF-8; pass the encoding explicitly so reading the file does
# not depend on the platform's default encoding.
with open(test_path, 'r', encoding='utf-8') as f:
    test_data = json.load(f)

print(f"Test users: {len(test_data)}")

results = evaluate_model(
    model=model,
    test_data=test_data,
    device=device,
    target_return=None,  # use the maximum possible return
    k_list=[5, 10, 20]
)

# Print every metric returned by evaluate_model, one per line.
print("\n" + "="*60)
print("RESULTADOS FINALES")
print("="*60)
for metric, value in results.items():
    print(f"{metric:12s}: {value:.4f}")
print("="*60)

In [15]:
# Popularity baseline: fit it on the training trajectories and show a sample
# top-10 recommendation.
# PopularityRecommender is not among the first cell's imports, so it is
# imported here.
from src.models.baselines import PopularityRecommender

pop_rec = PopularityRecommender()
pop_rec.fit(trajectories)

rec = pop_rec.recommend(user_history=[10, 20, 30], k=10)
# Print plain ints: the returned list holds NumPy scalars, which render as
# `np.int64(...)` and make the output hard to read. `rec` itself is unchanged.
print(f"Recomendaciones basadas en popularidad: {[int(item) for item in rec]}")

Recomendaciones basadas en popularidad: [np.int64(505), np.int64(579), np.int64(420), np.int64(532), np.int64(589), np.int64(63), np.int64(13), np.int64(622), np.int64(358), np.int64(213)]


In [16]:
# Load the test users from the path chosen in the configuration cell.
# (json is imported in the first cell.)
print("\nEvaluando en cold-start...")

# JSON text is UTF-8; pass the encoding explicitly so reading the file does
# not depend on the platform's default encoding.
with open(test_path, 'r', encoding='utf-8') as f:
    test_data = json.load(f)

print(f"Test users: {len(test_data)}")


Evaluando en cold-start...
Test users: 1600


In [None]:
from src.evaluation.metrics import hit_rate_at_k

