In [None]:
#Importaciones
import pandas as pd
import kagglehub
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
import numpy as np
from sklearn.metrics import accuracy_score, classification_report
import time
import matplotlib.pyplot as plt
import copy

from kagglehub import KaggleDatasetAdapter
from datasets import Dataset, load_dataset
from transformers import AutoTokenizer

In [9]:
# Descargar archivo
file_path = kagglehub.dataset_download("mexwell/amazon-reviews-multi")

csv_path = file_path + "/train.csv"

df = pd.read_csv(csv_path, encoding="latin-1")

print("Shape:", df.shape)
print("Columnas:", df.columns)
print("Primeras 5 filas:\n", df.head())


Downloading from https://www.kaggle.com/api/v1/datasets/download/mexwell/amazon-reviews-multi?dataset_version_number=1...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 131M/131M [01:59<00:00, 1.15MB/s] 

Extracting files...





Shape: (1200000, 9)
Columnas: Index(['Unnamed: 0', 'review_id', 'product_id', 'reviewer_id', 'stars',
       'review_body', 'review_title', 'language', 'product_category'],
      dtype='object')
Primeras 5 filas:
    Unnamed: 0   review_id          product_id          reviewer_id  stars  \
0           0  de_0203609  product_de_0865382  reviewer_de_0267719      1   
1           1  de_0559494  product_de_0678997  reviewer_de_0783625      1   
2           2  de_0238777  product_de_0372235  reviewer_de_0911426      1   
3           3  de_0477884  product_de_0719501  reviewer_de_0836478      1   
4           4  de_0270868  product_de_0022613  reviewer_de_0736276      1   

                                         review_body  \
0     Armband ist leider nach 1 Jahr kaputt gegangen   
1                 In der Lieferung war nur Ein Akku!   
2  Ein Stern, weil gar keine geht nicht. Es hande...   
3  Dachte, das w√É¬§ren einfach etwas festere Binde...   
4  Meine Kinder haben kaum damit gespielt

In [10]:
#preprocesamiento. ESTRUCTURA DE LAS REVIEWS

df["text"] = df["review_title"].fillna("") + " " + df["review_body"].fillna("")
df["labels"] = df["stars"] - 1
df = df[["text", "labels", "language"]]

In [11]:
#Conversi√≥n a hugging face dataset
dataset = Dataset.from_pandas(df)

#Divisi√≥n del dataset train/test
dataset = dataset.train_test_split(test_size=0.1, seed=42)
train_val = dataset["train"].train_test_split(test_size=0.1, seed=42)

train_dataset = train_val["train"]
validation_dataset = train_val["test"]
test_dataset = dataset["test"]

In [17]:
#Tokenizaci√≥n
# NOTA: Si eligen otro modelo. esta parte se corrige dado que la tokenizaci√≥n no ser√° la misma. Yo me encargo de ello
model_name = "xlm-roberta-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def preprocess(batch):
  return tokenizer(
      batch["text"],
      truncation=True,
      padding="max_length",
      max_length=180
  )

train_dataset = train_dataset.map(preprocess, batched=True)
validation_dataset = validation_dataset.map(preprocess, batched=True)
test_dataset = test_dataset.map(preprocess, batched=True)

train_dataset.set_format(type="torch", columns=["input_ids", "labels"])
validation_dataset.set_format(type="torch", columns=["input_ids", "labels"])
test_dataset.set_format(type="torch", columns=["input_ids", "labels"])

Map:   0%|          | 0/972000 [00:00<?, ? examples/s]

Map:   0%|          | 0/108000 [00:00<?, ? examples/s]

Map:   0%|          | 0/120000 [00:00<?, ? examples/s]

## Arquitectura de Red Neuronal

In [37]:
class RedNeuronalSimple(nn.Module):
    def __init__(self):
        super(RedNeuronalSimple, self).__init__()

        # -------------------------
        # Capa oculta 1: 180 entradas, 128 salidas, ReLU, Dropout 0.2
        # -------------------------
        self.capa_oculta_1 = nn.Linear(180, 128)
        self.activacion_1 = nn.ReLU() 
        self.dropout_1 = nn.Dropout(0.2)

        # -------------------------
        # Capa oculta 2: 128 entradas, 64 salidas, ReLU, Dropout 0.2
        # -------------------------
        self.capa_oculta_2 = nn.Linear(128, 64)
        self.activacion_2 = nn.ReLU() 
        self.dropout_2 = nn.Dropout(0.2)

        # -------------------------
        # Capa de salida: 64 entradas, 5 salidas (una por clase)
        # -------------------------
        self.capa_salida = nn.Linear(64, 5)
        # Nota: No ponemos softmax porque nn.CrossEntropyLoss lo aplica internamente

    def forward(self, x):
        # -------------------------
        # Paso por la capa oculta 1
        # -------------------------
        x = self.capa_oculta_1(x)
        x = self.activacion_1(x)
        x = self.dropout_1(x)

        # -------------------------
        # Paso por la capa oculta 2
        # -------------------------
        x = self.capa_oculta_2(x)
        x = self.activacion_2(x)
        x = self.dropout_2(x)

        # -------------------------
        # Paso por la capa de salida
        # -------------------------
        x = self.capa_salida(x)

        return x
    
class RedNeuronalIntermedia(nn.Module):
    def __init__(self):
        super(RedNeuronalIntermedia, self).__init__()

        # -------------------------
        # Capa oculta 1: 180 entradas, 256 salidas, ReLU, Dropout 0.2
        # -------------------------
        self.capa_oculta_1 = nn.Linear(180, 256)
        self.activacion_1 = nn.ReLU() 
        self.dropout_1 = nn.Dropout(0.2)

        # -------------------------
        # Capa oculta 2: 256 entradas, 128 salidas, ReLU, Dropout 0.2
        # -------------------------
        self.capa_oculta_2 = nn.Linear(256, 128)
        self.activacion_2 = nn.ReLU() 
        self.dropout_2 = nn.Dropout(0.2)

        # -------------------------
        # Capa de oculta 3: 128 entradas, 64 salidas, ReLU, Dropout 0.2
        # -------------------------
        self.capa_oculta_3 = nn.Linear(128, 64)
        self.activacion_3 = nn.ReLU() 
        self.dropout_3 = nn.Dropout(0.2)

        # -------------------------
        # Capa de salida: 64 entradas, 5 salidas (una por clase)
        # -------------------------
        self.capa_salida = nn.Linear(64, 5)
        # Nota: No ponemos softmax porque nn.CrossEntropyLoss lo aplica internamente

    def forward(self, x):
        # -------------------------
        # Paso por la capa oculta 1
        # -------------------------
        x = self.capa_oculta_1(x)
        x = self.activacion_1(x)
        x = self.dropout_1(x)

        # -------------------------
        # Paso por la capa oculta 2
        # -------------------------
        x = self.capa_oculta_2(x)
        x = self.activacion_2(x)
        x = self.dropout_2(x)

        # -------------------------
        # Paso por la capa oculta 3
        # -------------------------
        x = self.capa_oculta_3(x)
        x = self.activacion_3(x)
        x = self.dropout_3(x)

        # -------------------------
        # Paso por la capa de salida
        # -------------------------
        x = self.capa_salida(x)

        return x

class RedNeuronalAvanzada(nn.Module):
    def __init__(self):
        super(RedNeuronalAvanzada, self).__init__()

        # -------------------------
        # Capa oculta 1: 180 entradas, 512 salidas, ReLU, Dropout 0.2
        # -------------------------
        self.capa_oculta_1 = nn.Linear(180, 512)
        self.activacion_1 = nn.ReLU()
        self.dropout_1 = nn.Dropout(0.2)

        # -------------------------
        # Capa oculta 2: 512 entradas, 256 salidas, ReLU, Dropout 0.2
        # -------------------------
        self.capa_oculta_2 = nn.Linear(512, 256)
        self.activacion_2 = nn.ReLU()
        self.dropout_2 = nn.Dropout(0.2)

        # -------------------------
        # Capa oculta 3: 256 entradas, 128 salidas, ReLU, Dropout 0.2
        # -------------------------
        self.capa_oculta_3 = nn.Linear(256, 128)
        self.activacion_3 = nn.ReLU()
        self.dropout_3 = nn.Dropout(0.2)

        # -------------------------
        # Capa oculta 4: 128 entradas, 64 salidas, ReLU, Dropout 0.2
        # -------------------------
        self.capa_oculta_4 = nn.Linear(128, 64)
        self.activacion_4 = nn.ReLU()
        self.dropout_4 = nn.Dropout(0.2)

        # -------------------------
        # Capa de salida: 64 entradas, 5 salidas (una por clase)
        # -------------------------
        self.capa_salida = nn.Linear(64, 5)
        # Nota: No ponemos softmax porque nn.CrossEntropyLoss lo aplica internamente

    def forward(self, x):
        # -------------------------
        # Paso por la capa oculta 1
        # -------------------------
        x = self.capa_oculta_1(x)
        x = self.activacion_1(x)
        x = self.dropout_1(x)

        # -------------------------
        # Paso por la capa oculta 2
        # -------------------------
        x = self.capa_oculta_2(x)
        x = self.activacion_2(x)
        x = self.dropout_2(x)

        # -------------------------
        # Paso por la capa oculta 3
        # -------------------------
        x = self.capa_oculta_3(x)
        x = self.activacion_3(x)
        x = self.dropout_3(x)

        # -------------------------
        # Paso por la capa oculta 4
        # -------------------------
        x = self.capa_oculta_4(x)
        x = self.activacion_4(x)
        x = self.dropout_4(x)

        # -------------------------
        # Paso por la capa de salida
        # -------------------------
        x = self.capa_salida(x)

        return x

def setup_training(model, learning_rate=0.001):
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()
    return optimizer, criterion

#### Entrenamiento


In [48]:
# Configuraci√≥n del dispositivo
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"üîß Dispositivo de entrenamiento: {device}")

# Funci√≥n de entrenamiento por √©poca
def train_epoch(model, train_loader, optimizer, criterion, device):
    """Entrena el modelo por una √©poca"""
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    
    for batch in train_loader:
        # Obtener datos del batch (ya est√°n en formato torch)
        inputs = batch['input_ids'].float().to(device)  # Convertir a float para compatibilidad
        labels = batch['labels'].to(device)
        
        # Forward pass
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
        # Backward pass
        loss.backward()
        optimizer.step()
        
        # Estad√≠sticas
        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    
    epoch_loss = running_loss / len(train_loader)
    epoch_accuracy = 100 * correct / total
    
    return epoch_loss, epoch_accuracy

# Funci√≥n de evaluaci√≥n
def evaluate_model(model, val_loader, criterion, device):
    """Eval√∫a el modelo en el conjunto de validaci√≥n"""
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for batch in val_loader:
            inputs = batch['input_ids'].float().to(device)
            labels = batch['labels'].to(device)
            
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    val_loss = running_loss / len(val_loader)
    val_accuracy = 100 * correct / total
    
    return val_loss, val_accuracy

# Funci√≥n principal de entrenamiento
def train_model(model, train_loader, val_loader, optimizer, criterion, epochs, device, model_name, exp_name):
    """Funci√≥n principal de entrenamiento con guardado del mejor modelo"""
    print(f"\nüöÄ Iniciando entrenamiento - {model_name} - {exp_name}")
    print("=" * 70)
    
    # M√©tricas para seguimiento
    train_losses = []
    train_accuracies = []
    val_losses = []
    val_accuracies = []
    
    # Variables para el mejor modelo
    best_val_accuracy = 0.0
    best_model_state = None
    best_epoch = 0
    
    # Loop de entrenamiento
    for epoch in range(epochs):
        print(f"\nüìä √âpoca {epoch + 1}/{epochs}")
        print("-" * 50)
        
        # Entrenar una √©poca
        train_loss, train_acc = train_epoch(model, train_loader, optimizer, criterion, device)
        
        # Evaluar en validaci√≥n
        val_loss, val_acc = evaluate_model(model, val_loader, criterion, device)
        
        # Guardar m√©tricas
        train_losses.append(train_loss)
        train_accuracies.append(train_acc)
        val_losses.append(val_loss)
        val_accuracies.append(val_acc)
        
        # Guardar mejor modelo
        if val_acc > best_val_accuracy:
            best_val_accuracy = val_acc
            best_model_state = model.state_dict().copy()
            best_epoch = epoch + 1
        
        # Imprimir m√©tricas
        print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%")
        print(f"Val Loss: {val_loss:.4f}   | Val Acc: {val_acc:.2f}%")
        
        # Indicar si es el mejor modelo hasta ahora
        if val_acc == best_val_accuracy:
            print("üåü ¬°Nuevo mejor modelo!")
    
    # Cargar el mejor modelo
    model.load_state_dict(best_model_state)
    
    print(f"\n‚úÖ Entrenamiento completado!")
    print(f"üèÜ Mejor modelo: √âpoca {best_epoch} con Val Accuracy: {best_val_accuracy:.2f}%")
    
    return {
        'train_losses': train_losses,
        'train_accuracies': train_accuracies,
        'val_losses': val_losses,
        'val_accuracies': val_accuracies,
        'best_val_accuracy': best_val_accuracy,
        'best_epoch': best_epoch,
        'model': model
    }

üîß Dispositivo de entrenamiento: cpu


In [None]:
# EJECUCI√ìN DE TODOS LOS EXPERIMENTOS
print("üéØ INICIANDO EXPERIMENTOS DE REDES NEURONALES")
print("=" * 80)

# Definici√≥n de modelos y experimentos
models_dict = {
    'RedNeuronalSimple': RedNeuronalSimple,
    'RedNeuronalIntermedia': RedNeuronalIntermedia,
    'RedNeuronalAvanzada': RedNeuronalAvanzada
}

experiments = {
    "exp_1": {"learning_rate": 0.001, "epochs": 100, "batch_size": 1024},
    "exp_2": {"learning_rate": 0.0005, "epochs": 500, "batch_size": 2048},
}

# Diccionario para almacenar todos los resultados
all_results = {}

# Ejecutar experimentos para cada modelo y configuraci√≥n
for model_name, ModelClass in models_dict.items():
    print(f"\nüî¨ MODELO: {model_name}")
    print("=" * 60)
    
    model_results = {}
    
    for exp_name, params in experiments.items():
        print(f"\nüìã Experimento: {exp_name}")
        print(f"   Par√°metros: {params}")
        
        # Crear DataLoaders con batch_size espec√≠fico del experimento
        train_loader = DataLoader(train_dataset, 
                                 batch_size=params["batch_size"], 
                                 shuffle=True)
        val_loader = DataLoader(validation_dataset, 
                               batch_size=params["batch_size"], 
                               shuffle=False)
        
        # Crear modelo fresco para cada experimento
        model = ModelClass().to(device)
        optimizer, criterion = setup_training(model, learning_rate=params["learning_rate"])
        
        # Entrenar modelo
        results = train_model(
            model=model,
            train_loader=train_loader,
            val_loader=val_loader,
            optimizer=optimizer,
            criterion=criterion,
            epochs=params["epochs"],
            device=device,
            model_name=model_name,
            exp_name=exp_name
        )
        
        # Guardar resultados
        results['params'] = params
        model_results[exp_name] = results
    
    # Guardar resultados del modelo
    all_results[model_name] = model_results

print(f"\nüéâ TODOS LOS EXPERIMENTOS COMPLETADOS")
print("=" * 80)

üéØ INICIANDO EXPERIMENTOS DE REDES NEURONALES

üî¨ MODELO: RedNeuronalSimple

üìã Experimento: exp_1
   Par√°metros: {'learning_rate': 0.001, 'epochs': 1, 'batch_size': 1024}

üöÄ Iniciando entrenamiento - RedNeuronalSimple - exp_1

üìä √âpoca 1/1
--------------------------------------------------
Train Loss: 63.8235 | Train Acc: 20.01%
Val Loss: 1.6094   | Val Acc: 20.03%
üåü ¬°Nuevo mejor modelo!

‚úÖ Entrenamiento completado!
üèÜ Mejor modelo: √âpoca 1 con Val Accuracy: 20.03%

üî¨ MODELO: RedNeuronalIntermedia

üìã Experimento: exp_1
   Par√°metros: {'learning_rate': 0.001, 'epochs': 1, 'batch_size': 1024}

üöÄ Iniciando entrenamiento - RedNeuronalIntermedia - exp_1

üìä √âpoca 1/1
--------------------------------------------------
Train Loss: 63.8235 | Train Acc: 20.01%
Val Loss: 1.6094   | Val Acc: 20.03%
üåü ¬°Nuevo mejor modelo!

‚úÖ Entrenamiento completado!
üèÜ Mejor modelo: √âpoca 1 con Val Accuracy: 20.03%

üî¨ MODELO: RedNeuronalIntermedia

üìã Experimento:

In [50]:
# GUARDAR RESULTADOS EN ARCHIVOS CSV
print("\nüíæ GUARDANDO RESULTADOS EN ARCHIVOS CSV")
print("=" * 60)

import os

# Crear directorio de resultados si no existe
results_dir = "resultados"
if not os.path.exists(results_dir):
    os.makedirs(results_dir)
    print(f"üìÅ Directorio '{results_dir}' creado")

# Mapeo de nombres de modelos a nombres de archivos
file_mapping = {
    'RedNeuronalSimple': 'resultados_simple.csv',
    'RedNeuronalIntermedia': 'resultados_intermedia.csv',
    'RedNeuronalAvanzada': 'resultados_avanzada.csv'
}

# Guardar resultados de cada modelo
for model_name, model_results in all_results.items():
    # Crear lista para almacenar datos del CSV
    csv_data = []
    
    print(f"\nüìä Procesando {model_name}...")
    
    # Procesar cada experimento del modelo
    for exp_name, results in model_results.items():
        params = results['params']
        
        # Crear fila base con informaci√≥n del experimento
        base_row = {
            'modelo': model_name,
            'experimento': exp_name,
            'learning_rate': params['learning_rate'],
            'epochs_total': params['epochs'],
            'batch_size': params['batch_size'],
            'mejor_epoch': results['best_epoch'],
            'mejor_val_accuracy': results['best_val_accuracy']
        }
        
        # Agregar m√©tricas por √©poca
        for epoch in range(len(results['train_losses'])):
            row = base_row.copy()
            row.update({
                'epoca': epoch + 1,
                'train_loss': results['train_losses'][epoch],
                'train_accuracy': results['train_accuracies'][epoch],
                'val_loss': results['val_losses'][epoch],
                'val_accuracy': results['val_accuracies'][epoch],
                'es_mejor_modelo': (epoch + 1) == results['best_epoch']
            })
            csv_data.append(row)
    
    # Convertir a DataFrame y guardar
    df_results = pd.DataFrame(csv_data)
    
    # Ordenar por experimento y √©poca para mejor lectura
    df_results = df_results.sort_values(['experimento', 'epoca'])
    
    # Nombre del archivo
    filename = file_mapping[model_name]
    filepath = os.path.join(results_dir, filename)
    
    # Guardar CSV
    df_results.to_csv(filepath, index=False, encoding='utf-8')
    
    print(f"   ‚úÖ {filename} guardado ({len(df_results)} filas)")
    print(f"      Columnas: {list(df_results.columns)}")

print(f"\nüéâ TODOS LOS ARCHIVOS CSV GUARDADOS EN '{results_dir}/'")
print("=" * 60)

# Mostrar resumen de archivos creados
print("\nüìã ARCHIVOS CREADOS:")
for model_name, filename in file_mapping.items():
    filepath = os.path.join(results_dir, filename)
    if os.path.exists(filepath):
        file_size = os.path.getsize(filepath)
        print(f"   üìÑ {filename} ({file_size:,} bytes)")
        
        # Mostrar preview de las primeras filas
        df_preview = pd.read_csv(filepath, nrows=3)
        print(f"      Preview: {len(df_preview)} filas de muestra")

print(f"\nüí° Para cargar los resultados posteriormente:")
print(f"   df_simple = pd.read_csv('{results_dir}/resultados_simple.csv')")
print(f"   df_intermedia = pd.read_csv('{results_dir}/resultados_intermedia.csv')")
print(f"   df_avanzada = pd.read_csv('{results_dir}/resultados_avanzada.csv')")


üíæ GUARDANDO RESULTADOS EN ARCHIVOS CSV
üìÅ Directorio 'resultados' creado

üìä Procesando RedNeuronalSimple...
   ‚úÖ resultados_simple.csv guardado (1 filas)
      Columnas: ['modelo', 'experimento', 'learning_rate', 'epochs_total', 'batch_size', 'mejor_epoch', 'mejor_val_accuracy', 'epoca', 'train_loss', 'train_accuracy', 'val_loss', 'val_accuracy', 'es_mejor_modelo']

üìä Procesando RedNeuronalIntermedia...
   ‚úÖ resultados_intermedia.csv guardado (1 filas)
      Columnas: ['modelo', 'experimento', 'learning_rate', 'epochs_total', 'batch_size', 'mejor_epoch', 'mejor_val_accuracy', 'epoca', 'train_loss', 'train_accuracy', 'val_loss', 'val_accuracy', 'es_mejor_modelo']

üìä Procesando RedNeuronalAvanzada...
   ‚úÖ resultados_avanzada.csv guardado (1 filas)
      Columnas: ['modelo', 'experimento', 'learning_rate', 'epochs_total', 'batch_size', 'mejor_epoch', 'mejor_val_accuracy', 'epoca', 'train_loss', 'train_accuracy', 'val_loss', 'val_accuracy', 'es_mejor_modelo']

üéâ TOD

In [51]:
# RESUMEN FINAL DE RESULTADOS
print("\nüìà RESUMEN FINAL DE TODOS LOS EXPERIMENTOS")
print("=" * 80)

# Tabla de resultados
print(f"{'Modelo':<20} {'Experimento':<12} {'Val Accuracy':<15} {'Mejor √âpoca':<12} {'LR':<8} {'Batch':<8}")
print("-" * 80)

best_overall = {'accuracy': 0, 'model': '', 'exp': '', 'epoch': 0}

for model_name, model_results in all_results.items():
    for exp_name, results in model_results.items():
        accuracy = results['best_val_accuracy']
        epoch = results['best_epoch']
        lr = results['params']['learning_rate']
        batch_size = results['params']['batch_size']
        
        print(f"{model_name:<20} {exp_name:<12} {accuracy:<15.2f}% {epoch:<12} {lr:<8} {batch_size:<8}")
        
        # Tracking del mejor resultado general
        if accuracy > best_overall['accuracy']:
            best_overall['accuracy'] = accuracy
            best_overall['model'] = model_name
            best_overall['exp'] = exp_name
            best_overall['epoch'] = epoch


üìà RESUMEN FINAL DE TODOS LOS EXPERIMENTOS
Modelo               Experimento  Val Accuracy    Mejor √âpoca  LR       Batch   
--------------------------------------------------------------------------------
RedNeuronalSimple    exp_1        20.03          % 1            0.001    1024    
RedNeuronalIntermedia exp_1        19.93          % 1            0.001    1024    
RedNeuronalAvanzada  exp_1        20.03          % 1            0.001    1024    
