In [3]:
import os
import random
import numpy as np
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision import transforms
import matplotlib.pyplot as plt

# Seeds
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# Config
DATA_DIR = "data/stanford_cars"
TRAIN_CSV = os.path.join(DATA_DIR, "labels_train.csv")
VAL_CSV = os.path.join(DATA_DIR, "labels_val.csv")
NAMES_CSV = os.path.join(DATA_DIR, "names.csv")
TRAIN_IMG_DIR = os.path.join(DATA_DIR, "train")
VAL_IMG_DIR = os.path.join(DATA_DIR, "validation")
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 64
EPOCHS = 20
LR = 0.001
IMG_SIZE = 128
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])

In [None]:
class CarsDataset(Dataset):
    def __init__(self, anno_csv, img_dir, names_csv, transform=None):
        self.anno = pd.read_csv(anno_csv, header=None)
        self.img_dir = img_dir
        self.transform = transform
        with open(names_csv) as f:
            self.class_names = [line.strip() for line in f]
        self.num_classes = len(self.class_names)
    def __len__(self):
        return len(self.anno)
    def __getitem__(self, idx):
        row = self.anno.iloc[idx]
        img_name = row[0]
        label = int(row[5]) - 1
        img_path = os.path.join(self.img_dir, img_name)
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, label

# Transforms
train_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean.tolist(), std.tolist())
])
val_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean.tolist(), std.tolist())
])

# DataLoaders
train_ds = CarsDataset(TRAIN_CSV, TRAIN_IMG_DIR, NAMES_CSV, transform=train_transform)
val_ds = CarsDataset(VAL_CSV, VAL_IMG_DIR, NAMES_CSV, transform=val_transform)
train_subset = torch.utils.data.Subset(train_ds, range(int(len(train_ds)*0.25)))  # Limitar a 500 imágenes
val_subset = torch.utils.data.Subset(val_ds, range(int(len(val_ds)*0.25)))      # Limitar a 200 imágenes
train_loader = DataLoader(train_subset, batch_size=64, shuffle=True, num_workers=0)
val_loader = DataLoader(val_subset, batch_size=64, shuffle=False, num_workers=0)
class_names = train_ds.class_names

In [None]:
# Simple CNN (nos basamos en una VGG pequeña)
class SimpleCNN(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1), nn.BatchNorm2d(32), nn.ReLU(),
            nn.Conv2d(32, 32, 3, padding=1), nn.BatchNorm2d(32), nn.ReLU(),
            nn.MaxPool2d(2), nn.Dropout(0.2),

            nn.Conv2d(32, 64, 3, padding=1), nn.BatchNorm2d(64), nn.ReLU(),
            nn.Conv2d(64, 64, 3, padding=1), nn.BatchNorm2d(64), nn.ReLU(),
            nn.MaxPool2d(2), nn.Dropout(0.3),

            nn.Conv2d(64, 128, 3, padding=1), nn.BatchNorm2d(128), nn.ReLU(),
            nn.Conv2d(128, 128, 3, padding=1), nn.BatchNorm2d(128), nn.ReLU(),
            nn.MaxPool2d(2), nn.Dropout(0.4),
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * (IMG_SIZE // 8) * (IMG_SIZE // 8), 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )
    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

In [None]:
model = SimpleCNN(num_classes=len(class_names)).to(DEVICE)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LR)

train_losses, val_losses = [], []
train_accuracies, val_accuracies = [], []

# Entrenamiento
for epoch in range(EPOCHS):
    model.train()
    running_loss, correct, total = 0.0, 0, 0
    for imgs, labels in train_loader:
        imgs, labels = imgs.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        _, preds = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (preds == labels).sum().item()
    train_acc = 100 * correct / total

    # Validación
    model.eval()
    val_loss, val_correct, val_total = 0.0, 0, 0
    y_true, y_pred = [], []
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs, labels = imgs.to(DEVICE), labels.to(DEVICE)
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, preds = torch.max(outputs, 1)
            val_total += labels.size(0)
            val_correct += (preds == labels).sum().item()
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())
    val_acc = 100 * val_correct / val_total

    # Guardar historial
    train_losses.append(running_loss / len(train_loader))
    val_losses.append(val_loss / len(val_loader))
    train_accuracies.append(train_acc)
    val_accuracies.append(val_acc)

    print(f"Epoch {epoch+1}/{EPOCHS} | Train Loss: {running_loss/len(train_loader):.4f} | Train Acc: {train_acc:.2f}%")
    print(f"           | Val   Loss: {val_loss/len(val_loader):.4f} | Val   Acc: {val_acc:.2f}%")

# Guardar modelo
torch.save(model.state_dict(), "pesos/simplecnn_best.pth")

Epoch 1/20 | Train Loss: 5.1691 | Train Acc: 4.80%
           | Val   Loss: 3.7211 | Val   Acc: 6.19%
Epoch 2/20 | Train Loss: 3.7133 | Train Acc: 8.23%
           | Val   Loss: 3.4874 | Val   Acc: 5.45%
Epoch 3/20 | Train Loss: 3.6232 | Train Acc: 9.60%
           | Val   Loss: 3.4625 | Val   Acc: 12.38%
Epoch 4/20 | Train Loss: 3.5671 | Train Acc: 9.96%
           | Val   Loss: 3.4537 | Val   Acc: 12.38%
Epoch 5/20 | Train Loss: 3.5366 | Train Acc: 9.78%
           | Val   Loss: 3.4238 | Val   Acc: 11.88%
Epoch 6/20 | Train Loss: 3.5402 | Train Acc: 10.35%
           | Val   Loss: 3.4146 | Val   Acc: 12.13%
Epoch 7/20 | Train Loss: 3.5040 | Train Acc: 10.28%
           | Val   Loss: 3.4253 | Val   Acc: 13.12%
Epoch 8/20 | Train Loss: 3.5017 | Train Acc: 9.15%
           | Val   Loss: 3.4350 | Val   Acc: 12.38%
Epoch 9/20 | Train Loss: 3.4938 | Train Acc: 10.84%
           | Val   Loss: 3.3687 | Val   Acc: 12.13%
Epoch 10/20 | Train Loss: 3.4934 | Train Acc: 10.84%
           | Val   

In [None]:
epochs = range(1, EPOCHS + 1)
plt.figure(figsize=(12, 5))

plt.subplot(1, 2, 1)
plt.plot(epochs, train_losses, label='Train Loss')
plt.plot(epochs, val_losses, label='Val Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Loss Curve')
plt.grid()
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(epochs, train_accuracies, label='Train Acc')
plt.plot(epochs, val_accuracies, label='Val Acc')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Accuracy Curve')
plt.grid()
plt.legend()

plt.tight_layout()
plt.show()