In [None]:
# Create and activate a virtual environment before running this cell; the
# line below installs the packages listed in requirements.txt.
%pip install -r requirements.txt

In [None]:
import torch
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
from torchmetrics.functional import accuracy


### Dataset e Análise

In [None]:

# Settings for this cell. SPLIT_SEED makes the train/validation split
# identical on every "Restart & Run All".
SPLIT_SEED = 42
BATCH_SIZE = 64

# Preprocessing: convert each image to a tensor, then normalise its single
# channel with mean=0.5 and std=0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Full FashionMNIST training set (downloaded into ./data when missing).
full_train_data = datasets.FashionMNIST(
    root='./data',
    train=True,
    transform=transform,
    download=True
)

# Hold out part of the training set for validation: 50k samples for training,
# the remainder for validation.
train_size = 50000
val_size = len(full_train_data) - train_size
# A dedicated, seeded generator keeps the split independent of whatever else
# consumed the global RNG before this cell ran.
train_data, val_data = random_split(
    full_train_data,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# FashionMNIST test set.
test_data = datasets.FashionMNIST(
    root='./data',
    train=False,
    transform=transform,
    download=True
)

# DataLoaders: only the training loader shuffles; validation and test keep the
# dataset order so predictions can be matched back to dataset indices.
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
val_loader   = DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=False)
test_loader  = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)


In [None]:
# Inspect the first test sample: shape of the image tensor and its label.
print(f'Torch size: {test_data[0][0].shape} e labels: {test_data[0][1]}')

In [None]:
# Class names and the name -> index mapping. `train_data` is the result of
# random_split, so the underlying dataset is reached through `.dataset`.
print(train_data.dataset.classes)
print(train_data.dataset.class_to_idx)

In [None]:
# Take the first sample of the training split and display it, using its
# integer label as the title. `image` and `label` are reused by the next cell.
image, label = train_data[0]
print(f"Image shape: {image.shape}")
print(f"image.squeeze() shape: {image.squeeze().shape}")
plt.imshow(image.squeeze())
plt.title(label);

In [None]:
# Human-readable class names, indexed by label; later cells and helper
# functions read this module-level name.
class_names = train_data.dataset.classes
# Same sample as the previous cell, now in grayscale with the class name as title.
plt.imshow(image.squeeze(), cmap="gray")
plt.title(class_names[label]);

In [None]:
# Show a 5x5 grid of randomly chosen training images with their class names.
# The loop uses its own variable names so it does not overwrite the `image` /
# `label` pair bound by the earlier single-image cell (re-running that cell
# after this one would otherwise show a mismatched title).
fig = plt.figure(figsize=(10, 10))
rows, cols = 5, 5
for i in range(1, rows * cols + 1):
    random_idx = torch.randint(0, len(train_data), size=[1]).item()
    grid_img, grid_label = train_data[random_idx]
    ax = fig.add_subplot(rows, cols, i)
    ax.imshow(grid_img.squeeze(), cmap="gray")
    ax.set_title(class_names[grid_label])
    ax.axis(False)

In [None]:
# Pull one batch from the training loader and display the shapes of its image
# and label tensors (the tuple on the last line is the cell output).
first_train_image_batch, first_batch_labels = next(iter(train_loader))
first_train_image_batch.shape, first_batch_labels.shape

### Modelos

In [None]:
class SimpleCNN(nn.Module):
    """Baseline CNN: two conv -> ReLU -> 2x2 max-pool stages, then two linear layers.

    The flatten size 7*7*64 matches single-channel 28x28 inputs: the padded 3x3
    convolutions keep the spatial size and each pooling step halves it
    (28 -> 14 -> 7). The forward pass returns raw logits for 10 classes.
    """

    def __init__(self):
        super().__init__()
        # Both convolutions share the same 3x3 / stride 1 / padding 1 geometry.
        conv_geometry = dict(kernel_size=3, stride=1, padding=1)
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, **conv_geometry)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, **conv_geometry)
        # Classifier head on the flattened 7x7x64 feature maps.
        self.fc1 = nn.Linear(7 * 7 * 64, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        # Two identical stages: convolution, activation, 2x2 max pooling.
        for conv in (self.conv1, self.conv2):
            x = F.max_pool2d(F.relu(conv(x)), 2)
        # Flatten to (batch, 7*7*64) for the fully connected layers.
        flat = x.view(-1, 7 * 7 * 64)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

In [None]:
class SimpleCNNDropout(nn.Module):
    """SimpleCNN variant regularised with dropout on hidden activations.

    ``dropout1`` (p=0.3) is applied to the flattened convolutional features and
    ``dropout2`` (p=0.5) to the hidden fully connected activations. The logits
    produced by ``fc2`` are returned untouched: dropping logits would randomly
    zero and rescale class scores during training and corrupt the loss.

    The flatten size 7*7*64 matches single-channel 28x28 inputs (two 2x2
    poolings: 28 -> 14 -> 7). Parameter names are unchanged, so existing
    state dicts still load.
    """

    def __init__(self):
        super().__init__()
        # Convolution layer 1
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32,
                               kernel_size=3, stride=1, padding=1)
        # Convolution layer 2
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64,
                               kernel_size=3, stride=1, padding=1)
        # Fully connected layers on the 7x7x64 feature maps
        self.fc1 = nn.Linear(7 * 7 * 64, 128)
        self.fc2 = nn.Linear(128, 10)
        self.dropout1 = nn.Dropout(0.3)
        self.dropout2 = nn.Dropout(0.5)

    def forward(self, x):
        # Stage 1: convolution -> activation -> 2x2 max pooling
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        # Stage 2: convolution -> activation -> 2x2 max pooling
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Flatten everything except the batch dimension, so a wrongly sized
        # input fails at fc1 instead of silently changing the batch size.
        x = self.dropout1(x.flatten(1))
        x = F.relu(self.fc1(x))
        x = self.dropout2(x)
        # Raw logits: no dropout after the final layer.
        return self.fc2(x)

In [None]:
class SimpleCNNDropoutNorm(nn.Module):
    """SimpleCNN variant with batch normalisation and dropout.

    Each convolution is followed by BatchNorm2d, ReLU and 2x2 max pooling.
    ``dropout1`` (p=0.3) is applied to the flattened convolutional features and
    ``dropout2`` (p=0.5) to the hidden fully connected activations. The logits
    produced by ``fc2`` are returned untouched: dropping logits would randomly
    zero and rescale class scores during training and corrupt the loss.

    The flatten size 7*7*64 matches single-channel 28x28 inputs (two 2x2
    poolings: 28 -> 14 -> 7). Parameter names are unchanged, so existing
    state dicts still load.
    """

    def __init__(self):
        super().__init__()
        # Convolution layer 1 + batch norm
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32,
                               kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        # Convolution layer 2 + batch norm
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64,
                               kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        # Fully connected layers on the 7x7x64 feature maps
        self.fc1 = nn.Linear(7 * 7 * 64, 128)
        self.fc2 = nn.Linear(128, 10)
        self.dropout1 = nn.Dropout(0.3)
        self.dropout2 = nn.Dropout(0.5)

    def forward(self, x):
        # Stage 1: convolution -> batch norm -> activation -> pooling
        x = F.max_pool2d(F.relu(self.bn1(self.conv1(x))), 2)
        # Stage 2: convolution -> batch norm -> activation -> pooling
        x = F.max_pool2d(F.relu(self.bn2(self.conv2(x))), 2)
        # Flatten everything except the batch dimension, so a wrongly sized
        # input fails at fc1 instead of silently changing the batch size.
        x = self.dropout1(x.flatten(1))
        x = F.relu(self.fc1(x))
        x = self.dropout2(x)
        # Raw logits: no dropout after the final layer.
        return self.fc2(x)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleCNNDropoutNormRes(nn.Module):
    """CNN with batch norm, dropout and one skip connection from the input.

    Block 1 is conv1 -> bn1 -> ReLU -> conv2 -> bn2, after which the network
    input is added back before the ReLU and 2x2 pooling. A third conv/bn/ReLU/
    pool stage follows, then the two-layer classifier head.

    ``dropout1`` (p=0.3) is applied to the flattened convolutional features and
    ``dropout2`` (p=0.5) to the hidden fully connected activations. The logits
    produced by ``fc2`` are returned untouched: dropping logits would randomly
    zero and rescale class scores during training and corrupt the loss.

    The flatten size 7*7*64 matches single-channel 28x28 inputs (two 2x2
    poolings: 28 -> 14 -> 7). Parameter names are unchanged, so existing
    state dicts still load.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)

        self.conv2 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(32)

        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(64)

        self.fc1 = nn.Linear(7 * 7 * 64, 128)
        self.dropout1 = nn.Dropout(0.3)
        self.fc2 = nn.Linear(128, 10)
        self.dropout2 = nn.Dropout(0.5)

    def forward(self, x):
        # Residual block 1. The padded 3x3 convolutions keep the spatial size,
        # so the input can be added directly; no resizing is needed. The input
        # has 1 channel and broadcasts across the 32 feature maps.
        # NOTE(review): a conventional residual block would project the
        # shortcut (e.g. 1x1 conv) instead of broadcasting - confirm intent.
        identity = x
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out = F.relu(out + identity)
        out = F.max_pool2d(out, 2)

        # Second stage: convolution -> batch norm -> activation -> pooling
        out = F.max_pool2d(F.relu(self.bn3(self.conv3(out))), 2)

        # Flatten everything except the batch dimension, so a wrongly sized
        # input fails at fc1 instead of silently changing the batch size.
        out = self.dropout1(out.flatten(1))
        out = F.relu(self.fc1(out))
        out = self.dropout2(out)
        # Raw logits: no dropout after the final layer.
        return self.fc2(out)


### Treinamento

In [None]:
# Cross-entropy loss shared by all models; the training function reads this
# module-level name.
criterion = nn.CrossEntropyLoss()

In [None]:
def treinamento(model, optimizer, num_epochs=5, train_dl=None, val_dl=None, loss_fn=None):
    """Train ``model`` and evaluate it on the validation data after every epoch.

    Args:
        model: the ``nn.Module`` to train (updated in place).
        optimizer: optimizer already bound to ``model``'s parameters.
        num_epochs: number of passes over the training data.
        train_dl: iterable of ``(inputs, labels)`` training batches; defaults
            to the notebook-level ``train_loader``.
        val_dl: iterable of ``(inputs, labels)`` validation batches; defaults
            to the notebook-level ``val_loader``.
        loss_fn: callable ``loss_fn(outputs, labels)``; defaults to the
            notebook-level ``criterion``. It is assumed to return the mean
            loss over the batch (the default reduction of CrossEntropyLoss).

    Returns:
        ``(model, train_losses, eval_losses)`` where each list holds one
        per-sample average loss per epoch. An empty loader yields ``nan``.
    """
    train_dl = train_loader if train_dl is None else train_dl
    val_dl = val_loader if val_dl is None else val_dl
    loss_fn = criterion if loss_fn is None else loss_fn

    train_losses = []
    eval_losses = []
    for epoch in range(num_epochs):
        # ---- training pass ----
        model.train()
        loss_sum, n_seen = 0.0, 0
        for inputs, labels in train_dl:
            # Forward pass
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)
            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Weight each batch mean by its size so a smaller last batch does
            # not count as much as a full one.
            batch_size = len(labels)
            loss_sum += loss.item() * batch_size
            n_seen += batch_size
        avg_loss = loss_sum / n_seen if n_seen else float("nan")
        train_losses.append(avg_loss)
        print(f"Epoch train [{epoch+1}/{num_epochs}], Loss train: {avg_loss:.4f}")

        # ---- validation pass (no gradients, dropout/batch-norm in eval mode) ----
        model.eval()
        loss_sum, n_seen = 0.0, 0
        with torch.inference_mode():
            for inputs, labels in val_dl:
                outputs = model(inputs)
                loss = loss_fn(outputs, labels)
                batch_size = len(labels)
                loss_sum += loss.item() * batch_size
                n_seen += batch_size
        avg_loss_eval = loss_sum / n_seen if n_seen else float("nan")
        eval_losses.append(avg_loss_eval)
        print(f"Epoch eval [{epoch+1}/{num_epochs}], Loss eval: {avg_loss_eval:.4f}")
    return model, train_losses, eval_losses

In [None]:
# Optimizer factory shared by all models.
def escolha_optimizer(model, lr=0.001):
    """Return an Adam optimizer over ``model``'s parameters.

    Args:
        model: module whose ``parameters()`` will be optimised.
        lr: learning rate; defaults to 0.001, the value used for every model
            in this notebook.
    """
    return torch.optim.Adam(model.parameters(), lr=lr)

### Métricas e Avaliação

In [None]:
def grafico_treinamento(nome_modelo, num_epochs, train_losses, eval_losses):
    """Plot training vs. validation loss per epoch, save it as PNG and show it.

    Args:
        nome_modelo: model name, used in the title and in the output file name
            ``{nome_modelo}_loss_curve.png`` (written to the working directory).
        num_epochs: number of epochs; both loss lists must have this length.
        train_losses: per-epoch training losses.
        eval_losses: per-epoch validation losses.
    """
    epochs = range(1, num_epochs + 1)
    # Draw on a dedicated figure so consecutive calls never pile their curves
    # onto whatever figure happens to be current.
    fig, ax = plt.subplots()
    ax.plot(epochs, train_losses, label="Train Loss")
    ax.plot(epochs, eval_losses, label="Validation Loss")
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Loss")
    ax.set_title(f"{nome_modelo} Train vs Validation Loss")
    ax.legend()
    ax.grid(True)
    fig.savefig(f"{nome_modelo}_loss_curve.png")
    plt.show()
    # Release the figure once displayed and saved.
    plt.close(fig)

In [None]:
def predicao(model, loader=None, verbose=True):
    """Predict a class index for every sample served by ``loader``.

    Args:
        model: classifier returning one score per class for each input row.
        loader: iterable of ``(inputs, labels)`` batches; labels are ignored.
            Defaults to the notebook-level ``test_loader``.
        verbose: when True (default), print the predicted class names using
            the notebook-level ``class_names``.

    Returns:
        A list with one predicted class index per sample, in loader order.
    """
    loader = test_loader if loader is None else loader

    predictions = []
    # Switch dropout / batch-norm layers to inference behaviour. The call
    # parentheses matter: a bare `model.eval` only references the method.
    model.eval()
    with torch.inference_mode():  # no gradient tracking needed for inference
        for X, _ in loader:  # `_`: labels are not used here
            y_pred = model(X)
            # Highest-scoring class per sample
            predicted_classes = torch.argmax(y_pred, dim=1)
            predictions.extend(predicted_classes.cpu().numpy())

    if verbose:
        print("Predictions:", [class_names[x] for x in predictions])
    return predictions

In [None]:
def exatidao_modelo(model):
    """Compute the accuracy of ``model`` over the notebook-level ``test_loader``.

    Args:
        model: classifier returning one score per class for each input row.

    Returns:
        The value returned by torchmetrics' functional ``accuracy`` for the
        10-class multiclass task, computed over all test predictions at once.
    """
    all_preds = []
    all_labels = []

    # Evaluate with dropout / batch-norm in inference behaviour regardless of
    # the mode the caller left the model in.
    model.eval()
    with torch.inference_mode():
        for X, y in test_loader:
            preds = torch.argmax(model(X), dim=1)
            all_preds.append(preds)
            all_labels.append(y)

    # Concatenate the per-batch tensors into one tensor each
    all_preds = torch.cat(all_preds)
    all_labels = torch.cat(all_labels)

    # Accuracy over the whole test set via torchmetrics
    acc = accuracy(all_preds, all_labels, task='multiclass', num_classes=10)
    return acc


In [None]:
def predicao_unica(model, name_model, predictions):
    """Show one random test image with its true and predicted class, and save it.

    Args:
        model: unused; kept for interface compatibility with existing calls.
        name_model: prefix of the saved file
            ``{name_model}_predicao_index{index}.png``.
        predictions: predicted class indices aligned with ``test_data`` order
            (as produced by ``predicao`` on the unshuffled test loader).
    """
    # Sample over test *samples*. len(test_loader) is the number of batches and
    # would restrict the choice to the first few images. The min() guards
    # against a predictions list shorter than the dataset.
    n_candidates = min(len(test_data), len(predictions))
    index = torch.randint(n_candidates, size=[1]).item()

    image, label = test_data[index]
    # Look up only the prediction that is displayed.
    predicted_name = class_names[predictions[index]]

    print(f"Image shape: {image.shape}")
    # squeeze() removes size-1 dimensions (the channel axis of a [1, H, W]
    # image) so imshow receives a 2-D array.
    print(f"image.squeeze() shape: {image.squeeze().shape}")
    plt.imshow(image.squeeze())
    plt.title(f"Real - {class_names[label]} : Predicted - {predicted_name}");
    plt.axis("off")

    # Save the figure before showing it
    plt.savefig(f"{name_model}_predicao_index{index}.png", bbox_inches='tight')
    plt.show()

### Chamadas treinamento

In [None]:
# Seed the global RNG before building the models so that weight initialisation
# (and the shuffling / dropout that follow) is reproducible across
# "Restart & Run All".
torch.manual_seed(42)

# One instance of each architecture under comparison.
model_cnn = SimpleCNN()
model_cnn_drop = SimpleCNNDropout()
model_cnn_norm = SimpleCNNDropoutNorm()
model_cnn_res = SimpleCNNDropoutNormRes()

In [None]:
# One optimizer per model, each bound to that model's own parameters.
optimizer_cnn = escolha_optimizer(model_cnn)
optimizer_drop = escolha_optimizer(model_cnn_drop)
optimizer_norm = escolha_optimizer(model_cnn_norm)
optimizer_res = escolha_optimizer(model_cnn_res)

In [None]:
# Number of epochs used for every model; also read by the plotting cell.
num_epochs = 5

# Train the four models one after another, keeping the per-epoch training and
# validation losses of each for the loss-curve plots.
model_cnn, train_losses_cnn, eval_losses_cnn  = treinamento(model_cnn, optimizer_cnn, num_epochs)
model_cnn_drop, train_losses_drop, eval_losses_drop  = treinamento(model_cnn_drop, optimizer_drop, num_epochs)
model_cnn_norm, train_losses_norm, eval_losses_norm  = treinamento(model_cnn_norm, optimizer_norm, num_epochs)
model_cnn_res, train_losses_res, eval_losses_res  = treinamento(model_cnn_res, optimizer_res, num_epochs)

In [None]:
# Plot (and save) the train/validation loss curves of every trained model,
# in the same order as before: cnn, cnn_drop, cnn_norm, cnn_res.
_loss_histories = (
    ('cnn', train_losses_cnn, eval_losses_cnn),
    ('cnn_drop', train_losses_drop, eval_losses_drop),
    ('cnn_norm', train_losses_norm, eval_losses_norm),
    ('cnn_res', train_losses_res, eval_losses_res),
)
for _nome, _treino, _validacao in _loss_histories:
    grafico_treinamento(_nome, num_epochs, _treino, _validacao)

In [None]:
# Test-set predictions of each model; the lists are consumed by the
# single-prediction visualisation cell.
predictions_cnn = predicao(model_cnn)
predictions_drop = predicao(model_cnn_drop)
predictions_norm = predicao(model_cnn_norm)
predictions_res = predicao(model_cnn_res)

In [None]:
# Test accuracy of each model, followed by a side-by-side printout.
exatidao_cnn = exatidao_modelo(model_cnn)
exatidao_drop = exatidao_modelo(model_cnn_drop)
exatidao_norm = exatidao_modelo(model_cnn_norm)
exatidao_res = exatidao_modelo(model_cnn_res)
print(f"CNN: {exatidao_cnn}")
print(f"CNN-DROP: {exatidao_drop}")
print(f"CNN-NORM: {exatidao_norm}")
print(f"CNN-RES: {exatidao_res}")

In [None]:
# Show and save one random test prediction per model. Each call passes the
# model's own name (matching the names used for the loss-curve files) so the
# saved images are attributed to the right model instead of all being 'cnn'.
predicao_unica(model_cnn, 'cnn', predictions_cnn)
predicao_unica(model_cnn_drop, 'cnn_drop', predictions_drop)
predicao_unica(model_cnn_norm, 'cnn_norm', predictions_norm)
predicao_unica(model_cnn_res, 'cnn_res', predictions_res)