In [23]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [24]:
# Environment check: CUDA visibility, the CUDA version PyTorch was built with, and the GPU name.
print("CUDA disponível:", torch.cuda.is_available())
print("Versão do CUDA no PyTorch:", torch.version.cuda)
if torch.cuda.is_available():
    print("Nome da GPU:", torch.cuda.get_device_name(0))
else:
    print("Nome da GPU:", "Nenhuma GPU encontrada")

CUDA disponível: True
Versão do CUDA no PyTorch: 11.8
Nome da GPU: NVIDIA GeForce GTX 1650


# Implementação AlexNet

In [25]:
# AlexNet network definition (pass num_classes=10 when training on CIFAR-10)
class AlexNet(nn.Module):
    """AlexNet-style convolutional classifier.

    The network is split into three stages:

    * ``features``   - five convolutions with ReLU and three max-pool layers;
    * ``avgpool``    - adaptive average pooling to a fixed 6x6 map;
    * ``classifier`` - four fully connected layers ending in ``num_classes`` outputs.

    Args:
        num_classes: size of the final linear layer (defaults to 1000).

    The forward pass returns the output of the last linear layer; no softmax
    is applied, which is the form expected by ``nn.CrossEntropyLoss``.
    """

    def __init__(self, num_classes=1000):
        super().__init__()

        # Convolutional stack. Fixed paddings are used to mimic TensorFlow's
        # "same" padding; each pooling layer uses kernel 3 / stride 2 / padding 1
        # for the same reason.
        conv_stack = [
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=5),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(3, stride=2, padding=1),

            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(3, stride=2, padding=1),

            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),

            nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),

            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(3, stride=2, padding=1),
        ]
        self.features = nn.Sequential(*conv_stack)

        # Adaptive pooling pins the feature map to 6x6 regardless of the input
        # resolution, so the classifier input width below is always 256 * 6 * 6.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        flat_width = 256 * 6 * 6
        dense_stack = [
            nn.Flatten(),
            nn.Linear(flat_width, 9216),
            nn.ReLU(inplace=True),
            nn.Linear(9216, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            # No trailing softmax: it is normally omitted when nn.CrossEntropyLoss is used.
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*dense_stack)

    def forward(self, x):
        """Run features -> avgpool -> classifier and return the final layer's output."""
        return self.classifier(self.avgpool(self.features(x)))

# Implementação VGG13

In [26]:
class VGG13(nn.Module):
    """VGG-13 style convolutional classifier.

    Five blocks of (Conv3x3 -> ReLU -> Conv3x3 -> ReLU -> MaxPool2x2) followed
    by a three-layer fully connected classifier.

    Args:
        num_classes: size of the final linear layer (defaults to 1000).

    The forward pass returns raw logits. The final softmax is intentionally
    left out: ``nn.CrossEntropyLoss`` already applies log-softmax, so keeping
    a ``nn.Softmax`` layer here would apply softmax twice during training.
    Use ``torch.softmax(logits, dim=1)`` at inference time when probabilities
    are needed; ``argmax`` predictions are the same either way.
    """

    # Output channels of the five convolutional blocks, in order.
    _BLOCK_CHANNELS = (64, 128, 256, 512, 512)

    def __init__(self, num_classes=1000):
        super(VGG13, self).__init__()

        # Feature extractor. Layers are appended in the same order as a
        # hand-written Sequential (conv, relu, conv, relu, pool per block), so
        # parameter names such as "features.0.weight" are unchanged.
        layers = []
        in_channels = 3
        for out_channels in self._BLOCK_CHANNELS:
            layers += [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
                nn.ReLU(inplace=True),
                nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2),
            ]
            in_channels = out_channels
        self.features = nn.Sequential(*layers)

        # For a 227x227 input the five poolings already yield a 7x7 map
        # (227 -> 113 -> 56 -> 28 -> 14 -> 7), so this layer is an identity
        # there. For any other resolution it pins the map to 7x7 so the
        # classifier input width stays 512 * 7 * 7 = 25088.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Run features -> avgpool -> classifier and return the logits."""
        x = self.features(x)
        x = self.avgpool(x)
        x = self.classifier(x)
        return x

In [34]:
# # Função de treinamento
# def train(model, device, train_loader, optimizer, criterion, epoch):
#     model.train()
#     running_loss = 0.0
#     correct = 0
#     total = 0

#     for batch_idx, (data, target) in enumerate(train_loader):
#         data, target = data.to(device), target.to(device)
#         optimizer.zero_grad()
#         output = model(data)
#         loss = criterion(output, target)
#         loss.backward()
#         optimizer.step()

#         running_loss += loss.item() * data.size(0)
#         # Predição: pegamos o índice com maior probabilidade
#         _, predicted = torch.max(output, 1)
#         total += target.size(0)
#         correct += predicted.eq(target).sum().item()

#     epoch_loss = running_loss / total
#     epoch_acc = 100. * correct / total
#     print(f"Epoch {epoch}: Loss de Treino = {epoch_loss:.4f} | Acurácia de Treino = {epoch_acc:.2f}%")
#     return epoch_loss, epoch_acc

from torch.cuda.amp import autocast, GradScaler  # AMP para treino acelerado na GPU

# Training function with optional automatic mixed precision (AMP)
def train(model, device, train_loader, optimizer, criterion, epoch, scaler=None):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: network to optimise; switched to training mode here.
        device: ``torch.device`` (or device string) the batches are moved to.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer stepped once per batch.
        criterion: loss called as ``criterion(output, target)``.
        epoch: epoch number, used only in the printed summary.
        scaler: optional gradient scaler. When given, the forward pass runs
            under ``torch.autocast`` and the backward/step go through the
            scaler. When ``None`` (the default) the pass runs in full
            precision, so callers that pass only the first six arguments
            keep working.

    Returns:
        ``(epoch_loss, epoch_acc)``: sample-weighted mean loss and accuracy
        in percent over all batches seen.

    Raises:
        ValueError: if ``train_loader`` yields no samples.
    """
    use_amp = scaler is not None
    # Derive the autocast device from `device` instead of hard-coding 'cuda',
    # so the CPU fallback is honoured.
    device_type = torch.device(device).type

    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for data, target in train_loader:
        data = data.to(device, non_blocking=True)
        target = target.to(device, non_blocking=True)

        optimizer.zero_grad()

        with torch.autocast(device_type, enabled=use_amp):
            output = model(data)
            loss = criterion(output, target)

        if use_amp:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()

        batch_len = target.size(0)
        # The loss is accumulated weighted by batch size and divided by the
        # sample count below, giving a per-sample mean even when the last
        # batch is smaller.
        running_loss += loss.item() * batch_len
        correct += output.argmax(dim=1).eq(target).sum().item()
        total += batch_len

    if total == 0:
        raise ValueError("train_loader yielded no samples; cannot compute epoch metrics")

    epoch_loss = running_loss / total
    epoch_acc = 100. * correct / total
    print(f"Epoch {epoch}: Loss = {epoch_loss:.4f} | Acurácia = {epoch_acc:.2f}%")

    return epoch_loss, epoch_acc

In [None]:
# # Função de avaliação
# def evaluate(model, device, test_loader, criterion):
#     model.eval()
#     test_loss = 0.0
#     correct = 0
#     total = 0

#     with torch.no_grad():
#         for data, target in test_loader:
#             data, target = data.to(device), target.to(device)
#             output = model(data)
#             loss = criterion(output, target)
#             test_loss += loss.item() * data.size(0)
#             _, predicted = torch.max(output, 1)
#             total += target.size(0)
#             correct += predicted.eq(target).sum().item()

#     avg_loss = test_loss / total
#     acc = 100. * correct / total
#     print(f"Loss de Teste = {avg_loss:.4f} | Acurácia de Teste = {acc:.2f}%")
#     return avg_loss, acc

In [42]:
import gc

# Evaluation function; drops its tensors and empties the CUDA cache before returning
def evaluate(model, device, test_loader, criterion):
    """Evaluate ``model`` on ``test_loader`` without tracking gradients.

    Args:
        model: network to evaluate; switched to eval mode here (it is not
            switched back afterwards).
        device: device the batches are moved to.
        test_loader: iterable yielding ``(data, target)`` batches.
        criterion: loss called as ``criterion(output, target)``.

    Returns:
        ``(avg_loss, acc)``: sample-weighted mean loss and accuracy in percent.

    Raises:
        ValueError: if ``test_loader`` yields no samples.
    """
    model.eval()
    test_loss = 0.0
    correct = 0
    total = 0

    # Pre-bind so the cleanup below is valid even if the loop never runs.
    data = target = output = loss = None

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)

            output = model(data)
            loss = criterion(output, target)

            batch_len = target.size(0)
            test_loss += loss.item() * batch_len
            # .item() already yields a Python int, so nothing accumulates on
            # the device; no explicit .cpu() copies are needed.
            correct += output.argmax(dim=1).eq(target).sum().item()
            total += batch_len

    # Release the last batch's tensors before emptying the cache; the cache
    # can only give back blocks that are no longer referenced.
    data = target = output = loss = None
    gc.collect()
    torch.cuda.empty_cache()

    if total == 0:
        raise ValueError("test_loader yielded no samples; cannot compute evaluation metrics")

    avg_loss = test_loss / total
    acc = 100. * correct / total

    print(f"Loss de Teste = {avg_loss:.4f} | Acurácia de Teste = {acc:.2f}%")
    return avg_loss, acc

# Treino AlexNet


## Teste 1
batch_size = 64<p>
learning_rate = 0.001<p>
num_epochs = 20

In [1]:
# Experiment configuration and hyperparameters
batch_size = 64
learning_rate = 0.001
num_epochs = 20

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Usando device:", device)

# Image pipeline: resize to 227x227, convert to tensor, normalise each channel.
transform = transforms.Compose([
    transforms.Resize((227, 227)),  # CIFAR-10 images are originally 32x32
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# CIFAR-10 train and test splits
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

# Drop any model/optimizer/scaler left over from a previous run of an
# experiment cell so their GPU tensors can be reclaimed before the new model
# is allocated. empty_cache() alone cannot free memory that is still referenced.
model = optimizer = scaler = None
gc.collect()
torch.cuda.empty_cache()

# Instantiate the model and move it to the device
model = AlexNet(num_classes=10)
model.to(device)

# Loss and optimizer.
# If a final Softmax layer were kept in the model, a different loss might be
# needed; the usual setup is no Softmax plus nn.CrossEntropyLoss.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Gradient scaler that train() uses for mixed-precision training.
scaler = torch.amp.GradScaler()

best_test_acc = 0.0
for epoch in range(1, num_epochs + 1):
    train_loss, train_acc = train(model, device, train_loader, optimizer, criterion, epoch, scaler)
    test_loss, test_acc = evaluate(model, device, test_loader, criterion)
    if test_acc > best_test_acc:
        best_test_acc = test_acc
        # Keep the weights of the best-performing epoch
        torch.save(model.state_dict(), "best_alexnet_cifar10.pth")
print("Melhor acurácia no teste: {:.2f}%".format(best_test_acc))


Usando device: cuda
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:07<00:00, 23.7MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Epoch 1: Loss de Treino = 1.8911 | Acurácia de Treino = 28.66%
Loss de Teste = 1.5393 | Acurácia de Teste = 41.66%
Epoch 2: Loss de Treino = 1.4958 | Acurácia de Treino = 45.33%
Loss de Teste = 1.3502 | Acurácia de Teste = 51.10%
Epoch 3: Loss de Treino = 1.3537 | Acurácia de Treino = 51.44%
Loss de Teste = 1.3032 | Acurácia de Teste = 52.77%
Epoch 4: Loss de Treino = 1.2533 | Acurácia de Treino = 55.56%
Loss de Teste = 1.2522 | Acurácia de Teste = 55.71%
Epoch 5: Loss de Treino = 1.1792 | Acurácia de Treino = 58.18%
Loss de Teste = 1.1751 | Acurácia de Teste = 58.12%
Epoch 6: Loss de Treino = 1.1112 | Acurácia de Treino = 60.95%
Loss de Teste = 1.1626 | Acurácia de Teste = 58.22%
Epoch 7: Loss de Treino = 1.0558 | Acurácia de Treino = 62.99%
Loss de Teste = 1.1322 | Acurácia de Teste = 60.26%
Epoch 8: Loss de Treino = 1.0150 | Acurácia de Treino = 64.24%
Loss de Teste = 1.1253 | Acurácia de Teste 

## Teste 2
batch_size = 32<p>
learning_rate = 0.001<p>
num_epochs = 20

In [43]:
# Experiment configuration and hyperparameters
batch_size = 32
learning_rate = 0.001
num_epochs = 20

# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Usando device:", device)

# Image pipeline: resize to 227x227 (CIFAR-10 images are originally 32x32),
# convert to tensor, then normalise every channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize(size=(227, 227)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,) * 3, std=(0.5,) * 3),
])

# CIFAR-10 train and test splits
train_dataset = datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform
)
test_dataset = datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform
)

# TODO: decide whether to pass pin_memory=True to both loaders; it is left off
# here and loading happens in the main process (num_workers=0).
train_loader = DataLoader(
    dataset=train_dataset, batch_size=batch_size, shuffle=True, num_workers=0
)
test_loader = DataLoader(
    dataset=test_dataset, batch_size=batch_size, shuffle=False, num_workers=0
)

Usando device: cuda


In [47]:
# Full (non-abbreviated) allocator statistics for the current CUDA device.
print(torch.cuda.memory_summary())

|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 2            |        cudaMalloc retries: 4         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |   6891 MiB |   6891 MiB |   7823 GiB |   7816 GiB |
|       from large pool |   6885 MiB |   6885 MiB |   7817 GiB |   7810 GiB |
|       from small pool |      6 MiB |      7 MiB |      5 GiB |      5 GiB |
|---------------------------------------------------------------------------|
| Active memory         |   6891 MiB |   6891 MiB |   7823 GiB |   7816 GiB |
|       from large pool |   6885 MiB |   6885 MiB |   7817 GiB |   7810 GiB |
|       from small pool |      6 MiB |      7 MiB |      5 GiB |      5 GiB |
|---------------------------------------------------------------

In [48]:
# Ask PyTorch's CUDA allocator to release its cached blocks.
# NOTE(review): memory still referenced by live tensors (e.g. a model or
# optimizer from an earlier cell) is presumably not released by this call; confirm.
torch.cuda.empty_cache()

In [49]:
# Drop any model/optimizer/scaler left over from a previous run of an
# experiment cell so their GPU tensors can be reclaimed before the new model
# is allocated. empty_cache() alone cannot free memory that is still
# referenced, and the old optimizer keeps the old parameters alive.
model = optimizer = scaler = None
gc.collect()
torch.cuda.empty_cache()

# Instantiate the model and move it to the device
model = AlexNet(num_classes=10)
model.to(device)

# Loss and optimizer.
# If a final Softmax layer were kept in the model, a different loss might be
# needed; the usual setup is no Softmax plus nn.CrossEntropyLoss.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Gradient scaler that train() uses for mixed-precision training.
scaler = torch.amp.GradScaler()

best_test_acc = 0.0
for epoch in range(1, num_epochs + 1):
    torch.cuda.empty_cache()
    train_loss, train_acc = train(model, device, train_loader, optimizer, criterion, epoch, scaler)
    test_loss, test_acc = evaluate(model, device, test_loader, criterion)
    if test_acc > best_test_acc:
        best_test_acc = test_acc
        # Keep the weights of the best-performing epoch
        torch.save(model.state_dict(), "best_alexnet_cifar10.pth")
print("Melhor acurácia no teste: {:.2f}%".format(best_test_acc))


OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 4.00 GiB of which 0 bytes is free. Of the allocated memory 6.74 GiB is allocated by PyTorch, and 14.47 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

## Treino VGG13


### Teste 1
batch_size = 64<p>
learning_rate = 0.001<p>
num_epochs = 20

In [None]:
# Experiment configuration and hyperparameters
batch_size = 64
learning_rate = 0.001
num_epochs = 20

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Usando device:", device)

# Image pipeline: resize to 227x227, convert to tensor, normalise each channel.
transform = transforms.Compose([
    transforms.Resize((227, 227)),  # CIFAR-10 images are originally 32x32
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# CIFAR-10 train and test splits
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

# Drop any model/optimizer/scaler left over from a previous experiment so
# their GPU tensors can be reclaimed before the new model is allocated.
# empty_cache() alone cannot free memory that is still referenced.
model = optimizer = scaler = None
gc.collect()
torch.cuda.empty_cache()

# Instantiate the model and move it to the device
model = VGG13(num_classes=10)
model.to(device)

# Loss and optimizer.
# If a final Softmax layer is kept in the model, a different loss may be
# needed; the usual setup is no Softmax plus nn.CrossEntropyLoss.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Gradient scaler that train() uses for mixed-precision training.
scaler = torch.amp.GradScaler()

best_test_acc = 0.0
for epoch in range(1, num_epochs + 1):
    train_loss, train_acc = train(model, device, train_loader, optimizer, criterion, epoch, scaler)
    test_loss, test_acc = evaluate(model, device, test_loader, criterion)
    if test_acc > best_test_acc:
        best_test_acc = test_acc
        # Keep the weights of the best-performing epoch. VGG13 gets its own
        # file so it does not overwrite the AlexNet checkpoint.
        torch.save(model.state_dict(), "best_vgg13_cifar10.pth")
print("Melhor acurácia no teste: {:.2f}%".format(best_test_acc))


### Teste 2
batch_size = 256<p>
learning_rate = 0.001<p>
num_epochs = 20

In [None]:
# Experiment configuration and hyperparameters
batch_size = 256
learning_rate = 0.001
num_epochs = 20

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Usando device:", device)

# Image pipeline: resize to 227x227, convert to tensor, normalise each channel.
transform = transforms.Compose([
    transforms.Resize((227, 227)),  # CIFAR-10 images are originally 32x32
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# CIFAR-10 train and test splits
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

# Drop any model/optimizer/scaler left over from a previous experiment so
# their GPU tensors can be reclaimed before the new model is allocated.
# empty_cache() alone cannot free memory that is still referenced.
model = optimizer = scaler = None
gc.collect()
torch.cuda.empty_cache()

# Instantiate the model and move it to the device
model = VGG13(num_classes=10)
model.to(device)

# Loss and optimizer.
# If a final Softmax layer is kept in the model, a different loss may be
# needed; the usual setup is no Softmax plus nn.CrossEntropyLoss.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Gradient scaler that train() uses for mixed-precision training.
scaler = torch.amp.GradScaler()

best_test_acc = 0.0
for epoch in range(1, num_epochs + 1):
    train_loss, train_acc = train(model, device, train_loader, optimizer, criterion, epoch, scaler)
    test_loss, test_acc = evaluate(model, device, test_loader, criterion)
    if test_acc > best_test_acc:
        best_test_acc = test_acc
        # Keep the weights of the best-performing epoch. VGG13 gets its own
        # file so it does not overwrite the AlexNet checkpoint.
        torch.save(model.state_dict(), "best_vgg13_cifar10.pth")
print("Melhor acurácia no teste: {:.2f}%".format(best_test_acc))


In [None]:
# import time

# # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = torch.device("cuda")
# # device = torch.device("cpu")

# # Modelo simples
# model = torch.nn.Linear(1000, 1000).to(device)
# data = torch.randn(10000, 1000).to(device)

# # Teste de tempo na GPU
# start = time.time()
# for _ in range(100):
#     output = model(data)
# end = time.time()

# print(f"Tempo de execução: {end - start:.4f} segundos")

Tempo de execução: 0.0082 segundos
