<a href="https://colab.research.google.com/github/RaphaelCarvalh/BootCampAVANTI_machine_learning/blob/ativ04-et02-analise-dataset/Et01_analise_dataset_fase2_pr%C3%A9.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Projeto Clothing Co-Parsing - Etapa 1 - Notebook: Análise do Dataset - ATIV-04-ET-02


In [8]:
import pandas as pd

# Read the previously cleaned dataset index and show a quick preview.
df = pd.read_csv(filepath_or_buffer="df_clean.csv")
preview_rows = df.head()
print(preview_rows)
total_images = len(df)
print(f"Total de imagens: {total_images}")

                                     image_path  corrupted        image_hash  \
0  ./clothing-coparsing-dataset/images/1433.jpg      False  b4538fc9136c6cd2   
1  ./clothing-coparsing-dataset/images/0621.jpg      False  f3b5a45b4b241c8d   
2  ./clothing-coparsing-dataset/images/1649.jpg      False  b35889a266594bf3   
3  ./clothing-coparsing-dataset/images/0667.jpg      False  b3f6cb834c5d2438   
4  ./clothing-coparsing-dataset/images/2052.jpg      False  abf3d2a563cc8c18   

   width  height  channels        label  
0    550     831         3  image-level  
1    550     832         3  pixel-level  
2    550     828         3  image-level  
3    550     842         3  pixel-level  
4    550     834         3  image-level  
Total de imagens: 2096


Transformações e augmentations

In [9]:
from torchvision import transforms

print("Configurando transformações e augmentations...\n")

IMG_SIZE = 224  # side length, in pixels, of the square images fed to the models

# Per-channel normalization statistics shared by every pipeline.
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]


def _pipeline(*image_ops):
    """Wrap PIL-level ops with the common head (to PIL) and tail (to tensor + normalize)."""
    return transforms.Compose([
        transforms.ToPILImage(),
        *image_ops,
        transforms.ToTensor(),
        transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
    ])


# Training: random crop/flip/rotation/colour jitter for augmentation.
train_transform = _pipeline(
    transforms.RandomResizedCrop(IMG_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
)

# Validation/test: deterministic resize only.
val_test_transform = _pipeline(
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
)

print("Transformações configuradas com sucesso.")

Configurando transformações e augmentations...

Transformações configuradas com sucesso.


Criar Datasets e DataLoaders

In [10]:
from torch.utils.data import Dataset, DataLoader
import cv2
import torch
from sklearn.model_selection import train_test_split

# Pipeline para carregar imagens
class FlowerDataset(Dataset):
    """Image dataset that reads each file from disk on demand.

    Every row of ``data`` must hold the image path in column 0 and the label
    in column 1; any further columns are ignored.
    """

    def __init__(self, data, transform):
        """
        data: 2-D array or DataFrame laid out as [image path, label]
        transform: callable applied to the RGB image (e.g. train_transform or
            val_test_transform); a falsy value returns the raw array
        """
        # A DataFrame does not support the positional ``data[idx, col]``
        # indexing used in __getitem__, so normalise it to a NumPy array.
        if hasattr(data, "to_numpy"):
            data = data.to_numpy()
        self.data = data
        self.transform = transform

    def __len__(self):
        """Number of samples (rows) in the dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        Raises
        ------
        FileNotFoundError
            If the image file is missing or cannot be decoded.
        """
        img_path, label = self.data[idx, 0], self.data[idx, 1]
        img = cv2.imread(img_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        # OpenCV loads images as BGR; convert to RGB before the transforms.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        if self.transform:
            img = self.transform(img)
        return img, label


Dividir os dados em treino, validação e teste

In [11]:
# Split: 80% train, 10% validation, 10% test.
#
# df's columns are image_path, corrupted, image_hash, width, height, channels,
# label, so positional column 1 is the `corrupted` flag, not the class.
# Select the two needed columns by name and encode the string labels as
# integer class indices so they can be used as training targets.
label_codes, label_names = pd.factorize(df["label"], sort=True)
samples = pd.DataFrame(
    {"image_path": df["image_path"], "label": label_codes}
).to_numpy(dtype=object)  # rows are [image path, class index]

train_data, temp_data = train_test_split(
    samples, test_size=0.2, random_state=42, stratify=label_codes
)
val_data, test_data = train_test_split(
    temp_data, test_size=0.5, random_state=42, stratify=temp_data[:, 1].astype(int)
)

# Build the datasets
train_ds = FlowerDataset(train_data, transform=train_transform)
val_ds = FlowerDataset(val_data, transform=val_test_transform)
test_ds = FlowerDataset(test_data, transform=val_test_transform)

# Build the dataloaders
BATCH_SIZE = 32  # can be raised later if memory allows

train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_dl = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False)
test_dl = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False)

print(f"Treino: {len(train_ds)} | Validação: {len(val_ds)} | Teste: {len(test_ds)}")


Treino: 1676 | Validação: 210 | Teste: 210


Criar os DataLoaders

In [12]:
from torch.utils.data import DataLoader

BATCH_SIZE = 32  # can be tuned later if needed

# Only the training loader shuffles; validation and test keep a fixed order.
train_dl, val_dl, test_dl = (
    DataLoader(split_ds, batch_size=BATCH_SIZE, shuffle=do_shuffle)
    for split_ds, do_shuffle in ((train_ds, True), (val_ds, False), (test_ds, False))
)

n_train_batches, n_val_batches, n_test_batches = len(train_dl), len(val_dl), len(test_dl)
print(f"Train batches: {n_train_batches}, Val batches: {n_val_batches}, Test batches: {n_test_batches}")

Train batches: 53, Val batches: 7, Test batches: 7


In [13]:
import time
import copy
import torch
from tqdm import tqdm

def _prepare_batch(inputs, labels, device):
    """Move one batch to ``device`` and coerce the labels to int64 class indices."""
    # Labels may arrive as a list/NumPy array; tensors of other dtypes
    # (e.g. bool) are cast by the final ``.to(dtype=torch.long)``.
    if not isinstance(labels, torch.Tensor):
        labels = torch.tensor(labels, dtype=torch.long)
    return inputs.to(device), labels.to(device, dtype=torch.long)


def _run_epoch(model, dataloader, criterion, device, desc,
               optimizer=None, scaler=None, grad_clip=None, use_amp=False):
    """Run one pass over ``dataloader`` and return ``(mean_loss, accuracy)``.

    When ``optimizer`` is given, each batch also performs a backward pass and
    an optimisation step (through ``scaler`` if one is supplied). The caller
    is responsible for ``model.train()`` / ``model.eval()`` and ``no_grad``.
    """
    training = optimizer is not None
    loss_sum = 0.0
    correct = 0
    seen = 0

    progress = tqdm(dataloader, desc=desc, leave=False)
    for inputs, labels in progress:
        inputs, labels = _prepare_batch(inputs, labels, device)

        if training:
            optimizer.zero_grad()

        # Forward pass, under autocast only when mixed precision is active.
        if use_amp:
            with torch.cuda.amp.autocast():
                outputs = model(inputs)
                loss = criterion(outputs, labels)
        else:
            outputs = model(inputs)
            loss = criterion(outputs, labels)

        if training:
            if scaler is not None:
                scaler.scale(loss).backward()
                if grad_clip is not None:
                    # Gradients must be unscaled before their norm is clipped.
                    scaler.unscale_(optimizer)
                    torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
                scaler.step(optimizer)
                scaler.update()
            else:
                loss.backward()
                if grad_clip is not None:
                    torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
                optimizer.step()

        # Weight the batch loss by its size so the epoch mean is per-sample.
        _, preds = torch.max(outputs, 1)
        batch_size = inputs.size(0)
        loss_sum += loss.item() * batch_size
        correct += torch.sum(preds == labels.data).item()
        seen += batch_size

        progress.set_postfix(loss=loss.item())

    if seen == 0:
        return 0.0, 0.0
    return loss_sum / seen, correct / seen


def train_model(
    model,
    train_dl,
    val_dl,
    criterion,
    optimizer,
    num_epochs=10,
    device=None,
    scheduler=None,
    scheduler_step_on_val=False,
    save_path="best_model.pth",
    early_stopping_patience=None,
    grad_clip=None,
    use_amp=False
):
    """
    Train and validate a PyTorch classification model.

    Parameters
    ----------
    model : torch.nn.Module
        Model to train (e.g. resnet50, efficientnet, vgg).
    train_dl, val_dl : iterable of (inputs, labels) batches
        Training and validation DataLoaders.
    criterion : loss function
        E.g. torch.nn.CrossEntropyLoss().
    optimizer : torch.optim.Optimizer
        Must be built over ``model``'s own parameters,
        e.g. torch.optim.Adam(model.parameters(), lr=1e-4).
    num_epochs : int
        Number of epochs.
    device : torch.device or str
        E.g. "cuda" or "cpu". Auto-detected when None.
    scheduler : torch.optim.lr_scheduler (optional)
        Learning-rate scheduler, stepped once per epoch when given.
    scheduler_step_on_val : bool
        If True, calls scheduler.step(val_loss) (useful for ReduceLROnPlateau).
        Otherwise calls scheduler.step() at the end of the epoch.
    save_path : str
        Where the best state_dict is saved (default "best_model.pth").
    early_stopping_patience : int or None
        If int, stop after this many consecutive epochs without improvement.
    grad_clip : float or None
        Max norm passed to clip_grad_norm_.
    use_amp : bool
        If True, use autocast/GradScaler (only when running on CUDA).

    Returns
    -------
    (model, history)
        ``model`` loaded with the weights of the best validation-accuracy
        epoch, and ``history``, a dict of per-epoch lists under the keys
        "train_loss", "train_acc", "val_loss" and "val_acc".
    """

    # Resolve the device
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    elif isinstance(device, str):
        device = torch.device(device)

    model.to(device)

    # Mixed precision is only enabled on CUDA
    use_amp = use_amp and (device.type == "cuda")
    scaler = torch.cuda.amp.GradScaler() if use_amp else None

    history = {
        "train_loss": [],
        "train_acc": [],
        "val_loss": [],
        "val_acc": []
    }

    best_model_wts = copy.deepcopy(model.state_dict())
    # None until the first epoch finishes, so that epoch always becomes the
    # baseline. Starting at 0.0 meant a run whose val_acc stayed at 0.0 never
    # saved a checkpoint and returned the initial, pre-training weights.
    best_val_acc = None
    epochs_since_improvement = 0

    for epoch in range(1, num_epochs + 1):
        start_epoch = time.time()
        print(f"\nIniciando época {epoch}/{num_epochs} - device: {device}")

        # ---------- Training ----------
        model.train()
        epoch_loss, epoch_acc = _run_epoch(
            model, train_dl, criterion, device, "Treinando",
            optimizer=optimizer, scaler=scaler, grad_clip=grad_clip, use_amp=use_amp,
        )
        history["train_loss"].append(epoch_loss)
        history["train_acc"].append(epoch_acc)

        print(f"Treino - Loss: {epoch_loss:.4f} | Acc: {epoch_acc:.4f}")

        # ---------- Validation ----------
        model.eval()
        with torch.no_grad():
            val_loss, val_acc = _run_epoch(
                model, val_dl, criterion, device, "Validando", use_amp=use_amp,
            )
        history["val_loss"].append(val_loss)
        history["val_acc"].append(val_acc)

        print(f"Validação - Loss: {val_loss:.4f} | Acc: {val_acc:.4f}")

        # ---------- Scheduler ----------
        if scheduler is not None:
            if scheduler_step_on_val:
                # Metric-driven schedulers (e.g. ReduceLROnPlateau) take the
                # validation loss; fall back to a plain step otherwise.
                try:
                    scheduler.step(val_loss)
                except TypeError:
                    scheduler.step()
            else:
                scheduler.step()

        # ---------- Best-model checkpoint ----------
        if best_val_acc is None or val_acc > best_val_acc:
            best_val_acc = val_acc
            best_model_wts = copy.deepcopy(model.state_dict())
            torch.save(best_model_wts, save_path)
            print(f"Novo melhor modelo salvo em {save_path} (val_acc = {val_acc:.4f})")
            epochs_since_improvement = 0
        else:
            epochs_since_improvement += 1

        # Report timing before the early-stopping check so the final epoch
        # is timed as well.
        epoch_time = time.time() - start_epoch
        print(f"Época concluída em {epoch_time:.1f}s")

        # ---------- Early stopping ----------
        if early_stopping_patience is not None:
            if epochs_since_improvement >= early_stopping_patience:
                print(f"Early stopping: sem melhoria por {early_stopping_patience} épocas. Interrompendo treino.")
                break

    # Restore the best weights before returning
    model.load_state_dict(best_model_wts)
    reported_best = best_val_acc if best_val_acc is not None else 0.0
    print(f"Treino finalizado. Melhor val_acc = {reported_best:.4f}")

    return model, history

ResNet50

In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models

# ----- Model -----
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# One output unit per distinct value in the `label` column.
num_classes = len(df['label'].unique())

# Pretrained ResNet50 with its final fully connected layer swapped for a
# fresh head sized to this dataset.
resnet50 = models.resnet50(weights="IMAGENET1K_V1")
in_features = resnet50.fc.in_features
resnet50.fc = nn.Linear(in_features=in_features, out_features=num_classes)
resnet50 = resnet50.to(device)

# ----- Training setup -----
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=resnet50.parameters(), lr=1e-4)

print("🚀 Treinando ResNet50...")


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


100%|██████████| 97.8M/97.8M [00:00<00:00, 156MB/s]


🚀 Treinando ResNet50...


EfficientNet-B0

In [15]:
# ----- Model -----
# Pretrained EfficientNet-B0; the last classifier layer (index 1) is replaced
# by a new linear head with `num_classes` outputs.
efficientnet = models.efficientnet_b0(weights="IMAGENET1K_V1")
in_features = efficientnet.classifier[1].in_features
efficientnet.classifier[1] = nn.Linear(in_features=in_features, out_features=num_classes)
efficientnet = efficientnet.to(device)

# ----- Training setup -----
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=efficientnet.parameters(), lr=1e-4)

print(" Treinando EfficientNet-B0...")

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth


100%|██████████| 20.5M/20.5M [00:00<00:00, 170MB/s]

 Treinando EfficientNet-B0...





VGG16

In [16]:
# ----- Model -----
# Pretrained VGG16; the last classifier layer (index 6) is replaced by a new
# linear head with `num_classes` outputs.
vgg16 = models.vgg16(weights="IMAGENET1K_V1")
in_features = vgg16.classifier[6].in_features
vgg16.classifier[6] = nn.Linear(in_features=in_features, out_features=num_classes)
vgg16 = vgg16.to(device)

# ----- Training setup -----
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=vgg16.parameters(), lr=1e-4)

print("🚀 Treinando VGG16...")


Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth


100%|██████████| 528M/528M [00:05<00:00, 95.5MB/s]


🚀 Treinando VGG16...


In [17]:
def fit_candidate(model, save_path, num_epochs=10, lr=1e-4):
    """Train ``model`` with its own loss and Adam optimizer; return ``(model, history)``.

    The optimizer has to be created from the parameters of the model being
    trained. Reusing the notebook-level ``optimizer`` would bind every run to
    whichever model was defined last, leaving the other models' weights
    untouched by ``optimizer.step()``.
    """
    return train_model(
        model=model,
        train_dl=train_dl,
        val_dl=val_dl,
        criterion=nn.CrossEntropyLoss(),
        optimizer=optim.Adam(model.parameters(), lr=lr),
        num_epochs=num_epochs,   # adjust to the available time/hardware
        device=device,
        save_path=save_path,
    )


# Train ResNet50
resnet50, history_resnet = fit_candidate(resnet50, "best_resnet50.pth")

# Train EfficientNet-B0
efficientnet, history_efficient = fit_candidate(efficientnet, "best_efficientnet.pth")

# Train VGG16
vgg16, history_vgg = fit_candidate(vgg16, "best_vgg16.pth")



Iniciando época 1/10 - device: cpu




KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt

# Overlay the per-epoch validation accuracy of the three models on one figure.
plt.figure(figsize=(10, 5))
for model_name, model_history in (
    ("ResNet50", history_resnet),
    ("EfficientNet-B0", history_efficient),
    ("VGG16", history_vgg),
):
    plt.plot(model_history['val_acc'], label=model_name)
plt.title("Comparação da Acurácia de Validação")
plt.xlabel("Épocas")
plt.ylabel("Acurácia")
plt.legend()
plt.show()
