# Implementação de FCN para segmnetação

semana 12 do Onboarding LIPAI

In [94]:
import os
import random
from pathlib import Path


import cv2
import numpy as np
import matplotlib.pyplot as plt


import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision
from torchvision import transforms


import albumentations as A
from albumentations.pytorch import ToTensorV2

Configurações gerais

In [95]:
DATA_IMG_ROOT = "/content/dataset_oral/imgs"
DATA_MASK_ROOT = "/content/dataset_oral/masks"
OUTPUT_DIR = "./model_output"
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [96]:
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x78dc66df7610>

In [97]:
NUM_CLASSES = 2 # fundo e núcleo
BATCH_SIZE = 8
LR = 1e-4
NUM_EPOCHS = 100
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(DEVICE)

cuda


## Import do dataset

In [98]:
import os

if not os.path.exists('/content/dataset/dataset/oral'):

  from google.colab import drive
  drive.mount('/content/drive')

  !mkdir -p /content/dataset_oral/imgs
  !mkdir -p /content/dataset_oral/masks

  !cp -r "/content/drive/MyDrive/Datasets Projeto 1 - LIPAI Onboarding/Original ROI images/Original ROI images/"* /content/dataset_oral/imgs
  !cp -r "/content/drive/MyDrive/Gold_Standard_Semantic_Segmentation/"* /content/dataset_oral/masks

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [99]:
import cv2

img = cv2.imread("/content/dataset_oral/imgs/healthy/healthy-01-roi1.tif")
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

print(img.shape)

(250, 450, 3)


## Funções auxiliares

### Computa o kernel bilinear

Disponível em https://d2l.ai/chapter_computer-vision/fcn.html

In [100]:
def bilinear_kernel(in_channels, out_channels, kernel_size):
  # Gera pesos que implementam upsampling bilinear para inicialização
  factor = (kernel_size + 1) // 2
  if kernel_size % 2 == 1:
    center = factor - 1
  else:
    center = factor - 0.5
    og = (torch.arange(kernel_size).reshape(-1, 1),
    torch.arange(kernel_size).reshape(1, -1))
    filt = (1 - torch.abs(og[0] - center) / factor) * \
    (1 - torch.abs(og[1] - center) / factor)
    weight = torch.zeros((in_channels, out_channels, kernel_size, kernel_size))
  for i in range(min(in_channels, out_channels)):
    weight[i, i, :, :] = filt
  return weight

### Computa o IoU

In [101]:
def compute_iou(preds, labels, num_classes=2):
    # preds: Tensor (B,H,W) predicted class index
    # labels: Tensor (B,H,W)
  ious = []
  preds = preds.view(-1)
  labels = labels.view(-1)
  for cls in range(num_classes):
    pred_inds = preds == cls
    target_inds = labels == cls
    intersection = (pred_inds & target_inds).long().sum().item()
    union = (pred_inds | target_inds).long().sum().item()
  if union == 0:
    iou = float('nan')
  else:
    iou = intersection / union
  ious.append(iou)
  return ious

## Dataset

In [102]:
class OralDataset(Dataset):
  def __init__(self, img_root, mask_root, transform=None):
    self.img_root = img_root
    self.mask_root = mask_root
    self.transform = transform

    self.img_paths = []

    for subdir in os.listdir(img_root):
      full = os.path.join(img_root, subdir)
      if os.path.isdir(full):
        for fname in os.listdir(full):
          self.img_paths.append(os.path.join(full, fname))

  def __len__(self):
    return len(self.img_paths)

  def __getitem__(self, idx):
      img_path = self.img_paths[idx]

      parts = img_path.split(os.sep)
      subfolder = parts[-2]
      filename = parts[-1]

      base_name = os.path.splitext(filename)[0]
      mask_filename = base_name + ".png"
      mask_path = os.path.join(self.mask_root, subfolder, mask_filename)

      img = cv2.imread(img_path)
      img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

      mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
      mask = (mask > 127).astype("uint8")

      if self.transform:
        augmented = self.transform(image=img, mask=mask)
        img = augmented["image"]
        mask = augmented["mask"]

      if isinstance(mask, torch.Tensor):
        mask = mask.long()

      return img, mask

### Cria a transformação -> augmentação

In [103]:
CROP_H = 224
CROP_W = 224
# dava pra deixar as imagem bem maiores horizontalmente...

train_transform = A.Compose([
    A.RandomCrop(CROP_H, CROP_W),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.Rotate(limit=20, p=0.5),
    A.RandomBrightnessContrast(p=0.3),
    A.Normalize( mean=(0.485, 0.456, 0.406),
                 std=(0.229, 0.224, 0.225) ),
    ToTensorV2()
])

test_val_transform = A.Compose([
    A.RandomCrop(CROP_H, CROP_W),
    A.Normalize( mean=(0.485, 0.456, 0.406),
                 std=(0.229, 0.224, 0.225) ),
    ToTensorV2()
])

### Divisão do dataset

In [104]:
full_ds = OralDataset(
    DATA_IMG_ROOT, DATA_MASK_ROOT,transform=None
)

n = len(full_ds)
if n == 0:
  raise RuntimeError(f"Dataset vazio em {DATA_IMG_ROOT} — verifique aí!")

In [105]:
train_len = int(0.7 * len(full_ds))
val_len = int(0.15 * len(full_ds))
test_len = len(full_ds) - train_len - val_len

train_ds, val_ds, test_ds = random_split(full_ds, [train_len, val_len, test_len])

Adicionar a augmentação

In [106]:
# python tem dessas de vc criar algo e na vdd ser só uma view ai
# vc precisa fazer maracutáia estranha pra funcionar ne

train_ds.dataset.transform = train_transform
val_ds.dataset.transform  = test_val_transform
test_ds.dataset.transform  = test_val_transform

# + eficiência usando mais trabalhos e pin_memory, não conhecia
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)

## Criando a rede


(pelo que entendi é obrigatório seguir a referência do https://d2l.ai/chapter_computer-vision/fcn.html então ta igual)

In [107]:
class FCNResNet18(nn.Module):
    def __init__(self, num_classes, pretrained):
        super().__init__()

        if pretrained:
          model = torchvision.models.resnet18(weights=torchvision.models.ResNet18_Weights.IMAGENET1K_V1)
        else:
          model = torchvision.models.resnet18(pretrained=False)

        self.encoder = nn.Sequential(*list(model.children())[:-2])

        self.conv1x1 = nn.Conv2d(512, num_classes, kernel_size=1)

        self.up = nn.ConvTranspose2d(
            num_classes, num_classes,
            kernel_size=64, stride=32, padding=16, bias=False
        )


        # Aqui entra a inicialização bilinear
        #self.up.weight.data.copy_(bilinear_kernel(num_classes, num_classes, 64))

        W = bilinear_kernel(num_classes, num_classes, 64)
        with torch.no_grad():
          self.up.weight.copy_(W)

    def forward(self, x):
        x = self.encoder(x)
        x = self.conv1x1(x)
        x = self.up(x)
        return x

## LOOP de Treinamento

In [108]:
for img, mask in train_loader:
    print(mask.unique())
    break

tensor([0, 1])


In [109]:
model = FCNResNet18(NUM_CLASSES, True).to(DEVICE)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
#     optimizer,
#     mode='max',
#     factor=0.5,
#     patience=3,
#     min_lr=1e-6
# )


best_miou = 0.0

for epoch in range(NUM_EPOCHS):
    model.train()
    running_loss = 0.0

    for imgs, masks in train_loader:
      imgs = imgs.to(DEVICE)
      masks = masks.to(DEVICE)

      preds = model(imgs)

      loss = criterion(preds, masks)

      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      running_loss += loss.item()

    epoch_loss = running_loss / len(train_loader)

    model.eval()
    all_ious = []
    val_loss = 0.0
    with torch.no_grad():
      for imgs, masks in val_loader:
        imgs = imgs.to(DEVICE)
        masks = masks.to(DEVICE)

        preds = model(imgs)

        loss = criterion(preds, masks)
        val_loss += loss.item()

        preds = torch.argmax(preds, dim=1)
        ious = compute_iou(preds.cpu(), masks.cpu(), num_classes=NUM_CLASSES)
        all_ious.append(ious)

    val_loss = val_loss / len(val_loader)
    all_ious = np.array(all_ious)
    mean_ious = np.nanmean(all_ious, axis=0)
    miou = np.nanmean(mean_ious)

    print(f"Epoch {epoch+1} | TrainLoss: {epoch_loss:.4f} ValLoss: {val_loss:.4f} mIoU: {miou:.4f} (per_class: {mean_ious})")

    if miou > best_miou:
      best_miou = miou
      torch.save({'epoch': epoch+1, 'model_state': model.state_dict(), 'miou': miou}, os.path.join(OUTPUT_DIR, 'best_model.pth'))
      print(f"Novo melhor modelo salvo com mIoU={miou:.4f}")

    # scheduler.step(miou)


Epoch 1 | TrainLoss: 0.6500 ValLoss: 0.5597 mIoU: 0.2820 (per_class: [0.28195821])
Novo melhor modelo salvo com mIoU=0.2820
Epoch 2 | TrainLoss: 0.5494 ValLoss: 0.5036 mIoU: 0.2640 (per_class: [0.26401186])
Epoch 3 | TrainLoss: 0.5102 ValLoss: 0.4912 mIoU: 0.3591 (per_class: [0.35908503])
Novo melhor modelo salvo com mIoU=0.3591
Epoch 4 | TrainLoss: 0.4830 ValLoss: 0.4593 mIoU: 0.3228 (per_class: [0.32279304])
Epoch 5 | TrainLoss: 0.4586 ValLoss: 0.4539 mIoU: 0.4019 (per_class: [0.40185456])
Novo melhor modelo salvo com mIoU=0.4019
Epoch 6 | TrainLoss: 0.4494 ValLoss: 0.4411 mIoU: 0.3865 (per_class: [0.38645425])
Epoch 7 | TrainLoss: 0.4477 ValLoss: 0.4406 mIoU: 0.4224 (per_class: [0.42238992])
Novo melhor modelo salvo com mIoU=0.4224
Epoch 8 | TrainLoss: 0.4336 ValLoss: 0.4311 mIoU: 0.4211 (per_class: [0.42112155])
Epoch 9 | TrainLoss: 0.4184 ValLoss: 0.4253 mIoU: 0.4249 (per_class: [0.42490007])
Novo melhor modelo salvo com mIoU=0.4249
Epoch 10 | TrainLoss: 0.4165 ValLoss: 0.4209 mIo

## Avaliação

In [110]:
model.eval()
all_ious = []

with torch.no_grad():
  for imgs, masks in test_loader:
    imgs = imgs.to(DEVICE)
    masks = masks.to(DEVICE)

    outputs = model(imgs)

    preds = torch.argmax(outputs, dim=1)
    ious = compute_iou(preds.cpu(), masks.cpu(), num_classes=NUM_CLASSES)
    all_ious.append(ious)


all_ious = np.array(all_ious)
mean_ious = np.nanmean(all_ious, axis=0)
miou = np.nanmean(mean_ious)
print(f"Test mIoU: {miou:.4f} (per_class: {mean_ious})")




Test mIoU: 0.5265 (per_class: [0.5265093])


Isso aqui é o chat

In [112]:
# Visualizar alguns exemplos
os.makedirs(os.path.join(OUTPUT_DIR, 'vis'), exist_ok=True)
model.cpu()
with torch.no_grad():
  imgs, masks = next(iter(test_loader))
  outputs = model(imgs)
  outputs = F.interpolate(outputs, size=masks.shape[-2:], mode='bilinear', align_corners=False)
  preds = torch.argmax(outputs, dim=1).cpu().numpy()


  imgs_np = imgs.permute(0, 2, 3, 1).numpy()
  masks_np = masks.numpy()


  for i in range(min(8, imgs_np.shape[0])):
    fig, ax = plt.subplots(1, 3, figsize=(12, 4))
    ax[0].imshow((imgs_np[i] * np.array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406])))
    ax[0].set_title('Imagem')
    ax[0].axis('off')


    ax[1].imshow(masks_np[i], cmap='gray')
    ax[1].set_title('Mascara GT')
    ax[1].axis('off')


    ax[2].imshow(preds[i], cmap='gray')
    ax[2].set_title('Predicao')
    ax[2].axis('off')


    plt.tight_layout()
    plt.savefig(os.path.join(OUTPUT_DIR, 'vis', f'vis_{i}.png'))
    plt.close(fig)


print(f"Visualizacoes salvas em {os.path.join(OUTPUT_DIR, 'vis')}")



Visualizacoes salvas em ./model_output/vis


##