In [2]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import torchvision.transforms as T
import xml.etree.ElementTree as ET
from PIL import Image
import torchvision


In [None]:
class VOCDataset(Dataset):
    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms

        # Carrega todas as imagens e anotações, ordenadas para correspondência
        self.imgs = list(sorted(os.listdir(os.path.join(root, "images"))))
        self.annotations = list(sorted(os.listdir(os.path.join(root, "annotations"))))

    def __getitem__(self, idx):
        # Carrega imagem
        img_path = os.path.join(self.root, "images", self.imgs[idx])
        img = Image.open(img_path).convert("RGB")

        # Carrega anotações
        annot_path = os.path.join(self.root, "annotations", self.annotations[idx])
        target = self.parse_voc_xml(ET.parse(annot_path).getroot())

        boxes = []
        labels = []

        # Verifica se a chave 'object' existe nas anotações
        if 'object' in target['annotation']:
            objs = target['annotation']['object']
            if not isinstance(objs, list):
                objs = [objs]

            for obj in objs:
                bbox = obj['bndbox']
                xmin = float(bbox['xmin'])
                ymin = float(bbox['ymin'])
                xmax = float(bbox['xmax'])
                ymax = float(bbox['ymax'])
                boxes.append([xmin, ymin, xmax, ymax])
                labels.append(1)
        else:
            # Se não houver objetos, criar tensores vazios
            boxes = torch.zeros((0, 4), dtype=torch.float32)
            labels = torch.zeros((0,), dtype=torch.int64)

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        iscrowd = torch.zeros((len(boxes),), dtype=torch.int64)

        target_dict = {}
        target_dict["boxes"] = boxes
        target_dict["labels"] = labels
        target_dict["image_id"] = image_id
        target_dict["area"] = area
        target_dict["iscrowd"] = iscrowd

        if self.transforms:
            img = self.transforms(img)

        return img, target_dict

    def __len__(self):
        return len(self.imgs)

    def parse_voc_xml(self, node):
        voc_dict = {}
        children = list(node)
        if children:
            def_dict = {}
            for dc in map(self.parse_voc_xml, children):
                for ind, v in dc.items():
                    if ind in def_dict:
                        if not isinstance(def_dict[ind], list):
                            def_dict[ind] = [def_dict[ind]]
                        def_dict[ind].append(v)
                    else:
                        def_dict[ind] = v
            voc_dict[node.tag] = def_dict
        else:
            voc_dict[node.tag] = node.text
        return voc_dict

In [32]:
def get_transform(train):
    transforms = []
    transforms.append(T.ToTensor())
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)

In [33]:
def collate_fn(batch):
    return tuple(zip(*batch))


In [3]:
def get_model(num_classes):
    # Carrega o modelo pré-treinado no COCO
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights='DEFAULT')

    # Obtem o número de características de entrada do classificador
    in_features = model.roi_heads.box_predictor.cls_score.in_features

    # Substitui o cabeçalho preditor por um novo com o número de classes desejado
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model


In [35]:
# Diretórios do conjunto de dados
train_dir = 'BD_joing/train'
valid_dir = 'BD_joing/valid'
test_dir = 'BD_joing/test'

In [5]:
# Número de classes (incluindo o fundo)
num_classes = 3 

In [37]:
# Cria os datasets
dataset = VOCDataset(train_dir, transforms=get_transform(train=True))
dataset_valid = VOCDataset(valid_dir, transforms=get_transform(train=False))
dataset_test = VOCDataset(test_dir, transforms=get_transform(train=False))

In [38]:
# DataLoaders
data_loader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=12, collate_fn=collate_fn)
data_loader_valid = DataLoader(dataset_valid, batch_size=16, shuffle=False, num_workers=12, collate_fn=collate_fn)
data_loader_test = DataLoader(dataset_test, batch_size=16, shuffle=False, num_workers=12, collate_fn=collate_fn)

In [39]:
# Obtem o modelo
model = get_model(num_classes)

In [40]:
# Move modelo para o dispositivo
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)
model.to(device)

cuda


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [41]:
# Parâmetros do otimizador
params = [p for p in model.parameters() if p.requires_grad]


In [None]:
# Define otimizador
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

In [None]:
# Marca a taxa de aprendizado
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

In [None]:
# Número de épocas
num_epochs = 100

# Treinamento
for epoch in range(num_epochs):
    print(f"Época {epoch+1}/{num_epochs}")
    model.train()
    i = 0
    epoch_loss = 0
    # Loop de treinamento
    for images, targets in data_loader:
        images = list(img.to(device) for img in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())
        epoch_loss += losses.item()

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        i += 1

    # Média da perda na época
    epoch_loss /= len(data_loader)
    print(f"Perda na época {epoch+1}: {epoch_loss:.4f}")

    # Atualiza a taxa de aprendizado
    lr_scheduler.step()

print("Treinamento concluído.")


Época 1/100
Perda na época 1: 0.2733
Época 2/100
Perda na época 2: 0.2057
Época 3/100
Perda na época 3: 0.1816
Época 4/100
Perda na época 4: 0.1697
Época 5/100
Perda na época 5: 0.1704
Época 6/100
Perda na época 6: 0.1665
Época 7/100
Perda na época 7: 0.1664
Época 8/100
Perda na época 8: 0.1650
Época 9/100
Perda na época 9: 0.1638
Época 10/100
Perda na época 10: 0.1657
Época 11/100
Perda na época 11: 0.1660
Época 12/100
Perda na época 12: 0.1630
Época 13/100
Perda na época 13: 0.1692
Época 14/100
Perda na época 14: 0.1668
Época 15/100
Perda na época 15: 0.1659
Época 16/100
Perda na época 16: 0.1669
Época 17/100
Perda na época 17: 0.1664
Época 18/100
Perda na época 18: 0.1660
Época 19/100
Perda na época 19: 0.1675
Época 20/100
Perda na época 20: 0.1633
Época 21/100
Perda na época 21: 0.1656
Época 22/100
Perda na época 22: 0.1643
Época 23/100
Perda na época 23: 0.1637
Época 24/100
Perda na época 24: 0.1675
Época 25/100
Perda na época 25: 0.1635
Época 26/100
Perda na época 26: 0.1626
Époc

In [None]:
torch.save(model.state_dict(), 'faster_rcnn_model.pth')
print("Modelo salvo como 'faster_rcnn_model.pth'.")

Modelo salvo como 'faster_rcnn_model.pth'.
