In [7]:
import torch
from torch.utils.data import Dataset
from torchvision import transforms as T
from PIL import Image
import json
import os
import numpy as np

In [8]:
class CustomDataset(Dataset):
    """Object-detection dataset backed by a COCO-style JSON annotation file.

    Each item is an ``(image, target)`` pair. ``target`` is a dict with:

    * ``"boxes"``  - float32 tensor of shape ``[N, 4]`` holding
      ``[xmin, ymin, xmax, ymax]`` corners (``[0, 4]`` when the image has no
      annotations),
    * ``"labels"`` - int64 tensor of shape ``[N]`` holding ``category_id``.

    Args:
        json_file: Path to the annotation JSON. It must contain an ``images``
            list (entries with ``id`` and ``file_name``) and an
            ``annotations`` list (entries with ``image_id``, ``category_id``
            and ``bbox`` given as ``[xmin, ymin, width, height]``).
        img_dir: Directory that ``file_name`` entries are resolved against.
        transforms: Optional callable applied to the image only; the target
            is returned untouched.
    """

    def __init__(self, json_file, img_dir, transforms=None):
        with open(json_file) as f:
            self.data = json.load(f)
        self.img_dir = img_dir
        self.transforms = transforms

        # Group annotations by image once, so __getitem__ is a dict lookup
        # instead of a scan over every annotation in the file. The relative
        # order of annotations within an image is preserved.
        self._anns_by_image = {}
        for ann in self.data['annotations']:
            self._anns_by_image.setdefault(ann['image_id'], []).append(ann)

    def __getitem__(self, idx):
        """Return the ``(image, target)`` pair for the image at position ``idx``."""
        img_info = self.data['images'][idx]
        img_path = os.path.join(self.img_dir, img_info['file_name'])

        # Collapse to a single luminance channel, then expand back to three
        # channels. The context manager releases the file handle right away.
        with Image.open(img_path) as src:
            img = src.convert("L").convert("RGB")

        annotations = self._anns_by_image.get(img_info['id'], [])

        # Annotation boxes are [xmin, ymin, width, height]; convert them to
        # [xmin, ymin, xmax, ymax] corners.
        boxes = [
            [xmin, ymin, xmin + width, ymin + height]
            for xmin, ymin, width, height in (ann['bbox'] for ann in annotations)
        ]
        labels = [ann['category_id'] for ann in annotations]

        target = {
            # reshape keeps the [N, 4] layout even when N == 0; a bare empty
            # list would otherwise become a 1-D tensor of shape [0].
            "boxes": torch.tensor(boxes, dtype=torch.float32).reshape(-1, 4),
            "labels": torch.tensor(labels, dtype=torch.int64),
        }

        if self.transforms is not None:
            img = self.transforms(img)

        return img, target

    def __len__(self):
        """Number of images listed in the annotation file."""
        return len(self.data['images'])

# Image-only preprocessing pipeline; it currently holds the single ToTensor step.
transform = T.Compose([T.ToTensor()])

# Training split: the annotation file sits inside the image directory.
dataset = CustomDataset(
    json_file='../../data/detection/labeled/train/_annotations.coco.json',
    img_dir='../../data/detection/labeled/train',
    transforms=transform,
)

In [21]:
from torch.utils.data import DataLoader

def _collate_detection_batch(batch):
    """Turn a list of ``(image, target)`` samples into ``(images, targets)`` tuples.

    Samples are transposed rather than stacked, so each image and target stays
    a separate element of its tuple.
    """
    return tuple(zip(*batch))


dataloader = DataLoader(
    dataset,
    batch_size=2,
    shuffle=True,
    collate_fn=_collate_detection_batch,
)

In [22]:
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn

# Size of the classification head that replaces the stock one.
# NOTE(review): 91 is hard-coded - presumably carried over from COCO; confirm it
# covers every category_id present in the custom annotation file.
num_classes = 91

# NOTE(review): `pretrained=True` may be deprecated in newer torchvision releases
# in favour of a `weights=` argument - confirm against the installed version.
model = fasterrcnn_resnet50_fpn(pretrained=True)

# Swap the box predictor for a new one with `num_classes` outputs, reusing the
# input width of the existing classification layer.
box_head = model.roi_heads.box_predictor
in_features = box_head.cls_score.in_features
model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(
    in_features,
    num_classes,
)

model.train()


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [23]:
import os
# Sets CUDA_LAUNCH_BLOCKING to "1" in this process's environment.
# NOTE(review): presumably a debugging aid for tracking down CUDA errors. It is
# set after torch was already imported in an earlier cell - confirm it still
# takes effect and whether it is still needed.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [27]:
import torch.optim as optim

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the model before building the optimizer so the optimizer is created
# from the parameters on their final device.
model.to(device)

# Only hand the optimizer parameters that can actually receive gradients.
trainable_params = [param for param in model.parameters() if param.requires_grad]
optimizer = optim.SGD(trainable_params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# Multiply the learning rate by 0.1 every 3 epochs.
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    for images, targets in dataloader:
        # Each batch is a tuple of images and a tuple of target dicts.
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        optimizer.zero_grad()
        # With targets supplied in train mode, the model output is consumed as
        # a dict of loss tensors, which are summed into a single scalar.
        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())
        losses.backward()
        optimizer.step()

        total_loss += losses.item()

    lr_scheduler.step()

    # Reported value is the sum of the per-batch losses over the epoch.
    print(f"Epoch #{epoch+1} Loss: {total_loss:.4f}")

KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt
from torchvision.utils import draw_bounding_boxes

# Helper that displays images with the model's object detections drawn on top
def show_results(images, targets, model, score_threshold=0.5):
    """Run ``model`` on each image and plot the detections above a score cut-off.

    Args:
        images: Iterable of image tensors. Each one is permuted with
            ``(1, 2, 0)`` before display, so a channel-first layout is assumed.
        targets: Iterable paired with ``images``; it only bounds the iteration
            (via ``zip``) - ground-truth boxes are not drawn.
        model: Detection model. Called with a one-element list, it must return
            a list whose first item is a dict with ``boxes``, ``labels`` and
            ``scores`` tensors.
        score_threshold: Detections are drawn only when their score is
            strictly greater than this value. Defaults to ``0.5``.

    The model is switched to eval mode for inference and put back into its
    previous train/eval mode afterwards.
    """
    # Use the device the model lives on rather than a notebook-level global.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for image, _target in zip(images, targets):
                prediction = model([image.to(model_device)])[0]
                boxes = prediction['boxes'].cpu().tolist()
                labels = prediction['labels'].cpu().tolist()
                scores = prediction['scores'].cpu().tolist()

                # Show the image together with its bounding boxes
                fig, ax = plt.subplots(1, figsize=(12, 9))
                ax.imshow(image.permute(1, 2, 0).cpu().numpy())
                for (xmin, ymin, xmax, ymax), label, score in zip(boxes, labels, scores):
                    # Only draw detections scoring above the threshold
                    if score > score_threshold:
                        ax.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                                   fill=False, color='red', linewidth=2))
                        ax.text(xmin, ymin, f'{label} {score:.2f}', fontsize=12, color='white')
                plt.show()
    finally:
        # Restore whichever mode the caller had the model in.
        model.train(was_training)

# Pull one batch of images and targets from the data loader for evaluation
batch_iter = iter(dataloader)
images, targets = next(batch_iter)

# Display the detection results for that batch
show_results(images=images, targets=targets, model=model)


In [None]:
# Save the trained model's weights (state dict only) to the working directory
trained_weights = model.state_dict()
torch.save(trained_weights, 'model_fasterrcnn.pth')