In [1]:
import torch
import torchvision
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import xml.etree.ElementTree as ET
import os

# Absolute form of the bare path separator, i.e. the filesystem root
# ("/" on POSIX). NOTE(review): despite the name this is not the user's
# home directory, and nothing in the visible cells reads it.
HOME = os.path.abspath(os.path.sep)

# Images and the COCO annotation file are expected under
# <current working directory>/Dataset/Images.
dataset_folder = f"{os.getcwd()}/Dataset/Images"

In [2]:
import os
import json
import torch
from PIL import Image
from torch.utils.data import Dataset

class GunDataset(Dataset):
    """Object-detection dataset backed by a COCO-format annotation file.

    Indexing returns ``(image, target)``. ``target`` is a dict with the keys
    ``image_id``, ``boxes``, ``labels``, ``iscrowd`` and ``area``; boxes are
    converted from COCO ``[x, y, width, height]`` to
    ``[x_min, y_min, x_max, y_max]``.
    """

    def __init__(self, root_dir, annotations_file, transforms=None):
        """
        Args:
            root_dir (str): Directory with images.
            annotations_file (str): Path to _annotations.coco.json.
            transforms (callable, optional): Called on the loaded PIL image
                only; the target dict is returned unchanged.

        Raises:
            KeyError: If an annotation references an ``image_id`` that is not
                listed under ``images`` in the annotation file.
        """
        with open(annotations_file, 'r') as f:
            coco_data = json.load(f)

        self.root_dir = root_dir
        self.transforms = transforms
        self.images = coco_data['images']
        self.annotations = coco_data['annotations']
        self.classes = ['gun']

        # Index annotations by image id once so __getitem__ is a dict lookup.
        # Every image gets an entry, so images without annotations map to [].
        self.img_to_anns = {img['id']: [] for img in self.images}
        for ann in self.annotations:
            self.img_to_anns[ann['image_id']].append(ann)

    def __len__(self):
        """Return the number of images listed in the annotation file."""
        return len(self.images)

    def _build_target(self, img_id, annotations):
        """Convert the COCO annotations of one image into a target dict.

        Args:
            img_id: Image id stored unchanged under ``'image_id'``.
            annotations (list[dict]): Annotation records for that image; each
                must provide ``bbox`` and ``category_id`` (``iscrowd``
                defaults to 0 when absent).

        Returns:
            dict: ``boxes`` (float32, shape ``(N, 4)``), ``labels`` (int64),
            ``iscrowd`` (int64), ``area`` (float32) and ``image_id``.
        """
        boxes, labels, iscrowd, areas = [], [], [], []
        for ann in annotations:
            x_min, y_min, width, height = ann['bbox']
            boxes.append([x_min, y_min, x_min + width, y_min + height])
            areas.append(width * height)
            labels.append(ann['category_id'])
            iscrowd.append(ann.get('iscrowd', 0))

        return {
            'image_id': img_id,
            # An empty list would otherwise produce shape (0,); the reshape
            # keeps the (N, 4) layout for images with no annotations too.
            'boxes': torch.tensor(boxes, dtype=torch.float32).reshape(-1, 4),
            'labels': torch.tensor(labels, dtype=torch.int64),
            'iscrowd': torch.tensor(iscrowd, dtype=torch.int64),
            'area': torch.tensor(areas, dtype=torch.float32),
        }

    def __getitem__(self, idx):
        """Load the ``idx``-th image as RGB and build its detection target.

        Returns:
            tuple: ``(image, target)``; ``image`` is the PIL image, or the
            result of ``self.transforms(image)`` when transforms were given.
        """
        img_info = self.images[idx]
        img_id = img_info['id']
        img_path = f"{self.root_dir}/{img_info['file_name']}"
        image = Image.open(img_path).convert("RGB")

        target = self._build_target(img_id, self.img_to_anns[img_id])

        if self.transforms:
            image = self.transforms(image)

        return image, target

In [3]:
import torchvision.transforms as T
from torch.utils.data import DataLoader, random_split

# Fixed seed for the train/validation split so that every run of this
# notebook trains and validates on the same images.
SPLIT_SEED = 42


def collate_detection_batch(batch):
    """Turn [(image, target), ...] into ((image, ...), (target, ...)).

    Images and targets are kept as tuples instead of being stacked into a
    single tensor.
    """
    return tuple(zip(*batch))


transforms = T.Compose([T.ToTensor()])

# load dataset
dataset = GunDataset(dataset_folder, dataset_folder + "/_annotations.coco.json", transforms=transforms)

# define splits (80% train, remainder validation)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, val_dataset = random_split(dataset, [train_size, val_size], generator=split_generator)

# Worker processes are disabled on Windows; elsewhere two workers are used.
num_workers = 0 if os.name == 'nt' else 2

# Pinned host memory only helps host-to-GPU copies, so enable it only when
# CUDA is actually available.
pin_memory = torch.cuda.is_available()

# data loaders
train_loader = DataLoader(
    train_dataset,
    batch_size=8,
    shuffle=True,
    num_workers=num_workers,
    collate_fn=collate_detection_batch,
    pin_memory=pin_memory,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=8,
    shuffle=False,
    num_workers=num_workers,
    collate_fn=collate_detection_batch,
    pin_memory=pin_memory,
)

In [4]:
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FasterRCNN_ResNet50_FPN_Weights
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Output size of the replacement box head.
# NOTE(review): presumably background + the single 'gun' class; confirm
# against the category ids in the annotation file.
num_classes = 2

# Start from the default pretrained Faster R-CNN weights and replace only the
# box predictor so it emits `num_classes` scores.
model = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
model.to(device)

# Higher learning rate
optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=0.01,
    momentum=0.9,
    weight_decay=0.0005,
)
# Multiply the learning rate by 0.1 after every 3 scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer=optimizer,
    step_size=3,
    gamma=0.1,
)

In [5]:
from engine import train_one_epoch, evaluate
from metrics_utils import MetricsLogger

num_epochs = 20
metrics_logger = MetricsLogger('higher_lr')

for epoch in range(num_epochs):
    # Training
    train_metrics = train_one_epoch(model, optimizer, train_loader, device, epoch, print_freq=10)

    # Validation
    coco_evaluator = evaluate(model, val_loader, device)

    # Read the learning rate BEFORE stepping the scheduler: after the step,
    # the optimizer already holds the next epoch's value and the log entry
    # for this epoch would be off by one.
    epoch_lr = optimizer.param_groups[0]['lr']

    # Update learning rate
    lr_scheduler.step()

    # Log metrics
    metrics_logger.log_epoch(
        epoch_num=epoch,
        train_loss=train_metrics.meters['loss'].global_avg,
        lr=epoch_lr,
        coco_evaluator=coco_evaluator,
    )

# Save final metrics
metrics_logger.save_training_metrics()

Epoch: [0]  [  0/297]  eta: 0:17:01  lr: 0.000044  loss: 0.7250 (0.7250)  loss_classifier: 0.5823 (0.5823)  loss_box_reg: 0.1278 (0.1278)  loss_objectness: 0.0048 (0.0048)  loss_rpn_box_reg: 0.0101 (0.0101)  time: 3.4406  data: 0.2182  max mem: 5782
Epoch: [0]  [ 10/297]  eta: 0:05:28  lr: 0.000381  loss: 0.5700 (0.5683)  loss_classifier: 0.4320 (0.4076)  loss_box_reg: 0.1407 (0.1398)  loss_objectness: 0.0124 (0.0125)  loss_rpn_box_reg: 0.0084 (0.0084)  time: 1.1439  data: 0.0412  max mem: 5941
Epoch: [0]  [ 20/297]  eta: 0:04:44  lr: 0.000719  loss: 0.2979 (0.4255)  loss_classifier: 0.1454 (0.2761)  loss_box_reg: 0.1173 (0.1280)  loss_objectness: 0.0131 (0.0130)  loss_rpn_box_reg: 0.0081 (0.0083)  time: 0.9052  data: 0.0230  max mem: 5941
Epoch: [0]  [ 30/297]  eta: 0:04:22  lr: 0.001056  loss: 0.2751 (0.3902)  loss_classifier: 0.1373 (0.2396)  loss_box_reg: 0.1160 (0.1282)  loss_objectness: 0.0131 (0.0137)  loss_rpn_box_reg: 0.0085 (0.0086)  time: 0.8961  data: 0.0224  max mem: 5941


In [6]:
path_model = "Model/faster_rcnn_higher_lr.onnx"
# exist_ok makes the cell safe to re-run without a separate existence check.
os.makedirs(os.path.dirname(path_model), exist_ok=True)

# Export the inference graph: make sure the model is in eval mode no matter
# what state the preceding cells left it in.
model.eval()

# Named `dummy_input` rather than `input` so the Python builtin is not
# shadowed for the rest of the kernel session. It is placed on the same
# device the model was moved to, so the export also works on CPU-only
# machines.
dummy_input = torch.randn(1, 3, 150, 150).to(device)
print(dummy_input.shape)
torch.onnx.export(
    model,
    dummy_input,
    path_model,
    export_params=True,
    opset_version=17,
    input_names=['image'],
    output_names=['boxes', 'labels', 'scores']
)

torch.Size([1, 3, 150, 150])


  (torch.floor((input.size(i + 2).float() * torch.tensor(scale_factors[i], dtype=torch.float32)).float()))
  boxes_x = torch.min(boxes_x, torch.tensor(width, dtype=boxes.dtype, device=boxes.device))
  boxes_y = torch.min(boxes_y, torch.tensor(height, dtype=boxes.dtype, device=boxes.device))
  assert condition, message
  torch.tensor(s, dtype=torch.float32, device=boxes.device)
  / torch.tensor(s_orig, dtype=torch.float32, device=boxes.device)
