In [1]:
import torch
import torchvision
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import xml.etree.ElementTree as ET
import os

# NOTE: despite its name, HOME is the filesystem root (os.sep made absolute),
# not the current user's home directory.
HOME = os.path.abspath(os.sep)

# Images (and the COCO annotation file) are expected under ./Dataset/Images,
# resolved against the directory the notebook kernel was started in.
dataset_folder = f"{os.getcwd()}/Dataset/Images"

In [2]:
import os
import json
import torch
from PIL import Image
import torchvision.transforms.functional as F
from torchvision.transforms import RandomHorizontalFlip, ColorJitter
from torch.utils.data import Dataset

class GunDataset(Dataset):
    """COCO-format detection dataset that yields ``(image, target)`` pairs.

    ``target`` is a dict with the keys ``image_id``, ``boxes`` (``[N, 4]`` float
    tensor in ``[x_min, y_min, x_max, y_max]`` order), ``labels``, ``iscrowd``
    and ``area``. Images without annotations produce an empty ``[0, 4]`` box
    tensor so downstream box indexing keeps working.
    """

    def __init__(self, root_dir, annotations_file, transforms=None, augment=True):
        """
        Args:
            root_dir (str): Directory with images.
            annotations_file (str): Path to _annotations.coco.json.
            transforms (callable, optional): Applied to the loaded PIL image
                (e.g. ``ToTensor``). When omitted, the raw PIL image is
                returned and no augmentation is applied.
            augment (bool, optional): Whether to apply the random color jitter
                and horizontal flip after ``transforms``. Defaults to ``True``
                (the historical behaviour); pass ``False`` for a dataset used
                for validation.
        """
        with open(annotations_file, 'r') as f:
            coco_data = json.load(f)

        self.root_dir = root_dir
        self.transforms = transforms
        self.augment = augment
        self.images = coco_data['images']
        self.annotations = coco_data['annotations']
        self.classes = ['gun']

        # Data augmentation transforms
        self.color_jitter = ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1)
        # Only the flip probability ``p`` is read from this object: the flip
        # itself is done manually so the boxes can be mirrored with the image.
        self.horizontal_flip = RandomHorizontalFlip(p=0.5)

        # Index annotations by image id; images without annotations keep [].
        self.img_to_anns = {img['id']: [] for img in self.images}
        for ann in self.annotations:
            self.img_to_anns[ann['image_id']].append(ann)

    def __len__(self):
        """Return the number of images listed in the annotation file."""
        return len(self.images)

    @staticmethod
    def _hflip_boxes(boxes, width):
        """Mirror ``[N, 4]`` xyxy boxes horizontally inside an image of ``width``."""
        flipped = boxes.clone()
        # The old right edge becomes the new left edge and vice versa.
        flipped[:, 0] = width - boxes[:, 2]
        flipped[:, 2] = width - boxes[:, 0]
        return flipped

    @staticmethod
    def _build_target(img_id, annotations):
        """Convert COCO annotation dicts for one image into a target dict of tensors."""
        boxes, labels, iscrowd, areas = [], [], [], []
        for ann in annotations:
            # COCO stores boxes as [x_min, y_min, width, height].
            x_min, y_min, width, height = ann['bbox']
            boxes.append([x_min, y_min, x_min + width, y_min + height])
            areas.append(width * height)
            labels.append(ann['category_id'])
            iscrowd.append(ann.get('iscrowd', 0))

        return {
            'image_id': img_id,
            # reshape keeps the [N, 4] layout even when there are no boxes.
            'boxes': torch.tensor(boxes, dtype=torch.float32).reshape(-1, 4),
            'labels': torch.tensor(labels, dtype=torch.int64),
            'iscrowd': torch.tensor(iscrowd, dtype=torch.int64),
            'area': torch.tensor(areas, dtype=torch.float32),
        }

    def apply_transforms(self, image, target):
        """Apply color jitter and a random horizontal flip to ``image``/``target``.

        Args:
            image: Tensor image (``[..., H, W]``) or PIL image.
            target (dict): Target dict whose ``'boxes'`` entry is ``[N, 4]`` xyxy.

        Returns:
            tuple: The augmented image and the target with mirrored boxes when
            the flip was applied.
        """
        # Apply color jitter
        image = self.color_jitter(image)

        # Apply horizontal flip
        if torch.rand(1).item() < self.horizontal_flip.p:
            # Tensors keep width in the last dim; PIL reports (width, height).
            width = image.shape[-1] if isinstance(image, torch.Tensor) else image.size[0]
            image = F.hflip(image)
            target['boxes'] = self._hflip_boxes(target['boxes'], width)

        return image, target

    def __getitem__(self, idx):
        """Load image ``idx`` and its target, applying transforms/augmentation.

        Returns:
            tuple: ``(image, target)``; ``image`` is whatever ``transforms``
            returns, or the RGB PIL image when ``transforms`` is not set.
        """
        img_info = self.images[idx]
        img_id = img_info['id']
        img_path = f"{self.root_dir}/{img_info['file_name']}"
        image = Image.open(img_path).convert("RGB")

        target = self._build_target(img_id, self.img_to_anns[img_id])

        if self.transforms:
            image = self.transforms(image)
            # Apply data augmentation (kept nested so that, as before, nothing
            # is augmented when no transforms were supplied).
            if self.augment:
                image, target = self.apply_transforms(image, target)

        return image, target

In [3]:
import torchvision.transforms as T
from torch.utils.data import DataLoader, random_split

transforms = T.Compose([T.ToTensor()])

# load dataset
dataset = GunDataset(dataset_folder, dataset_folder + "/_annotations.coco.json", transforms=transforms)

# define splits: 80% train / 20% validation, seeded so the partition is the
# same on every Restart & Run All
SPLIT_SEED = 42
train_size = int(0.8*len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
# NOTE(review): both subsets wrap the same GunDataset instance, so whatever that
# dataset does in __getitem__ (including its random augmentation) is applied to
# validation samples as well.

# worker processes are disabled on Windows
num_workers = 0 if os.name == 'nt' else 2


def detection_collate(batch):
    """Turn a list of (image, target) pairs into (images, targets) tuples.

    Detection samples differ in image size and box count, so they are kept as
    tuples rather than stacked into a single tensor.
    """
    return tuple(zip(*batch))


# data loaders
train_loader = DataLoader(
    train_dataset,
    batch_size=8,
    shuffle=True,
    num_workers=num_workers,
    collate_fn=detection_collate,
    pin_memory=True,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=8,
    shuffle=False,
    num_workers=num_workers,
    collate_fn=detection_collate,
)

In [4]:
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FasterRCNN_ResNet50_FPN_Weights
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Train on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Faster R-CNN (ResNet-50 FPN backbone) initialised from torchvision's default
# pretrained weights.
model = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)

# Replace the box-prediction head with one sized for this task.
# num_classes = 2: presumably background + 'gun' — confirm against the
# category ids used in the annotation file.
num_classes = 2
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
model.to(device)

# SGD with momentum; StepLR multiplies the learning rate by 0.1 every 3 scheduler steps.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1,
)

In [5]:
from engine import train_one_epoch, evaluate
from metrics_utils import MetricsLogger

num_epochs = 20
metrics_logger = MetricsLogger('data_augmentation')

for epoch in range(num_epochs):
    # Training
    train_metrics = train_one_epoch(model, optimizer, train_loader, device, epoch, print_freq=10)

    # Validation
    coco_evaluator = evaluate(model, val_loader, device)

    # Read the learning rate BEFORE stepping the scheduler, so the value logged
    # for this epoch is the one in effect at the end of this epoch's training
    # rather than the rate scheduled for the next epoch.
    epoch_lr = optimizer.param_groups[0]['lr']

    # Update learning rate
    lr_scheduler.step()

    # Log metrics
    metrics_logger.log_epoch(
        epoch_num=epoch,
        train_loss=train_metrics.meters['loss'].global_avg,
        lr=epoch_lr,
        coco_evaluator=coco_evaluator
    )

# Save final metrics
metrics_logger.save_training_metrics()

Epoch: [0]  [  0/297]  eta: 0:20:12  lr: 0.000022  loss: 1.0443 (1.0443)  loss_classifier: 0.7830 (0.7830)  loss_box_reg: 0.2189 (0.2189)  loss_objectness: 0.0310 (0.0310)  loss_rpn_box_reg: 0.0114 (0.0114)  time: 4.0829  data: 0.4279  max mem: 5782
Epoch: [0]  [ 10/297]  eta: 0:08:57  lr: 0.000191  loss: 0.8340 (0.7728)  loss_classifier: 0.6119 (0.5838)  loss_box_reg: 0.1526 (0.1603)  loss_objectness: 0.0153 (0.0199)  loss_rpn_box_reg: 0.0089 (0.0088)  time: 1.8736  data: 0.1379  max mem: 5940
Epoch: [0]  [ 20/297]  eta: 0:14:17  lr: 0.000359  loss: 0.4393 (0.5577)  loss_classifier: 0.2549 (0.3804)  loss_box_reg: 0.1418 (0.1490)  loss_objectness: 0.0153 (0.0192)  loss_rpn_box_reg: 0.0085 (0.0090)  time: 3.0453  data: 0.1096  max mem: 5940
Epoch: [0]  [ 30/297]  eta: 0:13:27  lr: 0.000528  loss: 0.3236 (0.4838)  loss_classifier: 0.1443 (0.3087)  loss_box_reg: 0.1371 (0.1478)  loss_objectness: 0.0104 (0.0186)  loss_rpn_box_reg: 0.0085 (0.0088)  time: 3.6575  data: 0.1080  max mem: 5940


In [1]:
# Export the trained detector to ONNX.
# Prerequisite: the cells above must have been run in the current kernel
# session — this cell relies on `os`, `torch` and the trained `model` from them.
path_model = "Model/faster_rcnn_data_augmentation.onnx"
os.makedirs(os.path.dirname(path_model), exist_ok=True)

# Export the inference graph: switch the detector to eval mode explicitly.
model.eval()

# Send the dummy input to whichever device the model lives on instead of
# assuming CUDA, so the export also works on CPU-only machines.
export_device = next(model.parameters()).device

# NOTE: `input` shadows the Python builtin; the name is kept in case later
# cells refer to it.
input = torch.randn(1, 3, 150, 150)
print(input.shape)
torch.onnx.export(
    model,
    input.to(export_device),
    path_model,
    export_params=True,
    opset_version=17,
    input_names=['image'],
    output_names=['boxes', 'labels', 'scores']
)

NameError: name 'os' is not defined