In [81]:
import json
from PIL import Image
import torch
import random

In [82]:
class COCODataset(torch.utils.data.Dataset):
    def __init__(self, annotation_file, image_dir, classes, transform=None):
        with open(annotation_file, 'r') as f:
            self.coco_data = json.load(f)
        self.image_dir = image_dir
        self.classes = classes
        self.transform = transform

        self.image_annotations = {img['id']: [] for img in self.coco_data['images']}
        for ann in self.coco_data['annotations']:
            self.image_annotations[ann['image_id']].append(ann)
        
        #Identify the background image
        filtered_images = [
            img for img in self.coco_data['images']
            if len(self.image_annotations[img['id']]) == 0
        ]

        #Select 100% of non annotated to be removed
        num_to_remove = int(len(filtered_images) * 1)
        images_to_remove = random.sample(filtered_images, num_to_remove)
        images_to_remove_ids = {img['id'] for img in images_to_remove}

        #Remove the selected images
        self.coco_data['images'] = [
            img for img in self.coco_data['images']
            if img['id'] not in images_to_remove_ids
        ]

        #Remove the annotations as well
        self.coco_data['annotations'] = [
            ann for ann in self.coco_data['annotations']
            if ann['image_id'] in {img['id'] for img in self.coco_data['images']}
        ]

        #Rebuild the annotations
        self.image_annotations = {img['id']: [] for img in self.coco_data['images']}
        for ann in self.coco_data['annotations']:
            self.image_annotations[ann['image_id']].append(ann)

        self.images = self.coco_data['images']
        self.image_ids = [img['id'] for img in self.coco_data['images']]
    
    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, idx):
        image_data = self.images[idx]
        image_id = image_data['id']
        image_path = f"{self.image_dir}/{image_data['file_name']}"
        image = Image.open(image_path).convert("RGB")

        if self.transform:
            image = self.transform(image)

        annotations = self.image_annotations[image_id]
        boxes = []
        labels = []
        for ann in annotations:
            x, y, w, h = ann['bbox']
            boxes.append([x, y, x + w, y + h])
            labels.append(int(ann['category_id']))
        
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        image_id = torch.tensor([image_id])

        target = {
            'boxes': boxes,
            'labels': labels, 
            'image_id': image_id
        }
        return image, target

    

In [83]:
import torchvision
from torchvision import transforms
import numpy as np

In [84]:
print(torch.__version__)
print(torchvision.__version__)

2.5.1+cu118
0.20.1+cu118


In [85]:
annotations_path = "instances_Train.json"
frames_path = "frames"
classes = {0: "No_hit", 1: "Hit", 3:"Oc_hit", 4:"Full_occ"}
transforms = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [86]:
dataset = COCODataset(annotations_path, frames_path, classes, transforms)

In [87]:
from sklearn.model_selection import train_test_split
from torch.utils.data import Subset
from torch.utils.data import DataLoader

In [88]:
remaining_indices = np.arange(len(dataset.image_ids))
train_indices, test_indices = train_test_split(remaining_indices, test_size=0.2, random_state=10)

train_dataset = Subset(dataset, train_indices)
test_dataset = Subset(dataset, test_indices)

In [89]:
def collate_fn(batch):
    max_boxes = max([len(item[1]['boxes']) for item in batch])

    images = []
    targets = []

    for img, target in batch:
        boxes = target['boxes']
        labels = target['labels']

        if len(boxes) < max_boxes:
            pad_size = max_boxes - len(boxes)
            boxes = torch.cat([boxes, torch.zeros(pad_size, 4)], dim=0)
            labels = torch.cat([labels, torch.zeros(pad_size, dtype=torch.int64)], dim=0)
        
        targets.append({
            'boxes': boxes,
            'labels': labels,
            'image_id': target['image_id']
        })

        images.append(img)
    
    return images, targets

In [90]:
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True, pin_memory=True, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, pin_memory=True, collate_fn=collate_fn)

In [91]:
for batch_idx, (images, targets) in enumerate(train_loader):
    print(f"Batch {batch_idx} loaded successfully.")

Batch 0 loaded successfully.
Batch 1 loaded successfully.
Batch 2 loaded successfully.
Batch 3 loaded successfully.
Batch 4 loaded successfully.
Batch 5 loaded successfully.
Batch 6 loaded successfully.
Batch 7 loaded successfully.


Batch 8 loaded successfully.
Batch 9 loaded successfully.
Batch 10 loaded successfully.
Batch 11 loaded successfully.
Batch 12 loaded successfully.
Batch 13 loaded successfully.
Batch 14 loaded successfully.
Batch 15 loaded successfully.
Batch 16 loaded successfully.
Batch 17 loaded successfully.
Batch 18 loaded successfully.
Batch 19 loaded successfully.
Batch 20 loaded successfully.
Batch 21 loaded successfully.
Batch 22 loaded successfully.
Batch 23 loaded successfully.
Batch 24 loaded successfully.
Batch 25 loaded successfully.
Batch 26 loaded successfully.
Batch 27 loaded successfully.
Batch 28 loaded successfully.
Batch 29 loaded successfully.
Batch 30 loaded successfully.
Batch 31 loaded successfully.
Batch 32 loaded successfully.
Batch 33 loaded successfully.
Batch 34 loaded successfully.
Batch 35 loaded successfully.
Batch 36 loaded successfully.
Batch 37 loaded successfully.
Batch 38 loaded successfully.
Batch 39 loaded successfully.
Batch 40 loaded successfully.
Batch 41 loa

In [92]:
import torch
from torchvision.models.detection import fasterrcnn_resnet50_fpn

In [93]:
model = fasterrcnn_resnet50_fpn(pretrained=True)

num_classes = len(classes)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor.cls_score = torch.nn.Linear(in_features, 4)

In [94]:
import torch.optim as optim

In [95]:
params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.SGD(params, lr=0.005, momentum=0.9)

In [96]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    train_loss = 0

    for images, targets in train_loader:
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        
        train_loss += losses.item()
    
    print(f"{epoch+1} loss: {train_loss/len(train_loader):.4f}")

print("Finished training")

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
