#### Worked. Since training on vehicles, took time to progress from one epoch to next

In [1]:
import torch
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.transforms import functional as F
from torchvision.datasets import CocoDetection
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torch.utils.data import DataLoader
from torch.optim import SGD
from torch.optim.lr_scheduler import StepLR
from pycocotools.coco import COCO

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
data = '/content/drive/MyDrive/vehicles/train'

In [5]:
# Define custom collate function to handle varying size images and annotations
def custom_collate_fn(batch):
    return tuple(zip(*batch))

In [11]:
# 1. Data Preparation
coco_root = data
coco_dataset = CocoDetection(root=coco_root, annFile=f'{coco_root}/_annotations.coco.json')
print("coco_dataset is: " + str(coco_dataset)) #Correctly shows the vehicles dataset
data_loader = DataLoader(coco_dataset, batch_size=2, shuffle=True, collate_fn=custom_collate_fn)
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

loading annotations into memory...
Done (t=0.02s)
creating index...
index created!
coco_dataset is: Dataset CocoDetection
    Number of datapoints: 878
    Root location: /content/drive/MyDrive/vehicles/train


In [8]:
# 2. Model Selection
model = fasterrcnn_resnet50_fpn(pretrained=True)#(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)
num_classes = len(coco_dataset.coco.cats)
print("coco_dataset.coco.cats is: ", coco_dataset.coco.cats) #prints correct classes of vehicles: general class + 5 classes
print("num_classes: " + str(num_classes)) #prints 6 classes total (incl. 1 general class)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:02<00:00, 82.0MB/s]


coco_dataset.coco.cats is:  {0: {'id': 0, 'name': 'vehicles', 'supercategory': 'none'}, 1: {'id': 1, 'name': 'Ambulance', 'supercategory': 'vehicles'}, 2: {'id': 2, 'name': 'Bus', 'supercategory': 'vehicles'}, 3: {'id': 3, 'name': 'Car', 'supercategory': 'vehicles'}, 4: {'id': 4, 'name': 'Motorcycle', 'supercategory': 'vehicles'}, 5: {'id': 5, 'name': 'Truck', 'supercategory': 'vehicles'}}
num_classes: 6


In [9]:
# 3. Loss Function
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = StepLR(optimizer, step_size=3, gamma=0.1)

In [None]:
num_epochs = 2
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    print("This is epoch number ", epoch)

    for images, annotations in data_loader:
        #print("pre-tensor images is: ", images) #PIL object
        images = [F.to_tensor(image).to(device) for image in images]
        # Process your dataset's annotations to extract bounding boxes and labels
        custom_targets = []
        #print("post-tensor images (list type) is: ", images) #Tensor object
        #print("annotations (tuple type) is: ", annotations)

        for annotation in annotations:
            boxes = []
            labels = []
            #print("annotation is: ", annotation)

            for annot in annotation: #annot represents a single annotation detailed from 'id' to 'iscrowd'
                print("annot is: ", annot)
                bbox = annot['bbox']
                x, y, w, h = bbox
                label = annot['category_id']
                boxes.append([x, y, x+w, y+h])
                labels.append(label)
                custom_targets.append({
                            'boxes': torch.tensor(boxes, dtype=torch.float32).to(device),
                            'labels': torch.tensor(labels, dtype=torch.int64).to(device)
                        })
        optimizer.zero_grad()
        loss_dict = model(images, custom_targets)
        loss = sum(loss for loss in loss_dict.values())
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Print training statistics
    print(f"Epoch [{epoch-1}/{num_epochs}] Loss: {total_loss / len(data_loader)}")

    lr_scheduler.step()

print("Finished Training")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
         ...,
         [0.3255, 0.3216, 0.3098,  ..., 0.4392, 0.4275, 0.4196],
         [0.3333, 0.3255, 0.3098,  ..., 0.4078, 0.4196, 0.4275],
         [0.3333, 0.3216, 0.3020,  ..., 0.3529, 0.3882, 0.4157]]]), tensor([[[0.3059, 0.3098, 0.3059,  ..., 0.8235, 0.8235, 0.8235],
         [0.3059, 0.3020, 0.3059,  ..., 0.8235, 0.8235, 0.8235],
         [0.2980, 0.2980, 0.3020,  ..., 0.8235, 0.8235, 0.8235],
         ...,
         [0.3333, 0.2941, 0.3059,  ..., 0.5765, 0.8000, 0.8980],
         [0.3647, 0.3686, 0.3804,  ..., 0.2902, 0.5294, 0.6471],
         [0.3216, 0.3608, 0.3765,  ..., 0.1098, 0.1725, 0.2392]],

        [[0.3961, 0.4000, 0.4078,  ..., 0.9412, 0.9412, 0.9412],
         [0.3961, 0.4039, 0.4078,  ..., 0.9412, 0.9412, 0.9412],
         [0.4000, 0.4078, 0.4118,  ..., 0.9412, 0.9412, 0.9412],
         ...,
         [0.3490, 0.3098, 0.3216,  ..., 0.5804, 0.8039, 0.9020],
         [0.3843, 0.3882, 0.4000,  ..., 0.3

In [None]:
# Save the trained model weights
torch.save(model.state_dict(), '/content/drive/MyDrive/frcnn_vehicles_20Oct.pth')

In [None]:
# 5. Evaluation
model.eval()
# Load the validation dataset and perform evaluation (similar to previous example)