Importing the required libraries

In [None]:
import torch
import torchvision
import optuna
import os
import json
import cv2
import json
import cv2
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.transforms import functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import torch.optim as optim
import torchvision.models.detection
from torchvision.ops import box_iou
from PIL import Image

* Next, we extract the annotations that belong to a single image, together with their bounding boxes and labels.
* The image is then displayed with each bounding box and its corresponding label drawn on top.

In [None]:
# Locations of the COCO-format annotation file and of the image directory.
annotations_path = '/kaggle/input/fracatlas/FracAtlas/Annotations/COCO JSON/COCO_fracture_masks.json'
image_directory = '/kaggle/input/fracatlas/FracAtlas/images/Fractured'

with open(annotations_path) as file:
    coco_data = json.load(file)

# Category id -> category name, used to caption each bounding box.
category_mapping = {category['id']: category['name'] for category in coco_data['categories']}

# Preview one sample: the entry at index 4 of the image list.
image_info = coco_data['images'][4]
image_path = os.path.join(image_directory, image_info['file_name'])
image = cv2.imread(image_path)
# cv2.imread signals a missing/unreadable file by returning None rather than
# raising, so fail here with a clear message instead of inside cvtColor.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
# OpenCV loads images as BGR; matplotlib expects RGB.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Keep only the annotations that belong to the selected image.
annotations = [ann for ann in coco_data['annotations'] if ann['image_id'] == image_info['id']]

plt.imshow(image)
for ann in annotations:
    bbox = ann['bbox']
    label = category_mapping[ann['category_id']]
    # The bbox is unpacked as (x, y, width, height) and drawn from its top-left corner.
    x, y, w, h = bbox
    rect = patches.Rectangle((x, y), w, h, linewidth=1, edgecolor='r', facecolor='none')
    plt.gca().add_patch(rect)
    plt.text(x, y, label, color='white', fontsize=8, backgroundcolor='red')
plt.show()


This class handles the data preprocessing for the FracAtlas images.
It takes the COCO JSON annotations and the image directory as input.
The cell defines three main pieces:
1. `__getitem__` - retrieves the annotations of an image and computes its bounding boxes
2. `__len__` - returns the number of images in the dataset
3. `get_transform` - a helper function (defined outside the class) that builds the transform converting images to tensors


In [None]:
class FracAtlasDataset(Dataset):
    """Detection dataset pairing FracAtlas images with their COCO bounding boxes.

    Each item is ``(img, target)`` where ``target`` is a dict with:

    * ``"boxes"``  - float32 tensor of shape ``(N, 4)`` holding
      ``[xmin, ymin, xmax, ymax]`` rows, converted from the
      ``[x, y, width, height]`` ``bbox`` entries of the annotation file;
    * ``"labels"`` - int64 tensor of ``N`` ones (every box gets class 1).

    ``N`` is the number of annotations whose ``image_id`` equals the id of the
    requested image; it may be 0, in which case ``boxes`` has shape ``(0, 4)``.
    """

    def __init__(self, annotations_file, img_dir, transforms=None):
        """Load the annotation JSON and group its annotations by image id.

        Args:
            annotations_file: Path to a JSON file containing ``images`` and
                ``annotations`` lists.
            img_dir: Directory onto which each image's ``file_name`` is joined.
            transforms: Optional callable applied to the loaded PIL image.
        """
        self.transforms = transforms
        with open(annotations_file) as f:
            self.coco_data = json.load(f)
        self.img_dir = img_dir
        self.images = self.coco_data['images']
        # Index annotations by image id once, so __getitem__ does not have to
        # scan the complete annotation list for every sample.
        self._anns_by_image = {}
        for ann in self.coco_data['annotations']:
            self._anns_by_image.setdefault(ann['image_id'], []).append(ann)

    def _build_target(self, idx):
        """Return the ``{"boxes", "labels"}`` target dict for image ``idx``."""
        rows = []
        for ann in self._anns_by_image.get(self.images[idx]['id'], []):
            xmin = ann['bbox'][0]
            ymin = ann['bbox'][1]
            # Convert (x, y, width, height) to corner coordinates.
            rows.append([xmin, ymin, xmin + ann['bbox'][2], ymin + ann['bbox'][3]])
        if rows:
            boxes = torch.as_tensor(rows, dtype=torch.float32)
        else:
            # Keep the (N, 4) layout even when the image has no annotations.
            boxes = torch.zeros((0, 4), dtype=torch.float32)
        # One label per box of THIS image (not per annotation in the file).
        labels = torch.ones((boxes.shape[0],), dtype=torch.int64)
        return {"boxes": boxes, "labels": labels}

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and return it with its detection target."""
        img_path = os.path.join(self.img_dir, self.images[idx]['file_name'])
        img = Image.open(img_path).convert("RGB")
        target = self._build_target(idx)
        if self.transforms is not None:
            img = self.transforms(img)
        return img, target

    def __len__(self):
        """Return the number of image entries in the annotation file."""
        return len(self.images)
def get_transform():
    """Build the image transform pipeline: a single ``ToTensor`` step."""
    steps = [torchvision.transforms.ToTensor()]
    return torchvision.transforms.Compose(steps)

* This objective function performs the hyperparameter tuning of the Faster R-CNN model using Optuna (a hyperparameter optimization framework for Python).
* The function takes a trial object from Optuna and a list in which the results are stored.
* It suggests values for the learning rate (lr), the weight decay, and the number of epochs within the specified ranges.
* The model is optimized with SGD (Stochastic Gradient Descent). After training, the model is evaluated with the `evaluate_model` function, which returns the mean IoU.
* IoU (Intersection over Union) is a common metric in object detection tasks: it scores the model by comparing the ground-truth bounding boxes with the predicted bounding boxes.
* When the IoU is above 0.5, the predicted bounding boxes can be considered precise and reliable.

In [None]:
def objective(trial, results_list):
    """Train Faster R-CNN with trial-suggested hyperparameters and score it.

    Args:
        trial: Optuna trial used to sample ``lr``, ``weight_decay`` and
            ``num_epochs``.
        results_list: List that receives one dict per call with the trial
            number, the sampled hyperparameters and the resulting mean IoU.

    Returns:
        The value returned by ``evaluate_model`` for the trained model, which
        the study uses as the objective value.
    """
    # suggest_float supersedes the deprecated suggest_loguniform /
    # suggest_uniform; log=True keeps the log-scale sampling of the learning rate.
    lr = trial.suggest_float('lr', 0.0001, 0.001, log=True)
    weight_decay = trial.suggest_float('weight_decay', 0.0001, 0.001)
    num_epochs = trial.suggest_int('num_epochs', 2, 6)

    # NOTE(review): recent torchvision releases deprecate `pretrained=` in
    # favour of `weights=` - confirm the installed version before switching.
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    # Swap the box predictor head for one with 2 output classes.
    num_classes = 2
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    annotations_file = '/kaggle/input/fracatlas/FracAtlas/Annotations/COCO JSON/COCO_fracture_masks.json'
    img_dir = '/kaggle/input/fracatlas/FracAtlas/images/Fractured'
    dataset = FracAtlasDataset(annotations_file, img_dir, get_transform())
    # The collate function turns a batch of (img, target) pairs into a tuple of
    # images and a tuple of targets instead of stacking them.
    data_loader = DataLoader(dataset, batch_size=2, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))

    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=weight_decay)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    for epoch in range(num_epochs):
        model.train()
        for images, targets in data_loader:
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            optimizer.zero_grad()
            # The model is called with images and targets and its output is
            # treated as a dict of losses, which are summed into one scalar.
            loss_dict = model(images, targets)
            losses = sum(loss_dict.values())
            losses.backward()
            optimizer.step()

    # NOTE(review): the score is computed on the same loader used for training;
    # there is no held-out split here.
    mean_iou = evaluate_model(model, data_loader, device)
    trial_result = {
        'trial_number': trial.number,
        'mean_iou': mean_iou,
        'lr': lr,
        'weight_decay': weight_decay,
        'num_epochs': num_epochs
    }
    results_list.append(trial_result)
    return mean_iou

In [None]:
def evaluate_model(model, data_loader, device):
    """Return the mean IoU between ground-truth and predicted boxes.

    The model is switched to eval mode and run without gradients over every
    batch of ``data_loader``. For each sample that has at least one
    ground-truth box and at least one predicted box, all pairwise IoU values
    are collected; the result is their average, or 0 if none were collected.
    The mean is also printed.

    NOTE(review): every ground-truth/prediction pair contributes to the
    average, and samples with no predictions are skipped rather than counted
    as 0 - confirm this is the intended metric.
    """
    model.eval()
    pairwise_ious = []
    with torch.no_grad():
        for batch_images, batch_targets in data_loader:
            batch_images = [sample.to(device) for sample in batch_images]
            batch_targets = [
                {name: tensor.to(device) for name, tensor in entry.items()}
                for entry in batch_targets
            ]
            outputs = model(batch_images)

            for entry, output in zip(batch_targets, outputs):
                truth = entry['boxes']
                predicted = output['boxes']
                # Nothing to compare when either side has no boxes.
                if predicted.shape[0] == 0 or truth.shape[0] == 0:
                    continue
                overlap = box_iou(truth, predicted)
                pairwise_ious.extend(overlap.flatten().tolist())

    if pairwise_ious:
        mean_iou = sum(pairwise_ious) / len(pairwise_ious)
    else:
        mean_iou = 0
    print(mean_iou)
    return mean_iou

* The number of trials is set to 20; Optuna proposes new hyperparameters for each trial based on the results of the previous ones.
* The results of all trials are stored in a CSV file.

In [None]:
# One result dict per trial is appended here by `objective`.
trial_results = []


def _objective_with_results(trial):
    """Adapt `objective` to the single-argument callable passed to the study."""
    return objective(trial, trial_results)


# Run 20 trials, maximising the value returned by the objective.
study = optuna.create_study(direction='maximize')
study.optimize(_objective_with_results, n_trials=20)

# Tabulate the per-trial results and persist them as CSV.
results_df = pd.DataFrame(trial_results)
print(results_df)
results_df.to_csv('optuna_trial_results.csv', index=False)

In [None]:
# Report the best trial of the study: its objective value and parameters.
print("Best trial:")
trial = study.best_trial
print("  Value (Mean IoU): ", trial.value)
print("  Params: ")
for name, setting in trial.params.items():
    print(f"    {name}: {setting}")

In [None]:
# Pull the winning hyperparameters out of the best trial, echoing each one.
best_params = trial.params

best_lr = best_params['lr']
print(best_lr)

best_weight_decay = best_params['weight_decay']
print(best_weight_decay)

best_num_epochs = best_params['num_epochs']
print(best_num_epochs)

The model is then retrained with the best hyperparameters, and the trained model is saved.

In [None]:
# Pick the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pretrained Faster R-CNN whose box predictor is replaced by a 2-class head.
num_classes = 2
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# SGD configured with the learning rate and weight decay of the best trial.
optimizer = optim.SGD(
    model.parameters(),
    lr=best_lr,
    momentum=0.9,
    weight_decay=best_weight_decay,
)
model.to(device)

In [None]:
# Dataset and loader used for the final training run.
annotations_file = '/kaggle/input/fracatlas/FracAtlas/Annotations/COCO JSON/COCO_fracture_masks.json'
img_dir = '/kaggle/input/fracatlas/FracAtlas/images/Fractured'
dataset = FracAtlasDataset(annotations_file, img_dir, transforms=get_transform())
data_loader = DataLoader(
    dataset,
    batch_size=2,
    shuffle=True,
    # Regroup a batch of (img, target) pairs into (images, targets) tuples.
    collate_fn=lambda batch: tuple(zip(*batch)),
)

In [None]:
# Train for the number of epochs chosen by the best trial, then save the weights.
for epoch in range(best_num_epochs):
    model.train()
    for images, targets in data_loader:
        # Move the batch onto the training device.
        images = [image.to(device) for image in images]
        targets = [
            {name: tensor.to(device) for name, tensor in entry.items()}
            for entry in targets
        ]

        optimizer.zero_grad()
        # The model's output is treated as a dict of losses; sum them into one scalar.
        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())
        losses.backward()
        optimizer.step()

torch.save(model.state_dict(), 'best_model.pth')