## Setup Google Colab

First, mount Google Drive to access files:

In [None]:
# Mount Google Drive so the dataset stored there is reachable from Colab.
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

# Select the project whose dataset is used; swap the assignment to change it.
# project_name = "SeamTaping"
project_name = "WRB"
print("Project:", project_name)

# Single root for the selected project's dataset, so the Drive location is
# spelled out once instead of being repeated in every path below.
dataset_root = f'/content/gdrive/MyDrive/CrackDetection/{project_name}_dataset'

# Path to saved images
image_folder = f'{dataset_root}/images'

# JSON annotation files, one per split
train_dataset_json_path = f'{dataset_root}/train_data.json'
val_dataset_json_path = f'{dataset_root}/val_data.json'
test_dataset_json_path = f'{dataset_root}/test_data.json'


## Define Custom Dataset Class

Create a custom dataset class to load images and annotations.

In [None]:
import os
import json
import numpy as np
import torch
import torchvision.transforms as T
from torch.utils.data import Dataset
from PIL import Image, ImageDraw

class CustomDataset(Dataset):
    """Object-detection dataset backed by a JSON annotation file.

    The JSON file is loaded as a sequence of entries.  Each entry is read for
    an ``image_file_name`` and a list of ``annotations``; every annotation
    supplies a ``bbox`` in ``[x, y, width, height]`` form and a string
    ``label`` that is looked up in ``label_map``.

    Items are returned as ``(image, target)``.  ``image`` is the resized,
    normalized image tensor and ``target`` is a dict with ``boxes``
    (float32, shape ``[N, 4]``, ``[x_min, y_min, x_max, y_max]`` in the
    resized image's pixel coordinates) and ``labels`` (int64, shape ``[N]``).
    """

    def __init__(self, dataset_json_path, image_folder, image_size=(800, 800), label_map=None):
        """Load the annotation file and set up the image transforms.

        Args:
            dataset_json_path: Path to the JSON annotation file.
            image_folder: Directory containing the image files named in the JSON.
            image_size: Target ``(height, width)`` every image is resized to.
            label_map: Optional mapping from annotation label string to integer
                class id.  Defaults to ``{'WRB-Bad': 0}``.
        """
        with open(dataset_json_path, 'r') as f:
            self.dataset = json.load(f)

        self.image_folder = image_folder
        # Standard ImageNet channel statistics, matching the pretrained backbone.
        self.mean = [0.485, 0.456, 0.406]
        self.std = [0.229, 0.224, 0.225]
        self.image_size = tuple(image_size)
        self.transforms = T.Compose([
            T.Resize(self.image_size),
            T.ToTensor(),
            T.Normalize(mean=self.mean, std=self.std)
        ])

        # NOTE(review): torchvision detection models reserve label 0 for the
        # background class, so object classes normally start at 1.  The
        # default is kept as-is for compatibility; confirm it against the
        # model's num_classes and pass ``label_map`` explicitly if needed.
        if label_map is None:
            label_map = {
                'WRB-Bad': 0,
                # Add more labels as needed
            }
        self.label_map = dict(label_map)

    def __len__(self):
        """Return the number of entries in the annotation file."""
        return len(self.dataset)

    def xywh_to_xyxy(self, xywh):
        """Convert ``[x, y, width, height]`` to ``[x_min, y_min, x_max, y_max]``."""
        x, y, w, h = xywh
        return [x, y, x + w, y + h]

    @staticmethod
    def _scale_boxes(boxes, original_size, resized_size):
        """Return xyxy boxes as a float32 ``[N, 4]`` tensor in resized-image pixels.

        Args:
            boxes: List of ``[x_min, y_min, x_max, y_max]`` boxes (may be empty).
            original_size: ``(width, height)`` of the source image, as given
                by ``PIL.Image.size``.
            resized_size: ``(height, width)`` of the resized image, as passed
                to ``T.Resize``.
        """
        # reshape keeps a well-formed [0, 4] tensor when there are no boxes.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        orig_w, orig_h = original_size
        new_h, new_w = resized_size
        scale_x = new_w / orig_w
        scale_y = new_h / orig_h
        return boxes * boxes.new_tensor([scale_x, scale_y, scale_x, scale_y])

    def __getitem__(self, idx):
        """Load one image and its annotations.

        Args:
            idx: Index of the entry in the annotation file.

        Returns:
            Tuple ``(image, target)`` where ``target`` is a dict with keys
            ``'boxes'`` and ``'labels'``.

        Raises:
            KeyError: If an annotation label is missing from ``label_map``.
        """
        image_data = self.dataset[idx]
        image_path = os.path.join(self.image_folder, image_data['image_file_name'])

        # Load image
        image_original = Image.open(image_path).convert("RGB")

        # Get bounding boxes and labels
        boxes = []
        labels = []
        for annotation in image_data['annotations']:
            boxes.append(self.xywh_to_xyxy(annotation['bbox']))
            labels.append(self.label_map[annotation['label']])

        # Apply transformations; without them the image keeps its own size,
        # so the boxes must not be rescaled either.
        if self.transforms is not None:
            image = self.transforms(image_original)
            resized_size = self.image_size
        else:
            image = image_original
            orig_w, orig_h = image_original.size
            resized_size = (orig_h, orig_w)

        # Adjust bounding box coordinates to the resized image.
        boxes = self._scale_boxes(boxes, image_original.size, resized_size)
        labels = torch.tensor(labels, dtype=torch.int64)

        target = {
            'boxes': boxes,
            'labels': labels
        }

        return image, target

# Build one dataset per split (train / validation / test); every split reads
# its images from the same folder and applies the same resize + normalize.
train_dataset = CustomDataset(dataset_json_path=train_dataset_json_path, image_folder=image_folder)
val_dataset = CustomDataset(dataset_json_path=val_dataset_json_path, image_folder=image_folder)
test_dataset = CustomDataset(dataset_json_path=test_dataset_json_path, image_folder=image_folder)

## Define the Objective Function
Define the objective function that Optuna will minimize. For each trial it trains a Faster R-CNN model with the sampled hyperparameters and returns the resulting validation loss.

In [None]:
import torch
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torch.utils.data import DataLoader
from torchvision import transforms
import optuna

def detection_collate_fn(batch):
    """Group ``(image, target)`` samples into ``(images, targets)`` tuples.

    Detection targets hold a different number of boxes per image, so they
    cannot be stacked by the default collate function; the samples are only
    regrouped here.  Defined at module level so DataLoader workers can use it.
    """
    return tuple(zip(*batch))


def objective(trial):
    """Train a Faster R-CNN with trial-sampled hyperparameters.

    Reads the module-level ``train_dataset`` and ``val_dataset``.

    Args:
        trial: Optuna trial used to sample ``lr``, ``weight_decay``,
            ``steps`` and ``batch_size``.

    Returns:
        Mean validation loss per batch (sum of all model loss terms).

    Raises:
        ValueError: If the validation dataset yields no batches.
    """
    # Define hyperparameters to tune
    lr = trial.suggest_float('lr', 1e-5, 1e-3, log=True)
    weight_decay = trial.suggest_float('weight_decay', 1e-5, 1e-3, log=True)
    steps = trial.suggest_int('steps', 500, 2000, step=500)
    batch_size = trial.suggest_categorical('batch_size', [2, 4, 8])

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    dataloader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=4,
        collate_fn=detection_collate_fn,
    )
    val_dataloader = DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=4,
        collate_fn=detection_collate_fn,
    )
    if len(val_dataloader) == 0:
        raise ValueError("Validation dataset is empty; cannot compute a validation loss.")

    # Define model and optimizer
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    # torchvision counts the background as a class: background + 1 object class.
    # NOTE(review): target labels must then lie in 1..num_classes-1; check the
    # dataset's label_map, which maps its only label to 0.
    num_classes = 2
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)
    model.to(device)

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=lr, momentum=0.9, weight_decay=weight_decay)

    # Training loop
    # NOTE(review): 'steps' is used as the number of full passes over the
    # training data, not as a number of optimizer steps — confirm intent.
    model.train()
    for _ in range(steps):
        for images, targets in dataloader:
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)

            losses = sum(loss for loss in loss_dict.values())
            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

    # Validation (calculate validation loss)
    # torchvision detection models return the loss dict only in training mode
    # (eval mode returns detections), so stay in train mode and disable
    # gradients instead.  The pretrained model is expected to use frozen
    # batch-norm layers, so this should not alter any weights — verify if the
    # backbone is changed.
    model.train()
    val_loss = 0.0
    with torch.no_grad():
        for images, targets in val_dataloader:
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())
            val_loss += losses.item()

    val_loss /= len(val_dataloader)

    return val_loss
