## Setup Google Colab

First, mount Google Drive to access files:

In [None]:
import json
import os

from google.colab import drive
# Mount Google Drive so the project folder is reachable from the Colab runtime.
drive.mount('/content/gdrive')

# Single root for every project artefact, so the annotation file and the image
# folder cannot drift apart.
project_dir = '/content/gdrive/MyDrive/CrackDetection'

# Annotation file; it is parsed as JSON and later indexed per image by CustomDataset.
dataset_json_path = os.path.join(project_dir, 'WRB_All_bbox_annotations.json')
with open(dataset_json_path, 'r', encoding='utf-8') as f:
    dataset = json.load(f)

# Path to saved images
image_folder = os.path.join(project_dir, 'images')


In [None]:
def xywh_to_xyxy(xywh):
    """Convert a box from ``[x, y, width, height]`` to ``[x_min, y_min, x_max, y_max]``.

    Args:
        xywh: Sequence of four numbers: top-left x, top-left y, width, height.

    Returns:
        A new list ``[x, y, x + width, y + height]``.
    """
    left, top, width, height = xywh
    return [left, top, left + width, top + height]

## Define Custom Dataset Class

Create a custom dataset class to load images and annotations.

In [None]:
import torch
import torchvision.transforms as T
from torch.utils.data import Dataset, DataLoader
from PIL import Image, ImageDraw

class CustomDataset(Dataset):
    """Object-detection dataset yielding ``(image, target)`` pairs.

    Each entry of ``dataset`` is read as a mapping with the keys
    ``'image_file_name'`` and ``'annotations'``; every annotation provides a
    ``'bbox'`` in ``[x, y, width, height]`` form and a ``'label'``.

    Args:
        dataset: Indexable collection of per-image annotation records.
        image_folder: Directory that contains the image files.
        transforms: Optional callable applied to the resized PIL image.
        image_size: ``(height, width)`` every image is resized to.
    """

    def __init__(self, dataset, image_folder, transforms=None, image_size=(800, 800)):
        self.dataset = dataset
        self.image_folder = image_folder
        self.transforms = transforms

        # Use the torchvision module alias `T` here: the `transforms` argument is
        # the caller's pipeline (or None) and has no `Resize` attribute.
        self.resize_transform = T.Resize(image_size)

    def __len__(self):
        """Return the number of annotated images."""
        return len(self.dataset)

    @staticmethod
    def _scale_boxes(boxes, scale_x, scale_y):
        """Scale ``[x_min, y_min, x_max, y_max]`` boxes by per-axis factors.

        Args:
            boxes: Nested sequence or tensor of boxes; may be empty.
            scale_x: Factor applied to both x coordinates.
            scale_y: Factor applied to both y coordinates.

        Returns:
            A float32 tensor of shape ``(N, 4)``; ``(0, 4)`` when there are no boxes.
        """
        # reshape(-1, 4) keeps the (N, 4) layout even for an image with no boxes.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        factors = boxes.new_tensor([scale_x, scale_y, scale_x, scale_y])
        return boxes * factors

    def __getitem__(self, idx):
        """Load, resize and transform image ``idx`` and build its detection target.

        Returns:
            ``(image, target)`` where ``target`` holds ``'boxes'`` (float32,
            ``(N, 4)``, in resized-image pixels) and ``'labels'`` (int64, ``(N,)``).
        """
        image_data = self.dataset[idx]
        image_path = os.path.join(self.image_folder, image_data['image_file_name'])

        # convert() returns an independent image, so the file can be closed right away.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")

        # PIL reports size as (width, height): x scales with width, y with height.
        orig_width, orig_height = image.size
        image = self.resize_transform(image)
        new_width, new_height = image.size

        annotations = image_data['annotations']
        boxes = self._scale_boxes(
            [xywh_to_xyxy(annotation['bbox']) for annotation in annotations],
            new_width / orig_width,
            new_height / orig_height,
        )
        labels = torch.tensor(
            [annotation['label'] for annotation in annotations], dtype=torch.int64
        )

        # Apply transformations
        if self.transforms is not None:
            image = self.transforms(image)

        target = {
            'boxes': boxes,
            'labels': labels
        }

        return image, target


# Per-image preprocessing applied after resizing: PIL image -> float tensor,
# followed by channel-wise normalization.
# NOTE(review): torchvision detection models apply their own input
# normalization by default, so this Normalize step may be applied twice;
# confirm against the model configuration.
transform = T.Compose(
    [
        T.ToTensor(),
        T.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Dataset over the loaded annotations and the Drive image folder.
custom_dataset = CustomDataset(
    dataset=dataset,
    image_folder=image_folder,
    transforms=transform,
)

### Check Dataset

In [None]:
import random
from PIL import Image
import matplotlib.pyplot as plt

# Function to display image with bounding boxes
def display_images_bboxes(image, boxes, labels,
                          mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Show a ``(C, H, W)`` image tensor with its bounding boxes drawn in red.

    Args:
        image: Float image tensor in channel-first layout.
        boxes: Iterable of ``[x_min, y_min, x_max, y_max]`` boxes in pixels.
        labels: Iterable of integer labels, one per box.
        mean: Per-channel mean used when the image was normalized. Pass
            ``None`` (for ``mean`` or ``std``) if the image is already in [0, 1].
        std: Per-channel standard deviation used when the image was normalized.
    """
    image = image.detach().cpu()

    # Undo the normalization first; otherwise negative values wrap around in
    # the uint8 cast below and the colors come out wrong.
    if mean is not None and std is not None:
        mean_t = image.new_tensor(mean).view(-1, 1, 1)
        std_t = image.new_tensor(std).view(-1, 1, 1)
        image = image * std_t + mean_t

    # Convert image to an (H, W, C) uint8 numpy array
    image = image.clamp(0, 1).mul(255).permute(1, 2, 0).byte().numpy()

    # Create figure and axes
    fig, ax = plt.subplots(1)
    ax.imshow(image)

    # Draw bounding boxes
    for box, label in zip(boxes, labels):
        xmin, ymin, xmax, ymax = (float(coord) for coord in box)
        rect = plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, edgecolor='red', linewidth=2)
        ax.add_patch(rect)
        ax.text(xmin, ymin, f'Label {int(label)}', fontsize=12, color='red')

    ax.axis('off')
    plt.show()

# Randomly select up to 3 indices; random.sample raises ValueError when asked
# for more items than the dataset holds.
num_preview = min(3, len(custom_dataset))
indices = random.sample(range(len(custom_dataset)), num_preview)
# Display images with bounding boxes
for idx in indices:
    image, target = custom_dataset[idx]
    display_images_bboxes(image, target["boxes"], target["labels"])

## Train TorchVision FasterRCNN model

### Define Model

Define a Faster R-CNN model using a ResNet-50 backbone.

In [None]:
import os
import tqdm
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

def get_torchvision_fastrcnn_model(pretrained=False, num_classes=2):
    """Build a Faster R-CNN detector on a single-scale ResNet-50 feature extractor.

    Args:
        pretrained: Forwarded to ``torchvision.models.resnet50`` to request
            pretrained backbone weights.
        num_classes: Number of output classes including background.

    Returns:
        A ``FasterRCNN`` model.
    """
    resnet = torchvision.models.resnet50(pretrained=pretrained)

    # FasterRCNN needs a spatial feature map from the backbone. The stock
    # ResNet-50 ends with global average pooling and a classification layer
    # (its last two children), which would return per-image logits instead,
    # so those two modules are dropped.
    backbone = torch.nn.Sequential(*list(resnet.children())[:-2])
    backbone.out_channels = 2048  # channel count declared to FasterRCNN for this backbone

    # Create anchor generator: one feature map, 5 sizes x 3 aspect ratios
    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                       aspect_ratios=((0.5, 1.0, 2.0),))

    # Create ROI pooler; '0' is the feature-map name given here because the
    # backbone returns a single tensor
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                    output_size=7,
                                                    sampling_ratio=2)

    # Define Faster R-CNN model
    model = FasterRCNN(backbone,
                       num_classes=num_classes,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)
    return model

# Instantiate the detector, requesting pretrained backbone weights.
model = get_torchvision_fastrcnn_model(pretrained=True)

### Define Training Function

Set up the training function.

In [None]:
# Function to train the model
def train_model(model, data_loader, optimizer, device, num_epochs=10):
    """Train a torchvision detection model in place.

    Args:
        model: Detection model that returns a dict of losses when called with
            ``(images, targets)`` in training mode.
        data_loader: Iterable yielding ``(images, targets)`` batches, where
            ``images`` is a sequence of tensors and ``targets`` a sequence of
            dicts of tensors.
        optimizer: Optimizer built over ``model.parameters()``.
        device: Device the model and each batch are moved to.
        num_epochs: Number of passes over ``data_loader``.
    """
    # The batches are moved to `device` below, so the model must live there too.
    model.to(device)
    model.train()
    for epoch in range(num_epochs):
        # `tqdm` is imported as a module in this notebook; the progress-bar
        # class is `tqdm.tqdm`.
        progress = tqdm.tqdm(data_loader, desc=f"Epoch {epoch + 1}/{num_epochs}")
        for images, targets in progress:
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)

            # Total loss is the sum of every loss term the model reports.
            losses = sum(loss_dict.values())

            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

            progress.set_postfix(loss=f"{losses.item():.4f}")


### Run Model Training

In [None]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Move the model before building the optimizer so both refer to on-device parameters.
model.to(device)

# Detection targets hold a different number of boxes per image, so the default
# collation (which stacks tensors) cannot batch them. Keep each batch as a
# tuple of images and a tuple of target dicts instead.
data_loader = DataLoader(
    custom_dataset,
    batch_size=2,
    shuffle=True,
    num_workers=4,
    collate_fn=lambda batch: tuple(zip(*batch)),
)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
train_model(model, data_loader, optimizer, device, num_epochs=10)

### Save the Model

Save the trained model.

In [None]:
# Save model
# Drive is mounted at /content/gdrive, so the checkpoint must live under that
# mount point (and 'MyDrive'), next to the dataset.
checkpoint_dir = '/content/gdrive/MyDrive/CrackDetection'
os.makedirs(checkpoint_dir, exist_ok=True)
torch.save(model.state_dict(), os.path.join(checkpoint_dir, 'faster_rcnn_model.pth'))

## Load and Evaluate the Model


### Load the model for inference.

In [None]:
# Load model
model = get_torchvision_fastrcnn_model()
# map_location lets a checkpoint saved on GPU be restored on a CPU-only runtime.
state_dict = torch.load(
    os.path.join(checkpoint_dir, 'faster_rcnn_model.pth'),
    map_location=device,
)
model.load_state_dict(state_dict)
# Inputs are moved to `device` during evaluation, so the model must be there too.
model.to(device)
model.eval()

### Evaluate the trained model

Evaluate the model using metrics such as accuracy, precision, recall, and F1-score.

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np

def evaluate_model(model, data_loader, device, iou_threshold=0.5, score_threshold=0.5):
    """Evaluate a detection model by matching predictions to ground truth via IoU.

    Predicted and ground-truth boxes differ in number and order, so their label
    lists cannot be compared element-wise. Each detection is instead matched
    greedily (highest score first) to the best still-unmatched ground-truth box
    of the same label. The outcomes are encoded as paired binary labels:

    * matched detection (true positive):   true=1, pred=1
    * unmatched detection (false positive): true=0, pred=1
    * unmatched ground truth (false negative): true=1, pred=0

    There are no true negatives in this encoding, so the reported accuracy
    equals TP / (TP + FP + FN).

    Args:
        model: Detection model returning, per image, a dict with ``'boxes'``,
            ``'labels'`` and ``'scores'`` in eval mode.
        data_loader: Iterable yielding ``(images, targets)`` batches where
            ``targets`` is a sequence of dicts with ``'boxes'`` and ``'labels'``.
        device: Device used for inference.
        iou_threshold: Minimum IoU for a detection to count as a match.
        score_threshold: Detections scoring below this value are ignored.
    """
    model.to(device)
    model.eval()
    true_labels = []
    pred_labels = []

    with torch.no_grad():
        # `tqdm` is imported as a module in this notebook; the progress-bar
        # class is `tqdm.tqdm`.
        for images, targets in tqdm.tqdm(data_loader, desc="Evaluating"):
            images = [image.to(device) for image in images]

            predictions = model(images)

            for pred, target in zip(predictions, targets):
                gt_boxes = target['boxes'].to(device)
                gt_labels = target['labels'].to(device)

                # Keep confident detections, highest score first.
                keep = pred['scores'] >= score_threshold
                det_scores = pred['scores'][keep]
                order = torch.argsort(det_scores, descending=True)
                det_boxes = pred['boxes'][keep][order]
                det_labels = pred['labels'][keep][order]

                gt_matched = torch.zeros(len(gt_boxes), dtype=torch.bool, device=gt_boxes.device)

                iou = None
                if len(det_boxes) > 0 and len(gt_boxes) > 0:
                    iou = torchvision.ops.box_iou(det_boxes, gt_boxes)
                    # A detection may only match ground truth of the same class.
                    iou[det_labels[:, None] != gt_labels[None, :]] = -1.0

                for det_idx in range(len(det_boxes)):
                    is_true_positive = False
                    if iou is not None:
                        candidates = iou[det_idx].clone()
                        # Each ground-truth box can be claimed at most once.
                        candidates[gt_matched] = -1.0
                        best_iou, best_gt = candidates.max(dim=0)
                        if best_iou.item() >= iou_threshold:
                            gt_matched[best_gt] = True
                            is_true_positive = True
                    true_labels.append(1 if is_true_positive else 0)
                    pred_labels.append(1)

                # Ground-truth boxes nobody matched are false negatives.
                num_missed = int((~gt_matched).sum().item())
                true_labels.extend([1] * num_missed)
                pred_labels.extend([0] * num_missed)

    if not true_labels:
        print("No ground-truth boxes or detections to evaluate.")
        return

    # Calculate evaluation metrics
    accuracy = accuracy_score(true_labels, pred_labels)
    precision = precision_score(true_labels, pred_labels, zero_division=0)
    recall = recall_score(true_labels, pred_labels, zero_division=0)
    f1 = f1_score(true_labels, pred_labels, zero_division=0)

    print(f"Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1-score: {f1:.4f}")

# Example usage of evaluation function
# NOTE(review): this evaluates on the same dataset object the model was
# trained on, so the scores describe training fit rather than generalization;
# use a held-out split for a real evaluation.
# Detection targets vary in size per image, so batches are kept as tuples of
# images and target dicts rather than stacked by the default collation.
data_loader_test = DataLoader(
    custom_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=4,
    collate_fn=lambda batch: tuple(zip(*batch)),
)
# Inputs are moved to `device` inside evaluate_model, so the model must be there too.
model.to(device)
evaluate_model(model, data_loader_test, device)
