## Setup Google Colab

First, mount Google Drive, select the project, and define the paths to its image folder and its train/val/test annotation files:

In [1]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

# project_name = "SeamTaping"
project_name = "WRB"
print("Project:", project_name)

# Single root for the selected project's dataset; every path below derives
# from it so that switching project_name cannot leave one path out of sync.
dataset_dir = f'/content/gdrive/MyDrive/CrackDetection/{project_name}_dataset'

# Path to saved images
image_folder = f'{dataset_dir}/images'

# Load dataset from JSON (one annotation file per split)
train_dataset_json_path = f'{dataset_dir}/train_data.json'
val_dataset_json_path = f'{dataset_dir}/val_data.json'
test_dataset_json_path = f'{dataset_dir}/test_data.json'


Mounted at /content/gdrive
Project: WRB


## Define Custom Dataset Class

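Create a custom dataset class that loads each image together with its annotations, resizes the image, and rescales the bounding boxes to match the resized image.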

In [13]:
import os
import json
import numpy as np
import torch
from PIL import Image, ImageDraw
import torchvision.transforms as T
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    """Object-detection dataset yielding ``(image, target)`` pairs.

    The JSON file is indexed as a sequence of entries, each providing
    ``'image_file_name'`` and a list of ``'annotations'``; every annotation
    provides a ``'bbox'`` in ``[x, y, w, h]`` form and a ``'label'`` string.
    Boxes are presumably expressed in pixels of the original image (they are
    rescaled by resized/original size) -- TODO confirm against the exporter.

    Args:
        dataset_json_path: Path to the JSON annotation file for one split.
        image_folder: Directory containing the image files named in the JSON.
        image_size: Output size as ``(height, width)``, the order expected by
            ``T.Resize``. Defaults to ``(800, 800)``.
    """

    def __init__(self, dataset_json_path, image_folder, image_size=(800, 800)):
        with open(dataset_json_path, 'r') as f:
            dataset = json.load(f)

        self.dataset = dataset
        self.image_folder = image_folder
        self.mean = [0.485, 0.456, 0.406]
        self.std = [0.229, 0.224, 0.225]
        # (height, width) -- note this is the opposite order of PIL's Image.size.
        self.image_size = tuple(image_size)
        # NOTE(review): the FasterRCNN repr printed further down in this notebook
        # shows the model's own transform already applying Normalize with these
        # same mean/std, so images end up normalized twice. Left unchanged so
        # results stay comparable with existing checkpoints; consider dropping
        # T.Normalize here when retraining.
        self.transforms = T.Compose([
            T.Resize(self.image_size),
            T.ToTensor(),
            T.Normalize(mean=self.mean, std=self.std)
        ])

        # Class index 0 is left unused here; the detector is built with two outputs.
        self.label_map = {
            'WRB-Bad': 1,
            # Add more labels as needed
        }

    def __len__(self):
        """Return the number of entries in the loaded JSON."""
        return len(self.dataset)

    def xywh_to_xyxy(self, xywh):
        """Convert ``[x, y, w, h]`` into a new ``[x_min, y_min, x_max, y_max]`` list."""
        x, y, w, h = xywh
        x2 = x + w
        y2 = y + h
        xyxy = [x, y, x2, y2]
        return xyxy

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            tuple: ``(image, target)`` where ``image`` is the output of
            ``self.transforms`` and ``target`` is a dict with ``'boxes'``
            (float32 tensor of shape ``(N, 4)`` in xyxy order, scaled to the
            resized image; ``(0, 4)`` when there are no annotations) and
            ``'labels'`` (int64 tensor of shape ``(N,)``).

        Raises:
            KeyError: If an annotation label is missing from ``self.label_map``.
        """
        image_data = self.dataset[idx]
        image_file_name = image_data['image_file_name']
        image_path = os.path.join(self.image_folder, image_file_name)

        # Load image; the context manager releases the file handle right away,
        # and convert() hands back an independent in-memory copy.
        with Image.open(image_path) as image_file:
            image_original = image_file.convert("RGB")

        # Apply transformations (always configured by __init__)
        image = self.transforms(image_original)

        # Per-axis scale factors for the boxes. PIL reports (width, height)
        # while image_size is (height, width), so pair the axes explicitly
        # instead of dividing the two sequences element-wise.
        original_width, original_height = image_original.size
        target_height, target_width = self.image_size
        scale_x = target_width / original_width
        scale_y = target_height / original_height

        # Get bounding boxes and labels
        boxes = []
        labels = []
        for annotation in image_data['annotations']:
            x_min, y_min, x_max, y_max = self.xywh_to_xyxy(annotation['bbox'])
            boxes.append([
                x_min * scale_x,
                y_min * scale_y,
                x_max * scale_x,
                y_max * scale_y,
            ])
            labels.append(self.label_map[annotation['label']])

        # reshape keeps the (N, 4) layout even when N == 0; a bare empty list
        # would otherwise become a 1-D tensor of shape (0,).
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.tensor(labels, dtype=torch.int64)
        target = {
            'boxes': boxes,
            'labels': labels
        }

        return image, target

# Build one dataset per split; all three read images from the same folder and
# use the fixed resize/normalize pipeline defined by CustomDataset.
train_dataset, val_dataset, test_dataset = (
    CustomDataset(split_json_path, image_folder)
    for split_json_path in (
        train_dataset_json_path,
        val_dataset_json_path,
        test_dataset_json_path,
    )
)

def collate_fn(batch):
    """Turn a list of ``(image, target)`` samples into ``(images, targets)`` lists.

    Images are passed through untouched (no stacking). Each target is rebuilt
    as a fresh dict holding only ``'boxes'`` and ``'labels'``; any other key
    present in the incoming target is dropped.

    Args:
        batch: Sequence of ``(image, target)`` pairs, where ``target`` is a
            dict with tensor values under ``'boxes'`` and ``'labels'``.

    Returns:
        tuple: ``(images, targets)`` -- two lists of equal length.
    """
    images = []
    targets = []
    for image, target in ((sample[0], sample[1]) for sample in batch):
        images.append(image)
        targets.append({
            # Copy so the batch never aliases dataset tensors; boxes as float32.
            'boxes': target['boxes'].clone().detach().to(torch.float32),
            # Labels as int64.
            'labels': target['labels'].clone().detach().to(torch.int64),
            # Add other keys as necessary (e.g., masks, keypoints)
        })

    return images, targets

# The three loaders share batch size and collate function; only the training
# split is shuffled so that validation/test iterate in a fixed order.
loader_kwargs = dict(batch_size=2, collate_fn=collate_fn)
train_dataloader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_dataloader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)
test_dataloader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)


## Train TorchVision FasterRCNN model

In [3]:
import os
from tqdm import tqdm
import torchvision

def get_model(weights=None, num_classes=2):
    """Build a Faster R-CNN (ResNet-50 FPN) with a replaced box-predictor head.

    Args:
        weights: Forwarded unchanged as the ``weights`` argument of
            ``fasterrcnn_resnet50_fpn`` (``None`` or e.g. ``"DEFAULT"``).
        num_classes: Number of outputs of the new ``FastRCNNPredictor`` head.
            The default of 2 matches the single label (index 1) used by this
            notebook's dataset plus index 0, which torchvision reserves for
            background.

    Returns:
        The model with ``roi_heads.box_predictor`` swapped for a freshly
        initialised predictor sized for ``num_classes``.
    """
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=weights)
    # Reuse the existing head's input width so the new head plugs in directly.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(
        in_features, num_classes
    )
    return model

In [4]:
import torch
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from tqdm import tqdm

def train_epoch(model, train_dataloader, optimizer, device):
    """Run one optimisation pass over ``train_dataloader``.

    Args:
        model: Callable as ``model(images, targets)`` returning a dict of
            loss tensors; switched to training mode first.
        train_dataloader: Iterable of ``(images, targets)`` batches that also
            supports ``len()``.
        optimizer: Optimizer stepped once per batch.
        device: Device every image and target tensor is moved to.

    Returns:
        float: Sum of the per-batch total losses divided by the number of batches.
    """
    model.train()
    running_loss = 0
    print("Training ")
    for batch_images, batch_targets in tqdm(train_dataloader):
        batch_images = [img.to(device) for img in batch_images]
        batch_targets = [
            {key: value.to(device) for key, value in target.items()}
            for target in batch_targets
        ]

        optimizer.zero_grad()
        loss_components = model(batch_images, batch_targets)
        # The individual loss terms are added with equal weight.
        total_loss = sum(loss_components.values())
        total_loss.backward()
        optimizer.step()

        running_loss += total_loss.item()

    num_batches = len(train_dataloader)
    return running_loss / num_batches

def validate_epoch(model, val_dataloader, device):
    """Compute the mean per-batch loss over ``val_dataloader`` without gradients.

    The model is intentionally put in training mode: the loop needs the loss
    dict from ``model(images, targets)``, following the approach described at
    https://stackoverflow.com/questions/60339336/validation-loss-for-pytorch-faster-rcnn/65347721#65347721
    No optimizer step is taken and autograd is disabled, but the model is
    left in training mode when the function returns.

    Args:
        model: Callable as ``model(images, targets)`` returning a dict of loss tensors.
        val_dataloader: Iterable of ``(images, targets)`` batches that also
            supports ``len()``.
        device: Device every image and target tensor is moved to.

    Returns:
        float: Sum of the per-batch total losses divided by the number of batches.
    """
    model.train()
    print("Validating ")
    batch_totals = []
    with torch.no_grad():
        for batch_images, batch_targets in tqdm(val_dataloader):
            batch_images = [img.to(device) for img in batch_images]
            batch_targets = [
                {key: value.to(device) for key, value in target.items()}
                for target in batch_targets
            ]

            loss_components = model(batch_images, batch_targets)
            batch_totals.append(sum(loss_components.values()).item())

    return sum(batch_totals) / len(val_dataloader)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = get_model(weights="DEFAULT").to(device)

# Optimise only the parameters that are not frozen.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
# Multiply the learning rate by 0.1 after every 2 scheduler steps (one step per epoch).
scheduler = lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)

num_epochs = 10

# Per-epoch history, kept at module level so later cells can inspect it.
train_losses, val_losses, learning_rates = [], [], []

# NOTE(review): the recorded output below shows the learning rate dropping
# already at epoch 2, which does not match step_size=2 with the scheduler
# stepped at the end of each epoch -- the output looks stale; re-run to confirm.
for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")

    train_loss = train_epoch(model, train_dataloader, optimizer, device)
    val_loss = validate_epoch(model, val_dataloader, device)

    # Read the rate that was in effect for this epoch, before the scheduler advances.
    current_lr = optimizer.param_groups[0]['lr']

    train_losses.append(train_loss)
    val_losses.append(val_loss)

    print(f"\tTrain Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Learning Rate: {current_lr:.6f}")
    learning_rates.append(current_lr)

    scheduler.step()  # Update learning rate

print("Training complete.")


Epoch 1/10
Training 


100%|██████████| 734/734 [06:31<00:00,  1.88it/s]


Validating 


100%|██████████| 157/157 [02:37<00:00,  1.01s/it]


	Train Loss: 0.3706, Val Loss: 0.3401, Learning Rate: 0.005000
Epoch 2/10
Training 


100%|██████████| 734/734 [06:11<00:00,  1.97it/s]


Validating 


100%|██████████| 157/157 [00:47<00:00,  3.29it/s]


	Train Loss: 0.3029, Val Loss: 0.3134, Learning Rate: 0.000500
Epoch 3/10
Training 


100%|██████████| 734/734 [06:11<00:00,  1.97it/s]


Validating 


100%|██████████| 157/157 [00:47<00:00,  3.33it/s]


	Train Loss: 0.2416, Val Loss: 0.2917, Learning Rate: 0.000500
Epoch 4/10
Training 


100%|██████████| 734/734 [06:11<00:00,  1.97it/s]


Validating 


100%|██████████| 157/157 [00:47<00:00,  3.30it/s]


	Train Loss: 0.2247, Val Loss: 0.3034, Learning Rate: 0.000050
Epoch 5/10
Training 


100%|██████████| 734/734 [06:12<00:00,  1.97it/s]


Validating 


100%|██████████| 157/157 [00:47<00:00,  3.30it/s]


	Train Loss: 0.2124, Val Loss: 0.3130, Learning Rate: 0.000050
Epoch 6/10
Training 


100%|██████████| 734/734 [06:11<00:00,  1.97it/s]


Validating 


100%|██████████| 157/157 [00:47<00:00,  3.29it/s]


	Train Loss: 0.2113, Val Loss: 0.3149, Learning Rate: 0.000005
Epoch 7/10
Training 


100%|██████████| 734/734 [06:11<00:00,  1.98it/s]


Validating 


100%|██████████| 157/157 [00:47<00:00,  3.31it/s]


	Train Loss: 0.2077, Val Loss: 0.3150, Learning Rate: 0.000005
Epoch 8/10
Training 


100%|██████████| 734/734 [06:11<00:00,  1.98it/s]


Validating 


100%|██████████| 157/157 [00:48<00:00,  3.25it/s]


	Train Loss: 0.2080, Val Loss: 0.3154, Learning Rate: 0.000001
Epoch 9/10
Training 


100%|██████████| 734/734 [06:13<00:00,  1.97it/s]


Validating 


100%|██████████| 157/157 [00:47<00:00,  3.29it/s]


	Train Loss: 0.2076, Val Loss: 0.3146, Learning Rate: 0.000001
Epoch 10/10
Training 


100%|██████████| 734/734 [06:11<00:00,  1.98it/s]


Validating 


100%|██████████| 157/157 [00:47<00:00,  3.30it/s]

	Train Loss: 0.2073, Val Loss: 0.3155, Learning Rate: 0.000000
Training complete.





### Save the Model

Save the trained model's weights (its `state_dict`) to Google Drive.

In [7]:
# Save model weights (state_dict only, not the full module object)
checkpoint_dir = '/content/gdrive/MyDrive/CrackDetection'
# NOTE(review): the file name is fixed to "WRB" regardless of project_name, so
# training another project would overwrite this checkpoint -- confirm intent.
checkpoint_path = os.path.join(checkpoint_dir, f'faster_rcnn_model_WRB.pth')
torch.save(model.state_dict(), checkpoint_path)

## Load and Evaluate the Model


### Load the model for inference.

In [16]:
# Load model
model = get_model()
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU runtime still loads on a CPU-only runtime.
state_dict = torch.load(
    os.path.join(checkpoint_dir, f'faster_rcnn_model_WRB.pth'),
    map_location=device,
)
model.load_state_dict(state_dict)
model.to(device)
# Switch to inference mode; as the last expression this also displays the model repr.
model.eval()

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=1e-05)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=1e-05)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=1e-05)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=1e-05)
          (relu