## Setup Google Colab

First, mount Google Drive to access files:

In [None]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

# project_name = "SeamTaping"
project_name = "WRB"
print("Project:", project_name)

# Path to saved images
image_folder = f'/content/gdrive/MyDrive/CrackDetection/{project_name}_dataset/images'

# Load dataset from JSON
train_dataset_json_path = f'/content/gdrive/MyDrive/CrackDetection/{project_name}_dataset/train_data.json'
val_dataset_json_path = f'/content/gdrive/MyDrive/CrackDetection/{project_name}_dataset/val_data.json'
test_dataset_json_path = f'/content/gdrive/MyDrive/CrackDetection/{project_name}_dataset/test_data.json'


Mounted at /content/gdrive
Project: WRB


## Define Custom Dataset Class

Create a custom dataset class to load images and annotations.

In [None]:
import os
import json
import numpy as np
import torch
from PIL import Image, ImageDraw
import torchvision.transforms as T
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    def __init__(self, dataset_json_path, image_folder):
        with open(dataset_json_path, 'r') as f:
            dataset = json.load(f)

        self.dataset = dataset
        self.image_folder = image_folder
        self.mean = [0.485, 0.456, 0.406]
        self.std = [0.229, 0.224, 0.225]
        self.image_size = (800, 800)
        self.transforms = T.Compose([
            T.Resize(self.image_size),
            T.ToTensor(),
            T.Normalize(mean=self.mean, std=self.std)
        ])

        self.label_map = {
            'WRB-Bad': 1,
            # Add more labels as needed
        }

    def __len__(self):
        return len(self.dataset)

    def xywh_to_xyxy(self, xywh):
        x, y, w, h = xywh
        x2 = x + w
        y2 = y + h
        xyxy = [x, y, x2, y2]
        return xyxy

    def __getitem__(self, idx):
        image_data = self.dataset[idx]
        image_file_name = image_data['image_file_name']
        image_path = os.path.join(self.image_folder, image_file_name)

        # Load image
        image_original = Image.open(image_path).convert("RGB")
        # Apply transformations
        if self.transforms is not None:
            image = self.transforms(image_original)

        # Calculate scaling factor for resizing bounding boxes AFTER transforms
        original_size = np.array(image_original.size)  # Get original size from the image file
        # print(original_size)
        resized_size = self.image_size
        scale = resized_size / original_size
        # print(scale)


        # Get bounding boxes and labels
        boxes = []
        labels = []
        for annotation in image_data['annotations']:
            bbox = annotation['bbox']
            box = self.xywh_to_xyxy(bbox)
            # Adjust bounding box coordinates based on resizing
            box[0] *= scale[0]  # x_min
            box[1] *= scale[1]  # y_min
            box[2] *= scale[0]  # x_max
            box[3] *= scale[1]  # y_max

            boxes.append(box)
            labels.append(self.label_map[annotation['label']])

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.int64)
        target = {
            'boxes': boxes,
            'labels': labels
        }

        return image, target

# Create custom dataset instance with augmentation enabled
train_dataset = CustomDataset(train_dataset_json_path, image_folder)
val_dataset = CustomDataset(val_dataset_json_path, image_folder)
test_dataset = CustomDataset(test_dataset_json_path, image_folder)

def collate_fn(batch):
    images = [item[0] for item in batch]
    targets = [item[1] for item in batch]

    # Assuming targets is a list of dictionaries
    for idx, target in enumerate(targets):
        # Convert target to a format suitable for the model
        targets[idx] = {
            'boxes': target['boxes'].clone().detach().to(torch.float32),  # Ensure boxes are float32
            'labels': target['labels'].clone().detach().to(torch.int64),  # Ensure boxes are int64
            # Add other keys as necessary (e.g., masks, keypoints)
        }

    return images, targets

train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=2, shuffle=True, collate_fn=collate_fn)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=2, shuffle=False, collate_fn=collate_fn)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=2, shuffle=False, collate_fn=collate_fn)


## Train TorchVision FasterRCNN model

In [None]:
import os
from tqdm import tqdm
import torchvision

def get_model(weights=None):
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=weights)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, 2)
    return model

## Load and Evaluate the Model


### Load the model for inference.

In [None]:
# Load model
model = get_model()

checkpoint_dir = '/content/gdrive/MyDrive/CrackDetection'
# _ = model.load_state_dict(torch.load(os.path.join(checkpoint_dir, f'faster_rcnn_model_WRB.pth'))) # if gpu is available
_ = model.load_state_dict(torch.load(os.path.join(checkpoint_dir, f'faster_rcnn_model_WRB.pth'), map_location=torch.device('cpu'))) # if gpu is not available

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=1e-05)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=1e-05)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=1e-05)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=1e-05)
          (relu

### Evaluate the trained model

using metrics like accuracy, precision, recall, and F1-score.

In [None]:
  model.eval()

  with torch.no_grad():
      for images, targets in test_dataloader:
          images = list(image.to(device) for image in images)
          ground_truths = [{k: v.to(device) for k, v in t.items()} for t in targets]
          print(ground_truths)

          predictions = model(images)
          print(predictions)

[{'boxes': tensor([[367.3355, 340.1585, 400.5655, 374.7926],
        [333.2627, 373.3300, 353.4489, 391.5261],
        [409.2523, 365.2770, 447.5489, 428.9388],
        [395.2663, 232.9047, 460.6442, 289.9859]]), 'labels': tensor([1, 1, 1, 1])}, {'boxes': tensor([[332.4328, 417.6826, 358.7670, 491.6311]]), 'labels': tensor([1])}]
[{'boxes': tensor([[409.8213, 357.2304, 449.1165, 430.6518],
        [392.0437, 210.3670, 460.4063, 420.0887],
        [364.6435, 338.2320, 401.9261, 381.4536],
        [561.8439, 459.6078, 574.3701, 479.9553],
        [328.3272, 330.3872, 445.0329, 438.3831],
        [331.7285, 374.9957, 350.6353, 392.8164],
        [451.0667, 496.8382, 464.9714, 511.9885],
        [327.8813, 337.8919, 407.8651, 408.5976],
        [ 33.1398, 388.7067,  52.1851, 414.1129],
        [557.9310, 456.5788, 578.0540, 486.0414],
        [428.4923, 360.7051, 449.7566, 422.6173],
        [335.2968, 214.2647, 458.3116, 437.4513],
        [390.0216, 213.9999, 458.1707, 290.6931],
       

In [None]:
for ground_truth in ground_truths:
    ground_truth['boxes'] = ground_truth['boxes'].cpu().numpy()
    ground_truth['labels'] = ground_truth['labels'].cpu().numpy()

print(ground_truths)

[{'boxes': array([[367.33548, 340.15854, 400.56546, 374.7926 ],
       [333.26273, 373.33005, 353.44894, 391.5261 ],
       [409.25235, 365.277  , 447.54892, 428.9388 ],
       [395.26633, 232.90468, 460.64423, 289.9859 ]], dtype=float32), 'labels': array([1, 1, 1, 1])}, {'boxes': array([[332.4328 , 417.68265, 358.76697, 491.63113]], dtype=float32), 'labels': array([1])}]


In [None]:
for prediction in predictions:
    prediction['boxes'] = prediction['boxes'].cpu().numpy()
    prediction['labels'] = prediction['labels'].cpu().numpy()
    prediction['scores'] = prediction['scores'].cpu().numpy()

print(predictions)

[{'boxes': array([[409.82132 , 357.2304  , 449.1165  , 430.6518  ],
       [392.0437  , 210.36697 , 460.4063  , 420.08865 ],
       [364.6435  , 338.23196 , 401.92615 , 381.45364 ],
       [561.84393 , 459.60782 , 574.37006 , 479.9553  ],
       [328.3272  , 330.38718 , 445.0329  , 438.38315 ],
       [331.72852 , 374.9957  , 350.63525 , 392.81638 ],
       [451.0667  , 496.8382  , 464.97144 , 511.98846 ],
       [327.88135 , 337.89188 , 407.86505 , 408.59763 ],
       [ 33.13978 , 388.70667 ,  52.18512 , 414.1129  ],
       [557.93097 , 456.5788  , 578.054   , 486.04138 ],
       [428.49228 , 360.70508 , 449.75656 , 422.6173  ],
       [335.29684 , 214.26468 , 458.31155 , 437.45132 ],
       [390.02158 , 213.99994 , 458.17075 , 290.69315 ],
       [563.00653 , 461.26596 , 572.1845  , 475.11636 ],
       [447.49625 , 492.52808 , 468.6929  , 515.8289  ],
       [357.11096 , 419.66318 , 366.7544  , 442.35812 ],
       [330.43155 , 370.5042  , 359.13052 , 401.0913  ],
       [392.85696 , 

In [None]:
import numpy as np

# Example data for multiple images
print("Example Calculation...")
images_predictions = [
    [
        {'label': 0, 'probability': 0.9, 'bounding_box': [100, 100, 50, 50]},  # label 0, high confidence
        {'label': 1, 'probability': 0.8, 'bounding_box': [200, 200, 50, 50]},  # label 1, high confidence
        {'label': 0, 'probability': 0.7, 'bounding_box': [120, 120, 40, 40]},  # label 0, medium confidence
        {'label': 1, 'probability': 0.6, 'bounding_box': [210, 210, 60, 60]},  # label 1, medium confidence
    ],
    [
        {'label': 0, 'probability': 0.85, 'bounding_box': [95, 95, 55, 55]},  # label 0, high confidence
        {'label': 1, 'probability': 0.75, 'bounding_box': [210, 210, 45, 45]},  # label 1, high confidence
        {'label': 0, 'probability': 0.65, 'bounding_box': [125, 125, 35, 35]},  # label 0, medium confidence
        {'label': 1, 'probability': 0.55, 'bounding_box': [200, 200, 55, 55]},  # label 1, medium confidence
    ]
]

images_ground_truths = [
    [
        {'label': 0, 'bounding_box': [105, 105, 60, 60]},  # label 0
        {'label': 1, 'bounding_box': [200, 200, 50, 50]},  # label 1
    ],
    [
        {'label': 0, 'bounding_box': [100, 100, 60, 60]},  # label 0
        {'label': 1, 'bounding_box': [210, 210, 50, 50]},  # label 1
    ]
]

def evaluate_object_detection_multiple_images(images_predictions, images_ground_truths, prob_threshold=0.5, overlap_threshold=0.5):
    """
    Evaluate object detection predictions for multiple images.

    Args:
    - images_predictions (list of lists): List where each element is a list of dictionaries containing prediction data for one image.
      Each dictionary should have keys 'label', 'probability', 'bounding_box'.
      Example format for one image:
      [{'label': 'car', 'probability': 0.92, 'bounding_box': [x, y, width, height]}, ...]

    - images_ground_truths (list of lists): List where each element is a list of dictionaries containing ground truth data for one image.
      Each dictionary should have keys 'label' and 'bounding_box'.
      Example format for one image:
      [{'label': 'car', 'bounding_box': [xmin, ymin, width, height]}, ...]

    - prob_threshold (float): Minimum probability threshold for predictions.

    - overlap_threshold (float): Minimum IoU threshold for considering a detection as correct.

    Returns:
    - precision (float): Average Precision score across all images.
    - recall (float): Average Recall score across all images.
    - f1_score (float): Average F1 score across all images.
    - mAP (float): mean Average Precision (mAP) score across all images.
    """

    def calculate_precision_recall(predictions, ground_truths, prob_threshold, overlap_threshold):
        # Filter predictions based on probability threshold
        predictions = [pred for pred in predictions if pred['probability'] >= prob_threshold]

        # Initialize variables
        true_positives = 0
        false_positives = len(predictions)
        false_negatives = len(ground_truths)

        for gt in ground_truths:
            found_match = False
            for pred in predictions:
                if pred['label'] == gt['label']:
                    iou = calculate_iou(pred['bounding_box'], gt['bounding_box'])
                    if iou >= overlap_threshold:
                        found_match = True
                        break

            if found_match:
                true_positives += 1
                false_positives -= 1
                false_negatives -= 1

        precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0
        recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0

        return precision, recall

    def calculate_iou(boxA, boxB):
        # Convert to (x1, y1, x2, y2) format
        x1A, y1A, wA, hA = boxA[0], boxA[1], boxA[2], boxA[3]
        x1B, y1B, wB, hB = boxB[0], boxB[1], boxB[2], boxB[3]
        x2A, y2A = x1A + wA, y1A + hA
        x2B, y2B = x1B + wB, y1B + hB

        # Calculate intersection area
        xA = max(x1A, x1B)
        yA = max(y1A, y1B)
        xB = min(x2A, x2B)
        yB = min(y2A, y2B)

        inter_area = max(0, xB - xA + 1) * max(0, yB - yA + 1)

        # Calculate area of each box
        boxAArea = wA * hA
        boxBArea = wB * hB

        # Calculate union area
        union_area = boxAArea + boxBArea - inter_area

        # Calculate IoU
        iou = inter_area / union_area if union_area > 0 else 0

        return iou

    def calculate_f1_score(precision, recall):
        if precision + recall == 0:
            return 0
        f1_score = 2 * (precision * recall) / (precision + recall)
        return f1_score

    def calculate_map(predictions, ground_truths, prob_threshold, overlap_threshold):
        average_precision = []
        num_classes = len(set([gt['label'] for gt in ground_truths]))

        for c in range(num_classes):
            class_predictions = [pred for pred in predictions if pred['label'] == c]
            class_ground_truths = [gt for gt in ground_truths if gt['label'] == c]

            precisions = []
            recalls = []

            for threshold in np.arange(0.5, 1.0, 0.05):  # Vary IoU threshold from 0.5 to 0.95
                precisions_at_threshold = []
                recalls_at_threshold = []

                for prob_thresh in np.arange(0.0, 1.05, 0.05):  # Vary confidence threshold from 0 to 1
                    precision, recall = calculate_precision_recall(class_predictions, class_ground_truths,
                                                                   prob_threshold=prob_thresh,
                                                                   overlap_threshold=threshold)
                    precisions_at_threshold.append(precision)
                    recalls_at_threshold.append(recall)

                avg_precision = np.mean(precisions_at_threshold)
                precisions.append(avg_precision)
                recalls.append(np.mean(recalls_at_threshold))

            average_precision.append(np.mean(precisions))

        mAP = np.mean(average_precision)

        return mAP

    total_precision = 0
    total_recall = 0
    total_f1_score = 0
    total_mAP = 0

    num_images = len(images_predictions)

    for i in range(num_images):
        predictions = images_predictions[i]
        ground_truths = images_ground_truths[i]

        # Calculate Precision and Recall for the current image
        precision, recall = calculate_precision_recall(predictions, ground_truths, prob_threshold, overlap_threshold)

        # Calculate F1 score for the current image
        f1_score = calculate_f1_score(precision, recall)

        # Calculate mAP for the current image
        mAP = calculate_map(predictions, ground_truths, prob_threshold, overlap_threshold)

        # Accumulate metrics for averaging
        total_precision += precision
        total_recall += recall
        total_f1_score += f1_score
        total_mAP += mAP

    # Average metrics across all images
    precision_avg = total_precision / num_images
    recall_avg = total_recall / num_images
    f1_score_avg = total_f1_score / num_images
    mAP_avg = total_mAP / num_images

    return precision_avg, recall_avg, f1_score_avg, mAP_avg


# Evaluate object detection for multiple images
precision_avg, recall_avg, f1_score_avg, mAP_avg = evaluate_object_detection_multiple_images(images_predictions, images_ground_truths, prob_threshold=0.5, overlap_threshold=0.3)

print(f'Average Precision: {precision_avg:.2f}, Average Recall: {recall_avg:.2f}, Average F1 Score: {f1_score_avg:.2f}, Average mAP: {mAP_avg:.2f}')


Example Calculation...
Average Precision: 0.50, Average Recall: 1.00, Average F1 Score: 0.67, Average mAP: 0.28


In [None]:
def convert_ground_truths(ground_truths):
    images_ground_truths = []

    for ground_truth in ground_truths:
        image_gt = []
        boxes = ground_truth['boxes']
        labels = ground_truth['labels']

        for i in range(len(labels)):
            label = int(labels[i])  # Convert label to integer
            box = boxes[i].tolist()  # Convert numpy array to list
            xmin, ymin, xmax, ymax = box[0], box[1], box[2], box[3]
            width = xmax - xmin
            height = ymax - ymin
            bounding_box = [xmin, ymin, width, height]

            image_gt.append({'label': label, 'bounding_box': bounding_box})

        images_ground_truths.append(image_gt)

    return images_ground_truths

converted_ground_truths = convert_ground_truths(ground_truths)
print(converted_ground_truths)

[[{'label': 1, 'bounding_box': [367.3354797363281, 340.1585388183594, 33.22998046875, 34.634063720703125]}, {'label': 1, 'bounding_box': [333.2627258300781, 373.3300476074219, 20.18621826171875, 18.196044921875]}, {'label': 1, 'bounding_box': [409.2523498535156, 365.2770080566406, 38.29656982421875, 63.66180419921875]}, {'label': 1, 'bounding_box': [395.2663269042969, 232.90467834472656, 65.37789916992188, 57.08122253417969]}], [{'label': 1, 'bounding_box': [332.43280029296875, 417.6826477050781, 26.33416748046875, 73.948486328125]}]]


In [None]:
def convert_predictions(predictions):
    images_predictions = []

    for image_pred in predictions:
        image_predictions = []
        boxes = image_pred['boxes']
        labels = image_pred['labels']
        scores = image_pred['scores']

        for i in range(len(labels)):
            label = int(labels[i])  # Convert label to integer
            score = float(scores[i])  # Convert score to float
            box = boxes[i].tolist()  # Convert numpy array to list
            xmin, ymin, xmax, ymax = box[0], box[1], box[2], box[3]
            width = xmax - xmin
            height = ymax - ymin
            bounding_box = [int(xmin), int(ymin), int(width), int(height)]  # Convert to integers

            image_predictions.append({
                'label': label,
                'probability': score,
                'bounding_box': bounding_box
            })

        images_predictions.append(image_predictions)

    return images_predictions

converted_predictions = convert_predictions(predictions)
print(converted_predictions)

[[{'label': 1, 'probability': 0.8237559199333191, 'bounding_box': [409, 357, 39, 73]}, {'label': 1, 'probability': 0.7759232521057129, 'bounding_box': [392, 210, 68, 209]}, {'label': 1, 'probability': 0.7583789229393005, 'bounding_box': [364, 338, 37, 43]}, {'label': 1, 'probability': 0.6883583664894104, 'bounding_box': [561, 459, 12, 20]}, {'label': 1, 'probability': 0.6863035559654236, 'bounding_box': [328, 330, 116, 107]}, {'label': 1, 'probability': 0.6819499135017395, 'bounding_box': [331, 374, 18, 17]}, {'label': 1, 'probability': 0.6601813435554504, 'bounding_box': [451, 496, 13, 15]}, {'label': 1, 'probability': 0.6282103061676025, 'bounding_box': [327, 337, 79, 70]}, {'label': 1, 'probability': 0.5526962876319885, 'bounding_box': [33, 388, 19, 25]}, {'label': 1, 'probability': 0.5424738526344299, 'bounding_box': [557, 456, 20, 29]}, {'label': 1, 'probability': 0.4696422815322876, 'bounding_box': [428, 360, 21, 61]}, {'label': 1, 'probability': 0.4660336673259735, 'bounding_box

In [None]:
probability_threshold=0.5
overlap_threshold=0.3

precision_avg, recall_avg, f1_score_avg, mAP_avg = evaluate_object_detection_multiple_images(
    converted_predictions,
    converted_ground_truths,
    prob_threshold=probability_threshold,
    overlap_threshold=overlap_threshold
    )
print(f'Performance Metrics @ probability_threshold={probability_threshold}, overlap_threshold={overlap_threshold}')
print(f'\t Precision: {precision_avg:.2f}, \n\t Recall: {recall_avg:.2f}, \n\t F1 Score: {f1_score_avg:.2f}, \n\t mAP: {mAP_avg:.2f}')


Performance Metrics @ probability_threshold=0.5, overlap_threshold=0.3
	 Precision: 0.40, 
	 Recall: 0.88, 
	 F1 Score: 0.55, 
	 mAP: 0.00


In [None]:
def evaluate_model(model, dataloader, device):
    model.eval()
    ground_truths_all = []
    predictions_all = []

    with torch.no_grad():
        for images, targets in tqdm(dataloader):
            images = list(image.to(device) for image in images)

            ground_truths = [{k: v.to(device) for k, v in t.items()} for t in targets]
            ground_truths_all.extend(ground_truths)

            predictions = model(images)
            predictions_all.extend(predictions)

    # Convert ground truths and predictions to lists of dictionaries
    converted_ground_truths_all = convert_ground_truths(ground_truths_all)
    converted_predictions_all = convert_predictions(predictions_all)

    precision_avg, recall_avg, f1_score_avg, mAP_avg = evaluate_object_detection_multiple_images(
        converted_predictions,
        converted_ground_truths,
        prob_threshold=probability_threshold,
        overlap_threshold=overlap_threshold
        )
    print()
    print(f'Performance Metrics @ probability_threshold={probability_threshold}, overlap_threshold={overlap_threshold}')
    print(f'\t Precision: {precision_avg:.2f}, \n\t Recall: {recall_avg:.2f}, \n\t F1 Score: {f1_score_avg:.2f}, \n\t mAP: {mAP_avg:.2f}')


# Example usage of evaluation function
evaluate_model(model, test_dataloader, device)


100%|██████████| 158/158 [38:37<00:00, 14.67s/it]

Performance Metrics @ probability_threshold=0.5, overlap_threshold=0.3
	 Precision: 0.40, 
	 Recall: 0.88, 
	 F1 Score: 0.55, 
	 mAP: 0.00



