<a href="https://colab.research.google.com/github/AvellinaLeong/NHM-Nannofossil-Segmentation-Project/blob/main/04_evaluation_metrics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Load Detectron2 and Datasets

In [None]:
# Mount Google Drive so the notebook can reach files stored under /content/drive/.
from google.colab import drive
drive.mount('/content/drive/')

# Set script location to own development space
MY_DEVELOPMENT_SPACE = '/content/drive/MyDrive/development/avellina/'
import os
os.chdir(MY_DEVELOPMENT_SPACE)
# Print the working directory and list its contents to confirm the chdir took effect.
!pwd
!ls

Mounted at /content/drive/
/content/drive/MyDrive/development/avellina
Binary_Classification_notebooks  Detectron2_notebooks  Mask-RCNN		output
detectron2			 Extra_Data_Pipeline   Morphometrics_notebooks	src


In [None]:
# Install Detectron2 from source: clone the repository, read its
# install_requires via setup.py, pip-install those requirements, and put the
# clone on sys.path so `import detectron2` resolves to it.
# NOTE(review): the recorded output of this cell shows the pyyaml==5.1 install
# failing at metadata generation — confirm whether this pin is still needed.
!python -m pip install pyyaml==5.1
import sys, os, distutils.core
!git clone 'https://github.com/facebookresearch/detectron2'
# run_setup evaluates ./detectron2/setup.py; only dist.install_requires is used below.
dist = distutils.core.run_setup("./detectron2/setup.py")
!python -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])}
sys.path.insert(0, os.path.abspath('./detectron2'))

Collecting pyyaml==5.1
  Downloading PyYAML-5.1.tar.gz (274 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/274.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.9/274.2 kB[0m [31m4.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m274.2/274.2 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25h  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mpython setup.py egg_info[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Preparing metadata (setup.py) ... [?25l[?25herror
[1;31merror[0m: [1mmetadata-generation-failed[0m

[31m×[0m Encountered error while generating package metadata.
[31m╰─>[0m See above for output.

[1;35mnote[0m: This is an issue wit

In [None]:
# Report the CUDA compiler, torch and detectron2 versions for reproducibility.
import torch, detectron2
!nvcc --version
# First two dot-separated components of the torch version (e.g. "2.5").
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
# Text after the last "+" in the torch version (e.g. "cu121"); if there is no
# "+", this is the whole version string.
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:02:13_PDT_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0
torch:  2.5 ; cuda:  cu121
detectron2: 0.6


In [None]:
# Import Detectron2 and logger
import detectron2
from detectron2.utils.logger import setup_logger
# Being the last expression in the cell, the returned logger is displayed as the cell output.
setup_logger()

<Logger detectron2 (DEBUG)>

In [None]:
import os
import torch
import copy
import numpy as np
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.data.datasets import register_coco_instances
from detectron2.data import MetadataCatalog, DatasetCatalog, build_detection_test_loader, transforms as T
from detectron2.structures import BoxMode
from detectron2.data import detection_utils as utils
from detectron2.engine import DefaultTrainer
import cv2
from detectron2.utils.visualizer import Visualizer, ColorMode

In [None]:
# Register datasets
# Each call takes (dataset name, extra metadata dict, COCO annotation JSON, image root).
# Only the val and test splits are registered in this notebook.
register_coco_instances("my_dataset_val", {}, "/content/drive/MyDrive/data/species_53/data/val/coco_val.json", "/content/drive/MyDrive/data/species_53/data/val")
register_coco_instances("my_dataset_test", {}, "/content/drive/MyDrive/data/species_53/data/test/coco_test.json", "/content/drive/MyDrive/data/species_53/data/test")

In [None]:
# Load metadata for visualization
val_metadata = MetadataCatalog.get("my_dataset_val")
test_metadata = MetadataCatalog.get("my_dataset_test")

# Get val and test dataset dicts
# Each entry is one image record; later cells read its "file_name", "height",
# "width" and "annotations" keys.
val_dataset_dicts = DatasetCatalog.get("my_dataset_val")
test_dataset_dicts = DatasetCatalog.get("my_dataset_test")

[11/04 14:54:40 d2.data.datasets.coco]: Loaded 95 images in COCO format from /content/drive/MyDrive/data/species_53/data/val/coco_val.json
[11/04 14:54:41 d2.data.datasets.coco]: Loaded 96 images in COCO format from /content/drive/MyDrive/data/species_53/data/test/coco_test.json


## Define Augmentations and Load Configurations

In [None]:
# Define augmentations
from detectron2.data import transforms as T

# Augmentation pipeline invoked by custom_mapper via `augs(aug_input)`:
# brightness, flip, rotation and saturation jitter with the ranges given below.
# NOTE(review): custom_mapper reads images as "BGR" — confirm that matches the
# channel order RandomSaturation expects.
augs = T.AugmentationList([
    T.RandomBrightness(0.9, 1.1),
    T.RandomFlip(prob=0.5),
    T.RandomRotation(angle=[-90, 90]),
    T.RandomSaturation(0.8, 1.2),
])

In [None]:
# Custom data mapper
def custom_mapper(dataset_dict):
    """Turn one dataset record into an augmented training sample.

    Works on a deep copy of ``dataset_dict`` so the caller's record is left
    untouched. The image is read in BGR order, passed through the module-level
    ``augs`` pipeline, and the same transforms are applied to every annotation.

    Args:
        dataset_dict: Record with a ``"file_name"`` key and, optionally, an
            ``"annotations"`` list whose items carry ``"bbox"`` and
            ``"bbox_mode"``.

    Returns:
        The copied record with two added keys: ``"image"`` (float32 tensor in
        channel-first layout) and ``"instances"`` (transformed annotations
        after ``filter_empty_instances``).
    """
    record = copy.deepcopy(dataset_dict)
    source_image = utils.read_image(record["file_name"], format="BGR")
    source_annos = record.get("annotations", [])

    # Boxes are handed to the augmentation input in absolute XYXY form.
    xyxy_boxes = np.array(
        [
            BoxMode.convert(anno["bbox"], anno["bbox_mode"], BoxMode.XYXY_ABS)
            for anno in source_annos
        ],
        dtype=np.float32,
    )

    aug_input = T.AugInput(source_image, boxes=xyxy_boxes)
    applied_transforms = augs(aug_input)
    augmented_image = aug_input.image
    image_hw = augmented_image.shape[:2]

    # Replay the exact transforms chosen for the image on each annotation.
    transformed_annos = []
    for anno in source_annos:
        transformed_annos.append(
            utils.transform_instance_annotations(anno, applied_transforms, image_hw)
        )
    instances = utils.annotations_to_instances(transformed_annos, image_hw)

    # HWC -> CHW, float32, as a torch tensor.
    chw_image = augmented_image.transpose(2, 0, 1).astype("float32")
    record["image"] = torch.as_tensor(chw_image)
    record["instances"] = utils.filter_empty_instances(instances)
    return record

In [None]:
class CustomTrainer(DefaultTrainer):
    """``DefaultTrainer`` whose training data loader uses ``custom_mapper``."""

    @classmethod
    def build_train_loader(cls, cfg):
        """Build the training loader with ``custom_mapper`` as the dataset mapper.

        Args:
            cfg: Detectron2 config node passed through to the loader builder.

        Returns:
            Whatever ``build_detection_train_loader`` returns for ``cfg``.
        """
        # The notebook's import cell brings in build_detection_test_loader but
        # not the train variant, so it is imported here; otherwise this method
        # raises NameError the first time it is called.
        from detectron2.data import build_detection_train_loader

        return build_detection_train_loader(cfg, mapper=custom_mapper)

# Build the inference config in three layers: Detectron2 defaults, the
# model-zoo Mask R-CNN R50-FPN 3x config, and then the saved training YAML.
# The statement order matters: later merges and assignments override earlier ones.
cfg = get_cfg()
cfg.OUTPUT_DIR = "/content/drive/MyDrive/data/species_53/Detectron2_Models/5" # Make sure this path is correct
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
# NOTE(review): "my_dataset_train" is not registered in this notebook; only the
# val and test datasets are.
cfg.DATASETS.TRAIN = ("my_dataset_train",)
# NOTE(review): presumably set so the saved YAML (which may contain this key)
# merges cleanly — confirm.
cfg.DATASETS.VAL = ("my_dataset_val",)
cfg.DATASETS.TEST = ("my_dataset_test",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final_5.pth")  # Make sure this path is correct
cfg.MODEL.BACKBONE.FREEZE_AT = 0 # !!!!
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MAX_ITER = 1000
cfg.SOLVER.STEPS = []
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
cfg.TEST.EVAL_PERIOD = 500
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1

# Load the saved configuration from the YAML file
config_yaml_path = "/content/drive/MyDrive/data/species_53/Detectron2_Models/5/config_5.yaml" # Make sure this path is correct
cfg.merge_from_file(config_yaml_path)
# Re-point the weights after the merge, since the YAML may have replaced
# MODEL.WEIGHTS (and OUTPUT_DIR) with its own values.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final_5.pth") # Make sure this path is correct
# Score threshold applied to detections at test time.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.50



In [None]:
# Initialize the predictor
# Built from the cfg assembled above; the recorded output shows it loading
# model_final_5.pth from cfg.MODEL.WEIGHTS.
predictor = DefaultPredictor(cfg)

[11/04 14:55:00 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from /content/drive/MyDrive/data/species_53/Detectron2_Models/5/model_final_5.pth ...


  return torch.load(f, map_location=torch.device("cpu"))


In [None]:
# Run predictions on the val and test sets
def get_predictions(dataset_dicts):
    """Run the module-level ``predictor`` on every image of a dataset.

    Args:
        dataset_dicts: Iterable of records that each have a ``"file_name"`` key.

    Returns:
        A list with one predictor output per record, in the same order as
        ``dataset_dicts`` (later cells rely on this alignment when zipping).

    Raises:
        FileNotFoundError: If an image cannot be read from disk.
    """
    predictions = []
    for data in dataset_dicts:
        im = cv2.imread(data["file_name"])
        # cv2.imread signals a missing or unreadable file by returning None
        # rather than raising; fail here with the offending path instead of
        # passing None on to the predictor.
        if im is None:
            raise FileNotFoundError(f"Could not read image: {data['file_name']}")
        predictions.append(predictor(im))
    return predictions

# One predictor output per image, in the same order as the corresponding dataset dicts.
val_predictions = get_predictions(val_dataset_dicts)
test_predictions = get_predictions(test_dataset_dicts)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


## Precision, Recall and IOU

### Prepare the Ground Truth Data and Predictions

In [None]:
import skimage.draw

In [None]:
def compute_iou(b1, b2):
    """Intersection-over-union of two axis-aligned boxes.

    Both boxes are indexable with four entries where positions 0 and 1 hold
    the minimum corner and positions 2 and 3 the maximum corner, using the
    same axis order for both boxes.

    Returns:
        ``intersection / union``, or ``0`` when the union is zero.
    """
    # Overlap rectangle: largest of the minima, smallest of the maxima.
    lo_a = max(b1[0], b2[0])
    lo_b = max(b1[1], b2[1])
    hi_a = min(b1[2], b2[2])
    hi_b = min(b1[3], b2[3])

    # Negative extents mean no overlap along that axis, so clamp at zero.
    intersection = max(0, hi_b - lo_b) * max(0, hi_a - lo_a)

    area_first = (b1[2] - b1[0]) * (b1[3] - b1[1])
    area_second = (b2[2] - b2[0]) * (b2[3] - b2[1])
    union = area_first + area_second - intersection

    if union != 0:
        return intersection / union
    return 0

def get_TP_FP(pred_boxes, gt_boxes, iou_threshold=0.5):
    """Count true positives, false positives and false negatives for one image.

    Predictions are visited in the order given. Each one claims the first
    not-yet-claimed ground-truth box whose IoU with it is at least
    ``iou_threshold``; a prediction that claims nothing is a false positive.

    Args:
        pred_boxes: Iterable of predicted boxes.
        gt_boxes: Sized iterable of ground-truth boxes.
        iou_threshold: Minimum IoU for a match.

    Returns:
        ``(tp, fp, fn)`` where ``fn`` is the number of unclaimed ground truths.
    """
    claimed = set()
    true_pos = 0
    false_pos = 0

    for candidate in pred_boxes:
        # Index of the first free ground-truth box that overlaps enough, if any.
        hit = next(
            (
                idx
                for idx, truth in enumerate(gt_boxes)
                if idx not in claimed
                and compute_iou(candidate, truth) >= iou_threshold
            ),
            None,
        )
        if hit is None:
            false_pos += 1
        else:
            claimed.add(hit)
            true_pos += 1

    return true_pos, false_pos, len(gt_boxes) - true_pos

def extract_bboxes(mask):
    """Compute one bounding box per channel of an instance-mask stack.

    Args:
        mask: Array indexed as ``mask[row, col, instance]``; non-zero pixels
            belong to the instance.

    Returns:
        A list with one ``[xmin, ymin, xmax, ymax]`` entry per channel, where
        the max values are the last occupied pixel index (inclusive). A
        channel with no foreground pixels yields ``[0, 0, 0, 0]`` so the
        result stays aligned with the channels.
    """
    bboxes = []
    for i in range(mask.shape[2]):
        rows, cols = np.nonzero(mask[:, :, i])
        # ground_truth_masks leaves a channel all-zero when it skips an
        # invalid polygon; np.min/np.max would raise ValueError on that.
        if rows.size == 0:
            bboxes.append([0, 0, 0, 0])
            continue
        bboxes.append([np.min(cols), np.min(rows), np.max(cols), np.max(rows)])
    return bboxes

In [None]:
def extract_pred_masks_and_boxes(predictions):
    """Split predictor outputs into per-image NumPy masks, boxes and scores.

    Args:
        predictions: Iterable of dicts, each with an ``"instances"`` entry
            exposing ``.to("cpu")``, ``pred_masks``, ``pred_boxes.tensor``
            and ``scores``.

    Returns:
        Three lists ``(pred_masks, pred_boxes, scores)`` with one NumPy array
        per prediction, in input order.
    """
    per_image = []
    for output in predictions:
        # Move to CPU first so the tensors can be converted with .numpy().
        cpu_instances = output["instances"].to("cpu")
        per_image.append(
            (
                cpu_instances.pred_masks.numpy(),
                cpu_instances.pred_boxes.tensor.numpy(),
                cpu_instances.scores.numpy(),
            )
        )

    pred_masks = [masks for masks, _, _ in per_image]
    pred_boxes = [boxes for _, boxes, _ in per_image]
    scores = [confidences for _, _, confidences in per_image]
    return pred_masks, pred_boxes, scores

In [None]:
# Convert predictions
# Each result is a list with one NumPy array per image, in dataset order.
val_pred_masks, val_pred_boxes, val_scores = extract_pred_masks_and_boxes(val_predictions)
test_pred_masks, test_pred_boxes, test_scores = extract_pred_masks_and_boxes(test_predictions)

In [None]:
def create_info(annotations, height, width):
    """Collect an image's size and its non-empty segmentations.

    Args:
        annotations: Iterable of annotation mappings.
        height: Image height, stored unchanged.
        width: Image width, stored unchanged.

    Returns:
        Dict with keys ``"height"``, ``"width"`` and ``"polygons"``; the last
        holds the ``"segmentation"`` value of every annotation that has one
        and whose value is truthy.
    """
    polygons = []
    for annotation in annotations:
        if "segmentation" not in annotation:
            continue
        segmentation = annotation["segmentation"]
        if segmentation:
            polygons.append(segmentation)
    return dict(height=height, width=width, polygons=polygons)

def ground_truth_masks(data, height, width):
    """Rasterise an image's polygon annotations into a stack of binary masks.

    Args:
        data: Dataset record with ``"annotations"`` and ``"file_name"`` keys.
        height: Image height in pixels.
        width: Image width in pixels.

    Returns:
        ``uint8`` array of shape ``(height, width, n)`` with one channel per
        annotation kept by ``create_info``. A channel stays all-zero, and a
        message is printed, when its annotation has no drawable polygon.
    """
    info = create_info(data["annotations"], height, width)
    mask = np.zeros([info["height"], info["width"], len(info["polygons"])], dtype=np.uint8)
    for i, rings in enumerate(info["polygons"]):
        drawn = False
        # Only list-style polygon segmentations are rasterised; anything else
        # (for example a dict-style encoding) is reported as invalid below.
        if isinstance(rings, (list, tuple)):
            # An annotation may consist of several rings; draw all of them
            # into the same channel instead of only the first.
            for ring in rings:
                # Rings are flat [x0, y0, x1, y1, ...] lists: a polygon needs
                # at least 3 points (6 numbers) and an even count.
                if len(ring) < 6 or len(ring) % 2:
                    continue
                # shape= clips the polygon to the image so vertices outside
                # the frame cannot produce out-of-bounds indices.
                rr, cc = skimage.draw.polygon(ring[1::2], ring[0::2], shape=mask.shape[:2])
                mask[rr, cc, i] = 1
                drawn = True
        if not drawn:
            print(f"Invalid or empty polygon for annotation {i} in file {data['file_name']}")
    return mask

In [None]:
# Create ground truth masks for the val and test sets
ground_truth_masks_dict = {}
ground_truth_masks_dict_test = {}

# Fill both lookup tables (file name -> ground-truth mask stack) with a single
# loop: validation records first, then test records.
for split_dicts, masks_by_filename in (
    (val_dataset_dicts, ground_truth_masks_dict),
    (test_dataset_dicts, ground_truth_masks_dict_test),
):
    for data in split_dicts:
        filename = data["file_name"]
        height, width = data["height"], data["width"]
        mask = ground_truth_masks(data, height, width)
        masks_by_filename[filename] = mask

print("Ground truth masks for validation set created successfully")
print("Ground truth masks for test set created successfully")


Ground truth masks for validation set created successfully
Ground truth masks for test set created successfully


### Calculate Precision, Recall and IoU on Val and Test Sets


In [None]:
import pandas as pd
from sklearn.metrics import auc

In [None]:
# Generate predicted masks for validation and test sets
# This repeats the extraction done in the earlier "Convert predictions" cell,
# keeping only the masks; boxes and scores are discarded here.
val_pred_masks, _, _ = extract_pred_masks_and_boxes(val_predictions)
test_pred_masks, _, _ = extract_pred_masks_and_boxes(test_predictions)

In [None]:
# Create dictionaries to hold predicted masks
val_pred_masks_dict = {}
test_pred_masks_dict = {}

# Key each image's predicted masks by file name. Predictions were produced by
# iterating the dataset dicts in order, so position i in the mask list belongs
# to record i.
for split_dicts, split_pred_masks, masks_by_filename in (
    (val_dataset_dicts, val_pred_masks, val_pred_masks_dict),
    (test_dataset_dicts, test_pred_masks, test_pred_masks_dict),
):
    for i, data in enumerate(split_dicts):
        filename = data["file_name"]
        masks_by_filename[filename] = split_pred_masks[i]

In [None]:
def compute_metrics(dataset_dicts, pred_boxes, ground_truth_masks_dict):
    """Aggregate box-level precision, recall, mean IoU and accuracy.

    Args:
        dataset_dicts: Dataset records, each with a ``"file_name"`` key.
        pred_boxes: One collection of predicted boxes per record, in the same
            order as ``dataset_dicts``.
        ground_truth_masks_dict: Maps file name to that image's ground-truth
            mask stack; boxes are derived from it with ``extract_bboxes``.

    Returns:
        ``(precision, recall, mean_iou, accuracy)``:
        precision is ``TP / (TP + FP)``; recall is ``TP / (TP + FN)``;
        mean_iou is the mean, over every ground-truth box, of the best IoU
        reached by any prediction in the same image (0 when the image has no
        predictions); accuracy is ``TP / (TP + FP + FN)``.
        Each value is 0 when its denominator, or the IoU list, is empty.
    """
    all_tp, all_fp, all_fn = 0, 0, 0
    ious = []

    for data, pred_box in zip(dataset_dicts, pred_boxes):
        gt_boxes = extract_bboxes(ground_truth_masks_dict[data["file_name"]])
        tp, fp, fn = get_TP_FP(pred_box, gt_boxes)
        all_tp += tp
        all_fp += fp
        all_fn += fn
        # Predictions are not ordered to line up with the ground-truth boxes,
        # so pairing them by position (zip) compares unrelated boxes. Score
        # each ground-truth box against its best-overlapping prediction.
        for gt_b in gt_boxes:
            ious.append(max((compute_iou(pred_b, gt_b) for pred_b in pred_box), default=0))

    precision = all_tp / (all_tp + all_fp) if (all_tp + all_fp) > 0 else 0
    recall = all_tp / (all_tp + all_fn) if (all_tp + all_fn) > 0 else 0
    mean_iou = np.mean(ious) if ious else 0
    accuracy = all_tp / (all_tp + all_fp + all_fn) if (all_tp + all_fp + all_fn) > 0 else 0

    return precision, recall, mean_iou, accuracy

# Compute metrics for validation set
# "Accuracy" here is TP / (TP + FP + FN), as defined in compute_metrics.
val_precision, val_recall, val_mean_iou, val_accuracy = compute_metrics(val_dataset_dicts, val_pred_boxes, ground_truth_masks_dict)
print("Validation - Precision:", val_precision)
print("Validation - Recall:", val_recall)
print("Validation - Mean IoU:", val_mean_iou)
print("Validation - Accuracy:", val_accuracy)

# Compute metrics for test set
# Uses the test-split ground-truth dictionary rather than the validation one.
test_precision, test_recall, test_mean_iou, test_accuracy = compute_metrics(test_dataset_dicts, test_pred_boxes, ground_truth_masks_dict_test)
print("Test - Precision:", test_precision)
print("Test - Recall:", test_recall)
print("Test - Mean IoU:", test_mean_iou)
print("Test - Accuracy:", test_accuracy)

Validation - Precision: 0.7983870967741935
Validation - Recall: 0.9611650485436893
Validation - Mean IoU: 0.7885116001897382
Validation - Accuracy: 0.7734375
Test - Precision: 0.7835820895522388
Test - Recall: 0.9722222222222222
Test - Mean IoU: 0.8005595759821296
Test - Accuracy: 0.7664233576642335


In [None]:
# --------- Model (train resnet layers) ---------------
# Validation - Precision: 0.7983870967741935
# Validation - Recall: 0.9611650485436893
# Validation - Mean IoU: 0.7885116001897382
# Validation - Accuracy: 0.7734375
# Test - Precision: 0.7835820895522388
# Test - Recall: 0.9722222222222222
# Test - Mean IoU: 0.8005595759821296
# Test - Accuracy: 0.7664233576642335

In [None]:
# Model without normalisation
# Validation - Precision: 0.8135593220338984
# Validation - Recall: 0.46601941747572817
# Validation - Mean IoU: 0.5399237047686556
# Validation - Accuracy: 0.42105263157894735
# Test - Precision: 0.7017543859649122
# Test - Recall: 0.37037037037037035
# Test - Mean IoU: 0.47500537918242575
# Test - Accuracy: 0.32

In [None]:
# Model with normalisation cv2
# Validation - Precision: 0.6578947368421053
# Validation - Recall: 0.7281553398058253
# Validation - Mean IoU: 0.5794849551875569
# Validation - Accuracy: 0.528169014084507
# Test - Precision: 0.572463768115942
# Test - Recall: 0.7314814814814815
# Test - Mean IoU: 0.4447712056723371
# Test - Accuracy: 0.47305389221556887

In [None]:
# Model with per channel standardisation
# Validation - Precision: 0.5189873417721519
# Validation - Recall: 0.39805825242718446
# Validation - Mean IoU: 0.4441316441016047
# Validation - Accuracy: 0.2907801418439716
# Test - Precision: 0.4578313253012048
# Test - Recall: 0.35185185185185186
# Test - Mean IoU: 0.3856891764891272
# Test - Accuracy: 0.24836601307189543

## Boundary F1 Score on Val and Test Sets

In [None]:
import skimage
from skimage.segmentation import find_boundaries

This isn't working -- invalid prediction format

In [None]:
import numpy as np
import skimage.draw
from skimage.segmentation import find_boundaries

# Function to compute boundary F1 score
def _instance_label_image(instance_masks, height, width):
    """Flatten per-instance binary masks into one 2-D integer label image."""
    labels = np.zeros((height, width), dtype=np.int32)
    # Label k marks instance k (1-based); 0 stays background. Where instances
    # overlap, the later one wins.
    for label, instance_mask in enumerate(instance_masks, start=1):
        labels[np.asarray(instance_mask).astype(bool)] = label
    return labels


def compute_boundary_f1_score(dataset_dicts, predictions, ground_truth_masks_dict):
    """Average per-image boundary precision, recall and F1.

    For each image the predicted and ground-truth instance masks are each
    flattened to a 2-D label image, boundaries are extracted with
    ``find_boundaries``, and the two boundary maps are compared pixel by
    pixel (exact coincidence, no distance tolerance).

    Args:
        dataset_dicts: Dataset records, each with a ``"file_name"`` key.
        predictions: One predictor output per record, in the same order. Each
            item is a dict with an ``"instances"`` entry; a non-empty list
            whose first element is such a dict is also accepted.
        ground_truth_masks_dict: Maps file name to a ``(H, W, n)`` mask stack.

    Returns:
        ``(avg_precision, avg_recall, avg_f1_score)`` over the evaluated
        images, or ``(0.0, 0.0, 0.0)`` when no image could be evaluated.
    """
    precisions, recalls, f1_scores = [], [], []

    for data, prediction in zip(dataset_dicts, predictions):
        filename = data["file_name"]

        # get_predictions stores the predictor's output dict directly; the
        # list form is kept only for backward compatibility.
        if isinstance(prediction, list) and prediction:
            prediction = prediction[0]
        if not (isinstance(prediction, dict) and "instances" in prediction):
            print(f"Invalid prediction format for {filename}. Skipping...")
            continue

        # Move to CPU before .numpy(); GPU tensors cannot be converted directly.
        # Assumes pred_masks is indexed (instance, row, col) — TODO confirm
        # against the Detectron2 model output format.
        pred_masks = prediction["instances"].to("cpu").pred_masks.numpy()
        gt_mask = ground_truth_masks_dict[filename]

        # The ground-truth stack is (row, col, instance), so the two stacks
        # cannot be compared as-is; reduce both to 2-D label images first.
        height, width = gt_mask.shape[:2]
        pred_labels = _instance_label_image(pred_masks, height, width)
        gt_labels = _instance_label_image(
            (gt_mask[:, :, k] for k in range(gt_mask.shape[2])), height, width
        )

        pred_boundaries = find_boundaries(pred_labels)
        gt_boundaries = find_boundaries(gt_labels)

        # Pixel-wise confusion counts between the two boundary maps.
        TP = np.sum(np.logical_and(pred_boundaries, gt_boundaries))
        FP = np.sum(np.logical_and(pred_boundaries, np.logical_not(gt_boundaries)))
        FN = np.sum(np.logical_and(np.logical_not(pred_boundaries), gt_boundaries))

        precision = TP / (TP + FP) if (TP + FP) > 0 else 0
        recall = TP / (TP + FN) if (TP + FN) > 0 else 0
        f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

        precisions.append(precision)
        recalls.append(recall)
        f1_scores.append(f1)

    # np.mean([]) yields nan plus a RuntimeWarning; report zeros instead.
    if not f1_scores:
        return 0.0, 0.0, 0.0

    avg_precision = np.mean(precisions)
    avg_recall = np.mean(recalls)
    avg_f1_score = np.mean(f1_scores)

    return avg_precision, avg_recall, avg_f1_score

# Assuming val_dataset_dicts, test_dataset_dicts, val_predictions, test_predictions, ground_truth_masks_dict, ground_truth_masks_dict_test are defined
# The *_predictions lists passed here are the raw predictor outputs from
# get_predictions, not the extracted NumPy arrays.

# Compute boundary F1 score for validation set
val_avg_precision, val_avg_recall, val_avg_f1_score = compute_boundary_f1_score(val_dataset_dicts, val_predictions, ground_truth_masks_dict)
print("Validation - Average Precision:", val_avg_precision)
print("Validation - Average Recall:", val_avg_recall)
print("Validation - Average Boundary F1 Score:", val_avg_f1_score)

# Compute boundary F1 score for test set
test_avg_precision, test_avg_recall, test_avg_f1_score = compute_boundary_f1_score(test_dataset_dicts, test_predictions, ground_truth_masks_dict_test)
print("Test - Average Precision:", test_avg_precision)
print("Test - Average Recall:", test_avg_recall)
print("Test - Average Boundary F1 Score:", test_avg_f1_score)


Invalid prediction format for /content/drive/MyDrive/data/species_53/data/val/PM_NF_5054_01_29.jpeg. Skipping...
Invalid prediction format for /content/drive/MyDrive/data/species_53/data/val/PM_NF_5054_03_40.jpeg. Skipping...
Invalid prediction format for /content/drive/MyDrive/data/species_53/data/val/PM_NF_5065_06_1.jpeg. Skipping...
Invalid prediction format for /content/drive/MyDrive/data/species_53/data/val/PM_NF_5065_06_42.jpeg. Skipping...
Invalid prediction format for /content/drive/MyDrive/data/species_53/data/val/PM_NF_5065_08_21.jpeg. Skipping...
Invalid prediction format for /content/drive/MyDrive/data/species_53/data/val/PM_NF_5065_09_3.jpeg. Skipping...
Invalid prediction format for /content/drive/MyDrive/data/species_53/data/val/PM_NF_5065_12_8.jpeg. Skipping...
Invalid prediction format for /content/drive/MyDrive/data/species_53/data/val/PM_NF_5065_12_14.jpeg. Skipping...
Invalid prediction format for /content/drive/MyDrive/data/species_53/data/val/PM_NF_5065_13_32.jpeg

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
