In [1]:
import zipfile
import os
from PIL import Image
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
import re
import random
import glob
import matplotlib.pyplot as plt

# Define directories

In [5]:
"""
images_folder_original = "/media/lucap/big_data/datasets/repair/puzzle2D/motif_segmentation/MoFF/RGB"
masks_folder_original = '/media/lucap/big_data/datasets/repair/puzzle2D/motif_segmentation/MoFF/segmap3c'
images_folder_BM_removed = 'images_cropped/RGB_inpainted'
#masks_folder_BM_removed = 'unzipped_dataset/cropped_resized_masks/'
test_txt = "/media/lucap/big_data/datasets/repair/puzzle2D/motif_segmentation/MoFF/test.txt"
validation_txt = "/media/lucap/big_data/datasets/repair/puzzle2D/motif_segmentation/MoFF/validation.txt"
train_txt = "/media/lucap/big_data/datasets/repair/puzzle2D/motif_segmentation/MoFF/train.txt"
pred_path = "/home/lucap/code/fragment-restoration/runs/run9350223490688163_simplifiedUNET_RGB_images512x512_3classes_200epochs_augmented_lr0.001_HSV/results_simplified_UNET_512x512_test_set"
"""

'\nimages_folder_original = "/media/lucap/big_data/datasets/repair/puzzle2D/motif_segmentation/MoFF/RGB"\nmasks_folder_original = \'/media/lucap/big_data/datasets/repair/puzzle2D/motif_segmentation/MoFF/segmap3c\'\nimages_folder_BM_removed = \'images_cropped/RGB_inpainted\'\n#masks_folder_BM_removed = \'unzipped_dataset/cropped_resized_masks/\'\ntest_txt = "/media/lucap/big_data/datasets/repair/puzzle2D/motif_segmentation/MoFF/test.txt"\nvalidation_txt = "/media/lucap/big_data/datasets/repair/puzzle2D/motif_segmentation/MoFF/validation.txt"\ntrain_txt = "/media/lucap/big_data/datasets/repair/puzzle2D/motif_segmentation/MoFF/train.txt"\npred_path = "/home/lucap/code/fragment-restoration/runs/run9350223490688163_simplifiedUNET_RGB_images512x512_3classes_200epochs_augmented_lr0.001_HSV/results_simplified_UNET_512x512_test_set"\n'

In [6]:
images_folder_original = "images_cropped/RGB/"
masks_folder_original = 'gt-masks/segmap3c'
images_folder_BM_removed = 'images_cropped/RGB_inpainted'
#masks_folder_BM_removed = 'unzipped_dataset/cropped_resized_masks/'
test_txt = "train-val-test/test.txt"
validation_txt = "train-val-test/validation.txt"
train_txt = "train-val-test/train.txt"
pred_path = "run16885392688472511_classicUNET_RGB_images512x512_3classes_200epochs_augmented_lr0.001_HSV/results_test_set/test_pred"

# Load and preprocess data

*Please check if the number of images, ground_truth masks and predicted masks are the same as mine.

In [33]:
def read_and_preprocess_images(image_dir, mask_dir, pred_dir, txt_file, color_space="HSV", preprocessing=None):
    images = []
    masks = []
    predicted_masks = []
    new_size = (512, 512)

    unique_values_gt = set()
    unique_values_pred = set()

    with open(txt_file, 'r') as file:
        lines = file.readlines()

        for line in lines:
            line = line.strip().replace('"', '')

            img_path = os.path.join(image_dir, line)
            mask_path = os.path.join(mask_dir, line)
            pred_mask_path = os.path.join(pred_dir, line)

            img = cv2.imread(img_path)
            mask = Image.open(mask_path).convert("L").resize(new_size)
            pred_mask = Image.open(pred_mask_path).convert("L").resize(new_size)

            if color_space == "HSV":
                img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
            else:
                raise ValueError(f"Invalid color space {color_space}")

            img = cv2.resize(img, new_size)

            images.append(img)
            masks.append(mask)
            predicted_masks.append(pred_mask)

            # Add all unique pixel values in the current mask to the respective sets
            unique_values_gt.update(np.unique(mask))
            unique_values_pred.update(np.unique(pred_mask))

    print(f"Number of images: {len(images)}")
    print(f"Number of ground truth masks: {len(masks)}")
    print(f"Number of predicted masks: {len(predicted_masks)}")
    print(f"Unique pixel values in ground truth masks: {unique_values_gt}")
    print(f"Unique pixel values in predicted masks: {unique_values_pred}")

    return images, masks, predicted_masks

test_images_original, test_masks, test_pred_masks = read_and_preprocess_images(images_folder_original, masks_folder_original, pred_path, test_txt, color_space="HSV")

print("Test images, test masks, and predicted test masks data loaded successfully in HSV color space.")

Number of images: 38
Number of ground truth masks: 38
Number of predicted masks: 38
Unique pixel values in ground truth masks: {0, 1, 2}
Unique pixel values in predicted masks: {0, 1, 2}
Test images, test masks, and predicted test masks data loaded successfully in HSV color space.


# Define evaluation metrics (Tensorflow meanIOU multi_class and single_class)

**CustomSingleClassMeanIoU:** This method creates a mask that identifies the pixels of interest, i.e., those belonging to the class of interest (self.class_index). Then, it multiplies both the ground truth (y_true) and predicted (y_pred) tensors with this mask. This effectively zero out all pixels that are not of the class of interest. However, it leaves the pixels of interest untouched, maintaining their original class value. The IoU is then calculated with these modified tensors.

This could potentially lead to overestimation of IoU since the denominator (union) might be smaller than expected as it's only considering the pixels where the ground truth is the class of interest. Moreover, for the pixels not of the class of interest, since they are set to zero in both y_true and y_pred, they are treated as correct predictions (True Negatives). 

**CustomSingleClassMeanIoUAREF:** This method also creates a similar mask but uses it differently. It uses the tf.boolean_mask function to filter out the values in the ground truth and predicted tensors that correspond to the class of interest. In other words, rather than setting non-target classes to zero, it entirely excludes them from the calculation.

In this case, we completely ignore the pixels that are not of the class of interest. The IoU is then calculated using only these selected pixels, effectively making the problem binary - pixels are either of the class of interest or not.


**An example is provided to clearify the differances:**

Let's say we have a 2x2 ground truth (gt) and prediction (pred) and the target class is 1. So, if we have:

    gt = [[0, 1],
       [1, 2]]

    pred = [[1, 0],
        [1, 2]]

* CustomSingleClassMeanIoU:

    The method creates a mask, applies it, and then calculates the intersection and union:
    
    mask = gt == 1 = [[False, True],
                  [True, False]]
                  

    y_true_c2 = y_true * mask = [[0, 1],
                             [1, 0]]

    y_pred_c2 = y_pred * mask = [[0, 0],
                             [1, 0]]

    Intersection is where both y_true_c2 and y_pred_c2 are equal, and union is where either of them has a value. So,
    
        Intersection: 1 (the bottom left pixel)
        Union: 3 (the bottom left, top right, and bottom right pixels)
        IoU: Intersection / Union = 1/3 = 0.33
    
    
* CustomSingleClassMeanIoUAREF:

    This method creates a mask and filters out the non-matching pixels:
    
    mask = gt == 1 = [[False, True],
                  [True, False]]

    y_true_filtered = [1, 1]
    
    y_pred_filtered = [0, 1]

    Intersection and union are then calculated from these filtered values:
    
        Intersection: 1 (second element of the lists)
        Union: 2 (both elements in the lists)
        IoU: Intersection / Union = 1/2 = 0.5



In [8]:
import tensorflow as tf
from tensorflow.keras.metrics import MeanIoU

class CustomMeanIoU(MeanIoU):
    def __init__(self, num_classes):
        super().__init__(num_classes=num_classes)

    def update_state(self, y_true, y_pred, sample_weight=None):
        y_true = tf.argmax(y_true, axis=-1)
        y_pred = tf.argmax(y_pred, axis=-1)
        return super().update_state(y_true, y_pred, sample_weight)


class CustomSingleClassMeanIoU(MeanIoU):
    def __init__(self, num_classes, class_index):
        super().__init__(num_classes=num_classes)
        self.class_index = class_index

    def update_state(self, y_true, y_pred, sample_weight=None):
        y_true = tf.argmax(y_true, axis=-1)
        y_pred = tf.argmax(y_pred, axis=-1)
        
        # Only consider y_true and y_pred where y_true is class_index
        mask = tf.equal(y_true, self.class_index)
        y_true_c2 = tf.math.multiply(y_true, tf.cast(mask, tf.int64))
        y_pred_c2 = tf.math.multiply(y_pred, tf.cast(mask, tf.int64))

        return super().update_state(y_true_c2, y_pred_c2, sample_weight)
    
class CustomSingleClassMeanIoUAREF(MeanIoU):
    def __init__(self, num_classes, class_index):
        super().__init__(num_classes=num_classes)
        self.class_index = class_index

    def update_state(self, y_true, y_pred, sample_weight=None):
        y_true = tf.argmax(y_true, axis=-1)
        y_pred = tf.argmax(y_pred, axis=-1)
        
        # Only consider y_true and y_pred where y_true is class_index
        mask = tf.equal(y_true, self.class_index)
        
        return super().update_state(tf.boolean_mask(y_true, mask), tf.boolean_mask(y_pred, mask), sample_weight)
    
num_classes = 3
single_class_metric = CustomSingleClassMeanIoU(num_classes, 2)
single_class_metricAREF = CustomSingleClassMeanIoUAREF(num_classes, 2)

# Calculate evaluation metrics (Tensorflow meanIOU multi_class and single_class)

In [9]:
# Convert to tensors and one-hot encode the masks
test_masks_tensor = tf.convert_to_tensor([np.array(m) for m in test_masks])
test_pred_masks_tensor = tf.convert_to_tensor([np.array(m) for m in test_pred_masks])

test_masks_tensor = tf.one_hot(test_masks_tensor, num_classes)
test_pred_masks_tensor = tf.one_hot(test_pred_masks_tensor, num_classes)

print("Tensorflow IOU")

# Compute 3 class mean IOU
meanIOU = CustomMeanIoU(num_classes)
meanIOU.update_state(test_masks_tensor, test_pred_masks_tensor)
print(f"3 class Mean IOU: {meanIOU.result().numpy()}")

# Compute single class mean IOU
class_index = 2
single_class_metric = CustomSingleClassMeanIoU(num_classes, class_index)
single_class_metric.update_state(test_masks_tensor, test_pred_masks_tensor)
print(f"Single class Mean IOU: {single_class_metric.result().numpy()}")
single_class_metricAREF = CustomSingleClassMeanIoUAREF(num_classes, class_index)
single_class_metricAREF.update_state(test_masks_tensor, test_pred_masks_tensor)
print(f"Single class Mean IOU AREF: {single_class_metricAREF.result().numpy()}")

manual_iou = []
manual_iou_single_class = []
for pred, gt in zip(test_pred_masks_tensor, test_masks_tensor):
    pred = pred.numpy() * 2
    gt = gt.numpy() * 2
#     print('pred',np.mean(pred))
#     print('gt',np.mean(gt))
    inters = np.sum((gt == pred))
    union = np.sum(gt > -1)
    inters_motif = np.sum(((gt==2)*(pred==2)>0))
    union_motif = np.sum(((gt==2)+(pred==2))>0)
    manual_iou.append(inters/union)
    manual_iou_single_class.append(inters_motif/union_motif)

manual_iou = np.asarray(manual_iou)
manual_iou_single_class = np.asarray(manual_iou_single_class)
print("Manually Computed")
print(f"3 class Mean IOU: {np.mean(manual_iou)}")
print(f"Single class Mean IOU: {np.mean(manual_iou_single_class)}")

Tensorflow IOU
3 class Mean IOU: 0.9186064600944519
Single class Mean IOU: 0.6533918380737305
Single class Mean IOU AREF: 0.3202303349971771
Manually Computed
3 class Mean IOU: 0.9779473020319353
Single class Mean IOU: 0.9390014498136893


# UPDATED Manual computation of meanIOU

**The manual IoU** calculation I am performing here is fundamentally different from the other two methods in the sense that instead of modifying or filtering the ground truth and predictions based on the target class, I create a boolean mask that identifies where both the ground truth and prediction match the target class.
In this method, boolean masks are created where the predicted and true class are equal to the class of interest. These masks are used to calculate the intersection (both masks are true) and the union (either mask is true).
* In other words, I consider the problem as a binary problem (similar to "CustomSingleClassMeanIoUAREF"). However, instead of directly using the class labels, I use boolean masks.
* The main differance is unlike the first two methods, this calculation doesn't modify the original prediction and ground truth or exclude any pixels.
* My personal opinion, this approach gives a direct and intuitive calculation and has the most similarity to text-book calculation of meanIOU. So I would choose this one as our evaluation metric in table 2.

**The example in previous mardown cell is continued here to calrify this method:**

We had a 2x2 ground truth (gt) and prediction (pred) and the target class was 1. SO:

    gt = [[0, 1],
       [1, 2]]

    pred = [[1, 0],
        [1, 2]]
    
This method calculates intersection and union for pixels that match the target class:

    pred_single_class = pred == 1 = [[True, False],
                                     [True, False]]

    gt_single_class = gt == 1 = [[False, True],
                                 [True, False]]

    inters_single_class = pred_single_class & gt_single_class = [[False, False],
                                                                 [True, False]]

    union_single_class = pred_single_class | gt_single_class = [[True, True],
                                                                [True, False]]

So,

    Intersection: 1 (the bottom left pixel)
    Union: 3 (the top left, top right, and bottom left pixels)
    IoU: Intersection / Union = 1/3 = 0.33


In [35]:
class_index = 2 

epsilon = 1e-7 #To avoid dividing by zero

manual_iou = []
manual_iou_single_class = []

for pred, gt in zip(test_pred_masks, test_masks):
    
    pred = np.array(pred)
    gt = np.array(gt)

    inters = np.sum((gt == pred))
    union = np.sum(gt >= 0) + np.sum(pred >= 0) - inters  # Union = Total in GT + Total in Pred - Intersection(to correct for the double-countingof intersection)

    iou = inters / union
    manual_iou.append(iou)

    pred_single_class = (pred == class_index)
    gt_single_class = (gt == class_index)

    inters_single_class = np.sum(pred_single_class & gt_single_class)
    union_single_class = np.sum(pred_single_class | gt_single_class)

    if union_single_class > epsilon:
        iou_single_class = inters_single_class / union_single_class
        manual_iou_single_class.append(iou_single_class)


mean_iou = np.mean(manual_iou)
print(f"3 class Mean IOU: {mean_iou}")


if manual_iou_single_class: 
    mean_iou_single_class = np.mean(manual_iou_single_class)
    print(f"Single class Mean IOU: {mean_iou_single_class}")
else:
    print("No instances where both the predicted mask and the ground truth mask are of class 2.")

3 class Mean IOU: 0.9390014498136893
Single class Mean IOU: 0.8177767577520657
