This model had comparatively better performance when it was run at epochs = 3 and split_size = 0.2 (60-20-20)

In [1]:
""" mount drive to access the dataset"""

from google.colab import drive
# Mount Google Drive at /content/drive; the dataset and result paths used
# later in this notebook all live under /content/drive/MyDrive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, MaxPool2D, Conv2DTranspose, Concatenate, Input, ZeroPadding2D
from tensorflow.keras.models import Model
from tensorflow.keras.applications import InceptionResNetV2

def conv_block(input, num_filters):
    """Run two identical Conv2D(3x3, "same") -> BatchNormalization -> ReLU stages.

    Args:
        input: Tensor fed to the first convolution.
        num_filters: Number of filters used by both convolutions.

    Returns:
        The tensor produced by the second ReLU activation.
    """
    features = input
    # Both stages are identical, so build them in a loop; the layers are
    # still created in conv -> batch-norm -> activation order.
    for _ in range(2):
        features = Conv2D(num_filters, 3, padding="same")(features)
        features = BatchNormalization()(features)
        features = Activation("relu")(features)
    return features

def decoder_block(input, skip_features, num_filters):
    """Upsample `input`, merge it with a skip connection and refine the result.

    Args:
        input: Tensor coming from the previous (coarser) stage.
        skip_features: Encoder tensor concatenated with the upsampled input;
            it must match the upsampled tensor on every non-channel axis.
        num_filters: Filter count for the transposed convolution and for the
            convolution block that follows.

    Returns:
        Output tensor of `conv_block` applied to the concatenated features.
    """
    # Stride-2 transposed convolution doubles the spatial resolution.
    upsampled = Conv2DTranspose(num_filters, (2, 2), strides=2, padding="same")(input)
    merged = Concatenate()([upsampled, skip_features])
    return conv_block(merged, num_filters)

In [3]:
"""metric.py"""

import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K

def iou(y_true, y_pred):
    """Intersection-over-union metric evaluated with NumPy inside the TF graph.

    The score is (sum(y_true * y_pred) + 1e-15) / (union + 1e-15), where
    union = sum(y_true) + sum(y_pred) - intersection. The inputs are used as
    given; no thresholding is applied here.

    Args:
        y_true: Ground-truth tensor.
        y_pred: Prediction tensor.

    Returns:
        A float32 tensor produced by `tf.numpy_function`.
    """
    def _iou_numpy(truth, pred):
        overlap = (truth * pred).sum()
        combined = truth.sum() + pred.sum() - overlap
        # The small constant keeps the ratio defined when both inputs are empty.
        score = (overlap + 1e-15) / (combined + 1e-15)
        return score.astype(np.float32)

    return tf.numpy_function(_iou_numpy, [y_true, y_pred], tf.float32)

# Small constant added to numerator and denominator of the Dice ratio so that
# it stays defined when both inputs sum to zero.
smooth = 1e-15
def dice_coef(y_true, y_pred):
    """Dice coefficient: (2 * sum(y_true * y_pred) + smooth) / (sum(y_true) + sum(y_pred) + smooth).

    Args:
        y_true: Ground-truth tensor.
        y_pred: Prediction tensor.

    Returns:
        Scalar tensor holding the Dice coefficient over the whole batch.
    """
    flat_true = tf.keras.layers.Flatten()(y_true)
    flat_pred = tf.keras.layers.Flatten()(y_pred)
    overlap = tf.reduce_sum(flat_true * flat_pred)
    numerator = 2. * overlap + smooth
    denominator = tf.reduce_sum(flat_true) + tf.reduce_sum(flat_pred) + smooth
    return numerator / denominator

def dice_loss(y_true, y_pred):
    """Dice loss, i.e. one minus the Dice coefficient of the two tensors."""
    coefficient = dice_coef(y_true, y_pred)
    return 1.0 - coefficient

In [4]:
"""train.py"""

import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import numpy as np
import cv2
from glob import glob
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger, ReduceLROnPlateau # ModelCheckPoint - save weights;
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import Recall, Precision

""" Global parameters """
H = 512     # height (pixels) every image and mask is resized to before use
W = 512     # width (pixels) every image and mask is resized to before use

def create_dir(path):
    """Create the directory `path`, including missing parents, if it is absent.

    Calling it on an existing directory is a no-op.

    Args:
        path: Directory to create.

    Raises:
        FileExistsError: If `path` exists but is not a directory.
    """
    # exist_ok=True removes the race between a separate existence check and
    # the creation (another process may create the folder in between).
    os.makedirs(path, exist_ok=True)

def load_data(split=0.2):
    """Collect image/mask paths and split them into training and validation sets.

    Only a two-way split is performed: with the default `split=0.2` the result
    is 80% training / 20% validation. Images and masks are paired by their
    position in the sorted directory listings, so matching files are assumed
    to sort identically in both folders (TODO confirm for new datasets).

    Args:
        split: Fraction (or absolute count) of samples placed in the
            validation set; forwarded to `train_test_split` as `test_size`.

    Returns:
        ((train_x, train_y), (valid_x, valid_y)) where the x lists hold image
        paths and the y lists hold the corresponding mask paths.

    Raises:
        ValueError: If the number of images differs from the number of masks.
    """
    images = sorted(glob(os.path.join("/content/drive/MyDrive/Stanford-Computer_Vision-Rectina/Image_Dataset/Data_Augmentation/Images", "*")))
    masks = sorted(glob(os.path.join("/content/drive/MyDrive/Stanford-Computer_Vision-Rectina/Image_Dataset/Data_Augmentation/Masks", "*")))        # EDIT ACCORDINGLY

    # Two independent splits only stay aligned when both lists have the same
    # length; fail loudly instead of training on mismatched pairs.
    if len(images) != len(masks):
        raise ValueError(
            f"Found {len(images)} images but {len(masks)} masks; every image needs exactly one mask."
        )

    # Splitting both lists in one call guarantees the same indices are used
    # for images and masks.
    train_x, valid_x, train_y, valid_y = train_test_split(
        images, masks, test_size=split, random_state=42
    )

    return (train_x, train_y), (valid_x, valid_y)

In [5]:
def apply_clahe_img(image):
    """Enhance local contrast of a colour image with CLAHE on its lightness channel.

    The image is converted to LAB, CLAHE (clipLimit=2.0, 8x8 tiles) is applied
    to the L channel only, and the result is converted back.

    Args:
        image: Colour image interpreted as RGB by the colour conversions used
            here. NOTE(review): `cv2.imread` yields BGR, so callers passing
            its output directly should confirm the intended channel order.

    Returns:
        The contrast-enhanced image in the same channel order as the input.
    """
    # Convert RGB image to LAB color space
    lab = cv2.cvtColor(image, cv2.COLOR_RGB2LAB)

    # cv2.split can return an immutable tuple, which makes the item assignment
    # below raise TypeError; copy the planes into a list first.
    lab_planes = list(cv2.split(lab))

    # Apply CLAHE to the L channel (Lightness)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    lab_planes[0] = clahe.apply(lab_planes[0])

    # Merge the enhanced L channel back with the other channels
    lab = cv2.merge(lab_planes)

    # Convert the LAB image back to RGB color space
    enhanced_image = cv2.cvtColor(lab, cv2.COLOR_LAB2RGB)

    return enhanced_image

def apply_clahe_mask(image):
    """Apply CLAHE (clipLimit=2.0, 8x8 tiles) to `image` and return the result."""
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    return equalizer.apply(image)

def read_image(path):
    """Load an image file and prepare it as network input.

    Args:
        path: Path of the image file (any data split).

    Returns:
        float32 array of shape (H, W, 3) with values scaled to [0, 1].

    Raises:
        OSError: If OpenCV cannot read an image from `path`.
    """
    x = cv2.imread(path, cv2.IMREAD_COLOR)      # always decode to 3 channels, even for grayscale files
    if x is None:
        # cv2.imread signals a missing/unreadable file by returning None;
        # report the path instead of failing later inside cv2.resize.
        raise OSError(f"cv2.imread could not read an image from {path!r}")
    x = cv2.resize(x, (W, H))                   # resize to (H, W, 3)
    # x = apply_clahe_img(x)
    x = x/255.0                                 # normalise to values between 0 and 1
    x = x.astype(np.float32)                    # convert to float32
    return x

def read_mask(path):
    """Load a mask file and convert it to a binary float mask.

    The mask is read as grayscale, resized to (W, H), scaled by its maximum
    value and thresholded at 0.5.

    Args:
        path: Path of the mask file.

    Returns:
        float32 array of shape (H, W, 1) containing only 0.0 and 1.0.

    Raises:
        OSError: If OpenCV cannot read a mask from `path`.
    """
    x = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if x is None:
        # cv2.imread signals a missing/unreadable file by returning None.
        raise OSError(f"cv2.imread could not read a mask from {path!r}")
    x = cv2.resize(x, (W, H))
    # x = apply_clahe_mask(x)
    peak = np.max(x)
    if peak > 0:
        x = x/peak
    # An all-zero mask is left as is: dividing by its maximum (0) only
    # produced NaNs plus a RuntimeWarning, and still thresholded to all zeros.
    x = x > 0.5
    x = x.astype(np.float32)
    x = np.expand_dims(x, axis=-1)          # convert (H, W) to (H, W, 1)
    return x

def tf_parse(x, y):
    """Turn an (image path, mask path) pair of string tensors into image tensors.

    Args:
        x: String tensor holding the image path.
        y: String tensor holding the mask path.

    Returns:
        (image, mask) float32 tensors with static shapes (H, W, 3) and (H, W, 1).
    """
    def _load_pair(image_path, mask_path):
        # numpy_function hands the paths over as bytes.
        image_path, mask_path = image_path.decode(), mask_path.decode()
        return read_image(image_path), read_mask(mask_path)

    image, mask = tf.numpy_function(_load_pair, [x, y], [tf.float32, tf.float32])
    # numpy_function loses static shape information, so restore it here.
    image.set_shape([H, W, 3])
    mask.set_shape([H, W, 1])
    return image, mask

def tf_dataset(X, Y, batch=8, shuffle_buffer=200, prefetch=4):
    """Build a shuffled, batched `tf.data` pipeline from image and mask paths.

    Args:
        X: List of image paths.
        Y: List of mask paths, aligned with `X`.
        batch: Number of samples per batch.
        shuffle_buffer: Size of the shuffle buffer. Only paths are buffered
            (shuffling happens before the files are read). A value at least
            as large as `len(X)` shuffles the whole dataset uniformly; the
            default of 200 keeps the previous behaviour.
        prefetch: Number of batches prepared ahead of the consumer.

    Returns:
        A dataset yielding (images, masks) batches as produced by `tf_parse`.
    """
    dataset = tf.data.Dataset.from_tensor_slices((X, Y))
    dataset = dataset.shuffle(buffer_size=shuffle_buffer)  # shuffle the path pairs
    dataset = dataset.map(tf_parse)          # load and preprocess each pair
    dataset = dataset.batch(batch)           # group samples into batches
    dataset = dataset.prefetch(prefetch)     # prepare batches ahead of time
    return dataset

In [6]:
if __name__ == "__main__":
    """ Seeding """
    np.random.seed(42)          # fix the NumPy and TensorFlow seeds so that later runs produce similar results
    tf.random.set_seed(42)

    """ Directory for storing files """
    create_dir("files")

    """ Hyperparameters """
    batch_size = 4      # images per batch; kept small to stay within GPU memory limits
    lr = 1e-5           # learning rate passed to the Adam optimizer
    num_epochs = 20     # number of training epochs
    model_path = os.path.join("files", "model.h5")      # location where model weights from 'ModelCheckpoint' are stored
    csv_path = os.path.join("files", "data.csv")        # location where the per-epoch log from 'CSVLogger' is stored

    """ Dataset """
    (train_x, train_y), (valid_x, valid_y) = load_data()

    # Sanity check: image and mask counts should match within each split.
    print(f"Train: {len(train_x)} - {len(train_y)}")
    print(f"Valid: {len(valid_x)} - {len(valid_y)}")

Train: 640 - 640
Valid: 160 - 160


In [7]:
    train_dataset = tf_dataset(train_x, train_y, batch=batch_size)   # training pipeline built from the training path lists
    valid_dataset = tf_dataset(valid_x, valid_y, batch=batch_size)   # validation pipeline built from the validation path lists
    # Last expression of the cell, so the notebook displays the dataset's element spec.
    train_dataset

<_PrefetchDataset element_spec=(TensorSpec(shape=(None, 512, 512, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, 512, 512, 1), dtype=tf.float32, name=None))>

In [8]:
    """model"""

    def build_inception_resnetv2_unet(input_shape):
        """Build a U-Net whose encoder is an ImageNet-pretrained InceptionResNetV2.

        Four encoder feature maps are zero-padded to power-of-two sizes and used
        as skip connections for four decoder blocks; a 1x1 sigmoid convolution
        produces the single-channel output. The size comments below hold for a
        512 x 512 input.

        The skip connections are auto-named Activation layers of the encoder.
        Keras numbers such layers with a counter that keeps growing while the
        session lives, so the literal names ("activation_74", ...) are only
        valid for the first model built in a fresh session. The names are
        therefore resolved relative to the encoder's first auto-named
        activation, which lets this function be called repeatedly.

        Args:
            input_shape: Shape of one input image, e.g. (H, W, 3).

        Returns:
            An uncompiled Keras `Model` named "InceptionResNetV2_U-Net".

        Raises:
            ValueError: If the encoder exposes no auto-named Activation layer.
        """
        import re  # local import: only needed to parse auto-generated layer names

        # Input
        inputs = Input(input_shape)

        # Pre-trained InceptionResNetV2 Model
        encoder = InceptionResNetV2(include_top=False, weights="imagenet", input_tensor=inputs)

        # Smallest numeric suffix among the encoder's auto-named activations
        # ("activation" counts as 0); it is 0 in a fresh session.
        auto_indices = []
        for layer in encoder.layers:
            match = re.fullmatch(r"activation(?:_(\d+))?", layer.name)
            if match:
                auto_indices.append(int(match.group(1) or 0))
        if not auto_indices:
            raise ValueError("No auto-named Activation layer found in the InceptionResNetV2 encoder")
        offset = min(auto_indices)

        def encoder_activation(index):
            """Output of the encoder activation called `activation_<index>` in a fresh session."""
            shifted = index + offset
            name = "activation" if shifted == 0 else f"activation_{shifted}"
            return encoder.get_layer(name).output

        # Encoder
        s1 = inputs                                        ## (512 x 512), the encoder's own input tensor

        s2 = encoder_activation(0)                         ## (255 x 255)
        s2 = ZeroPadding2D(( (1, 0), (1, 0) ))(s2)         ## (256 x 256)

        s3 = encoder_activation(3)                         ## (126 x 126)
        s3 = ZeroPadding2D((1, 1))(s3)                     ## (128 x 128)

        s4 = encoder_activation(74)                        ## (61 x 61)
        s4 = ZeroPadding2D(( (2, 1),(2, 1) ))(s4)          ## (64 x 64)

        # Bridge
        b1 = encoder_activation(161)                       ## (30 x 30)
        b1 = ZeroPadding2D((1, 1))(b1)                     ## (32 x 32)

        # Decoder
        d1 = decoder_block(b1, s4, 512)                    ## (64 x 64)
        d2 = decoder_block(d1, s3, 256)                    ## (128 x 128)
        d3 = decoder_block(d2, s2, 128)                    ## (256 x 256)
        d4 = decoder_block(d3, s1, 64)                     ## (512 x 512)

        # Output
        outputs = Conv2D(1, 1, padding="same", activation="sigmoid")(d4)

        model = Model(inputs, outputs, name="InceptionResNetV2_U-Net")
        return model

    # Build the network for H x W colour inputs.
    model = build_inception_resnetv2_unet((H, W, 3))
    # Metrics reported each epoch; dice_coef and iou are the custom functions
    # defined earlier in this notebook, Recall/Precision are Keras built-ins.
    metrics = [dice_coef, iou, Recall(), Precision()]
    # Train with the Dice loss and Adam at learning rate `lr`.
    model.compile(loss=dice_loss, optimizer=Adam(lr), metrics=metrics)

    model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_resnet_v2/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "InceptionResNetV2_U-Net"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 512, 512, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d (Conv2D)                (None, 255, 255, 32  864         ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 batch_n

In [9]:
    callbacks = [                                                                  # callbacks run by Keras during training
        ModelCheckpoint(model_path, verbose=1, save_best_only=True),               # save to model_path only when the monitored value improves
        ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=1e-7, verbose=1),  # multiply the LR by 0.1 after 5 epochs without val_loss improvement, never below 1e-7
        CSVLogger(csv_path)                                                        # write per-epoch metrics to csv_path
    ]

    model.fit(                                                                     # train the model; the returned History object becomes the cell output
            train_dataset,
            epochs=num_epochs,
            validation_data=valid_dataset,
            callbacks=callbacks
        )

Epoch 1/20


  x = x/np.max(x)


Epoch 1: val_loss improved from inf to 0.76215, saving model to files/model.h5
Epoch 2/20
Epoch 2: val_loss improved from 0.76215 to 0.57709, saving model to files/model.h5
Epoch 3/20
Epoch 3: val_loss improved from 0.57709 to 0.47727, saving model to files/model.h5
Epoch 4/20
Epoch 4: val_loss improved from 0.47727 to 0.44287, saving model to files/model.h5
Epoch 5/20
Epoch 5: val_loss improved from 0.44287 to 0.43108, saving model to files/model.h5
Epoch 6/20
Epoch 6: val_loss improved from 0.43108 to 0.42593, saving model to files/model.h5
Epoch 7/20
Epoch 7: val_loss did not improve from 0.42593
Epoch 8/20
Epoch 8: val_loss improved from 0.42593 to 0.40786, saving model to files/model.h5
Epoch 9/20
Epoch 9: val_loss did not improve from 0.40786
Epoch 10/20
Epoch 10: val_loss did not improve from 0.40786
Epoch 11/20
Epoch 11: val_loss improved from 0.40786 to 0.40599, saving model to files/model.h5
Epoch 12/20
Epoch 12: val_loss improved from 0.40599 to 0.40354, saving model to file

<keras.callbacks.History at 0x7ec723d6c5e0>

In [10]:
# Sorted path lists of the held-out test images and their masks.
# NOTE(review): the evaluation cells pair them by position, which presumably
# relies on matching file names sorting identically in both folders - confirm.
test_images = sorted(glob(os.path.join( "/content/drive/MyDrive/Stanford-Computer_Vision-Rectina/Image_Dataset/Test/Image", "*")))
test_masks = sorted(glob(os.path.join( "/content/drive/MyDrive/Stanford-Computer_Vision-Rectina/Image_Dataset/Test/Mask", "*")))

In [13]:
"""eval.py"""

import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import numpy as np
import cv2
from glob import glob
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras.utils import CustomObjectScope      # used to implement additional metrics such as iou, dice_coef, etc

# Directory for storing files
create_dir("results")

results_dir = "/content/drive/MyDrive/Stanford-Computer_Vision-Rectina/Semantic-Segmentation_Results/InceptionresNetV2/Results"
# Every montage below is written into results_dir, so create it up front:
# cv2.imwrite reports a missing target folder only through a False return value.
create_dir(results_dir)

# Loading model: iou, dice_coef and dice_loss are defined in this notebook,
# not in Keras, so they must be registered while the model is deserialised.
with CustomObjectScope({'iou': iou, 'dice_coef': dice_coef, 'dice_loss': dice_loss}):
    model = tf.keras.models.load_model("files/model.h5")

# zip() silently drops unmatched entries, so refuse mismatched lists.
if len(test_images) != len(test_masks):
    raise ValueError(f"Found {len(test_images)} test images but {len(test_masks)} test masks")

for image_path, mask_path in tqdm(zip(test_images, test_masks), total=len(test_images)):
    # Extracting the image name (file name up to its first '.').
    image_name = image_path.split("/")[-1].split(".")[0]

    # Reading the image: same preprocessing as 'read_image', but the resized
    # 8-bit copy is kept for the montage.
    ori_x = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if ori_x is None:
        raise OSError(f"cv2.imread could not read an image from {image_path!r}")
    ori_x = cv2.resize(ori_x, (W, H))
    x = ori_x/255.0
    x = x.astype(np.float32)
    x = np.expand_dims(x, axis=0)           # add the batch axis

    # Reading the mask; it is only displayed, so it is not binarised.
    ori_y = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    if ori_y is None:
        raise OSError(f"cv2.imread could not read a mask from {mask_path!r}")
    ori_y = cv2.resize(ori_y, (W, H))
    ori_y = np.expand_dims(ori_y, axis=-1)  ## (512, 512, 1)
    ori_y = np.concatenate([ori_y, ori_y, ori_y], axis=-1)  ## (512, 512, 3)

    # Predicting the mask. verbose=0 stops Keras from printing a progress bar
    # per image, which would interleave with the tqdm bar.
    y_pred = model.predict(x, verbose=0)[0] > 0.5
    y_pred = y_pred.astype(np.uint8)        # 0/1 mask as 8-bit integers

    # Expand the prediction to three channels and scale it to 0/255.
    y_pred = np.concatenate([y_pred, y_pred, y_pred], axis=-1)
    y_pred = y_pred*255

    # Saving the predicted mask along with the image and GT
    save_image_path = f"{results_dir}/{image_name}.jpg"   # location to save image

    # A white line with 10 pixel width. All parts are 8-bit, so the montage is
    # 8-bit too, which is the depth the JPEG writer works with.
    sep_line = np.full((H, 10, 3), 255, dtype=np.uint8)
    cat_image = np.concatenate([ori_x, sep_line, ori_y, sep_line, y_pred], axis=1)  # original image | original mask | predicted mask

    if not cv2.imwrite(save_image_path, cat_image):
        raise OSError(f"cv2.imwrite could not write {save_image_path!r}")

  0%|          | 0/11 [00:00<?, ?it/s]



  9%|▉         | 1/11 [00:02<00:27,  2.74s/it]



 18%|█▊        | 2/11 [00:03<00:11,  1.31s/it]



 27%|██▋       | 3/11 [00:03<00:06,  1.18it/s]



 36%|███▋      | 4/11 [00:03<00:04,  1.64it/s]



 45%|████▌     | 5/11 [00:03<00:03,  2.00it/s]



 55%|█████▍    | 6/11 [00:04<00:02,  2.39it/s]



 64%|██████▎   | 7/11 [00:04<00:01,  2.74it/s]



 73%|███████▎  | 8/11 [00:04<00:00,  3.06it/s]



 82%|████████▏ | 9/11 [00:04<00:00,  3.33it/s]



 91%|█████████ | 10/11 [00:05<00:00,  3.29it/s]



100%|██████████| 11/11 [00:05<00:00,  2.01it/s]


In [14]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import numpy as np
import cv2
from glob import glob
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras.utils import CustomObjectScope      # used to implement additional metrics such as iou, dice_coef, etc

""" Function to create directory if it doesn't exist """
def create_dir(path):
    """Create the directory `path`, including missing parents, if it is absent.

    Calling it on an existing directory is a no-op.

    Args:
        path: Directory to create.

    Raises:
        FileExistsError: If `path` exists but is not a directory.
    """
    # exist_ok=True removes the race between a separate existence check and
    # the creation (another process may create the folder in between).
    os.makedirs(path, exist_ok=True)

# Directory for storing files
create_dir("results")

results_dir = "/content/drive/MyDrive/Stanford-Computer_Vision-Rectina/Semantic-Segmentation_Results/InceptionresNetV2/Results"
pred_for_onionring = "/content/drive/MyDrive/Stanford-Computer_Vision-Rectina/Semantic-Segmentation_Results/InceptionresNetV2/predictions_for_onionring"
# Predictions are written straight into pred_for_onionring, so it must exist:
# cv2.imwrite reports a missing target folder only through a False return value.
create_dir(pred_for_onionring)

# Loading model: iou, dice_coef and dice_loss are defined in this notebook,
# not in Keras, so they must be registered while the model is deserialised.
with CustomObjectScope({'iou': iou, 'dice_coef': dice_coef, 'dice_loss': dice_loss}):
    model = tf.keras.models.load_model("files/model.h5")

# zip() silently drops unmatched entries, so refuse mismatched lists.
if len(test_images) != len(test_masks):
    raise ValueError(f"Found {len(test_images)} test images but {len(test_masks)} test masks")

for image_path, mask_path in tqdm(zip(test_images, test_masks), total=len(test_images)):
    # Extracting the image name (file name up to its first '.').
    image_name = image_path.split("/")[-1].split(".")[0]

    # Reading the image: same preprocessing as 'read_image', but the resized
    # 8-bit copy is kept so it can be saved next to the prediction.
    ori_x = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if ori_x is None:
        raise OSError(f"cv2.imread could not read an image from {image_path!r}")
    ori_x = cv2.resize(ori_x, (W, H))
    x = ori_x/255.0
    x = x.astype(np.float32)
    x = np.expand_dims(x, axis=0)           # add the batch axis

    # Reading the mask; it is only saved/displayed, so it is not binarised.
    ori_y = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    if ori_y is None:
        raise OSError(f"cv2.imread could not read a mask from {mask_path!r}")
    ori_y = cv2.resize(ori_y, (W, H))
    ori_y = np.expand_dims(ori_y, axis=-1)  ## (512, 512, 1)
    ori_y = np.concatenate([ori_y, ori_y, ori_y], axis=-1)  ## (512, 512, 3)

    # Predicting the mask. verbose=0 stops Keras from printing a progress bar
    # per image, which would interleave with the tqdm bar.
    y_pred = model.predict(x, verbose=0)[0] > 0.5
    y_pred = y_pred.astype(np.uint8)        # 0/1 mask as 8-bit integers
    y_pred = np.concatenate([y_pred, y_pred, y_pred], axis=-1)
    y_pred = y_pred*255                     # scale to 0/255 for saving

    # Create a folder with the image name to store results
    image_folder = os.path.join(results_dir, image_name)
    create_dir(image_folder)

    # Output files: original image, ground-truth mask, predicted mask (stored
    # twice, the second copy for the onion ring segmentation step) and a
    # side-by-side montage.
    ori_image_path = os.path.join(image_folder, f"org_{image_name}.jpg")
    gt_mask_path = os.path.join(image_folder, f"gt_{image_name}.jpg")
    pred_mask_path = os.path.join(image_folder, f"pred_{image_name}.jpg")
    pred_for_onionringseg = os.path.join(pred_for_onionring, f"{image_name}.jpg")
    montage_path = os.path.join(image_folder, f"montage_{image_name}.jpg")

    # A white line with 10 pixel width. All parts are 8-bit, so the montage is
    # 8-bit too, which is the depth the JPEG writer works with.
    sep_line = np.full((H, 10, 3), 255, dtype=np.uint8)
    cat_image = np.concatenate([ori_x, sep_line, ori_y, sep_line, y_pred], axis=1)

    # NOTE(review): JPEG is lossy, so the saved masks are not strictly binary;
    # confirm that the downstream onion ring step tolerates this.
    outputs = [
        (ori_image_path, ori_x),
        (gt_mask_path, ori_y),
        (pred_mask_path, y_pred),
        (pred_for_onionringseg, y_pred),
        (montage_path, cat_image),
    ]
    for out_path, out_image in outputs:
        if not cv2.imwrite(out_path, out_image):
            raise OSError(f"cv2.imwrite could not write {out_path!r}")


  0%|          | 0/11 [00:00<?, ?it/s]



  9%|▉         | 1/11 [00:03<00:34,  3.46s/it]



 18%|█▊        | 2/11 [00:03<00:14,  1.60s/it]



 27%|██▋       | 3/11 [00:04<00:08,  1.01s/it]



 36%|███▋      | 4/11 [00:04<00:05,  1.37it/s]



 45%|████▌     | 5/11 [00:04<00:03,  1.75it/s]



 55%|█████▍    | 6/11 [00:04<00:02,  2.12it/s]



 64%|██████▎   | 7/11 [00:05<00:01,  2.38it/s]



 73%|███████▎  | 8/11 [00:05<00:01,  2.65it/s]



 82%|████████▏ | 9/11 [00:05<00:00,  2.89it/s]



 91%|█████████ | 10/11 [00:06<00:00,  3.08it/s]



100%|██████████| 11/11 [00:06<00:00,  1.72it/s]
