In [5]:
import numpy as np
import cv2
import os
from glob import glob
import matplotlib.pyplot as plt
import tensorflow as tf

from tensorflow.keras import Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model, load_model, save_model
from tensorflow.keras.layers import Conv2D, Activation, MaxPooling2D, BatchNormalization, UpSampling2D, concatenate, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, CSVLogger, ReduceLROnPlateau

# Model Architecture

In [4]:
def Encoding(input):
    """Build the contracting (encoder) path of the U-Net.

    Four stages of two 3x3 'same' convolutions with ReLU (64, 128, 256 and
    512 filters) are each followed by 2x2 max pooling. A final two-convolution
    bottleneck with 1024 filters is applied without pooling.

    Args:
        input: Keras tensor fed into the first convolution.

    Returns:
        tuple: ``(bottleneck, skip_connections)`` where ``skip_connections``
        is a list holding the pre-pooling output of every stage, ordered
        from the shallowest stage to the deepest.
    """
    def _two_convs(tensor, n_filters):
        # Two consecutive 3x3 'same' convolutions, each followed by ReLU.
        for _ in range(2):
            tensor = Conv2D(filters=n_filters, kernel_size=(3,3), padding='same')(tensor)
            tensor = Activation('relu')(tensor)
        return tensor

    skip_connections = []
    features = input
    for stage_filters in (64, 128, 256, 512):
        features = _two_convs(features, stage_filters)
        # Keep the pre-pooling feature map; the decoder concatenates it later.
        skip_connections.append(features)
        features = MaxPooling2D(pool_size=(2,2))(features)

    # Bottleneck: deepest feature maps, no pooling afterwards.
    bottleneck = _two_convs(features, 1024)

    return bottleneck, skip_connections



def decoder(encoded, skip_connections):
    """Build the expanding (decoder) path of the U-Net.

    Each of the four stages up-samples by 2x2, concatenates the result with
    one stored encoder feature map (taken from the end of
    ``skip_connections`` towards the front), and applies two 3x3 'same'
    convolutions with ReLU using 512, 256, 128 and 64 filters respectively.
    A final 1x1 convolution with a sigmoid activation produces one channel.

    Args:
        encoded: Bottleneck tensor returned by the encoder.
        skip_connections: Sequence of at least four encoder feature maps;
            index ``-1`` is used first and index ``-4`` last.

    Returns:
        Keras tensor with a single sigmoid-activated output channel.
    """
    features = encoded
    for depth, stage_filters in enumerate((512, 256, 128, 64), start=1):
        features = UpSampling2D(size=(2,2))(features)
        # depth=1 picks the last stored skip map, depth=4 the fourth from the end.
        features = concatenate([features, skip_connections[-depth]])
        for _ in range(2):
            features = Conv2D(filters=stage_filters, kernel_size=(3,3), padding='same')(features)
            features = Activation('relu')(features)

    # 1x1 convolution collapses the channels into one sigmoid map.
    return Conv2D(filters=1, kernel_size=(1,1), activation='sigmoid')(features)
    
def Unet(input_shape=(256,256,3)):
    """Assemble the full U-Net as a Keras ``Model``.

    Args:
        input_shape: Shape tuple passed to ``Input``; defaults to
            ``(256, 256, 3)``.

    Returns:
        Model: Maps the input tensor to the decoder's single-channel output.
    """
    image_in = Input(input_shape)
    bottleneck, skips = Encoding(image_in)
    mask_out = decoder(bottleneck, skips)
    return Model(image_in, mask_out)

# Build the U-Net for a (256, 256, 3) input and print its layer-by-layer summary.
model = Unet(input_shape=(256,256,3))
model.summary()

# Training Step

In [None]:
# Create Directory for saving model
def create_dir(path):
    """Create directory ``path`` (including missing parents) if needed.

    Uses ``exist_ok=True`` instead of a separate existence check, so there is
    no window between checking and creating in which another process could
    create the directory and make ``os.makedirs`` fail.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` already exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)

In [None]:
""" Load Data  Function """

def load_data(path, split = 0.1,
              valid_dir="/kaggle/input/val-test-for-lung-segmentation/valid_2",
              test_dir="/kaggle/input/val-test-for-lung-segmentation/test_2"):
    """List the image and mask file paths for the train/valid/test splits.

    Training files are read from ``<path>/augmented_scans`` and
    ``<path>/augmented_masks``. Validation and test files are read from the
    ``origin_2`` and ``mask_2`` sub-folders of ``valid_dir`` and ``test_dir``.
    Every list contains the matching ``*.png`` paths in sorted order; a
    missing folder simply yields an empty list.

    Args:
        path: Root folder of the training data.
        split: Unused; the splits come from separate folders. Kept so
            existing callers that pass it keep working.
        valid_dir: Root folder of the validation data.
        test_dir: Root folder of the test data.

    Returns:
        tuple: ``((train_x, train_y), (valid_x, valid_y), (test_x, test_y))``,
        each element a list of path strings.
    """
    def _sorted_pngs(root, subdir):
        # The file imports the function itself (`from glob import glob`), so it
        # must be called directly; `glob.glob(...)` raises AttributeError.
        return sorted(glob(os.path.join(root, subdir, "*.png")))

    train_x = _sorted_pngs(path, "augmented_scans")
    train_y = _sorted_pngs(path, "augmented_masks")

    valid_x = _sorted_pngs(valid_dir, "origin_2")
    valid_y = _sorted_pngs(valid_dir, "mask_2")

    test_x = _sorted_pngs(test_dir, "origin_2")
    test_y = _sorted_pngs(test_dir, "mask_2")

    return (train_x, train_y), (valid_x, valid_y), (test_x, test_y)


""" Testing: Load Data """

# Smoke check of load_data: collect the file lists for every split and print
# the image count next to the mask count so a mismatch is visible at a glance.
dataset_path = r"/kaggle/input/training-data-for-lung-canceraugmented"
(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = load_data(dataset_path)
print(f"Train: \t{len(train_x)} - {len(train_y)}")
print(f"Valid: \t{len(valid_x)} - {len(valid_y)}")
print(f"Test: \t{len(test_x)} - {len(test_y)}")

In [None]:
def read_image(path):
    """Load an image file as a float32 array scaled by 1/255.

    The file is read with ``cv2.IMREAD_COLOR`` (three channels, in the channel
    order OpenCV returns), resized to 256x256 and divided by 255.

    Args:
        path: File path as ``bytes`` (the form in which ``tf_parse`` receives
            it through ``tf.numpy_function``) or as ``str``.

    Returns:
        numpy.ndarray: ``float32`` array of shape ``(256, 256, 3)``.

    Raises:
        OSError: If OpenCV cannot read the file. ``cv2.imread`` signals this
            by returning ``None`` instead of raising.
    """
    # Decode bytes-like paths; leave plain strings untouched.
    if not isinstance(path, str):
        path = path.decode()
    x = cv2.imread(path, cv2.IMREAD_COLOR)
    if x is None:
        # Fail here with the offending path rather than inside cv2.resize
        # with an assertion message that does not name the file.
        raise OSError(f"Could not read image file: {path!r}")
    x = cv2.resize(x, (256,256))
    x = x / 255.0
    x = x.astype(np.float32)
    return x

def read_mask(path):
    """Load a mask file as a single-channel float32 array scaled by 1/255.

    The file is read with ``cv2.IMREAD_GRAYSCALE``, resized to 256x256,
    divided by 255 and given a trailing channel axis.

    Args:
        path: File path as ``bytes`` (the form in which ``tf_parse`` receives
            it through ``tf.numpy_function``) or as ``str``.

    Returns:
        numpy.ndarray: ``float32`` array of shape ``(256, 256, 1)``.

    Raises:
        OSError: If OpenCV cannot read the file. ``cv2.imread`` signals this
            by returning ``None`` instead of raising.
    """
    # Decode bytes-like paths; leave plain strings untouched.
    if not isinstance(path, str):
        path = path.decode()
    y = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if y is None:
        # Fail here with the offending path rather than inside cv2.resize
        # with an assertion message that does not name the file.
        raise OSError(f"Could not read mask file: {path!r}")
    # NOTE(review): cv2.resize is called with its default interpolation, so a
    # binary mask may pick up intermediate values along edges - confirm that
    # this is intended.
    y = cv2.resize(y, (256,256))
    y = y / 255.0
    y = y.astype(np.float32)
    y = np.expand_dims(y, axis=-1) # (h, w, 1)
    return y

def tf_parse(x, y):
    """Map an (image path, mask path) tensor pair to loaded float32 tensors.

    The Python loaders ``read_image`` and ``read_mask`` are wrapped with
    ``tf.numpy_function``. The static shapes are then set explicitly because
    the wrapped call does not provide them.

    Args:
        x: Tensor holding the image file path.
        y: Tensor holding the mask file path.

    Returns:
        tuple: ``(image, mask)`` tensors with static shapes
        ``[256, 256, 3]`` and ``[256, 256, 1]``.
    """
    def _load_pair(image_path, mask_path):
        # Runs as ordinary Python/NumPy code on the path values.
        return read_image(image_path), read_mask(mask_path)

    image, mask = tf.numpy_function(_load_pair, [x, y], [tf.float32, tf.float32])
    image.set_shape([256, 256, 3])
    mask.set_shape([256, 256, 1])
    return image, mask

def tf_dataset(X, Y, batch=2):
    """Build a batched ``tf.data`` pipeline over image and mask paths.

    Args:
        X: Sequence of image file paths.
        Y: Sequence of mask file paths, aligned with ``X``.
        batch: Batch size; defaults to 2.

    Returns:
        tf.data.Dataset: Yields ``(image, mask)`` batches produced by
        ``tf_parse``, with a prefetch buffer of 10. No shuffling is applied.
    """
    return (
        tf.data.Dataset.from_tensor_slices((X, Y))
        .map(tf_parse)
        .batch(batch)
        .prefetch(10)
    )

In [None]:
import numpy as np
import tensorflow as tf
from keras.saving import register_keras_serializable

# Small constant added to the numerator and denominator in dice_coef,
# precision, recall and f1_score so the ratios stay defined when a sum is zero.
# iou_coef takes its own `smooth` parameter and does not read this value.
smooth = 1e-15


# Registered like the other custom metrics in this file so that a saved model
# which references this function can be deserialized by name.
@register_keras_serializable()
def dice_coef(y_true, y_pred):
    """Dice coefficient computed over the whole batch.

    Computes ``(2 * sum(y_true * y_pred) + smooth) /
    (sum(y_true) + sum(y_pred) + smooth)`` using the module-level ``smooth``.

    Args:
        y_true: Ground-truth mask tensor.
        y_pred: Predicted mask tensor with the same shape as ``y_true``.

    Returns:
        Scalar tensor holding the Dice coefficient.
    """
    y_true = tf.keras.layers.Flatten()(y_true)
    y_pred = tf.keras.layers.Flatten()(y_pred)
    intersection = tf.reduce_sum(y_true * y_pred)
    return (2. * intersection + smooth) / (tf.reduce_sum(y_true) + tf.reduce_sum(y_pred) + smooth)


# Registered like the custom metrics in this file: this function is the
# compiled loss, so a saved model needs to be able to resolve it by name.
@register_keras_serializable()
def dice_loss(y_true, y_pred):
    """Dice loss, defined as ``1 - dice_coef(y_true, y_pred)``.

    Args:
        y_true: Ground-truth mask tensor.
        y_pred: Predicted mask tensor with the same shape as ``y_true``.

    Returns:
        Scalar tensor holding the loss value.
    """
    return 1.0 - dice_coef(y_true, y_pred)


# Intersection over Union (IoU)
@register_keras_serializable()
def iou_coef(y_true, y_pred, smooth=1e-6):
    """Soft intersection-over-union computed over the whole batch.

    Args:
        y_true: Ground-truth mask tensor.
        y_pred: Predicted mask tensor with the same shape as ``y_true``.
        smooth: Constant added to numerator and denominator; defaults to
            ``1e-6`` and shadows the module-level ``smooth`` inside this
            function.

    Returns:
        Scalar tensor ``(intersection + smooth) / (union + smooth)``.
    """
    flat_true, flat_pred = (tf.keras.layers.Flatten()(t) for t in (y_true, y_pred))
    overlap = tf.reduce_sum(flat_true * flat_pred)
    # Union = |A| + |B| - |A and B|.
    combined = tf.reduce_sum(flat_true) + tf.reduce_sum(flat_pred) - overlap
    return (overlap + smooth) / (combined + smooth)

@register_keras_serializable()
def rmse(y_true, y_pred):
    """Root-mean-square error over every element of the two tensors.

    Args:
        y_true: Ground-truth tensor.
        y_pred: Predicted tensor.

    Returns:
        Scalar tensor ``sqrt(mean((y_true - y_pred) ** 2))``.
    """
    squared_error = tf.square(y_true - y_pred)
    mean_squared_error = tf.reduce_mean(squared_error)
    return tf.sqrt(mean_squared_error)

# Precision
@register_keras_serializable()
def precision(y_true, y_pred):
    """Precision of the rounded predictions against ``y_true``.

    Predictions are rounded with ``tf.round``; ``y_true`` is used as given.
    The module-level ``smooth`` is added to numerator and denominator.

    Args:
        y_true: Ground-truth mask tensor.
        y_pred: Predicted mask tensor.

    Returns:
        Scalar tensor ``(TP + smooth) / (predicted positives + smooth)``.
    """
    hard_pred = tf.round(y_pred)
    hits = tf.reduce_sum(y_true * hard_pred)
    flagged = tf.reduce_sum(hard_pred)
    return (hits + smooth) / (flagged + smooth)

# Recall (Sensitivity)
@register_keras_serializable()
def recall(y_true, y_pred):
    """Recall of the rounded predictions against ``y_true``.

    Predictions are rounded with ``tf.round``; ``y_true`` is used as given.
    The module-level ``smooth`` is added to numerator and denominator.

    Args:
        y_true: Ground-truth mask tensor.
        y_pred: Predicted mask tensor.

    Returns:
        Scalar tensor ``(TP + smooth) / (actual positives + smooth)``.
    """
    hard_pred = tf.round(y_pred)
    hits = tf.reduce_sum(y_true * hard_pred)
    positives = tf.reduce_sum(y_true)
    return (hits + smooth) / (positives + smooth)

# F1 Score
@register_keras_serializable()
def f1_score(y_true, y_pred):
    """F1 score built from this module's ``precision`` and ``recall``.

    Args:
        y_true: Ground-truth mask tensor.
        y_pred: Predicted mask tensor.

    Returns:
        Scalar tensor ``2 * (p * r) / (p + r + smooth)`` using the
        module-level ``smooth``.
    """
    prec = precision(y_true, y_pred)
    rec = recall(y_true, y_pred)
    numerator = 2 * (prec * rec)
    return numerator / (prec + rec + smooth)


In [None]:
# Training entry point: seeds the RNGs, prepares the output folder, builds the
# train/validation pipelines, compiles the U-Net with Dice loss and fits it.
if __name__ == "__main__":
    # Seeding
    np.random.seed(42)
    tf.random.set_seed(42)

    # Directory for saving weights and the training log
    save_dir = "/kaggle/working/save_weights_dataset"
    create_dir(save_dir)

    # Hyperparameters
    batch_size = 16
    learning_rate = 1e-4
    num_epochs = 150
    model_path = os.path.join(save_dir, "Unettr_2D.keras")
    csv_path = os.path.join(save_dir, "history.csv")

    # Load the dataset
    dataset_path = r"/kaggle/input/training-data-for-lung-canceraugmented"
    (train_x, train_y), (valid_x, valid_y), (test_x, test_y) = load_data(dataset_path)
    print(f"Train: \t{len(train_x)} - {len(train_y)}")
    print(f"Valid: \t{len(valid_x)} - {len(valid_y)}")
    print(f"Test: \t{len(test_x)} - {len(test_y)}")

    # NOTE(review): tf_dataset applies no shuffling, so training batches follow
    # the sorted file order every epoch - confirm that this is intended.
    train_dataset = tf_dataset(train_x, train_y, batch=batch_size)
    valid_dataset = tf_dataset(valid_x, valid_y, batch=batch_size)

    # Model implementation
    # Unet accepts a single `input_shape` tuple; passing 256, 256, 3 as three
    # positional arguments raises TypeError.
    model = Unet(input_shape=(256, 256, 3))
    model.compile(loss=dice_loss, optimizer=Adam(learning_rate), metrics= [dice_coef, iou_coef, rmse, precision, recall, f1_score, 'acc'])
    callbacks = [
        # With save_best_only=True the checkpoint file holds the best epoch so
        # far, which is why EarlyStopping below does not restore weights itself.
        ModelCheckpoint(model_path, verbose=1, save_best_only=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=1e-7, verbose=1),
        CSVLogger(csv_path),
        EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=False)
    ]

    model.fit(
        train_dataset,
        epochs=num_epochs,
        validation_data=valid_dataset,
        callbacks=callbacks
    )