In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/brain-tumor-segmentation/images/2664.png
/kaggle/input/brain-tumor-segmentation/images/2539.png
/kaggle/input/brain-tumor-segmentation/images/1231.png
/kaggle/input/brain-tumor-segmentation/images/1017.png
/kaggle/input/brain-tumor-segmentation/images/2437.png
/kaggle/input/brain-tumor-segmentation/images/2015.png
/kaggle/input/brain-tumor-segmentation/images/2300.png
/kaggle/input/brain-tumor-segmentation/images/2673.png
/kaggle/input/brain-tumor-segmentation/images/2823.png
/kaggle/input/brain-tumor-segmentation/images/1522.png
/kaggle/input/brain-tumor-segmentation/images/2064.png
/kaggle/input/brain-tumor-segmentation/images/641.png
/kaggle/input/brain-tumor-segmentation/images/1088.png
/kaggle/input/brain-tumor-segmentation/images/173.png
/kaggle/input/brain-tumor-segmentation/images/2851.png
/kaggle/input/brain-tumor-segmentation/images/2448.png
/kaggle/input/brain-tumor-segmentation/images/1746.png
/kaggle/input/brain-tumor-segmentation/images/1958.png
/kaggle/inpu

In [2]:
import shutil
import sys

# Copy the file to working directory
shutil.copy("/kaggle/input/unet/keras/default/1/unet.py", "/kaggle/working/")

# Add the working directory to sys path
sys.path.append("/kaggle/working/")

In [3]:
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger, ReduceLROnPlateau, EarlyStopping, TensorBoard
from tensorflow.keras.optimizers import Adam
import numpy as np
import os
from unet import unet
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2" # tf log messages suppression
import cv2
from glob import glob
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

In [4]:
# Global parameters
H = 256
W = 256

In [5]:
# Metrics
def dice_coefficient(y_true, y_pred, smooth=1e-15):
    """
    Compute the Dice Coefficient for image segmentation tasks.
    
    Args:
        y_true (tensor): Ground truth mask.
        y_pred (tensor): Predicted mask.
        smooth (float): Smoothing factor to avoid division by zero.
    
    Returns:
        tensor: Dice coefficient score.
    """
    # Flatten the tensors
    y_true = tf.keras.layers.Flatten()(y_true)
    y_pred = tf.keras.layers.Flatten()(y_pred)
    
    # Compute the intersection
    intersection = tf.reduce_sum(y_true * y_pred)
    
    # Compute the Dice coefficient
    dice = (2. * intersection + smooth) / (tf.reduce_sum(y_true) + tf.reduce_sum(y_pred) + smooth)
    
    return dice

def dice_loss(y_true, y_pred):
    """
    Compute the Dice Loss, which is 1 - Dice Coefficient.
    
    Args:
        y_true (tensor): Ground truth mask.
        y_pred (tensor): Predicted mask.
    
    Returns:
        tensor: Dice loss value.
    """
    return 1.0 - dice_coefficient(y_true, y_pred)

In [6]:
def create_dir(path):
    if not os.path.exists(path):
        os.makedirs(path)

def load_dataset(path, split=0.2): # 60% for training, 20% for validation, 20% for testing
    images = sorted(glob(os.path.join(path, "images", "*.png")))
    masks = sorted(glob(os.path.join(path, "masks", "*.png")))
    #print(images[0], masks[0])

    split_size = int(len(images) * split)

    # x refers to images and y refers to masks
    train_x, valid_x = train_test_split(images, test_size = split_size, random_state=42)
    train_y, valid_y = train_test_split(masks, test_size = split_size, random_state=42)

    train_x, test_x = train_test_split(train_x, test_size = split_size, random_state=42)
    train_y, test_y = train_test_split(train_y, test_size = split_size, random_state=42)

    return(train_x, train_y), (valid_x, valid_y), (test_x, test_y)
    # 60% for training, 20% for validation, 20% for testing

def read_image(path):
    path = path.decode()
    x = cv2.imread(path, cv2.IMREAD_COLOR)
    x = cv2.resize(x, (W, H))
    x = x / 255.0 # normalization with max pixel range
    x = x.astype(np.float32)
    return x

def read_mask(path):
    path = path.decode()
    x = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    x = cv2.resize(x, (W, H))
    x = x / 255.0 # normalization with max pixel range - white part of the mask becomes 1 instead of 255.0
    x = x.astype(np.float32) # (h, w)
    x = np.expand_dims(x, axis=-1) # (h, w, 1)
    return x

def tf_parse(x, y): # takes single image path and single mask path
    def _parse(x, y):
        x = read_image(x)
        y = read_mask(y)
        return x, y
    x, y = tf.numpy_function(_parse, [x, y], [tf.float32, tf.float32])
    x.set_shape([H, W, 3])
    y.set_shape([H, W, 1])
    return x, y

def tf_dataset(X, Y, batch=2): # X is a list of image file paths (and Y is list of mask paths)
    dataset = tf.data.Dataset.from_tensor_slices((X, Y))
    dataset = dataset.map(tf_parse)
    dataset = dataset.batch(batch)
    dataset = dataset.prefetch(10)
    return dataset
                         
if __name__ == "__main__":
    # Seeding
    np.random.seed(42)
    tf.random.set_seed(42)

    # Directory for storing files
    create_dir("files")

    # Hyperparameters
    batch_size = 16
    lr = 1e-4
    num_epochs = 60
    model_path = os.path.join("files", "model.keras")
    csv_path = os.path.join("files", "log.csv")

    # Dataset
    dataset_path = "/kaggle/input/brain-tumor-segmentation"
    (train_x, train_y), (valid_x, valid_y), (test_x, test_y) = load_dataset(dataset_path)

    train_dataset = tf_dataset(train_x, train_y, batch=batch_size)
    valid_dataset = tf_dataset(valid_x, valid_y, batch=batch_size)

    #for x, y in train_dataset:
        #print(x.shape, y.shape)

    # Model
    model = unet((H,W, 3))
    model.compile(loss = dice_loss, optimizer = Adam(lr), metrics = [dice_coefficient])

    callbacks = [
        ModelCheckpoint(model_path, save_best_only=True, verbose=1),
        CSVLogger(csv_path),
        ReduceLROnPlateau(monitor="val_loss", factor=0.1, patience=5, min_lr=1e-7, verbose=1),
        EarlyStopping(monitor="val_loss", patience=20, restore_best_weights=False)
    ]

    model.fit(
        train_dataset,
        epochs = num_epochs,
        validation_data = valid_dataset,
        callbacks = callbacks
        )

Epoch 1/60
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 362ms/step - dice_coefficient: 0.0861 - loss: 0.9139
Epoch 1: val_loss improved from inf to 0.96485, saving model to files/model.keras
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 491ms/step - dice_coefficient: 0.0865 - loss: 0.9135 - val_dice_coefficient: 0.0351 - val_loss: 0.9649 - learning_rate: 1.0000e-04
Epoch 2/60
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 364ms/step - dice_coefficient: 0.2227 - loss: 0.7773
Epoch 2: val_loss did not improve from 0.96485
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 407ms/step - dice_coefficient: 0.2228 - loss: 0.7772 - val_dice_coefficient: 0.0104 - val_loss: 0.9895 - learning_rate: 1.0000e-04
Epoch 3/60
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 365ms/step - dice_coefficient: 0.3054 - loss: 0.6946
Epoch 3: val_loss did not improve from 0.96485
[1m115/115[0m [32m━━━━━━━━━━━━━━━━

In [11]:
from IPython.display import FileLink

FileLink("/kaggle/working/files/log.csv")