In [7]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import numpy as np # to handle matrices
import cv2 # to read files
from glob import glob # to extract images
from sklearn.model_selection import train_test_split # to split dataset (80, 10, 10)
import tensorflow as tf # to handle tensors
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger, ReduceLROnPlateau
    # to save model checkpoints, to log training/validation metrics to CSV, to reduce the learning rate on plateau
from tensorflow.keras.optimizers import Adam # type of optimiser
from tensorflow.keras.metrics import Recall, Precision # methods of performance assessment
from UNET_Lung_Segmentation_Model import FullUNET # imports the UNET architecture made beforehand
from UNET_Functions_IoU_DiceCoef_DiceLoss import DiceLoss, DiceCoef, IoU # imports the loss calculation functions

In [8]:
"""Global Parameters/Variables"""
# Target image size in pixels: every image and mask is resized to H rows by W columns.
H, W = 512, 512

## Dataset Management

In [9]:
def CreateDir(path):
    """Create the directory `path`, including any missing parent directories.

    Calling this on a directory that already exists is a no-op.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If `path` already exists but is not a directory.
    """
    # exist_ok=True replaces the separate os.path.exists() check, which could
    # race with another process creating the directory between check and create.
    os.makedirs(path, exist_ok=True)

In [14]:
"""Setting-up Training/Validation/Testing Split"""
def LoadData(path, split=0.1):
    """Collect image/mask file paths and split them into train/validation/test sets.

    Images are listed from ``<path>/orgImgs/*.png`` and masks from
    ``<path>/maskImgs/*.png``. Both listings are sorted, so the i-th image is
    paired with the i-th mask.

    Args:
        path: Dataset root directory containing ``orgImgs`` and ``maskImgs``.
        split: Fraction of the whole dataset held out for validation and,
            separately, for testing. The default 0.1 gives an
            80% / 10% / 10% train/validation/test split.

    Returns:
        ``(xTrain, yTrain), (xValid, yValid), (xTest, yTest)`` where each
        ``x`` is a list of image paths and each ``y`` is the list of the
        corresponding mask paths.

    Raises:
        ValueError: Propagated from ``train_test_split`` when the number of
            images and masks differ.
    """
    # loads img/mask paths in sorted order so they pair up by position
    orgImages = sorted(glob(os.path.join(path, "orgImgs", "*.png")))
    masks = sorted(glob(os.path.join(path, "maskImgs", "*.png")))

    # number of samples held out per split: dataset size multiplied by `split`
    splitSize = int(len(orgImages) * split)

    print(len(orgImages))

    # Images and masks are split in ONE call so both lists are indexed with the
    # same shuffled indices; a length mismatch is rejected instead of silently
    # pairing images with the wrong masks.
    # After this call: train=90%, validation=10%
    xTrain, xValid, yTrain, yValid = train_test_split(
        orgImages, masks, test_size=splitSize, random_state=74
    )

    # Carve the test set out of the remaining training data.
    # After this call: train=80%, validation=10%, test=10%
    xTrain, xTest, yTrain, yTest = train_test_split(
        xTrain, yTrain, test_size=splitSize, random_state=74
    )

    return (xTrain, yTrain), (xValid, yValid), (xTest, yTest)

"""Alterations to Images and Masks"""
def ReadImg(path):
    """Load an image file as a normalised float32 array.

    Args:
        path: Path of the image file to read.

    Returns:
        A float32 numpy array of shape (H, W, 3) with values scaled to [0, 1].

    Raises:
        OSError: If OpenCV cannot read the file at `path`.
    """
    x = cv2.imread(path, cv2.IMREAD_COLOR) # reads img as a 3-channel img
    if x is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the offending path instead of deep inside cv2.resize.
        raise OSError(f"Could not read image file: {path}")
    x = cv2.resize(x, (W, H)) # dsize is (width, height): result has H rows and W columns
    x = x/255.0 # normalise: dividing by max pixel val, x is now a val between 0 and 1
    x = x.astype(np.float32) # converts x to 32-bit floating point
    return x

def ReadMask(path):
    """Load a mask file as a binary single-channel float32 array.

    Args:
        path: Path of the mask image file to read.

    Returns:
        A float32 numpy array of shape (H, W, 1) whose values are 0.0 or 1.0.

    Raises:
        OSError: If OpenCV cannot read the file at `path`.
    """
    x = cv2.imread(path, cv2.IMREAD_GRAYSCALE) # reads mask as a single-channel img
    if x is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the offending path instead of deep inside cv2.resize.
        raise OSError(f"Could not read mask file: {path}")
    x = cv2.resize(x, (W, H)) # dsize is (width, height): result has H rows and W columns
    x = x/255.0 # scale pixel values to the range 0..1
    x = x > 0.5 # threshold to a strictly binary mask (intermediate values snap to 0 or 1)
    x = x.astype(np.float32) # booleans become 0.0 / 1.0
    x = np.expand_dims(x, axis=-1) # adds trailing channel axis: (H, W) -> (H, W, 1)
    return x

"""Converting All Data to Tensors"""
def TfParse(x, y):
    """Turn an (image path, mask path) pair into decoded image and mask tensors.

    Intended as the function passed to ``Dataset.map``: the file reading is
    done by ``ReadImg`` / ``ReadMask`` (OpenCV + NumPy), wrapped in
    ``tf.numpy_function`` so it can run inside a TensorFlow pipeline.

    Args:
        x: Tensor holding the path of an image file.
        y: Tensor holding the path of the matching mask file.

    Returns:
        ``(image, mask)`` float32 tensors with static shapes (H, W, 3) and
        (H, W, 1) respectively.
    """
    def Parse(x, y):
        # the paths arrive as bytes; decode them to str for cv2
        x = x.decode()
        y = y.decode()

        # gets numpy arrays
        x = ReadImg(x)
        y = ReadMask(y)
        return x, y

    # converts x and y to tensors; tf.numpy_function lets a TensorFlow
    # pipeline call code that uses cv2 and np
    # (function used, inputs, output datatypes)
    x, y = tf.numpy_function(Parse, [x, y], [tf.float32, tf.float32])

    # Re-attach the static shapes after numpy_function. The arrays are laid
    # out rows-first, i.e. (height, width, channels), so H must come before W.
    x.set_shape([H, W, 3])
    y.set_shape([H, W, 1])
    return x, y

"""Pipeline for Training and Validation""" 
def TfDataset(X, Y, batch=8):
    """Build the batched tf.data input pipeline for a set of image/mask paths.

    Args:
        X: Sequence of image file paths.
        Y: Sequence of mask file paths, aligned element-wise with `X`.
        batch: Number of (image, mask) pairs per batch.

    Returns:
        A ``tf.data.Dataset`` yielding batches of (image, mask) tensors.
    """
    pipeline = (
        tf.data.Dataset.from_tensor_slices((X, Y))  # one element per (image path, mask path)
        .shuffle(buffer_size=200)  # shuffle the paths so sample order carries no pattern
        .map(TfParse)              # load each pair of files into tensors
        .batch(batch)              # group samples into batches
        .prefetch(4)               # prepare upcoming batches ahead of the consumer
    )
    return pipeline

In [15]:
if __name__ == "__main__":
    # Training entry point: seeds the RNGs, builds the train/validation
    # pipelines, compiles the U-Net and trains it, writing the best model and
    # a CSV log of per-epoch metrics into "TrainingFilesForUnet".

    # --- Seed declaration ---
    np.random.seed(74)
    tf.random.set_seed(74)

    # --- Storage of files produced by training ---
    CreateDir("TrainingFilesForUnet")

    # --- Hyperparameters ---
    batchSize = 2 # batch size before OOM error would occur, don't increase
    lr = 1e-5
    epochNum = 25
    modelPath = os.path.join("TrainingFilesForUnet", "model.h5") # where ModelCheckpoint saves the model
    csvPath = os.path.join("TrainingFilesForUnet", "UNETexcelData.csv") # where CSVLogger writes metrics

    # --- Dataset ---
    # The dataset location can be overridden with the UNET_DATASET_PATH
    # environment variable so the notebook is not tied to one machine; when it
    # is unset, the original local path is used.
    datasetPath = os.environ.get(
        "UNET_DATASET_PATH",
        r"C:\Users\REDACTED NAME\Documents\PortModel\DatasetsForSciExt\DatasetForUnetSegmentation",
    )
    (xTrain, yTrain), (xValid, yValid), (xTest, yTest) = LoadData(datasetPath)

    # displays the amount of imgs and masks in each set
    print(f"Train: x = {len(xTrain)}, y = {len(yTrain)}")
    print(f"Validation: x = {len(xValid)}, y = {len(yValid)}")
    print(f"Test: x = {len(xTest)}, y = {len(yTest)}")

    # creates datasets (the test split is not used in this cell)
    trainDataset = TfDataset(xTrain, yTrain, batch=batchSize)
    validDataset = TfDataset(xValid, yValid, batch=batchSize)

    # --- Model ---
    model = FullUNET((H, W, 3)) # builds the model for (height, width, channels) inputs
    metrics = [DiceCoef, IoU, Recall(), Precision()] # defines the performance metrics
    model.compile(loss=DiceLoss, optimizer=Adam(lr), metrics=metrics) # compiles model

    callbacks = [
        # saves the model to modelPath whenever the monitored val_loss improves
        ModelCheckpoint(modelPath, verbose=1, save_best_only=True),
        # if val_loss does not decrease for 5 epochs, lr is multiplied by 0.1
        # (never going below 1e-7)
        ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=1e-7, verbose=1),
        # records the per-epoch metrics in a csv file
        CSVLogger(csvPath),
    ]

    # training
    model.fit(
        trainDataset,
        epochs=epochNum,
        validation_data=validDataset,
        callbacks=callbacks
    )

60
Train: x = 48, y = 48
Validation: x = 6, y = 6
Test: x = 6, y = 6
Epoch 1/25
Epoch 1: val_loss improved from inf to 0.60982, saving model to TrainingFilesForUnet\model.h5
Epoch 2/25
Epoch 2: val_loss improved from 0.60982 to 0.60463, saving model to TrainingFilesForUnet\model.h5
Epoch 3/25
Epoch 3: val_loss improved from 0.60463 to 0.59466, saving model to TrainingFilesForUnet\model.h5
Epoch 4/25
Epoch 4: val_loss improved from 0.59466 to 0.57599, saving model to TrainingFilesForUnet\model.h5
Epoch 5/25
Epoch 5: val_loss improved from 0.57599 to 0.55059, saving model to TrainingFilesForUnet\model.h5
Epoch 6/25
Epoch 6: val_loss improved from 0.55059 to 0.52763, saving model to TrainingFilesForUnet\model.h5
Epoch 7/25
Epoch 7: val_loss improved from 0.52763 to 0.51604, saving model to TrainingFilesForUnet\model.h5
Epoch 8/25
Epoch 8: val_loss improved from 0.51604 to 0.51276, saving model to TrainingFilesForUnet\model.h5
Epoch 9/25
Epoch 9: val_loss improved from 0.51276 to 0.51223, 