* Training with 64 x 64 x 32 cubes
* Included train, validation & test dataset to test memory limits

In [1]:
# Import Libraries -------------------------------------------------------------------------------------------------------------------------------------------------------------
import os
import h5py
import keras
import loss
import Helper
import allMetrics
import numpy as np
import tensorflow as tf
import UNetModel_3D
import matplotlib.pyplot as plt
from keras import backend as K
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, CSVLogger

In [2]:
physical_devices = tf.config.list_physical_devices('GPU')

os.environ["CUDA_VISIBLE_DEVICES"]="0"
try:
  for device in physical_devices:
    tf.config.experimental.set_memory_growth(device, True)
except:
  # Invalid device or cannot modify virtual devices once initialized.
  pass


In [3]:
train_fileName = "/media/dro/JHSeagate/FYP/jh_fyp_work/3D_UNet/trials/numpyDatasets/numPyArrays/train_Scans/train_DS3.hdf5"
train_maskfileName = "/media/dro/JHSeagate/FYP/jh_fyp_work/3D_UNet/trials/numpyDatasets/numPyArrays/train_Mask_Scans/train_maskDS3.hdf5"

train_DatasetName = "trainScans_DataSet3"
train_maskDatasetName = "trainMaskScans_DataSet3"


valid_fileName = "/media/dro/JHSeagate/FYP/jh_fyp_work/3D_UNet/trials/numpyDatasets/numPyArrays/valid_Scans/valid_DS1.hdf5"
valid_maskfileName = "/media/dro/JHSeagate/FYP/jh_fyp_work/3D_UNet/trials/numpyDatasets/numPyArrays/valid_Mask_Scans/valid_maskDS1.hdf5"

valid_DatasetName = "validScans_DataSet1"
valid_maskDatasetName = "validMaskScans_DataSet1"


# test_fileName = "/media/dro/JHSeagate/FYP/jh_fyp_work/3D_UNet/trials/numpyDatasets/numPyArrays/test_Scans/test_DS1.hdf5"
# test_maskfileName = "/media/dro/JHSeagate/FYP/jh_fyp_work/3D_UNet/trials/numpyDatasets/numPyArrays/test_Mask_Scans/test_maskDS1.hdf5"

# test_DatasetName = "testScans_DataSet1"
# test_maskDatasetName = "testMaskScans_DataSet1"

In [4]:
# Train Dataset
with h5py.File(train_fileName, 'r') as hf: # File Dir
    train_array = hf[train_DatasetName][:]
    
with h5py.File(train_maskfileName,'r') as hf:
    train_mask_array = hf[train_maskDatasetName][:]
    
# Valid Dataset
with h5py.File(valid_fileName, 'r') as hf: # File Dir
    valid_array = hf[valid_DatasetName][:3375]
    
with h5py.File(valid_maskfileName,'r') as hf:
    valid_mask_array = hf[valid_maskDatasetName][:3375]
    
# # Test Dataset
# with h5py.File(test_fileName, 'r') as hf: # File Dir
#     test_array = hf[test_DatasetName][:]
    
# with h5py.File(test_maskfileName,'r') as hf:
#     test_mask_array = hf[test_maskDatasetName][:]

In [5]:
print(train_array.shape)
print(valid_array.shape)

(23625, 64, 64, 32)
(3375, 64, 64, 32)


In [6]:
train_array = np.expand_dims(train_array, axis=4)
train_mask_array = np.expand_dims(train_mask_array, axis=4)

valid_array = np.expand_dims(valid_array, axis=4)
valid_mask_array = np.expand_dims(valid_mask_array, axis=4)

In [7]:
LR = 0.001
opt = tf.keras.optimizers.Nadam(LR)

input_shape = (64,64,32,1)
num_class = 1

metrics = [allMetrics.dice_coef]

In [8]:
model = UNetModel_3D.build_unet(input_shape, n_classes = num_class)
model.compile(optimizer=opt, loss=loss.tversky_crossentropy, metrics=metrics)
print(model.summary())

2023-02-08 11:34:03.007435: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-02-08 11:34:05.321550: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 38414 MB memory:  -> device: 0, name: A100-SXM4-40GB, pci bus id: 0000:01:00.0, compute capability: 8.0
2023-02-08 11:34:05.323532: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 38414 MB memory:  -> device: 1, name: A100-SXM4-40GB, pci bus id: 0000:47:00.0, compute capability: 8.0
2023-02-08 11:34:05.325409: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:2 with 

sigmoid
Model: "U-Net"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 64, 64, 32,  0           []                               
                                 1)]                                                              
                                                                                                  
 conv3d (Conv3D)                (None, 64, 64, 32,   896         ['input_1[0][0]']                
                                32)                                                               
                                                                                                  
 batch_normalization (BatchNorm  (None, 64, 64, 32,   128        ['conv3d[0][0]']                 
 alization)                     32)                                                   

In [9]:
csv_path = '/media/dro/JHSeagate/FYP/jh_fyp_work/3D_UNet/Final/CSVLogs/Model9.csv'
model_checkpoint_path = '/media/dro/JHSeagate/FYP/jh_fyp_work/3D_UNet/Final/SavedModels/Model9.hdf5'

In [10]:
my_callbacks = [
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=20, mode = 'auto'),
    EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True, mode = 'auto'),
    CSVLogger(csv_path, separator=',', append=True),
    ModelCheckpoint(filepath=model_checkpoint_path,
                    monitor='val_loss',
                    mode='auto',
                    verbose=1,
                    save_best_only= True)
]
#     CustomCallBack()

In [11]:
model_name = 'Model9 (300 Epochs)'
# Helper.telegram_bot_sendtext(f'Model {model_name} started training')

history = model.fit(train_array,
                    train_mask_array,
                    batch_size=5,
                    epochs=300,
                    verbose=1,
                    shuffle = True,
                    validation_data=(valid_array, valid_mask_array),
                    callbacks=my_callbacks)

Epoch 1/300


2023-02-08 11:34:36.806360: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8301

You may not need to update to CUDA 11.1; cherry-picking the ptxas binary is often sufficient.


Epoch 00001: val_loss improved from inf to 1.01610, saving model to /media/dro/JHSeagate/FYP/jh_fyp_work/3D_UNet/Final/SavedModels/Model9.hdf5
Epoch 2/300
Epoch 00002: val_loss improved from 1.01610 to 1.01334, saving model to /media/dro/JHSeagate/FYP/jh_fyp_work/3D_UNet/Final/SavedModels/Model9.hdf5
Epoch 3/300
Epoch 00003: val_loss improved from 1.01334 to 1.01196, saving model to /media/dro/JHSeagate/FYP/jh_fyp_work/3D_UNet/Final/SavedModels/Model9.hdf5
Epoch 4/300
Epoch 00004: val_loss did not improve from 1.01196
Epoch 5/300
Epoch 00005: val_loss did not improve from 1.01196
Epoch 6/300
Epoch 00006: val_loss improved from 1.01196 to 1.00380, saving model to /media/dro/JHSeagate/FYP/jh_fyp_work/3D_UNet/Final/SavedModels/Model9.hdf5
Epoch 7/300
Epoch 00007: val_loss did not improve from 1.00380
Epoch 8/300
Epoch 00008: val_loss improved from 1.00380 to 0.99875, saving model to /media/dro/JHSeagate/FYP/jh_fyp_work/3D_UNet/Final/SavedModels/Model9.hdf5
Epoch 9/300
Epoch 00009: val_los

Epoch 00024: val_loss did not improve from 0.08867
Epoch 25/300
Epoch 00025: val_loss did not improve from 0.08867
Epoch 26/300
Epoch 00026: val_loss did not improve from 0.08867
Epoch 27/300
Epoch 00027: val_loss did not improve from 0.08867
Epoch 28/300
Epoch 00028: val_loss improved from 0.08867 to 0.08349, saving model to /media/dro/JHSeagate/FYP/jh_fyp_work/3D_UNet/Final/SavedModels/Model9.hdf5
Epoch 29/300
Epoch 00029: val_loss did not improve from 0.08349
Epoch 30/300
Epoch 00030: val_loss did not improve from 0.08349
Epoch 31/300
Epoch 00031: val_loss improved from 0.08349 to 0.08103, saving model to /media/dro/JHSeagate/FYP/jh_fyp_work/3D_UNet/Final/SavedModels/Model9.hdf5
Epoch 32/300
Epoch 00032: val_loss did not improve from 0.08103
Epoch 33/300
Epoch 00033: val_loss did not improve from 0.08103
Epoch 34/300
Epoch 00034: val_loss did not improve from 0.08103
Epoch 35/300
Epoch 00035: val_loss did not improve from 0.08103
Epoch 36/300
Epoch 00036: val_loss did not improve fr

KeyboardInterrupt: 

In [12]:
#plot the training and validation IoU and loss at each epoch
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(loss) + 1)
plt.plot(epochs, loss, 'y', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

acc = history.history['dice_coef']
val_acc = history.history['val_dice_coef']

plt.plot(epochs, acc, 'y', label='Training Dice')
plt.plot(epochs, val_acc, 'r', label='Validation Dice')
plt.title('Training and validation Dice')
plt.xlabel('Epochs')
plt.ylabel('Dice')
plt.legend()
plt.show()

NameError: name 'history' is not defined