In [1]:
import datetime
import json
import os
import sys
import time
from configparser import ConfigParser
import numpy as np
import pandas as pd
import tensorflow as tf

from sklearn.model_selection import train_test_split
from models.pnsamp_2d import PNSAMP_2D

sys.path.append('../')
from utils.directory import check_or_create
from utils.data_generators import DicomDataGenerator

c:\users\dell\desktop\coms4059a - research project\venv\lib\site-packages\numpy\.libs\libopenblas.gk7gx5keq4f6uyo3p26ulgbqyhgqo7j4.gfortran-win_amd64.dll
c:\users\dell\desktop\coms4059a - research project\venv\lib\site-packages\numpy\.libs\libopenblas.WCDJNK7YVMPZQ2ME2ZZHJJRJ3JIKNDB7.gfortran-win_amd64.dll


# Training

In [2]:
# Load the project configuration. The path is relative to the notebook's
# working directory, hence the leading '../'.
config_file = '../project.conf'
parser = ConfigParser()

# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Fail here with a clear message instead of with a
# NoSectionError on the first lookup below.
if not parser.read(config_file):
    raise FileNotFoundError("Could not read configuration file: {}".format(config_file))

# Directory settings.
# Each configured path is prefixed with '../' because this code runs from a notebook.
# check_or_create is a project helper; its return value is used as the path
# from here on (presumably it creates the directory when missing - confirm).
saved_weights_path = check_or_create('../' + parser.get('train', 'SAVED_WEIGHTS_PATH'))
checkpoints_path = check_or_create('../' + parser.get('train', 'CHECKPOINTS_PATH'))
history_path = check_or_create('../' + parser.get('train', 'HISTORY_PATH'))
data_path = check_or_create('../' + parser.get('train', 'DATA_PATH'))

# Training settings, converted to their numeric types where needed.
batch_size = parser.getint('train', 'BATCH_SIZE')
image_size = parser.getint('train', 'IMAGE_SIZE')
variant = parser.get('train', 'VARIANT')
epochs = parser.getint('train', 'EPOCHS')
test_ratio = parser.getfloat('train', 'TEST_RATIO')

In [3]:
# Identifier columns are read as strings so pandas does not coerce them to
# numbers (presumably to keep zero-padded ids intact - confirm against the CSV).
id_column_dtypes = dict.fromkeys(['patient_id', 'nodule_no', 'slice_no'], str)
meta_csv = os.path.join(data_path, 'meta/meta_info.csv')
df = pd.read_csv(meta_csv, dtype=id_column_dtypes)

# Train only on non-clean scans, i.e. scans that contain at least one nodule.
df = df.loc[df['is_clean'] == False]

def get_paths(x, root=None):
    """Build the image and mask ``.npy`` file paths for one metadata row.

    Args:
        x: Indexable holding three strings, in order: the patient id, the
            image file name (without extension) and the mask file name
            (without extension).
        root: Directory that contains the ``image`` and ``mask`` sub-folders.
            Defaults to the notebook-level ``data_path`` when omitted.

    Returns:
        A two-element list ``[image_path, mask_path]``.
    """
    if root is None:
        # Resolved at call time so the notebook's configured data_path is used.
        root = data_path
    patient_id, image_name, mask_name = x[0], x[1], x[2]
    patient_dir = 'LIDC-IDRI-' + patient_id
    return [
        os.path.join(root, 'image', patient_dir, image_name + '.npy'),
        os.path.join(root, 'mask', patient_dir, mask_name + '.npy'),
    ]

# Resolve the image / mask file location for every remaining row.
temp = df[['patient_id', 'original_image', 'mask_image']].values
paths = [get_paths(row) for row in temp]
df_paths = pd.DataFrame(paths, columns=['img_path', 'mask_path'])

# Give both frames the same fresh positional index so the column-wise
# concatenation below pairs each row with its own paths.
df = df.reset_index(drop=True)
df_paths = df_paths.reset_index(drop=True)

df = pd.concat([df, df_paths], axis=1, sort=False)

df.head()

Unnamed: 0,patient_id,nodule_no,slice_no,original_image,mask_image,subtlety,internalStructure,calcification,sphericity,margin,lobulation,spiculation,texture,malignancy,is_cancer,is_clean,img_path,mask_path
0,1,0,0,0001_NI000_slice000,0001_MA000_slice000,5.0,1.0,6.0,4.0,4.0,3.0,5.0,5.0,5,True,False,.././data/processed\image\LIDC-IDRI-0001\0001_...,.././data/processed\mask\LIDC-IDRI-0001\0001_M...
1,1,0,1,0001_NI000_slice001,0001_MA000_slice001,5.0,1.0,6.0,4.0,4.0,3.0,5.0,5.0,5,True,False,.././data/processed\image\LIDC-IDRI-0001\0001_...,.././data/processed\mask\LIDC-IDRI-0001\0001_M...
2,1,0,2,0001_NI000_slice002,0001_MA000_slice002,5.0,1.0,6.0,4.0,4.0,3.0,5.0,5.0,5,True,False,.././data/processed\image\LIDC-IDRI-0001\0001_...,.././data/processed\mask\LIDC-IDRI-0001\0001_M...
3,1,0,3,0001_NI000_slice003,0001_MA000_slice003,5.0,1.0,6.0,4.0,4.0,3.0,5.0,5.0,5,True,False,.././data/processed\image\LIDC-IDRI-0001\0001_...,.././data/processed\mask\LIDC-IDRI-0001\0001_M...
4,1,0,4,0001_NI000_slice004,0001_MA000_slice004,5.0,1.0,6.0,4.0,4.0,3.0,5.0,5.0,5,True,False,.././data/processed\image\LIDC-IDRI-0001\0001_...,.././data/processed\mask\LIDC-IDRI-0001\0001_M...


In [4]:
from sklearn.preprocessing import MinMaxScaler

# Attribute columns passed to the data generator as `features_cols` and used
# to size the model's attribute output.
features = ['subtlety', 'margin', 'lobulation', 'texture']

# Rescale the selected columns with a min-max scaler fitted on the whole frame,
# overwriting the original values in `df`.
scaler = MinMaxScaler()
df[features] = scaler.fit_transform(df[features])
df.head()

Unnamed: 0,patient_id,nodule_no,slice_no,original_image,mask_image,subtlety,internalStructure,calcification,sphericity,margin,lobulation,spiculation,texture,malignancy,is_cancer,is_clean,img_path,mask_path
0,1,0,0,0001_NI000_slice000,0001_MA000_slice000,1.0,1.0,6.0,4.0,0.75,0.5,5.0,1.0,5,True,False,.././data/processed\image\LIDC-IDRI-0001\0001_...,.././data/processed\mask\LIDC-IDRI-0001\0001_M...
1,1,0,1,0001_NI000_slice001,0001_MA000_slice001,1.0,1.0,6.0,4.0,0.75,0.5,5.0,1.0,5,True,False,.././data/processed\image\LIDC-IDRI-0001\0001_...,.././data/processed\mask\LIDC-IDRI-0001\0001_M...
2,1,0,2,0001_NI000_slice002,0001_MA000_slice002,1.0,1.0,6.0,4.0,0.75,0.5,5.0,1.0,5,True,False,.././data/processed\image\LIDC-IDRI-0001\0001_...,.././data/processed\mask\LIDC-IDRI-0001\0001_M...
3,1,0,3,0001_NI000_slice003,0001_MA000_slice003,1.0,1.0,6.0,4.0,0.75,0.5,5.0,1.0,5,True,False,.././data/processed\image\LIDC-IDRI-0001\0001_...,.././data/processed\mask\LIDC-IDRI-0001\0001_M...
4,1,0,4,0001_NI000_slice004,0001_MA000_slice004,1.0,1.0,6.0,4.0,0.75,0.5,5.0,1.0,5,True,False,.././data/processed\image\LIDC-IDRI-0001\0001_...,.././data/processed\mask\LIDC-IDRI-0001\0001_M...


In [5]:
# Number of full batches available in the filtered metadata frame.
num_batches = int(len(df) / batch_size)

# Fixed seed for the train/validation split. Without it every run (including
# runs that resume from a saved checkpoint) draws a different split, so batches
# the restored model was trained on can end up in the validation set.
SPLIT_SEED = 42

print("|=============BUILDING GENERATOR=============|\n")
datagen = DicomDataGenerator(df,
                             img_path_col_name='img_path',
                             mask_path_col_name='mask_path',
                             features_cols=features,
                             batch_size=batch_size,
                             target_size=(image_size, image_size, 1)
                             )

# Split the generator's elements into training and validation parts.
# The training loop unpacks every element as an
# (image, mask, features, malignancy) tuple, so the split is presumably made
# at batch granularity - confirm against DicomDataGenerator.
traingen, valigen = train_test_split(datagen, test_size=test_ratio, random_state=SPLIT_SEED)




In [6]:
# Build the network: square single-channel input, one attribute output per
# selected feature column.
input_shape = (image_size, image_size, 1)
model = PNSAMP_2D(num_attributes=len(features), input_size=input_shape, variant=variant)
model.summary()

# Optimizer with its default settings.
optimizer = tf.keras.optimizers.Adam()

# One loss object per model output, in the order the training loop uses them:
# loss_fn1 scores the masks, loss_fn2 the feature values, loss_fn3 the
# malignancy labels.
loss_fn1 = tf.keras.losses.BinaryCrossentropy()
loss_fn2 = tf.keras.losses.BinaryCrossentropy()
loss_fn3 = tf.keras.losses.SparseCategoricalCrossentropy()

# Training checkpoints live in a per-variant folder; only the three most
# recent ones are kept.
checkpoint_dir = os.path.join(checkpoints_path, variant)
checkpoint = tf.train.Checkpoint(step=tf.Variable(1), optimizer=optimizer, net=model)
manager = tf.train.CheckpointManager(checkpoint, checkpoint_dir, max_to_keep=3)

# Resume from the newest checkpoint when one exists.
latest_checkpoint = manager.latest_checkpoint
checkpoint.restore(latest_checkpoint)
if not latest_checkpoint:
    print("Note: Starting training from scratch.")
else:
    print("Note: Starting training with model restored from {}".format(latest_checkpoint))

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 64, 64, 1)]  0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 64, 64, 2)    18          input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 64, 64, 2)    8           conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 64, 64, 2)    0           batch_normalization[0][0]        
______________________________________________________________________________________________

In [None]:
# Record of training: every list gets one entry per epoch.
# 'train_loss' / 'val_loss' hold the mean of the first loss (loss_fn1 on the
# masks), which is the loss that is back-propagated and used for checkpoint
# selection. The '*_feats_loss' / '*_mal_loss' entries track the other two
# losses for monitoring.
history = {
    'train_loss': [],
    'val_loss': [],
    'training_time': [],
    'train_feats_loss': [],
    'train_mal_loss': [],
    'val_feats_loss': [],
    'val_mal_loss': [],
}


def _batch_losses(x_batch, mask_batch, feats_batch, mal_batch, training):
    """Run one forward pass and return the three loss tensors.

    Args:
        x_batch: Model input for the batch.
        mask_batch: Target compared with the first model output by loss_fn1.
        feats_batch: Target compared with the second model output by loss_fn2.
        mal_batch: Target compared with the third model output by loss_fn3.
        training: Forwarded to the model call; True while fitting, False when
            evaluating.

    Returns:
        Tuple ``(loss_value1, loss_value2, loss_value3)``.
    """
    segmentation_logits, multi_regr_logits, class_logits = model(x_batch, training=training)
    return (
        loss_fn1(mask_batch, segmentation_logits),
        loss_fn2(feats_batch, multi_regr_logits),
        loss_fn3(mal_batch, class_logits),
    )


def _epoch_mean(total, count):
    """Return total / count as a plain Python float, or NaN when count is 0.

    Plain floats keep `history` JSON-serialisable; NaN avoids a
    ZeroDivisionError when a split yields no batches.
    """
    return float(total) / count if count else float('nan')


# The validation loss is used to save a checkpoint of the model
# whenever it improves on the best value seen so far.
best_validation_loss = np.inf

print("|==================TRAINING==================|\n")
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))

    # ------------------------- training pass -------------------------
    train_totals = np.zeros(3)  # running sums of loss 1, 2 and 3
    train_steps = 0
    start_train_time = time.time()
    for x_batch_train, mask_batch_train, feats_batch_train, mal_batch_train in traingen:
        # Record the forward pass on the tape to enable auto-differentiation.
        # A non-persistent tape is enough: gradients are requested only once.
        with tf.GradientTape() as tape:
            loss_value1, loss_value2, loss_value3 = _batch_losses(
                x_batch_train, mask_batch_train, feats_batch_train, mal_batch_train,
                training=True)

        step_losses = [loss_value1.numpy(), loss_value2.numpy(), loss_value3.numpy()]
        print(*step_losses)

        # Each running total accumulates its own loss.
        train_totals += step_losses
        train_steps += 1

        # NOTE(review): only the first (mask) loss is back-propagated; the
        # combined variant was left disabled by the author:
        # grads = tape.gradient([loss_value1, loss_value2, loss_value3], model.trainable_weights)
        grads = tape.gradient(loss_value1, model.trainable_weights)

        # Variables that do not contribute to loss_value1 get a None gradient
        # and are skipped.
        optimizer.apply_gradients(
            (grad, var)
            for grad, var in zip(grads, model.trainable_weights)
            if grad is not None
        )

    end_train_time = time.time()
    history['training_time'].append(end_train_time - start_train_time)
    history['train_loss'].append(_epoch_mean(train_totals[0], train_steps))
    history['train_feats_loss'].append(_epoch_mean(train_totals[1], train_steps))
    history['train_mal_loss'].append(_epoch_mean(train_totals[2], train_steps))

    # ------------------------ validation pass ------------------------
    # Evaluation only: no gradient tape, and training=False so layers such as
    # batch normalisation run in inference mode and are not updated with
    # validation data.
    val_totals = np.zeros(3)
    val_steps = 0
    for x_batch_val, mask_batch_val, feats_batch_val, mal_batch_val in valigen:
        val_losses = _batch_losses(
            x_batch_val, mask_batch_val, feats_batch_val, mal_batch_val,
            training=False)

        step_losses = [loss.numpy() for loss in val_losses]
        print(*step_losses)

        val_totals += step_losses
        val_steps += 1

    history['val_loss'].append(_epoch_mean(val_totals[0], val_steps))
    history['val_feats_loss'].append(_epoch_mean(val_totals[1], val_steps))
    history['val_mal_loss'].append(_epoch_mean(val_totals[2], val_steps))

    # Save the model when the validation loss improved (lower is better).
    # A NaN validation loss (no validation batches) never compares as lower.
    if history['val_loss'][-1] < best_validation_loss:
        save_path = manager.save()
        print("Saved checkpoint for step {}: {}, loss {:1.3f}".format(int(checkpoint.step),
                                                                      save_path,
                                                                      history['val_loss'][-1]))
        model.save_weights(os.path.join(saved_weights_path, variant), save_format='tf')
        checkpoint.step.assign_add(1)
        best_validation_loss = history['val_loss'][-1]



Start of epoch 0
0.58833086 0.46882397 1.4537046
0.5370314 0.42446822 2.230518
0.5382066 0.3330621 1.6081399
0.52807283 0.45943692 1.4088643
0.5373131 0.5006338 1.3809446
0.52347004 0.3188972 2.2931032
0.5412611 0.45490968 1.3428144
0.50708175 0.4518629 1.2730544
0.500036 0.42383966 1.6772752
0.504698 0.463202 2.2459345
0.49332932 0.34586468 1.3806664
0.49182212 0.18703628 1.4090141
0.4850591 0.40130675 1.7044346
0.4864888 0.40961945 1.20692
0.49753028 0.3182001 1.7537067
0.4753669 0.44592315 1.6422564
0.4832935 0.35008633 1.4000349
0.47817624 0.43200743 3.641121
0.47366074 0.27540952 1.215337
0.47457945 0.32114834 1.8118219
0.47030592 0.37075275 1.3076794
0.47271 0.36821097 1.5254531
0.46402675 0.32942304 1.2452788
0.4606216 0.35881644 1.6028022
0.4577448 0.0051972065 1.5559933
0.45706078 0.37315255 1.4360086
0.4573189 0.27039886 1.4693007
0.45587516 0.43220845 1.4717252
0.44950536 0.42215142 1.5266116
0.4525994 0.23401028 1.4649385
0.45208302 0.43738788 1.1076294
0.45651287 0.47164

In [None]:
# Report the last recorded losses; the lists are empty when no epoch completed.
if history['train_loss'] and history['val_loss']:
    print('Final training loss', history['train_loss'][-1])
    print('Final validation loss', history['val_loss'][-1])
else:
    print('No completed epochs recorded in history.')

# Save the history file.
# The file name carries the full date, hour and zero-padded minute. The
# previous scheme collapsed the hour to "00"/"12" and left the minute
# unpadded, so runs from different hours could overwrite each other.
today = datetime.datetime.now()
timestamp = today.strftime('%Y%m%d%H%M')

file_path = check_or_create(os.path.join(history_path, variant))
history_file = os.path.join(file_path, 'history_{}.json'.format(timestamp))

with open(history_file, 'w') as fp:
    # default=float converts NumPy scalar losses, which the json module cannot
    # serialise on its own, into plain floats.
    json.dump(history, fp, default=float)