In [None]:
!nvidia-smi

Mon Jan 24 13:50:12 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.46       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   38C    P8    26W / 149W |      0MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
import math
import os
from collections import Counter
from sys import getsizeof

import cv2
import keras
import numpy as np
from skimage import io
from skimage.transform import rescale
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
# Must stay after `import keras`: this rebinds the name `keras` to tensorflow's copy.
from tensorflow import keras

In [None]:
# Seed NumPy's global RNG so the np.random.shuffle calls in this notebook are repeatable
seed = 7
np.random.seed(seed)

# Root folder of the lesion images; load_paths() treats every entry inside it as one class
data_path = '/content/drive/MyDrive/Newcastle University/Machine Learning/lesions'

def load_paths(path, num_classes=7):
  """Collect image paths under ``path`` and one-hot encode their class labels.

  ``path`` must contain one sub-directory per class. Class directories are
  visited in sorted order, so the integer label of a class is its position in
  the sorted directory listing and does not depend on filesystem ordering.

  Args:
    path: Root directory holding one folder per class.
    num_classes: Width of the one-hot encoding (defaults to 7).

  Returns:
    A tuple ``(image_paths, labels)``: ``image_paths`` is a tuple of file
    paths in shuffled order and ``labels`` is the matching one-hot array
    produced by ``keras.utils.to_categorical``.

  Raises:
    ValueError: If no image files are found under ``path``.
  """
  image_paths = []
  y = []

  # os.listdir() gives no ordering guarantee; sorting keeps the class -> label
  # mapping stable between runs. Plain files at the top level are skipped
  # because they are not class folders and cannot be listed.
  class_names = sorted(
      elem for elem in os.listdir(path) if os.path.isdir(path + '/' + elem))

  for label, elem in enumerate(class_names):

    print('class: ' + elem + ' | label: ' + str(label))
    path1 = path + '/' + elem
    # Sorted so that, with a fixed NumPy seed, the shuffle below is repeatable.
    for im in sorted(os.listdir(path1)):
      image_paths.append(path1 + '/' + im)
      y.append(label)

  if not image_paths:
    raise ValueError('no images found under ' + str(path))

  # shuffle paths and labels together so they stay aligned
  c = list(zip(image_paths, y))
  np.random.shuffle(c)
  image_paths, y = zip(*c)

  return image_paths, keras.utils.to_categorical(y, num_classes=num_classes)

# Gather every image path with its one-hot label (load_paths shuffles them)
image_paths, labels = load_paths(data_path)

# Hold out 20% of the samples; random_state pins the split on its own, separately from `seed`
x_train, x_test, y_train, y_test = train_test_split(image_paths, labels, test_size = 0.2, random_state = 42)

In [None]:
# Training data (non-augmented)
train_path1 = '/content/drive/MyDrive/NCL/ML/Dataset/train'
# Training data (augmented) - the folder the class list and train_list are built from
train_path2 = '/content/drive/MyDrive/NCL/ML/Dataset/output'
# Validation data - the folder test_list / test_labels are built from
valid_path = '/content/drive/MyDrive/NCL/ML/Dataset/validation'

In [None]:
# Class folder names in sorted order; a class's position in this list is its integer label
classes = sorted(os.listdir(train_path2))

In [None]:
# Build (full image path, integer label) pairs for every file in each class folder.
# Storing full paths means the loaders never need to know the dataset root.
train_pairs = [
    (train_path2 + '/' + str(class_name) + '/' + str(file_name), class_index)
    for class_index, class_name in enumerate(classes)
    for file_name in os.listdir(train_path2 + '/' + str(class_name))
]

# Shuffle paths and labels together so they stay aligned
np.random.shuffle(train_pairs)
train_list, train_labels = zip(*train_pairs)

# One-hot encode the integer labels
train_labels = keras.utils.to_categorical(train_labels, num_classes=7)

In [None]:
test_list, test_labels = [], []

# Walk the validation folders in the same class order as training so that
# label indices agree. This set is deliberately left unshuffled.
for class_index, class_name in enumerate(classes):
    class_dir = valid_path + '/' + str(class_name)
    file_names = os.listdir(class_dir)

    # Full paths, so the loaders never need the dataset root
    test_list.extend(class_dir + '/' + str(file_name) for file_name in file_names)
    test_labels.extend([class_index] * len(file_names))

# One-hot encode the integer labels
test_labels = keras.utils.to_categorical(test_labels, num_classes=7)

In [None]:
# Sanity check: (number of training images, 7 one-hot columns)
train_labels.shape

(20000, 7)

In [None]:
# Sanity check: (number of validation images, 7 one-hot columns)
test_labels.shape

(2005, 7)

# Generator for data loading while training the model
def im_datagen(image_list, label_list, batch_size = 32, shuffle = False,
               num_classes = 7, target_size = (224, 224)):
    """Yield ``(images, labels)`` batches forever, loading images from disk.

    Args:
        image_list: Sequence of image file paths.
        label_list: Labels aligned with ``image_list``. Either integer class
            ids or rows that are already one-hot with ``num_classes`` columns.
        batch_size: Maximum number of samples per batch. The last batch of a
            pass is smaller when the dataset size is not a multiple of it.
        shuffle: When true, reshuffle paths and labels together before every
            pass over the data.
        num_classes: Width of the one-hot label encoding.
        target_size: Size passed to ``cv2.resize`` for every image.

    Yields:
        ``(images, labels)`` where ``images`` is the stacked, resized batch
        divided by 255 and ``labels`` is a one-hot array of the same length.

    Raises:
        ValueError: If the dataset is empty. Without this check the loop
            below would spin forever without yielding anything.
        FileNotFoundError: If an image cannot be read by ``cv2.imread``.
    """
    total_elements = len(label_list)
    if total_elements == 0:
        raise ValueError('im_datagen received an empty dataset')

    while True:
        # shuffle the data, keeping each image paired with its label
        if shuffle:
            c = list(zip(image_list, label_list))
            np.random.shuffle(c)
            image_list, label_list = zip(*c)

        for i in range(0, total_elements, batch_size):

            images = []
            for x in image_list[i:i+batch_size]:
                img = cv2.imread(x)
                # cv2.imread signals failure by returning None, not by raising
                if img is None:
                    raise FileNotFoundError('could not read image: ' + str(x))
                images.append(cv2.resize(img, target_size))
            images = np.asarray(images)
            images = images/255.

            labels = np.asarray(label_list[i:i+batch_size])
            # Labels that are already one-hot must pass through unchanged;
            # encoding them a second time would add a spurious extra axis.
            already_one_hot = labels.ndim == 2 and labels.shape[1] == num_classes
            if not already_one_hot:
                labels = keras.utils.to_categorical(labels, num_classes = num_classes)

            yield images, labels

batch_size = 50

# im_datagen emits a final, smaller batch when the dataset size is not a
# multiple of batch_size, so one complete pass takes ceil(n / batch_size)
# steps. Floor division would leave that tail batch out of every epoch and put
# the endless generator out of phase with epoch boundaries.
STEP_SIZE_TRAIN = math.ceil(len(train_labels) / batch_size)
STEP_SIZE_VALID = math.ceil(len(test_labels) / batch_size)

print(STEP_SIZE_TRAIN)
print(STEP_SIZE_VALID)

# Training batches are reshuffled on every pass
train_gen = im_datagen(train_list, train_labels, batch_size = batch_size, shuffle = True)

# Evaluation batches keep a fixed order
test_gen = im_datagen(test_list, test_labels, batch_size = batch_size, shuffle = False)

x_test[17]

y_test[17]

# y_test rows are one-hot vectors, which are unhashable; count integer class ids instead
Counter(np.argmax(y_test, axis=1).tolist()).keys() # class ids present in the held-out split

Counter(np.argmax(y_test, axis=1).tolist()).values() # number of held-out samples per class id

In [None]:
class LeisonSequence(keras.utils.Sequence):
    """Keras ``Sequence`` that reads, resizes and rescales images per batch.

    Only file paths and labels are held in memory; pixel data is loaded from
    disk when a batch is requested.

    Args:
        x_set: Sequence of image file paths.
        y_set: Labels aligned with ``x_set``; returned as given, only
            converted to a NumPy array.
        batch_size: Maximum number of samples per batch.
        target_size: Size passed to ``cv2.resize`` for every image.

    Raises:
        ValueError: If ``x_set`` and ``y_set`` differ in length, or if
            ``batch_size`` is not positive.
    """

    def __init__(self, x_set, y_set, batch_size = 32, target_size = (224, 224)):
        # Let the Keras base class set up its own state before ours.
        super().__init__()
        if len(x_set) != len(y_set):
            raise ValueError('x_set and y_set must have the same length')
        if batch_size <= 0:
            raise ValueError('batch_size must be a positive integer')
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size
        self.target_size = target_size

    def __len__(self):
        """Return the number of batches per epoch, counting a partial last batch."""
        return math.ceil(len(self.x) / self.batch_size)

    def __getitem__(self, idx):
        """Load batch ``idx`` and return ``(images / 255, labels)`` as arrays.

        Raises:
            FileNotFoundError: If an image cannot be read by ``cv2.imread``.
        """
        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_x = self.x[start:stop]
        batch_y = self.y[start:stop]

        images = []

        for file_name in batch_x:
            image = cv2.imread(file_name)
            # cv2.imread signals failure by returning None, not by raising
            if image is None:
                raise FileNotFoundError('could not read image: ' + str(file_name))
            images.append(cv2.resize(image, self.target_size))

        return (np.array(images)/255), np.array(batch_y)

from matplotlib import pyplot as plt

# Load the first five held-out images at half height and width (channel axis unscaled)
preview_files = x_test[0:5]
test = np.array([rescale(io.imread(preview_file), (0.5, 0.5, 1)) for preview_file in preview_files])

# Display the fourth preview image
plt.imshow(test[3])

In [None]:
# Batch loaders passed to model.fit: 100 images per training batch, 50 per validation batch
train_img_gen = LeisonSequence(train_list, train_labels, batch_size=100)
val_img_gen = LeisonSequence(test_list, test_labels, batch_size=50)

In [None]:
#base_model = keras.applications.ResNet50(
# VGG16 with its built-in classification top sized for 7 classes on 224x224 RGB input.
# weights=None: no pretrained weights are requested here; a checkpoint is loaded
# into the model in a later cell.
model = keras.applications.VGG16(
    include_top=True,
    weights=None,
    input_shape=(224, 224, 3),
    classes=7,
    pooling=None
)

In [None]:
#base_model.summary()
# Print the layer-by-layer architecture and parameter counts of `model`
model.summary()

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

# Attach a new Dense(128) -> Dense(7, softmax) head to the output of a backbone's
# 'global_average_pooling2d' layer and rebind `model` to the result.
# NOTE(review): `base_model` is never assigned in the visible cells (its creation
# is commented out where the VGG16 model is built), so unless it is defined
# outside this excerpt this cell raises NameError on a fresh kernel - confirm
# whether the cell is still needed.
# NOTE(review): `inputs` is created but not used; the model is built from
# base_model.input instead.
inputs = keras.Input(shape=(224, 224, 3))
last_layer = base_model.get_layer('global_average_pooling2d')

print ('last layer output shape:', last_layer.output_shape)

bm_out = last_layer.output
#x = base_model(inputs, training=False)
x = keras.layers.Dense(128, activation='relu')(bm_out)
#x = keras.layers.Dropout(0.5)(x)
outputs = keras.layers.Dense(7, activation='softmax')(x)
#model = keras.Model(inputs, outputs)
model = keras.Model(base_model.input, outputs)

model.summary()

In [None]:
# Exponential learning-rate decay: starts at 2e-4 with decay_rate 0.9 per 100 steps.
# Only the Adam optimizer below uses this schedule.
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.0002,
    decay_steps=100,
    decay_rate=0.9)
# Candidate optimizers; `sgd` is the one passed to model.compile in this notebook.
sgd = keras.optimizers.SGD(learning_rate=0.0002)
adam = keras.optimizers.Adam(learning_rate=lr_schedule, amsgrad=True)
nadam = keras.optimizers.Nadam(learning_rate=0.0002)

In [None]:
# One-hot targets with a softmax output: categorical cross-entropy, tracked with accuracy
model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Resume from the previously saved checkpoint rather than training from scratch
model.load_weights('/content/drive/MyDrive/NCL/ML/Models/vgg16_aug.h5')

In [None]:
# Save to a new file, and only when validation accuracy beats the best seen so far
cp = keras.callbacks.ModelCheckpoint(
    filepath='/content/drive/MyDrive/NCL/ML/Models/vgg16_aug_2.h5',
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1,
)

In [None]:
# Train for up to 50 epochs on the batch loaders. `callbacks` is documented as a
# list of callback instances, so the checkpoint callback is wrapped in one.
history = model.fit(train_img_gen, epochs=50, validation_data=val_img_gen, callbacks=[cp])

Epoch 1/50
Epoch 00001: val_accuracy improved from -inf to 0.75362, saving model to /content/drive/MyDrive/NCL/ML/Models/vgg16_aug_2.h5
Epoch 2/50
Epoch 00002: val_accuracy improved from 0.75362 to 0.76010, saving model to /content/drive/MyDrive/NCL/ML/Models/vgg16_aug_2.h5
Epoch 3/50
Epoch 00003: val_accuracy did not improve from 0.76010
Epoch 4/50
Epoch 00004: val_accuracy improved from 0.76010 to 0.76060, saving model to /content/drive/MyDrive/NCL/ML/Models/vgg16_aug_2.h5
Epoch 5/50
Epoch 00005: val_accuracy did not improve from 0.76060
Epoch 6/50
Epoch 00006: val_accuracy improved from 0.76060 to 0.76259, saving model to /content/drive/MyDrive/NCL/ML/Models/vgg16_aug_2.h5
Epoch 7/50
Epoch 00007: val_accuracy did not improve from 0.76259
Epoch 8/50