In [48]:
import matplotlib.pyplot as plt
import numpy as np
from tensorflow import keras
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras import preprocessing
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.optimizers import SGD
from tensorflow.keras import regularizers
from keras.callbacks import EarlyStopping
import numpy as np
from sklearn.utils.class_weight import compute_class_weight

In [49]:
import os
import random
from pathlib import Path
import shutil

In [50]:
#VAE architecture and paramteres
kernels = 2
strides = 2
latent_dim = 28
filters = [32, 64, 128, 224, 512]
input_shape = (224, 224, 3)
last_conv_dim = int(input_shape[0] / (2 ** len(filters)))
b_norm = 3
#Batch Norm
epsilon = 1e-5
#spatial Classifier
num_classes = 4
#dataset
ham_dataset_dir = 'ham_minified'
batch_size = 48
seed = 42
#define early stopping paramter
es = EarlyStopping(monitor='val_accuracy', mode='max', verbose=1, patience=5)

In [51]:
ham_dataset_dir = 'ham_minified'
nv_source_dir = 'nv_src'
nv_test_num = 200
nv_gen_num = 0
batch_size = 48
seed = 42

In [52]:
images = list(os.walk(ham_dataset_dir + '/nv'))[0][2]
for image_to_delete in images:
    path = Path(f'{ham_dataset_dir}/nv/{image_to_delete}')
    path.unlink(missing_ok=False)

In [53]:
images = list(os.walk(nv_source_dir + '/nv_true'))[0][2]
nv_images = random.sample(images, nv_test_num)
for true_image in nv_images:
    shutil.copy(nv_source_dir + '/nv_true/' + true_image, ham_dataset_dir + '/nv/' + true_image)

In [54]:
images = list(os.walk(nv_source_dir + '/gen_data'))[0][2]
gen_images = random.sample(images, nv_gen_num)
for gen_image in gen_images:
    shutil.copy(nv_source_dir + '/gen_data/' + gen_image, ham_dataset_dir + '/nv/' + gen_image)

In [55]:
#load and split datatse
train_ds = keras.preprocessing.image_dataset_from_directory(ham_dataset_dir, validation_split=0.3, color_mode='rgb',
                                                          labels='inferred', shuffle=True, subset='validation', image_size=(224, 224),
                                                          batch_size=batch_size, seed=seed)

val_ds = keras.preprocessing.image_dataset_from_directory(ham_dataset_dir, validation_split=0.3, color_mode='rgb',
                                                          labels='inferred', shuffle=True, subset='validation', image_size=(224, 224),
                                                          batch_size=batch_size, seed=seed)

Found 2926 files belonging to 4 classes.
Using 877 files for validation.
Found 2926 files belonging to 4 classes.
Using 877 files for validation.


In [56]:
classes = train_ds.class_names
train_ds_single_batch = keras.preprocessing.image_dataset_from_directory(ham_dataset_dir, validation_split=0.3, color_mode='rgb',
                                                     labels='inferred', shuffle=True, subset='training', image_size=(224, 224),
                                                     batch_size=1, seed=seed)

Found 2926 files belonging to 4 classes.
Using 2049 files for training.


In [57]:
y = np.array([label.numpy()[0] for _, label in train_ds_single_batch])
class_weights_list = compute_class_weight('balanced', classes=np.unique(y), y=y)
print(class_weights_list)
class_weights = {}

for i in range(len(class_weights_list)):
    class_weights[i] = class_weights_list[i]

[1.40728022 0.66439689 0.65926641 3.73905109]


In [58]:
val_batches = tf.data.experimental.cardinality(val_ds)
test_ds = val_ds.take(val_batches // 2)
val_ds = val_ds.skip(val_batches // 2)

In [59]:
print('Number of validation batches: %d' % tf.data.experimental.cardinality(val_ds))
print('Number of test batches: %d' % tf.data.experimental.cardinality(test_ds))

Number of validation batches: 10
Number of test batches: 9


In [60]:
#scale immages
rescale = keras.layers.experimental.preprocessing.Rescaling(scale=1.0 / 255)
train_ds = train_ds.map(lambda x, y: (rescale(x), y))
val_ds = val_ds.map(lambda x, y: (rescale(x), y))
test_ds = test_ds.map(lambda x, y: (rescale(x), y))

In [61]:
#autotune dataset
train_ds = train_ds.prefetch(tf.data.AUTOTUNE)
val_ds = val_ds.prefetch(tf.data.AUTOTUNE)
test_ds = test_ds.prefetch(tf.data.AUTOTUNE)

In [62]:
#define the neural network
model = Sequential()
for conv_filter in filters:
    model.add(keras.layers.Conv2D(conv_filter, 3, strides=2, padding='same', input_shape=input_shape))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Activation('relu'))

model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(64, kernel_regularizer=regularizers.l1_l2(l1=1e-5, l2=1e-5), activation='relu'))
model.add(keras.layers.Dense(num_classes, activation='softmax'))

#print summary of the model
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_10 (Conv2D)          (None, 112, 112, 32)      896       
                                                                 
 batch_normalization_10 (Bat  (None, 112, 112, 32)     128       
 chNormalization)                                                
                                                                 
 activation_10 (Activation)  (None, 112, 112, 32)      0         
                                                                 
 conv2d_11 (Conv2D)          (None, 56, 56, 64)        18496     
                                                                 
 batch_normalization_11 (Bat  (None, 56, 56, 64)       256       
 chNormalization)                                                
                                                                 
 activation_11 (Activation)  (None, 56, 56, 64)       

In [63]:
#optimize the model with using Adam
model.compile(loss='sparse_categorical_crossentropy', optimizer=keras.optimizers.Adam(), metrics=['accuracy'])

In [None]:
#fit the validation data to the model
history = model.fit(train_ds, validation_data=val_ds, epochs=50, class_weight=class_weights)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50

In [None]:
def plotmodelhistory(history): 
    fig, axs = plt.subplots(1,2,figsize=(15,5)) 
    # summarize history for accuracy
    axs[0].plot(history.history['accuracy']) 
    axs[0].plot(history.history['val_accuracy']) 
    axs[0].set_title('Model Accuracy')
    axs[0].set_ylabel('Accuracy') 
    axs[0].set_xlabel('Epoch')
    
    axs[0].legend(['train', 'validate'], loc='upper left')
    # summarize history for loss
    axs[1].plot(history.history['loss']) 
    axs[1].plot(history.history['val_loss']) 
    axs[1].set_title('Model Loss')
    axs[1].set_ylabel('Loss') 
    axs[1].set_xlabel('Epoch')
    axs[1].legend(['train', 'validate'], loc='upper left')
    plt.show()

# list all data in history
print(history.history.keys())
plotmodelhistory(history)

In [None]:
test_list = list(test_ds)

In [None]:
preds = []
labels = []

for batch_num in range(len(test_list)):
    image_batch, label_batch = test_list[batch_num]
    image_batch = image_batch.numpy()
    label_batch = label_batch.numpy()
    this_pred = model.predict_on_batch(image_batch)
    this_classes = np.argmax(this_pred, axis=1)
    
    pred = tf.keras.utils.to_categorical(this_classes, num_classes=4)
    
    y_test_non_category = [ np.argmax(t) for t in label_batch ]
    y_predict_non_category = [ np.argmax(t) for t in pred ]
    
    labels.extend(y_test_non_category)
    preds.extend(y_predict_non_category)

In [None]:
label_names = sorted(os.listdir(ham_dataset_dir))

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
conf_mat = confusion_matrix(labels, preds)

disp = ConfusionMatrixDisplay(confusion_matrix=conf_mat, display_labels=label_names)
disp.plot()
plt.savefig('plots/cnn_da_ft_confmat_nv_' + str(nv_test_num) + '_gen_' + str(nv_gen_num) + '.pdf')