In [None]:
import os
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import gc

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so the '/content/drive/MyDrive/...' paths
# used by the following cells resolve.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Hyper-parameters shared by the cells below.
SIZE = 180        # images are resized to SIZE x SIZE pixels
EPOCHS = 10       # epochs trained per cross-validation fold
BATCH_SIZE = 32
FOLDS = 8         # number of StratifiedKFold splits

# Root of the data set on the mounted Drive; change it here to point the
# listings below at another copy of the data.
DATA_DIR = '/content/drive/MyDrive/skincancer/data'

# os.listdir returns names in arbitrary order. Sorting keeps the sample order
# stable between runs, which matters because the folds are built without
# shuffling. Suffix 1 = "test" folder, suffix 2 = "train" folder.
beningList1 = sorted(os.listdir(os.path.join(DATA_DIR, 'test', 'benign')))
beningList2 = sorted(os.listdir(os.path.join(DATA_DIR, 'train', 'benign')))

malignantList1 = sorted(os.listdir(os.path.join(DATA_DIR, 'test', 'malignant')))
malignantList2 = sorted(os.listdir(os.path.join(DATA_DIR, 'train', 'malignant')))

In [None]:
from cv2 import imread, resize

def create_dataset(x, y, path, files, type_):
    """Load, resize and label every image listed in ``files``.

    Each file name is resolved against ``path``, read with ``imread``, resized
    to ``SIZE`` x ``SIZE`` and appended to ``x``; ``type_`` is appended to ``y``
    once per image. Both lists are modified in place and nothing is returned.

    Args:
        x: list that receives the resized image arrays.
        y: list that receives one label per loaded image.
        path: directory containing ``files``; a trailing separator is optional.
        files: iterable of file names located inside ``path``.
        type_: label stored for every image loaded by this call.

    Raises:
        ValueError: if a file cannot be read as an image.
    """
    # NOTE(review): OpenCV presumably returns channels in BGR order while the
    # ImageNet weights used later were likely trained on RGB - confirm whether
    # a colour conversion is wanted here.
    for file_name in files:
        # os.path.join copes with `path` given with or without a trailing slash;
        # plain concatenation silently produced a wrong path in the latter case.
        file_path = os.path.join(path, file_name)
        img = imread(file_path)
        if img is None:
            # imread reports an unreadable/missing file by returning None;
            # fail here with the offending path instead of inside resize().
            raise ValueError('could not read image: ' + file_path)
        x.append(resize(img, (SIZE, SIZE)))
        y.append(type_)


In [None]:
from sklearn.utils.class_weight import compute_class_weight
from keras.utils.np_utils import to_categorical

# Pool the "train" and "test" folders into one data set; the cross-validation
# cell later draws its own train/validation folds from this pool.
# Label 0 = benign, label 1 = malignant.
X = []
y = []
create_dataset(X, y, '/content/drive/MyDrive/skincancer/data/train/benign/', beningList2, 0)
create_dataset(X, y, '/content/drive/MyDrive/skincancer/data/test/benign/', beningList1, 0)
create_dataset(X, y, '/content/drive/MyDrive/skincancer/data/train/malignant/', malignantList2, 1)
create_dataset(X, y, '/content/drive/MyDrive/skincancer/data/test/malignant/', malignantList1, 1)
X = np.array(X)
y = np.array(y)
y_train = to_categorical(y, num_classes=2)

# Class weights are derived from the integer label vector `y`. The one-hot
# matrix `y_train` only happened to contain the same unique values (0 and 1)
# and produced float dictionary keys.
label_values = np.unique(y)
balanced_weights = np.around(
    compute_class_weight(class_weight='balanced', classes=label_values, y=y), 2)
# Plain Python ints/floats: {class index: weight}, as passed to model.fit(class_weight=...).
class_weights = dict(zip(label_values.tolist(), balanced_weights.tolist()))
print('balancing classes: ',class_weights)
print(X.shape)
print(y_train.shape)

balancing classes:  {0.0: 0.92, 1.0: 1.1}
(3297, 180, 180, 3)
(3297, 2)


In [None]:
import tensorflow as tf
from tensorflow.keras import layers
from keras.callbacks import EarlyStopping,ModelCheckpoint
from tensorflow.keras.optimizers import Adam

# Callback that ends a fit once validation accuracy (higher is better) has not
# improved for `patience` consecutive epochs.
# NOTE(review): patience is 10 and EPOCHS is 10, so this looks unable to
# trigger within a single 10-epoch fit - confirm the intended patience.
early_stopping = EarlyStopping(patience=10, mode='max', monitor='val_accuracy')

def create_model(frozen_layers=8, learning_rate=0.0001):
    """Build and compile the ResNet152V2-based two-class classifier.

    An ImageNet-initialised ResNet152V2 backbone (without its top) is followed
    by two 3x3 convolutions, max pooling, two 4096-unit dense layers with 50%
    dropout and a 2-way softmax. The model is compiled with Adam and
    categorical cross-entropy, its summary is printed, and it is returned.

    Args:
        frozen_layers: number of leading backbone layers kept frozen; every
            later backbone layer is trainable. Defaults to 8.
        learning_rate: Adam learning rate. Defaults to 0.0001.

    Returns:
        The compiled ``tf.keras`` Sequential model, expecting inputs of shape
        ``(SIZE, SIZE, 3)`` and one-hot targets with two classes.
    """
    # NOTE(review): no input scaling is applied in this notebook before the
    # backbone - confirm whether resnet_v2 preprocessing should be added.
    pre_trained_model = tf.keras.applications.ResNet152V2(include_top=False,
                                                          input_shape=(SIZE, SIZE, 3),
                                                          weights='imagenet')

    print('layers ',len(pre_trained_model.layers))

    # One pass sets every layer explicitly. The previous pair of slices
    # ([:8] frozen, [9:] trainable) skipped layer 8 and left it at its default.
    for index, layer in enumerate(pre_trained_model.layers):
        layer.trainable = index >= frozen_layers

    model = tf.keras.models.Sequential([
        pre_trained_model,
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Flatten(),
        layers.Dense(4096, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(4096, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(2, activation='softmax')
    ])

    # epsilon=1e-3 is kept from the original configuration.
    optimizer = Adam(learning_rate=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-3)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    model.summary()
    return model

    


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import StratifiedKFold

# Stratified k-fold cross-validation over the pooled data set.
model_history = []   # one Keras History object per completed fold
skf = StratifiedKFold(n_splits=FOLDS, random_state=None, shuffle=False)
model = None
filepath = 'best_resnet152_fold_unique.h5'
# The checkpoint callback is shared by all folds. It keeps its best score
# between fit() calls, so `filepath` ends up holding the single best model
# (by validation accuracy) across every fold.
checkpoint = ModelCheckpoint(filepath,
             monitor='val_accuracy',
             mode='max',
             save_best_only=True,
             verbose=1)

# The augmentation settings do not depend on the fold, so the generator is
# built once. datagen.fit() is not called: it only computes statistics for the
# featurewise/ZCA options, none of which are enabled here.
datagen = ImageDataGenerator(shear_range=0.2,
                             zoom_range=0.2,
                             rotation_range=40,
                             brightness_range=(0.5, 1.5),
                             vertical_flip=True,
                             horizontal_flip=True)

for i, (train_index, val_index) in enumerate(skf.split(X, y)):
    print('*****Starting fold ', i+1, ' of ', FOLDS)
    X_train, X_test = X[train_index], X[val_index]
    y_train = to_categorical(y[train_index], num_classes=2)
    y_test = to_categorical(y[val_index], num_classes=2)

    # Every fold starts from a freshly initialised model. Re-using one model
    # across folds means each fold validates on images it was trained on in
    # earlier folds, which inflates val_accuracy.
    tf.keras.backend.clear_session()
    model = create_model()

    # Batch size and shuffling belong to the generator; fit() takes them only
    # for in-memory array inputs.
    train_batches = datagen.flow(X_train, y_train, batch_size=BATCH_SIZE, shuffle=True)
    history = model.fit(train_batches,
                        epochs=EPOCHS,
                        callbacks=[early_stopping, checkpoint],
                        validation_data=(X_test, y_test),
                        class_weight=class_weights)
    # Drop the History -> model back-reference so earlier folds' models can be
    # garbage-collected; only the recorded metrics are needed afterwards.
    history.set_model(None)
    model_history.append(history)
    print('*****Finishing fold ', i+1, ' of ', FOLDS)
    gc.collect()

layers  564
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet152v2 (Functional)    (None, 6, 6, 2048)        58331648  
                                                                 
 conv2d (Conv2D)             (None, 4, 4, 64)          1179712   
                                                                 
 conv2d_1 (Conv2D)           (None, 2, 2, 64)          36928     
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 1, 1, 64)         0         
 2D)                                                             
                                                                 
 flatten (Flatten)           (None, 64)                0         
                                                                 
 dense (Dense)               (None, 4096)              266240    
                                            

  layer_config = serialize_layer_fn(layer)


Epoch 2/10
Epoch 00002: val_accuracy improved from 0.63680 to 0.76271, saving model to best_resnet152_fold_unique.h5
Epoch 3/10
Epoch 00003: val_accuracy improved from 0.76271 to 0.80872, saving model to best_resnet152_fold_unique.h5
Epoch 4/10
Epoch 00004: val_accuracy did not improve from 0.80872
Epoch 5/10
Epoch 00005: val_accuracy improved from 0.80872 to 0.81840, saving model to best_resnet152_fold_unique.h5
Epoch 6/10
Epoch 00006: val_accuracy did not improve from 0.81840
Epoch 7/10
Epoch 00007: val_accuracy improved from 0.81840 to 0.83293, saving model to best_resnet152_fold_unique.h5
Epoch 8/10
Epoch 00008: val_accuracy did not improve from 0.83293
Epoch 9/10
Epoch 00009: val_accuracy did not improve from 0.83293
Epoch 10/10
Epoch 00010: val_accuracy improved from 0.83293 to 0.84019, saving model to best_resnet152_fold_unique.h5
*****Finishing fold  1  of  8
*****Starting fold  2  of  8
Epoch 1/10
Epoch 00001: val_accuracy improved from 0.84019 to 0.91505, saving model to best

In [None]:
# Validation accuracy per epoch, one line per cross-validation fold.
# Iterating over the recorded histories (instead of range(FOLDS)) keeps this
# cell usable when training was stopped before every fold finished.
for fold_number, fold_history in enumerate(model_history, start=1):
    plt.plot(fold_history.history['val_accuracy'], label='Training Fold '+str(fold_number))
plt.title('Val Accuracies vs Epochs')
plt.xlabel('Epoch')
plt.ylabel('Validation accuracy')
plt.legend()
plt.show()

In [None]:
#plt.title('Val Accuracies vs Epochs')
#for i in range(FOLDS):
    #plt.plot(model_history[i].history['val_accuracy'], label='Training Fold '+str(i+1))
#plt.legend()
#plt.show()

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, roc_auc_score
import seaborn as sns

target_names = ['benign', 'malignant']

# Reload the images for evaluation. Each listing is paired with the directory
# it was read from (list 1 = test folder, list 2 = train folder), and every
# path ends with a separator.
# NOTE(review): these are the same four folders that were pooled for
# cross-validation training, so the scores below are not a held-out estimate.
X = []
y = []
create_dataset(X, y, '/content/drive/MyDrive/skincancer/data/test/benign/', beningList1, 0)
create_dataset(X, y, '/content/drive/MyDrive/skincancer/data/train/benign/', beningList2, 0)
create_dataset(X, y, '/content/drive/MyDrive/skincancer/data/test/malignant/', malignantList1, 1)
create_dataset(X, y, '/content/drive/MyDrive/skincancer/data/train/malignant/', malignantList2, 1)
roc_curves = []
X = np.array(X)
y_true = np.array(y)
y_expected = to_categorical(y_true, num_classes=2)

# Evaluate the checkpoint written by the training cell (the model with the best
# validation accuracy), not whatever state the in-memory model ended in.
print('evaluating: ', filepath)
best_model = tf.keras.models.load_model(filepath)
result = best_model.evaluate(X, y_expected)

# Keep the class probabilities for ROC/AUC and derive hard labels from them.
y_prob = best_model.predict(X)
y_pred = np.argmax(y_prob, axis=1)
print('Classification Report:')
print(classification_report(y_true, y_pred, target_names=target_names))
print('Confusion Matrix:')
y_true_as_label = ['benign' if (i == 0) else 'malignant'  for i in y_true]
y_pred_as_label = ['benign' if (i == 0) else 'malignant'  for i in y_pred]
cm = confusion_matrix(y_true_as_label, y_pred_as_label, labels=target_names)
ax = plt.subplot()
sns.heatmap(cm, annot=True, fmt='g', ax=ax)
# confusion_matrix puts true labels on rows and predictions on columns.
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.xaxis.set_ticklabels(target_names)
ax.yaxis.set_ticklabels(target_names)
plt.show()

# ROC/AUC need a continuous score: use the predicted probability of the
# positive class (column 1 = malignant). Hard 0/1 labels collapse the curve to
# a single operating point.
malignant_score = y_prob[:, 1]
fpr, tpr, thresholds  = roc_curve(y_true, malignant_score)
auc = roc_auc_score(y_true, malignant_score)
print('ROC AUC score:', auc)
print('-----------------------------------')

In [None]:
# ROC curve drawn from the fpr/tpr arrays computed in the evaluation cell.
plt.plot(fpr, tpr, linestyle='--', label='Model: best_resnet152_fold_unique.h5')
# Label both axes in one call on the current axes.
plt.gca().set(xlabel='False Positive Rate', ylabel='True Positive Rate')
plt.legend()
plt.show()