<a href="https://colab.research.google.com/github/Abhiram102000/SkinCancerDetection/blob/main/SCD_v2_InceptionV3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Importing the libraries

%matplotlib inline
!pip install -U tensorflow-addons
!pip install -q "tqdm>=4.36.1"

import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow_hub as hub
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from tensorflow.keras.utils import get_file
from sklearn.metrics import roc_curve, auc, confusion_matrix
from imblearn.metrics import sensitivity_score, specificity_score
import itertools

import keras
from keras.utils.np_utils import to_categorical # used for converting labels to one-hot-encoding
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.optimizers import Adam, RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau
from keras.wrappers.scikit_learn import KerasClassifier
from keras.applications.inception_v3 import InceptionV3
from keras import backend as K 
from PIL import Image
from sklearn.preprocessing import StandardScaler 
from sklearn.model_selection import train_test_split, KFold, cross_val_score, GridSearchCV
from sklearn.metrics import accuracy_score

import os
import glob
import zipfile
import random

# Seed TensorFlow, NumPy and the stdlib RNG with one shared value so that
# repeated runs draw the same random numbers.
SEED = 10
for seed_fn in (tf.random.set_seed, np.random.seed, random.seed):
    seed_fn(SEED)

In [None]:
def download_and_extract_dataset(data_urls=None, extract_dir="data"):
  """Download each zip archive and unpack it into `extract_dir`.

  Args:
    data_urls: optional URL string or iterable of URL strings. When omitted,
      the single `jpeg.zip` link hard-coded below is used (original behaviour).
    extract_dir: directory handed to `ZipFile.extractall`; defaults to "data".

  Each archive is fetched to `temp<i>.zip` in the current working directory,
  extracted, and then deleted again -- also when extraction fails.
  """
  if data_urls is None:
    # NOTE(review): this link carries `Expires=` and `Signature=` query
    # parameters, so it is presumably a time-limited signed URL that may no
    # longer be valid -- confirm and replace with a stable data source.
    data_urls = ["https://storage.googleapis.com/kaggle-competitions-data/kaggle-v2/20270/1222630/compressed/jpeg.zip?GoogleAccessId=web-data@kaggle-161607.iam.gserviceaccount.com&Expires=1646130380&Signature=awtZ4t5y9uVyNG4G6ZWR9tWHE2gaxTI%2F23NsSY8ZH09o5Fx4yn5X2NyxDzs4eWVcXq%2FnWi%2BD2Sjfu2o7whbXFakAk0WIajCGKlIk%2BC4x%2Bx%2BzD%2F8pNggm1oOl7WFGkChOlgIZaTY5thnI8TOS1QfDqfdEZEvnF5xOFSppdp1xKrrpLPv7ombT6XoKOh4MZShBeHN2qCOoMzcY3hEePT6u5yrIw8TsLVafTjRD%2Fz%2FGJxfNwaNBTbm8LKB2guTp0NhInD8KltgSR5Ssn4rTGNvf2Rh9O6JdGWfzKDwnnNx%2FmXKYk%2Brug1XcASz83UTivCQuTRI7P0sBGU2dV%2BMmwgd3lg%3D%3D&response-content-disposition=attachment%3B+filename%3Djpeg.zip"]
  elif isinstance(data_urls, str):
    # A bare string would otherwise be iterated character by character.
    data_urls = [data_urls]

  for i, download_link in enumerate(data_urls):
    temp_file = f"temp{i}.zip"
    data_dir = get_file(origin=download_link, fname=os.path.join(os.getcwd(), temp_file))
    try:
      print("Extracting", download_link)
      with zipfile.ZipFile(data_dir, "r") as z:
        z.extractall(extract_dir)
    finally:
      # Remove the temp file via the path get_file() returned, so cleanup does
      # not depend on the working directory and also runs if extraction fails.
      if os.path.exists(data_dir):
        os.remove(data_dir)

# comment the below line if you already downloaded the dataset
download_and_extract_dataset()

In [None]:
# NOTE(review): download_and_extract_dataset() extracts into "data", while the
# folders below live under "archive/" -- confirm where the dataset is expected.
folder_benign_train = 'archive/train/benign/'
folder_malignant_train = 'archive/train/malignant/'

folder_benign_test = 'archive/test/benign/'
folder_malignant_test = 'archive/test/malignant/'


def read(imname):
    """Load one image file as an RGB array, closing the file handle afterwards."""
    with Image.open(imname) as im:
        return np.asarray(im.convert("RGB"))


def load_image_folder(folder):
    """Read every file in `folder` into a single uint8 array.

    Filenames are sorted first: os.listdir() returns entries in arbitrary
    order, which would make the seeded shuffle below non-reproducible.

    Raises:
        FileNotFoundError: if the folder is missing or contains no files.
    """
    filenames = sorted(os.listdir(folder))
    if not filenames:
        raise FileNotFoundError(f"No image files found in {folder!r}")
    return np.array([read(os.path.join(folder, name)) for name in filenames], dtype='uint8')


def shuffle_in_unison(images, labels):
    """Apply one random permutation (from the global NumPy RNG) to both arrays."""
    order = np.arange(images.shape[0])
    np.random.shuffle(order)
    return images[order], labels[order]


# Load Training Images
X_benign = load_image_folder(folder_benign_train)
X_malignant = load_image_folder(folder_malignant_train)

# Load Testing Images
X_benign_test = load_image_folder(folder_benign_test)
X_malignant_test = load_image_folder(folder_malignant_test)

# Create labels: 0 = benign, 1 = malignant
y_benign = np.zeros(X_benign.shape[0])
y_malignant = np.ones(X_malignant.shape[0])

y_benign_test = np.zeros(X_benign_test.shape[0])
y_malignant_test = np.ones(X_malignant_test.shape[0])

# Merge data
X_train = np.concatenate((X_benign, X_malignant), axis=0)
y_train = np.concatenate((y_benign, y_malignant), axis=0)

X_test = np.concatenate((X_benign_test, X_malignant_test), axis=0)
y_test = np.concatenate((y_benign_test, y_malignant_test), axis=0)

# Shuffle data (train first, then test, so the RNG is consumed in a fixed order)
X_train, y_train = shuffle_in_unison(X_train, y_train)
X_test, y_test = shuffle_in_unison(X_test, y_test)

In [None]:
# Preview grid: the first rows*columns training images with their class label.
columns = 5
rows = 3
fig, axes = plt.subplots(rows, columns, figsize=(12, 8))

for idx, ax in enumerate(axes.flat):
    if idx >= len(X_train):
        # Fewer samples than grid cells: leave the remaining panels blank.
        ax.axis('off')
        continue
    label = y_train[idx]
    if np.ndim(label) > 0:
        # Labels were already one-hot encoded (cell re-run after encoding).
        label = np.argmax(label)
    ax.set_title('Benign' if label == 0 else 'Malignant')
    ax.imshow(X_train[idx], interpolation='nearest')
plt.show()

In [None]:
# One-hot encode the 0/1 labels into two columns.
# Guard on ndim so re-running this cell does not encode the already-encoded
# labels a second time (which would produce an (n, 2, 2) array).
if y_train.ndim == 1:
    y_train = to_categorical(y_train, num_classes=2)
if y_test.ndim == 1:
    y_test = to_categorical(y_test, num_classes=2)

In [None]:
# Scale pixel values from [0, 255] to [0, 1].
# Only raw uint8 images are scaled, so re-running the cell cannot divide by 255
# twice; float32 halves the memory of the float64 that uint8 / 255. produces.
if X_train.dtype == np.uint8:
    X_train = X_train.astype('float32') / 255.
if X_test.dtype == np.uint8:
    X_test = X_test.astype('float32') / 255.

In [None]:
# Number of samples (first axis) in each split.
n_train, n_test = X_train.shape[0], X_test.shape[0]
print('Train data: ', n_train)
print('Test data: ', n_test)

In [None]:
# Training hyper-parameters
input_shape = (224,224,3)
lr = 1e-5
epochs = 50
batch_size = 64

# Halve the learning rate when validation accuracy has not improved for
# 5 epochs, never going below 1e-7.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy',
                                            patience=5,
                                            verbose=1,
                                            factor=0.5,
                                            min_lr=1e-7)

tqdm_callback = tfa.callbacks.TQDMProgressBar()

# InceptionV3 trained from scratch (weights=None) with its own 2-class head.
# `pooling` is only consulted when include_top=False, so it is not passed here.
model = InceptionV3(include_top=True,
                    weights=None,
                    input_shape=input_shape,
                    classes=2)

# The head is a 2-unit softmax fed with one-hot labels, so categorical
# cross-entropy is the matching loss; for this 2-class one-hot setup it yields
# the same value as the element-wise binary cross-entropy used before.
model.compile(optimizer=Adam(learning_rate=lr),
              loss="categorical_crossentropy",
              metrics=["accuracy"])

model.summary()

In [None]:
# Fit the model, holding out 20% of the training arrays for validation.
fit_callbacks = [learning_rate_reduction, tqdm_callback]
history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.2,
    callbacks=fit_callbacks,
    verbose=2,
)

In [None]:
# Show which quantities were recorded during training
print(history.history.keys())

# One figure per quantity: training curve first, validation curve second.
for metric_key, metric_label in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
    plt.plot(history.history[metric_key])
    plt.plot(history.history['val_' + metric_key])
    plt.title('Model ' + metric_label)
    plt.ylabel(metric_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()

In [None]:
y_pred = model.predict(X_test)

# Collapse one-hot targets and per-class scores to class indices once,
# then reuse them for every metric.
true_labels = np.argmax(y_test, axis=1)
predicted_labels = np.argmax(y_pred, axis=1)

sensitivity = sensitivity_score(true_labels, predicted_labels)
specificity = specificity_score(true_labels, predicted_labels)

print("Melanoma Accuracy:", accuracy_score(true_labels, predicted_labels))
print("Melanoma Sensitivity:", sensitivity)
print("Melanoma Specificity:", specificity)

In [None]:
class_names = ["Benign", "Malignant"]
def plot_confusion_matrix(y_test, y_pred):
  """Print and plot the row-normalised confusion matrix.

  Args:
    y_test: true class indices (positions into `class_names`).
    y_pred: predicted class indices (positions into `class_names`).

  Each row is divided by its number of true samples, so cells are the fraction
  of that true class assigned to each predicted class.
  """
  # Pin the label set so the matrix always has one row/column per class name,
  # even when a class is absent from both y_test and y_pred.
  labels = np.arange(len(class_names))
  cm = confusion_matrix(y_test, y_pred, labels=labels)
  # Normalise; a class with no true samples has an all-zero row, so clamping
  # the denominator to 1 keeps that row at 0 instead of producing NaN.
  row_totals = cm.sum(axis=1)[:, np.newaxis]
  cmn = cm.astype('float') / np.maximum(row_totals, 1)
  # print it
  print(cmn)
  fig, ax = plt.subplots(figsize=(10,10))
  sns.heatmap(cmn, annot=True, fmt='.2f',
              xticklabels=[f"Predicted_{c}" for c in class_names],
              yticklabels=[f"True_{c}" for c in class_names],
              cmap="Blues",
              ax=ax
              )
  ax.set_ylabel('Actual')
  ax.set_xlabel('Predicted')
  # plot the resulting confusion matrix
  plt.show()

# Reduce one-hot targets and per-class scores to class indices before tabulating.
plot_confusion_matrix(y_test.argmax(axis=1), y_pred.argmax(axis=1))

In [None]:
def plot_roc_auc(y_true, y_pred):
    """
    Print the ROC AUC and draw the ROC curve.

    y_true and y_pred are forwarded unchanged to `roc_curve`; the area is
    computed with `auc` from the resulting false/true positive rates.
    """
    # open a fresh figure for this curve
    plt.figure()
    false_pos_rate, true_pos_rate, _thresholds = roc_curve(y_true, y_pred)
    area = auc(false_pos_rate, true_pos_rate)
    # report the score in text form as well
    print(f"ROC AUC: {area:.3f}")
    # draw the curve, with the area shown in the legend
    plt.plot(false_pos_rate, true_pos_rate, color="blue", lw=2,
             label=f'ROC curve (area = {area:.2f})')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC curves')
    plt.legend(loc="lower right")
    plt.show()

# Pass the predicted probability of the positive (malignant, index 1) class as
# the score. Feeding argmax'd hard labels collapses the ROC curve to a single
# operating point and misreports the AUC.
plot_roc_auc(np.argmax(y_test, axis=1), y_pred[:, 1])