# **Importing libraries**

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!mkdir dataset

In [None]:
!cp /content/drive/MyDrive/datajpg/train.csv /content/dataset

In [None]:
!pip install imbalanced-learn

In [None]:
!pip install imgaug

In [None]:
import os

import cv2
import imblearn
import imgaug.augmenters as iaa
import numpy as np
import pandas as pd
from google.colab import files
from google.colab.patches import cv2_imshow
from imblearn.over_sampling import RandomOverSampler
from skimage.filters import threshold_otsu
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras.callbacks import CSVLogger

# **Downloading dataset from Kaggle**

In [None]:
# **Downloading the dataset using kaggle API**
!pip install -q kaggle

In [None]:
files.upload()

In [None]:
!mkdir ~/.kaggle
# copy the json file to the folder .kaggle in the root directory
!cp kaggle.json ~/.kaggle/

# change folder permissions to be able to read and write
!chmod 600 ~/.kaggle/kaggle.json

!pwd

!mkdir dataset

%cd /content/dataset

!pwd


In [None]:
# -f folder name
!kaggle competitions download -c siim-isic-melanoma-classification

In [None]:
!kaggle competitions download -c siim-isic-melanoma-classification -f "train.csv"

# **Downloading dataset from website**

In [None]:
%cd /content/dataset

In [None]:
!wget "https://isic-challenge-data.s3.amazonaws.com/2020/ISIC_2020_Training_JPEG.zip"

# **Unzipping dataset**

In [None]:
#!mkdir dataset

In [None]:
#%cd dataset

In [None]:
#!mv /content/ISIC_2020_Training_JPEG.zip /content/dataset/.

In [None]:
!unzip "/content/dataset/ISIC_2020_Training_JPEG.zip"

In [None]:
#!unzip "/content/dataset/train.csv.zip"

In [None]:
main_path = "/content/dataset/train"
len(os.listdir(main_path))

In [None]:
df = pd.read_csv("/content/dataset/train.csv")

In [None]:
df.info()

In [None]:
df['image_name_'] = df['image_name'].apply(lambda x: f"{main_path}/{x}.jpg")

In [None]:
df

In [None]:
img = cv2.imread(f"{main_path}/ISIC_0149568.jpg")
#cv2_imshow(img)

In [None]:
img.shape

In [None]:
X_train, X_test, y_train, y_test = train_test_split(df['image_name_'].to_list(), df['target'].to_list(), test_size=0.1, random_state=42, stratify=df['target'].to_list())
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42, stratify=y_train)


In [None]:
oversample = RandomOverSampler(sampling_strategy='minority', random_state=42)
X_over, y_over = oversample.fit_resample(np.array(X_train).reshape(-1, 1), np.array(y_train).reshape(-1, 1))


In [None]:
X_over_list = []
for i in range(len(X_over)):
  X_over_list.append(X_over[i][0])

In [None]:
len(X_over_list)

In [None]:
class DataGenerator(keras.utils.Sequence):
  """Batch generator that reads images from disk and preprocesses them for Keras.

  Every image is resized to ``dim``, median-blurred and hair-inpainted, then
  optionally Otsu-segmented and augmented with imgaug. Labels are returned
  one-hot encoded with ``n_classes`` columns.

  Args:
    list_IDs: sequence of image file paths.
    labels: sequence of integer class labels aligned with ``list_IDs``.
    batch_size: samples per batch (the last batch of an epoch may be smaller).
    dim: target image size as ``(height, width)``.
    n_channels: 3 keeps the colour image; 1 converts the final image to grayscale.
    n_classes: number of columns of the one-hot encoded labels.
    shuffle: reshuffle the sample order at the end of every epoch.
    augmentation: run the imgaug pipeline on every image.
    segmentation: keep only the pixels darker than the Otsu threshold.
  """
  def __init__(self, list_IDs, labels, batch_size=16, dim=(128,128), n_channels=1,
              n_classes=2, shuffle=True, augmentation=True, segmentation=True):
    """Store the configuration, build the augmentation pipeline and the first index order."""
    # Newer Keras versions expect the base class initialiser to run.
    super().__init__()
    self.dim = dim
    self.batch_size = batch_size
    self.labels = labels
    self.list_IDs = list_IDs
    self.n_channels = n_channels
    self.n_classes = n_classes
    self.shuffle = shuffle
    self.augmentation = augmentation
    self.segmentation = segmentation

    if self.augmentation:
      # Applied in this order to every image when augmentation is enabled.
      self.seq = iaa.Sequential([
                              iaa.GaussianBlur(sigma=(0.1, 3.5)),
                              iaa.Emboss(alpha=(0.0, 1.0), strength=(0.0, 1.5)),
                              iaa.Fliplr(0.25),
                              iaa.Flipud(0.25),
                              iaa.Affine(rotate=(-45, 45)),
                              iaa.PiecewiseAffine(scale=(0.01, 0.05)),
                              iaa.Affine(shear=(-7, 7))
                          ])

    self.on_epoch_end()

  def __len__(self):
    """Number of batches per epoch; a trailing partial batch counts as one batch."""
    # ceil instead of floor: no sample is silently dropped, and a dataset smaller
    # than batch_size still yields one batch instead of none.
    return int(np.ceil(len(self.list_IDs) / self.batch_size))

  def __getitem__(self, index):
    """Return batch ``index`` as ``(X, y)``.

    Raises:
      IndexError: if ``index`` is outside ``[0, len(self))``.
    """
    if index < 0 or index >= len(self):
      raise IndexError(f"batch index {index} out of range for {len(self)} batches")

    # Positions of the samples that make up this batch
    indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

    # Paths and labels of those samples
    list_IDs_temp = [self.list_IDs[k] for k in indexes]
    list_labels_temp = [self.labels[k] for k in indexes]

    X, y = self.__data_generation(list_IDs_temp, list_labels_temp)

    return X, y

  def on_epoch_end(self):
    """Reset the sample order and reshuffle it when ``shuffle`` is enabled."""
    self.indexes = np.arange(len(self.list_IDs))
    if self.shuffle == True:
      np.random.shuffle(self.indexes)

  def hair_removal(self, src):
    """Inpaint thin dark structures (hair) in ``src``.

    Reads ``self.grayScale``, the grayscale version of the image that the
    batch loop stores right before calling this method.
    """
    # 17x17 cross-shaped kernel for the morphological filtering
    kernel = cv2.getStructuringElement(cv2.MORPH_CROSS,(17,17))
    # Black-hat filtering on the grayscale image highlights the hair contours
    blackhat = cv2.morphologyEx(self.grayScale, cv2.MORPH_BLACKHAT, kernel)
    # Binarise the response to obtain the inpainting mask
    ret,thresh2 = cv2.threshold(blackhat,10,255,cv2.THRESH_BINARY)
    # Inpaint the masked pixels of the original image
    dst = cv2.inpaint(src,thresh2,1,cv2.INPAINT_TELEA)
    return dst

  def segment_image(self, image):
    """Zero every pixel whose ``self.grayScale`` value is not below the Otsu threshold."""
    th = threshold_otsu(self.grayScale)
    mask  = self.grayScale < th
    # Repeat the 2-D mask over the three colour channels
    mask = np.stack((mask,)*3, axis=-1)
    filtered = image * mask
    return filtered


  def augment_image(self, image):
    """Run the imgaug pipeline built in ``__init__`` on a single image."""
    aug_image = self.seq.augment_image(image)
    return aug_image


  def __data_generation(self, list_IDs_temp, list_labels_temp):
    """Load and preprocess one batch.

    Returns:
      ``X`` of shape ``(n_samples, *dim, n_channels)`` and the one-hot labels of
      shape ``(n_samples, n_classes)``, where ``n_samples`` is the number of
      paths passed in.

    Raises:
      FileNotFoundError: if an image cannot be read from disk.
    """
    # Size the buffers by the actual batch so a short last batch has no uninitialised rows.
    n_samples = len(list_IDs_temp)
    X = np.empty((n_samples, *self.dim, self.n_channels))
    # reshape(-1) also accepts labels stored as one-element arrays.
    y = np.asarray(list_labels_temp, dtype=int).reshape(-1)

    for i, ID in enumerate(list_IDs_temp):
      img = cv2.imread(ID)
      if img is None:
        # cv2.imread returns None instead of raising; fail with the offending path.
        raise FileNotFoundError(f"Could not read image: {ID}")
      # cv2.resize expects (width, height) while dim is (height, width)
      img_resized = cv2.resize(img, self.dim[::-1])
      img_resized = cv2.medianBlur(img_resized, 3)
      # cv2.imread delivers BGR data, so BGR2GRAY is the matching conversion.
      # NOTE(review): the previous RGB2GRAY swapped the red/blue weights; a model
      # trained with the old preprocessing sees slightly different masks now.
      self.grayScale = cv2.cvtColor(img_resized, cv2.COLOR_BGR2GRAY)
      img_resized = self.hair_removal(img_resized)
      # Segmentation block
      if self.segmentation:
        img_resized = self.segment_image(img_resized)
      # Classical augmentation
      if self.augmentation:
        img_resized = self.augment_image(img_resized)
      # Single-channel output: collapse the colour image and keep a channel axis
      if self.n_channels == 1:
        img_resized = cv2.cvtColor(img_resized, cv2.COLOR_BGR2GRAY)[..., np.newaxis]
      X[i, ] = img_resized

    return X, keras.utils.to_categorical(y, num_classes=self.n_classes)


In [None]:
train_generator = DataGenerator(list_IDs= X_over_list, labels= y_over, n_channels=3,
                                shuffle=True, augmentation=True, batch_size=256
                                )
val_generator = DataGenerator(list_IDs= X_val, labels= y_val, n_channels=3, shuffle=False, augmentation=False, batch_size=256)
test_generator = DataGenerator(list_IDs= X_test, labels= y_test, n_channels=3, shuffle=False, augmentation=False, batch_size=256)

In [None]:
MODEL_NAME = 'resnet'

In [None]:
# Debug helper kept as an inert string literal: none of the lines below are executed.
# To preview the first training batch, run the loop without the surrounding triple quotes.
"""
for images, labels in train_generator:
  print(images.shape)
  print(labels.shape)
  for index in range(len(labels)):
    cv2_imshow(images[index])
    print(labels[index])
  break
"""

# **Copy folder to content**

In [None]:
# NOTE(review): this archives the whole Drive root into datajpg.zip, which is itself
# written inside that root — presumably only the datajpg folder was meant; confirm.
!zip -r /content/drive/MyDrive/datajpg /content/drive/MyDrive/.

In [None]:
# Copy the resulting archive from Drive to the local Colab disk.
!cp /content/drive/MyDrive/datajpg.zip /content/

# **Transfer learning**
ResNet50

In [None]:
from tensorflow.keras.applications import ResNet50 as ResNet
from keras.utils.vis_utils import plot_model
import tensorflow as tf
from tensorflow.keras.layers import BatchNormalization, Dropout, Dense, Conv2D, MaxPooling2D, Flatten, Input, GlobalAveragePooling2D
from tensorflow.keras import optimizers
from tensorflow.keras.callbacks import ModelCheckpoint

In [None]:
pretrained_model = ResNet(input_shape=(128, 128, 3), include_top=False, weights="imagenet")

In [None]:
pretrained_model.summary()

In [None]:
plot_model(pretrained_model, to_file='resnet.png', show_shapes=True, show_layer_names=True)

In [None]:
input_layer = Input(shape=(128, 128, 3))
x = pretrained_model(input_layer)
x = GlobalAveragePooling2D()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.2)(x)
x = BatchNormalization()(x)
output_layer = Dense(2, activation='softmax')(x)

model = keras.Model(inputs=input_layer, outputs=output_layer, name="transfer_learning_resnet")


In [None]:
model.summary()

In [None]:
# Labels are one-hot and the model outputs softmax probabilities, so accuracy has to
# compare argmax positions (CategoricalAccuracy). metrics.Accuracy counts how often
# y_pred *equals* y_true value by value, which is meaningless for probabilities.
# Recall and precision are reported for class index 1.
model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=[
        tf.keras.metrics.Recall(name='recall', class_id=1),
        tf.keras.metrics.AUC(name='auc'),
        tf.keras.metrics.Precision(name='prec', class_id=1),
        tf.keras.metrics.CategoricalAccuracy(name="accuracy"),
    ],
)

In [None]:
best_checkpoint = ModelCheckpoint(f'/content/drive/MyDrive/models/{MODEL_NAME}.h5',
                                  monitor="val_recall",
                                  mode='max')

In [None]:
csv_logger = CSVLogger(f"/content/drive/MyDrive/models/{MODEL_NAME}_logger.csv", append=True)

# **Load model**

In [None]:
from tensorflow.keras.models import load_model
from tensorflow.keras.applications import ResNet50 as ResNet
from keras.utils.vis_utils import plot_model
import tensorflow as tf
from tensorflow.keras.layers import BatchNormalization, Dropout, Dense, Conv2D, MaxPooling2D, Flatten, Input, GlobalAveragePooling2D
from tensorflow.keras import optimizers
from tensorflow.keras.callbacks import ModelCheckpoint
from keras.callbacks import CSVLogger


In [None]:
model = load_model(f"/content/drive/MyDrive/models/{MODEL_NAME}.h5")

In [None]:
best_checkpoint = ModelCheckpoint(f'/content/drive/MyDrive/models/{MODEL_NAME}.h5', monitor="val_recall")

In [None]:
csv_logger = CSVLogger(f"/content/drive/MyDrive/models/{MODEL_NAME}_logger.csv", append=True)

# **Model fitting**

In [None]:
model.fit(x = train_generator,
          validation_data = val_generator,
          epochs=60,
          callbacks=[best_checkpoint, csv_logger])

In [None]:
!nvidia-smi

# **Evaluating model**

In [None]:
from sklearn.metrics import classification_report

In [None]:
def compute_classification_report(y_true, y_pred):
  """Print scikit-learn's classification report for the two-class task.

  Args:
    y_true: ground-truth class indices, either a flat sequence or a list of
      per-batch arrays.
    y_pred: predicted class indices in the same layout as ``y_true``.
  """
  target_names = ['Normal', 'Malignant']
  # Flatten both inputs to 1-D so flat lists and per-batch arrays are handled alike.
  y_true = np.concatenate([np.ravel(batch) for batch in y_true])
  y_pred = np.concatenate([np.ravel(batch) for batch in y_pred])
  # Explicit labels keep the two target names aligned with classes 0 and 1 even
  # when one of the classes does not occur in the data being reported.
  print(classification_report(y_true, y_pred, labels=[0, 1],
                              target_names=target_names, digits=4))


In [None]:
def get_truth_pred(generator, model):
  """Collect ground-truth and predicted class indices over every batch of a generator.

  Args:
    generator: batch source supporting ``len()`` and integer indexing; item ``i``
      is a pair ``(images, one_hot_labels)``.
    model: object whose ``predict(images, verbose=0)`` returns one row of class
      scores per image.

  Returns:
    ``(labels_all, labels_pred_all)``: two flat lists of ints with one entry per
    sample, whatever batch size the generator uses.
  """
  labels_all = []
  labels_pred_all = []
  # Index the batches explicitly rather than iterating the generator, so the loop
  # always ends after len(generator) batches.
  for batch_index in range(len(generator)):
    image, label = generator[batch_index]
    # verbose=0: no progress bar per batch
    scores = model.predict(image, verbose=0)
    # argmax turns class scores / one-hot rows into class indices; extend() keeps
    # the result flat instead of nesting one array per batch.
    labels_pred_all.extend(np.argmax(scores, axis=1).tolist())
    labels_all.extend(np.argmax(label, axis=1).tolist())

  return labels_all, labels_pred_all




In [None]:
y_val_truth, y_val_pred = get_truth_pred(DataGenerator(list_IDs= X_val, labels= y_val, n_channels=3, shuffle=False, augmentation=False, batch_size=1),
                                         model)

In [None]:
len(X_val)

In [None]:
len(y_val_pred)

In [None]:
compute_classification_report(y_val_truth, y_val_pred)

In [None]:
y_test_truth, y_test_pred = get_truth_pred(
    DataGenerator(list_IDs= X_test, labels= y_test, n_channels=3, shuffle=False, augmentation=False, batch_size=1)
    , model)

In [None]:
compute_classification_report(y_test_truth, y_test_pred)