In [None]:
# Mount Google Drive into the Colab runtime; every path used below lives
# under /content/drive/.
from google.colab import drive
drive.mount('/content/drive/')

In [None]:
# Create the project's temp/ and data/ folders on Drive (no-op when they
# already exist), list the project folder, then make temp/ the working
# directory for the cells that follow.
!mkdir -p "/content/drive/MyDrive/uni/pastukai/temp"
!mkdir -p "/content/drive/MyDrive/uni/pastukai/data"
!ls "/content/drive/MyDrive/uni/pastukai"
%cd "/content/drive/MyDrive/uni/pastukai/temp"

In [None]:
%%bash
# Downloading testing set
#
# The %%bash cell magic must be the very first line of the cell, otherwise
# IPython does not treat the cell as a bash script.

DATASET_DIR="/content/drive/MyDrive/uni/pastukai/temp/dataset/"

# Skip the download when the archive is already in the working directory.
if [ ! -f "ISIC_2019_Test_Input.zip" ]; then
    echo "Downloading Test Data ..."
    wget --show-progress --progress=bar:force https://isic-challenge-data.s3.amazonaws.com/2019/ISIC_2019_Test_Input.zip -O ISIC_2019_Test_Input.zip
fi

echo "Unpacking ISIC_2019_Test_Input.zip ..."
# -j drops the directory structure stored in the archive so that every file
# lands directly inside DATASET_DIR.
unzip -q -j ISIC_2019_Test_Input.zip -d "$DATASET_DIR"

# Number of files in dataset folder.
ls "$DATASET_DIR" | wc -l

In [None]:
%%bash
# Sanity check: the dataset folder must exist and hold exactly the expected
# number of entries.
# NOTE(review): the download cell fetches the ISIC 2019 *test* archive;
# confirm that 25333 is the right entry count for that archive and not a
# leftover from the training-set version of this notebook.

DATASET_DIR="/content/drive/MyDrive/uni/pastukai/temp/dataset/"

if [ -d $DATASET_DIR ] && [ $(ls -1 $DATASET_DIR | wc -l) -eq 25333 ]; then
    echo "Successfully built the dataset"
else
    echo "Error when building the dataset"
fi

**Preprocess data**

In [2]:
import cv2
import numpy as np
import pandas as pd
from skimage import exposure, morphology, filters, img_as_ubyte, img_as_float
from skimage.color.adapt_rgb import adapt_rgb, each_channel

def enlarge_image(img):
    '''
    Centre the image on a black 700x700 canvas
    :param img: ndarray, BGR image (expected to fit inside 700x700)
    :return: ndarray, 700x700 BGR image with the input placed in the middle
    '''
    height, width = img.shape[0], img.shape[1]
    # Offsets that leave an equal black margin on opposite sides.
    top = (700 - height) // 2
    left = (700 - width) // 2

    canvas = np.zeros((700, 700, 3), np.uint8)
    canvas[top:top + height, left:left + width] = img
    return canvas


def reduce_image(img):
    '''
    Cut the central 600x600 window out of the image, dropping the margin
    around it (no resampling is done)
    :param img: ndarray, BGR image
    :return: ndarray, BGR image (a slice of the input, not a copy)
    '''
    top = (img.shape[0] - 600) // 2
    left = (img.shape[1] - 600) // 2
    return img[top:top + 600, left:left + 600]


def reduce_mask(img):
    '''
    Reduce mask size to 600x600 by keeping only its central window
    :param img: ndarray, binary image
    :return: ndarray, binary image (a slice of the input, not a copy)
    '''
    row_offset = (img.shape[0] - 600) // 2
    col_offset = (img.shape[1] - 600) // 2
    rows = slice(row_offset, row_offset + 600)
    cols = slice(col_offset, col_offset + 600)
    return img[rows, cols]


def remove_black_border(gray_image):
    '''
    Build a mask of the bright image content, shrunk away from the dark border
    :param gray_image: ndarray, grayscale image
    :return: ndarray, binary uint8 mask with the same height and width as the
             input (255 inside the kept region, 0 elsewhere); all zeros when
             the image has no pixel brighter than the threshold
    '''
    # Everything brighter than 10 counts as image content.
    _, mask = cv2.threshold(gray_image, 10, 255, cv2.THRESH_BINARY)
    (contours, _) = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

    # Size the output from the input instead of assuming a 700x700 image.
    mask_a = np.zeros(gray_image.shape[:2], np.uint8)
    if len(contours) == 0:
        # Completely dark image: there is no content region to keep.
        return mask_a

    # Keep only the largest bright region and fill it solid.
    largest = max(contours, key=cv2.contourArea)
    cv2.drawContours(mask_a, [largest], -1, 255, thickness=cv2.FILLED)

    # Erode so the mask edge moves inwards, away from the border transition.
    kernel = np.ones((15, 15), np.uint8)
    eroded = cv2.erode(mask_a, kernel, iterations=3)
    _, mask_a = cv2.threshold(eroded, 10, 255, cv2.THRESH_BINARY)
    return mask_a


@adapt_rgb(each_channel)
def morph_closing_each(image, struct_element):
    '''
    Morphological closing; the adapt_rgb(each_channel) decorator lets it be
    called on a colour image
    :param image: ndarray, image handed in by the decorator
    :param struct_element: ndarray, structuring element passed to the closing
    :return: ndarray, closed image
    '''
    closed = morphology.closing(image, struct_element)
    return closed


@adapt_rgb(each_channel)
def median_filter_each(image, struct_element):
    '''
    Median filter; the adapt_rgb(each_channel) decorator lets it be called on
    a colour image
    :param image: ndarray, image handed in by the decorator
    :param struct_element: ndarray, neighbourhood passed to the median filter
    :return: ndarray, filtered image
    '''
    smoothed = filters.median(image, struct_element)
    return smoothed


# Disk of radius 7 shared by the closing and the median filter in noise_removal.
structuring_element = morphology.disk(7)


def crop_center_rgb(img, cropx, cropy):
    '''
    Take a cropx-by-cropy window from the middle of a 3-dimensional image
    :param img: ndarray, BGR image
    :param cropx: width in int
    :param cropy: height in int
    :return: ndarray, BGR cropped image
    '''
    height, width, _channels = img.shape
    left = width // 2 - cropx // 2
    top = height // 2 - cropy // 2
    rows = slice(top, top + cropy)
    cols = slice(left, left + cropx)
    return img[rows, cols, :]


def noise_removal(img):
    '''
    Denoise an image: adaptive histogram equalisation, then per-channel
    morphological closing, then per-channel median filtering
    :param img: ndarray, BGR image
    :return: ndarray, BGR filtered image
    '''
    # The skimage steps run on a float RGB image.
    rgb = img_as_float(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    contrast_enhanced = exposure.equalize_adapthist(rgb)
    closed = morph_closing_each(contrast_enhanced, structuring_element)
    smoothed = median_filter_each(closed, structuring_element)
    # Back to 8-bit BGR for the OpenCV side of the pipeline.
    return cv2.cvtColor(img_as_ubyte(smoothed), cv2.COLOR_RGB2BGR)


# Not used anymore
# See https://www.kaggle.com/apacheco/shades-of-gray-color-constancy
def shade_of_gray_cc(img, power=6, gamma=None):
    """
    Shades-of-gray colour constancy: rescale every channel by a gain derived
    from its Minkowski mean of order `power`.

    img (numpy array): the original image with format of (h, w, c)
    power (int): the degree of norm, 6 is used in reference paper
    gamma (float): the value of gamma correction, 2.2 is used in reference paper;
        None skips the gamma step

    Returns the corrected image, clipped to [0, 255] and cast back to the
    dtype of the input.
    """
    original_dtype = img.dtype

    if gamma is not None:
        # 256-entry lookup table holding the gamma curve for 8-bit values.
        gamma_table = np.zeros((256, 1), dtype='uint8')
        for level in range(256):
            gamma_table[level][0] = 255 * pow(level / 255, 1 / gamma)
        img = cv2.LUT(img.astype('uint8'), gamma_table)

    pixels = img.astype('float32')

    # Per-channel Minkowski mean over all pixels.
    channel_mean = np.power(np.mean(np.power(pixels, power), (0, 1)), 1 / power)
    # Normalise to unit length, then invert to get the per-channel gain.
    illuminant = channel_mean / np.sqrt(np.sum(np.power(channel_mean, 2.0)))
    gains = 1 / (illuminant * np.sqrt(3))
    balanced = np.multiply(pixels, gains)

    # Andrew Anikin suggestion
    balanced = np.clip(balanced, a_min=0, a_max=255)

    return balanced.astype(original_dtype)


def preprocess_image(img):
    '''
    Run the preprocessing chain on one image (currently only noise removal)
    :param img: ndarray, BGR image
    :return: ndarray, BGR image
    '''
    return noise_removal(img)


def crop_image(img):
    '''
    Remove dark border areas and crop image
    :param img: ndarray, BGR image
    :return: ndarray, 600x600 BGR image; when no usable content region is
             found the plain 600x600 resize of the input is returned
    '''
    resized = cv2.resize(img, (600, 600))

    # The content mask is computed on a copy padded with a black frame; the
    # frame is cut off again by reduce_mask / reduce_image.
    padded = enlarge_image(resized)
    gray = cv2.cvtColor(padded, cv2.COLOR_BGR2GRAY)
    mask = reduce_mask(remove_black_border(gray))

    coords = cv2.findNonZero(mask)
    if coords is None:
        # The mask is empty (e.g. the erosion removed a small bright region),
        # so there is nothing to crop to; boundingRect would fail here.
        return resized

    # Blank everything outside the mask, then crop to the mask's bounding box.
    inpaint_image = reduce_image(padded)
    inpaint_image[mask == 0] = 0
    x, y, w, h = cv2.boundingRect(coords)
    inpaint_image = cv2.resize(inpaint_image[y:y + h, x:x + w], (600, 600))

    # Crop from the center the image if all four corners are still black.
    gray_img = cv2.cvtColor(inpaint_image, cv2.COLOR_BGR2GRAY)
    corners_dark = all(gray_img[row][col] < 10 for row in (0, -1) for col in (0, -1))
    if corners_dark:
        inpaint_image = cv2.resize(crop_center_rgb(inpaint_image, 400, 400), (600, 600))
    return inpaint_image


def preprocess_dataset(dataset_path, dataset_csv_path, preprocessed_dataset_path):
    '''
    Preprocess every image listed in a csv file and save the results
    :param dataset_path: Path to the directory holding the source '<image>.jpg' files
    :param dataset_csv_path: Path to the csv file whose 'image' column lists the image names
    :param preprocessed_dataset_path: Path where to save preprocessed images
    :return:
    '''
    import os  # local import: the cell defining this function does not import os

    df_train = pd.read_csv(dataset_csv_path)

    # cv2.imwrite does not create missing directories.
    if preprocessed_dataset_path:
        os.makedirs(preprocessed_dataset_path, exist_ok=True)

    print('Start of preprocessing step of dataset ')
    for i, image_name in df_train['image'].items():
        source_file = os.path.join(dataset_path, image_name + '.jpg')
        img = cv2.imread(source_file, cv2.IMREAD_COLOR)
        if img is None:
            # imread signals a missing or unreadable file by returning None.
            print(str(i) + ': Could not read image ' + source_file + ', skipping')
            continue
        img = preprocess_image(img)
        target_file = os.path.join(preprocessed_dataset_path, image_name + '.jpg')
        if not cv2.imwrite(target_file, img):
            print(str(i) + ': Could not write image ' + target_file)
            continue
        print(str(i) + ': Preprocessed image ' + image_name)
    print('Finished preprocess step of dataset')
    print('preprocessed images saved in ' + preprocessed_dataset_path)
    print('Finished script')


def crop_dataset(dataset_path, dataset_csv_path):
    '''
    Crop every image listed in a csv file, overwriting the files in place
    :param dataset_path: Path to the directory holding the '<image>.jpg' files
    :param dataset_csv_path: Path to the csv file whose 'image' column lists the image names
    :return:
    '''
    import os  # local import: the cell defining this function does not import os

    df_dataset = pd.read_csv(dataset_csv_path)

    print('Start of dataset cropping step')
    for i, image_name in df_dataset['image'].items():
        image_file = os.path.join(dataset_path, image_name + '.jpg')
        img = cv2.imread(image_file, cv2.IMREAD_COLOR)
        if img is None:
            # imread signals a missing or unreadable file by returning None.
            print(str(i) + ': Could not read image ' + image_file + ', skipping')
            continue
        img = crop_image(img)
        # The cropped result replaces the original file.
        if not cv2.imwrite(image_file, img):
            print(str(i) + ': Could not write image ' + image_file)
            continue
        print(str(i) + ': Cropped image ' + image_name)
    print('Finished cropping step of dataset')
    print('cropped  images saved in ' + dataset_path)

**Create CSV file for testing images**

In [21]:
import os
import csv
import pandas as pd

# create CSV file of all images 
def create_testing_file_csv(datatset_path, testing_set_csv_path):
    '''
    Function to create csv file out of testing dataset
    Writes 'testing.csv' with a single 'image' column holding, in sorted
    order, the name (without extension) of every 'ISIC*.jpg' file found.
    :param datatset_path: Path of the testing dataset
    :param testing_set_csv_path: Directory in which testing.csv is written
    :return:
    '''
    # Split the extension off properly instead of cutting the last four
    # characters, and keep only .jpg files: later steps append '.jpg' to each
    # name, so any other file type would be unreadable there.
    image_names = sorted(
        stem
        for stem, extension in map(os.path.splitext, os.listdir(datatset_path))
        if extension == '.jpg' and stem[0:4] == "ISIC"
    )

    # save image filenames in csv
    with open(os.path.join(testing_set_csv_path, 'testing.csv'), 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['image'])
        writer.writerows([name] for name in image_names)
    print("CSV file is created successfully.")
    

def create_results_file_csv(results_path, csv_name, images, predictions, max_predictions, unknown_threshold=0.25):
    '''
    Write one one-hot encoded row per image to '<results_path>/<csv_name>.csv'
    :param results_path: Path to the results directory
    :param csv_name: Name of the csv file (without the '.csv' extension)
    :param images: array of image names
    :param predictions: Array of predicted class (index into the class columns, 0-8)
    :param max_predictions: Array with probability of the predictions
    :param unknown_threshold: images whose probability is below this value are
                              written as 'UNK' regardless of the predicted class
    :return:
    '''
    # Column order matches the class index mapping
    # {'AK': 0, 'BCC': 1, 'BKL': 2, 'DF': 3, 'MEL': 4, 'NV': 5, 'SCC': 6, 'VASC': 7}
    # with 'UNK' appended as index 8.
    class_columns = ['AK', 'BCC', 'BKL', 'DF', 'MEL', 'NV', 'SCC', 'VASC', 'UNK']
    unknown_index = len(class_columns) - 1

    # check length of the inputs; on a mismatch only the rows present in all
    # three inputs are written (zip stops at the shortest one)
    if len(images) != len(predictions) or len(predictions) != len(max_predictions):
        print("!!!!! Length is not the same of the image array and prediction array !!!!!    image lenght = ", len(images) , " prediction length = ", len(predictions), " max prediction length = ", len(max_predictions))

    # save imagesnames and predictions into csv file
    with open(os.path.join(results_path, csv_name + '.csv'), 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['image'] + class_columns)
        for i, (image, predicted, confidence) in enumerate(zip(images, predictions, max_predictions)):
            if confidence < unknown_threshold:
                hot = unknown_index
            elif predicted in range(len(class_columns)):
                hot = int(predicted)
            else:
                # No row is written for a class index outside 0-8.
                print('Error! this class is unknown! Number:', i, 'Prediction:', predicted, 'images:', image)
                continue
            one_hot = [0] * len(class_columns)
            one_hot[hot] = 1
            writer.writerow([image] + one_hot)

    print("CSV file is created successfully.")
                 
    
    
def getImageTestingNames(testing_set_csv_path):
    '''
    Get all testing images name
    :param testing_set_csv_path: Path to a csv file with an 'image' column
    :return: List of image names, in file order
    '''
    df_testing = pd.read_csv(testing_set_csv_path)
    # Read the column directly instead of looping over the rows one by one.
    return df_testing['image'].tolist()



def getMaxPredictions(predicted_testing_prob):
    '''
    Pick the highest class probability of every image
    :param predicted_testing_prob: Probabilities for each class of every image
    :return: List with one maximum probability per image
    '''
    return [max(class_probabilities) for class_probabilities in predicted_testing_prob]

**Pipeline**

In [None]:
# Set paths
# dataset_path: folder with the unpacked test images.
# testing_csv: csv listing the test image names; testing_csv_path is the
# folder it is written into.
dataset_path = '/content/drive/MyDrive/uni/pastukai/temp/dataset/'
testing_csv = '/content/drive/MyDrive/uni/pastukai/data/testing.csv'
testing_csv_path = '/content/drive/MyDrive/uni/pastukai/data/'
# NOTE(review): result_csv_path is not referenced by the cells visible in this
# notebook; the final cell writes 'results.csv' into testing_csv_path instead.
result_csv_path = '/content/drive/MyDrive/uni/pastukai/data/result.csv'

# Weights file loaded into the MobileNet model below.
model_h5_path = "/content/drive/MyDrive/uni/pastukai/model.h5"

# NOTE(review): class_labels is not referenced by the visible cells, and its
# order differs from the column order written by create_results_file_csv.
class_labels = ['MEL','NV','BCC','AK','BKL','DF','VASC','SCC']

# Model / generator settings. IMG_SIZE is read by build_model_mobilenet and the
# data generator; batch_size by the data generator.
# NOTE(review): dropout_rate and epochs are not referenced by the visible cells
# (build_model_mobilenet hard-codes its dropout rate).
NUM_CLASSES = 8
IMG_SIZE = 160
dropout_rate = 0.4
batch_size = 20
epochs = 50
print('Build directory structure')

In [None]:
# Write testing.csv into testing_csv_path, listing the image files found in
# dataset_path.
create_testing_file_csv(dataset_path, testing_csv_path)

In [None]:
# Skip this step if you intend to use the last dataset split
# Remove black border from the testing images listed in testing.csv.
# crop_dataset writes each cropped image back over its source file, so running
# this cell a second time crops images that were already cropped.
crop_dataset(dataset_path, testing_csv)

In [5]:
# read csv file for testing

def read_csv_files(testing_csv):
    '''
    Read the testing csv file and give every image a placeholder class
    :param testing_csv: Path to the testing csv file (needs an 'image' column)
    :return: dict testing_classes (key: image_name, value: placeholder class string)
    '''
    image_column = pd.read_csv(testing_csv)['image']
    return {image_name: "unkown class" for image_name in image_column}

In [6]:
# Read the testing csv into a dict (image name -> placeholder class).
# NOTE: despite its name, testing_df is a plain dict in this cell; the next
# cell turns it into a pandas DataFrame.
testing_df = read_csv_files(testing_csv)

# imageNames keeps the bare image names (no extension) in csv order; it is
# used for the 'image' column of the results file.
imageNames = []
# new_testing maps '<image>.jpg' to the placeholder class, i.e. the file names
# the data generator looks up inside dataset_path.
new_testing = {}

for key,value in testing_df.items():
  new_testing[key+'.jpg'] = value 
  imageNames.append(key)

testing_df = new_testing

In [None]:
# Turn the {file name: class} dict into a two-column DataFrame for
# flow_from_dataframe.
# NOTE: this cell rebinds testing_df from a dict to a DataFrame, so it must be
# run exactly once after the previous cell; re-running it on its own would
# iterate the DataFrame's columns instead of the file names.
testing_df = pd.DataFrame(list(testing_df.items()), columns=['image_name','class'])

**Build, load and run model**

In [9]:
from tensorflow.keras.applications import EfficientNetB5
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras import models
from tensorflow.keras import layers
import tensorflow as tf
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from keras import optimizers
from keras.applications import VGG16
from keras.applications import MobileNet
from keras.layers import Dense,GlobalAveragePooling2D,Flatten,Dropout,BatchNormalization
from keras.layers import Conv2D, MaxPooling2D,Input
from keras.models import Model
from keras.models import load_model
from keras.preprocessing.image import ImageDataGenerator
import numpy as np


def build_model_mobilenet(num_classes):
  '''
  Build and compile a MobileNet-based classifier.

  An ImageNet-pretrained MobileNet without its top layer is followed by global
  average pooling and a stack of dense / dropout / batch-norm layers ending in
  a softmax over num_classes. The first 85 layers of the combined model are
  frozen, the remaining ones are trainable. The input size is taken from the
  module-level IMG_SIZE. Prints the layer count and the model summary.

  :param num_classes: number of units in the final softmax layer
  :return: compiled Keras Model
  '''
  base_model=MobileNet(weights='imagenet',include_top=False,input_shape=(IMG_SIZE, IMG_SIZE, 3)) #imports the mobilenet model and discards the last 1000 neuron layer.

  x=base_model.output
  x=GlobalAveragePooling2D()(x)
  # NOTE(review): the dropout rate is hard-coded here and below; the
  # module-level dropout_rate setting is not used.
  x=Dropout(0.4)(x)

  x=Dense(300,activation='relu')(x) #we add dense layers so that the model can learn more complex functions and classify for better result
  x=Dropout(0.4)(x)
  x=BatchNormalization()(x)
  x=Dense(100,activation='relu')(x) #dense layer 2
  x=Dropout(0.4)(x)

  x=Dense(50,activation='relu')(x) #dense layer 3
  preds=Dense(num_classes,activation='softmax')(x) #final layer with softmax activation

  model=Model(inputs=base_model.input,outputs=preds)
  # Print the total number of layers (useful when choosing the freeze boundary).
  print(len(model.layers[:]))
  # Freeze layers 0-84; everything from layer 85 on stays trainable.
  for layer in model.layers[:85]:
    layer.trainable=False
  for layer in model.layers[85:]:
    layer.trainable=True
  model.summary()
  # NOTE(review): newer Keras releases name this optimizer argument
  # `learning_rate` instead of `lr` - confirm against the installed version.
  model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.SGD(lr=0.1),
              metrics=['acc'])
  return model

In [None]:
# build and load model
# The architecture is rebuilt in code and only the weights are read from
# model_h5_path.
# NOTE(review): presumably the weights file was saved from a model built by
# this same function - confirm, the layer layout has to match.

model = build_model_mobilenet(num_classes=NUM_CLASSES)
model.load_weights(model_h5_path)


**Predict data with model**

In [None]:
# Prediction of testing data
# Pixel values are scaled to [0, 1]; no augmentation is applied.
datagen = ImageDataGenerator(rescale=1. / 255)

# Unlabelled iterator over the files named in testing_df['image_name'].
# shuffle=False keeps the batches in DataFrame order, which the final cell
# relies on when it pairs the predictions with imageNames.
testing_iterator = datagen.flow_from_dataframe(dataframe=testing_df,
                                                directory=dataset_path,
                                                x_col="image_name",
                                                y_col=None,
                                                batch_size=batch_size,
                                                seed=42,
                                                shuffle=False,
                                                class_mode=None,
                                                target_size=(IMG_SIZE, IMG_SIZE))

In [None]:
# predict testing data with the model
# Start from the first batch so the predictions line up with the file order.
testing_iterator.reset()
# Model.predict accepts the iterator directly; predict_generator is deprecated.
pred=model.predict(testing_iterator,
                   verbose=1)

In [None]:
# save prediction in csv file
# argmax picks the most probable class index per image; getMaxPredictions
# supplies the matching probability, which decides whether the row is written
# as 'UNK'. The file is written as 'results.csv' inside testing_csv_path.
# NOTE(review): the class index -> column mapping in create_results_file_csv
# must match the class order the model was trained with - confirm.
predicted_class_indices=np.argmax(pred,axis=1)
create_results_file_csv(testing_csv_path,'results', imageNames, predicted_class_indices, getMaxPredictions(pred))