In [None]:
# Mount Google Drive at /content/drive/ so the following cells can read and
# write the project folders stored there (Colab-only import).
from google.colab import drive
drive.mount('/content/drive/')

In [None]:
# Create the temp and data folders on Drive (no error if they already exist),
# list the project folder, then make temp the working directory so the
# download cell below stores the archive there.
!mkdir -p "/content/drive/MyDrive/uni/pastukai/temp"
!mkdir -p "/content/drive/MyDrive/uni/pastukai/data"
!ls "/content/drive/MyDrive/uni/pastukai"
%cd "/content/drive/MyDrive/uni/pastukai/temp"


In [None]:
%%bash
# Download the ISIC 2019 training archive (only if it is not already in the
# working directory) and unpack all images flat into DATASET_DIR.

DATASET_DIR="/content/drive/MyDrive/uni/pastukai/temp/dataset/"
ARCHIVE="ISIC_2019_Training_Input.zip"

if [ ! -f "$ARCHIVE" ]; then
    echo "Downloading Training Data ..."
    # Download under a temporary name: `wget -O` creates the output file even
    # when the transfer fails, which would make the existence check above skip
    # the download on the next run and leave a truncated archive behind.
    if wget --show-progress --progress=bar:force https://isic-challenge-data.s3.amazonaws.com/2019/ISIC_2019_Training_Input.zip -O "$ARCHIVE.part"; then
        mv "$ARCHIVE.part" "$ARCHIVE"
    else
        rm -f "$ARCHIVE.part"
        echo "Download of $ARCHIVE failed" >&2
        exit 1
    fi
fi

echo "Unpacking ISIC_2019_Training_Input.zip ..."
# -j drops the directory structure stored in the archive; -n keeps files that
# were already extracted, so re-running the cell never prompts for overwrite.
unzip -q -j -n "$ARCHIVE" -d "$DATASET_DIR"

# Number of files in dataset folder.
ls "$DATASET_DIR" | wc -l

In [None]:
%%bash
# Sanity check: the dataset folder must exist and contain exactly the expected
# number of entries.

DATASET_DIR="/content/drive/MyDrive/uni/pastukai/temp/dataset/"

# 25333 is the hard-coded expected entry count of the unpacked archive
# (presumably images plus the archive's metadata files -- TODO confirm).
if [ -d $DATASET_DIR ] && [ $(ls -1 $DATASET_DIR | wc -l) -eq 25333 ]; then
    echo "Successfully built the dataset"
else
    echo "Error when building the dataset"
fi

**Split data**

In [4]:
import pandas as pd
import shutil
import cv2
import numpy as np
from skimage.util import random_noise
from skimage import img_as_ubyte, img_as_float
import random


def get_classification(data_series):
    '''
    Look up the class label stored in a one-hot encoded row.

    :param data_series: series whose entries are the one-hot flags, keyed by class name
    :return: key of the last entry equal to 1.0, or None when no entry is set
    '''
    hits = [label for label, flag in data_series.items() if flag == 1.0]
    return hits[-1] if hits else None


# Augmentation functions
def flip_image(img, vflip=False, hflip=False):
    '''
    Flip image vertically and/or horizontally
    :param img: ndarray, BGR image
    :param vflip: bool if vertically flip
    :param hflip: bool if horizontally flip
    :return: ndarray, BGR image; the input itself when no flip is requested
    '''
    if not (hflip or vflip):
        # Nothing to do: return the input instead of failing on an unbound result
        return img
    if hflip and vflip:
        flip_code = -1  # both axes
    else:
        flip_code = 0 if vflip else 1  # 0: around the x-axis, 1: around the y-axis
    return cv2.flip(img, flipCode=flip_code)


def decrease_brightness(img, amount=50):
    '''
    Decrease brightness of an 8-bit image with saturation at 0
    :param img: ndarray, BGR image (uint8)
    :param amount: value subtracted from every channel of every pixel
    :return: ndarray, BGR image (uint8)
    '''
    # Work in a signed, wider type: multiplying a uint8 array by a negative
    # number (as done previously) either overflows or silently changes dtype,
    # and cv2.add rejects operands of different types.
    darker = img.astype(np.int16) - amount
    return np.clip(darker, 0, 255).astype(np.uint8)


def add_noise_image(img):
    '''
    Add salt-and-pepper noise to an image
    :param img: ndarray, BGR image
    :return: ndarray, BGR image with noise applied
    '''
    # The noise is applied on a float RGB copy, then converted back to 8-bit BGR
    rgb_float = img_as_float(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    noisy_rgb = random_noise(rgb_float, mode='s&p', amount=0.011)
    return cv2.cvtColor(img_as_ubyte(noisy_rgb), cv2.COLOR_RGB2BGR)


def rotate_image(img, angle):
    '''
    Rotate image around its centre by a random angle
    :param img: ndarray, BGR image
    :param angle: bound of the rotation; the applied angle is drawn uniformly
                  from [-angle, angle] and truncated to an int
    :return: ndarray, BGR image of the same width and height
    '''
    drawn_angle = int(random.uniform(-angle, angle))
    height, width = img.shape[:2]
    centre = (int(width / 2), int(height / 2))
    rotation = cv2.getRotationMatrix2D(centre, drawn_angle, 1)
    return cv2.warpAffine(img, rotation, (width, height))


def _print_class_distribution(title, classes_counter):
    # Print one header followed by the image count of every class.
    print(title)
    for class_name, count in classes_counter.items():
        print('Class: ' + class_name)
        print('Number of images: ' + str(count))


def prepare_dataset(dataset_path, new_set_path, csv_path,
                    reduced_csv_path, sample_number=50, validation=False):
    '''
    Copy the images listed in a ground-truth csv into a new folder and write
    a csv describing the copied (and, for training, augmented) images.

    Training (validation=False): rows are shuffled, at most sample_number
    images are copied per class, and classes with fewer images are filled up
    to sample_number with augmented copies (flips, rotations, noise).
    Validation (validation=True): every row with a known class is copied, in
    csv order, without augmentation.

    :param dataset_path: Path to the original dataset with all images
    :param new_set_path: Path where to save images
    :param csv_path: Path to the original csv
    :param reduced_csv_path: Path to the new csv containing the pictures
    :param sample_number: Number of samples per class for training dataset
                          (original note: maximum 1000 -- not enforced here)
    :param validation: Bool if dataset is validation set
    :return: None; OSError problems are printed and abort the preparation
    '''
    try:
        new_dataset_rows = []
        df_dataset = pd.read_csv(csv_path)
        classes_counter = {'MEL': 0, 'NV': 0, 'BCC': 0, 'AK': 0, 'BKL': 0, 'DF': 0, 'VASC': 0, 'SCC': 0}

        # Shuffle dataset
        if not validation:
            df_dataset = df_dataset.sample(frac=1).reset_index(drop=True)

        # Copy the original images
        for _, image in df_dataset.iterrows():
            # The per-class cap only applies to the training set; stop as soon
            # as every class is full. (Applying it to validation would cut the
            # validation set short.)
            if not validation and min(classes_counter.values()) >= sample_number:
                break
            image_class = get_classification(image)
            if image_class not in classes_counter:
                # No label, or a label that is not one of the eight classes
                continue
            if validation or classes_counter[image_class] < sample_number:
                # Copy the untouched original; a resized version written here
                # before was always overwritten by this copy.
                shutil.copy(dataset_path + image['image'] + '.jpg', new_set_path)
                new_dataset_rows.append(dict(image))
                classes_counter[image_class] += 1

        # Get distribution printed before augmentation
        _print_class_distribution('Dataset distribution:', classes_counter)
        print('##########################################')

        if not validation:
            # Data augmentation
            for class_name in classes_counter:
                print('\n Data augmentation for class ' + class_name)

                if classes_counter[class_name] >= sample_number:
                    print(class_name + ' has ' + str(classes_counter[class_name])
                          + ' images. Skipping augmenatation for this class')
                    continue

                for _, image in df_dataset.iterrows():
                    if classes_counter[class_name] >= sample_number:
                        break
                    if get_classification(image) != class_name:
                        continue

                    print('Add augmented images for ' + image['image'])
                    print('Number of samples for class: ' + class_name + '     ' + str(classes_counter[class_name]))
                    img = cv2.imread(dataset_path + image['image'] + '.jpg')
                    if img is None:
                        # cv2.imread signals an unreadable file with None
                        print('Could not read ' + image['image'] + ', skipping augmentation')
                        continue
                    img = cv2.resize(img, (600, 600))

                    # The position in this list becomes the file-name suffix
                    augmented_images = [
                        flip_image(img, hflip=True),
                        flip_image(img, vflip=True),
                        rotate_image(img, 270),
                        add_noise_image(img),
                        rotate_image(img, 90),
                    ]

                    for idx, aug_image in enumerate(augmented_images):
                        if classes_counter[class_name] >= sample_number:
                            # Class is full: do not overshoot the requested size
                            break
                        aug_image_name = f"{image['image']}_{idx}"
                        cv2.imwrite(new_set_path + aug_image_name + ".jpg", aug_image)
                        aug_row = dict(image)
                        aug_row['image'] = aug_image_name
                        new_dataset_rows.append(aug_row)
                        classes_counter[class_name] += 1

            print('############################################')
            # Get distribution printed
            _print_class_distribution('Training set distribution:', classes_counter)

        pd.DataFrame(new_dataset_rows).to_csv(reduced_csv_path, index=False)

    except OSError as e:
        # Missing csv/image or unwritable target: report it and do not claim success
        print(e)
        return

    print("Data structure created")


def read_csv_files(train_csv, val_csv):
    '''
    Load the training and validation csv files as image-name -> class mappings
    :param train_csv: Path to train csv file
    :param val_csv: Path to val csv file
    :return: dict train_classes (key: image_name, value: class), dict val_classes (key: image_name, value: class)
    '''
    def collect_classes(frame):
        # Rows without any class flag set are left out
        labelled = {}
        for _, row in frame.iterrows():
            label = get_classification(row)
            if label is not None:
                labelled[row['image']] = label
        return labelled

    df_train = pd.read_csv(train_csv)
    df_val = pd.read_csv(val_csv)
    return collect_classes(df_train), collect_classes(df_val)

**Preprocess**

In [6]:
import cv2
import numpy as np
import pandas as pd
from skimage import exposure, morphology, filters, img_as_ubyte, img_as_float
from skimage.color.adapt_rgb import adapt_rgb, each_channel


def enlarge_image(img):
    '''
    Centre the image on a black 700x700 canvas
    :param img: ndarray, BGR image
    :return: ndarray, BGR image of size 700x700
    '''
    side = 700
    height, width = img.shape[0], img.shape[1]
    left = (side - width) // 2
    top = (side - height) // 2
    canvas = np.zeros((side, side, 3), np.uint8)
    canvas[top:height + top, left:left + width] = img
    return canvas


def reduce_image(img):
    '''
    Cut the central 600x600 window out of the image, dropping the outer border
    :param img: ndarray, BGR image
    :return: ndarray, BGR image
    '''
    target = 600
    top = (img.shape[0] - target) // 2
    left = (img.shape[1] - target) // 2
    return img[top:target + top, left:left + target]


def reduce_mask(img):
    '''
    Cut the central 600x600 window out of a mask
    :param img: ndarray, binary image
    :return: ndarray, binary image
    '''
    row_start = (img.shape[0] - 600) // 2
    col_start = (img.shape[1] - 600) // 2
    window = (slice(row_start, 600 + row_start), slice(col_start, col_start + 600))
    return img[window]


def remove_black_border(gray_image):
    '''
    Build a mask of the image content without its black border
    :param gray_image: ndarray, grayscale image
    :return: ndarray, binary 700x700 mask (255 inside the content area)
    '''
    # Everything brighter than 10 counts as image content
    _, bright = cv2.threshold(gray_image, 10, 255, cv2.THRESH_BINARY)
    (outlines, _) = cv2.findContours(bright, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    largest = max(outlines, key=cv2.contourArea)

    # Fill the largest outline on an empty mask
    filled = np.zeros((700, 700), np.uint8)
    cv2.drawContours(filled, [largest], -1, 255, thickness=cv2.FILLED)

    # Erode so the mask edge moves away from the border
    shrunk = cv2.erode(filled, np.ones((15, 15), np.uint8), iterations=3)
    _, border_free = cv2.threshold(shrunk, 10, 255, cv2.THRESH_BINARY)
    return border_free


@adapt_rgb(each_channel)
def morph_closing_each(image, struct_element):
    '''
    Morphological closing of an image with the given structuring element.
    Wrapped with adapt_rgb(each_channel) so it can be called on colour images
    (per scikit-image, the wrapped function is applied to each channel).
    :param image: ndarray, image to filter
    :param struct_element: ndarray, structuring element passed to morphology.closing
    :return: ndarray, filtered image
    '''
    return morphology.closing(image, struct_element)


@adapt_rgb(each_channel)
def median_filter_each(image, struct_element):
    '''
    Median filter of an image over the given neighbourhood.
    Wrapped with adapt_rgb(each_channel) so it can be called on colour images
    (per scikit-image, the wrapped function is applied to each channel).
    :param image: ndarray, image to filter
    :param struct_element: ndarray, neighbourhood passed to filters.median
    :return: ndarray, filtered image
    '''
    return filters.median(image, struct_element)


# Disk of radius 7 shared by the closing and median filters in noise_removal()
structuring_element = morphology.disk(7)


def crop_center_rgb(img, cropx, cropy):
    '''
    Cut a window of the requested size out of the middle of the image
    :param img: ndarray, BGR image
    :param cropx: width in int
    :param cropy: height in int
    :return: ndarray, BGR cropped image
    '''
    height, width, _ = img.shape
    left = width // 2 - (cropx // 2)
    top = height // 2 - (cropy // 2)
    rows = slice(top, top + cropy)
    cols = slice(left, left + cropx)
    return img[rows, cols, :]


def noise_removal(img):
    '''
    Remove noise in the image: adaptive histogram equalisation, then a
    per-channel morphological closing, then a per-channel median filter
    :param img: ndarray, BGR image
    :return: ndarray, BGR filtered image
    '''
    stages = (
        exposure.equalize_adapthist,
        lambda channels: morph_closing_each(channels, structuring_element),
        lambda channels: median_filter_each(channels, structuring_element),
    )
    # The filters work on a float RGB image
    rgb = img_as_float(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    for stage in stages:
        rgb = stage(rgb)
    return cv2.cvtColor(img_as_ubyte(rgb), cv2.COLOR_RGB2BGR)


# Not used anymore
# See https://www.kaggle.com/apacheco/shades-of-gray-color-constancy
def shade_of_gray_cc(img, power=6, gamma=None):
    """
    Shades-of-gray colour constancy.

    img (numpy array): the original image with format of (h, w, c)
    power (int): the degree of norm, 6 is used in reference paper
    gamma (float): the value of gamma correction, 2.2 is used in reference paper
    Returns the corrected image, clipped to [0, 255] and cast back to the input dtype.
    """
    original_dtype = img.dtype

    if gamma is not None:
        # Optional gamma correction through a 256-entry lookup table
        img = img.astype('uint8')
        gamma_lut = np.zeros((256, 1), dtype='uint8')
        for level in range(256):
            gamma_lut[level][0] = 255 * pow(level / 255, 1 / gamma)
        img = cv2.LUT(img, gamma_lut)

    img = img.astype('float32')
    # Minkowski norm of every channel, normalised to a unit vector
    channel_norms = np.power(np.mean(np.power(img, power), (0, 1)), 1 / power)
    channel_norms = channel_norms / np.sqrt(np.sum(np.power(channel_norms, 2.0)))
    gains = 1 / (channel_norms * np.sqrt(3))

    # Andrew Anikin suggestion
    corrected = np.clip(np.multiply(img, gains), a_min=0, a_max=255)
    return corrected.astype(original_dtype)


def preprocess_image(img):
    '''
    Run the preprocessing chain on one image (currently noise removal only)
    :param img: ndarray, BGR image
    :return:  ndarray, BGR image
    '''
    return noise_removal(img)


def crop_image(img):
    '''
    Black out and cut away dark border areas, returning a 600x600 image
    :param img: ndarray, BGR image
    :return: ndarray, BGR image
    '''
    # Pad to 700x700 so the border detection has room around the picture
    padded = enlarge_image(cv2.resize(img, (600, 600)))

    # Remove black border
    border_mask = remove_black_border(cv2.cvtColor(padded, cv2.COLOR_BGR2GRAY))
    border_mask = reduce_mask(border_mask)
    content = reduce_image(padded)
    content[border_mask == 0] = 0

    # Crop to the bounding box of the mask and scale back up
    x, y, w, h = cv2.boundingRect(cv2.findNonZero(border_mask))
    content = cv2.resize(content[y:y + h, x:x + w], (600, 600))

    # Crop from the center the image if the borders are black
    gray = cv2.cvtColor(content, cv2.COLOR_BGR2GRAY)
    corners_are_dark = all(gray[r][c] < 10 for r in (0, -1) for c in (0, -1))
    if corners_are_dark:
        content = cv2.resize(crop_center_rgb(content, 400, 400), (600, 600))
    return content


def preprocess_dataset(dataset_path, dataset_csv_path, preprocessed_dataset_path):
    '''
    Preprocess every image listed in a csv file and save the result
    :param dataset_path: Path to the folder with the input images
    :param dataset_csv_path: Path to the csv file listing the images (column 'image')
    :param preprocessed_dataset_path: Path where to save preprocessed images
    :return:
    '''

    df_dataset = pd.read_csv(dataset_csv_path)

    print('Start of preprocessing step of dataset ')
    for i, image in df_dataset.iterrows():
        source_file = dataset_path + image['image'] + '.jpg'
        img = cv2.imread(source_file, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread returns None for a missing or unreadable file; skip it
            # instead of crashing inside the OpenCV calls further down
            print(str(i) + ': Could not read ' + source_file + ', skipping')
            continue
        img = preprocess_image(img)
        cv2.imwrite(preprocessed_dataset_path + image['image'] + '.jpg', img)
        print(str(i) + ': Preprocessed image ' + image['image'])
    print('Finished preprocess step of dataset')
    print('preprocessed images saved in ' + preprocessed_dataset_path)
    print('Finished script')

def crop_dataset(dataset_path, dataset_csv_path):
    '''
    Crop every image listed in a csv file, overwriting the image in place
    :param dataset_path: Path to the folder with the images
    :param dataset_csv_path: Path to the csv file listing the images (column 'image')
    :return:
    '''

    df_dataset = pd.read_csv(dataset_csv_path)

    print('Start of dataset cropping step')
    for i, image in df_dataset.iterrows():
        image_file = dataset_path + image['image'] + '.jpg'
        img = cv2.imread(image_file, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread returns None for a missing or unreadable file; leave
            # it untouched instead of crashing inside crop_image
            print(str(i) + ': Could not read ' + image_file + ', skipping')
            continue
        img = crop_image(img)
        # Same path as the input: the original image is replaced
        cv2.imwrite(image_file, img)
        print(str(i) + ': Cropped image ' + image['image'])
    print('Finished cropping step of dataset')
    print('cropped  images saved in ' + dataset_path)

**Helper Functions**

In [7]:
def plot_confusion_matrix(true_labels, predicted_labels, class_labels):
    '''
    Draw the confusion matrix as an annotated heat map
    :param true_labels: ground-truth label of every sample
    :param predicted_labels: predicted label of every sample
    :param class_labels: class names; shown sorted on both axes
    :return:
    '''
    conf_matrix = confusion_matrix(true_labels, predicted_labels)
    # Transposed so that rows are predicted labels and columns are true labels
    cells = conf_matrix.T
    tick_labels = sorted(class_labels)

    # Drawn with matplotlib only: seaborn (`sns`) is never imported in this notebook
    fig, ax = plt.subplots()
    ax.set_title('Confusion matrix')
    ax.imshow(cells, cmap='Blues')
    ax.set_xticks(list(range(cells.shape[1])))
    ax.set_yticks(list(range(cells.shape[0])))
    ax.set_xticklabels(tick_labels)
    ax.set_yticklabels(tick_labels)

    # Write the count into every cell; light text on dark cells
    contrast_limit = cells.max() / 2 if cells.size else 0
    for row in range(cells.shape[0]):
        for col in range(cells.shape[1]):
            ax.text(col, row, format(cells[row, col], 'd'),
                    ha='center', va='center',
                    color='white' if cells[row, col] > contrast_limit else 'black')

    ax.set_xlabel('true label')
    ax.set_ylabel('predicted label')
    fig.tight_layout()
    plt.show()


import matplotlib.pyplot as plt


def plot_hist(hist):
    '''
    Plot training and validation accuracy per epoch
    :param hist: object with a `history` dict, as returned by model.fit
    :return:
    '''
    metrics = hist.history
    # The key depends on how the metric was named at compile time
    # ('accuracy' vs. 'acc'); accept both.
    train_key = "accuracy" if "accuracy" in metrics else "acc"
    plt.plot(metrics[train_key])
    plt.plot(metrics["val_" + train_key])
    plt.title("model accuracy")
    plt.ylabel("accuracy")
    plt.xlabel("epoch")
    plt.legend(["train", "validation"], loc="upper left")
    plt.show()

**Pipeline**

In [8]:
import os
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn import svm
from sklearn.metrics import confusion_matrix, f1_score, precision_score, \
                            recall_score, accuracy_score, classification_report
import pandas as pd

In [None]:
# Set paths
train_set_path = "/content/drive/MyDrive/uni/pastukai/data/images/training_set/"
val_set_path = "/content/drive/MyDrive/uni/pastukai/data/images/val_set/"

dataset_path = '/content/drive/MyDrive/uni/pastukai/temp/dataset/'
train_csv = '/content/drive/MyDrive/uni/pastukai/data/groundtruth_train.csv'
train_reduced_csv = '/content/drive/MyDrive/uni/pastukai/data/reduced_groundtruth_train.csv'
val_csv = '/content/drive/MyDrive/uni/pastukai/data/groundtruth_val.csv'
val_reduced_csv = '/content/drive/MyDrive/uni/pastukai/data/reduced_groundtruth_val.csv'

pastukai_path = "/content/drive/MyDrive/uni/pastukai/"

# Preprocess data
do_preprocess = True
sample_number=400

class_labels = ['MEL','NV','BCC','AK','BKL','DF','VASC','SCC']

# Create directories
os.makedirs(train_set_path, exist_ok=True)
os.makedirs(val_set_path, exist_ok=True)

# Create directory for each class
#for item in class_labels:
#  os.makedirs(train_set_path + item)

NUM_CLASSES = 8
IMG_SIZE = 160
dropout_rate = 0.4
batch_size = 20
epochs = 50
print('Build directory structure')

!ls  '/content/drive/MyDrive/uni/pastukai/data'

In [None]:
# Prepare training data: copy up to sample_number images per class into
# train_set_path, augment under-represented classes and write train_reduced_csv
prepare_dataset(dataset_path,train_set_path, train_csv, train_reduced_csv, sample_number, validation=False)

In [None]:
# Prepare validation data: validation=True means no shuffling and no
# augmentation; sample_number is left at its default. Writes val_reduced_csv.
prepare_dataset(dataset_path,val_set_path, 
                        val_csv, val_reduced_csv, validation=True)

In [None]:
# Skip this step if you intend to use the last dataset split
# Remove black border from training images (the files in train_set_path are
# overwritten in place)
crop_dataset(train_set_path, train_reduced_csv)

In [None]:
# Skip this step if you intend to use the last dataset split
# Remove black border from validation images (the files in val_set_path are
# overwritten in place)
crop_dataset(val_set_path, val_reduced_csv)

In [None]:
# Preprocess data and save preprocessed training images.
# Requires preprocessed_train_set_path to be defined before this cell runs.
if do_preprocess:
    preprocess_dataset(train_set_path, train_reduced_csv, preprocessed_train_set_path)

In [None]:
# Preprocess data and save preprocessed validation images.
# Requires preprocessed_val_set_path to be defined before this cell runs.
if do_preprocess:
    preprocess_dataset(val_set_path, val_reduced_csv, preprocessed_val_set_path)

In [10]:
# Read the reduced csv files as image-name -> class mappings
train_df, val_df = read_csv_files(train_reduced_csv, val_reduced_csv)

# Same mappings keyed by file name (image name + '.jpg')
new_train = {name + '.jpg': label for name, label in train_df.items()}
new_val = {name + '.jpg': label for name, label in val_df.items()}

# Validation image names without extension, in csv order
imageNames = list(val_df)

train_df = new_train
val_df = new_val

In [11]:
# Two-column frames (file name, class) for flow_from_dataframe.
# Built from new_train/new_val rather than from train_df/val_df so that
# re-running this cell gives the same result instead of feeding an already
# converted DataFrame back into `.items()`.
train_df = pd.DataFrame(list(new_train.items()), columns=['image_name','class'])
val_df = pd.DataFrame(list(new_val.items()), columns=['image_name','class'])

In [24]:
# Models

from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras import models
from tensorflow.keras import layers
import tensorflow as tf
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from keras import optimizers
from keras.applications import VGG16
from keras.applications import MobileNet, MobileNetV2, DenseNet121, InceptionV3, EfficientNetB5
from keras.layers import Dense,GlobalAveragePooling2D,Flatten,Dropout,BatchNormalization,Activation
from keras.layers import Conv2D, MaxPooling2D,Input
from keras.models import Model


def build_model_effnet(num_classes):
  '''
  EfficientNetB5 (ImageNet weights, no top) followed by global max pooling,
  optional dropout (global dropout_rate) and a softmax classifier.
  :param num_classes: number of output classes
  :return: compiled model
  '''
  input_shape = (IMG_SIZE,IMG_SIZE,3)
  conv_base = EfficientNetB5(weights='imagenet', include_top=False, input_shape=input_shape)

  model = models.Sequential()
  model.add(conv_base)
  model.add(layers.GlobalMaxPooling2D(name="gap"))
  if dropout_rate > 0:
      model.add(layers.Dropout(dropout_rate, name="dropout_out"))
  model.add(layers.Dense(num_classes, activation='softmax', name="fc_out"))
  # `learning_rate` is the supported argument name; the old `lr` alias is
  # deprecated and rejected by newer Keras releases
  model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.RMSprop(learning_rate=0.1),
              metrics=['acc'])
  model.summary()
  return model

def _dense_classification_head(x):
  # Pooling plus three dense layers (300/100/50) with dropout and batch norm;
  # shared by the MobileNet, InceptionV3 and DenseNet121 builders.
  x=GlobalAveragePooling2D()(x)
  x=Dropout(0.4)(x)

  x=Dense(300,activation='relu')(x) #dense layer 1
  x=Dropout(0.4)(x)
  x=BatchNormalization()(x)
  x=Dense(100,activation='relu')(x) #dense layer 2
  x=Dropout(0.4)(x)

  x=Dense(50,activation='relu')(x) #dense layer 3
  return x


def _finish_transfer_model(base_model, features, num_classes, frozen_layers=85):
  # Add the softmax output, freeze the first `frozen_layers` layers of the
  # whole model, print the summary and compile with SGD.
  preds=Dense(num_classes,activation='softmax')(features) #final layer with softmax activation

  model=Model(inputs=base_model.input,outputs=preds)
  print(len(model.layers[:]))
  for layer in model.layers[:frozen_layers]:
    layer.trainable=False
  for layer in model.layers[frozen_layers:]:
    layer.trainable=True
  model.summary()
  # `learning_rate` is the supported argument name; the old `lr` alias is
  # deprecated and rejected by newer Keras releases
  model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.SGD(learning_rate=0.1),
              metrics=['acc'])
  return model


def build_model_mobilenet(num_classes):
  '''
  MobileNet (ImageNet weights, no top) with the shared dense head.
  :param num_classes: number of output classes
  :return: compiled model with the first 85 layers frozen
  '''
  base_model=MobileNet(weights='imagenet',include_top=False,input_shape=(IMG_SIZE, IMG_SIZE, 3)) #imports the MobileNet model without its 1000-class top layer
  return _finish_transfer_model(base_model, _dense_classification_head(base_model.output), num_classes)


def build_model_MobileNetV2(num_classes):
  '''
  MobileNetV2 (ImageNet weights, no top) with global average pooling and
  dropout 0.2 directly in front of the softmax layer.
  :param num_classes: number of output classes
  :return: compiled model with the first 85 layers frozen
  '''
  base_model=MobileNetV2(weights='imagenet',include_top=False,input_shape=(IMG_SIZE, IMG_SIZE, 3)) #imports the MobileNetV2 model without its 1000-class top layer

  x=base_model.output
  x=GlobalAveragePooling2D()(x)
  x=Dropout(0.2)(x)
  return _finish_transfer_model(base_model, x, num_classes)


def build_model_InceptionV3(num_classes):
  '''
  InceptionV3 (ImageNet weights, no top) with the shared dense head.
  :param num_classes: number of output classes
  :return: compiled model with the first 85 layers frozen
  '''
  base_model=InceptionV3(weights='imagenet',include_top=False,input_shape=(IMG_SIZE, IMG_SIZE, 3)) #imports the InceptionV3 model without its 1000-class top layer
  return _finish_transfer_model(base_model, _dense_classification_head(base_model.output), num_classes)


def build_model_DenseNet121(num_classes):
  '''
  DenseNet121 (ImageNet weights, no top) with the shared dense head.
  :param num_classes: number of output classes
  :return: compiled model with the first 85 layers frozen
  '''
  base_model=DenseNet121(weights='imagenet',include_top=False,input_shape=(IMG_SIZE, IMG_SIZE, 3)) #imports the DenseNet121 model without its 1000-class top layer
  return _finish_transfer_model(base_model, _dense_classification_head(base_model.output), num_classes)


def own_model(num_classes):
  '''
  Small CNN trained from scratch: three 3x3 convolutions (50/75/125 filters)
  with max pooling and dropout, a 64-unit dense layer and a softmax output.
  :param num_classes: number of output classes
  :return: compiled model
  '''
  model = Sequential()
  model.add(Conv2D(50, kernel_size=(3,3), strides=(1,1), padding='same', activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 3)))

  # convolutional layer
  model.add(Conv2D(75, kernel_size=(3,3), strides=(1,1), padding='same', activation='relu'))
  model.add(MaxPooling2D(pool_size=(2,2)))
  model.add(Dropout(0.25))

  model.add(Conv2D(125, kernel_size=(3,3), strides=(1,1), padding='same', activation='relu'))
  model.add(MaxPooling2D(pool_size=(2,2)))
  model.add(Dropout(0.25))

  model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
  model.add(Dense(64))
  model.add(Activation('relu'))
  model.add(Dropout(0.5))
  model.add(Dense(num_classes))
  model.add(Activation('softmax'))

  # Single-label multi-class output (softmax over num_classes, one-hot
  # targets): the matching loss is categorical, not binary, cross-entropy.
  # With the binary loss Keras also reports per-output binary accuracy.
  model.compile(loss='categorical_crossentropy',
                optimizer='rmsprop',
                metrics=['accuracy'])

  model.summary()
  return model

In [None]:
from keras.preprocessing.image import ImageDataGenerator

# Pixel values are scaled to [0, 1]; no further augmentation at load time
datagen = ImageDataGenerator(rescale=1. / 255)

# Options shared by the training and the validation iterator
flow_options = dict(x_col='image_name',
                    y_col='class',
                    target_size=(IMG_SIZE, IMG_SIZE),
                    batch_size=batch_size,
                    color_mode='rgb',
                    class_mode='categorical')

train_iterator = datagen.flow_from_dataframe(train_df,
                                             directory=train_set_path,
                                             **flow_options)

val_iterator = datagen.flow_from_dataframe(val_df,
                                           directory=val_set_path,
                                           **flow_options)

In [None]:
# Build the small CNN and train it on the training iterator, validating on the
# validation iterator after every epoch. Steps use integer division, so a
# final partial batch is not counted.
# NOTE(review): epochs is hard-coded to 160 here; the `epochs` value set in the
# configuration cell is not used -- confirm which one is intended.
model = own_model(num_classes=NUM_CLASSES)
history = model.fit(train_iterator,
                    steps_per_epoch= train_iterator.samples // batch_size,
                    epochs=160,
                    validation_data=val_iterator,
                    validation_steps= val_iterator.samples // batch_size,
                    verbose=1)

In [None]:
# The accuracy key depends on the metric name used at compile time
# ('acc' for the transfer-learning builders, 'accuracy' for own_model)
training_metrics = history.history
acc_key = 'acc' if 'acc' in training_metrics else 'accuracy'

acc = training_metrics[acc_key]
val_acc = training_metrics['val_' + acc_key]
loss = training_metrics['loss']
val_loss = training_metrics['val_loss']

epochs_x = range(len(acc))

plt.plot(epochs_x, acc, 'bo', label='Training acc')
plt.plot(epochs_x, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()

plt.figure()

plt.plot(epochs_x, loss, 'bo', label='Training loss')
plt.plot(epochs_x, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()

In [None]:
# Persist the trained model as an HDF5 file in the project folder on Drive
model.save("/content/drive/MyDrive/uni/pastukai/model.h5")

**Create CSV file for validation data**

In [11]:
import os
import csv
import pandas as pd
    
def create_results_file_csv(results_path, csv_name, images, predictions, max_predictions,
                            unknown_threshold=0.25):
    '''
    Write one one-hot encoded prediction row per image to <results_path><csv_name>.csv

    :param results_path: Path to the results directory (used as a prefix, so it
                         has to end with a path separator)
    :param csv_name: Name of the csv file, without extension
    :param images: array of image names
    :param predictions: Array of predicted class indices; 0-7 follow the column
                        order AK, BCC, BKL, DF, MEL, NV, SCC, VASC and 8 is UNK
    :param max_predictions: Array with probability of the predictions
    :param unknown_threshold: predictions with a probability below this value
                              are written as UNK
    :return:
    '''
    class_columns = ['AK', 'BCC', 'BKL', 'DF', 'MEL', 'NV', 'SCC', 'VASC', 'UNK']
    unknown_index = len(class_columns) - 1

    # check length of the inputs; on a mismatch only the common prefix is
    # written (zip below) instead of failing halfway with an IndexError
    if len(images) != len(predictions) or len(predictions) != len(max_predictions):
        print("!!!!! Length is not the same of the image array and prediction array !!!!!    image lenght = ", len(images) , " prediction length = ", len(predictions), " max prediction length = ", len(max_predictions))

    # save imagesnames and predictions into csv file
    with open(results_path + csv_name + '.csv','w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['image'] + class_columns)
        for i, (image_name, predicted, confidence) in enumerate(zip(images, predictions, max_predictions)):
            if confidence < unknown_threshold:
                column = unknown_index
            elif predicted in range(len(class_columns)):
                column = int(predicted)
            else:
                print('Error! this class is unknown! Number:', i, 'Prediction:', predicted, 'images:', image_name)
                continue
            one_hot = [0] * len(class_columns)
            one_hot[column] = 1
            writer.writerow([image_name] + one_hot)

    print("CSV file is created successfully.")
                 
    
    
def getImageTestingNames(testing_set_csv_path):
    '''
    Read the 'image' column of a csv file
    :param testing_set_csv_path: Path to the csv file listing the images
    :return: Array of image names, in file order
    '''
    table = pd.read_csv(testing_set_csv_path)
    return [record['image'] for _, record in table.iterrows()]



def getMaxPredictions(predicted_testing_prob):
    '''
    Pick the highest class probability of every image
    :param predicted_testing_prob: Probabilities for each class of every image
    :return: Array of max probabilities
    '''
    return [max(scores) for scores in predicted_testing_prob]

In [None]:
import os

from keras.preprocessing.image import ImageDataGenerator
import numpy as np

datagen = ImageDataGenerator(rescale=1. / 255)


# Unshuffled, label-free iterator: predictions come back in file order
testing_iterator = datagen.flow_from_dataframe(dataframe=val_df,
                                                directory=val_set_path,
                                                x_col="image_name",
                                                y_col=None,
                                                batch_size=batch_size,
                                                seed=42,
                                                shuffle=False,
                                                class_mode=None,
                                                target_size=(IMG_SIZE, IMG_SIZE))

# Predict with the unshuffled iterator. val_iterator shuffles its samples, so
# its predictions cannot be matched to image names by position.
# Model.predict accepts generators directly; predict_generator is deprecated.
pred = model.predict(testing_iterator, verbose=1)
predicted_class_indices = np.argmax(pred, axis=1)

# Use the file names the iterator actually yielded (files it could not find are
# dropped), without extension, so names and predictions line up row by row.
predicted_image_names = [os.path.splitext(file_name)[0] for file_name in testing_iterator.filenames]

# NOTE(review): create_results_file_csv assumes class index i is the i-th class
# name in alphabetical order -- confirm against train_iterator.class_indices.
create_results_file_csv(pastukai_path,'prediction_val', predicted_image_names, predicted_class_indices, getMaxPredictions(pred))