<a href="https://colab.research.google.com/github/StankovicStevan/Master-s-Thesis-skin-diseases-detection-using-DL/blob/master/skin_diseases_recognition_using_ml.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Data analysis & engineering**

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install kaggle

In [None]:
import json
import os

# Location where the Kaggle client looks for its credentials file.
kaggle_dir = os.path.expanduser('~/.kaggle')
kaggle_json_path = os.path.join(kaggle_dir, 'kaggle.json')

# exist_ok=True keeps this cell re-runnable (a plain `mkdir` fails on the second run).
os.makedirs(kaggle_dir, exist_ok=True)

# Real credentials are taken from the environment when available, so they never have to be
# typed into (and saved with) the notebook; the placeholders are only a fallback.
api_token = {"username": os.environ.get("KAGGLE_USERNAME", "kaggle_username"),
             "key": os.environ.get("KAGGLE_KEY", "kaggle_key")}

with open(kaggle_json_path, 'w') as file:
    json.dump(api_token, file)

# Restrict the file to the current user (equivalent of `chmod 600`).
os.chmod(kaggle_json_path, 0o600)

In [None]:
!mkdir /content/skin_diseases_recognition_using_ml
!mkdir /content/skin_diseases_recognition_using_ml/data

In [None]:
!cp -av /content/drive/MyDrive/skin_diseases_recognition_using_ml/data/final_data_for_training_model /content/skin_diseases_recognition_using_ml/data/final_data_for_training_model

In [None]:
from kaggle.api.kaggle_api_extended import KaggleApi

In [None]:
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
!pip install keras_tuner

In [None]:
from keras import layers
from keras import optimizers
from keras.models import Model
from keras.models import Sequential
from keras.models import load_model
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import plot_model
from keras_tuner.tuners import RandomSearch
from tensorflow import Tensor

In [None]:
from PIL import Image

**If you need to connect to a TPU, run the following cells:**

In [None]:
import pprint
import tensorflow as tf

In [None]:
# Fail fast when the runtime does not expose a TPU address.
assert 'COLAB_TPU_ADDR' in os.environ, 'Missing TPU; did you request a TPU in Notebook Settings?'

# Build the gRPC endpoint from the address found in the environment.
# NOTE(review): while the assert above is active the `else` branch can never run.
if 'COLAB_TPU_ADDR' in os.environ:
  TF_MASTER = 'grpc://{}'.format(os.environ['COLAB_TPU_ADDR'])
else:
  TF_MASTER=''

# Name used by the following cells when creating the cluster resolver.
tpu_address = TF_MASTER

In [None]:
tpu_address

In [None]:
# Resolve the TPU at `tpu_address` and attach this runtime to it.
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu_address)
tf.config.experimental_connect_to_cluster(resolver)
# This is the TPU initialization code that has to be at the beginning
tf.tpu.experimental.initialize_tpu_system(resolver)
# Report the logical TPU devices that are now visible to TensorFlow.
print("All devices: ", tf.config.list_logical_devices('TPU'))
print("Number of devices: ", len(tf.config.list_logical_devices('TPU')))

In [None]:
strategy = tf.distribute.TPUStrategy(resolver)

In [None]:
# try:
#   tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
#   print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
# except ValueError:
#   raise BaseException('ERROR: Not connected to a TPU runtime; please see the previous cell in this notebook for instructions!')

# tf.config.experimental_connect_to_cluster(tpu)
# tf.tpu.experimental.initialize_tpu_system(tpu)
# tpu_strategy = tf.distribute.TPUStrategy(tpu)

In [None]:
# print(tpu)

In [None]:
# # Select appropriate distribution strategy
# if tpu:
#   print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
# else:
#   strategy = tf.distribute.get_strategy() # Default strategy that works on CPU and single GPU
#   print('Running on CPU instead')


In [None]:
def download_data_from_kaggle():
    """
    This function downloads the HAM10000 dataset from Kaggle and renames its metadata file.

    The whole 'kmader/skin-cancer-mnist-ham10000' dataset is downloaded and unzipped into the input
    directory on Google Drive, after which 'HAM10000_metadata.csv' is renamed to
    'input_data_from_kaggle.csv'. When the renamed file already exists nothing is downloaded and
    the function simply returns.

    :return: None
    """

    dataset = 'kmader/skin-cancer-mnist-ham10000'
    file_name = 'HAM10000_metadata.csv'
    download_dir = '/content/drive/MyDrive/skin_diseases_recognition_using_ml/data/input'
    file_path = download_dir + '/'
    dataset_dir = file_path + file_name
    new_file_name = file_path + 'input_data_from_kaggle.csv'

    # Checking whether file already is downloaded. This is done before touching the Kaggle API so
    # that no credentials are needed for a cached dataset, and it uses `return` instead of
    # `exit(0)`: inside a notebook `exit` raises SystemExit / shuts the kernel down.
    if os.path.exists(new_file_name):
        print("Found dataset directory, exiting")
        return

    print("Dataset not found, using kaggle-api tool for download")

    api = KaggleApi()

    # The Kaggle client reads the API token from the environment variables
    # (KAGGLE_USERNAME, KAGGLE_KEY) or from ~/.kaggle/kaggle.json
    api.authenticate()

    # Downloading file from kaggle (link: 'https://www.kaggle.com/datasets/kmader/skin-cancer-mnist-ham10000') and
    # creating new file
    api.dataset_download_files(dataset=dataset,
                               path=download_dir,
                               unzip=True)

    # Renaming downloaded file
    os.rename(dataset_dir, new_file_name)

In [None]:
download_data_from_kaggle()

In [None]:
!rm -r /content/skin_diseases_recognition_using_ml

**Data analysis**

In [None]:
input_data = pd.read_csv("/content/drive/MyDrive/skin_diseases_recognition_using_ml/data/input/input_data_from_kaggle.csv",
                             header=0,
                             index_col=0)

In [None]:
input_data

In [None]:
def data_analysis(input_data):
    """
    This function prints a textual overview of the input data: column names, distinct values of
    the categorical columns, number of distinct ages, rows with 'unknown' sex / localization and
    the number of nulls per column.

    :param input_data: dataframe to be processed
    :return: None
    """

    print(f"Columns are: {input_data.columns}")

    # Distinct values per column (for age only their number is shown).
    print(f"Unique dx: {pd.unique(input_data['dx'])}")
    print(f"Unique dx_type: {pd.unique(input_data['dx_type'])}")
    print(f"Unique age: {len(pd.unique(input_data['age']))}")
    print(f"Unique sex: {pd.unique(input_data['sex'])}")
    print(f"Unique localization: {pd.unique(input_data['localization'])}")

    # Rows where sex / localization is recorded as the literal string 'unknown'.
    rows_unknown_sex = input_data[input_data['sex'] == 'unknown']
    rows_unknown_localization = input_data[input_data['localization'] == 'unknown']
    ids_sex = pd.unique(rows_unknown_sex['image_id'])
    ids_localization = pd.unique(rows_unknown_localization['image_id'])

    print(f"Unknown sex: {len(rows_unknown_sex)}")
    print(f"Unknown sex ids: {ids_sex}")

    print(f"Unknown localization: {len(rows_unknown_localization)}")
    print(f"Unknown localization ids: {ids_localization}")

    # Images for which both attributes are unknown.
    ids_in_both = np.intersect1d(ids_sex, ids_localization)
    print(f"Same unknown ids: {len(ids_in_both)}")

    print(f"Analysis:\n{input_data.isnull().sum()}")

In [None]:
data_analysis(input_data)

In [None]:
def data_visualization(input_data,
                       output_dir='/content/drive/MyDrive/skin_diseases_recognition_using_ml/data_analysis/input_data_analysis_results/'):
    """
    This function visualizes input data so it could be analysed.

    Every figure is saved as a PNG file into ``output_dir`` and then displayed. Disease names on
    the x axis are derived from the 'dx' code of each row instead of a hard-coded label list:
    matplotlib orders categories by first appearance in the data, so fixed labels (which follow
    the frequency order of the bar chart) would mislabel the scatter plots.

    :param input_data: dataframe which contains data to be visualized (columns 'dx', 'dx_type',
                       'age', 'sex' and 'localization' are read)
    :param output_dir: directory where the figures are saved
    :return: None
    """

    my_colors = ['black', 'red', 'green', 'blue', 'cyan', 'silver', 'gold', 'slategrey', 'crimson', 'olive', 'orange',
                 'tomato', 'navy', 'lime', 'violet']

    disease_names = {
        'nv': 'Melanocytic nevi',
        'mel': 'Melanoma',
        'bkl': 'Benign keratosis-like lesions',
        'bcc': 'Basal cell carcinoma',
        'akiec': 'Actinic keratoses',
        'vasc': 'Vascular lesions',
        'df': 'Dermatofibroma'
    }

    # Full disease name for every row; codes missing from the mapping are kept unchanged.
    diseases = input_data['dx'].map(lambda code: disease_names.get(code, code))

    def save_and_show(file_name):
        # The layout is tightened before saving so the stored image matches the displayed one.
        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, file_name),
                    bbox_inches='tight')
        plt.show()

    def scatter_by_disease(column, title, y_label, color, file_name):
        # Scatter plot of one column against the disease name of each row.
        plt.title(title)
        plt.xlabel("Disease")
        plt.ylabel(y_label)
        plt.scatter(diseases, input_data[column],
                    color=color)
        plt.xticks(rotation=90)
        save_and_show(file_name)

    # Number of samples per disease
    diseases.value_counts().plot(kind='bar', color=my_colors)
    plt.xticks(rotation=90)
    plt.title("Diseases")
    plt.xlabel("Diseases type")
    plt.ylabel("Count")
    save_and_show('diseases_type_graph.png')

    # Number of samples per technical validation type
    input_data['dx_type'].value_counts().plot(kind='bar',
                                              color=my_colors)
    plt.title("Technical validation")
    plt.xlabel("Technical validation type")
    plt.ylabel("Count")
    save_and_show('technical_validation_graph.png')

    # Age distribution
    input_data['age'].hist(color='darkred',
                           histtype='bar',
                           ec='black')
    plt.title("Age")
    plt.xlabel("Age")
    plt.ylabel("Count")
    plt.margins(x=0)
    save_and_show('age_graph.png')

    # Share of each sex
    input_data['sex'].value_counts().plot(kind='pie')
    plt.title("Sex")
    plt.legend()
    save_and_show('sex_graph.png')

    # Number of samples per localization
    input_data['localization'].value_counts().plot(kind='bar',
                                                   color=my_colors)
    plt.title("Localization")
    plt.xlabel("Localization place")
    plt.ylabel("Count")
    save_and_show('localization_graph.png')

    # Relations between the disease and the remaining attributes
    scatter_by_disease('age', "Diseases depending on age", "Age", "red",
                       'diseases_depending_on_age.png')
    scatter_by_disease('sex', "Diseases depending on sex", "Sex", "blue",
                       'diseases_depending_on_sex.png')
    scatter_by_disease('localization', "Diseases depending on localization", "Localization", "green",
                       'diseases_depending_on_localization.png')

    # Relation between localization and sex
    plt.title("Localization depending on sex")
    plt.xlabel("Localization")
    plt.ylabel("Sex")
    plt.xticks(rotation=90)
    plt.scatter(input_data['localization'], input_data['sex'],
                color="gray")
    save_and_show('localization_depending_on_sex.png')

In [None]:
data_visualization(input_data)

**Data engineering**

In [None]:
def drop_unknown_sex_localization(input_data):
    """
    This function drops data which has unknown sex or localization cell.

    Every row whose 'image_id' belongs to a row with sex or localization equal to 'unknown' is
    removed. The passed dataframe is left untouched; a new dataframe with a fresh 0..n-1 index
    is returned, so the cell calling this function can be re-run safely.

    :param input_data: dataframe to be processed
    :return: new dataframe without the rows having unknown sex or localization
    """

    unknown_rows = (input_data['sex'] == 'unknown') | (input_data['localization'] == 'unknown')
    ids_unknown = input_data.loc[unknown_rows, 'image_id'].unique()

    rows_to_keep = ~input_data['image_id'].isin(ids_unknown)

    return input_data.loc[rows_to_keep].reset_index(drop=True)


def drop_missing_data(dropped_unknown_sex_localization):
    """
    This function drops data which has a null 'age' cell.

    Only the 'age' column is checked for nulls. The passed dataframe is left untouched; a new
    dataframe with a fresh 0..n-1 index is returned.

    :param dropped_unknown_sex_localization: dataframe to be processed
    :return: new dataframe without the rows having null age
    """

    missing_age = dropped_unknown_sex_localization['age'].isna()
    print(f"Number of data which has nulls: {missing_age.sum()}")

    return dropped_unknown_sex_localization.loc[~missing_age].reset_index(drop=True)

In [None]:
# Remove the rows with unknown sex / localization first ...
dropped_unknown_sex_localization = drop_unknown_sex_localization(input_data)

# ... then the rows with a missing age.
dropped_missing_data = drop_missing_data(dropped_unknown_sex_localization)

# Persist the cleaned data so the following steps can start from this file.
dropped_missing_data.to_csv("/content/drive/MyDrive/skin_diseases_recognition_using_ml/data/preprocessing/dropped_unknown_sex_localization_nulls.csv")

In [None]:
dropped_missing_data

In [None]:
dropped_unknown_sex_localization_nulls = pd.read_csv("/content/drive/MyDrive/skin_diseases_recognition_using_ml/data/preprocessing/dropped_unknown_sex_localization_nulls.csv",
                             header=0,
                             index_col=0)

In [None]:
dropped_unknown_sex_localization_nulls

In [None]:
def add_images_path(input_data):
    """
    This function adds new column with image paths for each data.

    The image of a row is looked up in the 'HAM10000_images_part_1' directory; when it is not
    found there, the path inside 'HAM10000_images_part_2' is used (without checking that it
    exists). The paths are stored as plain strings. The passed dataframe is left untouched.
    As a visual sanity check the image of the first row is displayed when its file exists.

    :param input_data: dataframe to be processed (column 'image_id' is read)
    :return: new dataframe with added 'image_path' column
    """

    image_part_1_path = '/content/drive/MyDrive/skin_diseases_recognition_using_ml/data/input/HAM10000_images_part_1/'
    image_part_2_path = '/content/drive/MyDrive/skin_diseases_recognition_using_ml/data/input/HAM10000_images_part_2/'
    extension = '.jpg'

    def resolve_image_path(image_id):
        # Prefer part 1, fall back to part 2.
        part_1_candidate = image_part_1_path + image_id + extension
        if os.path.exists(part_1_candidate):
            return part_1_candidate
        return image_part_2_path + image_id + extension

    with_paths = input_data.assign(image_path=input_data['image_id'].map(resolve_image_path))

    # Nothing to preview for an empty dataframe.
    if with_paths.empty:
        return with_paths

    # iloc is used so the preview does not depend on the index containing the label 0.
    file_path = str(with_paths['image_path'].iloc[0])
    if os.path.exists(file_path):
        plt.title("Disease Image")
        plt.xlabel("X pixel scaling")
        plt.ylabel("Y pixels scaling")

        image = mpimg.imread(file_path)
        plt.imshow(image)
        plt.show()
    else:
        # The preview is only informative, so a missing file must not abort the processing.
        print(f"Sample image not found, preview skipped: {file_path}")

    return with_paths


def add_disease_classes(added_images):
    """
    This function adds new columns which contain the full name and the numeric code of each disease.

    'diseases' holds the full disease name for the 'dx' code of the row and 'diseases_code' the
    position of that name in the alphabetically sorted list of all seven disease names. The codes
    are therefore the same no matter which diseases are present in the passed data. The passed
    dataframe is left untouched.

    :param added_images: input dataframe to be processed (column 'dx' is read)
    :return: new dataframe with added 'diseases' and 'diseases_code' columns
    :raises KeyError: when the 'dx' column contains a code which is not a known disease
    """

    diseases_dict = {
        'nv': 'Melanocytic nevi',
        'mel': 'Melanoma',
        'bkl': 'Benign keratosis-like lesions',
        'bcc': 'Basal cell carcinoma',
        'akiec': 'Actinic keratoses',
        'vasc': 'Vascular lesions',
        'df': 'Dermatofibroma'
    }

    # Unknown codes are reported explicitly instead of silently becoming NaN / code -1.
    unknown_codes = set(added_images['dx'].unique()) - set(diseases_dict)
    if unknown_codes:
        raise KeyError(f"Unknown disease codes in 'dx' column: {sorted(map(str, unknown_codes))}")

    diseases = added_images['dx'].map(diseases_dict)

    # Fixed category list -> stable codes even when some disease is missing from the data.
    diseases_code = pd.Categorical(diseases,
                                   categories=sorted(diseases_dict.values())).codes

    return added_images.assign(diseases=diseases,
                               diseases_code=diseases_code)

In [None]:
# Add the image path of every row, then the disease name / code columns.
added_images_path = add_images_path(dropped_unknown_sex_localization_nulls)
added_images_diseases_classes = add_disease_classes(added_images_path)

# Persist the result so the following steps can start from this file.
added_images_diseases_classes.to_csv("/content/drive/MyDrive/skin_diseases_recognition_using_ml/data/preprocessing/added_images_diseases_classes.csv")

In [None]:
added_images_diseases_classes = pd.read_csv("/content/drive/MyDrive/skin_diseases_recognition_using_ml/data/preprocessing/added_images_diseases_classes.csv",
                             header=0,
                             index_col=0)

In [None]:
added_images_diseases_classes

In [None]:
def create_dummies_categorical_columns(input_data):
    """
    This function replaces the categorical "dx_type" and "localization" columns with dummy
    (one-hot) columns, one per distinct value.

    :param input_data: dataframe to be processed
    :return: new dataframe with the dummy columns instead of the two categorical ones
    """

    categorical_columns = ["dx_type", "localization"]

    return pd.get_dummies(input_data, columns=categorical_columns)


def label_encoding_sex_column(df_with_dummies):
    """
    This function replaces the categorical 'sex' column with the integer labels produced by
    a freshly fitted LabelEncoder. The passed dataframe itself is modified.

    :param df_with_dummies: dataframe to be processed
    :return: df_with_dummies - the same dataframe with numerical 'sex' column
    """

    sex_labels = LabelEncoder().fit_transform(df_with_dummies['sex'])
    df_with_dummies['sex'] = sex_labels

    return df_with_dummies

In [None]:
# One-hot encode the "dx_type" and "localization" columns ...
df_with_dummies = create_dummies_categorical_columns(added_images_diseases_classes)

# ... and convert the 'sex' column to numbers.
converted_categorical_columns = label_encoding_sex_column(df_with_dummies)

# Persist the result; the machine learning part starts from this file.
converted_categorical_columns.to_csv("/content/drive/MyDrive/skin_diseases_recognition_using_ml/data/preprocessing/converted_categorical_columns.csv")

**Machine Learning**

In [None]:
!pip install keras_tuner

In [None]:
import shutil
from itertools import cycle
from sklearn.model_selection import train_test_split

In [None]:
converted_categorical_columns = pd.read_csv("/content/drive/MyDrive/skin_diseases_recognition_using_ml/data/preprocessing/converted_categorical_columns.csv",
                             header=0,
                             index_col=0)

In [None]:
converted_categorical_columns

In [None]:
def split_with_stratify(input_data):
    """
    This function splits data into train, validation and test part, stratified by disease.

    20% of the data is taken as the test part; 25% of the remaining data (20% of all data) is
    taken as the validation part and the rest is the train part. The percentage of each disease
    is printed for the input data and for every part, together with the size of the part.

    :param input_data: dataframe to be split (columns 'diseases' and 'diseases_code' are read)
    :return: x_train, x_test, x_val, y_val, y_train, y_test - note the order of the elements
    """

    targets = input_data['diseases_code']
    predictors = input_data.drop(columns='diseases_code')

    print("Input data:")
    print(input_data['diseases'].value_counts(normalize=True) * 100)

    # First split: test part vs. the rest
    x_rest, x_test, y_rest, y_test = train_test_split(predictors,
                                                      targets,
                                                      test_size=0.2,
                                                      stratify=predictors['diseases'],
                                                      random_state=0)

    # Second split: the rest is divided into train and validation part
    x_train, x_val, y_train, y_val = train_test_split(x_rest,
                                                      y_rest,
                                                      test_size=0.25,
                                                      stratify=x_rest['diseases'],
                                                      random_state=0)

    reports = (("X train data:", x_train),
               ("X validation data:", x_val),
               ("X test data:", x_test))
    for heading, part in reports:
        print(heading)
        print(part['diseases'].value_counts(normalize=True) * 100)
        print(len(part))

    return x_train, x_test, x_val, y_val, y_train, y_test


def move_images_to_directories(x_train, x_test, x_val,
                               base_path="/content/drive/MyDrive/skin_diseases_recognition_using_ml/data/final_data_for_training_model/"):
    """
    This function copies images into '<base_path>/<train|test|validation>/<dx>/' directories.

    One directory per part and disease code is created, then every image is copied (the source
    files are kept) into the directory of its part and its 'dx' code. When the directory
    '<base_path>/train/akiec' already exists the data is considered to be split and nothing is done.

    :param x_train: dataframe with train data (columns 'image_path' and 'dx' are read)
    :param x_test: dataframe with test data (columns 'image_path' and 'dx' are read)
    :param x_val: dataframe with validation data (columns 'image_path' and 'dx' are read)
    :param base_path: directory under which the train / test / validation directories are created
    :return: None
    """

    dx_list = ['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc']
    parts = {'train': x_train, 'test': x_test, 'validation': x_val}

    # `return` instead of `exit(0)`: inside a notebook `exit` raises SystemExit / shuts the
    # kernel down.
    if os.path.exists(os.path.join(base_path, 'train', dx_list[0])):
        print("Data already split, exiting")
        return

    for part_name in parts:
        for dx in dx_list:
            os.makedirs(os.path.join(base_path, part_name, dx), exist_ok=True)

    for part_name, part_data in parts.items():
        for image_path, dx in zip(part_data['image_path'], part_data['dx']):
            # The trailing separator makes the copy fail for a 'dx' without a directory
            # instead of creating a file named after the code.
            shutil.copy(image_path, os.path.join(base_path, part_name, dx, ''))

In [None]:
# Split the data; note the order of the returned values (x_val and y_val come before y_train).
x_train, x_test, x_val, y_val, y_train, y_test = split_with_stratify(converted_categorical_columns)

# Persist every part without the index column (index=0 is treated as False).
x_train.to_csv('/content/drive/MyDrive/skin_diseases_recognition_using_ml/data/final_data_for_training_model/x_train.csv', index=0)
x_test.to_csv('/content/drive/MyDrive/skin_diseases_recognition_using_ml/data/final_data_for_training_model/x_test.csv', index=0)
x_val.to_csv('/content/drive/MyDrive/skin_diseases_recognition_using_ml/data/final_data_for_training_model/x_val.csv', index=0)
y_train.to_csv('/content/drive/MyDrive/skin_diseases_recognition_using_ml/data/final_data_for_training_model/y_train.csv', index=0)
y_test.to_csv('/content/drive/MyDrive/skin_diseases_recognition_using_ml/data/final_data_for_training_model/y_test.csv', index=0)
y_val.to_csv('/content/drive/MyDrive/skin_diseases_recognition_using_ml/data/final_data_for_training_model/y_val.csv', index=0)

# Place the images of every part into per-disease directories.
move_images_to_directories(x_train, x_test, x_val)

***Experiments:***

1. **AlexNet**


In [None]:
def AlexNetExp(hp):
    """
    This function builds and compiles AlexNet-like model for 227x227 RGB images and 7 classes.

    The learning rate of the Adam optimizer is a tunable hyperparameter sampled on a logarithmic
    scale between 1e-6 and 1e-1.

    :param hp: hyperparameters object; only hp.Float is called on it
    :return: model - compiled model
    """

    def conv(filters, kernel, **extra):
        # Convolution with ReLU activation and stride 1 unless overridden via `extra`.
        options = {"strides": (1, 1), "activation": "relu"}
        options.update(extra)
        return layers.Conv2D(filters=filters, kernel_size=(kernel, kernel), **options)

    def pool():
        return layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2))

    model = Sequential([
        # Feature extractor
        conv(96, 11, strides=(4, 4), input_shape=(227, 227, 3)),
        layers.BatchNormalization(),
        pool(),

        conv(256, 5, padding="same"),
        layers.BatchNormalization(),
        pool(),

        conv(384, 3, padding="same"),
        layers.BatchNormalization(),

        conv(384, 3, padding="same"),
        layers.BatchNormalization(),

        conv(256, 3, padding="same"),
        layers.BatchNormalization(),
        pool(),

        # Classifier
        layers.Flatten(),
        layers.Dropout(0.5),
        layers.Dense(4096, activation="relu"),
        layers.Dropout(0.5),
        layers.Dense(4096, activation="relu"),

        layers.Dense(7, activation="softmax"),
    ])

    learning_rate = hp.Float("learning_rate",
                             min_value=1e-6,
                             max_value=1e-1,
                             step=10,
                             sampling="log")

    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizers.Adam(learning_rate=learning_rate),
                  metrics=['accuracy'])

    return model

2. **GoogLeNet**

In [None]:
def Inception_block(input_layer, f1, f2_conv1, f2_conv3, f3_conv1, f3_conv5, f4):
    """
    This function builds one inception block: four parallel paths applied to the same input
    whose outputs are concatenated along the last axis.

    :param input_layer: tensor the block is applied to
    :param f1: number of filters of the 1x1 convolution in the first path
    :param f2_conv1: number of filters of the 1x1 convolution in the second path
    :param f2_conv3: number of filters of the 3x3 convolution in the second path
    :param f3_conv1: number of filters of the 1x1 convolution in the third path
    :param f3_conv5: number of filters of the 5x5 convolution in the third path
    :param f4: number of filters of the 1x1 convolution (after max-pooling) in the fourth path
    :return: concatenated output of the four paths
    """

    def conv_relu(tensor, filters, size):
        # 'same'-padded size x size convolution followed by ReLU
        return layers.Conv2D(filters=filters,
                             kernel_size=(size, size),
                             padding='same',
                             activation='relu')(tensor)

    # 1st path: 1x1 convolution
    branch_1x1 = conv_relu(input_layer, f1, 1)

    # 2nd path: 1x1 convolution followed by 3x3 convolution
    branch_3x3 = conv_relu(conv_relu(input_layer, f2_conv1, 1), f2_conv3, 3)

    # 3rd path: 1x1 convolution followed by 5x5 convolution
    branch_5x5 = conv_relu(conv_relu(input_layer, f3_conv1, 1), f3_conv5, 5)

    # 4th path: 3x3 max-pooling (stride 1) followed by 1x1 convolution
    pooled = layers.MaxPooling2D((3, 3),
                                 strides=(1, 1),
                                 padding='same')(input_layer)
    branch_pool = conv_relu(pooled, f4, 1)

    return layers.concatenate([branch_1x1, branch_3x3, branch_5x5, branch_pool],
                              axis=-1)


def GoogleNetExp(hp):
    """
    This function builds and compiles GoogLeNet-like model for 224x224 RGB images and 7 classes.

    The model has three softmax outputs: the main one and two auxiliary classifiers attached
    after the 3rd and the 6th inception block. The learning rate of the Adam optimizer is
    a tunable hyperparameter sampled on a logarithmic scale between 1e-6 and 1e-1.
    NOTE(review): because of the three outputs the training code presumably has to provide
    a target for each of them - confirm against the training loop.

    :param hp: hyperparameters object; only hp.Float is called on it
    :return: model - compiled model with outputs [main, auxiliary 1, auxiliary 2]
    """

    def max_pool(tensor):
        # max-pooling layer: pool_size = (3,3), strides = 2
        return layers.MaxPooling2D(pool_size=(3, 3),
                                   strides=2)(tensor)

    def auxiliary_classifier(tensor):
        # Extra network: average pooling -> 1x1 convolution -> dense 1024 -> dropout -> softmax
        aux = layers.AveragePooling2D(pool_size=(5, 5),
                                      strides=3)(tensor)
        aux = layers.Conv2D(filters=128,
                            kernel_size=(1, 1),
                            padding='same',
                            activation='relu')(aux)
        aux = layers.Flatten()(aux)
        aux = layers.Dense(1024,
                           activation='relu')(aux)
        aux = layers.Dropout(0.7)(aux)
        return layers.Dense(7,
                            activation='softmax')(aux)

    # input layer
    input_layer = layers.Input(shape=(224, 224, 3))

    # Stem: 7x7 convolution (stride 2), pooling, 1x1 and 3x3 convolution, pooling
    X = layers.Conv2D(filters=64,
                      kernel_size=(7, 7),
                      strides=2,
                      padding='valid',
                      activation='relu')(input_layer)
    X = max_pool(X)
    X = layers.Conv2D(filters=64,
                      kernel_size=(1, 1),
                      strides=1,
                      padding='same',
                      activation='relu')(X)
    X = layers.Conv2D(filters=192,
                      kernel_size=(3, 3),
                      padding='same',
                      activation='relu')(X)
    X = max_pool(X)

    # Filters are given in the order (f1, f2_conv1, f2_conv3, f3_conv1, f3_conv5, f4).

    # 1st and 2nd Inception block
    X = Inception_block(X, 64, 96, 128, 16, 32, 32)
    X = Inception_block(X, 128, 128, 192, 32, 96, 64)
    X = max_pool(X)

    # 3rd Inception block
    X = Inception_block(X, 192, 96, 208, 16, 48, 64)

    # Extra network 1
    X1 = auxiliary_classifier(X)

    # 4th, 5th and 6th Inception block
    for block_filters in ((160, 112, 224, 24, 64, 64),
                          (128, 128, 256, 24, 64, 64),
                          (112, 144, 288, 32, 64, 64)):
        X = Inception_block(X, *block_filters)

    # Extra network 2
    X2 = auxiliary_classifier(X)

    # 7th Inception block
    X = Inception_block(X, 256, 160, 320, 32, 128, 128)
    X = max_pool(X)

    # 8th and 9th Inception block
    X = Inception_block(X, 256, 160, 320, 32, 128, 128)
    X = Inception_block(X, 384, 192, 384, 48, 128, 128)

    # Head: global average pooling, dropout and the main softmax output
    X = layers.GlobalAveragePooling2D(name='GAPL')(X)
    X = layers.Dropout(0.4)(X)
    X = layers.Dense(7,
                     activation='softmax')(X)

    model = Model(input_layer, [X, X1, X2],
                  name='GoogLeNet')

    learning_rate = hp.Float("learning_rate",
                             min_value=1e-6,
                             max_value=1e-1,
                             step=10,
                             sampling="log")

    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizers.Adam(learning_rate=learning_rate),
                  metrics=['accuracy'])

    return model

3. **LeNet**

In [None]:
def LeNetExp(hp):
    """
    This function builds and compiles LeNet-like model for 32x32 RGB images and 7 classes.

    The learning rate of the Adam optimizer is a tunable hyperparameter sampled on a logarithmic
    scale between 1e-6 and 1e-1.

    :param hp: hyperparameters object; only hp.Float is called on it
    :return: model - compiled model
    """

    model = Sequential([
        layers.Conv2D(filters=6, kernel_size=(5, 5), activation='tanh', input_shape=(32, 32, 3)),
        layers.AveragePooling2D(),

        layers.Conv2D(filters=16, kernel_size=(5, 5), activation='tanh'),
        layers.AveragePooling2D(),

        layers.Conv2D(filters=120, kernel_size=(5, 5), activation='tanh'),

        # Classifier
        layers.Flatten(),
        layers.Dense(units=84, activation='tanh'),
        layers.Dense(units=7, activation='softmax'),
    ])

    learning_rate = hp.Float("learning_rate",
                             min_value=1e-6,
                             max_value=1e-1,
                             step=10,
                             sampling="log")

    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizers.Adam(learning_rate=learning_rate),
                  metrics=['accuracy'])

    return model

4. **ResNet**

In [None]:
def relu_bn(inputs: Tensor) -> Tensor:
    """
    This function applies ReLU activation followed by batch normalization.

    :param inputs: tensor to be processed
    :return: normalized activations
    """
    activated = layers.ReLU()(inputs)
    return layers.BatchNormalization()(activated)


def residual_block(x: Tensor, downsample: bool, filters: int, kernel_size: int = 3) -> Tensor:
    """Build one two-convolution residual block on top of `x`.

    Args:
        x: input tensor of the block.
        downsample: when true, the first convolution uses stride 2 and the
            shortcut is passed through a stride-2 1x1 convolution so both
            branches can be added.
        filters: number of filters of every convolution in the block.
        kernel_size: kernel size of the two main-branch convolutions.

    Returns:
        The block output after addition, ReLU and batch normalization.
    """
    first_stride = 2 if downsample else 1

    # Main branch: conv -> ReLU/BN -> conv.
    branch = layers.Conv2D(filters=filters,
                           kernel_size=kernel_size,
                           strides=first_stride,
                           padding="same")(x)
    branch = relu_bn(branch)
    branch = layers.Conv2D(filters=filters,
                           kernel_size=kernel_size,
                           strides=1,
                           padding="same")(branch)

    # Shortcut branch: identity unless the block downsamples.
    shortcut = x
    if downsample:
        shortcut = layers.Conv2D(filters=filters,
                                 kernel_size=1,
                                 strides=2,
                                 padding="same")(x)

    merged = layers.Add()([shortcut, branch])
    return relu_bn(merged)


def ResNetExp(hp):
    """Build and compile the residual network for a Keras Tuner search.

    The model takes (32, 32, 3) inputs, stacks four stages of residual blocks
    (2, 5, 5 and 2 blocks; 64, 128, 256 and 512 filters) and ends in a 7-unit
    softmax layer. The only tuned hyperparameter is the Adam learning rate,
    sampled on a log scale between 1e-6 and 1e-1.

    Args:
        hp: hyperparameter container handed in by the tuner.

    Returns:
        The compiled functional model.
    """
    base_filters = 64
    stage_depths = (2, 5, 5, 2)

    inputs = layers.Input(shape=(32, 32, 3))

    # Stem: normalize the raw input, then one 3x3 convolution.
    t = layers.BatchNormalization()(inputs)
    t = layers.Conv2D(filters=base_filters, kernel_size=3, strides=1, padding="same")(t)
    t = relu_bn(t)

    for stage, depth in enumerate(stage_depths):
        width = base_filters * 2 ** stage  # filter count doubles every stage
        for position in range(depth):
            # Only the first block of stages 1..3 halves the resolution.
            t = residual_block(t, downsample=(position == 0 and stage != 0), filters=width)

    t = layers.AveragePooling2D(4)(t)
    t = layers.Flatten()(t)
    outputs = layers.Dense(7, activation='softmax')(t)

    model = Model(inputs, outputs)

    lr_choice = hp.Float("learning_rate", min_value=1e-6, max_value=1e-1, step=10, sampling="log")
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizers.Adam(learning_rate=lr_choice),
                  metrics=['accuracy'])

    return model

5. **VGG Net**

In [None]:
def VGGNetExp(hp):
    """Build and compile a VGG-16-style CNN for a Keras Tuner search.

    Five stages of 3x3 same-padded ReLU convolutions (2, 2, 3, 3, 3 layers
    with 64, 128, 256, 512, 512 filters), each followed by 2x2 max pooling,
    then two 4096-unit dense layers and a 7-unit softmax. Inputs have shape
    (224, 224, 3). The only tuned hyperparameter is the Adam learning rate,
    sampled on a log scale between 1e-6 and 1e-1.

    Args:
        hp: hyperparameter container handed in by the tuner.

    Returns:
        The compiled Sequential model.
    """
    def conv3x3(filters, **extra):
        # Every convolution in the network shares these settings.
        return layers.Conv2D(filters=filters, kernel_size=(3, 3), padding="same", activation="relu", **extra)

    stage_plan = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))

    model = Sequential()
    for stage_index, (filters, repeats) in enumerate(stage_plan):
        for conv_index in range(repeats):
            if stage_index == 0 and conv_index == 0:
                # The very first layer declares the input shape.
                model.add(conv3x3(filters, input_shape=(224, 224, 3)))
            else:
                model.add(conv3x3(filters))
        model.add(layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2)))

    model.add(layers.Flatten())
    for units in (4096, 4096):
        model.add(layers.Dense(units=units, activation="relu"))
    model.add(layers.Dense(units=7, activation="softmax"))

    lr_choice = hp.Float("learning_rate", min_value=1e-6, max_value=1e-1, step=10, sampling="log")
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizers.Adam(learning_rate=lr_choice),
                  metrics=['accuracy'])

    return model

In [None]:
def train_model_experiment(model):
    dimensions = (0, 0)
    directory_name = ""
    model_to_train = None

    match model:
      case "AlexNet":
        dimensions = (227, 227)
        directory_name = "AlexNet"
        model_to_train = AlexNetExp
      case "LeNet":
        dimensions = (32, 32)
        directory_name = "LeNet"
        model_to_train = LeNetExp
      case "GoogleNet":
        dimensions = (224, 224)
        directory_name = "GoogleNet"
        model_to_train = GoogleNetExp
      case "ResNet":
        dimensions = (32, 32)
        directory_name = "ResNet"
        model_to_train = ResNetExp
      case "VGGNet":
        dimensions = (224, 224)
        directory_name = "VGGNet"
        model_to_train = VGGNetExp
      case _:
        dimensions = (-1, -1)
        directory_name = "Wrong input given"
        exit(-1)

    train_dir = "/content/drive/MyDrive/skin_diseases_recognition_using_ml/data/final_data_for_training_model/train//"
    validation_dir = "/content/drive/MyDrive/skin_diseases_recognition_using_ml/data/final_data_for_training_model/validation//"

    train_datagen = ImageDataGenerator(
        rescale=1. / 255,
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True
    )

    test_datagen = ImageDataGenerator(rescale=1. / 255)

    train_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=dimensions,
        batch_size=32,
        class_mode="categorical"
    )

    validation_generator = test_datagen.flow_from_directory(
        validation_dir,
        target_size=dimensions,
        batch_size=32,
        class_mode="categorical"
    )

    train_img, train_labels = train_generator.next()
    validation_img, validation_labels = validation_generator.next()

    tuner = RandomSearch(
        model_to_train,
        objective='val_accuracy',
        max_trials=6,
        executions_per_trial=5,
        overwrite=True,
        directory=directory_name+ '_experiments',
        project_name='skin_patterns_recognition_using_ML'
    )

    tuner.search_space_summary()

    tuner.search(train_img, train_labels,
                 epochs=10,
                 validation_data=(validation_img, validation_labels),
                 batch_size=32)

    tuner.results_summary()

    best_model = tuner.get_best_models()[0]

    print(f"Best model: {best_model}")

    best_hp = tuner.get_best_hyperparameters()[0].values

    print(f"Best hyperpameters: {best_hp}")

In [None]:
# Learning-rate search for the AlexNet architecture.
train_model_experiment("AlexNet")

In [None]:
# Learning-rate search for the LeNet architecture.
train_model_experiment("LeNet")

In [None]:
# Learning-rate search for the GoogleNet architecture.
train_model_experiment("GoogleNet")

In [None]:
# Learning-rate search for the VGGNet architecture.
train_model_experiment("VGGNet")

In [None]:
# Learning-rate search for the ResNet architecture.
train_model_experiment("ResNet")

***Models implementation:***

1. **Alex Net**

In [None]:
def AlexNet():
    """Build the (uncompiled) AlexNet-style classifier inside `strategy.scope()`.

    Inputs have shape (227, 227, 3). Five ReLU convolutions, each followed by
    batch normalization (max pooling after the 1st, 2nd and 5th), feed two
    4096-unit dense layers with dropout and a 7-unit softmax output.

    Returns:
        The uncompiled Sequential model.
    """
    def same_conv(filters, kernel):
        # Stride-1, same-padded ReLU convolution used for layers 2-5.
        return layers.Conv2D(filters=filters,
                             kernel_size=kernel,
                             strides=(1, 1),
                             activation="relu",
                             padding="same")

    def overlap_pool():
        # 3x3 window with stride 2.
        return layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2))

    with strategy.scope():
      model = Sequential([
          # Convolution 1 (declares the input shape).
          layers.Conv2D(filters=96,
                        kernel_size=(11, 11),
                        strides=(4, 4),
                        activation="relu",
                        input_shape=(227, 227, 3)),
          layers.BatchNormalization(),
          overlap_pool(),
          # Convolution 2.
          same_conv(256, (5, 5)),
          layers.BatchNormalization(),
          overlap_pool(),
          # Convolutions 3-5.
          same_conv(384, (3, 3)),
          layers.BatchNormalization(),
          same_conv(384, (3, 3)),
          layers.BatchNormalization(),
          same_conv(256, (3, 3)),
          layers.BatchNormalization(),
          overlap_pool(),
          # Classifier head.
          layers.Flatten(),
          layers.Dropout(0.5),
          layers.Dense(4096, activation="relu"),
          layers.Dropout(0.5),
          layers.Dense(4096, activation="relu"),
          layers.Dense(7, activation="softmax"),
      ])

      return model

2. **Google Net**

In [None]:
def Inception_block(input_layer, f1, f2_conv1, f2_conv3, f3_conv1, f3_conv5, f4):
    """Build one four-branch Inception module and concatenate its branches.

    Args:
        input_layer: tensor fed to all four branches.
        f1: filters of the 1x1 convolution in the first branch.
        f2_conv1: filters of the 1x1 convolution in the second branch.
        f2_conv3: filters of the 3x3 convolution in the second branch.
        f3_conv1: filters of the 1x1 convolution in the third branch.
        f3_conv5: filters of the 5x5 convolution in the third branch.
        f4: filters of the 1x1 convolution that follows the max pooling in
            the fourth branch.

    Returns:
        The four branch outputs concatenated along the last axis.
    """
    def relu_conv(filters, size, tensor):
        # All convolutions in the module are same-padded with ReLU.
        return layers.Conv2D(filters=filters,
                             kernel_size=(size, size),
                             padding='same',
                             activation='relu')(tensor)

    # Branch 1: 1x1 convolution.
    branch_a = relu_conv(f1, 1, input_layer)

    # Branch 2: 1x1 reduction, then 3x3 convolution.
    branch_b = relu_conv(f2_conv1, 1, input_layer)
    branch_b = relu_conv(f2_conv3, 3, branch_b)

    # Branch 3: 1x1 reduction, then 5x5 convolution.
    branch_c = relu_conv(f3_conv1, 1, input_layer)
    branch_c = relu_conv(f3_conv5, 5, branch_c)

    # Branch 4: stride-1 3x3 max pooling, then 1x1 convolution.
    branch_d = layers.MaxPooling2D((3, 3), strides=(1, 1), padding='same')(input_layer)
    branch_d = relu_conv(f4, 1, branch_d)

    return layers.concatenate([branch_a, branch_b, branch_c, branch_d], axis=-1)


def GoogleNet():
  """Build the (uncompiled) GoogLeNet-style model inside `strategy.scope()`.

  Inputs have shape (224, 224, 3). The network is a convolutional stem
  followed by nine Inception blocks, with two auxiliary classifiers branching
  off after the 3rd and 6th block.

  Returns:
      A functional model named 'GoogLeNet' with three 7-unit softmax outputs,
      in this order: main classifier, auxiliary classifier 1, auxiliary
      classifier 2.
  """
  def auxiliary_head(feature_map):
    # Side classifier: average pool -> 1x1 conv -> dense -> dropout -> softmax.
    head = layers.AveragePooling2D(pool_size=(5, 5), strides=3)(feature_map)
    head = layers.Conv2D(filters=128, kernel_size=(1, 1), padding='same', activation='relu')(head)
    head = layers.Flatten()(head)
    head = layers.Dense(1024, activation='relu')(head)
    head = layers.Dropout(0.7)(head)
    return layers.Dense(7, activation='softmax')(head)

  def shrink(feature_map):
    # 3x3 max pooling with stride 2, used between groups of blocks.
    return layers.MaxPooling2D(pool_size=(3, 3), strides=2)(feature_map)

  # Filter counts per Inception block, in Inception_block's positional order:
  # (f1, f2_conv1, f2_conv3, f3_conv1, f3_conv5, f4).
  blocks_1_2 = ((64, 96, 128, 16, 32, 32),
                (128, 128, 192, 32, 96, 64))
  block_3 = (192, 96, 208, 16, 48, 64)
  blocks_4_6 = ((160, 112, 224, 24, 64, 64),
                (128, 128, 256, 24, 64, 64),
                (112, 144, 288, 32, 64, 64))
  block_7 = (256, 160, 320, 32, 128, 128)
  blocks_8_9 = ((256, 160, 320, 32, 128, 128),
                (384, 192, 384, 48, 128, 128))

  with strategy.scope():
    input_layer = layers.Input(shape=(224, 224, 3))

    # Stem: 7x7/2 conv, pool, 1x1 conv, 3x3 conv, pool.
    X = layers.Conv2D(filters=64, kernel_size=(7, 7), strides=2,
                      padding='valid', activation='relu')(input_layer)
    X = shrink(X)
    X = layers.Conv2D(filters=64, kernel_size=(1, 1), strides=1,
                      padding='same', activation='relu')(X)
    X = layers.Conv2D(filters=192, kernel_size=(3, 3),
                      padding='same', activation='relu')(X)
    X = shrink(X)

    # Inception blocks 1-2, then downsample.
    for spec in blocks_1_2:
      X = Inception_block(X, *spec)
    X = shrink(X)

    # Inception block 3; the first auxiliary classifier taps its output.
    X = Inception_block(X, *block_3)
    X1 = auxiliary_head(X)

    # Inception blocks 4-6; the second auxiliary classifier taps block 6.
    for spec in blocks_4_6:
      X = Inception_block(X, *spec)
    X2 = auxiliary_head(X)

    # Inception block 7, then downsample.
    X = Inception_block(X, *block_7)
    X = shrink(X)

    # Inception blocks 8-9.
    for spec in blocks_8_9:
      X = Inception_block(X, *spec)

    # Main classifier head.
    X = layers.GlobalAveragePooling2D(name='GAPL')(X)
    X = layers.Dropout(0.4)(X)
    X = layers.Dense(7, activation='softmax')(X)

    model = Model(input_layer, [X, X1, X2], name='GoogLeNet')

    return model

3. **Le Net**

In [None]:
def LeNet():
  """Build the (uncompiled) LeNet-style classifier inside `strategy.scope()`.

  Inputs have shape (32, 32, 3); the output is a 7-unit softmax layer.

  Returns:
      The uncompiled Sequential model.
  """
  with strategy.scope():
    network = Sequential([
        # Three 5x5 tanh convolutions, average pooling after the first two.
        layers.Conv2D(filters=6, kernel_size=(5, 5), activation='tanh', input_shape=(32, 32, 3)),
        layers.AveragePooling2D(),
        layers.Conv2D(filters=16, kernel_size=(5, 5), activation='tanh'),
        layers.AveragePooling2D(),
        layers.Conv2D(filters=120, kernel_size=(5, 5), activation='tanh'),
        # Classifier head.
        layers.Flatten(),
        layers.Dense(units=84, activation='tanh'),
        layers.Dense(units=7, activation='softmax'),
    ])

    return network

4. **Res Net**

In [None]:
def relu_bn(inputs: Tensor) -> Tensor:
    """Apply a ReLU activation followed by batch normalization.

    This repeats the definition of the same name from the
    hyperparameter-experiment section earlier in the notebook.
    """
    activated = layers.ReLU()(inputs)
    return layers.BatchNormalization()(activated)


def residual_block(x: Tensor, downsample: bool, filters: int, kernel_size: int = 3) -> Tensor:
    """Build one two-convolution residual block on top of `x`.

    This repeats the definition of the same name from the
    hyperparameter-experiment section earlier in the notebook.

    Args:
        x: input tensor of the block.
        downsample: when true, the first convolution uses stride 2 and the
            shortcut is passed through a stride-2 1x1 convolution so both
            branches can be added.
        filters: number of filters of every convolution in the block.
        kernel_size: kernel size of the two main-branch convolutions.

    Returns:
        The block output after addition, ReLU and batch normalization.
    """
    first_stride = 2 if downsample else 1

    # Main branch: conv -> ReLU/BN -> conv.
    branch = layers.Conv2D(filters=filters,
                           kernel_size=kernel_size,
                           strides=first_stride,
                           padding="same")(x)
    branch = relu_bn(branch)
    branch = layers.Conv2D(filters=filters,
                           kernel_size=kernel_size,
                           strides=1,
                           padding="same")(branch)

    # Shortcut branch: identity unless the block downsamples.
    shortcut = x
    if downsample:
        shortcut = layers.Conv2D(filters=filters,
                                 kernel_size=1,
                                 strides=2,
                                 padding="same")(x)

    merged = layers.Add()([shortcut, branch])
    return relu_bn(merged)


def ResNet():
  """Build the residual network inside `strategy.scope()`.

  The model takes (32, 32, 3) inputs, stacks four stages of residual blocks
  (2, 5, 5 and 2 blocks; 64, 128, 256 and 512 filters) and ends in a 7-unit
  softmax layer.

  Unlike the other model builders in this section, the model is returned
  already compiled (Adam, sparse categorical cross-entropy, accuracy).
  `train_model` compiles it again with `categorical_crossentropy`, so that
  later compile is the one in effect during training there.

  Returns:
      The compiled functional model.
  """
  base_filters = 64
  stage_depths = (2, 5, 5, 2)

  with strategy.scope():
    inputs = layers.Input(shape=(32, 32, 3))

    # Stem: normalize the raw input, then one 3x3 convolution.
    t = layers.BatchNormalization()(inputs)
    t = layers.Conv2D(filters=base_filters, kernel_size=3, strides=1, padding="same")(t)
    t = relu_bn(t)

    for stage, depth in enumerate(stage_depths):
      width = base_filters * 2 ** stage  # filter count doubles every stage
      for position in range(depth):
        # Only the first block of stages 1..3 halves the resolution.
        t = residual_block(t, downsample=(position == 0 and stage != 0), filters=width)

    t = layers.AveragePooling2D(4)(t)
    t = layers.Flatten()(t)
    outputs = layers.Dense(7, activation='softmax')(t)

    model = Model(inputs, outputs)

    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    return model

5. **VGG Net**

In [None]:
def VGGNet():
  """Build the (uncompiled) VGG-16-style classifier inside `strategy.scope()`.

  Five stages of 3x3 same-padded ReLU convolutions (2, 2, 3, 3, 3 layers with
  64, 128, 256, 512, 512 filters), each followed by 2x2 max pooling, then two
  4096-unit dense layers and a 7-unit softmax. Inputs have shape
  (224, 224, 3).

  Returns:
      The uncompiled Sequential model.
  """
  def conv3x3(filters, **extra):
    # Every convolution in the network shares these settings.
    return layers.Conv2D(filters=filters, kernel_size=(3, 3), padding="same", activation="relu", **extra)

  stage_plan = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))

  with strategy.scope():
    model = Sequential()

    for stage_index, (filters, repeats) in enumerate(stage_plan):
      for conv_index in range(repeats):
        if stage_index == 0 and conv_index == 0:
          # The very first layer declares the input shape.
          model.add(conv3x3(filters, input_shape=(224, 224, 3)))
        else:
          model.add(conv3x3(filters))
      model.add(layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2)))

    model.add(layers.Flatten())
    for units in (4096, 4096):
      model.add(layers.Dense(units=units, activation="relu"))
    model.add(layers.Dense(units=7, activation="softmax"))

    return model

In [None]:
def train_model(model, learning_rate, hardware_accelerator, batch_size):
    dimensions = (0, 0)
    directory_name = ""

    match model:
      case "AlexNet":
        dimensions = (227, 227)
        directory_name = "AlexNet"
        model_to_train = AlexNet()
      case "LeNet":
        dimensions = (32, 32)
        directory_name = "LeNet"
        model_to_train = LeNet()
      case "GoogleNet":
        dimensions = (224, 224)
        directory_name = "GoogleNet"
        model_to_train = GoogleNet()
      case "ResNet":
        dimensions = (32, 32)
        directory_name = "ResNet"
        model_to_train = ResNet()
      case "VGGNet":
        dimensions = (224, 224)
        directory_name = "VGGNet"
        model_to_train = VGGNet()
      case _:
        dimensions = (-1, -1)
        directory_name = "Wrong input given"
        exit(-1)

    train_dir = "/content/skin_diseases_recognition_using_ml/data/final_data_for_training_model/train//"
    validation_dir = "/content/skin_diseases_recognition_using_ml/data/final_data_for_training_model/validation//"

    train_datagen = ImageDataGenerator(
        rescale=1. / 255,
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True
    )

    test_datagen = ImageDataGenerator(rescale=1. / 255)

    train_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=dimensions,
        batch_size=batch_size,
        class_mode="categorical"
    )

    validation_generator = test_datagen.flow_from_directory(
        validation_dir,
        target_size=dimensions,
        batch_size=batch_size,
        class_mode="categorical"
    )
    # with tpu_strategy.scope():
    #   model_to_train.compile(loss='categorical_crossentropy',
    #                 optimizer=optimizers.Adam(learning_rate=learning_rate),
    #                 metrics=['accuracy'])
    with tf.device('/TPU:0'):
      model_to_train.compile(loss='categorical_crossentropy',
                    optimizer=optimizers.Adam(learning_rate=learning_rate),
                    metrics=['accuracy'])

    model_to_train.summary()

    history = model_to_train.fit(
        train_generator,
        steps_per_epoch=100,
        epochs=50,
        validation_data=validation_generator,
        validation_steps=50
    )

    plot_model(model_to_train, show_shapes=True)

    model_to_train.save(f"/content/drive/MyDrive/skin_diseases_recognition_using_ml/final_results/saved_models/{directory_name}_model_lr_{learning_rate}_{batch_size}_{hardware_accelerator}_model.h5")

    pd.DataFrame(history.history).plot(figsize=(8, 5))
    plt.grid(True)
    # plt.gca().set_ylim(0, 1)
    plt.tight_layout()
    plt.savefig(f"/content/drive/MyDrive/skin_diseases_recognition_using_ml/final_results/loss_functions/{directory_name}_model_lr_{learning_rate}_{batch_size}_{hardware_accelerator}_loss_function.png",
                bbox_inches='tight')
    plt.show()

In [None]:
import time

In [None]:
# Train AlexNet (learning rate 0.001, batch size 16, files labelled "TPU")
# and report the wall-clock duration of the whole run.
start_time = time.time()
train_model("AlexNet", 0.001, "TPU", 16)
print("--- %s seconds ---" % (time.time() - start_time))

In [None]:
from keras.preprocessing.image import ImageDataGenerator
from keras import models

def prediction(model, learning_rate, hardware_accelerator, batch_size):
    """Evaluate a previously saved model on the test split and print the result.

    The saved-model file is located from the four arguments, which must match
    the values used when the model was saved by `train_model`.

    Args:
        model: architecture name; one of "AlexNet", "LeNet", "GoogleNet",
            "ResNet" or "VGGNet". It also selects the image size the test
            images are resized to.
        learning_rate: learning rate embedded in the saved file name.
        hardware_accelerator: accelerator label embedded in the file name.
        batch_size: training batch size embedded in the file name
            (evaluation itself runs with a batch size of 1).

    Raises:
        ValueError: if `model` is not one of the supported names.

    Returns:
        None. Prints the class-index mapping, the first 100 file names, the
        number of test samples, and the metric at index 1 of the evaluation
        result as a percentage.
    """
    # Input resolution each architecture in this notebook is built for.
    input_sizes = {
        "AlexNet": (227, 227),
        "LeNet": (32, 32),
        "GoogleNet": (224, 224),
        "ResNet": (32, 32),
        "VGGNet": (224, 224),
    }
    if model not in input_sizes:
        raise ValueError(
            f"Unknown model {model!r}; expected one of {sorted(input_sizes)}")

    trained_model = load_model(f"/content/drive/MyDrive/skin_diseases_recognition_using_ml/final_results/saved_models/{model}_model_lr_{learning_rate}_{batch_size}_{hardware_accelerator}_model.h5")

    test_dir = "/content/skin_diseases_recognition_using_ml/data/final_data_for_training_model/test//"

    test_datagen = ImageDataGenerator(rescale=1. / 255)

    # shuffle=False keeps the generator order aligned with `filenames`.
    test_generator = test_datagen.flow_from_directory(
        test_dir,
        target_size=input_sizes[model],
        color_mode="rgb",
        shuffle=False,
        class_mode='categorical',
        batch_size=1)

    label_map = test_generator.class_indices

    print(label_map)

    filenames = test_generator.filenames
    print(filenames[:100])
    nb_samples = len(filenames)
    print(nb_samples)

    scores = trained_model.evaluate(test_generator)

    print(f"{trained_model.metrics_names[1]}: {scores[1] * 100}")


In [None]:
# Evaluate the AlexNet model that was saved with lr=0.00001, batch size 32, label "GPU".
prediction("AlexNet", 0.00001, "GPU", 32)

In [None]:
!pip install opencv-python

In [None]:
import cv2
import os
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.optimizers import SGD

In [None]:
def preprocess_image(file_path, dimensions):
    """Load an image with OpenCV and return it as a float32 RGB array in [0, 1].

    Args:
        file_path: path of the image file to read.
        dimensions: target size handed to `cv2.resize` as its `dsize` argument.

    Returns:
        The resized image, converted from BGR to RGB and divided by 255.
    """
    # NOTE(review): cv2.imread presumably returns None for unreadable files,
    # in which case the resize below fails -- confirm whether callers need a
    # clearer error.
    bgr_pixels = cv2.imread(file_path)
    resized = cv2.resize(bgr_pixels, dimensions)
    rgb_pixels = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
    return rgb_pixels.astype(np.float32) / 255.0


def load_dataset(dataset_path, dataset, model):
    """Load every image below `dataset_path` into memory with integer labels.

    `dataset_path` must contain one sub-directory per class. Classes are
    numbered by the alphabetical order of the sub-directory names; entries of
    `dataset_path` that are not directories are ignored and do not consume a
    label index.

    Args:
        dataset_path: directory holding one sub-directory per class.
        dataset: name of the split (e.g. 'train'); currently unused.
        model: architecture name; one of "AlexNet", "LeNet", "GoogleNet",
            "ResNet" or "VGGNet". It selects the size images are resized to.

    Raises:
        ValueError: if `model` is not one of the supported names.

    Returns:
        A tuple `(images, labels)` of NumPy arrays: the preprocessed images
        and the integer class index of each image.
    """
    # Input resolution each architecture in this notebook is built for.
    input_sizes = {
        "AlexNet": (227, 227),
        "LeNet": (32, 32),
        "GoogleNet": (224, 224),
        "ResNet": (32, 32),
        "VGGNet": (224, 224),
    }
    if model not in input_sizes:
        raise ValueError(
            f"Unknown model {model!r}; expected one of {sorted(input_sizes)}")
    dimensions = input_sizes[model]

    # Only directories are classes; filtering before numbering keeps stray
    # files from shifting the label indices.
    classes = sorted(
        entry for entry in os.listdir(dataset_path)
        if os.path.isdir(os.path.join(dataset_path, entry))
    )

    images = []
    labels = []
    for label, class_name in enumerate(classes):
        class_path = os.path.join(dataset_path, class_name)
        for file_name in os.listdir(class_path):
            file_path = os.path.join(class_path, file_name)
            if os.path.isfile(file_path):
                images.append(preprocess_image(file_path, dimensions))
                labels.append(label)

    images = np.array(images)
    labels = np.array(labels)

    return images, labels

    # np.save(f'/content/skin_diseases_recognition_using_ml/data/saved_x_{dataset}_array', images)
    # np.save(f'/content/skin_diseases_recognition_using_ml/data//saved_y_{dataset}_array', labels)


In [None]:
# Load the train and test splits fully into memory, resized for VGGNet.
dataset_path = '/content/skin_diseases_recognition_using_ml/data/final_data_for_training_model/'
dataset_train = 'train'
dataset_test = 'test'
# NOTE(review): the four *_loaded lists below are not used anywhere in the
# visible part of the notebook -- confirm whether they can be dropped.
x_train_loaded = []
y_train_loaded = []
x_test_loaded = []
y_test_loaded = []
x_train, y_train = load_dataset(os.path.join(dataset_path, dataset_train), dataset_train, "VGGNet")
x_test, y_test = load_dataset(os.path.join(dataset_path, dataset_test), dataset_test, "VGGNet")

In [None]:
# Build an uncompiled VGGNet; it is compiled in a later cell.
model = VGGNet()

In [None]:
# Number of training images loaded into memory.
len(x_train)

In [None]:
# Hyperparameters for the in-memory training run below.
learning_rate = 0.00001
batch_size = 16

In [None]:
# load_dataset returns integer class indices (not one-hot vectors), hence the
# sparse variant of the cross-entropy loss.
with strategy.scope():
  model.compile(loss='sparse_categorical_crossentropy',
                    optimizer=optimizers.Adam(learning_rate=learning_rate),
                    metrics=['accuracy'])

In [None]:
# Train on the in-memory arrays.
# NOTE(review): validation_data is the *test* split loaded above, not the
# validation split -- confirm this is intended.
model.fit(x_train,
          y_train,
          batch_size=batch_size,
          steps_per_epoch=100,
          epochs=50,
          validation_data=(x_test, y_test),
          validation_steps=50)

**If a TFRecord file needs to be created (for cases where the dataset is too big to fit in memory):**

In [None]:
def preprocess_image(image_path):
    """Return the raw RGB bytes of an image resized to the desired size.

    The target size comes from the module-level `desired_width` and
    `desired_height`, which must be set before this function is called. The
    result is the uncompressed pixel buffer (`Image.tobytes()`), not an
    encoded image file.

    NOTE(review): this re-uses the name of the OpenCV-based
    `preprocess_image(file_path, dimensions)` defined earlier in the notebook,
    with a different signature; once this cell has run, `load_dataset` will
    call this version instead -- consider renaming one of the two.

    Args:
        image_path: path of the image file to read.

    Returns:
        The pixel data as a `bytes` object.
    """
    target_size = (desired_width, desired_height)
    with Image.open(image_path) as source:
        rgb_image = source.resize(target_size).convert('RGB')
        return rgb_image.tobytes()

def create_tfrecord(image_dir, output_dir, tfrecord_file):
    """Write every '.jpg' image found below `image_dir` into one TFRecord file.

    Each record holds two byte-string features: 'image' (the output of
    `preprocess_image`) and 'label' (the name of the directory that directly
    contains the image).

    Args:
        image_dir: root directory that is walked recursively for images.
        output_dir: directory the TFRecord file is written to; it is created
            if it does not exist yet.
        tfrecord_file: file name of the TFRecord file inside `output_dir`.
    """
    # The writer does not create missing parent directories itself.
    os.makedirs(output_dir, exist_ok=True)

    # The context manager closes the file even if an image fails to load.
    with tf.io.TFRecordWriter(os.path.join(output_dir, tfrecord_file)) as writer:
        for root, _, files in os.walk(image_dir):
            # The class label is the name of the folder holding the images.
            class_label = os.path.basename(root)

            for filename in files:
                if not filename.endswith('.jpg'):
                    continue

                image_data = preprocess_image(os.path.join(root, filename))

                feature = {
                    'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_data])),
                    'label': tf.train.Feature(bytes_list=tf.train.BytesList(value=[class_label.encode()])),
                }

                example = tf.train.Example(features=tf.train.Features(feature=feature))
                writer.write(example.SerializeToString())

# Input data root, TFRecord output root, and the file name used for every split.
data_dir = '/content/skin_diseases_recognition_using_ml/data/final_data_for_training_model/'
output_dir = '/content/skin_diseases_recognition_using_ml/data/tfrecords/'
tfrecord_file = 'converted_images_to_tfrecords.tfrecord'

# Image size stored in the TFRecords; preprocess_image reads these globals.
# Set them to the input size of the architecture that will be trained.
desired_width = 32
desired_height = 32

# Convert the train folder to a TFRecord file.
train_dir = os.path.join(data_dir, 'train')
train_output_dir = os.path.join(output_dir, 'train')
create_tfrecord(train_dir, train_output_dir, tfrecord_file)

# Convert the test folder to a TFRecord file.
test_dir = os.path.join(data_dir, 'test')
test_output_dir = os.path.join(output_dir, 'test')
create_tfrecord(test_dir, test_output_dir, tfrecord_file)

# Convert the validation folder to a TFRecord file.
validation_dir = os.path.join(data_dir, 'validation')
validation_output_dir = os.path.join(output_dir, 'validation')
create_tfrecord(validation_dir, validation_output_dir, tfrecord_file)


In [None]:
# Schema used to parse the serialized examples: both features are single
# byte strings, matching what create_tfrecord writes.
feature_description = {
    'image': tf.io.FixedLenFeature([], tf.string),
    'label': tf.io.FixedLenFeature([], tf.string),
}

In [None]:
# Lookup table read by convert_labels below.
label_map = {'akiec': 0, 'bcc': 1, 'bkl': 2, 'df': 3, 'mel': 4, 'nv': 5, 'vasc': 6}  # Map string labels to integer labels

def parse_tfrecord(example):
    """Parse one serialized example into a float image and its string label.

    The 'image' feature holds the raw RGB pixel buffer written by
    `preprocess_image` (`Image.tobytes()`), not an encoded JPEG, so it is
    decoded as uint8 values and reshaped using the module-level
    `desired_height` / `desired_width` the records were written with.

    Args:
        example: scalar string tensor holding one serialized `tf.train.Example`.

    Returns:
        A tuple `(image, label)`: a float32 tensor of shape
        (desired_height, desired_width, 3) scaled to [0, 1], and the class
        name as a string tensor.
    """
    example = tf.io.parse_single_example(example, feature_description)
    pixels = tf.io.decode_raw(example['image'], tf.uint8)
    # The raw buffer is row-major: height x width x 3 channels.
    image = tf.reshape(pixels, (desired_height, desired_width, 3))
    image = tf.image.convert_image_dtype(image, tf.float32)
    return image, example['label']

def convert_labels(image, label):
    """Replace a string class label by its one-hot encoding.

    Args:
        image: image tensor; passed through unchanged.
        label: scalar string tensor holding a key of `label_map`.

    Returns:
        A tuple `(image, one_hot_label)` where `one_hot_label` has one entry
        per class in `label_map`.
    """
    # The Python lookup must return exactly the declared dtype: a plain int
    # would become int64 on most platforms and clash with tf.int32.
    label = tf.numpy_function(lambda x: np.int32(label_map[x.decode()]), [label], tf.int32)
    # numpy_function drops static shape information; the label is a scalar.
    label.set_shape([])
    # Derive the depth from label_map so this does not rely on a global that
    # is only assigned in a later cell.
    one_hot_label = tf.one_hot(label, depth=len(label_map))
    return image, one_hot_label

In [None]:
# Paths of the TFRecord files, one per split.
train_tfrecord_file = '/content/skin_diseases_recognition_using_ml/data/tfrecords/train/converted_images_to_tfrecords.tfrecord'
test_tfrecord_file = '/content/skin_diseases_recognition_using_ml/data/tfrecords/test/converted_images_to_tfrecords.tfrecord'
validation_tfrecord_file = '/content/skin_diseases_recognition_using_ml/data/tfrecords/validation/converted_images_to_tfrecords.tfrecord'

# Datasets of serialized examples.
train_dataset = tf.data.TFRecordDataset(train_tfrecord_file)
test_dataset = tf.data.TFRecordDataset(test_tfrecord_file)
validation_dataset = tf.data.TFRecordDataset(validation_tfrecord_file)

# Datasets of (image, string label) pairs.
parsed_train_dataset = train_dataset.map(parse_tfrecord)
parsed_test_dataset = test_dataset.map(parse_tfrecord)
parsed_validation_dataset = validation_dataset.map(parse_tfrecord)

In [None]:
# Turn the string labels into one-hot vectors.
# Note: this rebinds train_dataset / test_dataset / validation_dataset, which
# previously held the raw TFRecord datasets, to (image, one-hot label) datasets.
num_classes = len(label_map)
train_dataset = parsed_train_dataset.map(convert_labels)
test_dataset = parsed_test_dataset.map(convert_labels)
validation_dataset = parsed_validation_dataset.map(convert_labels)

In [None]:
batch_size = 32
shuffle_buffer = 1000

# Batch the label-converted datasets (image, one-hot label) produced by
# convert_labels; the parsed_* datasets still carry raw string labels, which
# a model cannot train on.
preprocessed_train_dataset = train_dataset.shuffle(shuffle_buffer).batch(batch_size)
preprocessed_test_dataset = test_dataset.batch(batch_size)
preprocessed_validation_dataset = validation_dataset.batch(batch_size)

In [None]:
# Train on the batched TFRecord datasets.
# NOTE(review): `model` is whatever was last bound in the kernel. The only
# visible assignment is `model = VGGNet()` (224x224 input, compiled with a
# sparse loss), while the TFRecords above are written at 32x32 and
# convert_labels produces one-hot labels -- confirm the model, the loss and
# the image size agree before running this cell.
model.fit(preprocessed_train_dataset,
          steps_per_epoch=100,
          epochs=50,
          validation_data=preprocessed_validation_dataset,
          validation_steps=50)