In [17]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = ""
import random
import shutil
import cv2
import pandas as pd
from PIL import Image
from glob import glob
import matplotlib.pyplot as plt

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, roc_auc_score, classification_report, roc_curve, auc
from sklearn.preprocessing import normalize

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet152
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, concatenate, GlobalAveragePooling2D, Input, BatchNormalization, Dropout, LeakyReLU
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam


In [2]:
augmented_dataset_path = 'dullrazor_augmented'

class_directories = ['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc']

class_image_counts = {}

# Count the number of images in each class
for class_name in class_directories:
    class_path = os.path.join(augmented_dataset_path, class_name)
    num_images = len(os.listdir(class_path))
    class_image_counts[class_name] = num_images

# Print the class distribution
print("Class distribution in the augmented dataset:")
print(class_image_counts)


Class distribution in the augmented dataset:
{'akiec': 3151, 'bcc': 2727, 'bkl': 1000, 'df': 1362, 'mel': 1000, 'nv': 1000, 'vasc': 1437}


In [3]:
class_directories

['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc']

In [4]:
#lists to store image names, paths, and classes
image_names = []
image_paths = []
image_classes = []

# Iterate through each class and collect image information
for class_name in class_directories:
    class_path = os.path.join(augmented_dataset_path, class_name)
    for image_name in os.listdir(class_path):
        if image_name!=".amlignore" and image_name!=".amlignore.amltmp":
            image_names.append(image_name)
            image_paths.append(os.path.join(class_path, image_name))
            image_classes.append(class_name)

#DataFrame from the collected information
data = {
    'Image_Name': image_names,
    'Image_Path': image_paths,
    'Class': image_classes
}

df = pd.DataFrame(data)

#the DataFrame to verify the results
print(df)


                 Image_Name                                      Image_Path  \
0         augmented_0_0.jpg     dullrazor_augmented/akiec/augmented_0_0.jpg   
1         augmented_0_1.jpg     dullrazor_augmented/akiec/augmented_0_1.jpg   
2       augmented_0_100.jpg   dullrazor_augmented/akiec/augmented_0_100.jpg   
3      augmented_0_1004.jpg  dullrazor_augmented/akiec/augmented_0_1004.jpg   
4      augmented_0_1013.jpg  dullrazor_augmented/akiec/augmented_0_1013.jpg   
...                     ...                                             ...   
11670      ISIC_0072937.jpg       dullrazor_augmented/vasc/ISIC_0072937.jpg   
11671      ISIC_0072964.jpg       dullrazor_augmented/vasc/ISIC_0072964.jpg   
11672      ISIC_0073012.jpg       dullrazor_augmented/vasc/ISIC_0073012.jpg   
11673      ISIC_0073031.jpg       dullrazor_augmented/vasc/ISIC_0073031.jpg   
11674      ISIC_0073110.jpg       dullrazor_augmented/vasc/ISIC_0073110.jpg   

       Class  
0      akiec  
1      akiec  
2     

In [5]:
df['image'] = df.Image_Path.map(lambda x: np.asarray(Image.open(x).resize((150, 112))))
df['cell_type_idx'] = pd.Categorical(df.Class).codes

In [6]:
# Get a list of unique classes in the DataFrame
unique_classes = df['Class'].unique()

#lists to store train and test samples
train_samples = []
test_samples = []


train_proportion = 0.8  

# Iterate through each class and split samples into train and test sets
for class_name in unique_classes:
    # Get the DataFrame subset for the current class
    class_subset = df[df['Class'] == class_name]

    # Split the subset into train and test sets
    train_subset, test_subset = train_test_split(class_subset, train_size=train_proportion, stratify=class_subset['Class'])

    # Append the train and test subsets to the corresponding lists
    train_samples.append(train_subset)
    test_samples.append(test_subset)

# Concatenate the train and test samples for each class back into DataFrames
train_df = pd.concat(train_samples)
test_df = pd.concat(test_samples)

#shuffle the train and test DataFrames
train_df = train_df.sample(frac=1).reset_index(drop=True)
test_df = test_df.sample(frac=1).reset_index(drop=True)

# Print the sizes of the train and test sets for each class
print("Training set sizes for each class:")
print(train_df['Class'].value_counts())

print("\nTesting set sizes for each class:")
print(test_df['Class'].value_counts())


Training set sizes for each class:
Class
akiec    2519
bcc      2181
vasc     1149
df       1089
bkl       800
nv        800
mel       800
Name: count, dtype: int64

Testing set sizes for each class:
Class
akiec    630
bcc      546
vasc     288
df       273
bkl      200
nv       200
mel      200
Name: count, dtype: int64


In [7]:
def prepareimages(images):
    # images is a list of images
    images = np.asarray(images).astype(np.float64)
    images = images[:, :, :, ::-1]
    m0 = np.mean(images[:, :, :, 0])
    m1 = np.mean(images[:, :, :, 1])
    m2 = np.mean(images[:, :, :, 2])
    images[:, :, :, 0] -= m0
    images[:, :, :, 1] -= m1
    images[:, :, :, 2] -= m2
    return images

trainimages = prepareimages(list(train_df.image))
testimages = prepareimages(list(test_df.image))

trainlabels = np.asarray(train_df.cell_type_idx)
testlabels = np.asarray(test_df.cell_type_idx)


In [8]:
train_one_hot_labels = to_categorical(trainlabels, num_classes=7)
test_one_hot_labels = to_categorical(testlabels, num_classes=7)

In [18]:
# ResNet152
ResNet152_base = ResNet152(include_top=False, weights='imagenet', input_shape=(112, 150, 3))
ResNet152_gap = GlobalAveragePooling2D()(ResNet152_base.output)
ResNet152_model = Model(inputs = ResNet152_base.input, outputs=ResNet152_gap)
ResNet152_features_train = ResNet152_model.predict(trainimages)
ResNet152_features_test = ResNet152_model.predict(testimages)

2023-08-02 11:00:58.787703: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 1882540800 exceeds 10% of free system memory.




In [19]:
fc_resnet_normalized = normalize(ResNet152_features_train, norm='l2', axis=1)

In [20]:
# dimensions of each feature vector
dim_resnet = fc_resnet_normalized.shape[1]

# input layers for each feature vector
input_resnet = Input(shape=(dim_resnet,))


# Fully connected layers for each input with Batch Normalization and Dropout
fc_resnet = Dense(256)(input_resnet)
fc_resnet = BatchNormalization()(fc_resnet)
fc_resnet = LeakyReLU()(fc_resnet)
fc_resnet = Dropout(0.3)(fc_resnet)

merged = fc_resnet

# Additional fully connected layers for feature fusion with Batch Normalization and Dropout
fusion_fc = Dense(256)(merged)
fusion_fc = BatchNormalization()(fusion_fc)
fusion_fc = LeakyReLU()(fusion_fc)
fusion_fc = Dropout(0.3)(fusion_fc)

fusion_fc = Dense(128)(fusion_fc)
fusion_fc = BatchNormalization()(fusion_fc)
fusion_fc = LeakyReLU()(fusion_fc)
fusion_fc = Dropout(0.3)(fusion_fc)

# Classification layer
output_layer = Dense(7, activation='softmax')(fusion_fc)

# Create the fusion model
fusion_model = Model(inputs=input_resnet, outputs=output_layer)

# Compile the model with a reduced learning rate
optimizer = Adam(learning_rate=0.0001)
fusion_model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

In [21]:
history1 = fusion_model.fit(
    fc_resnet_normalized,
    train_one_hot_labels,
    batch_size=64,
    epochs=70,
    shuffle=True,
    validation_split=0.2
)

Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70
Epoch 22/70
Epoch 23/70
Epoch 24/70
Epoch 25/70
Epoch 26/70
Epoch 27/70
Epoch 28/70
Epoch 29/70
Epoch 30/70
Epoch 31/70
Epoch 32/70
Epoch 33/70
Epoch 34/70
Epoch 35/70
Epoch 36/70
Epoch 37/70
Epoch 38/70
Epoch 39/70
Epoch 40/70
Epoch 41/70
Epoch 42/70
Epoch 43/70
Epoch 44/70
Epoch 45/70
Epoch 46/70
Epoch 47/70
Epoch 48/70
Epoch 49/70
Epoch 50/70
Epoch 51/70
Epoch 52/70
Epoch 53/70
Epoch 54/70
Epoch 55/70
Epoch 56/70
Epoch 57/70
Epoch 58/70
Epoch 59/70
Epoch 60/70
Epoch 61/70
Epoch 62/70
Epoch 63/70
Epoch 64/70
Epoch 65/70
Epoch 66/70
Epoch 67/70
Epoch 68/70
Epoch 69/70
Epoch 70/70


In [22]:
fc_resnet_normalized_test = normalize(ResNet152_features_test, norm='l2', axis=1)


In [23]:
# Accuracy
test_loss, test_accuracy = fusion_model.evaluate(fc_resnet_normalized_test, test_one_hot_labels)
print("Test Accuracy:", test_accuracy)

# F1 Score
test_predictions = fusion_model.predict(fc_resnet_normalized_test)
test_predictions_classes = np.argmax(test_predictions, axis=1)
test_true_classes = np.argmax(test_one_hot_labels, axis=1)
f1 = f1_score(test_true_classes, test_predictions_classes, average='weighted')
print("F1 Score:", f1)

# AUC-ROC Score
roc_auc = roc_auc_score(test_one_hot_labels, test_predictions, multi_class='ovr', average='weighted')
print("AUC-ROC Score:", roc_auc)

Test Accuracy: 0.8100128173828125
F1 Score: 0.8096433666315713
AUC-ROC Score: 0.9679775073991344


In [24]:
report = classification_report(test_true_classes, test_predictions_classes)
print(report)

              precision    recall  f1-score   support

           0       0.86      0.83      0.85       630
           1       0.81      0.83      0.82       546
           2       0.69      0.60      0.64       200
           3       0.85      0.90      0.88       273
           4       0.64      0.66      0.65       200
           5       0.70      0.77      0.74       200
           6       0.94      0.92      0.93       288

    accuracy                           0.81      2337
   macro avg       0.78      0.79      0.78      2337
weighted avg       0.81      0.81      0.81      2337

