In [1]:
# https://www.kaggle.com/datasets/rajatvisitme/skin-cancer-isic

In [2]:
# !pip install tensorflow==2.15.0

In [5]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt

from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
import tensorflow as tf
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.utils import to_categorical  # Updated import
from keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau
from sklearn.model_selection import train_test_split

In [6]:
def create_dataframe(data_dir):
    data = [
        {"image_path": os.path.join(data_dir, dir_name, fname), "label": label}
        for label, dir_name in enumerate(os.listdir(data_dir))
        for fname in os.listdir(os.path.join(data_dir, dir_name))
    ]
    return pd.DataFrame(data)

train_dir = r'C:\Users\Pc\Documents\OneDrive\Desktop\Projects\Skin Cancer Project\Skin cancer ISIC Dataset\Train'
test_dir = r'C:\Users\Pc\Documents\OneDrive\Desktop\Projects\Skin Cancer Project\Skin cancer ISIC Dataset\Test'

# Create combined dataframe
df = pd.concat([create_dataframe(train_dir), create_dataframe(test_dir)], ignore_index=True)

In [7]:
df.sample(10)

Unnamed: 0,image_path,label
495,C:\Users\Pc\Documents\OneDrive\Desktop\Project...,2
94,C:\Users\Pc\Documents\OneDrive\Desktop\Project...,0
41,C:\Users\Pc\Documents\OneDrive\Desktop\Project...,0
976,C:\Users\Pc\Documents\OneDrive\Desktop\Project...,3
1312,C:\Users\Pc\Documents\OneDrive\Desktop\Project...,4
1218,C:\Users\Pc\Documents\OneDrive\Desktop\Project...,4
1131,C:\Users\Pc\Documents\OneDrive\Desktop\Project...,4
675,C:\Users\Pc\Documents\OneDrive\Desktop\Project...,3
724,C:\Users\Pc\Documents\OneDrive\Desktop\Project...,3
981,C:\Users\Pc\Documents\OneDrive\Desktop\Project...,3


In [8]:
# Get list of directories and create label_map
label_map = {i: label for i, label in enumerate(os.listdir(train_dir))}
num_classes = len(label_map)
label_map

{0: 'actinic keratosis',
 1: 'basal cell carcinoma',
 2: 'dermatofibroma',
 3: 'melanoma',
 4: 'nevus',
 5: 'vascular lesion'}

In [9]:
max_images_per_class = 2000

# Limit each class to max_images_per_class samples
df = df.groupby("label").head(max_images_per_class).reset_index(drop=True)

In [10]:
gpus = tf.config.list_physical_devices('GPU')
print(gpus)
try:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except Exception as e:
    print(e)

import multiprocessing

# Get the number of CPU cores
max_workers = multiprocessing.cpu_count()
print(max_workers)

[]
12


In [11]:
import concurrent.futures
# Function to resize image arrays
def resize_image_array(image_path):
    return np.asarray(Image.open(image_path).resize((100, 75)))

# Parallelize resizing process
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
    df['image'] = list(executor.map(resize_image_array, df['image_path']))

In [12]:
# Initialize ImageDataGenerator with transformations
datagen = ImageDataGenerator(
    rotation_range=25,
    width_shift_range=0.5,
    height_shift_range=0.25,
    shear_range=0.25,
    zoom_range=0.25,
    horizontal_flip=True,
    fill_mode='nearest'
)

In [13]:
# Initialize augmented DataFrame
augmented_df = pd.DataFrame(columns=['image_path', 'label', 'image'])

for class_label in df['label'].unique():
    # Get image arrays for the current class
    class_images = df[df['label'] == class_label]
    image_arrays = class_images['image'].values
    num_images_needed = max_images_per_class - len(image_arrays)
    
    # Add original images to augmented DataFrame
    augmented_df = pd.concat([augmented_df, class_images], ignore_index=True)

    if num_images_needed > 0:
        selected_images = np.random.choice(image_arrays, size=num_images_needed, replace=True)
        
        # Augment and add new images
        for img_array in selected_images:
            image_tensor = np.expand_dims(img_array, axis=0)
            augmented_images = datagen.flow(image_tensor, batch_size=1)
            
            for _ in range(1):  # Generate one augmented image at a time
                augmented_img = augmented_images.__next__()[0].astype('uint8')
                new_row = pd.DataFrame([{'image_path': None, 'label': class_label, 'image': augmented_img}])
                augmented_df = pd.concat([augmented_df, new_row], ignore_index=True)

# Limit the size of each class
df = augmented_df.groupby('label').head(max_images_per_class).sample(frac=1, random_state=42).reset_index(drop=True)

In [14]:
# Count the number of images in each class
class_counts = df['label'].value_counts().sort_index()

# Print the dataset summary
print("Dataset Summary")
print("-" * 60)
print(f"{'Class Label':<15} {'Class Name':<30} {'Count':<10}")
print("-" * 60)
for label, name in label_map.items():
    print(f"{label:<15} {name:<30} {class_counts[label]:<10}")
print("-" * 60)
print(f"{'Total':<45} {class_counts.sum():<10}")

Dataset Summary
------------------------------------------------------------
Class Label     Class Name                     Count     
------------------------------------------------------------
0               actinic keratosis              2000      
1               basal cell carcinoma           2000      
2               dermatofibroma                 2000      
3               melanoma                       2000      
4               nevus                          2000      
5               vascular lesion                2000      
------------------------------------------------------------
Total                                         12000     


In [15]:
X = df.drop(columns=['label','image_path'],axis=1)
y = df['label']

In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20,shuffle=True)

In [17]:
# Convert images to numpy arrays
X_train = np.asarray(X_train['image'].tolist())
X_test = np.asarray(X_test['image'].tolist())

# Normalize images
X_train_mean, X_train_std = X_train.mean(), X_train.std()
X_test_mean, X_test_std = X_test.mean(), X_test.std()

X_train = (X_train - X_train_mean) / X_train_std
X_test = (X_test - X_test_mean) / X_test_std

In [18]:
from keras.utils import to_categorical

# One-hot encode the labels
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

In [19]:
# Split data into training and validation sets
X_train, X_validate, y_train, y_validate = train_test_split(X_train, y_train, test_size=0.2, shuffle=True)

In [20]:
# Reshape images to 3D (height=75px, width=100px, channels=3)
X_train = X_train.reshape(X_train.shape[0], 75, 100, 3)
X_test = X_test.reshape(X_test.shape[0], 75, 100, 3)
X_validate = X_validate.reshape(X_validate.shape[0], 75, 100, 3)

In [21]:
y_train = y_train.astype(int)
y_validate = y_validate.astype(int)

In [22]:
# Count the number of images per class
train_counts = np.sum(y_train, axis=0)
val_counts = np.sum(y_validate, axis=0)
test_counts = np.sum(y_test, axis=0)

# Print dataset summary
print("Dataset Summary")
print("-" * 90)
print(f"{'Class Label':<15} {'Class Name':<30} {'Train':<10} {'Validation':<12} {'Test':<10} {'Total':<10}")
print("-" * 90)
for label, name in label_map.items():
    train_num = int(train_counts[label])
    val_num = int(val_counts[label])
    test_num = int(test_counts[label])
    total_num = train_num + val_num + test_num
    print(f"{label:<15} {name:<30} {train_num:<10} {val_num:<12} {test_num:<10} {total_num:<10}")
print("-" * 90)
total_images = len(y_train) + len(y_validate) + len(y_test)
print(f"{'Total':<46} {len(y_train):<10} {len(y_validate):<12} {len(y_test):<10} {total_images:<10}")

Dataset Summary
------------------------------------------------------------------------------------------
Class Label     Class Name                     Train      Validation   Test       Total     
------------------------------------------------------------------------------------------
0               actinic keratosis              1280       317          403        2000      
1               basal cell carcinoma           1275       332          393        2000      
2               dermatofibroma                 1242       339          419        2000      
3               melanoma                       1284       301          415        2000      
4               nevus                          1297       322          381        2000      
5               vascular lesion                1302       309          389        2000      
------------------------------------------------------------------------------------------
Total                                          7680       19

In [23]:
input_shape = df['image'][0].shape
input_shape

(75, 100, 3)

In [25]:
from tensorflow.keras.applications.resnet import preprocess_input as resnet_preprocess_input
from tensorflow.keras.applications import DenseNet201
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout,GlobalAveragePooling2D
from tensorflow.keras.optimizers import SGD

In [None]:
# Define the DenseNet201 model with custom layers
model = Sequential([
    DenseNet201(include_top=False, weights='imagenet', input_shape=input_shape),
    Flatten(),
    Dropout(0.5),
    Dense(512, activation='relu'),
    Dense(num_classes, activation='softmax')
])

model.summary()

In [None]:
from tensorflow.keras.callbacks import  ReduceLROnPlateau

# Compile the model
opt = SGD(learning_rate=0.0001, momentum=0.9)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])



# Set learning rate annealer
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_accuracy',
    patience=3,
    verbose=1,
    factor=0.5,
    min_lr=0.00001
)

In [28]:
%%time
# Fit the model
history = model.fit(
    X_train, y_train,
    epochs=15, batch_size=32,
    validation_data=(X_validate, y_validate),
    callbacks=[learning_rate_reduction]
)

model.save('skin_disease_6_classes_model.h5')

Epoch 1/15
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m745s[0m 3s/step - accuracy: 0.3003 - loss: 2.0112 - val_accuracy: 0.6260 - val_loss: 1.0195 - learning_rate: 1.0000e-04
Epoch 2/15
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m661s[0m 3s/step - accuracy: 0.6026 - loss: 1.0569 - val_accuracy: 0.7016 - val_loss: 0.8061 - learning_rate: 1.0000e-04
Epoch 3/15
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m668s[0m 3s/step - accuracy: 0.6871 - loss: 0.8427 - val_accuracy: 0.7380 - val_loss: 0.7127 - learning_rate: 1.0000e-04
Epoch 4/15
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m686s[0m 3s/step - accuracy: 0.7641 - loss: 0.6543 - val_accuracy: 0.7646 - val_loss: 0.6399 - learning_rate: 1.0000e-04
Epoch 5/15
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m665s[0m 3s/step - accuracy: 0.8030 - loss: 0.5384 - val_accuracy: 0.7891 - val_loss: 0.5920 - learning_rate: 1.0000e-04
Epoch 6/15
[1m240/240[0m [32m━━━━━━━━━━━━━



CPU times: total: 10h 48min 11s
Wall time: 2h 49min 46s


In [29]:
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
import numpy as np

In [30]:
model = load_model('skin_disease_6_classes_model.h5')



In [66]:
# Define the path to the image
image_path=r'C:\Users\Pc\Documents\OneDrive\Desktop\Projects\Skin Cancer Project\Skin cancer ISIC Dataset\Train\actinic keratosis\ISIC_0026984.jpg'

# Load the image and preprocess it
img = image.load_img(image_path, target_size=(75, 100))  # Correct order: height=75px, width=100px
img_array = image.img_to_array(img)  # Convert image to array
img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension

# Normalize the image as per the training data preprocessing
img_array = (img_array - np.mean(img_array)) / np.std(img_array)


# Predict the class
predictions = model.predict(img_array)
predicted_class = np.argmax(predictions, axis=1)

# Map the predicted class index to the class label
predicted_label = label_map[predicted_class[0]]

print(f"Predicted Class: {predicted_label}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 312ms/step
Predicted Class: nevus
