In [None]:
# Importing necessary libraries
import pathlib
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import PIL
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
import Augmentor


In [None]:
# Defining the path for train and test images
# The dataset root is written once and every location below is derived from it.
# Set the ISIC_DATA_DIR environment variable to use a copy stored elsewhere;
# without it the original hard-coded location is used.
dataset_root = os.environ.get(
    "ISIC_DATA_DIR",
    "C:/Users/Mohammad Rizwan/Downloads/Skin cancer ISIC The International Skin Imaging Collaboration",
).rstrip("/\\")
# String form of the training directory (trailing slash kept); the Augmentor
# cell builds its per-class source paths from this value.
path_to_training_dataset = dataset_root + "/Train/"
data_dir_train = pathlib.Path(path_to_training_dataset)
data_dir_test = pathlib.Path(dataset_root + "/Test/")

In [None]:
# Count the .jpg files sitting one directory level below each split folder
image_count_train = sum(1 for _ in data_dir_train.glob('*/*.jpg'))
image_count_test = sum(1 for _ in data_dir_test.glob('*/*.jpg'))

print(f'Train Image Count: {image_count_train}')
print(f'Test Image Count: {image_count_test}')

In [None]:
# Loader parameters: images per batch and the size every image is resized to
batch_size = 32
img_height, img_width = 180, 180

In [None]:
# Creating the training and validation datasets
# Both subsets are read from the training directory; the same seed and split
# fraction are passed to each call, only the `subset` argument differs.
_split_kwargs = dict(
    validation_split=0.2,
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size,
)

train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir_train, subset="training", **_split_kwargs
)

val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir_train, subset="validation", **_split_kwargs
)

In [None]:
# Checking the class names
# Read from the dataset object here, before `train_ds` is rebound to the
# cached/shuffled/prefetched pipeline in a later cell; the model and plotting
# cells index into this list.
class_names = train_ds.class_names
print(f"Classes: {class_names}")

In [None]:
# Visualize one instance of all classes
# Scan batches until one example per label index has been collected. Plotting
# the first nine images of a single batch (the previous approach) can show the
# same class several times and leave other classes out.
sample_per_class = {}
for images, labels in train_ds:
    for image, label in zip(images, labels.numpy()):
        # setdefault keeps the first image seen for each label index
        sample_per_class.setdefault(int(label), image)
    if len(sample_per_class) == len(class_names):
        break

n_cols = 3
n_rows = -(-len(class_names) // n_cols)  # ceiling division without importing math

plt.figure(figsize=(10, 10))
for position, class_index in enumerate(sorted(sample_per_class), start=1):
    ax = plt.subplot(n_rows, n_cols, position)
    plt.imshow(sample_per_class[class_index].numpy().astype("uint8"))
    plt.title(class_names[class_index])
    plt.axis("off")
plt.show()

In [None]:
# Optimize data loading
# cache() stores the dataset's elements after the first pass; prefetch() lets input
# preparation overlap with model execution. Only the training set is shuffled.
AUTOTUNE = tf.data.AUTOTUNE  # stable name for the former tf.data.experimental.AUTOTUNE
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

In [None]:
# Build a CNN model
# Three Conv2D + MaxPooling2D stages with 32, 64 and 128 filters, followed by a
# dense classifier head. Pixel values are rescaled to [0, 1] inside the model.
model = Sequential([
    layers.Rescaling(1./255, input_shape=(img_height, img_width, 3)),
    layers.Conv2D(32, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(128, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),  # Regularization layer to reduce overfitting
    # One output unit per class found in the training directory, instead of a
    # hard-coded 9, so the head always matches the labels the dataset produces.
    layers.Dense(len(class_names), activation='softmax')
])

In [None]:
# Compile the model
# The network ends in a softmax layer, so the loss is told it receives
# probabilities (from_logits=False) together with integer labels.
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer='adam',
    metrics=['accuracy'],
)

# Print the layer-by-layer summary of the model
model.summary()

In [None]:
# Train the baseline model and keep the per-epoch metrics in `history`
epochs = 20
history = model.fit(train_ds, epochs=epochs, validation_data=val_ds)

In [None]:
# Visualize training results
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

# Take the x-axis from the recorded history instead of the global `epochs`,
# which a later cell rebinds to a different value; the x values therefore
# always have the same length as the curves, even when this cell is re-run.
epochs_range = range(len(acc))

fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(8, 8))

ax_acc.plot(epochs_range, acc, label='Training Accuracy')
ax_acc.plot(epochs_range, val_acc, label='Validation Accuracy')
ax_acc.legend(loc='lower right')
ax_acc.set_title('Training and Validation Accuracy')
ax_acc.set_xlabel('Epoch')

ax_loss.plot(epochs_range, loss, label='Training Loss')
ax_loss.plot(epochs_range, val_loss, label='Validation Loss')
ax_loss.legend(loc='upper right')
ax_loss.set_title('Training and Validation Loss')
ax_loss.set_xlabel('Epoch')

plt.show()

In [None]:
# Apply Data Augmentation to combat overfitting/underfitting
# Random horizontal flip, rotation and zoom, packaged as a small Sequential
# model so it can be used as the first stage of a larger model.
_augmentation_steps = [
    layers.RandomFlip("horizontal", input_shape=(img_height, img_width, 3)),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
]
data_augmentation = tf.keras.Sequential(_augmentation_steps)

In [None]:
# Visualize data augmentation on an image
# The whole first batch goes through the augmentation layers; only its first
# image is displayed.
for batch_images, _ in train_ds.take(1):
    augmented_batch = data_augmentation(batch_images)
    preview = augmented_batch[0].numpy().astype("uint8")
    plt.imshow(preview)
    plt.show()

In [None]:
# Build the augmented model
# Same convolutional stack as the baseline model, with the augmentation
# pipeline placed in front of the rescaling layer.
model_augmented = Sequential([
    data_augmentation,
    layers.Rescaling(1./255),
    layers.Conv2D(32, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(128, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    # One output unit per class found in the training directory, instead of a
    # hard-coded 9, so the head always matches the labels the dataset produces.
    layers.Dense(len(class_names), activation='softmax')
])

In [None]:
# Compile the augmented model (same optimizer, loss and metric as the baseline)
model_augmented.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer='adam',
    metrics=['accuracy'],
)

# Train the augmented model for 20 epochs on the same datasets as the baseline
history_augmented = model_augmented.fit(train_ds, epochs=20, validation_data=val_ds)

In [None]:
# Visualize results of augmented model training
acc_aug = history_augmented.history['accuracy']
val_acc_aug = history_augmented.history['val_accuracy']
loss_aug = history_augmented.history['loss']
val_loss_aug = history_augmented.history['val_loss']

# Build the x-axis from this run's own history. Relying on the global
# `epochs_range` breaks when this cell is re-run after the final-model cell has
# rebound it to a different length (x and y sizes no longer match).
epochs_range_aug = range(len(acc_aug))

fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(8, 8))

ax_acc.plot(epochs_range_aug, acc_aug, label='Training Accuracy')
ax_acc.plot(epochs_range_aug, val_acc_aug, label='Validation Accuracy')
ax_acc.legend(loc='lower right')
ax_acc.set_title('Training and Validation Accuracy')
ax_acc.set_xlabel('Epoch')

ax_loss.plot(epochs_range_aug, loss_aug, label='Training Loss')
ax_loss.plot(epochs_range_aug, val_loss_aug, label='Validation Loss')
ax_loss.legend(loc='upper right')
ax_loss.set_title('Training and Validation Loss')
ax_loss.set_xlabel('Epoch')

plt.show()

In [None]:
# Analyze class distribution
# Counts only the .jpg files directly inside each class folder; files in
# nested folders are not included.
for name in class_names:  # the enumerate() index was never used
    class_count = len(list(data_dir_train.glob(f'{name}/*.jpg')))
    print(f'Class {name}: {class_count} images')

In [None]:
# Using Augmentor to balance classes
# Every class receives up to `samples_per_class` randomly rotated images in its
# `output` sub-directory. The same number is requested for each class, so this
# reduces the relative imbalance rather than equalizing the class sizes.
# Only the shortfall is generated: re-running this cell (or the whole notebook)
# no longer adds another 500 files per class to disk each time.
samples_per_class = 500

for class_name in class_names:
    # os.path.join avoids the doubled separator produced by appending '/' to a
    # path that already ends with one.
    class_dir = os.path.join(path_to_training_dataset, class_name)
    already_generated = len(list(pathlib.Path(class_dir, 'output').glob('*.jpg')))
    missing = samples_per_class - already_generated
    if missing <= 0:
        continue
    p = Augmentor.Pipeline(class_dir)
    p.rotate(probability=0.7, max_left_rotation=10, max_right_rotation=10)
    p.sample(missing)

In [None]:
# Count the generated .jpg files in the per-class `output` folders
image_count_train_aug = sum(1 for _ in data_dir_train.glob('*/output/*.jpg'))
print(f"Augmented Train Image Count: {image_count_train_aug}")

In [None]:
# Retrain the model with the augmented balanced dataset
# The training directory is re-read with the same seed, split fraction, image
# size and batch size as before; only the `subset` argument differs per call.
# NOTE(review): if the loader also picks up the generated `output` images, a
# random 80/20 split can place rotated copies of a training image in the
# validation subset — confirm whether that is acceptable for evaluation.
_aug_split_kwargs = dict(
    seed=123,
    validation_split=0.2,
    image_size=(img_height, img_width),
    batch_size=batch_size,
)

train_ds_aug, val_ds_aug = [
    tf.keras.preprocessing.image_dataset_from_directory(
        data_dir_train, subset=subset_name, **_aug_split_kwargs
    )
    for subset_name in ("training", "validation")
]

In [None]:
# Rebuild the final model for balanced data
# Fresh, untrained copy of the augmented architecture: augmentation, rescaling,
# three Conv2D + MaxPooling2D stages and a dense classifier head.
final_model = Sequential([
    data_augmentation,
    layers.Rescaling(1./255),
    layers.Conv2D(32, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(128, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    # One output unit per class found in the training directory, instead of a
    # hard-coded 9, so the head always matches the labels the dataset produces.
    layers.Dense(len(class_names), activation='softmax')
])

In [None]:
# Compile final model (same optimizer, loss and metric as the earlier models)
final_model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer='adam',
    metrics=['accuracy'],
)

# Train the final model on the re-read datasets; note this rebinds the global
# `epochs` from its earlier value to 30
epochs = 30
history_final = final_model.fit(train_ds_aug, epochs=epochs, validation_data=val_ds_aug)

In [None]:
# Visualize final training results
acc_final = history_final.history['accuracy']
val_acc_final = history_final.history['val_accuracy']
loss_final = history_final.history['loss']
val_loss_final = history_final.history['val_loss']

# Take the x-axis from the recorded history rather than the global `epochs`,
# so the x values always have the same length as the curves being plotted.
epochs_range = range(len(acc_final))

fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(8, 8))

ax_acc.plot(epochs_range, acc_final, label='Training Accuracy')
ax_acc.plot(epochs_range, val_acc_final, label='Validation Accuracy')
ax_acc.legend(loc='lower right')
ax_acc.set_title('Final Training and Validation Accuracy')
ax_acc.set_xlabel('Epoch')

ax_loss.plot(epochs_range, loss_final, label='Training Loss')
ax_loss.plot(epochs_range, val_loss_final, label='Validation Loss')
ax_loss.legend(loc='upper right')
ax_loss.set_title('Final Training and Validation Loss')
ax_loss.set_xlabel('Epoch')

plt.show()