In [1]:
 import os
 import shutil

 # Consolidate the per-class sub-folders of the train split into one flat
 # folder, copying every image under the name "ClassName(Count).ext".

 # Input folder containing the images
 input_dir = r"/kaggle/input/skin-disease-dataset/dataset/train"
 # Output folder for renamed images
 output_dir = r"/kaggle/working/renamed_train"

 # Ensure the output directory exists
 os.makedirs(output_dir, exist_ok=True)

 # Extensions (compared case-insensitively) that are treated as images
 IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

 # Running number of images copied so far for each class
 class_counts = {}

 # Traverse through each subdirectory
 for root, dirs, files in os.walk(input_dir):
     # os.walk yields entries in arbitrary order; sorting the directory list
     # in place and the file list below makes the numbering reproducible.
     dirs.sort()

     # The folder name is the class identifier for every file inside it
     class_name = os.path.basename(root)

     for file_name in sorted(files):
         # Skip non-image files
         if not file_name.lower().endswith(IMAGE_EXTENSIONS):
             print(f"Skipping non-image file: {file_name}")
             continue

         # Full path of the image
         img_path = os.path.join(root, file_name)

         # Initialize or increment the count for this class
         count = class_counts.get(class_name, 0) + 1
         class_counts[class_name] = count

         # Generate new file name in the format ClassName(Count).Extension
         ext = os.path.splitext(file_name)[1]
         new_name = f"{class_name}({count}){ext}"
         new_path = os.path.join(output_dir, new_name)

         # Copy the file to the output directory under its new name
         shutil.copy(img_path, new_path)

 # Print the total number of images for each class
 print("\nImage counts by class:")
 for class_name, count in class_counts.items():
     print(f"{class_name}: {count} images")

 print("\nRenaming and consolidation complete!")


Image counts by class:
Eczema: 999 images
Melanoma: 1000 images
Basal Cell: 1000 images
Seborrheic: 1000 images
Atopic Dermatitis: 1000 images
Melanocytic: 1000 images
Benign Keratosis: 1201 images
Warts Molluscum: 1000 images
Psoriasis: 1000 images
Tinea Ringworms Candidiasis: 990 images

Renaming and consolidation complete!


In [2]:
 import os
 import shutil

 # Consolidate the per-class sub-folders of the test split into one flat
 # folder, copying every image under the name "ClassName(Count).ext".

 # Input folder containing the images
 input_dir = r"/kaggle/input/skin-disease-dataset/dataset/test"
 # Output folder for renamed images
 output_dir = r"/kaggle/working/renamed_Test"

 # Ensure the output directory exists
 os.makedirs(output_dir, exist_ok=True)

 # Extensions (compared case-insensitively) that are treated as images
 IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

 # Running number of images copied so far for each class
 class_counts = {}

 # Traverse through each subdirectory
 for root, dirs, files in os.walk(input_dir):
     # os.walk yields entries in arbitrary order; sorting the directory list
     # in place and the file list below makes the numbering reproducible.
     dirs.sort()

     # The folder name is the class identifier for every file inside it
     class_name = os.path.basename(root)

     for file_name in sorted(files):
         # Skip non-image files
         if not file_name.lower().endswith(IMAGE_EXTENSIONS):
             print(f"Skipping non-image file: {file_name}")
             continue

         # Full path of the image
         img_path = os.path.join(root, file_name)

         # Initialize or increment the count for this class
         count = class_counts.get(class_name, 0) + 1
         class_counts[class_name] = count

         # Generate new file name in the format ClassName(Count).Extension
         ext = os.path.splitext(file_name)[1]
         new_name = f"{class_name}({count}){ext}"
         new_path = os.path.join(output_dir, new_name)

         # Copy the file to the output directory under its new name
         shutil.copy(img_path, new_path)

 # Print the total number of images for each class
 print("\nImage counts by class:")
 for class_name, count in class_counts.items():
     print(f"{class_name}: {count} images")

 print("\nRenaming and consolidation complete! (test)")


Image counts by class:
Eczema: 200 images
Melanoma: 200 images
Basal Cell: 200 images
Seborrheic: 200 images
Atopic Dermatitis: 200 images
Melanocytic: 200 images
Warts Molluscum: 200 images
Psoriasis: 200 images
Tinea Ringworms Candidiasis: 200 images

Renaming and consolidation complete! (test)


In [3]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import InceptionV3  # Replace EfficientNetB3 with InceptionV3
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2
import random
import matplotlib.pyplot as plt
import cv2
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import GlobalAveragePooling2D, BatchNormalization, Dense, Dropout
from tensorflow.keras.models import Model

# Class-name -> integer-label lookup. A class's label is its position in the
# tuple below ("Basal_Cell" is the most recently added class).
_CLASS_ORDER = ("Seborrheic", "Melanocytic", "Melanoma", "Eczema", "Basal_Cell")
class_mapping = {name: index for index, name in enumerate(_CLASS_ORDER)}

# Preprocess images: resize and normalize
def preprocess_image(image_path):
    """Load an image, resize it to 299x299 and scale its values to [0, 1].

    Args:
        image_path: Path of the image file handed to ``cv2.imread``.

    Returns:
        The resized image as a float32 array with every value divided by
        255, or ``None`` (after printing a warning) when ``cv2.imread``
        could not load the file.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Warning: {image_path} could not be loaded.")
        return None

    resized_image = cv2.resize(image, (299, 299))  # Resizing to 299x299 for InceptionV3

    # Return the normalized array. It used to be computed and then thrown
    # away, so callers received the raw resized pixels instead.
    # float32 takes four bytes per value, i.e. 4x the memory of uint8 pixels.
    return resized_image.astype('float32') / 255.0

def load_data_from_single_folder(folder):
    """Load every labelled image found directly inside ``folder``.

    File names are visited in sorted order. The class label is the part of
    the file name before the first '(' with surrounding whitespace stripped
    and spaces replaced by underscores. Files whose label is not a key of
    ``class_mapping`` and files that ``preprocess_image`` cannot load are
    skipped.

    Args:
        folder: Directory holding the image files.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays built from the
        preprocessed images and their integer labels.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')
    candidates = [entry for entry in os.listdir(folder) if entry.lower().endswith(image_suffixes)]
    candidates.sort()

    images, labels = [], []
    for file_name in candidates:
        # "Basal Cell(12).jpg" -> "Basal_Cell"
        class_key = file_name.split('(')[0].strip().replace(' ', '_')
        if class_key not in class_mapping:
            # Classes outside the mapping are skipped silently
            continue

        pixels = preprocess_image(os.path.join(folder, file_name))
        if pixels is None:
            continue

        images.append(pixels)
        labels.append(class_mapping[class_key])

    print(f"Loaded {len(images)} images and {len(labels)} labels.")
    return np.array(images), np.array(labels)

def predict_test_data(model, test_images):
    """Run ``model.predict`` on ``test_images`` and pick the top class per row.

    Args:
        model: Object exposing a ``predict`` method.
        test_images: Input passed unchanged to ``model.predict``.

    Returns:
        Tuple ``(class_indices, scores)`` where ``scores`` is whatever
        ``model.predict`` returned and ``class_indices`` is its argmax
        along axis 1.
    """
    class_scores = model.predict(test_images)
    return np.argmax(class_scores, axis=1), class_scores

# Map predicted indices to class names
def map_classes_to_names(pred_classes):
    """Translate integer class indices into their class-name strings.

    Args:
        pred_classes: Iterable of indices that occur as values in
            ``class_mapping``.

    Returns:
        List of class names in the same order as ``pred_classes``.

    Raises:
        KeyError: If an index is not a value of ``class_mapping``.
    """
    index_to_name = dict((index, name) for name, index in class_mapping.items())
    return list(map(index_to_name.__getitem__, pred_classes))

# Calculate test accuracy
def calculate_accuracy(true_labels, pred_labels):
    """Return the fraction of positions where the two label sequences agree.

    Both inputs are converted with ``np.asarray`` first. Without that,
    comparing two Python lists with ``==`` yields a single bool, so the
    "accuracy" would silently be 0.0 or 1.0.

    Args:
        true_labels: Ground-truth labels (array-like).
        pred_labels: Predicted labels (array-like) of the same shape.

    Returns:
        Mean of the element-wise equality of the two inputs.

    Raises:
        ValueError: If the two inputs do not have the same shape.
    """
    true_arr = np.asarray(true_labels)
    pred_arr = np.asarray(pred_labels)
    if true_arr.shape != pred_arr.shape:
        # Broadcasting mismatched shapes would give a meaningless score
        raise ValueError(
            f"Shape mismatch: true_labels {true_arr.shape} vs pred_labels {pred_arr.shape}"
        )
    accuracy = np.mean(true_arr == pred_arr)
    return accuracy

# Locations of the consolidated image folders
train_folder = r'/kaggle/working/renamed_train'
test_folder = r"/kaggle/working/renamed_Test"  # defined here; only train_folder is loaded below

# Read the training images together with their integer labels
X_train, y_train = load_data_from_single_folder(train_folder)

# === Step 6: Class Distribution Analysis ===
# Integer label -> class name, used to make the printed counts readable
class_names = dict((index, name) for name, index in class_mapping.items())
class_counts = pd.Series(y_train).value_counts()
class_counts_named = class_counts.rename(index=class_names)

print("\nClass counts (class names):")
print(class_counts_named)

# Hold out 20% of the loaded data, stratified by label, as X_test / y_test
X_train, X_test, y_train, y_test = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=42,
    stratify=y_train,
)

# === Step 7: Balance Classes to Max Class Size Using Augmentation ===
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Count the classes in the training split that is actually being augmented.
# Counts taken before train_test_split include the held-out samples, so they
# overstate both the target size and each class's deficit.
train_split_counts = pd.Series(y_train).value_counts()
max_class_size = train_split_counts.max()  # Largest class in the training split
augmented_images = []
augmented_labels = []

# Create augmented images for every class that is smaller than the largest one
for label in np.unique(y_train):
    class_images = X_train[y_train == label]
    current_class_size = train_split_counts.loc[label]
    n_missing = int(max_class_size - current_class_size)
    if n_missing <= 0:
        # Already at the target size; no generator needed
        continue

    augmented = datagen.flow(class_images, batch_size=1)
    for _ in range(n_missing):
        # batch_size=1, so each batch holds one image; take it out of the batch
        augmented_images.append(next(augmented)[0])
        augmented_labels.append(label)

# If augmented images are created, concatenate them with the original data
if augmented_images:  # Ensure there are augmented images to add
    X_train = np.concatenate([X_train, np.array(augmented_images)])
    y_train = np.concatenate([y_train, np.array(augmented_labels)])

# Check new class distribution
new_class_counts = pd.Series(y_train).value_counts()
new_class_counts_named = new_class_counts.rename(index=class_names)

print("\nNew class counts after augmentation (class names):")
print(new_class_counts_named)

# Check class distribution for the training and test sets
train_class_counts = pd.Series(y_train).value_counts().rename(index=class_names)
test_class_counts = pd.Series(y_test).value_counts().rename(index=class_names)

print("\nClass counts in training set:")
print(train_class_counts)

print("\nClass counts in test set:")
print(test_class_counts)

# Define a dense block function
def dense_block(units, dropout_rate):
    """Build a callable that applies Dense -> BatchNormalization -> Dropout.

    Args:
        units: Number of units of the ReLU-activated ``Dense`` layer.
        dropout_rate: Rate handed to the ``Dropout`` layer.

    Returns:
        A function taking a tensor ``x`` and returning the output of the
        three layers applied to it in sequence.
    """
    def block(x):
        # Each layer is created right before it is applied to the tensor
        layer_factories = (
            lambda: Dense(units, activation='relu'),
            lambda: BatchNormalization(),
            lambda: Dropout(dropout_rate),
        )
        for make_layer in layer_factories:
            x = make_layer()(x)
        return x
    return block

# Load InceptionV3 model with pretrained ImageNet weights, excluding the top layers.
# The input shape matches the 299x299 resize done in preprocess_image.
base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(299, 299, 3), pooling=None)

# Custom classification head stacked on the base model's output:
# global average pooling, batch norm, two dense blocks, then the softmax layer
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = BatchNormalization()(x)
x = dense_block(128, 0.5)(x)
x = dense_block(32, 0.2)(x)
predictions = Dense(len(class_mapping), activation="softmax")(x)  # One softmax output per entry in class_mapping

# Create the final model
model = Model(inputs=base_model.input, outputs=predictions)

# Mark the last 20 layers of the base model as trainable (the slice is [-20:]).
# NOTE(review): nothing above sets trainable=False on base_model, so this loop
# presumably changes nothing and the whole network is fine-tuned — confirm
# whether the base model was meant to be frozen first.
for layer in base_model.layers[-20:]:
    layer.trainable = True

# Compile the model with Adam optimizer
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
              loss='sparse_categorical_crossentropy',  # Sparse because labels are integers
              metrics=['accuracy'])

# Training callbacks: early stopping on val_accuracy (patience 10, restoring the
# best weights) and halving the learning rate when val_loss plateaus (patience 5)
callbacks = [
    EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-7, verbose=1)
]

# Train the model with test data as validation.
# NOTE(review): X_test / y_test drive the EarlyStopping and ReduceLROnPlateau
# callbacks and are also the data evaluated below, so the reported test
# accuracy is not measured on untouched data.
# NOTE(review): shuffle=False feeds the samples in the same order every epoch,
# with any augmented samples at the end of X_train — confirm this is intended.
epochs = 50
batch_size = 16
history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
                    validation_data=(X_test, y_test), callbacks=callbacks, shuffle=False)

# Predict on the test data (class indices plus the raw per-class scores)
y_test_pred_classes, y_test_pred = predict_test_data(model, X_test)

# Confusion matrix for test set
test_conf_matrix = confusion_matrix(y_test, y_test_pred_classes)
print(f"Test Confusion Matrix:\n{test_conf_matrix}")

# Calculate overall accuracy for the test set
test_accuracy = calculate_accuracy(y_test, y_test_pred_classes)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

# Print up to the first 10 predictions and actual class names from test set
label_to_class = {v: k for k, v in class_mapping.items()}

# Bounded by the test-set size so a split with fewer than 10 samples
# does not raise IndexError
for i in range(min(10, len(y_test))):
    predicted_class = label_to_class[y_test_pred_classes[i]]
    actual_class = label_to_class[y_test[i]]
    print(f"Predicted: {predicted_class}, Actual: {actual_class}")

Loaded 4999 images and 4999 labels.

Class counts (class names):
Basal_Cell     1000
Melanocytic    1000
Melanoma       1000
Seborrheic     1000
Eczema          999
Name: count, dtype: int64

New class counts after augmentation (class names):
Melanocytic    800
Basal_Cell     800
Eczema         800
Seborrheic     800
Melanoma       800
Name: count, dtype: int64

Class counts in training set:
Melanocytic    800
Basal_Cell     800
Eczema         800
Seborrheic     800
Melanoma       800
Name: count, dtype: int64

Class counts in test set:
Melanocytic    200
Eczema         200
Seborrheic     200
Melanoma       200
Basal_Cell     200
Name: count, dtype: int64
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m87910968/87910968[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m121s[0m 195ms/step - accura