In [1]:
import os
import shutil

# Input folder containing the images (one sub-folder per class)
input_dir = r"/kaggle/input/skin-disease-dataset/dataset/train"
# Output folder that receives the flattened, renamed copies
output_dir = r"/kaggle/working/renamed_train"

# File extensions treated as images (compared case-insensitively)
image_extensions = ('.jpg', '.jpeg', '.png')

# Ensure the output directory exists
os.makedirs(output_dir, exist_ok=True)

# Running number of images copied so far for each class
class_counts = {}

# Traverse through each subdirectory
for root, dirs, files in os.walk(input_dir):
    # os.walk yields entries in arbitrary filesystem order. Sorting the
    # directory list in place (which also fixes the descent order) and the
    # file list makes the ClassName(Count) numbering reproducible between runs.
    dirs.sort()

    # The name of the containing folder is used as the class identifier
    class_name = os.path.basename(root)

    for file_name in sorted(files):
        # Skip non-image files
        if not file_name.lower().endswith(image_extensions):
            print(f"Skipping non-image file: {file_name}")
            continue

        # Initialize or increment the count for this class
        count = class_counts.get(class_name, 0) + 1
        class_counts[class_name] = count

        # New file name in the format ClassName(Count).Extension
        ext = os.path.splitext(file_name)[1]
        new_name = f"{class_name}({count}){ext}"

        # Copy the file into the output directory under its new name
        shutil.copy(os.path.join(root, file_name), os.path.join(output_dir, new_name))

# Print the total number of images for each class
print("\nImage counts by class:")
for class_name, count in class_counts.items():
    print(f"{class_name}: {count} images")

print("\nRenaming and consolidation complete!")


Image counts by class:
Eczema: 999 images
Melanoma: 1000 images
Basal Cell: 1000 images
Seborrheic: 1000 images
Atopic Dermatitis: 1000 images
Melanocytic: 1000 images
Benign Keratosis: 1201 images
Warts Molluscum: 1000 images
Psoriasis: 1000 images
Tinea Ringworms Candidiasis: 990 images

Renaming and consolidation complete!


In [2]:
import os
import numpy as np
import cv2
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import InceptionResNetV2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import train_test_split

# --- Class mapping ---
# Class names are listed in label order: a name's position is its integer label.
class_mapping = {
    class_name: class_index
    for class_index, class_name in enumerate((
        "Seborrheic",
        "Melanocytic",
        "Melanoma",
        "Eczema",
        "Basal_Cell",
        "Atopic_Dermatitis",
        "Benign_Keratosis",
        "Warts_Molluscum",
        "Psoriasis",
        "Tinea_Ringworms_Candidiasis",
    ))
}
# Reverse lookup: integer label -> class name
label_to_class = dict(zip(class_mapping.values(), class_mapping.keys()))

# --- Preprocess image ---
def preprocess_image(image_path, target_size=(224, 224)):
    """Load one image from disk and turn it into a network-ready array.

    Args:
        image_path: Path of the image file to read.
        target_size: ``(width, height)`` passed to ``cv2.resize``. Defaults to
            ``(224, 224)``, the input size used by the models in this notebook.

    Returns:
        A ``float32`` array in RGB channel order with values scaled to
        ``[0, 1]``, or ``None`` (after printing a warning) when OpenCV could
        not read the file.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Warning: {image_path} could not be loaded.")
        return None
    # cv2.imread returns channels in BGR order, while the ImageNet-pretrained
    # Keras backbones this array is fed to expect RGB.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    resized_image = cv2.resize(image, target_size)
    img_normalized = resized_image.astype('float32') / 255.0
    return img_normalized

# --- Load data from folder ---
def load_data_from_single_folder(folder, max_per_class=1000):
    """Read the renamed images in ``folder`` into arrays, capped per class.

    File names are visited in sorted order. The class is taken from the part
    of the file name before the first ``(`` (stripped, spaces replaced by
    underscores) and looked up in ``class_mapping``. Files with an unknown
    class, and files of a class that already holds ``max_per_class`` images,
    are skipped. Loading stops early once every class has reached the cap.

    Args:
        folder: Directory containing the image files.
        max_per_class: Maximum number of images kept for each class.

    Returns:
        ``(images, labels)``: NumPy arrays built from the preprocessed images
        and their integer class labels, in load order.
    """
    extensions = ('.png', '.jpg', '.jpeg')
    candidates = [entry for entry in os.listdir(folder) if entry.lower().endswith(extensions)]
    candidates.sort()

    # Number of images accepted so far, keyed by integer class label
    loaded_per_class = dict.fromkeys(range(len(class_mapping)), 0)
    images, labels = [], []

    for file_name in candidates:
        class_key = file_name.split('(')[0].strip().replace(' ', '_')
        if class_key not in class_mapping:
            continue

        class_id = class_mapping[class_key]
        if loaded_per_class[class_id] >= max_per_class:
            continue

        pixels = preprocess_image(os.path.join(folder, file_name))
        if pixels is not None:
            images.append(pixels)
            labels.append(class_id)
            loaded_per_class[class_id] += 1

        # Stop as soon as no class is still below the cap
        if not any(loaded < max_per_class for loaded in loaded_per_class.values()):
            break

    print(f"Loaded {len(images)} images and {len(labels)} labels.")
    return np.array(images), np.array(labels)

# --- Load and prepare data ---
# One seed shared by the shuffle, the train/test split and the augmentation
# stream, so that a fresh "Restart & Run All" reproduces the same split.
SEED = 42

train_folder = r'/kaggle/working/renamed_train'
X, y = load_data_from_single_folder(train_folder, max_per_class=1000)

# Shuffle images and labels together with a seeded permutation
# (an unseeded shuffle here would make the split below non-reproducible
# despite its fixed random_state).
indices = np.random.default_rng(SEED).permutation(X.shape[0])
X = X[indices]
y = y[indices]

# One-hot encode (one column per entry of class_mapping)
y = to_categorical(y, num_classes=len(class_mapping))

# Train-test split, stratified on the integer class labels
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED, stratify=np.argmax(y, axis=1)
)

# --- Data Augmentation ---
# Random geometric augmentation is applied to the training images only
train_datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.2,
    shear_range=0.15,
    horizontal_flip=True,
    fill_mode='nearest'
)

# No augmentation for the held-out images
test_datagen = ImageDataGenerator()

train_generator = train_datagen.flow(X_train, y_train, batch_size=32, seed=SEED)
# Keep the held-out data in a fixed order: shuffling it serves no purpose
test_generator = test_datagen.flow(X_test, y_test, batch_size=32, shuffle=False)



# --- Build Model ---
# base_model = InceptionResNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
# base_model.trainable = False  # Freeze base

# model = models.Sequential([
#     base_model,
#     layers.GlobalAveragePooling2D(),
#     layers.Dense(256, activation='relu'),
#     layers.Dropout(0.5),
#     layers.Dense(10, activation='softmax')
# ])

# model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# # --- Callbacks ---
# early_stop = EarlyStopping(patience=5, restore_best_weights=True, monitor='val_loss')
# lr_reduce = ReduceLROnPlateau(patience=3, factor=0.2, monitor='val_loss')

# # --- Train ---
# model.fit(
#     train_generator,
#     validation_data=test_generator,
#     epochs=30,
#     callbacks=[early_stop, lr_reduce]
# )


2025-05-01 11:02:40.763390: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746097360.965471      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746097361.026966      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Loaded 9989 images and 9989 labels.


250


In [None]:
from sklearn.metrics import confusion_matrix
from tensorflow.keras.applications import DenseNet121

# ImageNet-pretrained DenseNet121 backbone without its classification head
base_model = DenseNet121(weights="imagenet", include_top=False, input_shape=(224, 224, 3))

model = tf.keras.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(64, activation='relu'),
    layers.Dense(len(class_mapping), activation='softmax')  # Output layer: one unit per class (10)
])

# NOTE(review): Keras layers are trainable by default and nothing above
# freezes the backbone, so this loop currently changes nothing and the whole
# network is fine-tuned. If only the last 20 layers are meant to train, set
# `base_model.trainable = False` before this loop.
for layer in base_model.layers[-20:]:
    layer.trainable = True

# Compile model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# summary() prints the table itself and returns None, so it is not wrapped in print()
model.summary()

# Callbacks for early stopping and learning rate reduction
callbacks = [
    EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-7, verbose=1)
]

# --- Train ---
# The batch size and shuffling are properties of train_generator /
# test_generator; fit() must not be given batch_size for generator input.
epochs = 100
history = model.fit(
    train_generator,
    validation_data=test_generator,
    epochs=epochs,
    callbacks=callbacks
)

# Predict on the test set
y_test_pred = model.predict(X_test)
y_test_pred_classes = np.argmax(y_test_pred, axis=1)
y_test_actual_classes = np.argmax(y_test, axis=1)

# Confusion matrix (rows: actual class, columns: predicted class)
test_conf_matrix = confusion_matrix(y_test_actual_classes, y_test_pred_classes)
print(f"Test Confusion Matrix:\n{test_conf_matrix}")

# Calculate accuracy
test_accuracy = np.mean(y_test_pred_classes == y_test_actual_classes)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

# Print the first 10 predictions (fewer if the test set is smaller)
for i in range(min(10, len(y_test_pred_classes))):
    predicted_class = label_to_class[int(y_test_pred_classes[i])]
    actual_class = label_to_class[int(y_test_actual_classes[i])]
    print(f"Predicted: {predicted_class}, Actual: {actual_class}")


None
Epoch 1/100


  self._warn_if_super_not_called()
I0000 00:00:1746097658.085646      93 service.cc:148] XLA service 0x7c9658001ee0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1746097658.086317      93 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1746097666.772259      93 cuda_dnn.cc:529] Loaded cuDNN version 90300
I0000 00:00:1746097744.993506      93 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m356s[0m 715ms/step - accuracy: 0.4570 - loss: 1.5029 - val_accuracy: 0.6902 - val_loss: 0.8558 - learning_rate: 1.0000e-04
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 299ms/step - accuracy: 0.7149 - loss: 0.7761 - val_accuracy: 0.7122 - val_loss: 0.8100 - learning_rate: 1.0000e-04
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 298ms/step - accuracy: 0.7907 - loss: 0.5956 - val_accuracy: 0.5806 - val_loss: 1.2900 - learning_rate: 1.0000e-04
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 297ms/step - accuracy: 0.8337 - loss: 0.4591 - val_accuracy: 0.6547 - val_loss: 1.0127 - learning_rate: 1.0000e-04
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 300ms/step - accuracy: 0.8616 - loss: 0.3877 - val_accuracy: 0.7477 - val_loss: 0.7385 - learning_rate: 1.0000e-04
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━

In [None]:
# import tensorflow as tf
# from tensorflow.keras.applications import InceptionResNetV2
# from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
# from tensorflow.keras.models import Model
# from tensorflow.keras.preprocessing.image import ImageDataGenerator

# # Define image size and input shape (299x299 for InceptionResNetV2)


# # Load the pre-trained InceptionResNetV2 model without the top layer
# base_model = InceptionResNetV2(weights='imagenet', include_top=False, input_shape=(299,299,3))

# # Freeze the base model (pre-trained weights will not change during training)
# base_model.trainable = False

# model = tf.keras.Sequential([
#     base_model, 
#     tf.keras.layers.GlobalAveragePooling2D(),  # Pooling layer
#     tf.keras.layers.Dense(64, activation='relu'),  # Fully connected layer
#     tf.keras.layers.Dense(5, activation='softmax')  # Output layer (5 classes)
# ])
# model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
#               loss='categorical_crossentropy',
#               metrics=['accuracy'])
# print(model.summary())

# for layer in base_model.layers[-4:]:  # Unfreeze the last 4 layers
#     layer.trainable = True

# # Compile the model with Adam optimizer







from sklearn.metrics import confusion_matrix
from tensorflow.keras.applications import DenseNet121, InceptionResNetV2

# ImageNet-pretrained InceptionResNetV2 backbone without its classification head
base_model = InceptionResNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

model = tf.keras.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(64, activation='relu'),
    layers.Dense(len(class_mapping), activation='softmax')  # Output layer: one unit per class (10)
])

# NOTE(review): Keras layers are trainable by default and nothing above
# freezes the backbone, so this loop currently changes nothing and the whole
# network is fine-tuned. If only the last 20 layers are meant to train, set
# `base_model.trainable = False` before this loop.
for layer in base_model.layers[-20:]:
    layer.trainable = True

# Compile model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# summary() prints the table itself and returns None, so it is not wrapped in print()
model.summary()

# Callbacks for early stopping and learning rate reduction
callbacks = [
    EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-7, verbose=1)
]

# Train the model directly on the in-memory arrays (no augmentation)
epochs = 50
batch_size = 16
history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
                    validation_data=(X_test, y_test), callbacks=callbacks, shuffle=True)

# Predict on the test set
y_test_pred = model.predict(X_test)
y_test_pred_classes = np.argmax(y_test_pred, axis=1)
y_test_actual_classes = np.argmax(y_test, axis=1)

# Confusion matrix (rows: actual class, columns: predicted class)
test_conf_matrix = confusion_matrix(y_test_actual_classes, y_test_pred_classes)
print(f"Test Confusion Matrix:\n{test_conf_matrix}")

# Calculate accuracy
test_accuracy = np.mean(y_test_pred_classes == y_test_actual_classes)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

# Print the first 10 predictions (fewer if the test set is smaller)
for i in range(min(10, len(y_test_pred_classes))):
    predicted_class = label_to_class[int(y_test_pred_classes[i])]
    actual_class = label_to_class[int(y_test_actual_classes[i])]
    print(f"Predicted: {predicted_class}, Actual: {actual_class}")


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_resnet_v2/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m219055592/219055592[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 0us/step


None
Epoch 1/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m353s[0m 352ms/step - accuracy: 0.4860 - loss: 1.3755 - val_accuracy: 0.7082 - val_loss: 1.9947 - learning_rate: 1.0000e-04
Epoch 2/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 207ms/step - accuracy: 0.7830 - loss: 0.6040 - val_accuracy: 0.7377 - val_loss: 0.8314 - learning_rate: 1.0000e-04
Epoch 3/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 206ms/step - accuracy: 0.8902 - loss: 0.3353 - val_accuracy: 0.7377 - val_loss: 0.8209 - learning_rate: 1.0000e-04
Epoch 4/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 207ms/step - accuracy: 0.9323 - loss: 0.2025 - val_accuracy: 0.7528 - val_loss: 0.9259 - learning_rate: 1.0000e-04
Epoch 5/50
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 206ms/step - accuracy: 0.9502 - loss: 0.1570 - val_accuracy: 0.7252 - val_loss: 1.1081 - learning_rate: 1.0000e-04
Epoch 6/50
[1m500/500[0