In [3]:
import os
import random
import shutil
from tqdm import tqdm

# Path to your dataset root (where each class folder is)
dataset_path = 'dataset-skin'

# Maximum number of images per class
MAX_IMAGES = 2000

# Whether to randomly select images to keep
RANDOMIZE = True

# Output path for the balanced dataset
output_path = 'balanced-dataset-skin'

os.makedirs(output_path, exist_ok=True)

# Loop through each class folder
for class_name in os.listdir(dataset_path):
    class_dir = os.path.join(dataset_path, class_name)
    if not os.path.isdir(class_dir):
        continue
    
    images = [f for f in os.listdir(class_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
    
    if RANDOMIZE:
        random.shuffle(images)

    selected_images = images[:MAX_IMAGES]  # Keep only MAX_IMAGES

    new_class_dir = os.path.join(output_path, class_name)
    os.makedirs(new_class_dir, exist_ok=True)

    print(f"Copying {len(selected_images)} images for class '{class_name}'...")

    for img_file in tqdm(selected_images):
        src = os.path.join(class_dir, img_file)
        dst = os.path.join(new_class_dir, img_file)
        shutil.copy2(src, dst)

print("\n✅ Dataset balanced and saved to:", output_path)


Copying 1638 images for class '1. Eczema 1677'...


  0%|          | 0/1638 [00:00<?, ?it/s]

100%|██████████| 1638/1638 [00:08<00:00, 201.78it/s]


Copying 2000 images for class '10. Warts Molluscum and other Viral Infections - 2103'...


100%|██████████| 2000/2000 [00:10<00:00, 190.39it/s]


Copying 1581 images for class '11.Unknown'...


100%|██████████| 1581/1581 [00:22<00:00, 69.02it/s]


Copying 2000 images for class '2. Melanoma 15.75k'...


100%|██████████| 2000/2000 [00:10<00:00, 186.50it/s]


Copying 1254 images for class '3. Atopic Dermatitis - 1.25k'...


100%|██████████| 1254/1254 [00:06<00:00, 197.51it/s]


Copying 2000 images for class '4. Basal Cell Carcinoma (BCC) 3323'...


100%|██████████| 2000/2000 [00:13<00:00, 146.64it/s]


Copying 2000 images for class '5. Melanocytic Nevi (NV) - 7970'...


100%|██████████| 2000/2000 [00:20<00:00, 96.76it/s] 


Copying 2000 images for class '6. Benign Keratosis-like Lesions (BKL) 2624'...


100%|██████████| 2000/2000 [00:09<00:00, 209.43it/s]


Copying 2000 images for class '7. Psoriasis pictures Lichen Planus and related diseases - 2k'...


100%|██████████| 2000/2000 [00:08<00:00, 230.21it/s]


Copying 1847 images for class '8. Seborrheic Keratoses and other Benign Tumors - 1.8k'...


100%|██████████| 1847/1847 [00:07<00:00, 239.53it/s]


Copying 1702 images for class '9. Tinea Ringworm Candidiasis and other Fungal Infections - 1.7k'...


100%|██████████| 1702/1702 [00:07<00:00, 226.71it/s]


✅ Dataset balanced and saved to: balanced-dataset-skin





In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# ---------------- CONFIG ----------------
IMAGE_SIZE = 224
BATCH_SIZE = 32
EPOCHS = 20
DATA_DIR = 'balanced-dataset-skin'
NUM_CLASSES = 11

# ---------------- DATA AUGMENTATION ----------------
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

train_data = datagen.flow_from_directory(
    DATA_DIR,
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training',
    shuffle=True
)

val_data = datagen.flow_from_directory(
    DATA_DIR,
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation',
    shuffle=False
)

# ---------------- MODEL ----------------
base_model = MobileNetV2(input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3), include_top=False, weights='imagenet')
base_model.trainable = True  # ✅ Train entire model directly

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.4)(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.2)(x)
predictions = Dense(NUM_CLASSES, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# ---------------- COMPILE ----------------
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# ---------------- CALLBACKS ----------------
checkpoint = ModelCheckpoint("best_skin_model.keras", monitor='val_accuracy', save_best_only=True)
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.3, patience=3, min_lr=1e-6)

# ---------------- TRAIN ----------------
model.fit(
    train_data,
    validation_data=val_data,
    epochs=EPOCHS,
    callbacks=[checkpoint, early_stop, reduce_lr]
)

print("✅ Model trained and best version saved as 'best_skin_model.keras'")



Found 18540 images belonging to 11 classes.
Found 4628 images belonging to 11 classes.
Epoch 1/20


  self._warn_if_super_not_called()


[1m580/580[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1809s[0m 3s/step - accuracy: 0.4613 - loss: 1.5158 - val_accuracy: 0.2986 - val_loss: 4.1198 - learning_rate: 1.0000e-04
Epoch 2/20
[1m580/580[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1744s[0m 3s/step - accuracy: 0.6703 - loss: 0.8793 - val_accuracy: 0.3496 - val_loss: 3.9676 - learning_rate: 1.0000e-04
Epoch 3/20
[1m580/580[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1686s[0m 3s/step - accuracy: 0.7205 - loss: 0.7398 - val_accuracy: 0.4047 - val_loss: 2.5785 - learning_rate: 1.0000e-04
Epoch 4/20
[1m580/580[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2152s[0m 4s/step - accuracy: 0.7447 - loss: 0.6771 - val_accuracy: 0.5143 - val_loss: 1.9848 - learning_rate: 1.0000e-04
Epoch 5/20
[1m580/580[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2015s[0m 3s/step - accuracy: 0.7750 - loss: 0.6099 - val_accuracy: 0.5337 - val_loss: 2.2278 - learning_rate: 1.0000e-04
Epoch 6/20
[1m580/580[0m [32m━━━━━━━━━━━━━━━━━━━