In [1]:
 import os
 import shutil

 # Source folder: one sub-folder per class, each holding that class's images.
 input_dir = r"/kaggle/input/skin-disease-dataset/dataset/train"
 # Flat destination folder that receives the renamed copies.
 output_dir = r"/kaggle/working/renamed_train"
 # Lower-case extensions treated as images; every other file is skipped.
 image_extensions = ('.jpg', '.jpeg', '.png')

 # Ensure the output directory exists
 os.makedirs(output_dir, exist_ok=True)

 # Number of images copied so far for each class (keyed by folder name)
 class_counts = {}

 for root, dirs, files in os.walk(input_dir):
     # os.walk yields entries in arbitrary filesystem order. Sorting `dirs` in
     # place fixes the traversal order and sorting `files` fixes the numbering,
     # so re-running the cell gives the same image the same new name.
     dirs.sort()

     # The name of the containing folder is used as the class identifier
     class_name = os.path.basename(root)

     for file_name in sorted(files):
         if not file_name.lower().endswith(image_extensions):
             print(f"Skipping non-image file: {file_name}")
             continue

         # Running 1-based index of this image within its class
         count = class_counts.get(class_name, 0) + 1
         class_counts[class_name] = count

         # New name format: ClassName(Count).Extension (original extension kept)
         ext = os.path.splitext(file_name)[1]
         new_path = os.path.join(output_dir, f"{class_name}({count}){ext}")

         # Copy (not move) so the input dataset stays untouched
         shutil.copy(os.path.join(root, file_name), new_path)

 # Print the total number of images for each class
 print("\nImage counts by class:")
 for class_name, count in class_counts.items():
     print(f"{class_name}: {count} images")

 print("\nRenaming and consolidation complete!")


Image counts by class:
Eczema: 999 images
Melanoma: 1000 images
Basal Cell: 1000 images
Seborrheic: 1000 images
Atopic Dermatitis: 1000 images
Melanocytic: 1000 images
Benign Keratosis: 1201 images
Warts Molluscum: 1000 images
Psoriasis: 1000 images
Tinea Ringworms Candidiasis: 990 images

Renaming and consolidation complete!


In [None]:
# Disabled cell: the whole TensorFlow/Keras MobileNet experiment below is wrapped
# in a triple-quoted string, so the cell contains only a string expression and
# none of this code runs. Its class_mapping covers five classes only; a PyTorch
# MobileNetV3 pipeline follows in the next cell.
# NOTE(review): if this experiment is no longer needed, delete the cell instead
# of keeping dead code in the notebook.
'''import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import MobileNet  # Import MobileNet
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2
import random
import matplotlib.pyplot as plt
import cv2
from sklearn.model_selection import train_test_split

# Define class mapping (updated with "Basal Cell")
class_mapping = {
    "Seborrheic": 0,
    "Melanocytic": 1,
    "Melanoma": 2,
    "Eczema": 3,
    "Basal_Cell": 4,
}

# Preprocess images: resize and normalize
def preprocess_image(image_path):
    image = cv2.imread(image_path)
    if image is None:
        print(f"Warning: {image_path} could not be loaded.")
        return None

    resized_image = cv2.resize(image, (224, 224))  # Resizing to 224x224
    img_normalized = resized_image.astype('float32') / 255.0  # Normalize to [0, 1]
    return img_normalized

def load_data_from_single_folder(folder):
    images = []
    labels = []

    for image_name in os.listdir(folder):
        image_path = os.path.join(folder, image_name)

        # Check if file is an image
        if image_name.lower().endswith(('.png', '.jpg', '.jpeg')):
            # Extract the label from the filename (before the parentheses)
            label = image_name.split('(')[0].strip().replace(' ', '_')  # Handle spaces and extract class name
            
            if label in class_mapping:
                label_index = class_mapping[label]  # Map label to integer
            else:
                #print(f"Warning: Label {label} not found in mapping. Skipping image.")
                continue

            # Preprocess the image
            preprocessed_image = preprocess_image(image_path)
            if preprocessed_image is not None:
                images.append(preprocessed_image)
                labels.append(label_index)

    print(f"Loaded {len(images)} images and {len(labels)} labels.")
    return np.array(images), np.array(labels)

# Paths for train folder
train_folder = r'/kaggle/working/renamed_train'

# Load data
X_train, y_train = load_data_from_single_folder(train_folder)

# === Step 6: Class Distribution Analysis ===
class_counts = pd.Series(y_train).value_counts()
class_names = {v: k for k, v in class_mapping.items()}  # Reverse the mapping
class_counts_named = class_counts.rename(index=class_names)

print("\nClass counts (class names):")
print(class_counts_named)

# === Step 7: Balance Classes to Max Class Size Using Augmentation ===
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)

max_class_size = class_counts.max()  # Maximum size among all classes
augmented_images = []
augmented_labels = []

# Create augmented images for each class
for label in np.unique(y_train):
    class_images = X_train[y_train == label]
    current_class_size = class_counts[label]

    augmented = datagen.flow(class_images, batch_size=1)
    for _ in range(max_class_size - current_class_size):
        augmented_images.append(next(augmented)[0])
        augmented_labels.append(label)

# If augmented images are created, concatenate them with the original data
if augmented_images:  # Ensure there are augmented images to add
    X_train = np.concatenate([X_train, np.array(augmented_images)])
    y_train = np.concatenate([y_train, np.array(augmented_labels)])

# Check new class distribution
new_class_counts = pd.Series(y_train).value_counts()
new_class_counts_named = new_class_counts.rename(index=class_names)

print("\nNew class counts after augmentation (class names):")
print(new_class_counts_named)

# Split the dataset into training and validation sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.2, random_state=42, stratify=y_train)

# Check new class distribution for train and validation sets
train_class_counts = pd.Series(y_train).value_counts().rename(index=class_names)
test_class_counts = pd.Series(y_test).value_counts().rename(index=class_names)

print("\nClass counts in training set:")
print(train_class_counts)

print("\nClass counts in test set:")
print(test_class_counts)

# === Replace VGG16 with MobileNet Pretrained Model ===
# Load MobileNet model with pretrained weights, excluding the top layers
base_model = MobileNet(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze the base model layers to prevent them from being trained
base_model.trainable = False

# Create a new model on top of MobileNet
model = models.Sequential()

# Add the MobileNet base model
model.add(base_model)

# Add custom fully connected layers
model.add(layers.Flatten())
model.add(layers.Dense(1024, activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.Dropout(0.5))  #1 Regularize the fully connected layer
model.add(layers.Dense(1024, activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.Dropout(0.5)) #2 Regularize the fully connected layer
model.add(layers.Dense(512, activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.Dropout(0.5))
model.add(layers.Dense(len(class_mapping), activation='softmax'))  # Output layer updated to 5 classes

for layer in base_model.layers[-10:]:  # Unfreeze the last 10 layers
    layer.trainable = True
    
# Compile the model with Adam optimizer
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
              loss='sparse_categorical_crossentropy',  # Sparse because labels are integers
              metrics=['accuracy'])

# Learning rate scheduler
callbacks = [
EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True),
ReduceLROnPlateau(monitor='val_loss',factor=0.5,patience=5,min_lr=1e-7,verbose=1)
]

# Train the model with validation data
epochs = 50
batch_size = 16
history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, 
                    validation_data=(X_test, y_test), callbacks=callbacks)

# Predict on the validation data
y_test_pred = model.predict(X_test)
y_test_pred_classes = np.argmax(y_test_pred, axis=1)

# Confusion matrix for validation set
test_conf_matrix = confusion_matrix(y_test, y_test_pred_classes)
print(f"Test Confusion Matrix:\n{test_conf_matrix}")

# Calculate overall accuracy for the validation set
test_accuracy = np.mean(y_test_pred_classes == y_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

# Print the first 10 predictions and actual class names from validation set
label_to_class = {v: k for k, v in class_mapping.items()}

for i in range(10):
    predicted_class = label_to_class[y_test_pred_classes[i]]
    actual_class = label_to_class[y_test[i]]
    print(f"Predicted: {predicted_class}, Actual: {actual_class}")'''

In [2]:
import os
import cv2
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm
import math
from collections import Counter
from torchvision.models import mobilenet_v3_large

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Class name (as it appears in the renamed file names, with spaces replaced by
# underscores) -> integer label used for training.
# NOTE(review): the renaming cell's output also lists "Benign Keratosis", which
# has no entry here, so those files are skipped by the loader - confirm that
# this is intentional.
class_mapping = {
    "Seborrheic": 0,
    "Melanocytic": 1,
    "Melanoma": 2,
    "Eczema": 3,
    "Basal_Cell": 4,
    "Psoriasis": 5,
    "Tinea_Ringworms_Candidiasis": 6,
    "Warts_Molluscum": 7,
    "Atopic_Dermatitis": 8,
}

# Number of samples every class is brought to by load_and_balance_data.
TARGET_SAMPLES_PER_CLASS = 1500

# ================== DATA BALANCING ==================
def load_and_balance_data(folder):
    """Load the labelled images in ``folder`` and balance the classes.

    The class of a file is the part of its name before the first ``(`` (with
    spaces turned into underscores), looked up in ``class_mapping``; files that
    are not .png/.jpg/.jpeg or whose name maps to no known class are ignored.
    Every class that has at least one image is then brought to exactly
    ``TARGET_SAMPLES_PER_CLASS`` samples: originals are kept up to that limit
    and the remainder is filled with randomly augmented copies of the
    originals, cycling through them in order.

    NOTE(review): the augmented copies are created here, before any
    train/validation split. A caller that splits the returned data afterwards
    can end up with an image in one split and an augmented copy of it in the
    other, which makes validation accuracy optimistic. Consider splitting the
    originals first and balancing only the training part.

    Args:
        folder: Directory containing image files named ``ClassName(N).ext``.

    Returns:
        Tuple ``(images, labels)``: a list of RGB images and a NumPy array
        holding one integer label per image, in the same order.
    """
    # First pass: collect the original images of every known class
    raw_data = {cls: [] for cls in class_mapping.values()}
    # Sorted so the sample order (and therefore any seeded split made by the
    # caller) does not depend on the filesystem's listing order.
    for img_name in sorted(os.listdir(folder)):
        if not img_name.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue
        label_name = img_name.split('(')[0].strip().replace(' ', '_')
        if label_name not in class_mapping:
            continue

        img_path = os.path.join(folder, img_name)
        bgr_img = cv2.imread(img_path)
        # The None check has to happen before the colour conversion:
        # cvtColor raises on a None input, which made the old check dead code.
        if bgr_img is None:
            print(f"Warning: {img_path} could not be loaded.")
            continue
        raw_data[class_mapping[label_name]].append(
            cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)
        )

    # Second pass: balance classes
    augmenter = A.Compose([
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.3),
        A.Rotate(limit=25),
        A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    ])

    images = []
    labels = []
    for cls, cls_images in raw_data.items():
        if not cls_images:
            # Nothing to augment from; the previous implementation looped
            # forever here.
            print(f"Warning: no images found for class {cls}; skipping it.")
            continue

        # Keep the originals, capped at the target count
        originals = cls_images[:TARGET_SAMPLES_PER_CLASS]
        images.extend(originals)

        # Fill the gap with augmented copies, cycling through all originals
        missing = TARGET_SAMPLES_PER_CLASS - len(originals)
        for i in range(missing):
            source_img = cls_images[i % len(cls_images)]
            images.append(augmenter(image=source_img)['image'])

        labels.extend([cls] * (len(originals) + missing))

    # Verify balancing
    print("\nFinal class distribution:")
    for cls, count in Counter(labels).items():
        print(f"Class {cls}: {count} images")

    return images, np.array(labels)

# ================== MOBILENET V3 MODEL ==================
class MobileNetV3(nn.Module):
    """torchvision ``mobilenet_v3_large`` with a classifier sized for ``num_classes``.

    The backbone is created with ``pretrained=True``. The first ten entries of
    its ``features`` stack are excluded from gradient updates, and its
    classifier is replaced by Linear(960 -> 1280) -> Hardswish -> Dropout(0.2)
    -> Linear(1280 -> num_classes).

    NOTE(review): recent torchvision releases deprecate ``pretrained=`` in
    favour of ``weights=`` - confirm against the installed version.
    """

    def __init__(self, num_classes):
        super().__init__()
        backbone = mobilenet_v3_large(pretrained=True)

        # Freeze early layers: features[0] .. features[9] receive no gradients
        backbone.features[:10].requires_grad_(False)

        # New classification head; 960 is the input width the head is given here
        head = nn.Sequential(
            nn.Linear(960, 1280),
            nn.Hardswish(inplace=True),
            nn.Dropout(p=0.2, inplace=True),
            nn.Linear(1280, num_classes),
        )
        backbone.classifier = head

        self.base = backbone

    def forward(self, x):
        """Return the class logits computed by the wrapped network for ``x``."""
        logits = self.base(x)
        return logits

# ================== AUGMENTATIONS ==================
# Per-channel normalization shared by both pipelines.
# NOTE(review): presumably the ImageNet statistics expected by the pretrained
# backbone - confirm.
_normalization = {
    "mean": [0.485, 0.456, 0.406],
    "std": [0.229, 0.224, 0.225],
}

# Training pipeline: resize, random 224x224 crop, random flips / rotation /
# colour jitter / coarse dropout, then normalize and convert to a tensor.
train_transform = A.Compose([
    A.Resize(height=256, width=256),
    A.RandomCrop(height=224, width=224),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.3),
    A.Rotate(limit=25),
    A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    A.CoarseDropout(max_holes=6, max_height=32, max_width=32, p=0.3),
    A.Normalize(**_normalization),
    ToTensorV2(),
])

# Validation pipeline: no random steps - resize straight to 224x224, then
# normalize and convert to a tensor.
val_transform = A.Compose([
    A.Resize(height=224, width=224),
    A.Normalize(**_normalization),
    ToTensorV2(),
])

# ================== TRAINING LOOP ==================
def train_model(num_epochs=30):
    """Train the MobileNetV3 classifier and return it with its best weights.

    Loads the balanced data from ``/kaggle/working/renamed_train``, makes a
    stratified 80/20 train/validation split, and trains with a class-weighted
    cross-entropy loss, AdamW, and a ReduceLROnPlateau scheduler driven by the
    validation accuracy. Per-epoch loss and accuracy are printed.

    NOTE(review): ``load_and_balance_data`` fills classes with augmented copies
    before this function splits the data, so an image and an augmented copy of
    it can land on opposite sides of the split; the validation accuracy
    reported here is therefore likely optimistic.
    NOTE(review): no torch / NumPy random seed is set, so runs are not exactly
    repeatable.

    Args:
        num_epochs: Number of passes over the training split.

    Returns:
        Tuple ``(model, best_val_acc)``: the model with the weights of the
        epoch that reached the highest validation accuracy restored, and that
        accuracy in percent (``0.0`` if no epoch improved on zero).
    """
    # Load and balance data
    all_images, all_labels = load_and_balance_data("/kaggle/working/renamed_train")
    X_train, X_val, y_train, y_val = train_test_split(
        all_images, all_labels, test_size=0.2, stratify=all_labels, random_state=42
    )

    class SkinDataset(Dataset):
        """In-memory dataset of (image, label) pairs with an optional transform."""

        def __init__(self, images, labels, transform=None):
            self.images = images
            self.labels = labels
            self.transform = transform

        def __len__(self):
            return len(self.images)

        def __getitem__(self, idx):
            img = self.images[idx]
            label = self.labels[idx]
            if self.transform:
                # The transform is called albumentations-style and returns a dict
                img = self.transform(image=img)['image']
            return img, label

    train_dataset = SkinDataset(X_train, y_train, train_transform)
    val_dataset = SkinDataset(X_val, y_val, val_transform)

    # Create dataloaders
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=4)

    # Initialize model
    model = MobileNetV3(len(class_mapping)).to(device)

    # Weighted loss (adjust weights based on your dataset). Entry i weights the
    # class with label i, so the list must have len(class_mapping) entries.
    class_weights = torch.tensor([1.0, 1.2, 1.5, 1.0, 1.3, 1.0, 1.1, 1.0, 1.2]).to(device)
    criterion = nn.CrossEntropyLoss(weight=class_weights)

    # Optimizer and scheduler ('max': the scheduler is stepped with an accuracy)
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=3, factor=0.5)

    # Training
    best_val_acc = 0.0
    best_state = None  # copy of the weights from the best epoch so far
    for epoch in range(num_epochs):
        model.train()
        train_loss, correct, total = 0, 0, 0

        # Batch variables get their own names so they do not shadow the dataset
        for batch_images, batch_labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
            batch_images, batch_labels = batch_images.to(device), batch_labels.to(device)

            optimizer.zero_grad()
            outputs = model(batch_images)
            loss = criterion(outputs, batch_labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = outputs.max(1)
            total += batch_labels.size(0)
            correct += predicted.eq(batch_labels).sum().item()

        train_acc = 100 * correct / total
        val_loss, val_acc = validate(model, val_loader, criterion)

        # Validation accuracy rounded UP to two decimals; used for display only.
        # The scheduler and best-model tracking use the unrounded value.
        val_acc_ceil = math.ceil(val_acc * 100) / 100

        # Update scheduler
        scheduler.step(val_acc)

        print(f"Train Loss: {train_loss/len(train_loader):.4f} | Acc: {train_acc:.2f}%")
        print(f"Val Loss: {val_loss:.4f} | Acc: {val_acc:.2f}% (Ceiled: {val_acc_ceil:.2f}%)")

        # Remember the best weights in memory so the result of training is not
        # lost (the on-disk save below is disabled).
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
            #torch.save(model.state_dict(), "best_mobilenetv3_model.pth")
            #print(f"New best model saved (Val Acc: {math.ceil(best_val_acc*100)/100:.2f}%)")

    if best_state is not None:
        model.load_state_dict(best_state)
    return model, best_val_acc

def validate(model, val_loader, criterion):
    """Evaluate ``model`` on ``val_loader`` without computing gradients.

    Batches are moved to the device the model's parameters live on, so the
    function does not depend on any module-level device setting.

    Args:
        model: Module to evaluate; it is switched to eval mode and left there.
        val_loader: Iterable of ``(images, labels)`` tensor batches.
        criterion: Loss callable applied as ``criterion(outputs, labels)``.

    Returns:
        Tuple ``(mean_batch_loss, accuracy_percent)``. The loss is the mean of
        the per-batch loss values. ``(0.0, 0.0)`` is returned when the loader
        yields no samples (previously a ZeroDivisionError).
    """
    model.eval()

    # Device of the model's first parameter; None for a parameter-free model,
    # in which case batches are used where they already are.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    loss_sum, num_batches, correct, total = 0.0, 0, 0, 0

    with torch.no_grad():
        for images, labels in val_loader:
            if model_device is not None:
                images, labels = images.to(model_device), labels.to(model_device)
            outputs = model(images)

            loss_sum += criterion(outputs, labels).item()
            num_batches += 1

            # Predicted class = index of the largest output per sample
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    if total == 0:
        return 0.0, 0.0
    return loss_sum / num_batches, 100 * correct / total

# Entry point: start training when this code runs as the main module, which is
# the case when the notebook cell is executed (see the training log below).
if __name__ == "__main__":
    train_model()

  check_for_updates()



Final class distribution:
Class 0: 1500 images
Class 1: 1500 images
Class 2: 1500 images
Class 3: 1500 images
Class 4: 1500 images
Class 5: 1500 images
Class 6: 1500 images
Class 7: 1500 images
Class 8: 1500 images


Epoch 1/30: 100%|██████████| 169/169 [00:17<00:00,  9.53it/s]


Train Loss: 0.9460 | Acc: 62.03%
Val Loss: 0.8495 | Acc: 67.00% (Ceiled: 67.00%)


Epoch 2/30: 100%|██████████| 169/169 [00:18<00:00,  9.18it/s]


Train Loss: 0.6919 | Acc: 72.57%
Val Loss: 0.6825 | Acc: 73.96% (Ceiled: 73.97%)


Epoch 3/30: 100%|██████████| 169/169 [00:18<00:00,  8.90it/s]


Train Loss: 0.5693 | Acc: 77.40%
Val Loss: 0.6367 | Acc: 77.19% (Ceiled: 77.19%)


Epoch 4/30: 100%|██████████| 169/169 [00:18<00:00,  9.12it/s]


Train Loss: 0.5087 | Acc: 80.02%
Val Loss: 0.5737 | Acc: 79.41% (Ceiled: 79.41%)


Epoch 5/30: 100%|██████████| 169/169 [00:17<00:00,  9.53it/s]


Train Loss: 0.4520 | Acc: 82.49%
Val Loss: 0.5120 | Acc: 81.74% (Ceiled: 81.75%)


Epoch 6/30: 100%|██████████| 169/169 [00:18<00:00,  9.21it/s]


Train Loss: 0.3960 | Acc: 84.70%
Val Loss: 0.5996 | Acc: 79.41% (Ceiled: 79.41%)


Epoch 7/30: 100%|██████████| 169/169 [00:18<00:00,  9.21it/s]


Train Loss: 0.3500 | Acc: 86.74%
Val Loss: 0.4585 | Acc: 82.37% (Ceiled: 82.38%)


Epoch 8/30: 100%|██████████| 169/169 [00:18<00:00,  9.38it/s]


Train Loss: 0.2994 | Acc: 88.66%
Val Loss: 0.4890 | Acc: 83.22% (Ceiled: 83.23%)


Epoch 9/30: 100%|██████████| 169/169 [00:18<00:00,  9.12it/s]


Train Loss: 0.2886 | Acc: 88.94%
Val Loss: 0.4829 | Acc: 83.96% (Ceiled: 83.97%)


Epoch 10/30: 100%|██████████| 169/169 [00:18<00:00,  9.15it/s]


Train Loss: 0.2528 | Acc: 90.44%
Val Loss: 0.4504 | Acc: 85.37% (Ceiled: 85.38%)


Epoch 11/30: 100%|██████████| 169/169 [00:17<00:00,  9.43it/s]


Train Loss: 0.2344 | Acc: 91.17%
Val Loss: 0.5722 | Acc: 83.63% (Ceiled: 83.63%)


Epoch 12/30: 100%|██████████| 169/169 [00:18<00:00,  9.17it/s]


Train Loss: 0.2288 | Acc: 91.54%
Val Loss: 0.4436 | Acc: 86.19% (Ceiled: 86.19%)


Epoch 13/30: 100%|██████████| 169/169 [00:18<00:00,  9.12it/s]


Train Loss: 0.2005 | Acc: 92.60%
Val Loss: 0.4630 | Acc: 85.89% (Ceiled: 85.89%)


Epoch 14/30: 100%|██████████| 169/169 [00:17<00:00,  9.44it/s]


Train Loss: 0.2004 | Acc: 92.71%
Val Loss: 0.4968 | Acc: 86.81% (Ceiled: 86.82%)


Epoch 15/30: 100%|██████████| 169/169 [00:18<00:00,  8.92it/s]


Train Loss: 0.1770 | Acc: 93.49%
Val Loss: 0.4275 | Acc: 86.52% (Ceiled: 86.52%)


Epoch 16/30: 100%|██████████| 169/169 [00:18<00:00,  9.21it/s]


Train Loss: 0.1649 | Acc: 94.17%
Val Loss: 0.4978 | Acc: 85.70% (Ceiled: 85.71%)


Epoch 17/30: 100%|██████████| 169/169 [00:17<00:00,  9.46it/s]


Train Loss: 0.1646 | Acc: 93.94%
Val Loss: 0.4852 | Acc: 86.52% (Ceiled: 86.52%)


Epoch 18/30: 100%|██████████| 169/169 [00:18<00:00,  9.26it/s]


Train Loss: 0.1557 | Acc: 94.21%
Val Loss: 0.4446 | Acc: 88.30% (Ceiled: 88.30%)


Epoch 19/30: 100%|██████████| 169/169 [00:18<00:00,  9.29it/s]


Train Loss: 0.1480 | Acc: 94.39%
Val Loss: 0.4580 | Acc: 87.74% (Ceiled: 87.75%)


Epoch 20/30: 100%|██████████| 169/169 [00:17<00:00,  9.57it/s]


Train Loss: 0.1626 | Acc: 94.19%
Val Loss: 0.4865 | Acc: 87.22% (Ceiled: 87.23%)


Epoch 21/30: 100%|██████████| 169/169 [00:18<00:00,  9.31it/s]


Train Loss: 0.1165 | Acc: 95.67%
Val Loss: 0.4628 | Acc: 87.07% (Ceiled: 87.08%)


Epoch 22/30: 100%|██████████| 169/169 [00:18<00:00,  9.30it/s]


Train Loss: 0.1321 | Acc: 95.01%
Val Loss: 0.4778 | Acc: 87.96% (Ceiled: 87.97%)


Epoch 23/30: 100%|██████████| 169/169 [00:17<00:00,  9.57it/s]


Train Loss: 0.0780 | Acc: 97.05%
Val Loss: 0.3992 | Acc: 89.74% (Ceiled: 89.75%)


Epoch 24/30: 100%|██████████| 169/169 [00:18<00:00,  9.25it/s]


Train Loss: 0.0491 | Acc: 98.19%
Val Loss: 0.4408 | Acc: 90.44% (Ceiled: 90.45%)


Epoch 25/30: 100%|██████████| 169/169 [00:18<00:00,  9.29it/s]


Train Loss: 0.0573 | Acc: 97.90%
Val Loss: 0.4380 | Acc: 90.41% (Ceiled: 90.41%)


Epoch 26/30: 100%|██████████| 169/169 [00:17<00:00,  9.55it/s]


Train Loss: 0.0552 | Acc: 97.95%
Val Loss: 0.4089 | Acc: 90.70% (Ceiled: 90.71%)


Epoch 27/30: 100%|██████████| 169/169 [00:18<00:00,  9.28it/s]


Train Loss: 0.0522 | Acc: 98.10%
Val Loss: 0.4077 | Acc: 91.15% (Ceiled: 91.15%)


Epoch 28/30: 100%|██████████| 169/169 [00:18<00:00,  9.29it/s]


Train Loss: 0.0457 | Acc: 98.37%
Val Loss: 0.4470 | Acc: 90.07% (Ceiled: 90.08%)


Epoch 29/30: 100%|██████████| 169/169 [00:17<00:00,  9.57it/s]


Train Loss: 0.0468 | Acc: 98.17%
Val Loss: 0.4389 | Acc: 90.11% (Ceiled: 90.12%)


Epoch 30/30: 100%|██████████| 169/169 [00:18<00:00,  9.28it/s]


Train Loss: 0.0575 | Acc: 97.74%
Val Loss: 0.4236 | Acc: 90.59% (Ceiled: 90.60%)
