In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
from torchvision import models
import pandas as pd
import os
from PIL import Image
import time
from tqdm import tqdm  # Progress Bar

#  Set paths
# Everything the notebook reads lives under DATA_DIR; checkpoints are written
# relative to the current working directory.
DATA_DIR = "C:/Derma/Data/"
CSV_PATH, IMAGE_PATH = (
    os.path.join(DATA_DIR, leaf)
    for leaf in ("HAM10000_metadata.csv", "HAM10000_images/")
)
CHECKPOINT_DIR = "./checkpoints/"
# Create the checkpoint folder up front so later torch.save calls cannot fail
# on a missing directory; a pre-existing folder is not an error.
os.makedirs(CHECKPOINT_DIR, exist_ok=True)

In [None]:
#  Load dataset
# One row per image; 'image_id' is the file stem and 'dx' the class name.
df = pd.read_csv(CSV_PATH)
df['path'] = df['image_id'].map(
    lambda image_id: os.path.join(IMAGE_PATH, image_id + ".jpg"))

#  Mapping labels to numbers
# Class ids follow the order in which each 'dx' value first appears in the CSV.
# NOTE(review): this makes the ids depend on row order - confirm that any code
# loading the saved weights rebuilds the mapping from the same file.
class_names = df['dx'].unique()
label_map = {name: class_id for class_id, name in enumerate(class_names)}
df['label'] = df['dx'].map(label_map)

#  Custom dataset class


class SkinDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs from a metadata frame.

    Args:
        df: DataFrame with a ``'path'`` column (image file path) and a
            ``'label'`` column (class id). Both columns are read once at
            construction time, so later edits to ``df`` are not reflected.
        transform: Optional callable applied to the RGB ``PIL.Image`` before it
            is returned.

    Raises:
        KeyError: If ``df`` lacks the ``'path'`` or ``'label'`` column.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform
        # Extract the two needed columns once. Calling ``df.iloc[idx]`` per
        # sample builds a whole-row Series each time (the original did it
        # twice), which is pure overhead on the data-loading hot path.
        self._paths = df['path'].tolist()
        self._labels = df['label'].tolist()

    def __len__(self):
        return len(self._paths)

    def __getitem__(self, idx):
        img_path = self._paths[idx]
        label = self._labels[idx]

        # ``convert`` returns a new, fully loaded image, so the file handle can
        # be released as soon as the ``with`` block ends.
        with Image.open(img_path) as raw:
            image = raw.convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, label


#  Data Augmentation (training only)
# NOTE(review): the ImageNet-pretrained weights were presumably trained with
# ImageNet mean/std rather than 0.5/0.5 - confirm this normalisation is intended.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

#  Validation preprocessing: same resize/normalisation, but no random flips,
#  rotations or colour jitter. Validation accuracy drives best-model saving and
#  early stopping, so it must be computed on deterministic inputs.
val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

#  Split dataset (80 % train / 20 % validation, fixed seed for repeatability)
# NOTE(review): this is a row-level split; if the metadata holds several images
# of the same lesion they can land on both sides - verify against the CSV.
train_df = df.sample(frac=0.8, random_state=42)
val_df = df.drop(train_df.index)

train_dataset = SkinDataset(train_df, transform=transform)
val_dataset = SkinDataset(val_df, transform=val_transform)

# On Windows, DataLoader workers are separate processes that must re-import the
# Dataset class. That fails when the class is defined in a notebook cell or an
# unguarded script and usually shows up as a progress bar stuck at 0 batches,
# so load data in the main process there.
NUM_WORKERS = 0 if os.name == "nt" else 2

train_loader = DataLoader(train_dataset, batch_size=8,
                          shuffle=True, num_workers=NUM_WORKERS)
val_loader = DataLoader(val_dataset, batch_size=8,
                        shuffle=False, num_workers=NUM_WORKERS)

#  Load EfficientNet-B3 (Pretrained)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.efficientnet_b3(weights="IMAGENET1K_V1")
# Swap the final linear layer so the output size matches our number of classes.
model.classifier[1] = nn.Linear(
    model.classifier[1].in_features, len(label_map))  # Modify final layer
model = model.to(device)

#  Loss & Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)
# Multiply the learning rate by 0.8 every 5 scheduler steps (one step per epoch
# in train_model).
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.8)

#  Training Function with Early Stopping & Progress Bar


def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, epochs=20, patience=5):
    """Train ``model`` with per-epoch validation, checkpoints and early stopping.

    Side effects:
        * ``training_log.csv`` is (re)created in the working directory and gets
          one row per completed epoch.
        * Every 5th epoch the weights are saved under ``CHECKPOINT_DIR``.
        * Whenever validation accuracy improves, the weights are saved to
          ``best_skin_model.pth``.

    Args:
        model: Network to train; batches are moved to the device of its
            parameters.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` batches passed to
            ``evaluate``.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer updating ``model``'s parameters.
        scheduler: LR scheduler stepped once after each epoch that does not
            trigger early stopping.
        epochs: Maximum number of epochs.
        patience: Number of consecutive epochs without a validation-accuracy
            improvement after which training stops.
    """
    # Use the model's own device instead of relying on a module-level global.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    # Mixed precision only makes sense on CUDA. The GradScaler scales the loss
    # so small float16 gradients do not underflow to zero, which can happen
    # when autocast is used on its own.
    use_amp = device.type == "cuda"
    scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

    best_val_acc = 0
    early_stop_counter = 0  # Epochs since the last validation improvement
    log_file = "training_log.csv"

    with open(log_file, "w", encoding="utf-8") as f:
        f.write("epoch,train_loss,train_acc,val_acc,time_taken\n")

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        start_time = time.time()

        #  Show progress bar
        train_progress = tqdm(
            train_loader, desc=f"Epoch {epoch+1}/{epochs}", leave=True)

        for images, labels in train_progress:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()

            with torch.cuda.amp.autocast(enabled=use_amp):
                outputs = model(images)
                loss = criterion(outputs, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)  # Skips the update if gradients overflowed
            scaler.update()

            # Read the scalar once; each .item() call forces a device sync.
            batch_loss = loss.item()
            running_loss += batch_loss

            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            #  Update progress bar with loss and accuracy
            train_progress.set_postfix(
                loss=batch_loss, acc=100 * correct / total)

        # Guard the averages so an empty loader reports zeros instead of
        # raising ZeroDivisionError.
        avg_loss = running_loss / max(len(train_loader), 1)
        train_acc = 100 * correct / total if total else 0.0
        val_acc = evaluate(model, val_loader)
        epoch_time = time.time() - start_time  # Includes validation time

        #  Save log
        with open(log_file, "a", encoding="utf-8") as f:
            f.write(
                f"{epoch+1},{avg_loss:.4f},{train_acc:.2f},{val_acc:.2f},{epoch_time:.2f}\n")

        print(f"\n📌 Epoch {epoch+1}/{epochs} - Loss: {avg_loss:.4f} - Train Acc: {train_acc:.2f}% - Val Acc: {val_acc:.2f}% - Time: {epoch_time:.2f}s")

        #  Save Checkpoint every 5 epochs
        if (epoch + 1) % 5 == 0:
            checkpoint_path = os.path.join(
                CHECKPOINT_DIR, f"checkpoint_epoch_{epoch+1}.pth")
            torch.save(model.state_dict(), checkpoint_path)
            print(f" Saved checkpoint at {checkpoint_path}")

        #  Save Best Model
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            early_stop_counter = 0  # Reset early stopping counter
            torch.save(model.state_dict(), "best_skin_model.pth")
            print("🎉 New Best Model Saved!")
        else:
            early_stop_counter += 1  # Increase early stopping counter
            print(f"🚨 Early Stopping Counter: {early_stop_counter}/{patience}")

        #  Early Stopping
        if early_stop_counter >= patience:
            print("⏹️ Early Stopping Triggered! Training Stopped.")
            break

        scheduler.step()

#  Evaluation Function


def evaluate(model, val_loader):
    """Return the top-1 accuracy of ``model`` over ``val_loader`` as a percentage.

    The model is switched to eval mode and left in it; gradients are disabled
    while the batches are processed.

    Args:
        model: Network to evaluate; batches are moved to the device of its
            parameters (CPU if it has none).
        val_loader: Iterable of ``(images, labels)`` batches.

    Returns:
        Accuracy in the range 0-100, or ``0.0`` when ``val_loader`` yields no
        samples.
    """
    model.eval()

    # Use the model's own device instead of relying on a module-level global.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # An empty loader would otherwise raise ZeroDivisionError.
    if total == 0:
        return 0.0
    return 100 * correct / total


#  Train Model
# Launch the run: at most 20 epochs, stopping early after 5 epochs without a
# validation-accuracy improvement.
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    epochs=20,
    patience=5,
)

Epoch 1/20:   0%|          | 0/1002 [00:00<?, ?it/s]