<a href="https://colab.research.google.com/github/Sterina1906/DermaVerseAI-Skin-Lesion-Detection-using-Deep-Learning/blob/main/skin_disease.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!nvidia-smi


Sun Jan  4 09:17:04 2026       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   53C    P8             10W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
# Step1_imports.py
"""
Install required packages first (the later cells also import matplotlib, seaborn and tqdm):
pip install torch torchvision albumentations opencv-python pandas scikit-learn timm matplotlib seaborn tqdm
"""

import os
import pandas as pd
import numpy as np
import cv2
from pathlib import Path
import matplotlib.pyplot as plt
from collections import Counter

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler

import albumentations as A
from albumentations.pytorch import ToTensorV2

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve
import timm  # PyTorch Image Models library for EfficientNet

import warnings
warnings.filterwarnings('ignore')

# Set random seeds for reproducibility
def set_seed(seed=42):
    """
    Seed every random number generator used by this notebook.

    Args:
        seed: Integer seed applied to Python's built-in `random` module,
            NumPy's global generator and PyTorch (CPU and all CUDA devices).

    Side effects:
        Switches cuDNN to deterministic mode and disables its autotuner.
    """
    # Local import: `random` is not part of the notebook's import cell.
    import random

    # Python's own generator was previously left unseeded, so anything drawing
    # from it stayed non-deterministic between runs.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Trade a little speed for repeatable cuDNN kernels.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)

# Device configuration: use the first CUDA GPU when PyTorch can see one,
# otherwise fall back to the CPU. `device` is a module-level global.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"‚úÖ Using device: {device}")
if torch.cuda.is_available():
    # Report the name of GPU index 0 so the run log records the hardware used.
    print(f"   GPU: {torch.cuda.get_device_name(0)}")

‚úÖ Using device: cuda
   GPU: Tesla T4


In [None]:

from sklearn.model_selection import GroupShuffleSplit

In [None]:
import os
import shutil
from pathlib import Path

# Source locations on Google Drive (the paths contain spaces and parentheses)
DRIVE_IMAGE_DIR = '/content/drive/MyDrive/skin-disease-el/ISIC2018_Task3_Training_Input (1)/ISIC2018_Task3_Training_Input/'
DRIVE_GT_FILE = '/content/drive/MyDrive/skin-disease-el/ISIC2018_Task3_Training_GroundTruth (1)/ISIC2018_Task3_Training_GroundTruth/ISIC2018_Task3_Training_GroundTruth.csv'
DRIVE_LESION_FILE = '/content/drive/MyDrive/skin-disease-el/ISIC2018_Task3_Training_LesionGroupings.csv'  # Lesion grouping CSV

# Local working copy of the dataset
LOCAL_DIR = '/content/local_data/'
LOCAL_IMAGE_DIR = os.path.join(LOCAL_DIR, 'images')
os.makedirs(LOCAL_IMAGE_DIR, exist_ok=True)

print("Copying images to local storage (3-5 minutes)...")

# Collect the images with pathlib (handles spaces properly); .png is only
# tried when no .jpg file exists at all.
image_files = list(Path(DRIVE_IMAGE_DIR).glob('*.jpg'))
if not image_files:
    image_files = list(Path(DRIVE_IMAGE_DIR).glob('*.png'))

# Fail loudly instead of reporting "All 0 images copied!" when the source
# folder is missing or empty (e.g. Drive has not been mounted).
if not image_files:
    raise FileNotFoundError(
        f"No .jpg or .png images found in {DRIVE_IMAGE_DIR!r} - "
        "is Google Drive mounted and the path correct?"
    )

print(f"Found {len(image_files)} images")

for i, img_file in enumerate(image_files):
    if i % 1000 == 0:
        print(f"Copied {i}/{len(image_files)} images...")

    local_copy = os.path.join(LOCAL_IMAGE_DIR, img_file.name)

    # Re-running the cell should not pay for the full copy again: skip files
    # that are already present locally with the same size.
    if os.path.exists(local_copy) and os.path.getsize(local_copy) == img_file.stat().st_size:
        continue

    shutil.copy2(str(img_file), local_copy)

print(f"‚úÖ All {len(image_files)} images copied!")

# Copy CSV files
print("Copying CSV files...")
shutil.copy2(DRIVE_GT_FILE, LOCAL_DIR + 'groundtruth.csv')
print("‚úÖ groundtruth.csv copied!")

# The lesion grouping CSV is copied next to the ground truth; update
# DRIVE_LESION_FILE above if it lives somewhere else.
shutil.copy2(DRIVE_LESION_FILE, LOCAL_DIR + 'lesion_grouping.csv')

print("\n" + "="*60)
print("‚úÖ ALL DONE! Now update your training code:")
print("="*60)
print("\nChange these lines in your code:")
print("DATA_DIR = '/content/local_data/'")
print("IMAGE_DIR = os.path.join(DATA_DIR, 'images')")
print("METADATA_FILE = os.path.join(DATA_DIR, 'groundtruth.csv')")

Copying images to local storage (3-5 minutes)...
Found 10019 images
Copied 0/10019 images...
Copied 1000/10019 images...
Copied 2000/10019 images...
Copied 3000/10019 images...
Copied 4000/10019 images...
Copied 5000/10019 images...
Copied 6000/10019 images...
Copied 7000/10019 images...
Copied 8000/10019 images...
Copied 9000/10019 images...
Copied 10000/10019 images...
‚úÖ All 10019 images copied!
Copying CSV files...
‚úÖ groundtruth.csv copied!

‚úÖ ALL DONE! Now update your training code:

Change these lines in your code:
DATA_DIR = '/content/local_data/'
IMAGE_DIR = os.path.join(DATA_DIR, 'images')
METADATA_FILE = os.path.join(DATA_DIR, 'groundtruth.csv')


In [None]:
# Root of the local dataset copy; the three paths below are derived from it.
DATA_DIR = '/content/local_data/'  # <- Change this line only
# Directory holding the image files
image_dir = os.path.join(DATA_DIR, 'images')
# Ground-truth CSV (read as the "disease" table by DataPreparation)
disease_csv_path = os.path.join(DATA_DIR, 'groundtruth.csv')
# Lesion grouping CSV (read as the "lesion" table by DataPreparation)
lesion_csv_path=os.path.join(DATA_DIR, 'lesion_grouping.csv')

In [None]:
# Step2_data_preparation.py
"""
Prepare the dataset by:
1. Loading CSV files
2. Mapping diseases to binary labels (benign/malignant)
3. Creating train/val/test splits
4. Handling class imbalance with oversampling
"""

class DataPreparation:
    """
    Loads the ground-truth tables, derives binary benign/malignant labels and
    builds train/validation/test splits.

    The splits are made at lesion level by default: every image of a lesion
    lands in the same split, so near-duplicate views of one lesion cannot leak
    from training into validation or test.
    """

    def __init__(self, image_dir, lesion_csv_path, disease_csv_path):
        """
        Args:
            image_dir: Directory containing all images
            lesion_csv_path: Path to lesion grouping CSV (image, lesion_id, diagnosis_confirm_type)
            disease_csv_path: Path to disease CSV (image, one column per diagnosis)
        """
        self.image_dir = image_dir
        self.lesion_df = pd.read_csv(lesion_csv_path)
        self.disease_df = pd.read_csv(disease_csv_path)

        # Diagnosis columns treated as malignant; single source of truth for
        # create_binary_labels().
        self.malignant_diseases = ['AKIEC', 'MEL', 'BCC']

        print("=" * 60)
        print("üìä DATA PREPARATION")
        print("=" * 60)
        print(f"Lesion CSV shape: {self.lesion_df.shape}")
        print(f"Disease CSV shape: {self.disease_df.shape}")

    def create_binary_labels(self):
        """
        Create binary labels: 0 = Benign, 1 = Malignant.

        Adds `label` and `image_path` columns to `self.disease_df` and drops
        rows whose image file does not exist on disk.

        Returns:
            The updated `self.disease_df`.

        Raises:
            KeyError: if a malignant diagnosis column or the `image` column is
                missing from the disease CSV (raised by pandas).
        """
        # Binary label: 1 if the image belongs to any malignant class
        self.disease_df['label'] = self.disease_df[self.malignant_diseases].max(axis=1)

        # Add full image path
        self.disease_df['image_path'] = self.disease_df['image'].apply(
            lambda x: os.path.join(self.image_dir, x + '.jpg')
        )

        # Keep only rows whose image actually exists
        self.disease_df = self.disease_df[
            self.disease_df['image_path'].apply(os.path.exists)
        ].reset_index(drop=True)

        benign_count = (self.disease_df['label'] == 0).sum()
        malignant_count = (self.disease_df['label'] == 1).sum()

        print(f"\n‚úÖ Binary labels created:")
        print(f"   Total images: {len(self.disease_df)}")
        print(f"   Benign (0): {benign_count}")
        print(f"   Malignant (1): {malignant_count}")
        print(f"   Class ratio (Benign:Malignant): "
              f"{benign_count}:"
              f"{malignant_count}")

        return self.disease_df

    def _lesion_groups(self, df):
        """
        Map every row of `df` to the lesion it belongs to.

        Returns:
            A Series aligned with `df.index` holding the lesion id of each
            image, or None when the lesion table lacks the `image` /
            `lesion_id` columns. Images without a lesion entry become their
            own single-image group.
        """
        if not {'image', 'lesion_id'}.issubset(self.lesion_df.columns):
            return None

        lesion_of_image = (
            self.lesion_df
            .drop_duplicates(subset='image')
            .set_index('image')['lesion_id']
        )
        groups = df['image'].map(lesion_of_image)
        return groups.fillna(df['image'])

    @staticmethod
    def _holdout_split(df, groups, holdout_fraction, random_state):
        """
        Split `df` into (kept, held-out) parts, stratified on `label`.

        When `groups` is given, whole groups are assigned to one side and the
        stratification uses one label per group (malignant if any image of the
        group is malignant); `holdout_fraction` then applies to groups, so the
        image-level proportion is only approximate.
        """
        if groups is None:
            return train_test_split(
                df,
                test_size=holdout_fraction,
                stratify=df['label'],
                random_state=random_state
            )

        part_groups = groups.loc[df.index]
        group_labels = df['label'].groupby(part_groups).max()

        _, held_group_ids = train_test_split(
            group_labels.index.to_numpy(),
            test_size=holdout_fraction,
            stratify=group_labels.to_numpy(),
            random_state=random_state
        )

        held_mask = part_groups.isin(held_group_ids)
        return df[~held_mask], df[held_mask]

    def create_stratified_splits(self, test_size=0.15, val_size=0.15, random_state=42,
                                 group_by_lesion=True):
        """
        Create train/val/test splits with stratification on the binary label.

        Args:
            test_size: Fraction of the data held out for testing.
            val_size: Fraction of the data held out for validation.
            random_state: Seed forwarded to scikit-learn's splitter.
            group_by_lesion: Keep all images of one lesion in the same split
                (default). Falls back to an image-level split when the lesion
                table has no usable `lesion_id` column.

        Returns:
            (train_df, val_df, test_df), each with a fresh 0..n-1 index.
        """
        df = self.create_binary_labels()

        groups = self._lesion_groups(df) if group_by_lesion else None
        if group_by_lesion and groups is None:
            print("   WARNING: lesion table has no 'image'/'lesion_id' columns; "
                  "splitting per image (images of one lesion may span splits).")

        # First split: separate test set
        train_val_df, test_df = self._holdout_split(df, groups, test_size, random_state)

        # Second split: separate validation set from the remainder.
        # val_size is expressed relative to the full data set, so rescale it.
        val_size_adjusted = val_size / (1 - test_size)
        train_df, val_df = self._holdout_split(
            train_val_df, groups, val_size_adjusted, random_state
        )

        print(f"\n‚úÖ Dataset splits created:")
        if groups is not None:
            print(f"   Grouped by lesion: {groups.nunique()} lesions, none shared between splits")
        print(f"   Training: {len(train_df)} images")
        print(f"      - Benign: {(train_df['label'] == 0).sum()}")
        print(f"      - Malignant: {(train_df['label'] == 1).sum()}")
        print(f"   Validation: {len(val_df)} images")
        print(f"      - Benign: {(val_df['label'] == 0).sum()}")
        print(f"      - Malignant: {(val_df['label'] == 1).sum()}")
        print(f"   Test: {len(test_df)} images")
        print(f"      - Benign: {(test_df['label'] == 0).sum()}")
        print(f"      - Malignant: {(test_df['label'] == 1).sum()}")

        return train_df.reset_index(drop=True), val_df.reset_index(drop=True), test_df.reset_index(drop=True)


# Usage example: build the preparation helper from the image directory and the
# two CSV paths configured in the earlier cell.
data_prep = DataPreparation(
    image_dir = image_dir,
    disease_csv_path = disease_csv_path,
    lesion_csv_path=lesion_csv_path
)

# Create splits with the method's defaults (test_size=0.15, val_size=0.15,
# random_state=42); the three DataFrames become module-level globals.
train_df, val_df, test_df = data_prep.create_stratified_splits()


üìä DATA PREPARATION
Lesion CSV shape: (10015, 3)
Disease CSV shape: (10015, 8)

‚úÖ Binary labels created:
   Total images: 10015
   Benign (0): 8061
   Malignant (1): 1954
   Class ratio (Benign:Malignant): 8061:1954

‚úÖ Dataset splits created:
   Training: 7009 images
      - Benign: 5641
      - Malignant: 1368
   Validation: 1503 images
      - Benign: 1210
      - Malignant: 293
   Test: 1503 images
      - Benign: 1210
      - Malignant: 293


In [None]:
# Step3_augmentation.py
"""
Data augmentation pipelines using Albumentations
"""

import albumentations as A
from albumentations.pytorch import ToTensorV2

def get_train_transforms(img_size=224):
    """
    Build the augmentation pipeline used for training images only.

    Args:
        img_size: Side length (pixels) of the square image produced by the
            initial resize.

    Returns:
        An `A.Compose` that resizes, applies random geometric / noise /
        distortion / colour augmentations, normalises with ImageNet
        statistics and converts the result to a PyTorch tensor.
    """
    # Flips, right-angle rotations and a random affine jitter
    geometric_ops = [
        A.RandomRotate90(p=0.5),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.3),
        A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.2, rotate_limit=45, p=0.5),
    ]

    # At most one noise/blur op, applied to 30% of the samples
    noise_or_blur = A.OneOf(
        [A.GaussNoise(p=1.0), A.GaussianBlur(p=1.0), A.MotionBlur(p=1.0)],
        p=0.3,
    )

    # At most one lens/grid distortion, applied to 30% of the samples
    distortion = A.OneOf(
        [
            A.OpticalDistortion(distort_limit=0.5),
            A.GridDistortion(num_steps=5, distort_limit=0.3),
        ],
        p=0.3,
    )

    # Lighting / colour variation
    colour_jitter = A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1, p=0.5)

    # ImageNet statistics, matching the pre-trained backbone
    imagenet_normalize = A.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )

    steps = [
        A.Resize(img_size, img_size),
        *geometric_ops,
        noise_or_blur,
        distortion,
        colour_jitter,
        imagenet_normalize,
        ToTensorV2(),
    ]
    return A.Compose(steps)


def get_val_transforms(img_size=224):
    """
    Build the deterministic preprocessing used for validation and test images.

    Args:
        img_size: Side length (pixels) of the square image produced by the
            resize.

    Returns:
        An `A.Compose` that resizes, normalises with ImageNet statistics and
        converts to a PyTorch tensor - no random augmentation.
    """
    resize = A.Resize(img_size, img_size)
    imagenet_normalize = A.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )
    to_tensor = ToTensorV2()

    return A.Compose([resize, imagenet_normalize, to_tensor])


print("‚úÖ Augmentation pipelines defined!")

‚úÖ Augmentation pipelines defined!


In [None]:
# Step4_dataset.py
"""
Custom PyTorch Dataset for binary skin lesion classification
"""

import torch
from torch.utils.data import Dataset
import cv2
import pandas as pd

class SkinLesionBinaryDataset(Dataset):
    """
    PyTorch dataset serving (image, label) pairs for benign-vs-malignant
    skin lesion classification.
    """

    def __init__(self, dataframe, transform=None):
        """
        Args:
            dataframe: Pandas DataFrame with columns ['image_path', 'label']
            transform: Albumentations transform pipeline (optional)
        """
        self.df = dataframe.reset_index(drop=True)
        self.transform = transform

        # Two-way lookup between class index and class name
        self.idx_to_class = {0: 'Benign', 1: 'Malignant'}
        self.class_to_idx = {name: idx for idx, name in self.idx_to_class.items()}

        label_counts = dict(self.df['label'].value_counts())
        print(f"‚úÖ Dataset initialized with {len(self.df)} images")
        print(f"   Class distribution: {label_counts}")

    def __len__(self):
        """Number of rows (images) in the dataset."""
        return len(self.df)

    def __getitem__(self, idx):
        """
        Read one image from disk and run it through the transform.

        Returns:
            image: the transformed image (the raw RGB array when no
                transform is set)
            label: value of the row's 'label' column (0=Benign, 1=Malignant)

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        record = self.df.iloc[idx]
        img_path = record['image_path']
        label = record['label']

        bgr_image = cv2.imread(img_path)
        if bgr_image is None:
            raise FileNotFoundError(f"Image not found: {img_path}")

        # OpenCV decodes to BGR; the rest of the pipeline expects RGB
        image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

        if self.transform:
            image = self.transform(image=image)['image']

        return image, label

    def get_class_distribution(self):
        """Return a {label: count} dict, e.g. for weighted sampling."""
        counts = self.df['label'].value_counts()
        return counts.to_dict()


print("‚úÖ Custom Dataset class defined!")

‚úÖ Custom Dataset class defined!


In [None]:
# Step5_sampler.py
"""
Create weighted sampler to handle class imbalance during training
This oversamples the minority class (malignant) during training
"""

from torch.utils.data import WeightedRandomSampler
import numpy as np

def create_weighted_sampler(dataset):
    """
    Create WeightedRandomSampler to handle class imbalance

    Each sample is weighted by the inverse frequency of its class, so both
    classes are drawn about equally often.

    Args:
        dataset: object exposing a DataFrame `df` with a 'label' column whose
            values are 0 (benign) or 1 (malignant)

    Returns:
        WeightedRandomSampler for DataLoader (sampling with replacement,
        one epoch = len(dataset) draws)

    Raises:
        ValueError: if the dataset contains no samples.
    """
    # Get all labels
    labels = dataset.df['label'].values.astype(np.int64)
    if labels.size == 0:
        raise ValueError("Cannot create a weighted sampler for an empty dataset")

    # Count samples per class; minlength=2 guarantees an entry for both
    # classes even when one of them is absent from this dataset.
    class_counts = np.bincount(labels, minlength=2)

    # Inverse-frequency weight per class. A class with zero samples keeps
    # weight 0 instead of triggering a division by zero (it is never indexed
    # by any sample anyway).
    class_weights = np.zeros(len(class_counts), dtype=np.float64)
    present = class_counts > 0
    class_weights[present] = 1.0 / class_counts[present]

    # Assign weight to each sample based on its class
    sample_weights = class_weights[labels]

    # Create sampler
    sampler = WeightedRandomSampler(
        weights=sample_weights,
        num_samples=len(sample_weights),
        replacement=True
    )

    print(f"‚úÖ Weighted sampler created:")
    print(f"   Class counts: Benign={class_counts[0]}, Malignant={class_counts[1]}")
    print(f"   Class weights: Benign={class_weights[0]:.4f}, Malignant={class_weights[1]:.4f}")
    print(f"   This will oversample malignant cases during training!")
    if not present[:2].all():
        print("   WARNING: only one class is present; sampling cannot rebalance it.")

    return sampler


print("‚úÖ Weighted sampler function defined!")

‚úÖ Weighted sampler function defined!


In [None]:
# Step6_dataloaders.py
"""
Create DataLoaders with weighted sampling for training
"""

from torch.utils.data import DataLoader

def create_dataloaders(train_df, val_df, test_df, batch_size=32, num_workers=2, img_size=224):
    """
    Build the datasets and DataLoaders for the three splits.

    Args:
        train_df, val_df, test_df: DataFrames with image paths and labels
        batch_size: Batch size used by all three loaders
        num_workers: Number of worker processes per loader
        img_size: Input image size passed to the transform builders

    Returns:
        train_loader, val_loader, test_loader,
        train_dataset, val_dataset, test_dataset
    """
    banner = "=" * 60
    print(banner)
    print("üîÑ CREATING DATALOADERS")
    print(banner)

    # Training data gets the augmenting pipeline ...
    train_dataset = SkinLesionBinaryDataset(
        train_df,
        transform=get_train_transforms(img_size=img_size)
    )
    # ... validation and test data only the deterministic preprocessing
    val_dataset, test_dataset = (
        SkinLesionBinaryDataset(frame, transform=get_val_transforms(img_size=img_size))
        for frame in (val_df, test_df)
    )

    # Settings shared by every loader
    loader_options = dict(batch_size=batch_size, num_workers=num_workers, pin_memory=True)

    # The weighted sampler replaces shuffling for the training loader so the
    # minority class is drawn more often.
    train_loader = DataLoader(
        train_dataset,
        sampler=create_weighted_sampler(train_dataset),
        **loader_options
    )
    val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)
    test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

    print(f"\n‚úÖ DataLoaders created:")
    for split_name, loader in (('Training', train_loader),
                               ('Validation', val_loader),
                               ('Test', test_loader)):
        print(f"   {split_name} batches: {len(loader)}")

    return train_loader, val_loader, test_loader, train_dataset, val_dataset, test_dataset


# Hyperparameters (module-level defaults; they match the default arguments of
# create_dataloaders above).
# NOTE(review): the Config class defined later in this notebook repeats the
# same three values - keep them in sync or drop one copy.
BATCH_SIZE = 32
NUM_WORKERS = 2
IMG_SIZE = 224

print("‚úÖ DataLoader creation function defined!")

‚úÖ DataLoader creation function defined!


In [None]:
# Step7_model.py
"""
Define EfficientNet-B0 model for binary classification
"""

import torch
import torch.nn as nn
import timm

class EfficientNetB0Binary(nn.Module):
    """
    EfficientNet-B0 backbone (from timm) with a dropout + linear head for
    benign-vs-malignant classification.
    """

    def __init__(self, pretrained=True, num_classes=1):
        """
        Args:
            pretrained: Load timm's pretrained weights for the backbone
            num_classes: Width of the output layer; 1 yields a single logit
                for use with BCEWithLogitsLoss
        """
        super().__init__()

        # Backbone without its own classifier or pooling: it returns the raw
        # feature map, which is pooled explicitly below.
        self.backbone = timm.create_model(
            'efficientnet_b0',
            pretrained=pretrained,
            num_classes=0,
            global_pool=''
        )
        self.num_features = self.backbone.num_features

        # Collapse the spatial dimensions to 1x1
        self.global_pool = nn.AdaptiveAvgPool2d(1)

        # Dropout followed by the final linear layer
        head_layers = [
            nn.Dropout(p=0.3),
            nn.Linear(self.num_features, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """
        Compute logits for a batch of images.

        Args:
            x: Input tensor [batch_size, 3, H, W]

        Returns:
            Logits tensor [batch_size, num_classes]
        """
        feature_map = self.backbone(x)                    # [batch, num_features, h, w]
        pooled = self.global_pool(feature_map).flatten(1)  # [batch, num_features]
        return self.classifier(pooled)


def create_model(pretrained=True, device='cuda'):
    """
    Instantiate the single-logit EfficientNet-B0 model, move it to `device`
    and print a short summary.

    Args:
        pretrained: Forwarded to EfficientNetB0Binary
        device: Target device (string or torch.device)

    Returns:
        The model, already placed on `device`.
    """
    model = EfficientNetB0Binary(pretrained=pretrained, num_classes=1).to(device)

    # Tally parameter counts in one pass
    total_params = 0
    trainable_params = 0
    for param in model.parameters():
        count = param.numel()
        total_params += count
        if param.requires_grad:
            trainable_params += count

    rule = "=" * 60
    print(rule)
    print("ü§ñ MODEL ARCHITECTURE")
    print(rule)
    print("Model: EfficientNet-B0")
    print(f"Pretrained: {pretrained}")
    print(f"Number of parameters: {total_params:,}")
    print(f"Trainable parameters: {trainable_params:,}")
    print(f"Device: {device}")

    return model


print("‚úÖ Model architecture defined!")

‚úÖ Model architecture defined!


In [None]:
# Step8_training.py
"""
Training and evaluation functions
"""

import torch
import torch.nn as nn
from tqdm import tqdm
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

def train_one_epoch(model, train_loader, criterion, optimizer, device, epoch):
    """
    Train for one epoch

    Args:
        model: Network producing one logit per image
        train_loader: Iterable of (images, labels) batches
        criterion: Loss taking (logits, labels), both shaped [batch_size, 1]
        optimizer: Optimizer stepped once per batch
        device: Device the batches are moved to
        epoch: Epoch number, used only for the progress-bar caption

    Returns:
        (epoch_loss, epoch_acc): mean loss per processed sample and accuracy
        at a 0.5 probability threshold.

    Raises:
        ValueError: if the loader yields no samples.
    """
    model.train()
    running_loss = 0.0
    samples_seen = 0
    all_preds = []
    all_labels = []

    pbar = tqdm(train_loader, desc=f'Epoch {epoch} [Train]')

    for images, labels in pbar:
        images = images.to(device)
        labels = labels.float().unsqueeze(1).to(device)  # [batch_size, 1]

        # Forward pass
        optimizer.zero_grad()
        logits = model(images)
        loss = criterion(logits, labels)

        # Backward pass
        loss.backward()
        optimizer.step()

        # Statistics. loss.item() is weighted by the batch size so the epoch
        # value is a per-sample mean (assumes the criterion averages over the
        # batch - TODO confirm for custom losses).
        batch_size = images.size(0)
        running_loss += loss.item() * batch_size
        samples_seen += batch_size

        # Threshold the sigmoid probabilities at 0.5 and store flat Python
        # floats so the metric receives plain 1-D sequences.
        preds = (torch.sigmoid(logits) >= 0.5).float()
        all_preds.extend(preds.detach().reshape(-1).cpu().tolist())
        all_labels.extend(labels.reshape(-1).cpu().tolist())

        # Update progress bar
        pbar.set_postfix({'loss': loss.item()})

    if samples_seen == 0:
        raise ValueError("train_loader produced no samples")

    # Normalise by the samples actually processed rather than len(dataset):
    # the two differ with drop_last=True or a sampler whose num_samples is not
    # the dataset length.
    epoch_loss = running_loss / samples_seen
    epoch_acc = accuracy_score(all_labels, all_preds)

    return epoch_loss, epoch_acc


def validate(model, val_loader, criterion, device, epoch):
    """
    Validate the model

    Args:
        model: Network producing one logit per image
        val_loader: Iterable of (images, labels) batches
        criterion: Loss taking (logits, labels), both shaped [batch_size, 1]
        device: Device the batches are moved to
        epoch: Epoch number, used only for the progress-bar caption

    Returns:
        (loss, accuracy, precision, recall, f1, auc). Predictions use a 0.5
        probability threshold. `auc` is 0.0 when ROC-AUC is undefined (e.g.
        only one class present in the labels).

    Raises:
        ValueError: if the loader yields no samples.
    """
    model.eval()
    running_loss = 0.0
    samples_seen = 0
    all_preds = []
    all_probs = []
    all_labels = []

    pbar = tqdm(val_loader, desc=f'Epoch {epoch} [Val]')

    with torch.no_grad():
        for images, labels in pbar:
            images = images.to(device)
            labels = labels.float().unsqueeze(1).to(device)

            # Forward pass
            logits = model(images)
            loss = criterion(logits, labels)

            # Statistics (per-sample weighting, see the final division)
            batch_size = images.size(0)
            running_loss += loss.item() * batch_size
            samples_seen += batch_size

            # Get predictions
            probs = torch.sigmoid(logits)
            preds = (probs >= 0.5).float()

            # Store flat Python floats so the metrics receive 1-D sequences
            all_preds.extend(preds.reshape(-1).cpu().tolist())
            all_probs.extend(probs.reshape(-1).cpu().tolist())
            all_labels.extend(labels.reshape(-1).cpu().tolist())

            pbar.set_postfix({'loss': loss.item()})

    if samples_seen == 0:
        raise ValueError("val_loader produced no samples")

    # Calculate metrics; the loss is averaged over the samples actually seen
    epoch_loss = running_loss / samples_seen
    epoch_acc = accuracy_score(all_labels, all_preds)
    epoch_precision = precision_score(all_labels, all_preds, zero_division=0)
    epoch_recall = recall_score(all_labels, all_preds, zero_division=0)
    epoch_f1 = f1_score(all_labels, all_preds, zero_division=0)

    try:
        epoch_auc = roc_auc_score(all_labels, all_probs)
    except ValueError as err:
        # Only the "metric is undefined" case is tolerated; anything else
        # (including KeyboardInterrupt) now propagates instead of being
        # silently turned into an AUC of 0.
        print(f"   AUC undefined for this epoch ({err}); reporting 0.0")
        epoch_auc = 0.0

    return epoch_loss, epoch_acc, epoch_precision, epoch_recall, epoch_f1, epoch_auc


def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler,
                num_epochs, device, save_path='best_model.pth'):
    """
    Run the full train/validate loop and checkpoint the best epoch.

    Args:
        model, criterion, optimizer: Passed through to the per-epoch helpers
        train_loader, val_loader: Loaders for the two phases
        scheduler: Stepped once per epoch with the validation AUC
        num_epochs: Number of epochs to run
        device: Device used for training and validation
        save_path: File the best checkpoint (highest validation AUC) is
            written to

    Returns:
        history: dict mapping each metric name to a per-epoch list
    """
    rule = "=" * 60
    print(rule)
    print("üöÄ STARTING TRAINING")
    print(rule)

    metric_names = (
        'train_loss', 'train_acc',
        'val_loss', 'val_acc', 'val_precision',
        'val_recall', 'val_f1', 'val_auc',
    )
    history = {name: [] for name in metric_names}
    best_val_auc = 0.0

    for epoch in range(1, num_epochs + 1):
        print("\n" + rule)
        print(f"Epoch {epoch}/{num_epochs}")
        print(rule)

        train_loss, train_acc = train_one_epoch(
            model, train_loader, criterion, optimizer, device, epoch
        )
        val_loss, val_acc, val_precision, val_recall, val_f1, val_auc = validate(
            model, val_loader, criterion, device, epoch
        )

        # The scheduler is driven by the validation AUC
        scheduler.step(val_auc)

        # Record this epoch's numbers under the matching history keys
        epoch_values = (
            train_loss, train_acc,
            val_loss, val_acc, val_precision,
            val_recall, val_f1, val_auc,
        )
        for name, value in zip(metric_names, epoch_values):
            history[name].append(value)

        print(f"\nüìä Epoch {epoch} Results:")
        print(f"   Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
        print(f"   Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")
        print(f"   Val Precision: {val_precision:.4f} | Val Recall: {val_recall:.4f}")
        print(f"   Val F1: {val_f1:.4f} | Val AUC: {val_auc:.4f}")

        # Nothing to save unless this epoch beats the best AUC so far
        if not val_auc > best_val_auc:
            continue

        best_val_auc = val_auc
        checkpoint = {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'val_auc': val_auc,
        }
        torch.save(checkpoint, save_path)
        print(f"   ‚úÖ Best model saved! (AUC: {val_auc:.4f})")

    print("\n" + rule)
    print("‚úÖ TRAINING COMPLETE!")
    print(f"   Best Validation AUC: {best_val_auc:.4f}")
    print(rule)

    return history


print("‚úÖ Training functions defined!")

‚úÖ Training functions defined!


In [None]:
# Step9_evaluation.py
"""
Model evaluation and testing functions
"""

import torch
import numpy as np
from sklearn.metrics import (classification_report, confusion_matrix,
                            roc_auc_score, roc_curve, accuracy_score)
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

def evaluate_model(model, test_loader, device):
    """
    Evaluate model on test set

    Args:
        model: Network producing one logit per image
        test_loader: Iterable of (images, labels) batches
        device: Device the image batches are moved to

    Returns:
        (all_labels, all_preds, all_probs, cm): flat NumPy arrays of true
        labels, thresholded predictions (0.5) and probabilities, plus the
        2x2 confusion matrix ordered [Benign, Malignant].
    """
    print("=" * 60)
    print("üìä EVALUATING MODEL ON TEST SET")
    print("=" * 60)

    model.eval()
    all_preds = []
    all_probs = []
    all_labels = []

    with torch.no_grad():
        for images, labels in tqdm(test_loader, desc='Testing'):
            images = images.to(device)
            labels = labels.float().unsqueeze(1)

            # Forward pass
            logits = model(images)
            probs = torch.sigmoid(logits)
            preds = (probs >= 0.5).float()

            all_preds.extend(preds.cpu().numpy())
            all_probs.extend(probs.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Convert to numpy arrays
    all_preds = np.array(all_preds).flatten()
    all_probs = np.array(all_probs).flatten()
    all_labels = np.array(all_labels).flatten()

    # Integer views for the label-based metrics. Pinning the label set to
    # [0, 1] keeps the report and the confusion matrix two-class even when the
    # model predicts (or the data contains) a single class; without it the
    # matrix shrinks to 1x1 and the indexing below fails.
    class_ids = [0, 1]
    y_true = all_labels.astype(int)
    y_pred = all_preds.astype(int)

    # Calculate metrics
    test_acc = accuracy_score(y_true, y_pred)
    try:
        test_auc = roc_auc_score(y_true, all_probs)
        auc_note = None
    except ValueError as err:
        # ROC-AUC is undefined when only one class is present in the labels
        test_auc = float('nan')
        auc_note = str(err)

    print(f"\n‚úÖ Test Results:")
    print(f"   Accuracy: {test_acc:.4f}")
    print(f"   AUC-ROC: {test_auc:.4f}")
    if auc_note is not None:
        print(f"   (AUC-ROC undefined: {auc_note})")

    # Classification report
    print(f"\nüìã Classification Report:")
    print(classification_report(y_true, y_pred,
                                labels=class_ids,
                                target_names=['Benign', 'Malignant'],
                                digits=4,
                                zero_division=0))

    # Confusion matrix
    cm = confusion_matrix(y_true, y_pred, labels=class_ids)
    print(f"\nüî¢ Confusion Matrix:")
    print(f"              Predicted")
    print(f"              Benign  Malignant")
    print(f"Actual Benign    {cm[0][0]:5d}  {cm[0][1]:5d}")
    print(f"       Malignant {cm[1][0]:5d}  {cm[1][1]:5d}")

    return all_labels, all_preds, all_probs, cm


def plot_confusion_matrix(cm, save_path='confusion_matrix.png'):
    """
    Draw the confusion matrix as an annotated heatmap, save it and show it.

    Args:
        cm: 2x2 confusion matrix with integer counts (rows = actual,
            columns = predicted)
        save_path: Output image file
    """
    class_names = ['Benign', 'Malignant']

    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_names,
        yticklabels=class_names,
        ax=ax,
    )
    ax.set_ylabel('Actual')
    ax.set_xlabel('Predicted')
    ax.set_title('Confusion Matrix')

    fig.tight_layout()
    fig.savefig(save_path, dpi=300, bbox_inches='tight')
    print(f"‚úÖ Confusion matrix saved to {save_path}")
    plt.show()


def plot_roc_curve(labels, probs, save_path='roc_curve.png'):
    """
    Plot the ROC curve with its AUC in the legend, save it and show it.

    Args:
        labels: True binary labels
        probs: Predicted probabilities for the positive class
        save_path: Output image file
    """
    fpr, tpr, _ = roc_curve(labels, probs)
    auc = roc_auc_score(labels, probs)

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(fpr, tpr, label=f'ROC Curve (AUC = {auc:.4f})', linewidth=2)
    # Diagonal = performance of a random classifier
    ax.plot([0, 1], [0, 1], 'k--', label='Random Classifier')

    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('ROC Curve - Binary Classification')
    ax.legend(loc="lower right")
    ax.grid(alpha=0.3)

    fig.tight_layout()
    fig.savefig(save_path, dpi=300, bbox_inches='tight')
    print(f"‚úÖ ROC curve saved to {save_path}")
    plt.show()


def plot_training_history(history, save_path='training_history.png'):
    """
    Plot training history

    Draws a 2x2 grid (loss, accuracy, validation F1, validation AUC), saves
    it to `save_path` and shows it.

    Args:
        history: dict with per-epoch lists under the keys 'train_loss',
            'val_loss', 'train_acc', 'val_acc', 'val_f1' and 'val_auc'
        save_path: Output image file
    """
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))

    # One entry per panel: (axis, y-label, title, curves); each curve is
    # (history key, legend label, colour or None for the default cycle).
    panels = [
        (axes[0, 0], 'Loss', 'Training and Validation Loss',
         [('train_loss', 'Train Loss', None), ('val_loss', 'Val Loss', None)]),
        (axes[0, 1], 'Accuracy', 'Training and Validation Accuracy',
         [('train_acc', 'Train Acc', None), ('val_acc', 'Val Acc', None)]),
        (axes[1, 0], 'F1 Score', 'Validation F1 Score',
         [('val_f1', 'Val F1', 'green')]),
        (axes[1, 1], 'AUC', 'Validation AUC-ROC',
         [('val_auc', 'Val AUC', 'red')]),
    ]

    for ax, ylabel, title, curves in panels:
        for key, label, color in curves:
            line_style = {'color': color} if color else {}
            ax.plot(history[key], label=label, **line_style)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        ax.legend()
        ax.grid(alpha=0.3)

    plt.tight_layout()
    plt.savefig(save_path, dpi=300, bbox_inches='tight')
    print(f"‚úÖ Training history saved to {save_path}")
    plt.show()


# Cell-level confirmation. It used to be indented into the function above, so
# it printed after every plot and never when the cell was executed.
print("‚úÖ Evaluation functions defined!")

In [None]:
# Step10_main_training.py
"""
Main training script - Puts everything together
Run this file after defining all previous steps (Step 1-9)
"""

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
import os
import sys

# Import all previous step functions
# Make sure all Step1-9 files are in the same directory or imported properly

# ============================================================================
# CONFIGURATION
# ============================================================================

class Config:
    """
    Central place for the paths and hyperparameters of the training run.

    All values are class attributes, so `Config.X` works without creating an
    instance. The three path attributes are read from module-level variables
    (`image_dir`, `lesion_csv_path`, `disease_csv_path`) at the moment this
    class body executes, so those variables must already be defined.
    """

    # ==================== PATHS (UPDATE THESE) ====================
    IMAGE_DIR = image_dir  # Directory containing all .jpg images
    LESION_CSV = lesion_csv_path  # CSV with image, lesion_id, diagnosis_confirm_type
    DISEASE_CSV = disease_csv_path  # CSV with image and disease columns

    # ==================== MODEL SETTINGS ====================
    PRETRAINED = True  # Use ImageNet pretrained weights
    MODEL_SAVE_PATH = 'best_efficientnet_b0_binary.pth'  # Checkpoint file name (relative path)

    # ==================== TRAINING HYPERPARAMETERS ====================
    BATCH_SIZE = 32
    NUM_EPOCHS = 30
    LEARNING_RATE = 1e-4  # Initial learning rate
    WEIGHT_DECAY = 1e-5  # L2 regularization

    # ==================== DATA SETTINGS ====================
    IMG_SIZE = 224  # EfficientNet-B0 input size
    NUM_WORKERS = 2  # DataLoader workers (increase if you have more CPU cores)
    TEST_SIZE = 0.15  # 15% for testing
    VAL_SIZE = 0.15  # 15% for validation

    # ==================== DEVICE ====================
    # Evaluated once, when the class body runs
    DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # ==================== REPRODUCIBILITY ====================
    SEED = 42

    # ==================== OUTPUT SETTINGS ====================
    SAVE_PLOTS = True
    PLOT_DIR = 'plots'  # Directory to save plots


# ============================================================================
# HELPER FUNCTIONS
# ============================================================================

def print_section(title):
    """Write a section banner to stdout.

    The banner is a blank line, a 70-character ``=`` rule, the title indented
    by two spaces, and a closing ``=`` rule.
    """
    rule = "=" * 70
    print(f"\n{rule}\n  {title}\n{rule}")


def create_output_directories(config):
    """Ensure the plot directory exists when plot saving is enabled.

    Does nothing if ``config.SAVE_PLOTS`` is falsy; otherwise creates
    ``config.PLOT_DIR`` (no error if it is already there) and reports it.
    """
    if not config.SAVE_PLOTS:
        return

    plot_dir = config.PLOT_DIR
    os.makedirs(plot_dir, exist_ok=True)
    print(f"‚úÖ Output directory created: {plot_dir}")


def verify_paths(config):
    """Check that the image directory and both CSV paths exist.

    Prints one status line per path. If any path is missing, prints an error
    and terminates via ``sys.exit(1)``; otherwise prints a confirmation.
    """
    print_section("üîç VERIFYING PATHS")

    required = (
        ('Image Directory', config.IMAGE_DIR),
        ('Lesion CSV', config.LESION_CSV),
        ('Disease CSV', config.DISEASE_CSV),
    )

    missing = 0
    for label, location in required:
        if os.path.exists(location):
            marker = "‚úÖ"
        else:
            marker = "‚ùå"
            missing += 1
        print(f"{marker} {label}: {location}")

    if missing:
        print("\n‚ùå ERROR: Some required paths do not exist!")
        print("Please update the paths in Config class and try again.")
        sys.exit(1)

    print("\n‚úÖ All paths verified!")


def print_device_info(device):
    """Report which device will be used.

    For a CUDA device, also prints the name and total memory of GPU index 0
    and the CUDA version PyTorch was built with; for anything else, prints a
    warning that training runs on CPU.
    """
    print_section("üíª DEVICE INFORMATION")
    print(f"Device: {device}")

    if device.type != 'cuda':
        print("‚ö†Ô∏è  Warning: Running on CPU. Training will be slower.")
        print("   Consider using a GPU for faster training.")
        return

    gpu_name = torch.cuda.get_device_name(0)
    print(f"GPU Name: {gpu_name}")
    total_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"GPU Memory: {total_gb:.2f} GB")
    print(f"CUDA Version: {torch.version.cuda}")


def print_dataset_info(train_df, val_df, test_df):
    """Print per-split image counts and the training-set class imbalance.

    Each frame must have a ``label`` column; rows equal to 0 are counted as
    benign and rows equal to 1 as malignant.
    """
    print_section("üìä DATASET STATISTICS")

    splits = (('Training', train_df), ('Validation', val_df), ('Test', test_df))
    total = sum(len(frame) for _, frame in splits)

    print(f"\nTotal Images: {total}")
    print(f"\n{'Split':<12} {'Total':<8} {'Benign':<8} {'Malignant':<10} {'Ratio (B:M)'}")
    print("-" * 60)

    for split_name, frame in splits:
        n_images = len(frame)
        n_benign = (frame['label'] == 0).sum()
        n_malignant = (frame['label'] == 1).sum()
        print(f"{split_name:<12} {n_images:<8} {n_benign:<8} {n_malignant:<10} {n_benign}:{n_malignant}")

    print("\n" + "=" * 60)
    print("Class Imbalance Information:")
    train_labels = train_df['label']
    imbalance_ratio = (train_labels == 0).sum() / (train_labels == 1).sum()
    print(f"Training set imbalance ratio: {imbalance_ratio:.2f}:1 (Benign:Malignant)")
    print(f"This means benign samples are {imbalance_ratio:.1f}x more common")
    print("‚úÖ Weighted sampling will be used to handle this imbalance!")


def calculate_pos_weight(train_df):
    """Compute the positive-class weight for ``BCEWithLogitsLoss``.

    Args:
        train_df: Table with a ``label`` column; rows equal to 0 are counted
            as benign (negative) and rows equal to 1 as malignant (positive).

    Returns:
        float: benign count divided by malignant count, as a builtin float so
        that tensors built from it get PyTorch's default floating dtype rather
        than float64.

    Raises:
        ValueError: If there are no malignant rows. The ratio would otherwise
            be inf/nan and silently corrupt the loss.
    """
    labels = train_df['label']
    benign_count = int((labels == 0).sum())
    malignant_count = int((labels == 1).sum())

    if malignant_count == 0:
        raise ValueError(
            "Cannot compute pos_weight: training data contains no malignant (label == 1) samples"
        )

    return benign_count / malignant_count


def print_training_setup(criterion, optimizer, scheduler, pos_weight):
    """Print a summary of the loss, optimizer, and scheduler settings.

    Only ``pos_weight`` and the first parameter group of ``optimizer`` are
    read; ``criterion`` and ``scheduler`` are accepted but not inspected, so
    the loss name and scheduler values shown are fixed text.
    """
    print_section("‚öôÔ∏è  TRAINING SETUP")

    print("\nüìâ Loss Function: BCEWithLogitsLoss")
    print(f"   - Positive Weight: {pos_weight:.2f}")
    print(f"   - This gives {pos_weight:.1f}x more importance to malignant samples")

    print("\nüéØ Optimizer: AdamW")
    first_group = optimizer.param_groups[0]
    print(f"   - Learning Rate: {first_group['lr']}")
    print(f"   - Weight Decay: {first_group['weight_decay']}")

    print("\nüìÖ Scheduler: ReduceLROnPlateau")
    print("   - Mode: Maximize (monitoring validation AUC)")
    print("   - Factor: 0.5 (halves learning rate)")
    print("   - Patience: 3 epochs")


# ============================================================================
# MAIN EXECUTION FUNCTION
# ============================================================================

def main():
    """Run the full pipeline: data prep, training, test evaluation, and plots.

    Relies on helpers defined in earlier notebook cells (``set_seed``,
    ``DataPreparation``, ``create_dataloaders``, ``create_model``,
    ``train_model``, ``evaluate_model``, the ``plot_*`` functions) and on
    ``roc_auc_score`` being imported.

    Returns:
        tuple: ``(model, history, (labels, preds, probs))`` where ``model``
        holds the best checkpoint's weights, ``history`` is whatever
        ``train_model`` returned, and the inner tuple is the test-set output
        of ``evaluate_model``.
    """

    # Print header
    print("\n" + "=" * 70)
    print("  üè• BINARY SKIN LESION CLASSIFICATION PIPELINE")
    print("  üìä Benign vs Malignant Classification using EfficientNet-B0")
    print("=" * 70)

    # Initialize config
    config = Config()

    # Verify paths exist
    verify_paths(config)

    # Create output directories
    create_output_directories(config)

    # Print device info
    print_device_info(config.DEVICE)

    # Set random seed for reproducibility
    print_section("üé≤ SETTING RANDOM SEED")
    set_seed(config.SEED)
    print(f"‚úÖ Random seed set to {config.SEED} for reproducibility")


    # ========================================================================
    # STEP 1: DATA PREPARATION
    # ========================================================================
    print_section("üìÇ STEP 1: DATA PREPARATION")

    print("\nInitializing DataPreparation class...")
    data_prep = DataPreparation(
        image_dir=config.IMAGE_DIR,
        lesion_csv_path=config.LESION_CSV,
        disease_csv_path=config.DISEASE_CSV
    )

    print("\nCreating stratified train/val/test splits...")
    train_df, val_df, test_df = data_prep.create_stratified_splits(
        test_size=config.TEST_SIZE,
        val_size=config.VAL_SIZE,
        random_state=config.SEED
    )

    print_dataset_info(train_df, val_df, test_df)


    # ========================================================================
    # STEP 2: CREATE DATALOADERS
    # ========================================================================
    print_section("üîÑ STEP 2: CREATING DATALOADERS")

    print("\nInitializing datasets with augmentation pipelines...")
    train_loader, val_loader, test_loader, train_dataset, val_dataset, test_dataset = create_dataloaders(
        train_df=train_df,
        val_df=val_df,
        test_df=test_df,
        batch_size=config.BATCH_SIZE,
        num_workers=config.NUM_WORKERS,
        img_size=config.IMG_SIZE
    )

    print(f"\n‚úÖ DataLoaders Summary:")
    print(f"   Batch Size: {config.BATCH_SIZE}")
    print(f"   Training Batches: {len(train_loader)} ({len(train_dataset)} images)")
    print(f"   Validation Batches: {len(val_loader)} ({len(val_dataset)} images)")
    print(f"   Test Batches: {len(test_loader)} ({len(test_dataset)} images)")
    print(f"   Note: Training uses weighted sampling to oversample malignant cases")


    # ========================================================================
    # STEP 3: CREATE MODEL
    # ========================================================================
    print_section("ü§ñ STEP 3: CREATING MODEL")

    print("\nInitializing EfficientNet-B0...")
    model = create_model(pretrained=config.PRETRAINED, device=config.DEVICE)

    # Count parameters
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

    print(f"\n‚úÖ Model Summary:")
    print(f"   Architecture: EfficientNet-B0")
    print(f"   Pretrained: {config.PRETRAINED}")
    print(f"   Total Parameters: {total_params:,}")
    print(f"   Trainable Parameters: {trainable_params:,}")
    # Size estimate assumes 4 bytes (float32) per parameter.
    print(f"   Model Size: ~{total_params * 4 / 1e6:.1f} MB")
    print(f"   Device: {config.DEVICE}")


    # ========================================================================
    # STEP 4: DEFINE LOSS, OPTIMIZER, SCHEDULER
    # ========================================================================
    print_section("‚öôÔ∏è  STEP 4: SETTING UP TRAINING COMPONENTS")

    # Calculate positive weight for loss function
    pos_weight_value = calculate_pos_weight(train_df)
    # Build the weight explicitly as float32 on the target device. A numpy
    # float64 scalar would otherwise produce a float64 tensor and leave the
    # loss to dtype promotion against float32 logits.
    pos_weight = torch.tensor(
        [float(pos_weight_value)],
        dtype=torch.float32,
        device=config.DEVICE
    )

    # Loss function
    # NOTE(review): the loaders are reported above as oversampling malignant
    # cases AND the loss is pos_weight-ed here; confirm that compensating for
    # the class imbalance twice is intended.
    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

    # Optimizer
    optimizer = optim.AdamW(
        model.parameters(),
        lr=config.LEARNING_RATE,
        weight_decay=config.WEIGHT_DECAY
    )

    # Learning rate scheduler
    scheduler = ReduceLROnPlateau(
        optimizer,
        mode='max',  # Maximize validation AUC
        factor=0.5,  # Reduce LR by half
        patience=3,
        min_lr=1e-7
    )

    print_training_setup(criterion, optimizer, scheduler, pos_weight_value)


    # ========================================================================
    # STEP 5: TRAIN MODEL
    # ========================================================================
    print_section("üèãÔ∏è  STEP 5: TRAINING MODEL")

    print(f"\nStarting training for {config.NUM_EPOCHS} epochs...")
    print(f"Model will be saved to: {config.MODEL_SAVE_PATH}")
    print(f"\nTraining Configuration:")
    print(f"   Epochs: {config.NUM_EPOCHS}")
    print(f"   Batch Size: {config.BATCH_SIZE}")
    print(f"   Learning Rate: {config.LEARNING_RATE}")
    print(f"   Weight Decay: {config.WEIGHT_DECAY}")
    print(f"\nMetrics tracked: Loss, Accuracy, Precision, Recall, F1, AUC-ROC")
    print(f"Best model selection: Based on highest validation AUC-ROC")

    history = train_model(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        num_epochs=config.NUM_EPOCHS,
        device=config.DEVICE,
        save_path=config.MODEL_SAVE_PATH
    )


    # ========================================================================
    # STEP 6: LOAD BEST MODEL AND EVALUATE ON TEST SET
    # ========================================================================
    print_section("üìä STEP 6: EVALUATING BEST MODEL ON TEST SET")

    # Load best model
    print(f"\nLoading best model from {config.MODEL_SAVE_PATH}...")
    # PyTorch >= 2.6 defaults to weights_only=True, which rejects this
    # checkpoint because it stores a numpy scalar next to the state dict, so
    # the full unpickler is requested explicitly. SECURITY: full unpickling
    # can execute arbitrary code; it is acceptable here only because the file
    # was written by train_model() earlier in this same run. Never point this
    # at an untrusted checkpoint.
    checkpoint = torch.load(
        config.MODEL_SAVE_PATH,
        map_location=config.DEVICE,
        weights_only=False
    )
    model.load_state_dict(checkpoint['model_state_dict'])

    print(f"‚úÖ Loaded best model:")
    print(f"   Epoch: {checkpoint['epoch']}")
    print(f"   Validation AUC: {checkpoint['val_auc']:.4f}")

    # Evaluate on test set
    print("\nRunning evaluation on test set...")
    labels, preds, probs, cm = evaluate_model(model, test_loader, config.DEVICE)


    # ========================================================================
    # STEP 7: VISUALIZE AND SAVE RESULTS
    # ========================================================================
    print_section("üìà STEP 7: VISUALIZING RESULTS")

    print("\nGenerating plots...")

    # Define plot paths (SAVE_PLOTS only selects the destination directory)
    if config.SAVE_PLOTS:
        history_path = os.path.join(config.PLOT_DIR, 'training_history.png')
        cm_path = os.path.join(config.PLOT_DIR, 'confusion_matrix.png')
        roc_path = os.path.join(config.PLOT_DIR, 'roc_curve.png')
    else:
        history_path = 'training_history.png'
        cm_path = 'confusion_matrix.png'
        roc_path = 'roc_curve.png'

    # Plot training history
    print("   üìä Plotting training history...")
    plot_training_history(history, save_path=history_path)

    # Plot confusion matrix
    print("   üî¢ Plotting confusion matrix...")
    plot_confusion_matrix(cm, save_path=cm_path)

    # Plot ROC curve
    print("   üìâ Plotting ROC curve...")
    plot_roc_curve(labels, probs, save_path=roc_path)


    # ========================================================================
    # FINAL SUMMARY
    # ========================================================================
    print_section("‚úÖ PIPELINE COMPLETE!")

    print("\nüìÅ Output Files:")
    print(f"   Model: {config.MODEL_SAVE_PATH}")
    if config.SAVE_PLOTS:
        print(f"   Plots: {config.PLOT_DIR}/")
        print(f"      - training_history.png")
        print(f"      - confusion_matrix.png")
        print(f"      - roc_curve.png")
    else:
        print(f"   Plots: Current directory")
        print(f"      - training_history.png")
        print(f"      - confusion_matrix.png")
        print(f"      - roc_curve.png")

    print("\nüìä Final Test Results:")
    # NOTE(review): the element-wise comparison assumes evaluate_model returns
    # array-like preds/labels (e.g. numpy arrays) - confirm.
    test_acc = (preds == labels).mean()
    test_auc = roc_auc_score(labels, probs)
    print(f"   Test Accuracy: {test_acc:.4f}")
    print(f"   Test AUC-ROC: {test_auc:.4f}")

    print("\nüéâ Training and evaluation completed successfully!")
    print("=" * 70)

    return model, history, (labels, preds, probs)


# ============================================================================
# SCRIPT EXECUTION
# ============================================================================

if __name__ == '__main__':
    try:
        # Execute the whole pipeline and keep its results at notebook level.
        model, history, test_results = main()

        print("\n" + "=" * 70)
        print("üéä ALL DONE! You can now use the model for inference.")
        print("=" * 70)
        print("\n".join((
            "\nNext steps:",
            "1. Check the plots in the output directory",
            "2. Use Step11_inference.py to make predictions on new images",
            "3. Fine-tune hyperparameters if needed and retrain",
        )))

    except KeyboardInterrupt:
        # A manual stop is reported but not re-raised.
        print("\n".join((
            "\n\n‚ö†Ô∏è  Training interrupted by user",
            "Partial progress may have been saved",
        )))

    except Exception as err:
        # Print a troubleshooting checklist, then re-raise for the traceback.
        print("\n\n‚ùå ERROR occurred during execution:")
        print(f"   {type(err).__name__}: {err}")
        print("\n".join((
            "\nPlease check:",
            "1. All file paths are correct",
            "2. CSV files have the correct column names",
            "3. Images exist and are readable",
            "4. All previous step files (Step1-9) are present",
        )))
        raise


  üè• BINARY SKIN LESION CLASSIFICATION PIPELINE
  üìä Benign vs Malignant Classification using EfficientNet-B0

  üîç VERIFYING PATHS
‚úÖ Image Directory: /content/local_data/images
‚úÖ Lesion CSV: /content/local_data/lesion_grouping.csv
‚úÖ Disease CSV: /content/local_data/groundtruth.csv

‚úÖ All paths verified!
‚úÖ Output directory created: plots

  üíª DEVICE INFORMATION
Device: cuda
GPU Name: Tesla T4
GPU Memory: 15.83 GB
CUDA Version: 12.6

  üé≤ SETTING RANDOM SEED
‚úÖ Random seed set to 42 for reproducibility

  üìÇ STEP 1: DATA PREPARATION

Initializing DataPreparation class...
üìä DATA PREPARATION
Lesion CSV shape: (10015, 3)
Disease CSV shape: (10015, 8)

Creating stratified train/val/test splits...

‚úÖ Binary labels created:
   Total images: 10015
   Benign (0): 8061
   Malignant (1): 1954
   Class ratio (Benign:Malignant): 8061:1954

‚úÖ Dataset splits created:
   Training: 7009 images
      - Benign: 5641
      - Malignant: 1368
   Validation: 1503 images
      

model.safetensors:   0%|          | 0.00/21.4M [00:00<?, ?B/s]

ü§ñ MODEL ARCHITECTURE
Model: EfficientNet-B0
Pretrained: True
Number of parameters: 4,008,829
Trainable parameters: 4,008,829
Device: cuda

‚úÖ Model Summary:
   Architecture: EfficientNet-B0
   Pretrained: True
   Total Parameters: 4,008,829
   Trainable Parameters: 4,008,829
   Model Size: ~16.0 MB
   Device: cuda

  ‚öôÔ∏è  STEP 4: SETTING UP TRAINING COMPONENTS

  ‚öôÔ∏è  TRAINING SETUP

üìâ Loss Function: BCEWithLogitsLoss
   - Positive Weight: 4.12
   - This gives 4.1x more importance to malignant samples

üéØ Optimizer: AdamW
   - Learning Rate: 0.0001
   - Weight Decay: 1e-05

üìÖ Scheduler: ReduceLROnPlateau
   - Mode: Maximize (monitoring validation AUC)
   - Factor: 0.5 (halves learning rate)
   - Patience: 3 epochs

  üèãÔ∏è  STEP 5: TRAINING MODEL

Starting training for 30 epochs...
Model will be saved to: best_efficientnet_b0_binary.pth

Training Configuration:
   Epochs: 30
   Batch Size: 32
   Learning Rate: 0.0001
   Weight Decay: 1e-05

Metrics tracked: Loss, Ac

Epoch 1 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 220/220 [01:07<00:00,  3.28it/s, loss=1.45]
Epoch 1 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:11<00:00,  4.21it/s, loss=0.729]



üìä Epoch 1 Results:
   Train Loss: 0.9047 | Train Acc: 0.7186
   Val Loss: 0.6338 | Val Acc: 0.7259
   Val Precision: 0.4111 | Val Recall: 0.9386
   Val F1: 0.5717 | Val AUC: 0.9041
   ‚úÖ Best model saved! (AUC: 0.9041)

Epoch 2/30


Epoch 2 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 220/220 [01:05<00:00,  3.35it/s, loss=1.21]
Epoch 2 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:08<00:00,  5.31it/s, loss=0.667]



üìä Epoch 2 Results:
   Train Loss: 0.7087 | Train Acc: 0.7583
   Val Loss: 0.6102 | Val Acc: 0.7505
   Val Precision: 0.4351 | Val Recall: 0.9386
   Val F1: 0.5946 | Val AUC: 0.9171
   ‚úÖ Best model saved! (AUC: 0.9171)

Epoch 3/30


Epoch 3 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 220/220 [01:03<00:00,  3.45it/s, loss=1.75]
Epoch 3 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:09<00:00,  4.94it/s, loss=0.561]



üìä Epoch 3 Results:
   Train Loss: 0.6822 | Train Acc: 0.7744
   Val Loss: 0.5411 | Val Acc: 0.8230
   Val Precision: 0.5268 | Val Recall: 0.9044
   Val F1: 0.6658 | Val AUC: 0.9288
   ‚úÖ Best model saved! (AUC: 0.9288)

Epoch 4/30


Epoch 4 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 220/220 [01:02<00:00,  3.49it/s, loss=1.1]
Epoch 4 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:10<00:00,  4.60it/s, loss=0.442]



üìä Epoch 4 Results:
   Train Loss: 0.6267 | Train Acc: 0.7938
   Val Loss: 0.5520 | Val Acc: 0.8283
   Val Precision: 0.5359 | Val Recall: 0.8908
   Val F1: 0.6692 | Val AUC: 0.9282

Epoch 5/30


Epoch 5 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 220/220 [01:01<00:00,  3.56it/s, loss=1.31]
Epoch 5 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:10<00:00,  4.59it/s, loss=0.655]



üìä Epoch 5 Results:
   Train Loss: 0.5810 | Train Acc: 0.8120
   Val Loss: 0.6152 | Val Acc: 0.8743
   Val Precision: 0.6529 | Val Recall: 0.7577
   Val F1: 0.7014 | Val AUC: 0.9278

Epoch 6/30


Epoch 6 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 220/220 [01:02<00:00,  3.51it/s, loss=1.15]
Epoch 6 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:10<00:00,  4.52it/s, loss=0.51]



üìä Epoch 6 Results:
   Train Loss: 0.5504 | Train Acc: 0.8292
   Val Loss: 0.5249 | Val Acc: 0.8603
   Val Precision: 0.6000 | Val Recall: 0.8498
   Val F1: 0.7034 | Val AUC: 0.9366
   ‚úÖ Best model saved! (AUC: 0.9366)

Epoch 7/30


Epoch 7 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 220/220 [01:02<00:00,  3.50it/s, loss=0.827]
Epoch 7 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:10<00:00,  4.50it/s, loss=0.754]



üìä Epoch 7 Results:
   Train Loss: 0.5040 | Train Acc: 0.8462
   Val Loss: 0.6927 | Val Acc: 0.8836
   Val Precision: 0.6967 | Val Recall: 0.7133
   Val F1: 0.7049 | Val AUC: 0.9314

Epoch 8/30


Epoch 8 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 220/220 [01:02<00:00,  3.52it/s, loss=0.938]
Epoch 8 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:10<00:00,  4.50it/s, loss=0.442]



üìä Epoch 8 Results:
   Train Loss: 0.4626 | Train Acc: 0.8595
   Val Loss: 0.5478 | Val Acc: 0.8483
   Val Precision: 0.5758 | Val Recall: 0.8430
   Val F1: 0.6842 | Val AUC: 0.9360

Epoch 9/30


Epoch 9 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 220/220 [01:02<00:00,  3.50it/s, loss=1.64]
Epoch 9 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:10<00:00,  4.55it/s, loss=0.467]



üìä Epoch 9 Results:
   Train Loss: 0.4617 | Train Acc: 0.8649
   Val Loss: 0.5994 | Val Acc: 0.8749
   Val Precision: 0.6378 | Val Recall: 0.8294
   Val F1: 0.7211 | Val AUC: 0.9403
   ‚úÖ Best model saved! (AUC: 0.9403)

Epoch 10/30


Epoch 10 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 220/220 [01:02<00:00,  3.50it/s, loss=1.31]
Epoch 10 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:10<00:00,  4.36it/s, loss=0.72]



üìä Epoch 10 Results:
   Train Loss: 0.4380 | Train Acc: 0.8655
   Val Loss: 0.6817 | Val Acc: 0.8916
   Val Precision: 0.7110 | Val Recall: 0.7474
   Val F1: 0.7288 | Val AUC: 0.9386

Epoch 11/30


Epoch 11 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 220/220 [01:09<00:00,  3.18it/s, loss=1.16]
Epoch 11 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:09<00:00,  4.85it/s, loss=0.59]



üìä Epoch 11 Results:
   Train Loss: 0.3831 | Train Acc: 0.8853
   Val Loss: 0.6811 | Val Acc: 0.8989
   Val Precision: 0.7423 | Val Recall: 0.7372
   Val F1: 0.7397 | Val AUC: 0.9447
   ‚úÖ Best model saved! (AUC: 0.9447)

Epoch 12/30


Epoch 12 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 220/220 [01:08<00:00,  3.22it/s, loss=1.93]
Epoch 12 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:10<00:00,  4.38it/s, loss=0.665]



üìä Epoch 12 Results:
   Train Loss: 0.3748 | Train Acc: 0.8921
   Val Loss: 0.7277 | Val Acc: 0.8882
   Val Precision: 0.6972 | Val Recall: 0.7543
   Val F1: 0.7246 | Val AUC: 0.9403

Epoch 13/30


Epoch 13 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 220/220 [01:05<00:00,  3.36it/s, loss=1.61]
Epoch 13 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:10<00:00,  4.43it/s, loss=1.3]



üìä Epoch 13 Results:
   Train Loss: 0.3600 | Train Acc: 0.8974
   Val Loss: 0.9249 | Val Acc: 0.8922
   Val Precision: 0.7435 | Val Recall: 0.6826
   Val F1: 0.7117 | Val AUC: 0.9398

Epoch 14/30


Epoch 14 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 220/220 [01:05<00:00,  3.33it/s, loss=1.58]
Epoch 14 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:09<00:00,  4.76it/s, loss=0.766]



üìä Epoch 14 Results:
   Train Loss: 0.3476 | Train Acc: 0.9026
   Val Loss: 0.9066 | Val Acc: 0.8955
   Val Precision: 0.7464 | Val Recall: 0.7031
   Val F1: 0.7241 | Val AUC: 0.9345

Epoch 15/30


Epoch 15 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 220/220 [01:06<00:00,  3.30it/s, loss=1.6]
Epoch 15 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:09<00:00,  4.86it/s, loss=0.571]



üìä Epoch 15 Results:
   Train Loss: 0.3391 | Train Acc: 0.9030
   Val Loss: 0.7429 | Val Acc: 0.8929
   Val Precision: 0.7185 | Val Recall: 0.7406
   Val F1: 0.7294 | Val AUC: 0.9401

Epoch 16/30


Epoch 16 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 220/220 [01:05<00:00,  3.35it/s, loss=1.21]
Epoch 16 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:10<00:00,  4.37it/s, loss=0.952]



üìä Epoch 16 Results:
   Train Loss: 0.3107 | Train Acc: 0.9131
   Val Loss: 0.8841 | Val Acc: 0.9022
   Val Precision: 0.7704 | Val Recall: 0.7099
   Val F1: 0.7389 | Val AUC: 0.9406

Epoch 17/30


Epoch 17 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 220/220 [01:05<00:00,  3.38it/s, loss=1.33]
Epoch 17 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:10<00:00,  4.38it/s, loss=1.22]



üìä Epoch 17 Results:
   Train Loss: 0.2843 | Train Acc: 0.9202
   Val Loss: 1.0948 | Val Acc: 0.8962
   Val Precision: 0.7915 | Val Recall: 0.6348
   Val F1: 0.7045 | Val AUC: 0.9412

Epoch 18/30


Epoch 18 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 220/220 [01:04<00:00,  3.39it/s, loss=1.46]
Epoch 18 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:10<00:00,  4.53it/s, loss=1.14]



üìä Epoch 18 Results:
   Train Loss: 0.2747 | Train Acc: 0.9231
   Val Loss: 1.0542 | Val Acc: 0.8982
   Val Precision: 0.7823 | Val Recall: 0.6621
   Val F1: 0.7172 | Val AUC: 0.9437

Epoch 19/30


Epoch 19 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 220/220 [01:06<00:00,  3.30it/s, loss=1.37]
Epoch 19 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:08<00:00,  5.24it/s, loss=1.02]



üìä Epoch 19 Results:
   Train Loss: 0.2704 | Train Acc: 0.9241
   Val Loss: 1.0175 | Val Acc: 0.8962
   Val Precision: 0.7546 | Val Recall: 0.6928
   Val F1: 0.7224 | Val AUC: 0.9418

Epoch 20/30


Epoch 20 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 220/220 [01:06<00:00,  3.30it/s, loss=1.07]
Epoch 20 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:10<00:00,  4.45it/s, loss=1.51]



üìä Epoch 20 Results:
   Train Loss: 0.2535 | Train Acc: 0.9235
   Val Loss: 1.1818 | Val Acc: 0.8909
   Val Precision: 0.7570 | Val Recall: 0.6485
   Val F1: 0.6985 | Val AUC: 0.9418

Epoch 21/30


Epoch 21 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 220/220 [01:05<00:00,  3.38it/s, loss=1.69]
Epoch 21 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:10<00:00,  4.43it/s, loss=1.75]



üìä Epoch 21 Results:
   Train Loss: 0.2447 | Train Acc: 0.9332
   Val Loss: 1.2819 | Val Acc: 0.9015
   Val Precision: 0.8085 | Val Recall: 0.6485
   Val F1: 0.7197 | Val AUC: 0.9389

Epoch 22/30


Epoch 22 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 220/220 [01:05<00:00,  3.33it/s, loss=0.951]
Epoch 22 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:10<00:00,  4.38it/s, loss=1.79]



üìä Epoch 22 Results:
   Train Loss: 0.2378 | Train Acc: 0.9301
   Val Loss: 1.2591 | Val Acc: 0.8969
   Val Precision: 0.7805 | Val Recall: 0.6553
   Val F1: 0.7124 | Val AUC: 0.9421

Epoch 23/30


Epoch 23 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 220/220 [01:07<00:00,  3.26it/s, loss=1.4]
Epoch 23 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:08<00:00,  5.23it/s, loss=0.995]



üìä Epoch 23 Results:
   Train Loss: 0.2323 | Train Acc: 0.9341
   Val Loss: 1.1117 | Val Acc: 0.9022
   Val Precision: 0.7765 | Val Recall: 0.6997
   Val F1: 0.7361 | Val AUC: 0.9409

Epoch 24/30


Epoch 24 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 220/220 [01:06<00:00,  3.32it/s, loss=0.634]
Epoch 24 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:10<00:00,  4.35it/s, loss=1.09]



üìä Epoch 24 Results:
   Train Loss: 0.2204 | Train Acc: 0.9362
   Val Loss: 1.0779 | Val Acc: 0.8989
   Val Precision: 0.7787 | Val Recall: 0.6724
   Val F1: 0.7216 | Val AUC: 0.9435

Epoch 25/30


Epoch 25 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 220/220 [01:05<00:00,  3.37it/s, loss=0.601]
Epoch 25 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:10<00:00,  4.36it/s, loss=1.1]



üìä Epoch 25 Results:
   Train Loss: 0.2280 | Train Acc: 0.9352
   Val Loss: 1.1183 | Val Acc: 0.8995
   Val Precision: 0.7817 | Val Recall: 0.6724
   Val F1: 0.7229 | Val AUC: 0.9434

Epoch 26/30


Epoch 26 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 220/220 [01:04<00:00,  3.40it/s, loss=0.874]
Epoch 26 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:10<00:00,  4.43it/s, loss=0.747]



üìä Epoch 26 Results:
   Train Loss: 0.2291 | Train Acc: 0.9362
   Val Loss: 1.0296 | Val Acc: 0.8975
   Val Precision: 0.7439 | Val Recall: 0.7235
   Val F1: 0.7336 | Val AUC: 0.9434

Epoch 27/30


Epoch 27 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 220/220 [01:04<00:00,  3.42it/s, loss=0.667]
Epoch 27 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:10<00:00,  4.63it/s, loss=1.39]



üìä Epoch 27 Results:
   Train Loss: 0.2214 | Train Acc: 0.9357
   Val Loss: 1.2808 | Val Acc: 0.8929
   Val Precision: 0.7661 | Val Recall: 0.6485
   Val F1: 0.7024 | Val AUC: 0.9413

Epoch 28/30


Epoch 28 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 220/220 [01:05<00:00,  3.37it/s, loss=2.13]
Epoch 28 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:09<00:00,  5.11it/s, loss=1]



üìä Epoch 28 Results:
   Train Loss: 0.2162 | Train Acc: 0.9372
   Val Loss: 1.1449 | Val Acc: 0.8969
   Val Precision: 0.7614 | Val Recall: 0.6860
   Val F1: 0.7217 | Val AUC: 0.9408

Epoch 29/30


Epoch 29 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 220/220 [01:05<00:00,  3.36it/s, loss=1.38]
Epoch 29 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:09<00:00,  4.92it/s, loss=1.07]



üìä Epoch 29 Results:
   Train Loss: 0.2055 | Train Acc: 0.9426
   Val Loss: 1.1557 | Val Acc: 0.8982
   Val Precision: 0.7734 | Val Recall: 0.6758
   Val F1: 0.7213 | Val AUC: 0.9425

Epoch 30/30


Epoch 30 [Train]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 220/220 [01:04<00:00,  3.40it/s, loss=0.594]
Epoch 30 [Val]: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:10<00:00,  4.41it/s, loss=1.82]



üìä Epoch 30 Results:
   Train Loss: 0.2131 | Train Acc: 0.9344
   Val Loss: 1.4220 | Val Acc: 0.9002
   Val Precision: 0.8017 | Val Recall: 0.6485
   Val F1: 0.7170 | Val AUC: 0.9407

‚úÖ TRAINING COMPLETE!
   Best Validation AUC: 0.9447

  üìä STEP 6: EVALUATING BEST MODEL ON TEST SET

Loading best model from best_efficientnet_b0_binary.pth...


‚ùå ERROR occurred during execution:
   UnpicklingError: Weights only load failed. This file can still be loaded, to do so you have two options, [1mdo those steps only if you trust the source of the checkpoint[0m. 
	(1) In PyTorch 2.6, we changed the default value of the `weights_only` argument in `torch.load` from `False` to `True`. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
	(2) Alternatively, to load with `weights_only=True` please check the recommended steps in the following error message.
	Weights

UnpicklingError: Weights only load failed. This file can still be loaded, to do so you have two options, [1mdo those steps only if you trust the source of the checkpoint[0m. 
	(1) In PyTorch 2.6, we changed the default value of the `weights_only` argument in `torch.load` from `False` to `True`. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
	(2) Alternatively, to load with `weights_only=True` please check the recommended steps in the following error message.
	WeightsUnpickler error: Unsupported global: GLOBAL numpy._core.multiarray.scalar was not an allowed global by default. Please use `torch.serialization.add_safe_globals([numpy._core.multiarray.scalar])` or the `torch.serialization.safe_globals([numpy._core.multiarray.scalar])` context manager to allowlist this global if you trust this class/function.

Check the documentation of torch.load to learn more about types accepted by default with weights_only https://pytorch.org/docs/stable/generated/torch.load.html.

In [None]:
import numpy as np
import torch
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score,
)

# NOTE(review): this cell expects `model` (same architecture as the checkpoint)
# and `test_loader` to already exist in the kernel. main() keeps both as
# locals, so confirm which earlier cell defines them before Restart & Run All.

# Evaluate on whatever device the model already lives on, not a hard-coded
# 'cuda', so the cell also works on a CPU-only runtime.
eval_device = next(model.parameters()).device

# Load the best model. weights_only=False is required on PyTorch >= 2.6 because
# the checkpoint stores a numpy scalar alongside the state dict.
# SECURITY: full unpickling can execute arbitrary code - only load checkpoints
# produced by this notebook.
checkpoint = torch.load(
    'best_efficientnet_b0_binary.pth',
    map_location=eval_device,
    weights_only=False,
)
model.load_state_dict(checkpoint['model_state_dict'])

print(f"‚úÖ Loaded best model from Epoch {checkpoint['epoch']}")
print(f"   Best Validation AUC: {checkpoint['val_auc']:.4f}")

# Now evaluate on test set
model.eval()
all_preds = []
all_probs = []
all_labels = []

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(eval_device)
        # Adds a trailing dimension to the labels before they are collected.
        labels = labels.float().unsqueeze(1)

        logits = model(images)
        probs = torch.sigmoid(logits)
        preds = (probs >= 0.5).float()  # fixed 0.5 decision threshold

        all_preds.extend(preds.cpu().numpy())
        all_probs.extend(probs.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Calculate test metrics on flat 1-D arrays
all_preds = np.array(all_preds).flatten()
all_probs = np.array(all_probs).flatten()
all_labels = np.array(all_labels).flatten()

test_acc = accuracy_score(all_labels, all_preds)
test_precision = precision_score(all_labels, all_preds)
test_recall = recall_score(all_labels, all_preds)
test_f1 = f1_score(all_labels, all_preds)
test_auc = roc_auc_score(all_labels, all_probs)

print("\n" + "="*60)
print("üìä TEST SET RESULTS")
print("="*60)
print(f"Accuracy:  {test_acc:.4f} ({test_acc*100:.2f}%)")
print(f"Precision: {test_precision:.4f} ({test_precision*100:.2f}%)")
print(f"Recall:    {test_recall:.4f} ({test_recall*100:.2f}%)")
print(f"F1 Score:  {test_f1:.4f}")
print(f"AUC-ROC:   {test_auc:.4f} ({test_auc*100:.2f}%) üåü")

print("\nüìã Classification Report:")
print(classification_report(all_labels, all_preds,
                           target_names=['Benign', 'Malignant'],
                           digits=4))


‚úÖ Loaded best model from Epoch 11
   Best Validation AUC: 0.9447

üìä TEST SET RESULTS
Accuracy:  0.9035 (90.35%)
Precision: 0.7242 (72.42%)
Recall:    0.8157 (81.57%)
F1 Score:  0.7673
AUC-ROC:   0.9518 (95.18%) üåü

üìã Classification Report:
              precision    recall  f1-score   support

      Benign     0.9540    0.9248    0.9392      1210
   Malignant     0.7242    0.8157    0.7673       293

    accuracy                         0.9035      1503
   macro avg     0.8391    0.8702    0.8532      1503
weighted avg     0.9092    0.9035    0.9056      1503



In [None]:
import torch
from sklearn.metrics import roc_auc_score
# ================= DATA =================
# Rebuild the loaders from the existing train/val/test DataFrames so that
# `test_loader` is available for the evaluation step below.
config = Config()
train_loader, val_loader, test_loader, train_dataset, val_dataset, test_dataset = create_dataloaders(
    train_df=train_df,
    val_df=val_df,
    test_df=test_df,
    batch_size=config.BATCH_SIZE,
    num_workers=config.NUM_WORKERS,
    img_size=config.IMG_SIZE,
)

# ================= CONFIG =================
MODEL_PATH = 'best_efficientnet_b0_binary.pth'
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# pretrained=False: the weights come from the checkpoint loaded below.
model = create_model(pretrained=False, device=DEVICE)

# ================= LOAD CHECKPOINT =================
# map_location remaps the stored tensors onto DEVICE, so a checkpoint written
# on a GPU runtime still loads when DEVICE has fallen back to the CPU.
# SECURITY: weights_only=False (needed for this checkpoint on PyTorch 2.6+)
# unpickles arbitrary Python objects -- only load checkpoint files you trust.
checkpoint = torch.load(MODEL_PATH, map_location=DEVICE, weights_only=False)
model.load_state_dict(checkpoint['model_state_dict'])
model.to(DEVICE)
model.eval()

print(f"‚úÖ Loaded checkpoint from {MODEL_PATH}")
print(f"Epoch: {checkpoint.get('epoch', 'N/A')}, Val AUC: {checkpoint.get('val_auc', 'N/A')}")

# ================= EVALUATE =================
# evaluate_model returns the true labels, hard predictions, probabilities and
# the confusion matrix for the test loader created at the top of this cell.
labels, preds, probs, cm = evaluate_model(model, test_loader, DEVICE)

# Compute metrics
test_acc = (preds == labels).mean()
test_auc = roc_auc_score(labels, probs)

print("\nüìä Test Results:")
print(f"Accuracy: {test_acc:.4f}")
print(f"AUC-ROC: {test_auc:.4f}")
print("\nConfusion Matrix:")
print(cm)


üîÑ CREATING DATALOADERS
‚úÖ Dataset initialized with 7009 images
   Class distribution: {0.0: np.int64(5641), 1.0: np.int64(1368)}
‚úÖ Dataset initialized with 1503 images
   Class distribution: {0.0: np.int64(1210), 1.0: np.int64(293)}
‚úÖ Dataset initialized with 1503 images
   Class distribution: {0.0: np.int64(1210), 1.0: np.int64(293)}
‚úÖ Weighted sampler created:
   Class counts: Benign=5641, Malignant=1368
   Class weights: Benign=0.0002, Malignant=0.0007
   This will oversample malignant cases during training!

‚úÖ DataLoaders created:
   Training batches: 220
   Validation batches: 47
   Test batches: 47
ü§ñ MODEL ARCHITECTURE
Model: EfficientNet-B0
Pretrained: False
Number of parameters: 4,008,829
Trainable parameters: 4,008,829
Device: cuda
‚úÖ Loaded checkpoint from best_efficientnet_b0_binary.pth
Epoch: 11, Val AUC: 0.9447183595182354
üìä EVALUATING MODEL ON TEST SET


Testing: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:10<00:00,  4.64it/s]


‚úÖ Test Results:
   Accuracy: 0.9035
   AUC-ROC: 0.9518

üìã Classification Report:
              precision    recall  f1-score   support

      Benign     0.9540    0.9248    0.9392      1210
   Malignant     0.7242    0.8157    0.7673       293

    accuracy                         0.9035      1503
   macro avg     0.8391    0.8702    0.8532      1503
weighted avg     0.9092    0.9035    0.9056      1503


üî¢ Confusion Matrix:
              Predicted
              Benign  Malignant
Actual Benign     1119     91
       Malignant    54    239

üìä Test Results:
Accuracy: 0.9035
AUC-ROC: 0.9518

Confusion Matrix:
[[1119   91]
 [  54  239]]





In [None]:
# Complete_Evaluation_From_Scratch.py
"""
Complete evaluation script that recreates everything needed
Run this as a standalone script after training
"""

import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import os
from sklearn.metrics import *
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

# ============================================================================
# STEP 1: IMPORT ALL NECESSARY COMPONENTS
# ============================================================================

# Import the project components used below.
# As a standalone script this needs the Step*.py modules on the import path.
# Inside the notebook those modules do not exist as files (importing them raised
# ModuleNotFoundError), so fall back to objects already defined in the current
# session and fail with an explicit message if any of them is missing.
try:
    from Step1_imports import set_seed
    from Step2_data_preparation import DataPreparation
    from Step3_augmentation import get_val_transforms
    from Step4_dataset import SkinLesionBinaryDataset
    from Step7_model import EfficientNetB0Binary
    from Step9_evaluation import plot_confusion_matrix, plot_roc_curve
except ModuleNotFoundError:
    _required_names = (
        'set_seed',
        'DataPreparation',
        'get_val_transforms',
        'SkinLesionBinaryDataset',
        'EfficientNetB0Binary',
        'plot_confusion_matrix',
        'plot_roc_curve',
    )
    _missing_names = [name for name in _required_names if name not in globals()]
    if _missing_names:
        raise ImportError(
            "The Step*.py modules are not importable and these names are not "
            f"defined in the current session: {', '.join(_missing_names)}. "
            "Run the cells that define them (or add the Step*.py files to the "
            "import path) before running this evaluation."
        ) from None

# ============================================================================
# STEP 2: CONFIGURATION
# ============================================================================

class EvalConfig:
    """Settings for the standalone evaluation run: file locations, loader and
    split parameters, and the device the model is evaluated on."""

    # --- Input locations (UPDATE THESE for your environment) ---
    IMAGE_DIR: str = "/content/local_data/images"
    LESION_CSV: str = "/content/local_data/lesion_grouping.csv"
    DISEASE_CSV: str = "/content/local_data/groundtruth.csv"
    MODEL_PATH: str = "best_efficientnet_b0_binary.pth"

    # --- Output location for generated figures ---
    PLOT_DIR: str = "plots"

    # --- DataLoader / preprocessing parameters ---
    BATCH_SIZE: int = 32
    NUM_WORKERS: int = 2
    IMG_SIZE: int = 224

    # --- Split parameters passed to create_stratified_splits ---
    TEST_SIZE: float = 0.15
    VAL_SIZE: float = 0.15
    SEED: int = 42

    # --- Compute device: GPU when one is available, otherwise CPU ---
    DEVICE: torch.device = (
        torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    )

# ============================================================================
# STEP 3: RECREATE TEST DATASET
# ============================================================================

print("=" * 70)
print("üìä RECREATING TEST DATASET")
print("=" * 70)

# Seed before splitting so the split is produced from the same seed and
# split sizes that EvalConfig specifies.
set_seed(EvalConfig.SEED)

# Prepare data
data_prep = DataPreparation(
    image_dir=EvalConfig.IMAGE_DIR,
    lesion_csv_path=EvalConfig.LESION_CSV,
    disease_csv_path=EvalConfig.DISEASE_CSV
)

# Only test_df is used below; train_df / val_df are returned by the same call.
train_df, val_df, test_df = data_prep.create_stratified_splits(
    test_size=EvalConfig.TEST_SIZE,
    val_size=EvalConfig.VAL_SIZE,
    random_state=EvalConfig.SEED
)

# Create test dataset with the validation-time transforms
test_dataset = SkinLesionBinaryDataset(
    test_df,
    transform=get_val_transforms(img_size=EvalConfig.IMG_SIZE)
)

# Create test loader. shuffle=False keeps the sample order fixed across runs.
# Pinned host memory only helps host-to-GPU copies, and PyTorch warns when it is
# requested without an accelerator, so enable it only when evaluating on CUDA.
from torch.utils.data import DataLoader
test_loader = DataLoader(
    test_dataset,
    batch_size=EvalConfig.BATCH_SIZE,
    shuffle=False,
    num_workers=EvalConfig.NUM_WORKERS,
    pin_memory=(EvalConfig.DEVICE.type == 'cuda')
)

print(f"\n‚úÖ Test dataset created: {len(test_dataset)} images")
print(f"‚úÖ Test loader created: {len(test_loader)} batches")

# ============================================================================
# STEP 4: LOAD TRAINED MODEL
# ============================================================================

print("\n" + "=" * 70)
print("ü§ñ LOADING TRAINED MODEL")
print("=" * 70)

# pretrained=False: the weights are taken from the checkpoint loaded below.
model = EfficientNetB0Binary(pretrained=False, num_classes=1)
model = model.to(EvalConfig.DEVICE)

# Load checkpoint (WITH FIX FOR PYTORCH 2.6+)
# map_location remaps the stored tensors onto EvalConfig.DEVICE, so a checkpoint
# saved on a GPU runtime still loads when the device has fallen back to the CPU.
# SECURITY: weights_only=False unpickles arbitrary Python objects -- only load
# checkpoint files you trust.
checkpoint = torch.load(
    EvalConfig.MODEL_PATH,
    map_location=EvalConfig.DEVICE,
    weights_only=False,
)
model.load_state_dict(checkpoint['model_state_dict'])

print(f"\n‚úÖ Model loaded from: {EvalConfig.MODEL_PATH}")
print(f"   Training Epoch: {checkpoint['epoch']}")
print(f"   Validation AUC: {checkpoint['val_auc']:.4f}")

# ============================================================================
# STEP 5: EVALUATE ON TEST SET
# ============================================================================

section_rule = "=" * 70
print("\n" + section_rule)
print("üìä EVALUATING ON TEST SET")
print(section_rule)

# Switch the model to evaluation mode before running the test batches.
model.eval()
all_preds, all_probs, all_labels = [], [], []

with torch.no_grad():  # gradients are not needed for evaluation
    for batch_images, batch_targets in tqdm(test_loader, desc='Testing'):
        # Sigmoid turns the model's logits into probabilities; 0.5 is the
        # decision threshold for the positive class.
        batch_probs = torch.sigmoid(model(batch_images.to(EvalConfig.DEVICE)))
        batch_preds = (batch_probs >= 0.5).float()
        # Labels get an extra trailing dimension, as in the training-time layout.
        batch_targets = batch_targets.float().unsqueeze(1)

        # Bring each batch result back to host memory, one entry per sample.
        for store, batch_values in (
            (all_preds, batch_preds),
            (all_probs, batch_probs),
            (all_labels, batch_targets),
        ):
            store.extend(batch_values.cpu().numpy())

# Flatten everything to 1-D vectors with one value per test sample.
all_preds, all_probs, all_labels = [
    np.array(collected).flatten()
    for collected in (all_preds, all_probs, all_labels)
]

# ============================================================================
# STEP 6: CALCULATE AND DISPLAY METRICS
# ============================================================================

# Hard-prediction metrics (zero_division=0 avoids warnings when a class is
# never predicted) plus AUC computed from the raw probabilities.
test_acc = accuracy_score(all_labels, all_preds)
test_precision = precision_score(all_labels, all_preds, zero_division=0)
test_recall = recall_score(all_labels, all_preds, zero_division=0)
test_f1 = f1_score(all_labels, all_preds, zero_division=0)
test_auc = roc_auc_score(all_labels, all_probs)

heavy_rule = "=" * 70

print("\n" + heavy_rule)
print("üéØ FINAL TEST SET RESULTS")
print(heavy_rule)
print("\n" + f"{'Metric':<15} {'Value':<10} Percentage")
print("-" * 70)

# One table row per metric: (label, value, text appended after the percentage).
metric_rows = (
    ('Accuracy', test_acc, ""),
    ('Precision', test_precision, ""),
    ('Recall', test_recall, ""),
    ('F1 Score', test_f1, ""),
    ('AUC-ROC', test_auc, " üåü"),
)
for metric_name, metric_value, trailer in metric_rows:
    print(f"{metric_name:<15} {metric_value:<10.4f} {metric_value*100:.2f}%{trailer}")

print("\n" + heavy_rule)
print("üìã DETAILED CLASSIFICATION REPORT")
print(heavy_rule)
class_names = ['Benign', 'Malignant']
report_text = classification_report(
    all_labels,
    all_preds,
    target_names=class_names,
    digits=4,
)
print(report_text)

# Rows are the actual class, columns the predicted class.
cm = confusion_matrix(all_labels, all_preds)
print("\nüî¢ Confusion Matrix:")
print("                    Predicted")
print("              Benign      Malignant")
print(f"Actual Benign    {cm[0, 0]:5d}      {cm[0, 1]:5d}")
print(f"       Malignant {cm[1, 0]:5d}      {cm[1, 1]:5d}")

# ============================================================================
# STEP 7: GENERATE PLOTS
# ============================================================================

banner_rule = "=" * 70
print("\n" + banner_rule)
print("üìà GENERATING PLOTS")
print(banner_rule)

# Make sure the output folder exists before the plot helpers write into it.
plot_dir = EvalConfig.PLOT_DIR
os.makedirs(plot_dir, exist_ok=True)

confusion_png = "final_test_confusion_matrix.png"
roc_png = "final_test_roc_curve.png"

# Confusion matrix figure, from the matrix computed in the metrics step
plot_confusion_matrix(cm, save_path=f"{plot_dir}/{confusion_png}")

# ROC curve figure, from the true labels and predicted probabilities
plot_roc_curve(all_labels, all_probs, save_path=f"{plot_dir}/{roc_png}")

print("\n" + banner_rule)
print("‚úÖ EVALUATION COMPLETE!")
print(banner_rule)
print(f"\nüìÅ Results saved in: {plot_dir}/")
for png_name in (confusion_png, roc_png):
    print(f"   - {png_name}")

ModuleNotFoundError: No module named 'Step1_imports'

In [None]:
# Quick evaluation (requires `model` and `test_loader` from earlier cells)
import torch  # imported here: this cell previously failed with "name 'torch' is not defined"

# Evaluate on the device the model already lives on. This avoids depending on a
# separately defined `device` variable and lets map_location place the
# checkpoint tensors correctly even if the file was saved from a GPU runtime.
model_device = next(model.parameters()).device

# SECURITY: weights_only=False unpickles arbitrary Python objects -- only load
# checkpoint files you trust.
checkpoint = torch.load(
    'best_efficientnet_b0_binary.pth',
    map_location=model_device,
    weights_only=False,
)
model.load_state_dict(checkpoint['model_state_dict'])

labels, preds, probs, cm = evaluate_model(model, test_loader, model_device)
plot_confusion_matrix(cm)
plot_roc_curve(labels, probs)

NameError: name 'torch' is not defined

In [None]:
from google.colab import drive
import os
import shutil

DRIVE_MOUNT_POINT = '/content/drive'
MODEL_DEST = '/content/drive/MyDrive/skin-disease-el/skin_lesion_model.pth'
PLOTS_DEST = '/content/drive/MyDrive/skin_lesion_plots'

# Mount Google Drive if it is not mounted yet; the copies below write under it.
if not os.path.isdir(os.path.join(DRIVE_MOUNT_POINT, 'MyDrive')):
    drive.mount(DRIVE_MOUNT_POINT)

# Save model to Drive (create the target folder first so the copy cannot fail
# on a missing directory).
os.makedirs(os.path.dirname(MODEL_DEST), exist_ok=True)
shutil.copy('best_efficientnet_b0_binary.pth', MODEL_DEST)

# Save plots to Drive. `plots` is a directory, so copy it once into a folder
# instead of into paths named like individual PNG files; dirs_exist_ok=True
# makes the cell safe to re-run (previously it raised FileExistsError).
shutil.copytree('plots', PLOTS_DEST, dirs_exist_ok=True)



FileExistsError: [Errno 17] File exists: '/content/drive/MyDrive/skin_lesion_plots/confusion_matrix.png'

In [None]:
from google.colab import files

# Download model. The path must be absolute (leading slash): without it the
# path is resolved relative to the working directory and the file is not found.
files.download('/content/drive/MyDrive/skin-disease-el/skin_lesion_model.pth')

# Download plots (relative to the working directory, where the evaluation
# cell writes the `plots` folder)
files.download('plots/final_test_confusion_matrix.png')
files.download('plots/final_test_roc_curve.png')


FileNotFoundError: Cannot find file: content/drive/MyDrive/skin-disease-el/skin_lesion_model.pth