# Project: Skin Cancer Classification
# Team Members:
#  Chin-Chien Lin: Data loading, preprocessing, ResNet18 training
#  Christian Rasmussen: EfficientNet training, Random Forest implementation, metrics comparison

#  Both: Final integration, formatting, result interpretation

In [None]:
import kagglehub

# Kaggle handle of the dataset; kagglehub fetches its latest version and
# hands back the local directory where the files live.
DATASET_HANDLE = "fanconic/skin-cancer-malignant-vs-benign"
path = kagglehub.dataset_download(DATASET_HANDLE)

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/skin-cancer-malignant-vs-benign


In [None]:
import os
# Show where the dataset landed and what its top-level entries are.
print("Root path:", path)
root_entries = os.listdir(path)
print("Subfolders:", root_entries)

# Paths of the two image splits; both names are reused by later cells.
train_path, test_path = (os.path.join(path, split_name) for split_name in ("train", "test"))

# The entries of train/ are the class folders.
train_entries = os.listdir(train_path)
print("Inside train/:", train_entries)


Root path: /kaggle/input/skin-cancer-malignant-vs-benign
Subfolders: ['data', 'test', 'train']
Inside train/: ['benign', 'malignant']


In [None]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset, random_split

# set path
data_dir = path

# Training pipeline: resize, then random flip / rotation / brightness jitter /
# resized crop for augmentation, followed by ImageNet normalisation.
train_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.ColorJitter(brightness=0.2),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Deterministic pipeline for validation and test: resize + normalisation only.
val_test_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Load the datasets using ImageFolder.
# The training folder is opened twice, once per transform pipeline. A Subset only
# holds a reference to its parent dataset, so if the train and validation subsets
# shared one ImageFolder, changing the transform for validation would silently
# disable augmentation for training as well.
train_path = os.path.join(path, "train")
full_dataset = datasets.ImageFolder(train_path, transform=train_transforms)
val_dataset = datasets.ImageFolder(train_path, transform=val_test_transforms)
test_dataset = datasets.ImageFolder(test_path, transform=val_test_transforms)

# Split the training folder into train (80%) and val (20%); the test set is the
# separate test/ folder loaded above.
total_len = len(full_dataset)
train_len = int(0.8 * total_len)
val_len = total_len - train_len

# A fixed seed makes the split reproducible across runs.
split_generator = torch.Generator().manual_seed(42)
train_set, val_split = random_split(full_dataset, [train_len, val_len],
                                    generator=split_generator)

# Re-point the validation indices at the un-augmented view of the same files.
val_set = Subset(val_dataset, val_split.indices)

# Create DataLoaders (only the training loader is shuffled)
train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
val_loader = DataLoader(val_set, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [None]:
import torch
import torch.nn as nn
import torchvision.models as models

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load ResNet18 with ImageNet pre-trained weights. The `weights=` enum replaces
# the deprecated `pretrained=True` flag; IMAGENET1K_V1 is the checkpoint that
# `pretrained=True` resolved to.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# Replace the final fully connected layer so the network emits a single logit
model.fc = nn.Linear(model.fc.in_features, 1)

model = model.to(device)

# Loss function & Optimizer: BCEWithLogitsLoss takes the raw logit, so the
# model itself applies no sigmoid.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 175MB/s]


#ResNet-18 Training

In [None]:
from tqdm import tqdm


def train_model(model, train_loader, val_loader, criterion, optimizer, device, epochs=5,
                save_path="best_resnet18.pth", early_stop_patience=7):
    """Train a single-logit binary classifier, checkpointing on validation accuracy.

    Every epoch runs one optimisation pass over ``train_loader`` and then a
    gradient-free pass over ``val_loader``. Predictions are obtained by
    thresholding ``sigmoid(logit)`` at 0.5. Whenever validation accuracy
    strictly improves, the model's ``state_dict`` is written to ``save_path``.
    Training stops early once ``early_stop_patience`` consecutive epochs have
    passed without an improvement.

    Args:
        model: Network called as ``model(images)``; its output is compared
            against labels reshaped to ``(B, 1)``.
        train_loader: Iterable of ``(images, labels)`` batches used for training.
        val_loader: Iterable of ``(images, labels)`` batches used for validation.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        device: Device that images and labels are moved to.
        epochs: Maximum number of epochs to run.
        save_path: File the best ``state_dict`` is saved to.
        early_stop_patience: Number of consecutive non-improving epochs
            tolerated before training stops.

    Returns:
        The best validation accuracy observed (0 if no epoch exceeded 0).
    """
    best_val_acc = 0
    patience_counter = 0

    for epoch in range(epochs):
        model.train()
        total_loss = 0
        correct = 0
        total = 0

        # Training phase
        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs} - Training"):
            images = images.to(device)
            # Labels become float (B, 1) to line up with the (B, 1) logits
            labels = labels.to(device).float().unsqueeze(1)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            preds = torch.sigmoid(outputs) > 0.5
            correct += (preds == labels).sum().item()
            total += labels.size(0)

        train_acc = correct / total
        # Mean of the per-batch losses
        avg_loss = total_loss / len(train_loader)

        # Validation phase
        model.eval()
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for images, labels in val_loader:
                images = images.to(device)
                labels = labels.to(device).float().unsqueeze(1)
                outputs = model(images)
                preds = torch.sigmoid(outputs) > 0.5
                val_correct += (preds == labels).sum().item()
                val_total += labels.size(0)

        val_acc = val_correct / val_total

        print(f"Epoch [{epoch+1}/{epochs}] - Loss: {avg_loss:.4f}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}")

        # Save best model; only a strict improvement resets the patience counter
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            patience_counter = 0
            torch.save(model.state_dict(), save_path)
            print(" Best model saved")
        else:
            patience_counter += 1
            if patience_counter >= early_stop_patience:
                print(f"Early stopping triggered at epoch {epoch+1}")
                break

    print(f" Training complete. Best validation accuracy: {best_val_acc:.4f}")
    return best_val_acc

In [None]:
# Fine-tune the ResNet18 model for at most 25 epochs (train_model may stop
# earlier through its early-stopping check).
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=25,
)

Epoch 1/25 - Training: 100%|██████████| 66/66 [00:23<00:00,  2.86it/s]


Epoch [1/25] - Loss: 0.3528, Train Acc: 0.8246, Val Acc: 0.8788
 Best model saved


Epoch 2/25 - Training: 100%|██████████| 66/66 [00:11<00:00,  5.91it/s]


Epoch [2/25] - Loss: 0.1249, Train Acc: 0.9550, Val Acc: 0.8807
 Best model saved


Epoch 3/25 - Training: 100%|██████████| 66/66 [00:11<00:00,  5.90it/s]


Epoch [3/25] - Loss: 0.0631, Train Acc: 0.9777, Val Acc: 0.8902
 Best model saved


Epoch 4/25 - Training: 100%|██████████| 66/66 [00:11<00:00,  5.92it/s]


Epoch [4/25] - Loss: 0.0193, Train Acc: 0.9976, Val Acc: 0.8977
 Best model saved


Epoch 5/25 - Training: 100%|██████████| 66/66 [00:11<00:00,  5.87it/s]


Epoch [5/25] - Loss: 0.0099, Train Acc: 0.9986, Val Acc: 0.9015
 Best model saved


Epoch 6/25 - Training: 100%|██████████| 66/66 [00:11<00:00,  5.87it/s]


Epoch [6/25] - Loss: 0.0105, Train Acc: 0.9976, Val Acc: 0.9072
 Best model saved


Epoch 7/25 - Training: 100%|██████████| 66/66 [00:11<00:00,  5.85it/s]


Epoch [7/25] - Loss: 0.0070, Train Acc: 0.9991, Val Acc: 0.8996


Epoch 8/25 - Training: 100%|██████████| 66/66 [00:11<00:00,  5.79it/s]


Epoch [8/25] - Loss: 0.0025, Train Acc: 1.0000, Val Acc: 0.9034


Epoch 9/25 - Training: 100%|██████████| 66/66 [00:11<00:00,  5.71it/s]


Epoch [9/25] - Loss: 0.0018, Train Acc: 1.0000, Val Acc: 0.9053


Epoch 10/25 - Training: 100%|██████████| 66/66 [00:11<00:00,  5.88it/s]


Epoch [10/25] - Loss: 0.0019, Train Acc: 1.0000, Val Acc: 0.9072


Epoch 11/25 - Training: 100%|██████████| 66/66 [00:11<00:00,  5.92it/s]


Epoch [11/25] - Loss: 0.0029, Train Acc: 1.0000, Val Acc: 0.9167
 Best model saved


Epoch 12/25 - Training: 100%|██████████| 66/66 [00:11<00:00,  5.83it/s]


Epoch [12/25] - Loss: 0.0021, Train Acc: 1.0000, Val Acc: 0.9110


Epoch 13/25 - Training: 100%|██████████| 66/66 [00:11<00:00,  5.90it/s]


Epoch [13/25] - Loss: 0.0025, Train Acc: 1.0000, Val Acc: 0.8939


Epoch 14/25 - Training: 100%|██████████| 66/66 [00:11<00:00,  5.87it/s]


Epoch [14/25] - Loss: 0.0019, Train Acc: 1.0000, Val Acc: 0.9053


Epoch 15/25 - Training: 100%|██████████| 66/66 [00:11<00:00,  5.87it/s]


Epoch [15/25] - Loss: 0.0008, Train Acc: 1.0000, Val Acc: 0.9110


Epoch 16/25 - Training: 100%|██████████| 66/66 [00:11<00:00,  5.92it/s]


Epoch [16/25] - Loss: 0.0083, Train Acc: 0.9967, Val Acc: 0.8807


Epoch 17/25 - Training: 100%|██████████| 66/66 [00:11<00:00,  5.88it/s]


Epoch [17/25] - Loss: 0.2110, Train Acc: 0.9284, Val Acc: 0.8561


Epoch 18/25 - Training: 100%|██████████| 66/66 [00:11<00:00,  5.88it/s]


Epoch [18/25] - Loss: 0.1003, Train Acc: 0.9621, Val Acc: 0.9034
Early stopping triggered at epoch 18
 Training complete. Best validation accuracy: 0.9167


#Evaluation & Random Forest implementation

In [None]:
# Model Evaluation
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, classification_report
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import torch
# Load best ResNet18 model from training.
# map_location remaps the stored tensors onto the active device, so a checkpoint
# written on a GPU can still be restored on a CPU-only runtime.
model.load_state_dict(torch.load("best_resnet18.pth", map_location=device))
model.eval()

# Inference on test set for ResNet18
y_true, y_pred, y_probs = [], [], []

with torch.no_grad():
    for imgs, labels in test_loader:
        imgs = imgs.to(device)
        # Drop the trailing singleton dimension: (B, 1) logits -> (B,)
        logits = model(imgs).squeeze(1)
        probs = torch.sigmoid(logits)
        # Hard predictions use a 0.5 probability threshold
        preds = (probs > 0.5).long().cpu().numpy()

        y_pred.extend(preds)
        y_true.extend(labels.numpy())
        y_probs.extend(probs.cpu().numpy())

# Convert to numpy arrays
y_true = np.array(y_true)
y_pred = np.array(y_pred)
y_probs = np.array(y_probs)

# Display evaluation metrics (AUC is computed from probabilities, the rest
# from the thresholded predictions)
print("\n Test Set Evaluation (ResNet18):")
print("Accuracy :", accuracy_score(y_true, y_pred))
print("Precision:", precision_score(y_true, y_pred))
print("Recall   :", recall_score(y_true, y_pred))
print("F1 Score :", f1_score(y_true, y_pred))
print("AUC      :", roc_auc_score(y_true, y_probs))

# Confusion matrix and classification report
cm = confusion_matrix(y_true, y_pred)
print("\nConfusion Matrix (ResNet18):\n", cm)
print("\nClassification Report (ResNet18):\n",
      classification_report(y_true, y_pred, target_names=["Benign","Malignant"]))

# -------------------- Random Forest Baseline --------------------

# Turn a loader of image batches into flat arrays for traditional ML models
def flatten_loader(dl):
    """Collect every batch of ``dl`` into one feature matrix and one label vector.

    Args:
        dl: Iterable yielding ``(imgs, labels)`` tensor pairs.

    Returns:
        ``(X, y)``: ``X`` stacks the batches row-wise with each sample reshaped
        to a single row; ``y`` is the concatenation of the label batches.
    """
    feature_chunks = []
    label_chunks = []
    for batch_images, batch_labels in dl:
        n_samples = batch_images.size(0)
        # One row per sample, all remaining dimensions collapsed
        flattened = batch_images.view(n_samples, -1)
        feature_chunks.append(flattened.cpu().numpy())
        label_chunks.append(batch_labels.numpy())
    features = np.vstack(feature_chunks)
    targets = np.concatenate(label_chunks)
    return features, targets

# Build flat feature matrices from the training and test loaders
X_train, y_train = flatten_loader(train_loader)
X_test, y_test = flatten_loader(test_loader)

# Fit a 100-tree Random Forest (fixed seed, all CPU cores)
rf = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
rf.fit(X_train, y_train)

# Hard predictions, class-1 probabilities (used for AUC) and confusion matrix
y_pred_rf = rf.predict(X_test)
y_probs_rf = rf.predict_proba(X_test)[:, 1]
cm_rf = confusion_matrix(y_test, y_pred_rf)

# Random Forest metrics
print("\n Random Forest Evaluation:")
for metric_label, metric_fn in (
    ("Accuracy :", accuracy_score),
    ("Precision:", precision_score),
    ("Recall   :", recall_score),
    ("F1 Score :", f1_score),
):
    print(metric_label, metric_fn(y_test, y_pred_rf))
print("AUC      :", roc_auc_score(y_test, y_probs_rf))

print("\nConfusion Matrix (Random Forest):\n", cm_rf)
rf_report = classification_report(y_test, y_pred_rf, target_names=["Benign","Malignant"])
print("\nClassification Report (Random Forest):\n", rf_report)



 Test Set Evaluation (ResNet18):
Accuracy : 0.8848484848484849
Precision: 0.85
Recall   : 0.9066666666666666
F1 Score : 0.8774193548387097
AUC      : 0.9581944444444445

Confusion Matrix (ResNet18):
 [[312  48]
 [ 28 272]]

Classification Report (ResNet18):
               precision    recall  f1-score   support

      Benign       0.92      0.87      0.89       360
   Malignant       0.85      0.91      0.88       300

    accuracy                           0.88       660
   macro avg       0.88      0.89      0.88       660
weighted avg       0.89      0.88      0.89       660


 Random Forest Evaluation:
Accuracy : 0.8348484848484848
Precision: 0.7800586510263929
Recall   : 0.8866666666666667
F1 Score : 0.8299531981279251
AUC      : 0.904425925925926

Confusion Matrix (Random Forest):
 [[285  75]
 [ 34 266]]

Classification Report (Random Forest):
               precision    recall  f1-score   support

      Benign       0.89      0.79      0.84       360
   Malignant       0.78    

#EfficientNet-B0

In [None]:
!pip install efficientnet_pytorch

Collecting efficientnet_pytorch
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->efficientnet_pytorch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->efficientnet_pytorch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->efficientnet_pytorch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->efficientnet_pytorch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch->efficientnet_pytorch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metada

In [None]:
import os
from PIL import Image
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from efficientnet_pytorch import EfficientNet
import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
from efficientnet_pytorch import EfficientNet

# Load a pre-trained EfficientNet-B0 model
# NOTE(review): `model` is assigned again from EfficientNet.from_pretrained in the
# model-setup cell further down, so this instance looks unused beyond triggering
# the weight download — confirm before removing.
model = EfficientNet.from_pretrained('efficientnet-b0')

Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth" to /root/.cache/torch/hub/checkpoints/efficientnet-b0-355c32eb.pth
100%|██████████| 20.4M/20.4M [00:00<00:00, 190MB/s]

Loaded pretrained weights for efficientnet-b0





In [None]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset, random_split

# set path
data_dir = path

# Training pipeline: resize, then random flip / rotation / brightness jitter /
# resized crop for augmentation, followed by ImageNet normalisation.
train_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.ColorJitter(brightness=0.2),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Deterministic pipeline for validation and test: resize + normalisation only.
val_test_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Load the datasets using ImageFolder.
# The training folder is opened twice, once per transform pipeline. A Subset only
# holds a reference to its parent dataset, so if the train and validation subsets
# shared one ImageFolder, changing the transform for validation would silently
# disable augmentation for training as well.
train_path = os.path.join(path, "train")
full_dataset = datasets.ImageFolder(train_path, transform=train_transforms)
val_dataset = datasets.ImageFolder(train_path, transform=val_test_transforms)
test_dataset = datasets.ImageFolder(test_path, transform=val_test_transforms)

# Split the training folder into train (80%) and val (20%); the test set is the
# separate test/ folder loaded above.
total_len = len(full_dataset)
train_len = int(0.8 * total_len)
val_len = total_len - train_len

# A fixed seed makes the split reproducible across runs.
split_generator = torch.Generator().manual_seed(42)
train_set, val_split = random_split(full_dataset, [train_len, val_len],
                                    generator=split_generator)

# Re-point the validation indices at the un-augmented view of the same files.
val_set = Subset(val_dataset, val_split.indices)

# Create DataLoaders (only the training loader is shuffled)
train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
val_loader = DataLoader(val_set, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [None]:
import torch
import torch.nn as nn
from efficientnet_pytorch import EfficientNet

# Pick the compute device: CUDA when present, CPU otherwise
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# EfficientNet-B0 initialised from its pre-trained weights
model = EfficientNet.from_pretrained('efficientnet-b0')

# Swap the classifier head for a one-output linear layer (binary classification)
fc_in_features = model._fc.in_features
model._fc = nn.Linear(fc_in_features, 1)

model = model.to(device)

# Optimisation setup: logit-based binary cross-entropy and Adam at lr 1e-4
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)


Loaded pretrained weights for efficientnet-b0


#EfficientNet-B0 Training & Evaluation

In [None]:
from tqdm import tqdm
import torch

def train_model(model, train_loader, val_loader, criterion, optimizer, device, epochs=5,
                save_path="best_efficientnet_b0.pth", early_stop_patience=7):
    """Train a single-logit binary classifier, checkpointing on validation accuracy.

    Every epoch runs one optimisation pass over ``train_loader`` and then a
    gradient-free pass over ``val_loader``. Predictions are obtained by
    thresholding ``sigmoid(logit)`` at 0.5. Whenever validation accuracy
    strictly improves, the model's ``state_dict`` is written to ``save_path``.
    Training stops early once ``early_stop_patience`` consecutive epochs have
    passed without an improvement.

    Args:
        model: Network called as ``model(images)``; its output is compared
            against labels reshaped to ``(B, 1)``.
        train_loader: Iterable of ``(images, labels)`` batches used for training.
        val_loader: Iterable of ``(images, labels)`` batches used for validation.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        device: Device that images and labels are moved to.
        epochs: Maximum number of epochs to run.
        save_path: File the best ``state_dict`` is saved to.
        early_stop_patience: Number of consecutive non-improving epochs
            tolerated before training stops.

    Returns:
        The best validation accuracy observed (0 if no epoch exceeded 0).
    """
    best_val_acc = 0
    patience_counter = 0

    for epoch in range(epochs):
        model.train()
        total_loss = 0
        correct = 0
        total = 0

        # Training phase
        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs} - Training"):
            images = images.to(device)
            # Labels become float (B, 1) to line up with the (B, 1) logits
            labels = labels.to(device).float().unsqueeze(1)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            preds = torch.sigmoid(outputs) > 0.5
            correct += (preds == labels).sum().item()
            total += labels.size(0)

        train_acc = correct / total
        # Mean of the per-batch losses
        avg_loss = total_loss / len(train_loader)

        # Validation phase
        model.eval()
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for images, labels in val_loader:
                images = images.to(device)
                labels = labels.to(device).float().unsqueeze(1)
                outputs = model(images)
                preds = torch.sigmoid(outputs) > 0.5
                val_correct += (preds == labels).sum().item()
                val_total += labels.size(0)

        val_acc = val_correct / val_total

        print(f"Epoch [{epoch+1}/{epochs}] - Loss: {avg_loss:.4f}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}")

        # Save best model; only a strict improvement resets the patience counter
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            patience_counter = 0
            torch.save(model.state_dict(), save_path)
            print(" Best model saved")
        else:
            patience_counter += 1
            if patience_counter >= early_stop_patience:
                print(f"Early stopping triggered at epoch {epoch+1}")
                break

    print(f"Training complete. Best validation accuracy: {best_val_acc:.4f}")
    return best_val_acc

In [None]:
# Fine-tune the EfficientNet-B0 model for at most 25 epochs (train_model may
# stop earlier through its early-stopping check).
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=25,
)

Epoch 1/25 - Training: 100%|██████████| 66/66 [00:21<00:00,  3.11it/s]


Epoch [1/25] - Loss: 0.4756, Train Acc: 0.8203, Val Acc: 0.6818
 Best model saved


Epoch 2/25 - Training: 100%|██████████| 66/66 [00:19<00:00,  3.42it/s]


Epoch [2/25] - Loss: 0.2787, Train Acc: 0.8919, Val Acc: 0.7822
 Best model saved


Epoch 3/25 - Training: 100%|██████████| 66/66 [00:19<00:00,  3.44it/s]


Epoch [3/25] - Loss: 0.2023, Train Acc: 0.9232, Val Acc: 0.8201
 Best model saved


Epoch 4/25 - Training: 100%|██████████| 66/66 [00:19<00:00,  3.47it/s]


Epoch [4/25] - Loss: 0.1481, Train Acc: 0.9469, Val Acc: 0.8731
 Best model saved


Epoch 5/25 - Training: 100%|██████████| 66/66 [00:19<00:00,  3.46it/s]


Epoch [5/25] - Loss: 0.0901, Train Acc: 0.9725, Val Acc: 0.8788
 Best model saved


Epoch 6/25 - Training: 100%|██████████| 66/66 [00:19<00:00,  3.43it/s]


Epoch [6/25] - Loss: 0.0614, Train Acc: 0.9862, Val Acc: 0.8769


Epoch 7/25 - Training: 100%|██████████| 66/66 [00:19<00:00,  3.46it/s]


Epoch [7/25] - Loss: 0.0382, Train Acc: 0.9915, Val Acc: 0.8883
 Best model saved


Epoch 8/25 - Training: 100%|██████████| 66/66 [00:18<00:00,  3.49it/s]


Epoch [8/25] - Loss: 0.0397, Train Acc: 0.9877, Val Acc: 0.8864


Epoch 9/25 - Training: 100%|██████████| 66/66 [00:18<00:00,  3.48it/s]


Epoch [9/25] - Loss: 0.0210, Train Acc: 0.9948, Val Acc: 0.8902
 Best model saved


Epoch 10/25 - Training: 100%|██████████| 66/66 [00:19<00:00,  3.44it/s]


Epoch [10/25] - Loss: 0.0298, Train Acc: 0.9915, Val Acc: 0.8788


Epoch 11/25 - Training: 100%|██████████| 66/66 [00:19<00:00,  3.46it/s]


Epoch [11/25] - Loss: 0.0231, Train Acc: 0.9938, Val Acc: 0.8788


Epoch 12/25 - Training: 100%|██████████| 66/66 [00:19<00:00,  3.47it/s]


Epoch [12/25] - Loss: 0.0185, Train Acc: 0.9943, Val Acc: 0.8731


Epoch 13/25 - Training: 100%|██████████| 66/66 [00:18<00:00,  3.49it/s]


Epoch [13/25] - Loss: 0.0166, Train Acc: 0.9967, Val Acc: 0.8712


Epoch 14/25 - Training: 100%|██████████| 66/66 [00:19<00:00,  3.45it/s]


Epoch [14/25] - Loss: 0.0132, Train Acc: 0.9967, Val Acc: 0.8845


Epoch 15/25 - Training: 100%|██████████| 66/66 [00:19<00:00,  3.46it/s]


Epoch [15/25] - Loss: 0.0115, Train Acc: 0.9976, Val Acc: 0.8845


Epoch 16/25 - Training: 100%|██████████| 66/66 [00:18<00:00,  3.48it/s]


Epoch [16/25] - Loss: 0.0110, Train Acc: 0.9976, Val Acc: 0.8902
Early stopping triggered at epoch 16
Training complete. Best validation accuracy: 0.8902


In [None]:
# Model Evaluation
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, classification_report
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import torch

# Load best EfficientNet-B0 model from training.
# map_location remaps the stored tensors onto the active device, so a checkpoint
# written on a GPU can still be restored on a CPU-only runtime.
model.load_state_dict(torch.load("best_efficientnet_b0.pth", map_location=device))
model.eval()

# Inference on test set for EfficientNet-B0
y_true, y_pred, y_probs = [], [], []

with torch.no_grad():
    for imgs, labels in test_loader:
        imgs = imgs.to(device)
        # Drop the trailing singleton dimension: (B, 1) logits -> (B,)
        logits = model(imgs).squeeze(1)
        probs = torch.sigmoid(logits)
        # Hard predictions use a 0.5 probability threshold
        preds = (probs > 0.5).long().cpu().numpy()

        y_pred.extend(preds)
        y_true.extend(labels.numpy())
        y_probs.extend(probs.cpu().numpy())

# Convert to numpy arrays
y_true = np.array(y_true)
y_pred = np.array(y_pred)
y_probs = np.array(y_probs)

# Display evaluation metrics (AUC is computed from probabilities, the rest
# from the thresholded predictions)
print("\nTest Set Evaluation (EfficientNet-B0):")
print("Accuracy :", accuracy_score(y_true, y_pred))
print("Precision:", precision_score(y_true, y_pred))
print("Recall   :", recall_score(y_true, y_pred))
print("F1 Score :", f1_score(y_true, y_pred))
print("AUC      :", roc_auc_score(y_true, y_probs))

# Confusion matrix and classification report
cm = confusion_matrix(y_true, y_pred)
print("\nConfusion Matrix (EfficientNet-B0):\n", cm)
print("\nClassification Report (EfficientNet-B0):\n",
      classification_report(y_true, y_pred, target_names=["Benign", "Malignant"]))

# -------------------- Random Forest Baseline --------------------

# Convert batched image tensors into flat NumPy inputs for traditional ML
def flatten_loader(dl):
    """Flatten every image in ``dl`` to a row vector and gather the labels.

    Args:
        dl: Iterable yielding ``(imgs, labels)`` tensor pairs.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays: the per-batch flattened
        images stacked vertically, and the per-batch labels concatenated.
    """
    # Single pass over the loader; each entry keeps a batch's rows and labels together
    converted = [
        (imgs.view(imgs.size(0), -1).cpu().numpy(), labels.numpy())
        for imgs, labels in dl
    ]
    row_blocks = [rows for rows, _ in converted]
    label_blocks = [labs for _, labs in converted]
    return np.vstack(row_blocks), np.concatenate(label_blocks)

# Build flat feature matrices from the training and test loaders
X_train, y_train = flatten_loader(train_loader)
X_test, y_test = flatten_loader(test_loader)

# Fit a 100-tree Random Forest (fixed seed, all CPU cores)
rf = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
rf.fit(X_train, y_train)

# Hard predictions, class-1 probabilities (used for AUC) and confusion matrix
y_pred_rf = rf.predict(X_test)
y_probs_rf = rf.predict_proba(X_test)[:, 1]
cm_rf = confusion_matrix(y_test, y_pred_rf)

# Random Forest metrics
print("\nRandom Forest Evaluation:")
for metric_label, metric_fn in (
    ("Accuracy :", accuracy_score),
    ("Precision:", precision_score),
    ("Recall   :", recall_score),
    ("F1 Score :", f1_score),
):
    print(metric_label, metric_fn(y_test, y_pred_rf))
print("AUC      :", roc_auc_score(y_test, y_probs_rf))

print("\nConfusion Matrix (Random Forest):\n", cm_rf)
rf_report = classification_report(y_test, y_pred_rf, target_names=["Benign", "Malignant"])
print("\nClassification Report (Random Forest):\n", rf_report)


Test Set Evaluation (EfficientNet-B0):
Accuracy : 0.9015151515151515
Precision: 0.8983050847457628
Recall   : 0.8833333333333333
F1 Score : 0.8907563025210085
AUC      : 0.9528888888888889

Confusion Matrix (EfficientNet-B0):
 [[330  30]
 [ 35 265]]

Classification Report (EfficientNet-B0):
               precision    recall  f1-score   support

      Benign       0.90      0.92      0.91       360
   Malignant       0.90      0.88      0.89       300

    accuracy                           0.90       660
   macro avg       0.90      0.90      0.90       660
weighted avg       0.90      0.90      0.90       660


Random Forest Evaluation:
Accuracy : 0.8333333333333334
Precision: 0.7810650887573964
Recall   : 0.88
F1 Score : 0.8275862068965517
AUC      : 0.9015324074074074

Confusion Matrix (Random Forest):
 [[286  74]
 [ 36 264]]

Classification Report (Random Forest):
               precision    recall  f1-score   support

      Benign       0.89      0.79      0.84       360
   Mali