In [2]:
# ================================
# 1. Imports & Device Setup
# ================================
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms, models
from sklearn.metrics import classification_report, roc_auc_score
import numpy as np
import os

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

Using device: cuda


In [3]:
# ===============================
# 2. Kaggle Dataset Download
# ===============================
!pip install kaggle -q

# Upload kaggle.json in Colab (from your Kaggle account)
from google.colab import files
files.upload()   # Upload kaggle.json here

!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

Saving kaggle.json to kaggle.json


In [4]:
# Download dataset
!kaggle datasets download -d rutamgp/histopathology-robust-resized-to-224x224
!unzip -q histopathology-robust-resized-to-224x224.zip -d /content/

Dataset URL: https://www.kaggle.com/datasets/rutamgp/histopathology-robust-resized-to-224x224
License(s): MIT
Downloading histopathology-robust-resized-to-224x224.zip to /content
 98% 1.93G/1.97G [00:12<00:00, 215MB/s]
100% 1.97G/1.97G [00:12<00:00, 169MB/s]


In [5]:
# Dataset directory
data_dir = "/content/robust_resized/Robust_Data"

In [10]:
# ================================
# 3. Transforms (resize for InceptionV3)
# ================================
transform = transforms.Compose([
    transforms.Resize((299, 299)),    # InceptionV3 expects 299x299
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

dataset = datasets.ImageFolder(root=data_dir, transform=transform)
num_classes = len(dataset.classes)
print("Classes:", dataset.classes)
print("Total images:", len(dataset))

Classes: ['all_benign', 'all_early', 'all_pre', 'all_pro', 'breast_benign', 'breast_malignant', 'colon_aca', 'colon_bnt', 'lung_aca', 'lung_bnt', 'lung_scc', 'lymph_cll', 'lymph_fl', 'lymph_mcl', 'oral_normal', 'oral_scc']
Total images: 80002


In [11]:
# ================================
# 4. Train/Val Split & DataLoader
# ================================
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2, drop_last=False)

print(f"Train size: {len(train_dataset)}, Val size: {len(val_dataset)}")

Train size: 64001, Val size: 16001


In [12]:
# ================================
# 5. Model Setup (InceptionV3)
# ================================
model = models.inception_v3(weights="IMAGENET1K_V1")
model.aux_logits = False   # disable auxiliary logits
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

In [13]:

# ================================
# 6. Training Loop (with AMP)
# ================================
scaler = torch.cuda.amp.GradScaler()
num_epochs = 10  # adjust as needed

for epoch in range(num_epochs):
    model.train()
    running_loss, correct, total = 0.0, 0, 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()

        # Forward + Loss in mixed precision
        with torch.cuda.amp.autocast():
            outputs = model(images)
            loss = criterion(outputs, labels)

        # Backward pass with scaling
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(train_loader)
    epoch_acc = 100 * correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}] "
          f"- Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.2f}%")


  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():


Epoch [1/10] - Loss: 0.1774, Acc: 94.03%
Epoch [2/10] - Loss: 0.0473, Acc: 98.45%
Epoch [3/10] - Loss: 0.0311, Acc: 99.00%
Epoch [4/10] - Loss: 0.0246, Acc: 99.23%
Epoch [5/10] - Loss: 0.0234, Acc: 99.26%
Epoch [6/10] - Loss: 0.0196, Acc: 99.40%
Epoch [7/10] - Loss: 0.0159, Acc: 99.51%
Epoch [8/10] - Loss: 0.0160, Acc: 99.48%
Epoch [9/10] - Loss: 0.0150, Acc: 99.54%
Epoch [10/10] - Loss: 0.0126, Acc: 99.62%


In [15]:
# ================================
# 7. Validation + Detailed Metrics (with AMP)
# ================================
import numpy as np
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, classification_report, confusion_matrix
)
from sklearn.preprocessing import label_binarize

model.eval()
all_labels, all_preds, all_probs = [], [], []

with torch.no_grad():
    for images, labels in val_loader:   # using val_loader for metrics
        images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)

        with torch.amp.autocast("cuda"):   # AMP for faster inference
            logits = model(images)
            probs = torch.softmax(logits, dim=1)
            _, preds = logits.max(1)

        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(preds.cpu().numpy())
        all_probs.extend(probs.cpu().numpy())

# Convert lists to numpy arrays
all_labels = np.array(all_labels)
all_preds  = np.array(all_preds)
all_probs  = np.array(all_probs)

# Metrics
acc = accuracy_score(all_labels, all_preds)
precision = precision_score(all_labels, all_preds, average="weighted", zero_division=0)
recall = recall_score(all_labels, all_preds, average="weighted", zero_division=0)
f1 = f1_score(all_labels, all_preds, average="weighted", zero_division=0)

# ROC-AUC (multi-class One-vs-Rest)
roc_auc = None
try:
    n_classes = len(dataset.classes)
    y_bin = label_binarize(all_labels, classes=np.arange(n_classes))
    roc_auc = roc_auc_score(y_bin, all_probs, average="weighted", multi_class="ovr")
except Exception as e:
    print("ROC-AUC calculation failed:", e)

# Print results
print("\n=== Final Validation Performance ===")
print(f"Accuracy  : {acc:.4f}")
print(f"Precision : {precision:.4f}")
print(f"Recall    : {recall:.4f}")
print(f"F1-score  : {f1:.4f}")
print(f"ROC-AUC   : {roc_auc:.4f}" if roc_auc is not None else "ROC-AUC   : N/A")

print("\nClassification report:")
print(classification_report(all_labels, all_preds, target_names=dataset.classes, zero_division=0))

print("\nConfusion matrix:")
print(confusion_matrix(all_labels, all_preds))


=== Final Validation Performance ===
Accuracy  : 0.9933
Precision : 0.9935
Recall    : 0.9933
F1-score  : 0.9932
ROC-AUC   : 1.0000

Classification report:
                  precision    recall  f1-score   support

      all_benign       1.00      1.00      1.00      1010
       all_early       1.00      0.99      1.00      1042
         all_pre       1.00      1.00      1.00       975
         all_pro       0.99      1.00      1.00      1005
   breast_benign       1.00      0.99      1.00      1012
breast_malignant       0.99      1.00      1.00       954
       colon_aca       1.00      1.00      1.00      1034
       colon_bnt       1.00      1.00      1.00       986
        lung_aca       1.00      1.00      1.00      1029
        lung_bnt       1.00      1.00      1.00       990
        lung_scc       1.00      1.00      1.00      1013
       lymph_cll       1.00      1.00      1.00       972
        lymph_fl       1.00      1.00      1.00       983
       lymph_mcl       1.00   