In [None]:
# ===============================
# 1. Setup
# ===============================
!pip install kaggle -q

import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms, models
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import numpy as np

# Device setup: use the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

Using device: cuda


In [None]:
# ===============================
# 2. Kaggle Dataset Download
# ===============================
# Upload kaggle.json in Colab (from your Kaggle account).
# google.colab is imported here, so this cell is meant to run inside a Colab runtime.
from google.colab import files
files.upload()   # Upload kaggle.json here

# Copy the uploaded file into ~/.kaggle/ and restrict it to owner read/write (mode 600).
# Presumably this is where the kaggle command in the next cell looks for the API
# token — confirm against the Kaggle CLI documentation.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

Saving kaggle.json to kaggle.json


In [None]:
# Download dataset
!kaggle datasets download -d rutamgp/histopathology-robust-resized-to-224x224
!unzip -q histopathology-robust-resized-to-224x224.zip -d /content/

Dataset URL: https://www.kaggle.com/datasets/rutamgp/histopathology-robust-resized-to-224x224
License(s): MIT
Downloading histopathology-robust-resized-to-224x224.zip to /content
100% 1.96G/1.97G [00:19<00:00, 66.1MB/s]
100% 1.97G/1.97G [00:23<00:00, 90.8MB/s]


In [None]:
# Path to dataset.
# /content/robust_resized only contains the single wrapper folder "Robust_Data"; using it
# as the ImageFolder root yields exactly one class and a meaningless 0.0 loss / 100% accuracy.
# NOTE(review): the class folders are assumed to sit directly inside Robust_Data — the
# assertion below stops the notebook early if that layout is wrong.
original_dir = "/content/robust_resized/Robust_Data"
class_dirs = sorted(
    entry for entry in os.listdir(original_dir)
    if os.path.isdir(os.path.join(original_dir, entry))
)
print("Classes:", class_dirs)
assert len(class_dirs) > 1, (
    f"Expected several class folders in {original_dir}, found {class_dirs}."
)

Classes: ['Robust_Data']


In [None]:
# ===============================
# 3. Dataset + Loaders
# ===============================
# Per-channel mean / std handed to Normalize.
_norm_mean = [0.485, 0.456, 0.406]
_norm_std = [0.229, 0.224, 0.225]

_preprocess_steps = [
    transforms.Resize((224, 224)),   # ResNet50 uses 224x224
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
]
transform = transforms.Compose(_preprocess_steps)

# Every image below original_dir goes through the same preprocessing pipeline.
dataset = datasets.ImageFolder(root=original_dir, transform=transform)

In [None]:
# Train/val/test split (70 / 15 / 15); the test part absorbs the rounding remainder.
train_size = int(0.7 * len(dataset))
val_size   = int(0.15 * len(dataset))
test_size  = len(dataset) - train_size - val_size

# A seeded generator makes random_split draw the same partition on every run, so the
# held-out test set does not change between kernel restarts or re-executions of this cell.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset, test_dataset = random_split(
    dataset, [train_size, val_size, test_size], generator=split_generator
)

# Only the training loader is shuffled; evaluation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader  = DataLoader(test_dataset, batch_size=64, shuffle=False)

class_names = dataset.classes
print("Classes:", class_names)

Classes: ['Robust_Data']


In [None]:
# ===============================
# 4. Define ResNet50
# ===============================
# With a single class the head has one logit and CrossEntropyLoss is identically 0, so
# training would report 0.0 loss / 100% accuracy without learning anything. Fail early.
if len(class_names) < 2:
    raise ValueError(
        f"Need at least 2 classes to train a classifier, got {class_names}. "
        "Point original_dir at the folder that contains the class sub-folders."
    )

# `pretrained=True` is deprecated in torchvision; `weights="IMAGENET1K_V1"` requests the
# ImageNet checkpoint explicitly and matches the model definition used later in this notebook.
model = models.resnet50(weights="IMAGENET1K_V1")
# Replace the ImageNet head with one output per dataset class.
model.fc = nn.Linear(model.fc.in_features, len(class_names))
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)



Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


100%|██████████| 97.8M/97.8M [00:00<00:00, 194MB/s]


In [None]:
# ================================
# 5. Training Loop (Epoch-wise Logs)
# ================================
num_epochs = 5  # increase for better accuracy

for epoch in range(num_epochs):
    model.train()
    # Per-epoch accumulators: summed batch losses, correct predictions, samples seen.
    running_loss = 0.0
    correct = 0
    total = 0

    for batch_images, batch_labels in train_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        # One optimisation step: reset gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()
        total += batch_labels.size(0)
        correct += (logits.argmax(dim=1) == batch_labels).sum().item()

    # Loss is averaged over batches, accuracy over individual samples.
    epoch_loss = running_loss / len(train_loader)
    epoch_acc = 100 * correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}] "
          f"- Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.2f}%")

Epoch [1/5] - Loss: 0.0000, Acc: 100.00%


KeyboardInterrupt: 

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import label_binarize
import numpy as np

# Put model in evaluation mode and collect labels, predictions and class probabilities
# for the whole test split.
model.eval()
all_preds, all_labels, all_probs = [], [], []

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)

        # Predictions
        _, predicted = torch.max(outputs, 1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

        # Softmax probabilities (for ROC-AUC)
        probs = torch.softmax(outputs, dim=1)
        all_probs.extend(probs.cpu().numpy())

# Convert lists to arrays
all_preds = np.array(all_preds)
all_labels = np.array(all_labels)
all_probs = np.array(all_probs)

# === Metrics ===
# zero_division=0 reports 0 (without a warning) for classes that were never predicted.
acc = accuracy_score(all_labels, all_preds)
precision = precision_score(all_labels, all_preds, average="weighted", zero_division=0)
recall = recall_score(all_labels, all_preds, average="weighted", zero_division=0)
f1 = f1_score(all_labels, all_preds, average="weighted", zero_division=0)

# ROC-AUC (only valid if >=2 classes). Kept best-effort, but the failure reason is
# recorded and printed instead of being silently discarded.
roc_auc = None
roc_auc_error = None
try:
    # The number of classes is the number of model outputs, not the number of distinct
    # labels that happen to occur in this split.
    n_classes = all_probs.shape[1]
    if n_classes == 2:
        # Binary case: score with the probability of the positive class (index 1).
        roc_auc = roc_auc_score(all_labels, all_probs[:, 1])
    else:
        y_bin = label_binarize(all_labels, classes=np.arange(n_classes))
        roc_auc = roc_auc_score(y_bin, all_probs, average="weighted", multi_class="ovr")
except Exception as e:
    roc_auc_error = e

print("\n=== Final Test Set Performance ===")
print(f"Accuracy  : {acc:.4f}")
print(f"Precision : {precision:.4f}")
print(f"Recall    : {recall:.4f}")
print(f"F1-score  : {f1:.4f}")
if roc_auc is not None:
    print(f"ROC-AUC   : {roc_auc:.4f}")
else:
    print("ROC-AUC   : Not available (check class setup)")
    print(f"            reason: {roc_auc_error!r}")


In [None]:
# =====================================
# 1. Split dataset into Train/Val/Test
# =====================================
from torch.utils.data import random_split, DataLoader

# 80-10-10 split; the test part absorbs the rounding remainder.
train_size = int(0.8 * len(dataset))
val_size = int(0.1 * len(dataset))
test_size = len(dataset) - train_size - val_size

# Seeded so the partition is the same on every execution of this cell.
# NOTE(review): `model` is not rebuilt before the training cell that follows, so weights
# already trained on an earlier, differently drawn split may have seen samples that land
# in this test set — confirm, or re-create the model before training.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset, test_dataset = random_split(
    dataset, [train_size, val_size, test_size], generator=split_generator
)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [None]:
# =====================================
# 2. Define Loss & Optimizer
# =====================================
# Cross-entropy loss on the model outputs; Adam over every model parameter.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# =====================================
# 3. Training Loop (Train + Validation)
# =====================================
num_epochs = 5  # increase for better accuracy

for epoch in range(num_epochs):
    # ---- Training pass: weights are updated after every batch ----
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for batch_images, batch_labels in train_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()
        total += batch_labels.size(0)
        correct += (logits.argmax(dim=1) == batch_labels).sum().item()

    train_loss = running_loss / len(train_loader)
    train_acc = 100 * correct / total

    # ---- Validation pass: eval mode, no gradient tracking, no weight updates ----
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for batch_images, batch_labels in val_loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)
            logits = model(batch_images)
            val_loss += criterion(logits, batch_labels).item()
            val_total += batch_labels.size(0)
            val_correct += (logits.argmax(dim=1) == batch_labels).sum().item()

    # Loss is averaged over batches, accuracy over individual samples.
    val_loss = val_loss / len(val_loader)
    val_acc = 100 * val_correct / val_total

    print(f"Epoch [{epoch+1}/{num_epochs}] "
          f"- Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}% "
          f"- Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

KeyboardInterrupt: 

In [None]:
# =====================================
# 4. Final Evaluation on Test Set
# =====================================
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Run the model over the test split without gradient tracking and gather the
# predicted and true class indices.
model.eval()
all_preds, all_labels = [], []

with torch.no_grad():
    for batch_images, batch_labels in test_loader:
        logits = model(batch_images.to(device))
        all_preds.extend(logits.argmax(dim=1).cpu().numpy())
        all_labels.extend(batch_labels.cpu().numpy())

# Compute metrics (precision / recall / F1 are support-weighted averages over classes)
acc = accuracy_score(all_labels, all_preds)
precision, recall, f1 = (
    metric(all_labels, all_preds, average="weighted")
    for metric in (precision_score, recall_score, f1_score)
)

print("\n=== Final Test Set Performance ===")
for line in (
    f"Accuracy: {acc:.4f}",
    f"Precision: {precision:.4f}",
    f"Recall: {recall:.4f}",
    f"F1-score: {f1:.4f}",
):
    print(line)


# Test 2

In [None]:
import os

# Inspect what got unzipped
print("Top of /content:")
print(os.listdir("/content"))

# Peek inside likely folders: for each candidate that exists, show up to 20 entries.
likely_dirs = (
    "/content/robust_resized",
    "/content/histopathology-robust-resized-to-224x224",
    "/content/histopathology-robust-resized-to-224x224/robust_resized",
    "/content/histopathology-robust-resized-to-224x224/robust_resized/Robust_Data",
)
for folder in filter(os.path.exists, likely_dirs):
    print("\n", folder)
    try:
        entries = os.listdir(folder)
    except Exception as err:
        print("Error listing:", err)
    else:
        print(entries[:20])


Top of /content:
['.config', 'sample_data']


In [None]:
import os

# File extensions (lower-case) that count as images when probing folders.
VALID_EXTS = (".jpg",".jpeg",".png",".bmp",".tif",".tiff",".webp",".ppm",".pgm")

def looks_like_class_root(path):
    """Tell whether `path` looks like the root of a class-per-folder image dataset.

    Returns True only when `path` is a directory with at least two
    sub-directories and every one of them holds, at any depth, a file whose
    lower-cased name ends with one of VALID_EXTS. Returns False otherwise.
    """
    if not os.path.isdir(path):
        return False

    class_dirs = [
        os.path.join(path, entry)
        for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    ]
    if len(class_dirs) < 2:
        return False

    def _contains_image(folder):
        # Lazy walk: stops at the first matching file name.
        return any(
            name.lower().endswith(VALID_EXTS)
            for _, _, names in os.walk(folder)
            for name in names
        )

    # all() stops at the first sub-directory without any image.
    return all(_contains_image(folder) for folder in class_dirs)

def find_class_root(start="/content"):
    """Search below `start` for a directory accepted by looks_like_class_root.

    Every directory reached by os.walk that has sub-directories is tested.
    Among the accepted ones, the path containing the most os.sep characters
    (the deepest) is returned; of equally deep paths the first one visited
    wins. Returns None when no directory qualifies.
    """
    matches = [
        folder
        for folder, subdirs, _ in os.walk(start)
        if subdirs and looks_like_class_root(folder)
    ]
    if not matches:
        return None
    # max() returns the first maximal element, i.e. the first-visited deepest path.
    return max(matches, key=lambda candidate: candidate.count(os.sep))

# Locate the dataset's class folder under /content and stop here if none qualifies.
original_dir = find_class_root(start="/content")
print("Detected class-root:", original_dir)
assert original_dir is not None, (
    "Could not find a folder whose subfolders look like classes."
)


Detected class-root: None


AssertionError: Could not find a folder whose subfolders look like classes.

In [None]:
from collections import Counter
from torchvision import datasets, transforms

# Throw-away dataset used only to inspect class names and per-class sample counts.
_transform_probe = transforms.Compose([transforms.ToTensor()])
_probe = datasets.ImageFolder(root=original_dir, transform=_transform_probe)

_n_probe_classes = len(_probe.classes)
print("Classes:", _probe.classes)
print("Num classes:", _n_probe_classes)
assert _n_probe_classes > 1, "Only 1 class found — wrong folder. Point original_dir at the folder that contains multiple class subfolders."

# Count samples per class (each entry of .samples unpacks to a pair whose second item is the label index)
label_counts = Counter(lbl for _, lbl in _probe.samples)
_per_class = {_probe.classes[idx]: count for idx, count in label_counts.items()}
print("Per-class counts:", _per_class)
print("Total images:", len(_probe))


TypeError: expected str, bytes or os.PathLike object, not NoneType

In [None]:
import torch
from torch.utils.data import DataLoader, random_split
from torchvision import transforms

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)


def _tensor_and_normalize():
    """Return fresh ToTensor + Normalize steps, the tail of both pipelines."""
    return [
        transforms.ToTensor(),
        transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
    ]


# Transforms (224×224 for ResNet50)
# Training adds random flip / rotation / colour jitter; evaluation only resizes.
train_tf = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    *_tensor_and_normalize(),
])
eval_tf = transforms.Compose([
    transforms.Resize((224,224)),
    *_tensor_and_normalize(),
])

# Two views of the same folder: one augmented (train), one not (val/test).
full_train = datasets.ImageFolder(original_dir, transform=train_tf)
full_eval  = datasets.ImageFolder(original_dir, transform=eval_tf)

# Draw the 80/10/10 index split once, with a fixed seed so it is reproducible.
N = len(full_train)
n_train = int(0.8*N)
n_val = int(0.1*N)
n_test = N - n_train - n_val
g = torch.Generator().manual_seed(42)

train_ds, _val_split, _test_split = random_split(
    full_train, [n_train, n_val, n_test], generator=g
)
# Val/test reuse the drawn indices but read through the non-augmented dataset.
val_ds  = torch.utils.data.Subset(full_eval, _val_split.indices)
test_ds = torch.utils.data.Subset(full_eval, _test_split.indices)

batch_size = 32
_loader_kwargs = dict(batch_size=batch_size, num_workers=2, pin_memory=True)
train_loader = DataLoader(train_ds, shuffle=True, **_loader_kwargs)
val_loader   = DataLoader(val_ds, shuffle=False, **_loader_kwargs)
test_loader  = DataLoader(test_ds, shuffle=False, **_loader_kwargs)

class_names = full_train.classes
num_classes = len(class_names)
print("Classes:", class_names, "| num_classes:", num_classes)

Using device: cuda
Classes: ['all_benign', 'all_early', 'all_pre', 'all_pro', 'breast_benign', 'breast_malignant', 'colon_aca', 'colon_bnt', 'lung_aca', 'lung_bnt', 'lung_scc', 'lymph_cll', 'lymph_fl', 'lymph_mcl', 'oral_normal', 'oral_scc'] | num_classes: 16


In [None]:
import torch.nn as nn
import torch.optim as optim
from torchvision import models

# ResNet50 loaded with the IMAGENET1K_V1 weights; its final layer is swapped for a
# fresh linear head with one output per dataset class.
model = models.resnet50(weights="IMAGENET1K_V1")
_head_inputs = model.fc.in_features
model.fc = nn.Linear(in_features=_head_inputs, out_features=num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

In [None]:
num_epochs = 10  # increase later

for epoch in range(num_epochs):
    # ---- Train ----
    model.train()
    tr_loss = 0.0
    tr_correct = 0
    tr_total = 0
    for batch in train_loader:
        # Each batch unpacks to (images, labels); both are moved with non_blocking=True.
        images, labels = (t.to(device, non_blocking=True) for t in batch)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        tr_loss += loss.item()
        preds = outputs.argmax(dim=1)
        tr_total += labels.size(0)
        tr_correct += (preds == labels).sum().item()

    # max(1, ...) guards the divisions when a loader yields no batches.
    train_loss = tr_loss / max(1, len(train_loader))
    train_acc  = 100.0 * tr_correct / max(1, tr_total)

    # ---- Validation ----
    model.eval()
    va_loss = 0.0
    va_correct = 0
    va_total = 0
    with torch.no_grad():
        for batch in val_loader:
            images, labels = (t.to(device, non_blocking=True) for t in batch)
            outputs = model(images)
            va_loss += criterion(outputs, labels).item()
            preds = outputs.argmax(dim=1)
            va_total += labels.size(0)
            va_correct += (preds == labels).sum().item()

    val_loss = va_loss / max(1, len(val_loader))
    val_acc  = 100.0 * va_correct / max(1, va_total)

    print(f"Epoch [{epoch+1}/{num_epochs}] "
          f"- Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}% "
          f"- Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

NameError: name 'model' is not defined

In [None]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, classification_report, confusion_matrix
from sklearn.preprocessing import label_binarize

# Collect true labels, predicted labels and softmax probabilities over the test split.
model.eval()
all_labels, all_preds, all_probs = [], [], []

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)
        logits = model(images)
        probs = torch.softmax(logits, dim=1)
        _, preds = logits.max(1)

        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(preds.cpu().numpy())
        all_probs.extend(probs.cpu().numpy())

all_labels = np.array(all_labels)
all_preds  = np.array(all_preds)
all_probs  = np.array(all_probs)

# Full list of label indices, so the reports keep one row/column per class even when a
# class does not occur in the test split.
n_classes = len(class_names)
label_ids = np.arange(n_classes)

acc = accuracy_score(all_labels, all_preds)
precision = precision_score(all_labels, all_preds, average="weighted", zero_division=0)
recall = recall_score(all_labels, all_preds, average="weighted", zero_division=0)
f1 = f1_score(all_labels, all_preds, average="weighted", zero_division=0)

# ROC-AUC (multi-class One-vs-Rest). Still best-effort, but the reason for a failure is
# kept and printed instead of being silently dropped.
roc_auc = None
roc_auc_error = None
try:
    y_bin = label_binarize(all_labels, classes=label_ids)
    roc_auc = roc_auc_score(y_bin, all_probs, average="weighted", multi_class="ovr")
except Exception as err:
    roc_auc_error = err

print("\n=== Final Test Performance ===")
print(f"Accuracy  : {acc:.4f}")
print(f"Precision : {precision:.4f}")
print(f"Recall    : {recall:.4f}")
print(f"F1-score  : {f1:.4f}")
if roc_auc is not None:
    print(f"ROC-AUC   : {roc_auc:.4f}")
else:
    print(f"ROC-AUC   : N/A ({roc_auc_error!r})")

# Passing labels= keeps target_names aligned with the label indices; without it
# classification_report raises when a class is absent from both labels and predictions.
print("\nClassification report:")
print(classification_report(all_labels, all_preds, labels=label_ids,
                            target_names=class_names, zero_division=0))

# labels= fixes the matrix to n_classes x n_classes, in class_names order.
print("\nConfusion matrix:")
print(confusion_matrix(all_labels, all_preds, labels=label_ids))


NameError: name 'model' is not defined