In [None]:
!pip install medmnist
!pip install torch torchvision torchaudio
!pip install transformers datasets

In [None]:
from medmnist import PathMNIST
from medmnist import INFO
from torch.utils.data import DataLoader
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
from torchvision.models import vit_b_16, ViT_B_16_Weights
from torchvision import models, transforms
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import medmnist
import torchvision

In [None]:
# Metadata record that medmnist publishes for the PathMNIST dataset.
info = INFO["pathmnist"]

# One entry per class in the label mapping, so its length is the class count.
num_classes = len(info["label"])
print("num_classes =", num_classes)

# Look up the dataset class on the medmnist package by the name stored in the
# metadata record.
DatasetClass = getattr(medmnist, info["python_class"])

In [None]:
# Preprocessing applied to every sample: resize to 224x224, convert to a
# tensor, then normalise each channel with the fixed mean/std below.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
    ]
)

In [None]:
# Build the training and validation datasets; both share the same
# preprocessing pipeline and are downloaded on first use.
train_dataset, val_dataset = (
    DatasetClass(split=split_name, transform=transform, download=True)
    for split_name in ("train", "val")
)

# Only the training loader shuffles; validation keeps the dataset order.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=64, shuffle=False)


In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Start from the pretrained ViT-B/16 and swap its classification head for a
# fresh linear layer with one output per dataset class.
model = vit_b_16(weights=ViT_B_16_Weights.IMAGENET1K_V1)
model.heads.head = nn.Linear(
    in_features=model.heads.head.in_features,
    out_features=num_classes,
)
model.to(device)

In [None]:
# Adam updates every model parameter (backbone and new head alike);
# cross-entropy is the training objective.
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

In [None]:
epochs = 5

# Fine-tune the model: one full pass over `train_loader` per epoch.
for epoch in range(epochs):
    model.train()
    total_loss = 0.0  # sum of the per-batch losses seen in this epoch

    for images, labels in train_loader:
        images = images.to(device)
        # Flatten the labels to 1-D (presumably they arrive as (batch, 1)).
        # A bare `squeeze()` would also drop the batch axis when a batch holds
        # a single sample, producing a 0-dim target that the loss rejects.
        # `long()` guarantees the integer dtype required for class indices.
        labels = labels.reshape(-1).long().to(device)

        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # NOTE: this is the summed batch loss, not a per-batch average.
    print(f"Epoch {epoch+1}/{epochs} | Loss: {total_loss:.4f}")


In [None]:
# Ground-truth and predicted class indices for the whole validation set, in
# loader order.
all_labels = []
all_preds = []

model.eval()
with torch.no_grad():  # inference only: no gradients needed
    for images, labels in val_loader:
        outputs = model(images.to(device))
        predicted = outputs.argmax(dim=1)  # index of the highest logit per sample

        # Flatten to 1-D instead of `squeeze()`: with a single-sample batch a
        # bare squeeze yields a 0-dim array, which `list.extend` cannot
        # iterate. The labels are only collected, so they never need to visit
        # the device.
        all_labels.extend(labels.reshape(-1).cpu().numpy())
        all_preds.extend(predicted.cpu().numpy())


In [None]:
cm = confusion_matrix(all_labels, all_preds)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)

# Create the axes ourselves and hand them to `disp.plot`. Without `ax=`, the
# display object opens its own figure, so a preceding `plt.figure(figsize=...)`
# only leaves an empty figure behind and the requested size is never applied.
fig, ax = plt.subplots(figsize=(10, 8))
disp.plot(ax=ax, include_values=True, cmap="Blues", xticks_rotation="vertical")
ax.set_title("Confusion Matrix - ViT on PathMNIST")
plt.show()
print("CLASSIFICATION REPORT:\n")
print(classification_report(all_labels, all_preds))


In [None]:
# Class names taken from the dataset metadata, in the mapping's own order, so
# that they can be looked up by integer class index.
class_names = [*info["label"].values()]

def show_predictions(num_images=6):
    """Plot validation images titled with their true and predicted classes.

    Iterates over `val_loader` in order, runs `model` on each batch and draws
    the first `num_images` samples in a three-column grid.

    Args:
        num_images: Maximum number of samples to draw. Fewer are drawn when
            `val_loader` is exhausted first. Values up to 6 use a 2x3 grid;
            larger values add rows as needed.
    """
    # Per-channel statistics matching the `Normalize` step of `transform`.
    # Shaped (3,) so they broadcast over the trailing channel axis of an
    # (H, W, C) image; every channel must be undone with its own mean/std,
    # otherwise the displayed colours are shifted.
    norm_mean = np.array([0.485, 0.456, 0.406])
    norm_std = np.array([0.229, 0.224, 0.225])

    n_cols = 3
    # Ceil-divide to get the row count, but never go below the 2x3 layout.
    n_rows = max(2, (num_images + n_cols - 1) // n_cols)

    model.eval()
    images_shown = 0

    plt.figure(figsize=(12, 4 * n_rows))

    # Inference only: skip autograd bookkeeping for the forward passes.
    with torch.no_grad():
        for images, labels in val_loader:
            if images_shown >= num_images:
                break

            outputs = model(images.to(device))
            predicted = outputs.argmax(dim=1).cpu()
            # Flatten to 1-D; a bare `squeeze()` would turn a single-sample
            # batch into a 0-dim tensor that cannot be indexed.
            labels = labels.reshape(-1)

            for i in range(images.size(0)):
                if images_shown >= num_images:
                    break

                # (C, H, W) tensor -> (H, W, C) array, then undo normalisation.
                img = images[i].cpu().permute(1, 2, 0).numpy()
                img = img * norm_std + norm_mean
                img = np.clip(img, 0, 1)

                plt.subplot(n_rows, n_cols, images_shown + 1)
                plt.imshow(img)
                plt.axis("off")
                plt.title(f"True: {class_names[labels[i].item()]}\nPred: {class_names[predicted[i].item()]}")

                images_shown += 1

    # Always render, including when the loader ran out before `num_images`.
    plt.show()

# Render a sample of six validation predictions.
show_predictions(num_images=6)