In [None]:
# Install dependencies with the %pip magic rather than !pip so the packages
# land in the environment of the running kernel.
# medmnist is pinned to the release recorded in this notebook's last run.
%pip install medmnist==3.0.2
%pip install torch torchvision torchaudio
%pip install transformers datasets

Collecting medmnist
  Downloading medmnist-3.0.2-py3-none-any.whl.metadata (14 kB)
Collecting fire (from medmnist)
  Downloading fire-0.7.1-py3-none-any.whl.metadata (5.8 kB)
Downloading medmnist-3.0.2-py3-none-any.whl (25 kB)
Downloading fire-0.7.1-py3-none-any.whl (115 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.9/115.9 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fire, medmnist
Successfully installed fire-0.7.1 medmnist-3.0.2


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader

from medmnist import PathMNIST
import medmnist

from torchvision import transforms

In [None]:
# Preprocessing applied to every image, in this order:
#   1. ToTensor  - converts the image to a PyTorch tensor laid out as C×H×W.
#   2. Normalize - computes (x - 0.5) / 0.5, rescaling pixel values from
#                  [0, 1] to [-1, 1], a common input range for neural networks.
preprocessing_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
]
transform = transforms.Compose(preprocessing_steps)

In [None]:
# Fetch (downloading on first use) the three PathMNIST splits, each with the
# same preprocessing pipeline attached.
train_dataset, val_dataset, test_dataset = (
    PathMNIST(split=split_name, transform=transform, download=True)
    for split_name in ("train", "val", "test")
)

# Wrap each split in a DataLoader yielding batches of 64 samples. Only the
# training split is reshuffled every epoch; validation and test keep their order.
train_loader, val_loader, test_loader = (
    DataLoader(split_dataset, batch_size=64, shuffle=reshuffle)
    for split_dataset, reshuffle in (
        (train_dataset, True),
        (val_dataset, False),
        (test_dataset, False),
    )
)


100%|██████████| 206M/206M [00:10<00:00, 19.3MB/s]


In [None]:
class SmallCNN(nn.Module):
  """Small three-block convolutional classifier for 28x28 images.

  Each block is Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2, 2). The padded
  convolutions keep the spatial size and every pooling step halves it
  (rounding down), so a 28x28 input shrinks 28 -> 14 -> 7 -> 3. The final
  linear layer therefore receives 64 * 3 * 3 = 576 features per image.

  Args:
    in_channels: Number of channels in the input images. Defaults to 3
      (RGB, as used for PathMNIST).
    num_classes: Number of output logits. Defaults to 9 (PathMNIST).
  """

  def __init__(self, in_channels=3, num_classes=9):
    super().__init__()

    # Three 3x3 convolutions; padding=1 preserves height and width, only the
    # number of feature maps grows: in_channels -> 16 -> 32 -> 64.
    self.conv1 = nn.Conv2d(in_channels, 16, kernel_size=3, padding=1)
    self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
    self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)

    # Classifier head. After three poolings a 28x28 input is 3x3 with 64
    # channels (28 -> 14 -> 7 -> 3), hence 64 * 3 * 3 input features.
    self.fc = nn.Linear(64 * 3 * 3, num_classes)

    # Activation shared by all convolutional blocks.
    self.relu = nn.ReLU()

    # 2x2 max pooling with stride 2: halves height and width (floor division).
    self.pool = nn.MaxPool2d(2, 2)

  def forward(self, x):
    """Return raw class logits for a batch of images.

    Args:
      x: Tensor of shape (batch, in_channels, 28, 28). Other spatial sizes
        will not match the 64 * 3 * 3 features the linear layer expects.

    Returns:
      Tensor of shape (batch, num_classes) holding unnormalized logits
      (no softmax is applied here).
    """
    x = self.pool(self.relu(self.conv1(x)))  # 28x28 -> 14x14
    x = self.pool(self.relu(self.conv2(x)))  # 14x14 -> 7x7
    x = self.pool(self.relu(self.conv3(x)))  # 7x7   -> 3x3 (7 // 2 == 3)

    # Flatten everything except the batch dimension before the linear layer.
    x = x.flatten(start_dim=1)

    return self.fc(x)

In [None]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")

# Build the network and move its parameters to the selected device.
model = SmallCNN()
model = model.to(device)

# Cross-entropy is the standard loss for multi-class classification.
criterion = nn.CrossEntropyLoss()

# Adam optimizer over every model parameter, learning rate 1e-3.
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [None]:
def run_epoch(model, loader, criterion, device, optimizer=None):
  """Run one full pass over `loader` and return (mean loss, accuracy in %).

  Args:
    model: Network to train or evaluate.
    loader: Iterable yielding (images, labels) batches.
    criterion: Loss function called as criterion(outputs, labels). The
      per-sample averaging below assumes it returns the batch mean.
    device: Device the batches are moved to before the forward pass.
    optimizer: When given, the model is put in training mode and its weights
      are updated after every batch. When None, the model is put in eval
      mode and gradients are disabled.

  Returns:
    Tuple (loss, accuracy): loss averaged over all samples seen, and the
    percentage of samples whose arg-max prediction equals the label.

  Raises:
    ValueError: If `loader` yields no samples.
  """
  is_training = optimizer is not None
  model.train(is_training)  # train(False) is the same as eval()

  loss_sum = 0.0
  n_correct = 0
  n_seen = 0

  with torch.set_grad_enabled(is_training):
    for images, labels in loader:
      images = images.to(device)
      # Flatten labels to shape (batch,). Unlike squeeze(), reshape(-1) still
      # yields a 1-D tensor when a batch holds a single sample.
      labels = labels.reshape(-1).long().to(device)

      if is_training:
        optimizer.zero_grad()  # reset gradients from the previous step

      outputs = model(images)  # forward pass
      loss = criterion(outputs, labels)

      if is_training:
        loss.backward()  # back-propagation
        optimizer.step()  # weight update

      # Weight each batch by its size so a smaller final batch does not
      # skew the epoch average.
      batch_size = labels.size(0)
      loss_sum += loss.item() * batch_size
      n_correct += (outputs.argmax(dim=1) == labels).sum().item()
      n_seen += batch_size

  if n_seen == 0:
    raise ValueError("loader yielded no samples")

  return loss_sum / n_seen, 100.0 * n_correct / n_seen


# Per-epoch history, consumed by the plotting cell.
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

epochs = 5

for epoch in range(epochs):
  # Validate after every training epoch so both histories have one entry
  # per epoch and can be plotted against each other.
  train_loss, train_acc = run_epoch(model, train_loader, criterion, device, optimizer)
  val_loss, val_acc = run_epoch(model, val_loader, criterion, device)

  train_losses.append(train_loss)
  train_accuracies.append(train_acc)
  val_losses.append(val_loss)
  val_accuracies.append(val_acc)

  print(f"Epoch {epoch+1}/{epochs} | "
        f"Train Loss: {train_losses[-1]:.4f} | "
        f"Val Loss: {val_losses[-1]:.4f} | "
        f"Train Acc: {train_accuracies[-1]:.2f}% | "
        f"Val Acc: {val_accuracies[-1]:.2f}%")

# Final validation accuracy, taken from the validation counters.
print(f"Validation Accuracy: {val_accuracies[-1]:.2f}%")

In [None]:
# Draw one figure per metric (loss first, then accuracy), each comparing the
# training history against the validation history over the epochs.
for metric_name, y_label, train_history, val_history in (
    ("Loss", "Loss", train_losses, val_losses),
    ("Accuracy", "Accuracy (%)", train_accuracies, val_accuracies),
):
  fig, ax = plt.subplots(figsize=(7, 5))
  ax.plot(train_history, label=f"Train {metric_name}")
  ax.plot(val_history, label=f"Validation {metric_name}")
  ax.set_xlabel("Epoch")
  ax.set_ylabel(y_label)
  ax.set_title(f"Training vs Validation {metric_name}")
  ax.legend()
  ax.grid(True)
  plt.show()
