<a href="https://colab.research.google.com/github/AntNeedsHelp/ExplainableAI/blob/main/pneumoniaMNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117
!pip3 install medmnist

Looking in indexes: https://download.pytorch.org/whl/cu117
Collecting medmnist
  Downloading medmnist-2.2.3-py3-none-any.whl (22 kB)
Collecting fire (from medmnist)
  Downloading fire-0.5.0.tar.gz (88 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.3/88.3 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: fire
  Building wheel for fire (setup.py) ... [?25l[?25hdone
  Created wheel for fire: filename=fire-0.5.0-py2.py3-none-any.whl size=116934 sha256=77a7dfa85a97bfd56e81a4fd560171340cca68462198bba857ee36f08e5b843f
  Stored in directory: /root/.cache/pip/wheels/90/d4/f7/9404e5db0116bd4d43e5666eaa3e70ab53723e1e3ea40c9a95
Successfully built fire
Installing collected packages: fire, medmnist
Successfully installed fire-0.5.0 medmnist-2.2.3


In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets

from tqdm import tqdm
import numpy as np
import torch.optim as optim
import torch.utils.data as data
import torchvision.transforms as transforms


import medmnist
from medmnist import INFO, Evaluator

In [None]:
# Record the PyTorch build in use so the run can be reproduced later.
installed_torch = torch.__version__
print(installed_torch)

2.0.1+cu118


In [None]:
# --- Experiment configuration ------------------------------------------------
# Key into medmnist.INFO selecting the dataset (alternative: 'breastmnist').
data_flag = 'pneumoniamnist'
download = True

# Training hyper-parameters.
NUM_EPOCHS, BATCH_SIZE, lr = 3, 128, 0.001

# Metadata for the selected dataset, as published in the medmnist registry.
info = INFO[data_flag]
task, n_channels = info['task'], info['n_channels']
n_classes = len(info['label'])

# Look up the dataset class by the name recorded in the registry entry.
DataClass = getattr(medmnist, info['python_class'])

In [None]:
# Preprocessing: convert each image to a tensor, then normalise with mean 0.5 / std 0.5.
data_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean=[.5], std=[.5])]
)

# Tensor-valued train and test splits (built in that order).
train_dataset, test_dataset = (
    DataClass(split=split_name, transform=data_transform, download=download)
    for split_name in ('train', 'test')
)

# Untransformed copy of the training split.
pil_dataset = DataClass(split='train', download=download)

# Batch the datasets; only the training split is shuffled.
train_loader = data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Sanity check: report the shapes of the first test batch, if there is one.
first_batch = next(iter(test_loader), None)
if first_batch is not None:
    X, y = first_batch
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")

Downloading https://zenodo.org/record/6496656/files/pneumoniamnist.npz?download=1 to /root/.medmnist/pneumoniamnist.npz


100%|██████████| 4170669/4170669 [00:01<00:00, 2802845.31it/s]


Using downloaded and verified file: /root/.medmnist/pneumoniamnist.npz
Using downloaded and verified file: /root/.medmnist/pneumoniamnist.npz
Shape of X [N, C, H, W]: torch.Size([128, 1, 28, 28])
Shape of y: torch.Size([128, 1]) torch.int64


In [None]:
# Choose where to run: CUDA if present, otherwise Apple MPS, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

# Define model
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten -> Linear/ReLU x2 -> Linear logits.

    Args:
        in_features: Number of values per sample after flattening
            (defaults to a 28x28 single-channel image).
        hidden_features: Width of both hidden layers.
        num_classes: Number of output logits. Defaults to 2 because this
            notebook's task is binary ("Negative"/"Positive"); the previous
            hard-coded head of 10 let ``argmax`` return indices that have no
            class name.
    """

    def __init__(self, in_features=28 * 28, hidden_features=512, num_classes=2):
        super().__init__()
        # Flattens every dimension after the batch axis.
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Return unnormalised class scores (logits) for a batch ``x``."""
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

# Build the network, then move its parameters onto the selected device.
model = NeuralNetwork()
model = model.to(device)
print(model)

Using cuda device
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [None]:
loss_fn = nn.CrossEntropyLoss()
# Take the learning rate from the configuration cell instead of repeating the
# literal, so changing `lr` there actually affects training.
# NOTE: the optimizer captures the parameters of the `model` object that exists
# when this cell runs. If the model cell is re-run afterwards, re-run this cell
# too; otherwise `optimizer.step()` keeps updating the old, discarded model.
# NOTE(review): the saved training log shows an identical test loss across
# epochs, which is what such a stale optimizer would produce — confirm.
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

In [None]:
def train(dataloader, model, loss_fn, optimizer, print_size=1):
    """Run one training epoch over ``dataloader``.

    Args:
        dataloader: Iterable of ``(X, y)`` batches; ``dataloader.dataset`` must
            support ``len()``.
        model: Module being optimised. Batches are moved to the device of its
            first parameter (CPU if it has none).
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: Optimizer built over ``model``'s parameters.
        print_size: Print the loss every ``print_size`` batches; ``0`` or
            ``None`` disables progress output.
    """
    size = len(dataloader.dataset)
    # Follow the model instead of a notebook-level global so the function also
    # works when it is called from another cell order or another notebook.
    first_param = next(iter(model.parameters()), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    seen = 0
    for batch, (X, y) in enumerate(dataloader):
        # Labels arrive as (N, 1). Drop only the trailing axis: a bare
        # `squeeze()` would also drop the batch axis when N == 1 and make the
        # loss reject the target.
        if y.dim() > 1:
            y = y.squeeze(-1)
        X, y = X.to(target_device), y.to(target_device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # Running sample count stays correct when the last batch is smaller.
        seen += len(X)
        if print_size and batch % print_size == 0:
            print(f"loss: {loss.item():>7f}  [{seen:>5d}/{size:>5d}]")

In [None]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            y = torch.squeeze(y)
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.5f}%, Avg loss: {test_loss:>8f} \n")

## Training loop

Train the model for a fixed number of epochs, evaluating on the test split after each epoch.

In [None]:
epochs = 500

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_loader, model, loss_fn, optimizer, 5)
    test(test_loader, model, loss_fn)
print("Done!")
%time

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
loss: 1.848199  [ 3968/ 4708]
loss: 1.830573  [ 4608/ 4708]
Test Error: 
 Accuracy: 62.50000%, Avg loss: 1.903299 

Epoch 117
-------------------------------
loss: 1.871376  [  128/ 4708]
loss: 1.840435  [  768/ 4708]
loss: 1.836169  [ 1408/ 4708]
loss: 1.865576  [ 2048/ 4708]
loss: 1.865241  [ 2688/ 4708]
loss: 1.856660  [ 3328/ 4708]
loss: 1.850268  [ 3968/ 4708]
loss: 1.840552  [ 4608/ 4708]
Test Error: 
 Accuracy: 62.50000%, Avg loss: 1.903299 

Epoch 118
-------------------------------
loss: 1.837656  [  128/ 4708]
loss: 1.828368  [  768/ 4708]
loss: 1.847940  [ 1408/ 4708]
loss: 1.841873  [ 2048/ 4708]
loss: 1.808964  [ 2688/ 4708]
loss: 1.845094  [ 3328/ 4708]
loss: 1.873782  [ 3968/ 4708]
loss: 1.831878  [ 4608/ 4708]
Test Error: 
 Accuracy: 62.50000%, Avg loss: 1.903299 

Epoch 119
-------------------------------
loss: 1.847891  [  128/ 4708]
loss: 1.833639  [  768/ 4708]
loss: 1.844619  [ 1408/ 4708]
loss: 1.803

In [None]:
# Persist only the learned parameters (the state dict), not the module object.
torch.save(obj=model.state_dict(), f="model.pth")
print("Saved PyTorch Model State to model.pth")

Saved PyTorch Model State to model.pth


In [None]:
# Rebuild the architecture and restore the saved weights.
# `map_location` remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU runtime can still be loaded on a CPU-only one.
# NOTE: `model` is rebound here; an `optimizer` created earlier still refers to
# the previous model's parameters and must be re-created before training again.
model = NeuralNetwork().to(device)
model.load_state_dict(torch.load("model.pth", map_location=device))

<All keys matched successfully>

In [None]:
# Human-readable names for label indices 0 and 1.
classes = [
    "Negative",
    "Positive",
]

model.eval()
# Fetch the sample once; indexing the dataset twice runs the transform twice.
x, y = test_dataset[0]
with torch.no_grad():
    # Add an explicit batch axis. Passing the bare (C, H, W) image only worked
    # because the channel axis has length 1 and was treated as the batch.
    batch = x.unsqueeze(0).to(device)
    pred = model(batch)
    # Convert to plain Python ints before indexing the list of class names.
    predicted_idx = pred[0].argmax(0).item()
    actual_idx = int(y[0])
    predicted, actual = classes[predicted_idx], classes[actual_idx]
    print(f'Predicted: "{predicted}", Actual: "{actual}"')

Predicted: "Positive", Actual: "Positive"
