
Downloading the dataset

We will be using the chest X-ray pneumonia dataset from Kaggle: https://www.kaggle.com/paultimothymooney/chest-xray-pneumonia


In [1]:
!pip install opendatasets --upgrade --quiet

In [2]:
import opendatasets as od

In [3]:
dataset_url = "https://www.kaggle.com/paultimothymooney/chest-xray-pneumonia"

In [4]:
od.download(dataset_url)

Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username: mesugareymond
Your Kaggle Key: ········


  0%|          | 0.00/2.29G [00:00<?, ?B/s]

Downloading chest-xray-pneumonia.zip to ./chest-xray-pneumonia


  2%|▏         | 40.0M/2.29G [00:12<11:47, 3.42MB/s]


KeyboardInterrupt: 

In [None]:
data_dir = "./chest-xray-pneumonia/chest_xray"

In [None]:
import shutil

# Remove the two redundant folders from the extracted dataset directory.
# A missing folder is tolerated so this cell can be re-run (e.g. after
# Restart & Run All) once the folders have already been deleted.
for leftover in ("__MACOSX", "chest_xray"):
    try:
        shutil.rmtree(data_dir + "/" + leftover)
    except FileNotFoundError:
        pass  # already removed on a previous run

In [None]:
import os

In [None]:
os.listdir(data_dir)

Deleting unnecessary folders

In [None]:
os.listdir(data_dir)

Import the dataset <br>
Use the ImageFolder class from torchvision

In [None]:
from torchvision.datasets import ImageFolder

In [None]:
dataset = ImageFolder(data_dir)

In [None]:
len(dataset)

In [None]:
dataset[0]

In [None]:
dataset.classes

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
img, label = dataset[0]
plt.imshow(img)

In [None]:
import torchvision.transforms as tt
# Reload the dataset with a tensor-producing transform: the following cell calls
# `img.permute(...)`, which only exists on tensors, not on the PIL images that the
# transform-less ImageFolder returns.
dataset = ImageFolder(data_dir, tt.Compose([tt.Resize(64), tt.CenterCrop(64), tt.ToTensor()]))

In [None]:
img, label = dataset[0]

plt.imshow(img.permute((1,2,0)))

In [None]:
# One shared preprocessing pipeline so the three splits cannot drift apart:
# resize, center-crop to IMAGE_SIZE x IMAGE_SIZE, then convert to a tensor.
IMAGE_SIZE = 224
xray_transform = tt.Compose([tt.Resize(IMAGE_SIZE), tt.CenterCrop(IMAGE_SIZE), tt.ToTensor()])

train_ds = ImageFolder(data_dir + "/train", xray_transform)
test_ds = ImageFolder(data_dir + "/test", xray_transform)
val_ds = ImageFolder(data_dir + "/val", xray_transform)

Checking the length of each set

In [None]:
len(train_ds)

In [None]:
len(test_ds)

In [None]:
# Size of the validation split (train and test are shown in the cells above).
len(val_ds)

In [None]:
img, label = train_ds[1]

plt.imshow(img.permute((1,2,0)))

Creating data loaders

In [None]:
from torch.utils.data import DataLoader
batch_size = 32

# Training batches are reshuffled on every pass over the loader.
train_dl = DataLoader(
    train_ds,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
)

# Validation batches keep the dataset order (no shuffling requested).
val_dl = DataLoader(
    val_ds,
    batch_size=batch_size,
    num_workers=4,
    pin_memory=True,
)

In [None]:
from torchvision.utils import make_grid

def show_batch(dl):
    """Plot the first batch yielded by `dl` as one image grid (16 images per row).

    Args:
        dl: iterable yielding `(images, labels)` pairs; `images` is passed to
            `make_grid`, so it is presumably a (B, C, H, W) tensor - confirm
            against the dataset transforms.

    Nothing is drawn if `dl` yields no batches.
    """
    for images, _ in dl:
        fig, ax = plt.subplots(figsize=(16, 8))
        ax.set_xticks([]); ax.set_yticks([])
        # .cpu() is a no-op for CPU batches and lets the function also accept
        # batches that were already moved to the GPU (matplotlib needs host memory).
        grid = make_grid(images.cpu(), nrow=16)
        # make_grid returns channels-first; imshow expects channels-last.
        ax.imshow(grid.permute(1, 2, 0))
        break  # only the first batch is shown

In [None]:
show_batch(train_dl)

Check the number of images in each class

In [None]:
# Count the images of each class inside every split folder under data_dir.
for cls in os.listdir(data_dir):
    split_dir = os.path.join(data_dir, cls)
    if not os.path.isdir(split_dir):
        continue  # ignore stray files (e.g. .DS_Store) next to the split folders
    PNEUMONIA_LEN = len(os.listdir(os.path.join(split_dir, "PNEUMONIA")))
    NORMAL_LEN = len(os.listdir(os.path.join(split_dir, "NORMAL")))
    # Label each count with its class so the two lines can be told apart.
    print(cls, "PNEUMONIA:", PNEUMONIA_LEN)
    print(cls, "NORMAL:", NORMAL_LEN)
    print("total", cls, ":", PNEUMONIA_LEN + NORMAL_LEN)



Utility Functions and Classes

GPU Utilities

In [None]:
import torch
def get_default_device():
    """Return the CUDA device when one is available, otherwise the CPU device."""
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_name)
    
def to_device(data, device):
    """Move `data` to `device`.

    A list or tuple is processed element by element (recursively) and always
    comes back as a list; anything else is moved with a non-blocking `.to()`.
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)
    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved

class DeviceDataLoader:
    """Thin wrapper around a data loader that moves every batch to a device."""

    def __init__(self, dl, device):
        # Keep both the wrapped loader and the target device for later iteration.
        self.dl, self.device = dl, device

    def __iter__(self):
        """Iterate over the wrapped loader, yielding each batch on `self.device`."""
        target = self.device
        for batch in self.dl:
            yield to_device(batch, target)

    def __len__(self):
        """Return the length of the wrapped loader (its number of batches)."""
        return len(self.dl)

In [None]:
torch.cuda.is_available()

In [None]:
device = get_default_device()

In [None]:
device

In [None]:
# just checking
img, label = test_ds[5]
plt.imshow(img.permute((1,2,0)))
print(label)
img.shape

In [None]:
img.device

In [None]:
img_gpu = to_device(img, device)
img_gpu.device

In [None]:

train_dl = DeviceDataLoader(train_dl, device )
val_dl = DeviceDataLoader(val_dl, device)
print(train_dl.device, val_dl.device)

Model and Training Utilities

In [None]:
import torch.nn as nn

class ImageClassificationBase(nn.Module):
    """Shared training/validation helpers for image classifiers.

    Subclasses supply `forward`; every step here calls `self(images)` and
    scores the result against `labels` with cross-entropy.
    """

    def training_step(self, batch):
        """Return the cross-entropy loss for one `(images, labels)` training batch."""
        images, labels = batch
        out = self(images)  # Generate predictions
        # Use nn.functional directly: no `F` alias is imported alongside this
        # class, so `F.cross_entropy` would raise NameError.
        loss = nn.functional.cross_entropy(out, labels)
        return loss

    def validation_step(self, batch):
        """Return detached loss and accuracy for one `(images, labels)` validation batch."""
        images, labels = batch
        out = self(images)  # Generate predictions
        loss = nn.functional.cross_entropy(out, labels)
        acc = accuracy(out, labels)
        return {'val_loss': loss.detach(), 'val_acc': acc}

    def validation_epoch_end(self, outputs):
        """Average the per-batch results from `validation_step` into plain floats.

        The mean is taken over batches (each batch counts equally).
        """
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()  # Combine losses
        batch_accs = [x['val_acc'] for x in outputs]
        epoch_acc = torch.stack(batch_accs).mean()  # Combine accuracies
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}

    def epoch_end(self, epoch, result):
        """Print the train loss, validation loss and validation accuracy for `epoch`."""
        print("Epoch [{}], train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}".format(
            epoch, result['train_loss'], result['val_loss'], result['val_acc']))
        
def accuracy(outputs, labels):
    """Return the fraction of rows whose highest-scoring column equals the label.

    The result is a 0-dim tensor built from a Python float.
    """
    preds = torch.max(outputs, dim=1).indices
    num_correct = (preds == labels).sum().item()
    return torch.tensor(num_correct / len(preds))

In [None]:
@torch.no_grad()
def evaluate(model, val_loader):
    """Evaluate the model's performance on the validation set.

    Switches the model to eval mode, runs `validation_step` on every batch
    with gradients disabled, and returns `validation_epoch_end` of the results.
    """
    model.eval()
    step_outputs = []
    for batch in val_loader:
        step_outputs.append(model.validation_step(batch))
    return model.validation_epoch_end(step_outputs)

def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    """Train `model` for `epochs` epochs and validate after each one.

    Args:
        epochs: number of passes over `train_loader`.
        lr: learning rate passed to `opt_func`.
        model: object providing `training_step`, `epoch_end` and the methods
            used by `evaluate`.
        train_loader: iterable of training batches.
        val_loader: iterable of validation batches, passed to `evaluate`.
        opt_func: optimizer factory called as `opt_func(model.parameters(), lr)`.

    Returns:
        A list with one entry per epoch: the result of `evaluate` with an
        added `'train_loss'` key holding the mean training loss as a float.
    """
    history = []
    optimizer = opt_func(model.parameters(), lr)
    for epoch in range(epochs):
        # Training phase
        model.train()
        train_losses = []
        for batch in train_loader:
            loss = model.training_step(batch)
            # Keep only the value for reporting: storing the graph-attached
            # tensor would accumulate autograd history across the whole epoch.
            train_losses.append(loss.detach())
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        # Validation phase
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        model.epoch_end(epoch, result)
        history.append(result)
    return history

Model (ResNet9)

In [None]:
def conv_block(in_channels, out_channels, pool=False):
    """Build a 3x3 convolution (padding 1) -> batch norm -> in-place ReLU stack.

    When `pool` is true a 2x2 max-pool is appended as a fourth layer.
    """
    conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
    norm = nn.BatchNorm2d(out_channels)
    activation = nn.ReLU(inplace=True)
    if pool:
        return nn.Sequential(conv, norm, activation, nn.MaxPool2d(2))
    return nn.Sequential(conv, norm, activation)

class ResNet9(ImageClassificationBase):
    """Nine-layer residual CNN: four conv blocks, two residual pairs, linear head.

    Args:
        in_channels: number of channels of the input images.
        num_classes: number of output logits.

    The shape comments below are per image and assume 224 x 224 inputs, as
    produced by the Resize/CenterCrop transforms used for the datasets here.
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        # Input: in_channels x 224 x 224
        self.conv1 = conv_block(in_channels, 64)  # 64 x 224 x 224
        self.conv2 = conv_block(64, 128, pool=True)  # 128 x 112 x 112 (pooling halves H and W)
        self.res1 = nn.Sequential(conv_block(128, 128),
                                  conv_block(128, 128))  # 128 x 112 x 112

        self.conv3 = conv_block(128, 256, pool=True)  # 256 x 56 x 56
        self.conv4 = conv_block(256, 512, pool=True)  # 512 x 28 x 28
        self.res2 = nn.Sequential(conv_block(512, 512),
                                  conv_block(512, 512))  # 512 x 28 x 28

        # Global max-pool to 1 x 1 regardless of the feature-map size. A fixed
        # MaxPool2d(4) only yields 512 features for 4 x 4 maps (32 x 32 inputs);
        # with 224 x 224 inputs it leaves 512 x 7 x 7 and breaks Linear(512, ...).
        # For 4 x 4 maps the output is identical to MaxPool2d(4).
        self.classifier = nn.Sequential(nn.AdaptiveMaxPool2d(1),  # 512 x 1 x 1
                                        nn.Flatten(),  # 512
                                        nn.Dropout(0.2),  # 512; randomly zeroes 20% of features during training
                                        nn.Linear(512, num_classes))  # num_classes

    def forward(self, xb):
        """Return class logits of shape (batch, num_classes) for image batch `xb`."""
        out = self.conv1(xb)
        out = self.conv2(out)
        out = self.res1(out) + out  # residual connection
        out = self.conv3(out)
        out = self.conv4(out)
        out = self.res2(out) + out  # residual connection
        out = self.classifier(out)
        return out

In [None]:
model = to_device(ResNet9(3, len(train_ds.classes)), device)
model

In [None]:
# Checking if the weight of a layer is on CUDA
model.conv1[0].weight.device

Pass one batch of input tensor through the model

In [None]:
# Smoke test: push only the first training batch through the model and
# report the input shape/device and the shape of the predictions.
for batch in train_dl:
    images, label = batch
    print("images.shape", images.shape)
    print("images.device", images.device)
    preds = model(images)
    print("preds.shape", preds.shape)
    break  # one batch is enough