In [1]:
import pydicom
import cv2
import numpy as np
import pandas as pd
from pathlib import Path
from tqdm.notebook import tqdm

import torch
import torch.nn as nn
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader

In [2]:
# Load the label table and keep only the first row for each patientId,
# so every patient appears exactly once.
labels = (
    pd.read_csv("/kaggle/input/rsna-pneumonia-detection-challenge/stage_2_train_labels.csv")
    .drop_duplicates(subset="patientId")
)

# Source directory with the DICOM files and destination root for the .npy files.
train_images = "/kaggle/input/rsna-pneumonia-detection-challenge/stage_2_train_images"
processed_images = "/kaggle/working/processed"

def img_processing(val_split):
    """Convert every DICOM listed in ``labels`` to a 224x224 float16 ``.npy`` file.

    Files are written to ``{processed_images}/{train|val}/{label}/{patientId}.npy``
    where ``label`` is the row's ``Target`` value. The split is positional:
    rows are taken in the order they appear in ``labels`` (no shuffling).

    Args:
        val_split: Fraction of the rows that goes to the ``train`` folder; the
            remaining rows go to ``val``. NOTE: despite its name this is the
            *training* fraction (``0.8`` -> first 80% of rows are training data).
    """
    train_val_split = round(labels.shape[0] * val_split)

    # Create train/0, train/1, val/0, val/1 up front.
    for parent_dir in ["train", "val"]:
        for label in range(2):
            Path(processed_images + f"/{parent_dir}/{label}").mkdir(parents=True, exist_ok=True)

    # Walk ids and labels together instead of looking the label up by position
    # on every iteration; zip() has no length, so tqdm gets it explicitly.
    rows = zip(labels.patientId, labels.Target)
    for c, (patient_id, label) in enumerate(tqdm(rows, total=len(labels))):
        # Append the extension rather than using with_suffix(), which would
        # replace part of an id that happened to contain a dot.
        dcm_path = Path(train_images) / f"{patient_id}.dcm"
        # dcmread is the supported reader; read_file is a deprecated alias.
        dcm = pydicom.dcmread(dcm_path).pixel_array / 255
        dcm_arr = cv2.resize(dcm, (224, 224)).astype(np.float16)

        train_or_val = "train" if c < train_val_split else "val"
        # np.save appends the ".npy" extension itself.
        np.save(processed_images + f"/{train_or_val}/{str(label)}/{patient_id}", dcm_arr)

In [3]:
# 0.8 -> the first 80% of the rows are written to train/, the rest to val/.
img_processing(val_split=0.8)

  0%|          | 0/26684 [00:00<?, ?it/s]

In [4]:
# Layer spec consumed by VGGNet.create_conv_layers: an int adds a 3x3 conv block
# with that many output channels, 'M' adds a 2x2 max-pool.
VGG16 = [
    64, 64, 'M',
    128, 128, 'M',
    256, 256, 256, 'M',
    512, 512, 512, 'M',
    512, 512, 512, 'M',
]

class VGGNet(nn.Module):
    """VGG-style classifier: conv/batch-norm/ReLU blocks with max-pooling, then three linear layers.

    The convolutional stack is built from the module-level ``VGG16`` spec. The
    first linear layer expects ``512*7*7`` flattened features.

    Args:
        in_channels: Number of channels of the input images.
        num_classes: Size of the output (logit) vector.
    """

    def __init__(self, in_channels: int = 1, num_classes: int = 2):
        super().__init__()
        self.in_channels = in_channels
        self.conv_layers = self.create_conv_layers(VGG16)

        classifier = [
            nn.Linear(512*7*7, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, num_classes),
        ]
        self.fcs = nn.Sequential(*classifier)

    def forward(self, x):
        """Run the conv stack, flatten everything but the batch axis, apply the linear head."""
        features = self.conv_layers(x)
        flat = features.reshape(features.shape[0], -1)
        return self.fcs(flat)

    def create_conv_layers(self, architecture):
        """Build an ``nn.Sequential`` from a spec of ints (conv channels) and ``'M'`` (max-pool)."""
        stack = []
        channels_in = self.in_channels
        for spec in architecture:
            if type(spec) is int:
                # 3x3 conv with stride 1 / padding 1, followed by batch-norm and ReLU.
                stack.append(nn.Conv2d(in_channels=channels_in,
                                       out_channels=spec,
                                       kernel_size=(3,3),
                                       stride=(1,1),
                                       padding=(1,1)))
                stack.append(nn.BatchNorm2d(spec))
                stack.append(nn.ReLU())
                channels_in = spec
            elif spec == 'M':
                stack.append(nn.MaxPool2d(kernel_size=(2,2), stride=(2,2)))

        return nn.Sequential(*stack)

def test():
    """Smoke-test VGGNet: a random batch must map to ``(batch_size, num_classes)`` outputs.

    Prints the output shape and a success message.

    Raises:
        AssertionError: If the output shape is not ``(batch_size, num_classes)``.
    """
    batch_size, in_channels, num_classes, img_size = 64, 1, 2, 224
    # Build the model from the same values the input tensor and the assertion use.
    model = VGGNet(in_channels, num_classes)
    x = torch.randn(batch_size, in_channels, img_size, img_size)

    # Run the (expensive) forward pass once and without autograd bookkeeping;
    # only the shape is inspected.
    with torch.no_grad():
        out_shape = model(x).shape

    print(out_shape)
    assert out_shape == (batch_size, num_classes), "VGGNet output shape test failed"
    print("Success, tests passed!")

In [5]:
# Run the VGGNet shape check defined in the previous cell.
test()

torch.Size([64, 2])
Success, tests passed!


In [6]:
def load_file(path: str):
    """Read the array stored at ``path`` with ``np.load`` and return it cast to float32."""
    stored = np.load(path)
    return stored.astype(np.float32)

def checkpoint(model, filename: str):
    """Write ``model``'s ``state_dict`` to ``filename`` with ``torch.save``."""
    weights = model.state_dict()
    torch.save(weights, filename)

def resume(model, filename: str):
    """Load the ``state_dict`` stored at ``filename`` into ``model`` in place.

    Args:
        model: Module whose parameters are overwritten.
        filename: Path of a file written by ``torch.save(model.state_dict(), ...)``.
    """
    # Deserialize onto the CPU so a checkpoint written on a GPU machine can be
    # restored where no GPU is available; load_state_dict then copies the
    # values into the model's existing parameters, whatever device they are on.
    # NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
    state_dict = torch.load(filename, map_location="cpu")
    model.load_state_dict(state_dict)

In [7]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Parameters
in_dim = 1           # input channels passed to VGGNet
out_dim = 2          # number of output classes passed to VGGNet
batch_size = 64
num_workers = 4
num_epochs = 200     # upper bound on training epochs
e_stop_thresh = 10   # early-stopping patience, in epochs

# Training pipeline: tensor conversion, normalization, then random augmentation.
train_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(0.49, 0.248),
    transforms.RandomAffine(degrees=(-5, 5), translate=(0, 0.05), scale=(0.9, 1.1)),
    transforms.RandomResizedCrop((224, 224), scale=(0.35, 1), antialias=True),
])

# Validation pipeline: tensor conversion and normalization only.
val_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.49], [0.248]),
])

# Both splits are folders of .npy files read through load_file.
_folder_kwargs = dict(loader=load_file, extensions="npy")
train_dataset = torchvision.datasets.DatasetFolder(
    f"{processed_images}/train/", transform=train_transforms, **_folder_kwargs
)
val_dataset = torchvision.datasets.DatasetFolder(
    f"{processed_images}/val/", transform=val_transforms, **_folder_kwargs
)

# Only the training loader shuffles.
_loader_kwargs = dict(batch_size=batch_size, num_workers=num_workers)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_kwargs)



In [8]:
# Network on the selected device, plus the loss, optimizer and LR schedule
# objects that train_val reads from module scope.
model = VGGNet(in_channels=in_dim, num_classes=out_dim)
model = model.to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.001, momentum=0.9)
# StepLR scales the learning rate by gamma once every step_size calls to step().
exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=7, gamma=0.1)

def train_val(train_loader, val_loader, model):
    """Train ``model``, validating after every epoch with checkpointing and early stopping.

    Reads ``device``, ``loss_fn``, ``optimizer``, ``exp_lr_scheduler``,
    ``num_epochs`` and ``e_stop_thresh`` from module scope. Whenever the
    validation accuracy improves, the weights are saved to ``best_model.pth``;
    training stops once more than ``e_stop_thresh`` epochs pass without an
    improvement.

    Args:
        train_loader: DataLoader yielding ``(inputs, targets)`` training batches;
            its ``dataset`` length is used for progress reporting.
        val_loader: Source of ``(inputs, targets)`` validation batches. Either a
            re-iterable loader, or a one-shot iterator such as
            ``iter(DataLoader)`` (its batches are then cached in memory once).
        model: Module to train; must already be on ``device``.
    """
    size = len(train_loader.dataset)
    best_acc = -1
    best_epoch = -1

    # A bare iterator can be consumed only once, but validation has to run on
    # the whole set after every epoch. Materialize its batches up front so they
    # can be replayed (this keeps the full validation set in host memory).
    if hasattr(val_loader, "__next__"):
        val_loader = list(val_loader)

    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}')
        print('-'*15)

        # Switch back to training mode every epoch: the validation pass below
        # puts the model in eval mode, which would otherwise disable dropout
        # and freeze the batch-norm statistics for all later epochs.
        model.train()

        seen = 0
        for X_train, y_train in train_loader:
            X_train = X_train.to(device)
            y_train = y_train.to(device)

            # Compute predictions and loss
            pred = model(X_train)
            loss = loss_fn(pred, y_train)

            # Backpropagation
            optimizer.zero_grad(set_to_none=True)
            loss.backward()
            optimizer.step()

            # Running count of processed samples (correct for a short last batch).
            seen += len(X_train)
            print(f"loss: {loss.item():>7f}  [{seen:>5d}/{size:>5d}]")

            # Accuracy on the current training batch
            _, prediction = pred.max(1)
            n_correct = (prediction == y_train).sum()
            training_acc = n_correct/X_train.shape[0]
            print(f"training accuracy: {training_acc.item()*100}%")

        # Advance the learning-rate schedule once per epoch, after the
        # optimizer updates of that epoch.
        exp_lr_scheduler.step()

        # Validate on every batch, not a single one: one batch is a noisy
        # (and, for an unshuffled class-sorted folder, single-class) estimate.
        model.eval()
        val_correct = 0
        val_seen = 0
        with torch.no_grad():
            for X_val, y_val in val_loader:
                X_val = X_val.to(device)
                y_val = y_val.to(device)

                _, prediction = model(X_val).max(1)
                val_correct += (prediction == y_val).sum().item()
                val_seen += y_val.shape[0]

        # Guard against an empty validation source.
        validation_acc = val_correct / val_seen if val_seen else 0.0
        print(f"validation accuracy: {validation_acc*100}%")
        print('-'*15)

        if validation_acc > best_acc:
            best_acc = validation_acc
            best_epoch = epoch
            checkpoint(model, "best_model.pth")
        elif epoch - best_epoch > e_stop_thresh:
            print(f"Early stopped training at epoch {epoch+1}")
            break

In [9]:
# Report the device and the dataset sizes before training starts.
print(f'Using {device}')
print(f"There are {len(train_dataset)} images in training set and {len(val_dataset)} images in validation set\n")

# train_val receives an explicit iterator over the validation loader.
val_iter = iter(val_loader)
train_val(train_loader=train_loader, val_loader=val_iter, model=model)

Using cuda
There are 21347 images in training set and 5337 images in validation set

Epoch 1
---------------
loss: 0.746450  [   64/21347]
training accuracy: 31.25%
loss: 0.665900  [  128/21347]
training accuracy: 65.625%
loss: 0.578690  [  192/21347]
training accuracy: 78.125%
loss: 0.651571  [  256/21347]
training accuracy: 70.3125%
loss: 0.510320  [  320/21347]
training accuracy: 84.375%
loss: 0.636143  [  384/21347]
training accuracy: 75.0%
loss: 0.670053  [  448/21347]
training accuracy: 73.4375%
loss: 0.654398  [  512/21347]
training accuracy: 75.0%
loss: 0.687221  [  576/21347]
training accuracy: 75.0%
loss: 0.780752  [  640/21347]
training accuracy: 70.3125%
loss: 0.608780  [  704/21347]
training accuracy: 70.3125%
loss: 0.728860  [  768/21347]
training accuracy: 59.375%
loss: 0.556621  [  832/21347]
training accuracy: 75.0%
loss: 0.596092  [  896/21347]
training accuracy: 70.3125%
loss: 0.593690  [  960/21347]
training accuracy: 78.125%
loss: 0.680349  [ 1024/21347]
training a