In [1]:
import time
start = time.time()

from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
from sklearn.metrics import *

In [2]:
chest = np.load("./nick_bucket/marketplace-c3kdc8vj/MedMNIST/120/artifactFiles/chestmnist_128.npz")

from PIL import Image

class MyDataset(Dataset):
  def __init__(self, x, y, transform=None, as_rgb=True):
    super().__init__()
    self.x = x
    self.y = y 
    self.transform = transform
    self.as_rgb = as_rgb
  
  def __getitem__(self, idx):
    img, target = self.x[idx], self.y[idx].astype(int)
    img = Image.fromarray(img)
    if self.as_rgb:
        img = img.convert('RGB')

    if self.transform is not None:
        img = self.transform(img)
    
    return img, target

  def __repr__(self):
    return "This is my dataset and it has length {}".format(len(self))

  def __len__(self):
    assert len(self.x) == len(self.y)
    return len(self.y)

In [3]:
data_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[.5], std=[.5])])

my_dataset = MyDataset(chest['train_images'], chest['train_labels'], transform=data_transform, as_rgb=True)
my_dataloader = DataLoader(my_dataset, batch_size=128)

In [4]:
val_dataset = MyDataset(chest['val_images'], chest['val_labels'], transform=data_transform)
val_dataloader = DataLoader(val_dataset, batch_size=128)

test_dataset = MyDataset(chest['test_images'], chest['test_labels'], transform=data_transform)
test_dataloader = DataLoader(test_dataset, batch_size=128)

label_mapping = ['atelectasis', 'cardiomegaly', 'effusion', 'infiltration', 'mass', 'nodule', 'pneumonia', 'pneumothorax', 'consolidation', 'edema', 'emphysema', 'fibrosis', 'pleural', 'hernia']


In [5]:

import sys
sys.path.insert(0, './classifying-chestMNIST')
from models import ResNet18
net = ResNet18(in_channels=3, num_classes=14)


In [6]:
###################################################

In [7]:
num_epochs = 1
lr = 0.001
gamma=0.1
milestones = [0.5 * num_epochs, 0.75 * num_epochs]

# Optimizer
optimizer = torch.optim.Adam(net.parameters(), lr=lr)

# Scheduler
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=gamma)

# Loss function (cross entropy for classification)
loss_func = nn.BCEWithLogitsLoss()

In [8]:
def getAUC(y_true, y_score):
    '''AUC metric.
    :param y_true: the ground truth labels, shape: (n_samples, n_labels) or (n_samples,) if n_labels==1
    :param y_score: the predicted score of each class,
    shape: (n_samples, n_labels) or (n_samples, n_classes) or (n_samples,) if n_labels==1 or n_classes==1
    :param task: the task of current dataset
    '''
    y_true = y_true.squeeze()
    y_score = y_score.squeeze()

    auc = 0
    for i in range(y_score.shape[1]):
        label_auc = roc_auc_score(y_true[:, i], y_score[:, i])
        auc += label_auc
    ret = auc / y_score.shape[1]

    return ret

In [9]:
def getACC(y_true, y_score, threshold=0.5):
    '''Accuracy metric.
    :param y_true: the ground truth labels, shape: (n_samples, n_labels) or (n_samples,) if n_labels==1
    :param y_score: the predicted score of each class,
    shape: (n_samples, n_labels) or (n_samples, n_classes) or (n_samples,) if n_labels==1 or n_classes==1
    :param task: the task of current dataset
    :param threshold: the threshold for multilabel and binary-class tasks
    '''
    y_true = y_true.squeeze()
    y_score = y_score.squeeze()

    y_pre = y_score > threshold
    acc = 0
    for label in range(y_true.shape[1]):
        label_acc = accuracy_score(y_true[:, label], y_pre[:, label])
        acc += label_acc
    ret = acc / y_true.shape[1]

    return ret

In [10]:
from sklearn.metrics import roc_auc_score, accuracy_score
def test(model, split_labels, data_loader, criterion, device='cuda', raw=False):
    model.cuda()
    model.eval()
    
    total_loss = []
    y_score = torch.tensor([]).to('cpu')
    with torch.no_grad():
        for batch in data_loader:
            # Forward pass 
            inp, labels = batch
            inp = torch.tensor(inp.cuda(), dtype=torch.float32)
            out = model(inp)
            labels = labels.to(torch.float32).cuda()
            loss = criterion(out, labels)

            # Get predictions from scores
            sigmoid = torch.nn.Sigmoid()
            answers = sigmoid(out).data.cpu()

            # Recording values
            y_score = torch.cat((y_score, answers), 0)
            total_loss.append(loss.item())
        
        y_score = y_score.cpu().data.numpy()
        auc = getAUC(split_labels, y_score)
        acc = getACC(split_labels, y_score)

        testing_loss = np.mean(total_loss)

        if raw:
            return [testing_loss, auc, acc, split_labels, y_score]

        return [testing_loss, auc, acc]

In [11]:
net.cuda()

best_epoch = 0
best_auc = 0
best_model = net

for epoch in range(num_epochs): # We go over the data ten times
    losses = []
    net.train()
    for batch in my_dataloader:
        optimizer.zero_grad()

        # Forward pass 
        inp, labels = batch
        inp = torch.tensor(inp.cuda(), dtype=torch.float32)
        out = net(inp)
        labels = labels.to(torch.float32).cuda()
        loss = loss_func(out, labels)
        losses.append(loss.item())

        # Backward pass
        loss.backward()
        optimizer.step()
    
    train_loss = np.mean(losses)
    val_metrics = test(net, chest['val_labels'], val_dataloader, loss_func)

    cur_auc = val_metrics[2]
    if cur_auc > best_auc:
        best_epoch = epoch
        best_auc = cur_auc
        best_model = net
        print(f"Epoch {best_epoch} is the best yet with Val ACC = {best_auc}")
        torch.save(best_model, './ch-2/chest_resnet18_224.pt')
    
    scheduler.step()

  inp = torch.tensor(inp.cuda(), dtype=torch.float32)
  inp = torch.tensor(inp.cuda(), dtype=torch.float32)


Epoch 0 is the best yet with Val ACC = 0.9484484229559549


In [12]:
test_metrics = test(net, chest['test_labels'], test_dataloader, loss_func, raw=True)

  inp = torch.tensor(inp.cuda(), dtype=torch.float32)


In [13]:
y_true, y_score = test_metrics[-2], test_metrics[-1]
print(f"Test AUC: {test_metrics[1]:5f} \nTest ACC: {test_metrics[2]:5f}")

Test AUC: 0.643341 
Test ACC: 0.946622


In [14]:
end = time.time()

print(end - start)

344.6658489704132
