In [9]:
# Classify chest X-ray images as normal or pneumonia.
# This baseline uses a pretrained ResNet-18 as the model.
# Intended to run in Google Colab.
# Check that a GPU device is visible to the runtime
# (TensorFlow is not used anywhere else in the cells shown here).
import tensorflow as tf
tf.test.gpu_device_name()

'/device:GPU:0'

In [10]:
import os
import torch
from PIL import Image
import torch.utils
import numpy
from torch.utils.data import Dataset
from torchvision import transforms
from torch.utils.data import DataLoader

import torchvision
import torch.nn as nn
import time
from torch.autograd import Variable
from torch.utils.tensorboard import SummaryWriter
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from tqdm import tqdm

In [11]:
# Mean and standard deviation of all X-ray images of this dataset; the same
# value is repeated three times, once per RGB channel.
# NOTE(review): these constants are not referenced in the cells shown here --
# the default transform of ChestXrayDataSet normalizes with the ImageNet
# statistics instead. Confirm which normalization is intended.
MEAN=[0.48104131770750114, 0.48104131770750114, 0.48104131770750114]
STD=[0.23757618012549234, 0.23757618012549234, 0.23757618012549234]

class ChestXrayDataSet(Dataset):
    """Chest X-ray image dataset described by a comma-separated list file.

    Every non-blank line of the list file is read as
    ``<image path relative to root>,<label>[,...]``; only the first two
    fields are used. Samples are returned as ``(image, label)`` where the
    image has been opened with PIL, converted to RGB and passed through
    ``transform``, and the label is a float tensor with one element.
    """

    def __init__(self, root, image_list_file, transform=None):
        """
        Args:
            root: root path to image directory.
            image_list_file: path to the file containing images
                with corresponding labels.
            transform: optional transform to be applied on a sample.
                When omitted, a default pipeline is used: resize to
                320x320, convert to tensor, normalize with the ImageNet
                mean and std.
        """
        imgs_path = []
        labels = []

        with open(image_list_file, "r") as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Tolerate blank lines (e.g. trailing newlines at the
                    # end of the list file) instead of failing on items[1].
                    continue
                items = line.split(',')
                imgs_path.append(os.path.join(root, items[0].strip()))
                # Kept as a one-element list of strings; __getitem__ does
                # the float conversion.
                labels.append([items[1].strip()])

        self.imgs_path = imgs_path
        self.labels = labels

        if transform is None:
            # use pre_defined transfer
            normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                             [0.229, 0.224, 0.225])#ImageNet means and std

            transform = transforms.Compose([
                transforms.Resize([320, 320]),
                # transforms.RandomResizedCrop(320),
                # transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ])

        # Must be assigned for BOTH branches: when this lived inside the
        # `if` above, a caller-supplied transform was silently dropped and
        # __getitem__ failed with AttributeError on self.transform.
        self.transform = transform

    def __getitem__(self, index):
        """
        Args:
            index: the index of item

        Returns:
            image and its label
        """
        img = Image.open(self.imgs_path[index]).convert('RGB')
        label = torch.tensor([float(value) for value in self.labels[index]],
                             dtype=torch.float32)
        if self.transform is not None:
            img = self.transform(img)
        return img, label

    def __len__(self):
        """Return the number of samples listed in the list file."""
        return len(self.imgs_path)


In [17]:
def get_model(num_classes=1,pretrained=True):
    """Build a ResNet-18 whose final layer is replaced by a sigmoid head.

    Args:
        num_classes: number of outputs produced by the replacement head.
        pretrained: forwarded unchanged to ``torchvision.models.resnet18``.

    Returns:
        The ResNet-18 module with ``fc`` replaced by
        Linear -> ReLU -> Dropout(p=0.1) -> Linear -> Sigmoid.
    """
    backbone = torchvision.models.resnet18(pretrained=pretrained)

    # Width of the features feeding the original fully connected layer.
    width = backbone.fc.in_features

    head_layers = [
        nn.Linear(width, width),
        nn.ReLU(inplace=True),
        nn.Dropout(p=0.1),
        nn.Linear(width, num_classes),
        nn.Sigmoid(),
    ]
    backbone.fc = nn.Sequential(*head_layers)

    return backbone

In [22]:
def getBinaryTensor(out, boundary = 0.5):
    """Threshold ``out`` element-wise into 0/1 values and flatten the result.

    Args:
        out: tensor of scores.
        boundary: elements strictly greater than this become 1, all
            other elements become 0.

    Returns:
        A 1-D tensor with the same dtype as ``out``.
    """
    above_boundary = out > boundary
    # Cast the boolean mask back to the input dtype so it holds 1s and 0s.
    return above_boundary.to(out.dtype).view(-1)


In [13]:
def validation(model, dataloader, criterion, total_batch):
    """Return the mean per-batch loss of ``model`` over ``dataloader``.

    The model is switched to eval mode and gradients are disabled while
    iterating. Batches are moved to the GPU when CUDA is available.

    Args:
        model: the network to evaluate.
        dataloader: iterable yielding ``(data, label)`` batches.
        criterion: loss function called as ``criterion(output, target)``.
        total_batch: value passed to tqdm as the progress-bar total.

    Returns:
        Sum of the batch losses divided by the number of batches.
    """
    model.eval()
    use_cuda = torch.cuda.is_available()
    batch_losses = []

    with torch.no_grad():
        progress = tqdm(enumerate(dataloader), total=total_batch)
        for _, (data, label) in progress:
            inputs = data.clone().detach()
            targets = label.clone().detach()
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()

            batch_loss = criterion(model(inputs), targets).item()
            batch_losses.append(batch_loss)
            progress.set_postfix_str('loss: %.5s' % batch_loss)

    return sum(batch_losses) / len(batch_losses)

In [32]:
def train(root,train_data_list, test_data_list,train_batch_size=16, save_model=False, eval_=False,scalar_name='training loss'):
    """Fine-tune the model from ``get_model()`` on the training list.

    Args:
        root: root path to the image directory.
        train_data_list: path to the list file of training images/labels.
        test_data_list: path to the test list file. Accepted for interface
            compatibility; it is not used during training.
        train_batch_size: batch size of the training (and validation) loader.
        save_model: when True, write a checkpoint under ``./checkpoints``
            every time the monitored loss reaches a new minimum.
        eval_: when True, hold out 10% of the training samples and monitor
            the validation loss; otherwise monitor the mean training loss.
        scalar_name: TensorBoard tag under which the running loss is logged.

    Returns:
        The trained model, left on the device it was trained on.

    Raises:
        ValueError: if the training list contains no samples.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    #model
    model = get_model()
    if device.type == 'cuda':
        model.cuda()
        torch.backends.cudnn.benchmark = True
        print("using GPU")

    # data
    print("load data")
    train_data = ChestXrayDataSet(root, train_data_list)
    if len(train_data) == 0:
        raise ValueError('no training samples found in %r' % (train_data_list,))

    eval_dataloader = None
    if eval_:
        # train_test_split needs something to split: split the sample
        # indices and wrap them in Subsets of the full dataset.
        full_data = train_data
        train_idx, eval_idx = train_test_split(list(range(len(full_data))),
                                               test_size=0.1, shuffle=True)
        train_data = torch.utils.data.Subset(full_data, train_idx)
        eval_data = torch.utils.data.Subset(full_data, eval_idx)
        # Order does not matter for a mean validation loss.
        eval_dataloader = DataLoader(eval_data, batch_size=train_batch_size,
                                     shuffle=False, num_workers=2)
    train_dataloader = DataLoader(train_data, batch_size=train_batch_size,
                                  shuffle=True, num_workers=2)

    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    writer = SummaryWriter()

    # training
    print("training")
    epoches = 5
    loss_mean_min = float('inf')
    num_batches = len(train_dataloader)  # includes the last partial batch
    num_samples = len(train_data)

    try:
        for epoch in range(epoches):
            # validation() switches the model to eval mode; switch back so
            # dropout and batch-norm behave as training layers every epoch.
            model.train()
            train_loss = 0.
            train_acc = 0.
            running_loss = 0.
            samples_seen = 0
            bar = tqdm(enumerate(train_dataloader), total=num_batches)

            for step, (data, label) in bar:
                # Inputs do not need gradients; only the parameters do.
                batch_x = data.to(device)
                batch_y = label.to(device)

                out = model(batch_x)
                loss = criterion(out, batch_y)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                loss_value = loss.item()
                train_loss += loss_value
                running_loss += loss_value

                # pred is the expected class, batch_y is the true label
                pred = getBinaryTensor(out.detach())
                train_acc += (pred == batch_y.view(-1)).sum().item()
                samples_seen += batch_y.size(0)
                bar.set_postfix_str('loss: %.5s' % loss_value)

                if step % 20 == 19:
                    print('\n','Epoch: ', epoch, 'Step', step, 'Train_loss: ', train_loss/(step+1), 'Train acc: ', train_acc/samples_seen)
                    writer.add_scalar(scalar_name, running_loss/20, epoch * num_batches + step)
                    running_loss = 0.

            # train_loss is a sum of per-batch means, so average over batches.
            epoch_loss = train_loss / num_batches
            if eval_:
                monitored_loss = validation(model, eval_dataloader, criterion, len(eval_dataloader))
            else:
                monitored_loss = epoch_loss

            if monitored_loss <= loss_mean_min:
                print('\n','Update min loss mean','Epoch: ', epoch, 'Step', step,'min loss mean: ', monitored_loss)
                loss_mean_min = monitored_loss
                if save_model:
                    # torch.save does not create missing directories.
                    os.makedirs('./checkpoints', exist_ok=True)
                    time_end = time.strftime('%m-%d-%Hh%Mm')
                    torch.save({'epoch': epoch + 1,
                                'state_dict': model.state_dict(),
                                'optimizer': optimizer.state_dict()},
                               './checkpoints/m_' + time_end + '.pth.tar')

            print('\n','Epoch: ', epoch, 'Train_loss: ', epoch_loss, 'Train acc: ',
                  train_acc / num_samples)
    finally:
        # Flush pending TensorBoard events even if training is interrupted.
        writer.close()

    return model


In [None]:
#if save model,set dir root and make checkpoints dir before training
# The working directory lives on Google Drive, so the Drive has to be mounted
# before os.chdir can succeed. Mount it here when it is not mounted yet so
# that this cell also works on a fresh runtime with "Run all".
DRIVE_MOUNT_POINT = '/content/drive'
if not os.path.isdir(os.path.join(DRIVE_MOUNT_POINT, 'MyDrive')):
    from google.colab import drive
    drive.mount(DRIVE_MOUNT_POINT)
os.chdir('/content/drive/MyDrive/Colab Notebooks')
# exist_ok=True makes the cell safe to re-run (plain os.mkdir raises when the
# directory already exists).
os.makedirs('./checkpoints', exist_ok=True)

In [None]:
# Mount Google Drive at /content/drive so that files stored there can be
# reached from the Colab runtime. The cell that calls os.chdir into
# /content/drive/MyDrive/... depends on the Drive being mounted.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Train on the whole dataset.
# NOTE(review): 'DATA ROOT', 'TRAIN_DATA_LIST' and 'TEST_DATA_LIST' look like
# placeholders -- replace them with the image directory and the train/test
# list files before running.
root='DATA ROOT'
train_data_list='TRAIN_DATA_LIST'
test_data_list='TEST_DATA_LIST'
# Called with the defaults of train(): batch size 16, save_model=False, eval_=False.
train(root,train_data_list, test_data_list)

In [27]:
# Load the TensorBoard notebook extension so the %tensorboard magic is available.
%load_ext tensorboard

In [None]:
# Open TensorBoard on the 'runs' directory.
# NOTE(review): presumably where the argument-less SummaryWriter() in train()
# writes its event files -- verify against the working directory in use.
%tensorboard --logdir=runs

In [24]:
def test(root,test_data_list,test_batch_size=16,model=None,checkpoint_path=None):
    """Evaluate a model on the test list and report mean loss and accuracy.

    Args:
        root: root path to the image directory.
        test_data_list: path to the list file of test images/labels.
        test_batch_size: batch size of the test loader.
        model: optional, already trained model to evaluate. When omitted a
            model is built with ``get_model()``.
        checkpoint_path: optional path to a checkpoint saved with
            ``torch.save`` as a dict holding a ``'state_dict'`` entry; it is
            loaded into the freshly built model. Ignored when ``model`` is
            given.

    Returns:
        Tuple ``(mean_loss, accuracy)``: the mean of the per-batch losses and
        the fraction of correctly classified test samples.

    Raises:
        ValueError: if the test list contains no samples.
    """
    test_data = ChestXrayDataSet(root, test_data_list)
    if len(test_data) == 0:
        raise ValueError('no test samples found in %r' % (test_data_list,))
    test_dataloader = DataLoader(test_data, batch_size=test_batch_size,
                                 shuffle=False, num_workers=2)
    # Use the loader's own length: it honours test_batch_size and counts the
    # last partial batch.
    total_batch = len(test_dataloader)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    if model is None:
        model = get_model()
        if checkpoint_path is not None:
            # NOTE: torch.load unpickles the file -- only load checkpoints
            # that come from a trusted source.
            checkpoint = torch.load(checkpoint_path, map_location=device)
            model.load_state_dict(checkpoint['state_dict'])
        else:
            print("warning: no model or checkpoint given, "
                  "evaluating a model whose classifier head is untrained")
    if device.type == 'cuda':
        model.cuda()
        torch.backends.cudnn.benchmark = True
        print("using GPU")
    criterion = torch.nn.MSELoss()

    model.eval()
    eval_loss = 0.
    eval_acc = 0.
    with torch.no_grad():
        bar = tqdm(enumerate(test_dataloader), total=total_batch)
        for _, (data, label) in bar:
            batch_X = data.to(device)
            batch_y = label.to(device)
            out = model(batch_X)
            eval_loss += criterion(out, batch_y).item()
            # pred is the expected class, batch_y is the true label
            pred = getBinaryTensor(out)
            eval_acc += (pred == batch_y.view(-1)).sum().item()

    # eval_loss is a sum of per-batch means -> average over batches;
    # eval_acc is a count of correct samples -> average over samples.
    mean_loss = eval_loss / total_batch
    accuracy = eval_acc / len(test_data)
    print('Test_loss: ', mean_loss, 'Test acc: ', accuracy)
    return mean_loss, accuracy

In [None]:
# Evaluate on the test list, reusing `root` and `test_data_list` from the
# training cell. Called this way, test() builds its model with get_model();
# no model returned or saved by train() is passed in.
test(root,test_data_list)