In [1]:
from efficientnet import Efficientnet as Backbone 
from dataset import ImageDataset_iq as ImageDataset  

In [2]:
from torch.utils.data import DataLoader 
from tensorboardX import SummaryWriter
import torch 
import torch.nn as nn 
import logging
import time 
import numpy as np 
from sklearn import metrics
import os
import tqdm

In [3]:
# Build the project's EfficientNet wrapper (imported above as `Backbone`).
# Per the recorded cell output, construction loads pretrained efficientnet-b0 weights.
model = Backbone()

Loaded pretrained weights for efficientnet-b0


In [4]:
# Dump the module tree to inspect the layer layout (long output).
print(model)

Efficientnet(
  (model): EfficientNet(
    (_conv_stem): Conv2dStaticSamePadding(
      3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False
      (static_padding): ZeroPad2d(padding=(0, 1, 0, 1), value=0.0)
    )
    (_bn0): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
    (_blocks): ModuleList(
      (0): MBConvBlock(
        (_depthwise_conv): Conv2dStaticSamePadding(
          32, 32, kernel_size=(3, 3), stride=[1, 1], groups=32, bias=False
          (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
        )
        (_bn1): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
        (_se_reduce): Conv2dStaticSamePadding(
          32, 8, kernel_size=(1, 1), stride=(1, 1)
          (static_padding): Identity()
        )
        (_se_expand): Conv2dStaticSamePadding(
          8, 32, kernel_size=(1, 1), stride=(1, 1)
          (static_padding): Identity()
        )
        (_p

# Configuration

In [5]:
# --- Data locations ---------------------------------------------------------
# Root folder of the dataset; also handed to the dataset class as `subfolder`.
DATA_FOLDER = 'data_segmented/'
VALID_CSV = DATA_FOLDER + 'CheXpert-v1.0-small/dev_frontal.csv'
TRAIN_CSV = DATA_FOLDER +'CheXpert-v1.0-small/train_frontal.csv'

# --- Input / batching -------------------------------------------------------
# Second positional argument given to the dataset class.
IMAGE_SHAPE = 256
TRAIN_BATCH_SIZE = VALID_BATCH_SIZE = 24

# --- Optimisation schedule --------------------------------------------------
EPOCHS = 10
lr = 1e-4

# --- Reporting --------------------------------------------------------------
# Training metrics are logged every `log_every` optimizer steps and the dev
# set is evaluated every `test_every` steps.
log_every = 10
test_every = 100
# Directory used for TensorBoard event files and saved checkpoints.
run = 'logdir2'

In [6]:
# Training batches are reshuffled every epoch; the trailing partial batch is
# dropped so every optimizer step sees a full batch.
dataloader_train = DataLoader(
    ImageDataset(TRAIN_CSV, IMAGE_SHAPE, subfolder=DATA_FOLDER),
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
    drop_last=True,
)

# Evaluation keeps a fixed order and all samples. Dev loss/accuracy are
# averaged per batch and the last batch may be partial, so shuffling here
# would make those numbers vary between evaluations of the same weights.
dataloader_dev = DataLoader(
    ImageDataset(VALID_CSV, IMAGE_SHAPE, subfolder=DATA_FOLDER),
    batch_size=VALID_BATCH_SIZE,
    shuffle=False,
    drop_last=False,
)

In [7]:
# Run on the GPU when one is visible to PyTorch, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Move the network first so the optimizer is built over the moved parameters.
model = model.to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

In [8]:
# Label header exposed by the dev dataset; used to name per-task dev scalars.
dev_header = dataloader_dev.dataset._label_header

epoch_start = 0

# TensorBoard writer; event files go under the `run` directory.
summary_writer = SummaryWriter(run)

# Progress counters, mutated in place by the training loop.
summary = dict(epoch=0, step=0)

# Latest dev-set metrics (overwritten by each evaluation).
summary_dev = dict(loss=float('inf'), acc=0.0)

# Best dev metrics seen so far, plus the index of the next checkpoint slot.
best_dict = dict(
    acc_dev_best=0.0,
    auc_dev_best=0.0,
    loss_dev_best=float('inf'),
    fused_dev_best=0.0,
    best_idx=1,
)

In [9]:
def get_loss(output, target, index, device, batch_weight=False):
    """Compute binary cross-entropy loss and accuracy for a single task.

    Args:
        output: Logits tensor; the task's logits are read from
            ``output[:, index]``.
        target: Label tensor; the task's labels are read from
            ``target[:, index]``. Must be a floating dtype accepted by
            ``binary_cross_entropy_with_logits``.
        index: Column of the task to score. The fixed positive-weight table
            has five entries, so ``index`` must be in ``range(5)`` when
            ``batch_weight`` is False.
        device: Device on which auxiliary tensors are created.
        batch_weight: When True, weight positive samples by the
            negative/positive ratio of this batch instead of the fixed
            per-task weight. A batch with no positives yields a zero loss.

    Returns:
        Tuple ``(loss, acc)`` of scalar tensors: the BCE-with-logits loss and
        the fraction of samples whose thresholded (>= 0.5) sigmoid output
        equals the label.
    """
    logits = output[:, index].view(-1)
    labels = target[:, index].view(-1)
    bce_with_logits = torch.nn.functional.binary_cross_entropy_with_logits

    if batch_weight:
        positives = labels.sum()
        if positives == 0:
            # No positive sample: the ratio below would divide by zero, so
            # contribute a zero loss that still supports backward().
            loss = torch.tensor(0., requires_grad=True).to(device)
        else:
            weight = (labels.size()[0] - positives) / positives
            loss = bce_with_logits(logits, labels, pos_weight=weight)
    else:
        # Fixed per-task positive weights (currently all 1, i.e. unweighted).
        pos_weight = torch.from_numpy(
            np.array([1, 1, 1, 1, 1],
                     dtype=np.float32)).to(device).type_as(labels)
        loss = bce_with_logits(logits, labels, pos_weight=pos_weight[index])

    predicted = torch.sigmoid(logits).ge(0.5).float()
    acc = (labels == predicted).float().sum() / len(predicted)

    return (loss, acc)


In [10]:
def test_epoch(summary, run, device,  model, dataloader):
    """Evaluate ``model`` on every batch of ``dataloader``.

    The model is switched to eval mode and left there; gradient tracking is
    disabled only for the duration of this call.

    Args:
        summary: Dict that receives the results under ``'loss'`` and ``'acc'``.
        run: Unused here; kept for interface compatibility.
        device: Device the batches are moved to before the forward pass.
        model: Network called as ``model(image)``; its output is indexed as
            ``output[:, t]`` for each task ``t``.
        dataloader: Iterable yielding ``(image, target)`` batches and
            supporting ``len()``.

    Returns:
        Tuple ``(summary, predlist, true_list)``. ``summary['loss']`` and
        ``summary['acc']`` are per-task arrays averaged over batches (NaN if
        the loader is empty). ``predlist[t]`` / ``true_list[t]`` are 1-D
        arrays of sigmoid scores / labels for task ``t`` over all samples.
    """
    # Five tasks are scored, one per output column.
    num_tasks = 5
    steps = len(dataloader)

    loss_sum = np.zeros(num_tasks)
    acc_sum = np.zeros(num_tasks)

    # Collect per-batch arrays and concatenate once at the end; appending to
    # a growing array every step would re-copy all previous predictions.
    pred_chunks = [[] for _ in range(num_tasks)]
    true_chunks = [[] for _ in range(num_tasks)]

    model.eval()
    # Scoped no_grad: the caller's global grad mode is left untouched.
    with torch.no_grad():
        for image, target in dataloader:
            image = image.to(device)
            target = target.to(device)
            output = model(image)

            for t in range(num_tasks):
                loss_t, acc_t = get_loss(output, target, t, device)

                # Scores and labels are kept for AUC computation by the caller.
                pred_chunks[t].append(
                    torch.sigmoid(output[:, t].view(-1)).cpu().numpy())
                true_chunks[t].append(target[:, t].view(-1).cpu().numpy())

                loss_sum[t] += loss_t.item()
                acc_sum[t] += acc_t.item()

    if steps:
        summary['loss'] = loss_sum / steps
        summary['acc'] = acc_sum / steps
    else:
        # Empty loader: report NaN rather than dividing by zero.
        summary['loss'] = np.full(num_tasks, np.nan)
        summary['acc'] = np.full(num_tasks, np.nan)

    empty = np.array([], dtype=np.float32)
    predlist = [np.concatenate(chunks) if chunks else empty
                for chunks in pred_chunks]
    true_list = [np.concatenate(chunks) if chunks else empty
                 for chunks in true_chunks]

    return summary, predlist, true_list

In [11]:
def train_epoch(summary, summary_dev, run, model, dataloader, dataloader_dev, optimizer,device):
    """Train ``model`` for one pass over ``dataloader``.

    Every ``log_every`` global steps the averaged training loss/accuracy are
    logged and written to TensorBoard. Every ``test_every`` global steps the
    dev set is evaluated, per-task AUC is computed, and a checkpoint is saved
    when the mean dev AUC matches or beats the best seen so far (checkpoint
    file names rotate through ``best1.ckpt`` .. ``best3.ckpt``).

    Besides its arguments, this function reads the module-level names
    ``log_every``, ``test_every`` and ``summary_writer`` and updates the
    module-level ``best_dict`` in place.

    Args:
        summary: Dict with ``'epoch'`` and ``'step'`` counters; both are
            advanced in place.
        summary_dev: Dict passed to ``test_epoch`` to receive dev metrics.
        run: Directory in which checkpoints are written.
        model: Network being trained.
        dataloader: Training loader yielding ``(image, target)`` batches; its
            dataset exposes ``_label_header``.
        dataloader_dev: Dev loader passed to ``test_epoch``; its dataset
            exposes ``_label_header``.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(summary, best_dict)``.
    """
    torch.set_grad_enabled(True)
    model.train()
    label_header = dataloader.dataset._label_header
    # Take the dev header from the loader actually passed in, so scalar tags
    # always match the data being evaluated.
    dev_label_header = dataloader_dev.dataset._label_header
    num_tasks = 5
    num_classes = 5

    time_now = time.time()
    loss_sum = np.zeros(num_tasks)
    acc_sum = np.zeros(num_tasks)
    # Number of steps accumulated into loss_sum/acc_sum since the last log.
    # The sums restart every epoch while summary['step'] keeps counting, so
    # the first window of an epoch can be shorter than log_every.
    steps_in_window = 0

    for image, target in tqdm.tqdm(dataloader):
        image = image.to(device)
        target = target.to(device)

        output = model(image)

        # Total loss is the plain sum of the per-task losses.
        loss = 0
        for t in range(num_tasks):
            loss_t, acc_t = get_loss(output, target, t, device)
            loss += loss_t
            loss_sum[t] += loss_t.item()
            acc_sum[t] += acc_t.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        summary['step'] += 1
        steps_in_window += 1

        if summary['step'] % log_every == 0:
            time_spent = time.time() - time_now
            time_now = time.time()

            loss_sum /= steps_in_window
            acc_sum /= steps_in_window
            loss_str = ' '.join(map(lambda x: '{:.5f}'.format(x), loss_sum))
            acc_str = ' '.join(map(lambda x: '{:.3f}'.format(x), acc_sum))

            logging.info(
                '{}, Train, Epoch : {}, Step : {}, Loss : {}, '
                'Acc : {}, Run Time : {:.2f} sec'
                .format(time.strftime("%Y-%m-%d %H:%M:%S"),
                        summary['epoch'] + 1, summary['step'], loss_str,
                        acc_str, time_spent))

            for t in range(num_tasks):
                summary_writer.add_scalar(
                    'train/loss_{}'.format(label_header[t]), loss_sum[t],
                    summary['step'])
                summary_writer.add_scalar(
                    'train/acc_{}'.format(label_header[t]), acc_sum[t],
                    summary['step'])

            loss_sum = np.zeros(num_tasks)
            acc_sum = np.zeros(num_tasks)
            steps_in_window = 0

        if summary['step'] % test_every == 0:
            time_now = time.time()
            summary_dev, predlist, true_list = test_epoch(
                summary_dev,run, device,  model, dataloader_dev)
            time_spent = time.time() - time_now

            # Evaluation switches the model to eval mode (and may disable
            # gradients); restore training state before the next batch.
            model.train()
            torch.set_grad_enabled(True)

            auclist = []
            for i in range(num_classes):
                y_pred = predlist[i]
                y_true = true_list[i]
                fpr, tpr, _ = metrics.roc_curve(
                    y_true, y_pred, pos_label=1)
                auc = metrics.auc(fpr, tpr)
                auclist.append(auc)
            summary_dev['auc'] = np.array(auclist)

            loss_dev_str = ' '.join(map(lambda x: '{:.5f}'.format(x),
                                        summary_dev['loss']))
            acc_dev_str = ' '.join(map(lambda x: '{:.3f}'.format(x),
                                       summary_dev['acc']))
            auc_dev_str = ' '.join(map(lambda x: '{:.3f}'.format(x),
                                       summary_dev['auc']))

            logging.info(
                '{}, Dev, Step : {}, Loss : {}, Acc : {}, Auc : {},'
                'Mean auc: {:.3f} ''Run Time : {:.2f} sec' .format(
                    time.strftime("%Y-%m-%d %H:%M:%S"),
                    summary['step'],
                    loss_dev_str,
                    acc_dev_str,
                    auc_dev_str,
                    summary_dev['auc'].mean(),
                    time_spent))

            for t in range(num_classes):
                summary_writer.add_scalar(
                    'dev/loss_{}'.format(dev_label_header[t]),
                    summary_dev['loss'][t], summary['step'])
                summary_writer.add_scalar(
                    'dev/acc_{}'.format(dev_label_header[t]),
                    summary_dev['acc'][t], summary['step'])
                summary_writer.add_scalar(
                    'dev/auc_{}'.format(dev_label_header[t]),
                    summary_dev['auc'][t], summary['step'])

            # Tasks that count towards the "best" criteria, and which metric
            # decides whether a checkpoint is written.
            save_index = [0,1,2,3,4]
            best_target ='auc'
            save_best = False
            mean_acc = summary_dev['acc'][save_index].mean()
            if mean_acc >= best_dict['acc_dev_best']:
                best_dict['acc_dev_best'] = mean_acc
                if best_target == 'acc':
                    save_best = True

            mean_auc = summary_dev['auc'][save_index].mean()
            if mean_auc >= best_dict['auc_dev_best']:
                best_dict['auc_dev_best'] = mean_auc
                if best_target == 'auc':
                    save_best = True

            mean_loss = summary_dev['loss'][save_index].mean()
            if mean_loss <= best_dict['loss_dev_best']:
                best_dict['loss_dev_best'] = mean_loss
                if best_target == 'loss':
                    save_best = True

            if save_best:
                torch.save(
                    {'epoch': summary['epoch'],
                     'step': summary['step'],
                     'acc_dev_best': best_dict['acc_dev_best'],
                     'auc_dev_best': best_dict['auc_dev_best'],
                     'loss_dev_best': best_dict['loss_dev_best'],
                     'state_dict': model.state_dict()},
                    os.path.join(run, 'best{}.ckpt'.format(
                        best_dict['best_idx']))
                )
                # Rotate through three checkpoint slots.
                best_dict['best_idx'] += 1
                if best_dict['best_idx']>3:
                    best_dict['best_idx'] = 1
                logging.info(
                    '{}, Best, Step : {}, Loss : {}, Acc : {},Auc :{},'
                    'Best Auc : {:.3f}' .format(
                        time.strftime("%Y-%m-%d %H:%M:%S"),
                        summary['step'],
                        loss_dev_str,
                        acc_dev_str,
                        auc_dev_str,
                        best_dict['auc_dev_best']))
    summary['epoch'] += 1

    return summary, best_dict


In [12]:
# train_epoch reports progress through logging.info(); the root logger
# defaults to WARNING, which would silently drop all of those messages.
# basicConfig installs a handler if none exists; setLevel covers the case
# where a handler was already installed and basicConfig is a no-op.
logging.basicConfig(level=logging.INFO)
logging.getLogger().setLevel(logging.INFO)

for epoch in range(EPOCHS):
    train_epoch(summary, summary_dev, run, model, dataloader_train, dataloader_dev, optimizer,device)

  1%|▌                                                                            | 53/7959 [08:12<20:24:25,  9.29s/it]


KeyboardInterrupt: 