In [None]:
import PIL
import datetime
from time import sleep
import os
from os import listdir as ls
from tqdm import tqdm
import pytz

import pandas as pd
import numpy as np
import cv2
import matplotlib.pyplot as plt
from skimage import io, transform

import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import albumentations as A
from albumentations.pytorch import ToTensor

from torch.utils.tensorboard import SummaryWriter

plt.ion()
%load_ext tensorboard
%matplotlib inline

In [None]:
# NOTE(review): kills a hardcoded PID — presumably a leftover from a past session
# (e.g. a stale TensorBoard process); confirm the PID before re-running this cell.
!kill 78340

In [None]:
# NOTE(review): `top` is an interactive process monitor — presumably leftover debugging;
# consider removing this cell so the notebook can run top to bottom.
!top

In [None]:
# TensorBoard log directory with a fixed timestamp (presumably an earlier run's logs).
# NOTE(review): the following cell reassigns `train_log_dir` with a fresh timestamp.
train_log_dir = '.././logs/tensorboard/train/2019-05-21_15-05'

In [None]:
# Build a per-run TensorBoard log directory named after the current
# Europe/Moscow time, formatted as YYYY-MM-DD_HH-MM.
train_log_dir = os.path.join(".././logs/tensorboard/train/",
                                datetime.datetime.now(tz=pytz.timezone('Europe/Moscow')).strftime("%Y-%m-%d_%H-%M"))
# Print the shell command that would serve this directory with TensorBoard.
print('nohup tensorboard --logdir='+train_log_dir+' &')
# Create the directory up front; exist_ok makes re-running the cell harmless.
os.makedirs(train_log_dir, exist_ok=True)

In [None]:
# TensorBoard writer bound to `train_log_dir`; `train()` logs its loss scalars through it.
train_summary_writer = SummaryWriter(train_log_dir)

In [None]:
def show_example(example):
    """
    Display a single image in a new figure, titled with its labels if present.

    Args:
    example: either a dict with 'image' and 'labels' entries, or the image
        itself (e.g. an ndarray) when no labels are available.
    """
    plt.figure()
    if not isinstance(example, dict):
        # Bare image: nothing to put in the title.
        picture = example
    else:
        picture, caption = example['image'], example['labels']
        plt.title(str(caption))
    plt.imshow(picture)
    plt.show()
    

def show_batch(sample_batched):
    """Draw every image of a batch as one grid on the current matplotlib axes.

    Args:
        sample_batched: dict whose 'image' entry is a batched image tensor
            accepted by `torchvision.utils.make_grid`. Other entries (such as
            'labels') are not needed and are ignored.

    The function only draws; the caller is responsible for creating the
    figure and calling `plt.show()`.
    """
    images_batch = sample_batched['image']
    print('Batch shape', images_batch.size())

    grid = utils.make_grid(images_batch)
    # The grid is channel-first; move the channel axis last for imshow.
    plt.imshow(grid.numpy().transpose((1, 2, 0)))
    plt.title('Batch from dataloader')
    

def show_train_batches(dataloader, i):
    """Iterate over `dataloader`, printing each batch's tensor sizes, and plot batch `i`.

    Iteration stops right after batch number `i` (0-based) has been shown.
    If the dataloader yields fewer than `i + 1` batches, nothing is plotted.
    """
    batch_no = 0
    for batch in dataloader:
        print(batch_no, batch['image'].size(),
              batch['labels'].size())

        if batch_no != i:
            batch_no += 1
            continue

        # Requested batch reached: render it and stop consuming the loader.
        plt.figure()
        show_batch(batch)
        plt.axis('off')
        plt.ioff()
        plt.show()
        return

In [None]:
class ChestnetDataset(Dataset):
    """Chest X-ray image dataset annotated with pathologies."""

    def __init__(self, csv_file, root_dir, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
                The first column holds the image file name; every column
                after it except the last one is read as a label.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample. It is called as ``transform(image=...)`` and must
                return a mapping with an 'image' entry.
        """
        self.df = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Number of rows in the annotation csv."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            dict with 'image' (RGB image, transformed when a transform is set)
            and 'labels'. Labels are a float tensor when a transform is set,
            otherwise an integer ndarray.

        Raises:
            FileNotFoundError: if the image file is missing or cannot be decoded.
        """
        if torch.is_tensor(idx):
            # Samplers may hand over tensor indices; iloc wants plain ints.
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir,
                                self.df.iloc[idx, 0])
        image = cv2.imread(img_name)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError('Cannot read image: {}'.format(img_name))
        # By default OpenCV uses BGR color space for color images,
        # so we need to convert the image to RGB color space.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        labels = self.df.iloc[idx, 1:-1].values.astype(int)
        sample = {'image': image, 'labels': labels}

        if self.transform:
            # Apply transform to the numpy.ndarray which represents the sample image
            augmented = self.transform(image=sample['image'])
            sample['image'] = augmented['image']
            sample['labels'] = torch.from_numpy(sample['labels']).float()
        return sample

In [None]:
# Training pipeline: resize to 256x256, apply random augmentations
# (horizontal flip, rotation within +/-30 degrees, mild brightness/contrast
# jitter), normalize per channel and convert to a tensor.
# The mean/std values are the commonly used ImageNet channel statistics.
train_transform = A.Compose([
    A.Resize(256, 256),
    A.HorizontalFlip(),
    A.Rotate(limit=30),
    A.RandomBrightnessContrast(brightness_limit = 0.1, contrast_limit = 0.1),
    A.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
    ToTensor()
])
# Validation/test pipeline: same resize and normalization, no random augmentation.
val_transform = A.Compose([ 
    A.Resize(256, 256),
    A.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
    ToTensor()
])

        
# Five-fold cross-validation: each pair is (csv with the four training folds,
# csv with the held-out validation fold).
fold_titles = [('fold1234', 'fold0'),('fold0234', 'fold1'),('fold0134', 'fold2'),('fold0124', 'fold3'),('fold0123', 'fold4')]
images_dir = '../../../datasets/ilyas/ChestNets/images/'
folds = dict() # {'fold0': (train_dl0, val_dl0),...,'fold4' : (train_dl4, val_dl4)}
# The loop variables deliberately avoid the name `train`: binding it to a string
# here would clobber the `train()` function whenever this cell is re-run.
for train_title, val_title in fold_titles:
    print(train_title, val_title)
    train_dataset = ChestnetDataset(os.path.join('../dataset/', train_title+'.csv'),
                                    images_dir,
                                    transform=train_transform)
    val_dataset = ChestnetDataset(os.path.join('../dataset/', val_title+'.csv'),
                                  images_dir,
                                  transform=val_transform)

    train_dl = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=8)
    # Validation needs no shuffling; a fixed order keeps the reported loss reproducible.
    val_dl = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=8)
    folds[val_title] = (train_dl, val_dl)
folds

In [None]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import re

# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def new_densenet121(imagenet=True, path_to_weights=None):
    """Build a DenseNet-121 with a 14-output classifier and move it to `device`.

    Args:
        imagenet (bool): if True, initialise the network from the local
            checkpoint '../weights/misc/densenet121_pretrained.pth'. The
            checkpoint is loaded before the classifier is replaced, so it must
            match torchvision's stock densenet121 layout; the new 14-output
            classifier starts from random initialisation.
        path_to_weights (str, optional): only used when `imagenet` is False.
            Path to a state dict saved from a network that already has the
            14-output classifier. When None, the network stays randomly
            initialised.

    Returns:
        The network, moved to `device`.
    """
    net = torchvision.models.densenet121()
    if imagenet:
        # map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
        state_dict = torch.load('../weights/misc/densenet121_pretrained.pth', map_location=device)
        # '.'s are no longer allowed in module names, but previous _DenseLayer
        # has keys 'norm.1', 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'.
        # They are also in the checkpoints in model_urls. This pattern is used
        # to find such keys.
        pattern = re.compile(
            r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')
        for key in list(state_dict.keys()):
            res = pattern.match(key)
            if res:
                # e.g. '...norm.1.weight' -> '...norm1.weight'
                state_dict[res.group(1) + res.group(2)] = state_dict.pop(key)
        net.load_state_dict(state_dict)

    # Replace the classifier with a fresh 14-output layer (one output per label).
    net.classifier = nn.Linear(net.classifier.in_features, 14)

    if not imagenet and path_to_weights is not None:
        # Fine-tuned checkpoints already contain the 14-output classifier,
        # so they are loaded after the head has been replaced.
        net.load_state_dict(torch.load(path_to_weights, map_location=device))
    return net.to(device)


def new_inceptionV3(imagenet=True, path_to_weights=None):
    """Build an Inception-v3 with a 14-output `fc` layer and move it to `device`.

    Args:
        imagenet (bool): if True, initialise the network from the local
            checkpoint '../weights/misc/inception_v3_pretrained_imagenet.pth'.
            The checkpoint is loaded before `fc` is replaced, so it must match
            torchvision's stock inception_v3 layout; the new 14-output `fc`
            starts from random initialisation.
        path_to_weights (str, optional): only used when `imagenet` is False.
            Path to a state dict saved from a network that already has the
            14-output `fc`. When None, the network stays randomly initialised.

    Returns:
        The network with `aux_logits` set to False, moved to `device`.
    """
    net = torchvision.models.inception_v3()
    if imagenet:
        # map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
        # No key renaming is done here: the legacy 'norm.1'-style keys handled in
        # new_densenet121 only occur under 'denselayer' modules.
        state_dict = torch.load('../weights/misc/inception_v3_pretrained_imagenet.pth',
                                map_location=device)
        net.load_state_dict(state_dict)

    # Replace the final fully connected layer with a fresh 14-output layer.
    net.fc = nn.Linear(net.fc.in_features, 14)

    if not imagenet and path_to_weights is not None:
        # Fine-tuned checkpoints already contain the 14-output fc layer,
        # so they are loaded after the head has been replaced.
        net.load_state_dict(torch.load(path_to_weights, map_location=device))

    # Switched off only after loading, so the auxiliary-classifier weights in the
    # checkpoints still find matching modules.
    net.aux_logits = False
    return net.to(device)

In [None]:
def perform_training_epoch(model, criterion, optimizer, scheduler=None, dataloader=None):
    """Train `model` for one pass over a dataloader and return the mean batch loss.

    Args:
        model: network to train; it is switched to training mode.
        criterion: callable ``criterion(outputs, labels)`` returning a scalar loss tensor.
        optimizer: optimizer over the model's parameters.
        scheduler: accepted for interface compatibility; currently unused.
        dataloader: iterable with a length, yielding dicts with 'image' and
            'labels' tensors. When None, the notebook-level `train_dl` is used
            (the legacy behaviour).

    Returns:
        float: sum of the per-batch losses divided by the number of batches.
    """
    if dataloader is None:
        # Backward-compatible fallback for callers that rely on the global loader.
        dataloader = train_dl

    model.train()
    train_average_loss = 0.0
    for data in dataloader:
        # get the inputs
        inputs, labels = data['image'].to(device), data['labels'].to(device)
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_average_loss += loss.item()
    train_average_loss /= len(dataloader)
    return train_average_loss

def compute_validation_loss(model, criterion, dataloader=None):
    """Evaluate `model` on a dataloader and return the mean batch loss.

    Args:
        model: network to evaluate; it is switched to evaluation mode.
        criterion: callable ``criterion(outputs, labels)`` returning a scalar loss tensor.
        dataloader: iterable with a length, yielding dicts with 'image' and
            'labels' tensors. When None, the notebook-level `val_dl` is used
            (the legacy behaviour).

    Returns:
        float: sum of the per-batch losses divided by the number of batches.
    """
    if dataloader is None:
        # Backward-compatible fallback for callers that rely on the global loader.
        dataloader = val_dl

    model.eval()
    val_average_loss = 0.0
    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for data in dataloader:
            inputs, labels = data['image'].to(device), data['labels'].to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_average_loss += loss.item()
    val_average_loss /= len(dataloader)
    return val_average_loss

In [None]:
def train(model, criterion, val_criterion, optimizer, train_dl, val_dl, save_title='test', num_epochs=30,
          checkpoint_prefix='../weights/inceptionV3_BCELoss_'):
    """Train `model` for `num_epochs`, logging losses and saving a checkpoint every epoch.

    Args:
        model: network to train.
        criterion: loss used for the training epochs.
        val_criterion: loss used to compute the validation loss.
        optimizer: optimizer over the model's parameters.
        train_dl, val_dl: training / validation dataloaders.
            NOTE(review): they are not forwarded; the epoch helpers, as called
            here, iterate the notebook-level `train_dl` / `val_dl`, so callers
            must keep those globals in sync with these arguments.
        save_title (str): tag appended to the TensorBoard scalar names and used
            in the checkpoint file names.
        num_epochs (int): number of epochs to run.
        checkpoint_prefix (str): path prefix of the saved state dicts; files are
            written as ``<checkpoint_prefix><save_title>_epoch<N>.pth`` with N
            counted from 0. The default keeps the historical file names.
    """
    for epoch in tqdm(range(num_epochs)):
        train_average_loss = perform_training_epoch(model, criterion, optimizer)
        val_average_loss = compute_validation_loss(model, val_criterion)

        train_summary_writer.add_scalar('train_loss '+save_title, train_average_loss, global_step = epoch)
        train_summary_writer.add_scalar('val_loss '+save_title, val_average_loss, global_step = epoch)
        # Flush so the curves show up in TensorBoard after every epoch, without
        # closing a writer that is still needed for the following epochs and folds.
        train_summary_writer.flush()

        torch.save(model.state_dict(), checkpoint_prefix+save_title+'_epoch'+str(epoch)+'.pth')
        # The printed epoch number is 1-based, the checkpoint file name 0-based.
        print(save_title+' [%d] train_loss: %.3f; val_loss: %.3f' % (epoch + 1, train_average_loss, val_average_loss))

    print('Finished Training')

In [None]:
class FocalLoss(nn.Module):
    """Focal loss built on top of element-wise binary cross-entropy.

    Each element's loss is ``alpha * (1 - pt) ** gamma * BCE`` with
    ``pt = exp(-BCE)``, which down-weights elements the model already
    predicts well.

    Args:
        alpha (float): constant scale applied to every element.
        gamma (float): focusing exponent; 0 reduces to plain (scaled) BCE.
        logits (bool): if True, `inputs` are raw scores and the
            with-logits BCE is used; otherwise `inputs` must be probabilities.
        reduce (bool): if True, return the mean over all elements; otherwise
            return the element-wise loss tensor.
    """

    def __init__(self, alpha=1, gamma=2, logits=False, reduce=True):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.logits = logits
        self.reduce = reduce

    def forward(self, inputs, targets):
        # reduction='none' keeps the per-element losses (the `reduce=False`
        # keyword of the functional API is deprecated).
        if self.logits:
            BCE_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction='none')
        else:
            BCE_loss = F.binary_cross_entropy(inputs, targets, reduction='none')
        # exp(-BCE) is the probability assigned to the true class.
        pt = torch.exp(-BCE_loss)
        F_loss = self.alpha * (1-pt)**self.gamma * BCE_loss

        if self.reduce:
            return torch.mean(F_loss)
        else:
            return F_loss

In [None]:
# Train one InceptionV3 per fold.
# All models are created (and moved to `device`) up front, one per key of `folds`.
# models = {key: new_densenet121(imagenet = False, path_to_weights = model_weights[key]) for key in folds.keys()}
models = {key: new_inceptionV3(imagenet = True) for key in folds.keys()}
# The loop rebinds the notebook-level `train_dl` / `val_dl` on every iteration;
# `perform_training_epoch` and `compute_validation_loss`, as called from `train`,
# read those globals.
for key, (train_dl, val_dl) in folds.items():
    print(key, train_dl, val_dl)
    model = models[key]
    # Validation is always scored with plain BCE-with-logits.
    val_criterion = nn.BCEWithLogitsLoss()
#     criterion = nn.BCEWithLogitsLoss()
    # Training uses the focal loss on raw logits.
    # NOTE(review): `train` still saves checkpoints under its default
    # 'inceptionV3_BCELoss_' file prefix even though the loss here is FocalLoss.
    criterion = FocalLoss(logits= True)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    train(model, criterion, val_criterion, optimizer, train_dl, val_dl, save_title=key, num_epochs=40 )
    

In [None]:
from sklearn.metrics import roc_auc_score
from scipy.special import expit
from pprint import pprint

In [None]:

# K-fold validation. 
# Training models for each fold
# Learning curves: logs.txt
# Checkpoint paths under inceptionV3_BCE_40epochs, one per fold.
# NOTE(review): this mapping is overwritten below by the listing of the
# FocalLoss directory, so it currently has no effect.
model_weights = {
    'fold0': '../weights/inceptionV3_BCE_40epochs/inceptionV3_BCELoss_fold0_epoch17.pth',
    'fold1': '../weights/inceptionV3_BCE_40epochs/inceptionV3_BCELoss_fold1_epoch39.pth',
    'fold2': '../weights/inceptionV3_BCE_40epochs/inceptionV3_BCELoss_fold2_epoch39.pth',
    'fold3': '../weights/inceptionV3_BCE_40epochs/inceptionV3_BCELoss_fold3_epoch39.pth',
    'fold4': '../weights/inceptionV3_BCE_40epochs/inceptionV3_BCELoss_fold4_epoch19.pth'
}
# NOTE(review): `dir` shadows the Python builtin of the same name.
dir = '../weights/inceptionV3_FocalLoss_40epochs'
l = ls(dir)
# Sort the listing and drop its first and last entries — presumably
# non-checkpoint files in that directory; TODO confirm against its contents.
l = sorted(l)[1:-1]
# Map 'fold0', 'fold1', ... to the remaining files in sorted order.
model_weights = {'fold{}'.format(i): os.path.join(dir,path) for i,path in enumerate(l)}
model_weights

In [None]:
def test_model(model, test_dl):
    model.eval()
    with torch.no_grad():
        for i, data in enumerate(test_dl, 0):
            inputs, labels = data['image'].to(device), data['labels'].to(device)
            outputs = model(inputs).cpu()
            outputs = expit(outputs)            
            labels = labels.byte().cpu()
            if i == 0: 
                all_labels = labels.numpy()
                all_outputs = outputs.numpy()
            else:
                all_labels = np.vstack([all_labels, labels])
                all_outputs = np.vstack([all_outputs, outputs])
    auc_roc_per_class = dict()
    for i in range(14):
        x = all_outputs[:, i]
        y = all_labels[:, i]
        auc_roc_per_class[i] = roc_auc_score(y,x)
    print(all_labels.shape, all_outputs.shape)
    pprint(auc_roc_per_class)
    return auc_roc_per_class

def test_all_folds(models, save_dir):
    """Evaluate every fold model on the test set and save a per-class ROC AUC table.

    Args:
        models (dict): {fold name: model}; each model is scored with `test_model`.
        save_dir (str): despite the name, the path of the CSV file to write.
            Its parent directory is created when missing.

    Returns:
        pandas.DataFrame: one row per pathology plus a final 'Mean' row, with a
        'Class' column, one column per fold and a 'Mean' column across folds.
    """
    test_dataset = ChestnetDataset(os.path.join('../dataset/test.csv'),
                                  '../../../datasets/ilyas/ChestNets/images/',
                                  transform=val_transform)
    test_dl = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=8)
    pathologies = ['Atelectasis', 'Cardiomegaly', 'Effusion', 'Infiltration', 'Mass', 'Nodule', 'Pneumonia', 'Pneumothorax', 'Consolidation', 'Edema', 'Emphysema', 'Fibrosis', 'Pleural_Thickening', 'Hernia']
    df = pd.DataFrame(pathologies,columns=['Class'])
    for key, model in tqdm(models.items()):
        # Materialise the dict view so pandas gets a plain list of values.
        df[key] = list(test_model(model, test_dl).values())
    # numeric_only skips the string 'Class' column when averaging.
    df.loc['Mean'] = df.mean(numeric_only=True)
    df.loc['Mean', 'Class'] = 'Mean'
    df['Mean'] = df.mean(axis=1, numeric_only=True)

    out_dir = os.path.dirname(save_dir)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    df.to_csv(save_dir, index = False)
    return df

# Rebuild one InceptionV3 per fold from the checkpoints listed in `model_weights`,
# then score them all on the test set and write the AUC table to CSV.
models = {key: new_inceptionV3(imagenet = False, path_to_weights=path) for key, path in model_weights.items()}
test_all_folds(models,'../dataset/outputs/inceptionV3_FocalLoss_40epochs/InceptionV3_FocaLoss_AUCROC.csv')

In [None]:
def baseline():
    """Collect test-set predictions of the five DenseNet-121 fold models.

    Each model is restored from its fold checkpoint and run over the whole
    test set; a sigmoid is applied to its outputs.

    Returns:
        dict: {model index (0-4): ndarray with that model's predictions for
        all test samples, stacked in dataloader order}.
    """
    test_dataset = ChestnetDataset(os.path.join('../dataset/test.csv'),
                                  '../../../datasets/ilyas/ChestNets/images/',
                                  transform=val_transform)
    test_dl = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=8)
    weights = {0: '../weights/densenet_fold0_epoch22.pth',
              1: '../weights/densenet_fold1_epoch29.pth',
              2: '../weights/densenet_fold2_epoch29.pth',
              3: '../weights/densenet_fold3_epoch28.pth',
              4: '../weights/densenet_fold4_epoch18.pth'}
    # Load each fold checkpoint directly; imagenet=False avoids first reading
    # pretrained weights that the checkpoint would overwrite anyway.
    models = [new_densenet121(imagenet=False, path_to_weights=weights[i]) for i in range(5)]
    for m in models:
        m.eval()

    # Gather per-batch predictions per model and stack once at the end.
    collected = {j: [] for j in range(len(models))}
    with torch.no_grad():
        for data in tqdm(test_dl):
            inputs = data['image'].to(device)
            for j, m in enumerate(models):
                collected[j].append(torch.sigmoid(m(inputs)).cpu().numpy())

    all_outputs = {j: np.vstack(chunks) for j, chunks in collected.items()}
    return all_outputs
# Run the DenseNet baseline and keep its per-model test predictions in `out`.
out = baseline()

In [None]:
def save_prediction_on_test(models, save_dir):
    """Run every model over the test set and save its predictions as CSV.

    Args:
        models (dict): {name: model}. Models are processed in the dict's
            iteration order and numbered 0, 1, ... accordingly.
        save_dir (str): directory receiving one ``fold<N>_test.csv`` per model
            (N being the model's position); created when missing.

    Returns:
        dict: {model position: ndarray with that model's sigmoid outputs for
        all test samples, stacked in dataloader order}.
    """
    test_dataset = ChestnetDataset(os.path.join('../dataset/test.csv'),
                                  '../../../datasets/ilyas/ChestNets/images/',
                                  transform=val_transform)
    test_dl = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=8)

    model_list = list(models.values())
    for m in model_list:
        # Inference must not depend on whether an earlier cell happened to
        # switch the models to evaluation mode.
        m.eval()

    # Gather per-batch predictions per model and stack once at the end.
    collected = {j: [] for j in range(len(model_list))}
    with torch.no_grad():
        for data in tqdm(test_dl):
            inputs = data['image'].to(device)
            for j, m in enumerate(model_list):
                collected[j].append(torch.sigmoid(m(inputs)).cpu().numpy())
    all_outputs = {j: np.vstack(chunks) for j, chunks in collected.items()}

    os.makedirs(save_dir, exist_ok=True)
    for i, output in all_outputs.items():
        # Format only the file name, so braces in `save_dir` cannot break the path.
        pd.DataFrame(output).to_csv(os.path.join(save_dir, 'fold{}_test.csv'.format(i)), index = False)
    return all_outputs
# Write the per-fold test predictions of the current `models` to the FocalLoss output directory.
save_prediction_on_test(models, '../dataset/outputs/inceptionV3_FocalLoss_40epochs/')

In [312]:
def create_correlation_table(data_directory, n_folds=5):
    """Tabulate pairwise correlations between the fold models' test predictions.

    Reads ``fold<i>_test.csv`` for i in 0..n_folds-1 from `data_directory`,
    correlates matching columns of every pair of folds (i < j), and writes the
    result to ``corr_of_models.csv`` in the same directory.

    Args:
        data_directory (str): directory holding the per-fold prediction CSVs.
        n_folds (int): number of fold files to read; at least 2.

    Returns:
        pandas.DataFrame: one row per pair ('i vs j') plus a final 'Mean' row;
        one column per prediction column (numbered from 0) plus a 'Mean' column.

    Raises:
        ValueError: if `n_folds` is smaller than 2 (no pair to correlate).
    """
    if n_folds < 2:
        raise ValueError('n_folds must be at least 2, got {}'.format(n_folds))

    fold_preds = [pd.read_csv(os.path.join(data_directory, 'fold{}_test.csv'.format(i)))
                  for i in range(n_folds)]

    # One record per unordered pair of folds, in (0,1), (0,2), ... order.
    rows = {}
    for i in range(n_folds):
        for j in range(i + 1, n_folds):
            rows['{} vs {}'.format(i, j)] = list(fold_preds[i].corrwith(fold_preds[j]))

    out = pd.DataFrame.from_dict(rows, orient='index')
    out.loc['Mean'] = out.mean()
    out['Mean'] = out.mean(axis=1)
    out.to_csv(os.path.join(data_directory,'corr_of_models.csv'))
    return out
# Correlate the saved FocalLoss fold predictions; the returned table is the cell output.
create_correlation_table('../dataset/outputs/inceptionV3_FocalLoss_40epochs')

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,Mean
0 vs 1,0.519554,0.462886,0.595989,0.455916,0.508987,0.437295,0.385088,0.47807,0.458332,0.464174,0.491516,0.332827,0.437981,0.40514,0.459554
0 vs 2,0.517262,0.466954,0.614793,0.505941,0.48285,0.418632,0.360367,0.45524,0.448881,0.494417,0.486628,0.301431,0.418792,0.363073,0.452519
0 vs 3,0.529353,0.465469,0.615186,0.488525,0.510776,0.451548,0.380558,0.51177,0.471389,0.476813,0.47154,0.359428,0.444039,0.377355,0.468125
0 vs 4,0.501555,0.46118,0.612255,0.47774,0.470529,0.403346,0.366094,0.460177,0.436334,0.485916,0.458933,0.343896,0.442039,0.27736,0.442668
1 vs 2,0.534864,0.496664,0.618571,0.477283,0.512987,0.435789,0.416289,0.460714,0.480073,0.489167,0.483012,0.351819,0.43447,0.449716,0.474387
1 vs 3,0.534862,0.459619,0.605366,0.457003,0.505878,0.478589,0.395858,0.492306,0.475342,0.472882,0.469717,0.356553,0.433657,0.487613,0.473232
1 vs 4,0.517859,0.428197,0.615715,0.431304,0.460887,0.39021,0.410461,0.450347,0.452135,0.491847,0.456821,0.344011,0.437947,0.305222,0.442354
2 vs 3,0.528846,0.492057,0.638946,0.491296,0.48944,0.452739,0.391141,0.487755,0.494868,0.508245,0.498493,0.360903,0.440779,0.43512,0.479331
2 vs 4,0.518357,0.487315,0.630019,0.477721,0.451976,0.395134,0.387644,0.430077,0.465813,0.526334,0.465223,0.351253,0.429015,0.30756,0.451674
3 vs 4,0.53108,0.463552,0.630873,0.486086,0.466264,0.442209,0.392922,0.486439,0.468347,0.511459,0.471424,0.369784,0.432927,0.33533,0.463478
