rm -rf *.csv

In [4]:
import numpy as np
import pandas as pd
import time
import csv
import random
import matplotlib.pyplot as plt
from PIL import Image
from barbar import Bar

import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data.dataset import random_split

import sklearn.metrics as metrics
from sklearn.metrics import roc_auc_score

use_gpu = torch.cuda.is_available()
print(use_gpu)

train_path = '/gpfs/data/denizlab/Datasets/Public/CheXpert-v1.0/train.csv'
valid_path = '/gpfs/data/denizlab/Datasets/Public/CheXpert-v1.0/valid.csv'


Traindata = pd.read_csv(train_path)
Traindata = Traindata[Traindata['Path'].str.contains("frontal")] # use only frontal images
#Traindata = Traindata[500:]
Traindata.to_csv('/gpfs/data/denizlab/Users/skr2369/Chexpert/CheXpert-v1/U1-V1/train_mod.csv', index = False)
print("Train data length:", len(Traindata))

Validdata = pd.read_csv(valid_path)
Validdata = Validdata[Validdata['Path'].str.contains("frontal")] # use only frontal images
Validdata.to_csv('/gpfs/data/denizlab/Users/skr2369/Chexpert/CheXpert-v1/U1-V1/valid_mod.csv', index = False)
print("Valid data length:", len(Validdata))

# Testdata = Traindata.head(500) # use first 500 training data as test data (obs ratio is almost same!)
# Testdata.to_csv('/gpfs/data/denizlab/Users/skr2369/Chexpert/CheXpert-v1/test_mod.csv', index = False)
# print("Test data length:", len(Testdata))

pathFileTrain = '/gpfs/data/denizlab/Users/skr2369/Chexpert/CheXpert-v1/U1-V1/train_mod.csv'
pathFileValid = '/gpfs/data/denizlab/Users/skr2369/Chexpert/CheXpert-v1/U1-V1/valid_mod.csv'
# pathFileTest = '/gpfs/data/denizlab/Users/skr2369/Chexpert/CheXpert-v1/test_mod.csv'

# Neural network parameters:
nnIsTrained = False     # pre-trained using ImageNet
nnClassCount = 14       # dimension of the output

# Training settings: batch size, maximum number of epochs
trBatchSize = 16
trMaxEpoch = 3

# Parameters related to image transforms: size of the down-scaled image, cropped image
imgtransResize = (320, 320)
imgtransCrop = 224

# Class names
class_names = ['No Finding', 'Enlarged Cardiomediastinum', 'Cardiomegaly', 'Lung Opacity', 
               'Lung Lesion', 'Edema', 'Consolidation', 'Pneumonia', 'Atelectasis', 'Pneumothorax', 
               'Pleural Effusion', 'Pleural Other', 'Fracture', 'Support Devices']

class CheXpertDataSet(Dataset):
    def __init__(self, data_PATH, transform = None, policy = "ones"):
        """
        data_PATH: path to the file containing images with corresponding labels.
        transform: optional transform to be applied on a sample.
        Upolicy: name the policy with regard to the uncertain labels.
        """
        image_names = []
        labels = []

        with open(data_PATH, "r") as f:
            csvReader = csv.reader(f)
            next(csvReader, None) # skip the header
            for line in csvReader:
                image_name = line[0]
                label = line[5:]
                
                for i in range(14):
                    if label[i]:
                        a = float(label[i])
                        if a == 1:
                            label[i] = 1
                        elif a == -1:
                            if policy == "ones":
                                label[i] = 1
                            elif policy == "zeroes":
                                label[i] = 0
                            else:
                                label[i] = 0
                        else:
                            label[i] = 0
                    else:
                        label[i] = 0
                
                image_names.append('/gpfs/data/denizlab/Datasets/Public/' + image_name)
        
#                 image_names.append('./' + image_name)
                labels.append(label)

        self.image_names = image_names
        self.labels = labels
        self.transform = transform

    def __getitem__(self, index):
        """Take the index of item and returns the image and its labels"""
        image_name = self.image_names[index]
        image = Image.open(image_name).convert('RGB')
        label = self.labels[index]
        if self.transform is not None:
            image = self.transform(image)
        return image, torch.FloatTensor(label)

    def __len__(self):
        return len(self.image_names)
    

IMAGENET_MEAN = [0.485, 0.456, 0.406]  # mean of ImageNet dataset(for normalization)
IMAGENET_STD = [0.229, 0.224, 0.225]   # std of ImageNet dataset(for normalization)

# Tranform data
normalize = transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)
transformList = []

transformList.append(transforms.Resize((imgtransCrop, imgtransCrop))) # 224
# transformList.append(transforms.RandomResizedCrop(imgtransCrop))
# transformList.append(transforms.RandomHorizontalFlip())
transformList.append(transforms.ToTensor())
# transformList.append(normalize)
transformSequence = transforms.Compose(transformList)

# Load dataset
datasetTrain = CheXpertDataSet(pathFileTrain, transformSequence, policy = "ones")
print("Train data length:", len(datasetTrain))
dataLoaderTrain = DataLoader(dataset=datasetTrain, batch_size=trBatchSize, shuffle=True,  num_workers=20, pin_memory=True)


datasetValid = CheXpertDataSet(pathFileValid, transformSequence)
print("Valid data length:", len(datasetValid))
dataLoaderVal = DataLoader(dataset = datasetValid, batch_size = trBatchSize, 
                           shuffle = False, num_workers = 2, pin_memory = True)

# data("Test data length:", len(datasetTest))


class CheXpertTrainer():

    def train(model, dataLoaderTrain, dataLoaderVal, nnClassCount, trMaxEpoch, checkpoint):
        optimizer = optim.Adam(model.parameters(), lr = 0.0001, # setting optimizer & scheduler
                               betas = (0.9, 0.999), eps = 1e-08, weight_decay = 0) 
        loss = torch.nn.BCELoss() # setting loss function
        
        if checkpoint != None and use_gpu: # loading checkpoint
            modelCheckpoint = torch.load(checkpoint)
            model.load_state_dict(modelCheckpoint['state_dict'])
            optimizer.load_state_dict(modelCheckpoint['optimizer'])
            
        # Train the network
        lossMIN = 100000
        train_start = []
        train_end = []
        for epochID in range(0, trMaxEpoch):
            train_start.append(time.time()) # training starts
            losst = CheXpertTrainer.epochTrain(model, dataLoaderTrain, optimizer, trMaxEpoch, nnClassCount, loss)
            train_end.append(time.time()) # training ends
#             lossv = CheXpertTrainer.epochVal(model, dataLoaderVal, optimizer, trMaxEpoch, nnClassCount, loss)
            print("Training loss: {:.3f},".format(losst))#, "Valid loss: {:.3f}".format(lossv))
            
            if losst < lossMIN:
                lossMIN = losst
                torch.save({'epoch': epochID + 1, 'state_dict': model.state_dict(), 
                            'best_loss': lossMIN, 'optimizer' : optimizer.state_dict()}, 
                           'm-epoch_FL' + str(epochID + 1) + '.pth.tar')
                print('Epoch ' + str(epochID + 1) + ' [save] loss = ' + str(losst))
            else:
                print('Epoch ' + str(epochID + 1) + ' [----] loss = ' + str(losst))

        train_time = np.array(train_end) - np.array(train_start)
        print("Training time for each epoch: {} seconds".format(train_time.round(0)))
        params = model.state_dict()
        return params
       
        
    def epochTrain(model, dataLoaderTrain, optimizer, epochMax, classCount, loss):
        losstrain = 0
        model.train()

        for batchID, (varInput, target) in enumerate(Bar(dataLoaderTrain)):
            
            varTarget = target.cuda(non_blocking = True)
            varOutput = model(varInput)
            lossvalue = loss(varOutput, varTarget)
                       
            optimizer.zero_grad()
            lossvalue.backward()
            optimizer.step()
            
            losstrain += lossvalue.item()
            
        return losstrain / len(dataLoaderTrain)

    
    def computeAUROC(dataGT, dataPRED, classCount):
        # Computes area under ROC curve 
        # dataGT: ground truth data
        # dataPRED: predicted data
        outAUROC = []
        datanpGT = dataGT.cpu().numpy()
        datanpPRED = dataPRED.cpu().numpy()
        
        for i in range(classCount):
            try:
                outAUROC.append(roc_auc_score(datanpGT[:, i], datanpPRED[:, i]))
            except ValueError:
                pass
        return outAUROC
    
    
    def test(model, dataLoaderTest, nnClassCount, checkpoint, class_names):
        cudnn.benchmark = True
        
        if checkpoint != None and use_gpu:
            modelCheckpoint = torch.load(checkpoint)
            model.load_state_dict(modelCheckpoint['state_dict'])

        if use_gpu:
            outGT = torch.FloatTensor().cuda()
            outPRED = torch.FloatTensor().cuda()
        else:
            outGT = torch.FloatTensor()
            outPRED = torch.FloatTensor()
       
        model.eval()
        
        with torch.no_grad():
            for i, (input, target) in enumerate(dataLoaderTest):

                target = target.cuda()
                outGT = torch.cat((outGT, target), 0).cuda()

                bs, c, h, w = input.size()
                varInput = input.view(-1, c, h, w)
            
                out = model(varInput)
                outPRED = torch.cat((outPRED, out), 0)
        aurocIndividual = CheXpertTrainer.computeAUROC(outGT, outPRED, nnClassCount)
        aurocMean = np.array(aurocIndividual).mean()
        print('AUROC mean ', aurocMean)
        
        for i in range (0, len(aurocIndividual)):
            print(class_names[i], ' ', aurocIndividual[i])
        
        return outGT, outPRED

True
Train data length: 191027
Valid data length: 202
Train data length: 191027
Valid data length: 202


In [5]:
class DenseNet121(nn.Module):
    """Model modified.
    The architecture of our model is the same as standard DenseNet121
    except the classifier layer which has an additional sigmoid function.
    """
    def __init__(self, out_size):
        super(DenseNet121, self).__init__()
        self.densenet121 = torchvision.models.densenet121(pretrained = False)
        num_ftrs = self.densenet121.classifier.in_features
        self.densenet121.classifier = nn.Sequential(
            nn.Linear(num_ftrs, out_size),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.densenet121(x)
        return x

In [6]:
# resnet model
class resnet_model(nn.Module):
    def __init__(self, size, features_dim, out_size, pretrained=False):
        super(resnet_model, self).__init__()
        
        if size==18:
            self.backbone = torchvision.models.resnet18(pretrained=pretrained)
        elif size==50:
            self.backbone = torchvision.models.resnet50(pretrained=pretrained)
        elif size==101:
            self.backbone = torchvision.models.resnet101(pretrained=pretrained)
        else:
            raise NotImplementedError(f"ResNet with size {size} is not implemented!")

        #self.backbone.conv1 = torch.nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        self.feature_dim_in = self.backbone.fc.weight.shape[1]
        self.backbone.fc = nn.Linear(in_features=self.feature_dim_in, out_features=features_dim, bias=True)
        self.classifier = nn.Sequential(
            nn.Linear(features_dim, out_size),
            nn.Sigmoid()
        )
        
    def forward(self, x):
        x = self.backbone(x)
        x = self.classifier(x)
        return x

In [13]:
size = 50
features_dim = 2048
out_size = 14

model = resnet_model(size, features_dim, out_size, pretrained=True).cuda() # Step 0: Initialize global model and load the model
model = torch.nn.DataParallel(model).cuda()
#model = model.cuda()

In [14]:
# model = DenseNet121(nnClassCount).cuda() # Step 0: Initialize global model and load the model
# model = torch.nn.DataParallel(model).cuda()

In [15]:
ls

Performance_Check.ipynb  [0m[38;5;9mm-epoch_FL2.pth.tar[0m         [38;5;9mm-epoch_FL_ResNet3.pth.tar[0m
[38;5;33m__pycache__[0m/             [38;5;9mm-epoch_FL3.pth.tar[0m         [38;5;33mmodel_saved[0m/
base3.py                 [38;5;9mm-epoch_FL_ResNet1.pth.tar[0m  train_mod.csv
[38;5;9mm-epoch_FL1.pth.tar[0m      [38;5;9mm-epoch_FL_ResNet2.pth.tar[0m  valid_mod.csv


In [31]:
def load_model(load_path, model):
    load_path = load_path
    checkpoint = torch.load(load_path)
    state_dict = {k.replace("img_backbone.", "module."): v for k, v in checkpoint['state_dict'].items()}
    model.load_state_dict(state_dict, strict=False)

# ResNet-50 Trained

In [35]:
path = "m-epoch_FL_ResNet1.pth.tar"
outGT, outPRED = CheXpertTrainer.test(model, dataLoaderVal, nnClassCount, path, class_names)

AUROC mean  0.8034605977281252
No Finding   0.9217657342657343
Enlarged Cardiomediastinum   0.5199803632793324
Cardiomegaly   0.7892156862745098
Lung Opacity   0.9100050276520866
Lung Lesion   0.37810945273631846
Edema   0.9270833333333333
Consolidation   0.8463235294117646
Pneumonia   0.7899484536082474
Atelectasis   0.8427296587926509
Pneumothorax   0.8234432234432234
Pleural Effusion   0.9321784420289854
Pleural Other   0.8407960199004976
Fracture   0.9234088457389428


In [36]:
path1 = 'm-epoch_FL_ResNet2.pth.tar'
outGT, outPRED = CheXpertTrainer.test(model, dataLoaderVal, nnClassCount, path1, class_names)

AUROC mean  0.8029576408278938
No Finding   0.8999125874125875
Enlarged Cardiomediastinum   0.5264604810996564
Cardiomegaly   0.8284313725490196
Lung Opacity   0.9149321266968327
Lung Lesion   0.23880597014925375
Edema   0.9136904761904763
Consolidation   0.8571691176470588
Pneumonia   0.788659793814433
Atelectasis   0.7938057742782153
Pneumothorax   0.9025641025641025
Pleural Effusion   0.9406702898550725
Pleural Other   0.8955223880597015
Fracture   0.9378248504462098


In [37]:
path3 = 'm-epoch_FL_ResNet3.pth.tar'
outGT, outPRED = CheXpertTrainer.test(model, dataLoaderVal, nnClassCount, path3, class_names)

AUROC mean  0.8625504258523878
No Finding   0.8940122377622377
Enlarged Cardiomediastinum   0.5771232204221896
Cardiomegaly   0.821524064171123
Lung Opacity   0.9180492709904474
Lung Lesion   0.8656716417910448
Edema   0.9230654761904762
Consolidation   0.9033088235294118
Pneumonia   0.8466494845360824
Atelectasis   0.8371653543307086
Pneumothorax   0.904029304029304
Pleural Effusion   0.9266304347826088
Pleural Other   0.8507462686567164
Fracture   0.9451799548886928


# CLIP ResNet50 trained

In [20]:
path = None
outGT, outPRED = CheXpertTrainer.test(model, dataLoaderVal, nnClassCount, path, class_names)

AUROC mean  0.7830073622458903
No Finding   0.8957604895104896
Enlarged Cardiomediastinum   0.5549337260677467
Cardiomegaly   0.8580659536541889
Lung Opacity   0.8910005027652087
Lung Lesion   0.05970149253731338
Edema   0.919345238095238
Consolidation   0.8836397058823529
Pneumonia   0.8492268041237112
Atelectasis   0.8086089238845144
Pneumothorax   0.7648351648351648
Pleural Effusion   0.9138360507246377
Pleural Other   0.9054726368159204
Fracture   0.8746690203000882


In [21]:
load_path = "model_saved/m-epoch_FL_resnet_clip2.pth.tar"
checkpoint = torch.load(load_path)
state_dict = {k.replace("img_backbone.", "module."): v for k, v in checkpoint['state_dict'].items()}
model.load_state_dict(state_dict, strict=False)

<All keys matched successfully>

In [22]:
path = None
outGT, outPRED = CheXpertTrainer.test(model, dataLoaderVal, nnClassCount, path, class_names)

AUROC mean  0.7961217992803559
No Finding   0.8909527972027972
Enlarged Cardiomediastinum   0.5240058910162003
Cardiomegaly   0.7389705882352942
Lung Opacity   0.8804424333836098
Lung Lesion   0.4477611940298507
Edema   0.921875
Consolidation   0.8509191176470587
Pneumonia   0.7197164948453608
Atelectasis   0.8035695538057742
Pneumothorax   0.8681318681318682
Pleural Effusion   0.9099864130434783
Pleural Other   0.8557213930348259
Fracture   0.9375306462685103


In [23]:
load_path = "model_saved/m-epoch_FL_resnet_clip3.pth.tar"
checkpoint = torch.load(load_path)
state_dict = {k.replace("img_backbone.", "module."): v for k, v in checkpoint['state_dict'].items()}
model.load_state_dict(state_dict, strict=False)

<All keys matched successfully>

In [24]:
path = None
outGT, outPRED = CheXpertTrainer.test(model, dataLoaderVal, nnClassCount, path, class_names)

AUROC mean  0.7299622005409355
No Finding   0.8896416083916084
Enlarged Cardiomediastinum   0.2927835051546392
Cardiomegaly   0.7284982174688057
Lung Opacity   0.893212669683258
Lung Lesion   0.09452736318407962
Edema   0.9226190476190476
Consolidation   0.8628676470588235
Pneumonia   0.7377577319587629
Atelectasis   0.793490813648294
Pneumothorax   0.8183150183150183
Pleural Effusion   0.9093070652173914
Pleural Other   0.6567164179104478
Fracture   0.8897715014219869


# SLIP VICREG

In [25]:
load_path = "model_saved/m-epoch_FL_resnet_slipvicreg1.pth.tar"
checkpoint = torch.load(load_path)
state_dict = {k.replace("img_backbone.", "module."): v for k, v in checkpoint['state_dict'].items()}
model.load_state_dict(state_dict, strict=False)

<All keys matched successfully>

In [26]:
path = None
outGT, outPRED = CheXpertTrainer.test(model, dataLoaderVal, nnClassCount, path, class_names)

AUROC mean  0.7995279980480292
No Finding   0.8533653846153846
Enlarged Cardiomediastinum   0.4302405498281787
Cardiomegaly   0.7995766488413548
Lung Opacity   0.9102061337355455
Lung Lesion   0.4328358208955224
Edema   0.9375
Consolidation   0.8398897058823529
Pneumonia   0.8369845360824741
Atelectasis   0.8015748031496063
Pneumothorax   0.8219780219780219
Pleural Effusion   0.9271965579710144
Pleural Other   0.8656716417910448
Fracture   0.9368441698538786


In [27]:
load_path = "model_saved/m-epoch_FL_resnet_slipvicreg2.pth.tar"
checkpoint = torch.load(load_path)
state_dict = {k.replace("img_backbone.", "module."): v for k, v in checkpoint['state_dict'].items()}
model.load_state_dict(state_dict, strict=False)

<All keys matched successfully>

In [28]:
path = None
outGT, outPRED = CheXpertTrainer.test(model, dataLoaderVal, nnClassCount, path, class_names)

AUROC mean  0.7621250587463885
No Finding   0.8830856643356644
Enlarged Cardiomediastinum   0.504074619538537
Cardiomegaly   0.8269830659536542
Lung Opacity   0.9145299145299145
Lung Lesion   0.30348258706467657
Edema   0.9379464285714285
Consolidation   0.8292279411764707
Pneumonia   0.7725515463917526
Atelectasis   0.8233070866141733
Pneumothorax   0.780952380952381
Pleural Effusion   0.9348958333333334
Pleural Other   0.4626865671641791
Fracture   0.9339021280768853


In [29]:
load_path = "model_saved/m-epoch_FL_resnet_slipvicreg3.pth.tar"
checkpoint = torch.load(load_path)
state_dict = {k.replace("img_backbone.", "module."): v for k, v in checkpoint['state_dict'].items()}
model.load_state_dict(state_dict, strict=False)

<All keys matched successfully>

In [30]:
path = None
outGT, outPRED = CheXpertTrainer.test(model, dataLoaderVal, nnClassCount, path, class_names)

AUROC mean  0.8185607501543342
No Finding   0.9060314685314685
Enlarged Cardiomediastinum   0.5668139420716741
Cardiomegaly   0.8170677361853832
Lung Opacity   0.8972347913524384
Lung Lesion   0.5124378109452736
Edema   0.9251488095238095
Consolidation   0.8658088235294118
Pneumonia   0.7222938144329897
Atelectasis   0.8241469816272966
Pneumothorax   0.8117216117216117
Pleural Effusion   0.9296875
Pleural Other   0.945273631840796
Fracture   0.9176228302441894


             Atelectasis       Cardiomegaly       Consolidation       Edema Pleural      Effusion
PAPER:  0.858(0.806,0.910)  0.832(0.773,0.890)  0.899(0.854,0.944)  0.941(0.903,0.980)  0.934(0.901,0.967)
Result:    0.85070              0.84569               0.88235              0.90595          0.92142
