In [2]:
import torch
import os
import pandas as pd
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import time
import sys
import os
import cvxpy as cp

import torch.nn as nn
import torch.backends.cudnn as cudnn
import torchvision
import torch.optim as optim
import torch.nn.functional as tfunc
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
import torch.nn.functional as func

from sklearn.metrics.ranking import roc_auc_score

from torch.utils.data import Dataset
from PIL import Image
from models.chexnet.DensenetModels import DenseNet121, DenseNet169
from models.models import ResNet18
from tensorboardX import SummaryWriter
from sklearn.metrics import classification_report, accuracy_score
from dataset import ChestXrayDataSet
from metrics import *

%load_ext autoreload
%autoreload 2

torch.cuda.current_device()

0

In [3]:
classes = 9
classCount = 5


In [4]:
def evaluate_model(model, classes=9, classCount=5):
    model.eval()
    
    outGT = torch.FloatTensor().cuda()
    outPRED = torch.FloatTensor().cuda()

    with torch.no_grad():
        for i, (input, target) in enumerate(dataLoaderVal):
            target = target.cuda()
            varInput = torch.autograd.Variable(input.cuda())
            varTarget = torch.autograd.Variable(target)
            varOutput = model(varInput)

            varOutput[:,0] = torch.sigmoid(varOutput[:,0])
            varOutput[:,1] = torch.sigmoid(varOutput[:,1])
            varOutput[:,2] = torch.sigmoid(varOutput[:,2])            

            ### VAL Preds for AUROC
            bPRED = torch.zeros(varOutput.shape[0], 5).cuda()
            bPRED[:,0] = varOutput[:,0]
            bPRED[:,1] = varOutput[:,1]
            bPRED[:,2] = varOutput[:,2]

            softmax = torch.nn.Softmax()
            soft_a = softmax(varOutput[:,3:6]).data
            a0, a1, a2 = soft_a[:, 0], soft_a[:, 1], soft_a[:, 2]
            bPRED[:, 3] = a1/(a0+a1)
            soft_b = softmax(varOutput[:,6:9]).data
            b0, b1, b2 = soft_b[:, 0], soft_b[:, 1], soft_b[:, 2]
            bPRED[:, 4] = b1/(b0+b1)

            outPRED = torch.cat((outPRED, bPRED.data), 0)            
            outGT = torch.cat((outGT, target), 0)
            ##block comment was here

        aurocIndividual = computeAUROC(outGT, outPRED, classCount)
        aurocMean = np.array(aurocIndividual).mean()

        print("AUROC val", aurocMean)
        print("AUROC individual", aurocIndividual)
        
    aurocIndividual = computeAUROC(outGT, outPRED, classCount)
    aurocMean = np.array(aurocIndividual).mean()
    accMean = np.array(computeAcc(outGT, outPRED, classCount)).mean()
    print("Mean accuracy", accMean)
    return aurocIndividual

In [5]:
VAL_IMAGE_LIST = './data/CheXpert-v1.0-small/valid.csv'
DATA_DIR = './data'
batchSize = 32

transResize = (300, 300)

#-------------------- SETTINGS: DATA TRANSFORMS
normalize = transforms.Normalize([0.50616586, 0.50616586, 0.50616586], [0.2879059, 0.2879059, 0.2879059]) #transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
transformList = []
transformList.append(transforms.Resize(transResize))
transformList.append(transforms.ToTensor())
transformList.append(normalize)    
transformSequence=transforms.Compose(transformList)

datasetVal =   ChestXrayDataSet(data_dir=DATA_DIR, image_list_file=VAL_IMAGE_LIST, transform=transformSequence)
dataLoaderVal = DataLoader(dataset=datasetVal, batch_size=batchSize, shuffle=False, num_workers=0, pin_memory=True)

  labels = chex_df.as_matrix(columns=diseases)
  image_names = chex_df.as_matrix(columns=['Path']).flatten()


In [None]:
model_zoo = []
model_dir = 'forward'
individuals = []
for f in os.listdir(model_dir):
    print(f)
    model = None
    model = DenseNet121(classes, False).cuda()
    model = torch.nn.DataParallel(model).cuda()
    checkpoint = torch.load(os.path.join(model_dir, f))
    model.load_state_dict(checkpoint['state_dict'])
    del checkpoint
    aurocIndividual = evaluate_model(model)
    individuals.append(aurocIndividual)
    
best = np.max(np.vstack(individuals), axis=0)
print(best)

In [None]:
np.mean(best)

In [None]:
def evaluate_model_zoo(model_zoo, classes=9, classCount=5):
    for model in model_zoo:
        model.eval()
    
    outGT = torch.FloatTensor().cuda()
    outPRED = torch.FloatTensor().cuda()

    with torch.no_grad():
        for i, (input, target) in enumerate(dataLoaderVal):
            target = target.cuda()
            varInput = torch.autograd.Variable(input.cuda())
            varTarget = torch.autograd.Variable(target)
            varOutputs = []
            for model in model_zoo:
                (32,54)
                varOutputs.append(np.expand_dims(model(varInput).cpu().numpy(), axis=0)) 
            
            varOutputs = np.vstack(varOutputs)
            varOutput = torch.from_numpy(np.mean(varOutputs, axis=0)) ##bug! mean should come after softmax
            varOutput[:,0] = torch.sigmoid(varOutput[:,0])
            varOutput[:,1] = torch.sigmoid(varOutput[:,1])
            varOutput[:,2] = torch.sigmoid(varOutput[:,2])            

            ### VAL Preds for AUROC
            bPRED = torch.zeros(varOutput.shape[0], 5).cuda()
            bPRED[:,0] = varOutput[:,0]
            bPRED[:,1] = varOutput[:,1]
            bPRED[:,2] = varOutput[:,2]

            softmax = torch.nn.Softmax()
            soft_a = softmax(varOutput[:,3:6]).data
            a0, a1, a2 = soft_a[:, 0], soft_a[:, 1], soft_a[:, 2]
            bPRED[:, 3] = a1/(a0+a1)
            soft_b = softmax(varOutput[:,6:9]).data
            b0, b1, b2 = soft_b[:, 0], soft_b[:, 1], soft_b[:, 2]
            bPRED[:, 4] = b1/(b0+b1)

            outPRED = torch.cat((outPRED, bPRED.data), 0)            
            outGT = torch.cat((outGT, target), 0)
            ##block comment was here

        aurocIndividual = computeAUROC(outGT, outPRED, classCount)
        aurocMean = np.array(aurocIndividual).mean()

        print("AUROC val", aurocMean)
        print("AUROC individual", aurocIndividual)
        
    aurocIndividual = computeAUROC(outGT, outPRED, classCount)
    aurocMean = np.array(aurocIndividual).mean()
    accMean = np.array(computeAcc(outGT, outPRED, classCount)).mean()
    print("Mean accuracy", accMean)
    return aurocIndividual

In [None]:
model_zoo_names = ['m-37050_0.897.pth.tar', 'm-48990_0.879.pth.tar', 'm-46590_0.876.pth.tar', 'm-25110_0.886.pth.tar', 'm-14340_0.891.pth.tar', 'm-22710_0.878.pth.tar']
model_dir ='forward'

model_zoo = []
for model_name in model_zoo_names:
    model = None
    model = DenseNet121(classes, False).cuda()
    model = torch.nn.DataParallel(model).cuda()
    checkpoint = torch.load(os.path.join(model_dir, model_name))
    model.load_state_dict(checkpoint['state_dict'])
    del checkpoint
    model_zoo.append(model)
#metrics = evaluate_model_zoo(model_zoo)


# 1st element: best model
# 2nd to 6th element: best models on each of our diseases

## Compute weighted sum ##

In [5]:
pathDirData = './data'
pathFileTrain = './data/CheXpert-v1.0-small/train.csv'
trBatchSize = 32
transResize = (300, 300)
transCrop = 224

In [6]:
normalize = transforms.Normalize([0.50616586, 0.50616586, 0.50616586], [0.2879059, 0.2879059, 0.2879059]) #transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

transformList = []
transformList.append(transforms.Resize(transResize))
transformList.append(transforms.ToTensor())
transformList.append(normalize)    
transformSequence=transforms.Compose(transformList)

#-------------------- SETTINGS: DATASET BUILDER |TRAIN|

datasetTrain = ChestXrayDataSet(data_dir=pathDirData,image_list_file=pathFileTrain, transform=transformSequence)              
dataLoaderTrain = DataLoader(dataset=datasetTrain, batch_size=trBatchSize, shuffle=True,  num_workers=0, pin_memory=False)

In [9]:
num_diseases = 5

In [7]:
def get_weights_training_set(model_zoo, num_examples):    

    for model in model_zoo:
        model.eval()
    num_models = len(model_zoo)
    
    outGT = torch.FloatTensor().cuda()
    outPREDs = [torch.FloatTensor().cuda()]*num_models
    with torch.no_grad():
        for i, (input, target) in enumerate(dataLoaderTrain):
            #Val code
            target = target.cuda()
            varInput = torch.autograd.Variable(input.cuda())
            varTarget = torch.autograd.Variable(target)
            
            outGT = torch.cat((outGT, target), 0)
            for m, model in enumerate(model_zoo):
                varOutput = model(varInput)

                varOutput[:,0] = torch.sigmoid(varOutput[:,0])
                varOutput[:,1] = torch.sigmoid(varOutput[:,1])
                varOutput[:,2] = torch.sigmoid(varOutput[:,2])            


                ### VAL Preds for AUROC
                bPRED = torch.zeros(varOutput.shape[0], 5).cuda()
                bPRED[:,0] = varOutput[:,0]
                bPRED[:,1] = varOutput[:,1]
                bPRED[:,2] = varOutput[:,2]
                
                soft_a = torch.nn.functional.softmax(varOutput[:,3:6], dim=-1).data

                a0, a1, a2 = soft_a[:, 0], soft_a[:, 1], soft_a[:, 2]
                bPRED[:, 3] = a1/(a0+a1)
                soft_b = torch.nn.functional.softmax(varOutput[:,6:9], dim=-1).data
                b0, b1, b2 = soft_b[:, 0], soft_b[:, 1], soft_b[:, 2]
                bPRED[:, 4] = b1/(b0+b1)

#                 outPRED = torch.cat((outPRED, bPRED.data), 0)            
#                 outGT = torch.cat((outGT, target), 0)
                
                outPREDs[m] = torch.cat((outPREDs[m], bPRED.data), 0)            

                ##block comment was here
            if i % int(100/trBatchSize) == 0:
                print(i*trBatchSize)
            if i*trBatchSize >= num_examples:
                break
    return outPREDs, outGT

In [24]:
outPREDs, outGT = get_weights_training_set(model_zoo, 10000)

0
96
192
288
384
480
576
672
768
864
960
1056
1152
1248
1344
1440
1536
1632
1728
1824
1920
2016
2112
2208
2304
2400
2496
2592
2688
2784
2880
2976
3072
3168
3264
3360
3456
3552
3648
3744
3840
3936
4032
4128
4224
4320
4416
4512
4608
4704
4800
4896
4992
5088
5184
5280
5376
5472
5568
5664
5760
5856
5952
6048
6144
6240
6336
6432
6528
6624
6720
6816
6912
7008
7104
7200
7296
7392
7488
7584
7680
7776
7872
7968
8064
8160
8256
8352
8448
8544
8640
8736
8832
8928
9024
9120
9216
9312
9408
9504
9600
9696
9792
9888
9984


In [25]:
def get_weight_training_sets(outPREDs, outGT):
    training_sets = []
    probs = []
    for m in range(len(outPREDs)):
        probs.append(outPREDs[m].cpu().numpy())
    pred_volume = np.stack(probs, axis=2)
    labels = outGT.cpu().numpy()
    for i in range(num_diseases):
        Ai = np.squeeze(pred_volume[:, i, :])
        bi = labels[:, i]
        clear_rows = np.array(bi!=2)
        Ai, bi = [Ai[clear_rows, :], bi[clear_rows]]
        training_sets.append([Ai, bi])
    return(training_sets)

In [12]:
#np.save('ensemble_training_sets.npy', np.array(training_sets),)

NameError: name 'training_sets' is not defined

In [26]:
training_sets = get_weight_training_sets(outPREDs, outGT)

In [36]:
reg_weights = []

o = np.ones(len(model_zoo))
for i in range(len(training_sets)):
    Ai, bi = training_sets[i]
    wi = cp.Variable(len(model_zoo))
    obj = cp.sum_squares(Ai*wi - bi)/Ai.shape[0]
    constraints = [wi >= 0.01, wi*o == 1]
    prob = cp.Problem(cp.Minimize(obj), constraints=constraints)
    print('disease_{}'.format(i))
    print(prob.solve())
    print('weights')
    print(wi.value) 
    reg_weights.append(wi.value)
reg_weights = np.array(reg_weights).T
print(reg_weights.shape)

disease_0
0.18176330936280885
weights
[0.91951901 0.01       0.07048099]
disease_1
0.08029805348829164
weights
[0.98 0.01 0.01]
disease_2
0.13417284341555916
weights
[0.96676433 0.01       0.02323567]
disease_3
0.06475894606611106
weights
[0.98 0.01 0.01]
disease_4
0.11788407239981458
weights
[0.9605097 0.01      0.0294903]
(3, 5)


In [28]:
bce_weights = []

o = np.ones(len(model_zoo))
for i in range(len(training_sets)):
    Ai, bi = training_sets[i]
    wi = cp.Variable(len(model_zoo))
    preds = Ai*wi
    obj = cp.sum(cp.multiply(bi, -cp.log(preds)) + cp.multiply(1-bi, -cp.log(1-preds)))
    obj = obj/Ai.shape[0]
    constraints = [wi >= 0.01, wi*o == 1]
    prob = cp.Problem(cp.Minimize(obj), constraints=constraints)
    print('disease_{}'.format(i))
    print(prob.solve())
    print('weights')
    print(wi.value) 
    bce_weights.append(wi.value)
bce_weights = np.array(bce_weights).T

disease_0
0.5358695972960016
weights
[0.96618399 0.01       0.02381601]
disease_1
0.2902021905595151
weights
[0.98 0.01 0.01]
disease_2
0.4147129374477254
weights
[0.98 0.01 0.01]
disease_3
0.22015427662275353
weights
[0.98 0.01 0.01]
disease_4
0.3723372866345213
weights
[0.97968452 0.01       0.01031548]


In [29]:
from sklearn.linear_model import LogisticRegression
log_weights = []
log_intercepts = []
for i in range(len(training_sets)):
    Ai, bi = training_sets[i]
    l = LogisticRegression()
    l.fit(Ai, bi)
    log_weights.append(l.coef_[0])
    log_intercepts.append(l.intercept_[0])
log_weights = np.array(log_weights).T
log_intercepts = np.array(log_intercepts).reshape(1, num_diseases)
print(log_weights.shape, log_intercepts.shape)


(3, 5) (1, 5)




In [6]:
def evaluate_model_zoo_with_weights(model_zoo, weights, log_reg=False, intercepts=None, classes=9, classCount=5):
    for model in model_zoo:
        model.eval()
    num_models = len(model_zoo)
    
    outGT = torch.FloatTensor().cuda()
    outPREDs = [torch.FloatTensor().cuda()]*num_models
    
    with torch.no_grad():
        for i, (input, target) in enumerate(dataLoaderVal):
            #Val code
            target = target.cuda()
            varInput = torch.autograd.Variable(input.cuda())
            varTarget = torch.autograd.Variable(target)
            
            outGT = torch.cat((outGT, target), 0)
            for m, model in enumerate(model_zoo):
                varOutput = model(varInput)

                varOutput[:,0] = torch.sigmoid(varOutput[:,0])
                varOutput[:,1] = torch.sigmoid(varOutput[:,1])
                varOutput[:,2] = torch.sigmoid(varOutput[:,2])            


                ### VAL Preds for AUROC
                bPRED = torch.zeros(varOutput.shape[0], 5).cuda()
                bPRED[:,0] = varOutput[:,0]
                bPRED[:,1] = varOutput[:,1]
                bPRED[:,2] = varOutput[:,2]
                
                soft_a = torch.nn.functional.softmax(varOutput[:,3:6], dim=-1).data

                a0, a1, a2 = soft_a[:, 0], soft_a[:, 1], soft_a[:, 2]
                bPRED[:, 3] = a1/(a0+a1)
                soft_b = torch.nn.functional.softmax(varOutput[:,6:9], dim=-1).data
                b0, b1, b2 = soft_b[:, 0], soft_b[:, 1], soft_b[:, 2]
                bPRED[:, 4] = b1/(b0+b1)

#                 outPRED = torch.cat((outPRED, bPRED.data), 0)            
#                 outGT = torch.cat((outGT, target), 0)
                
                outPREDs[m] = torch.cat((outPREDs[m], bPRED.data), 0) 
            
#         print(outPREDs[0][0,:])
#         print(weights)
        for j,w in enumerate(weights):
            w_tensor = torch.from_numpy(w.reshape(1, num_diseases))
            w_tensor = w_tensor.type(torch.FloatTensor).cuda()

            #w_tensor = torch.FloatTensor(w_tensor).cuda()
            outPREDs[j] = outPREDs[j]*w_tensor
            if log_reg:
                outPREDs[j] += torch.from_numpy(intercepts).type(torch.FloatTensor).cuda()
#         print(outPREDs[0][0,:])
        outPRED = sum(outPREDs)
        if log_reg:
            outPRED = torch.sigmoid(outPRED)
            
            

        aurocIndividual = computeAUROC(outGT, outPRED, classCount)
        aurocMean = np.array(aurocIndividual).mean()

        print("AUROC val", aurocMean)
        print("AUROC individual", aurocIndividual)
        
    aurocIndividual = computeAUROC(outGT, outPRED, classCount)
    aurocMean = np.array(aurocIndividual).mean()
    accMean = np.array(computeAcc(outGT, outPRED, classCount)).mean()
    print("Mean accuracy", accMean)
    return aurocIndividual

In [31]:
evaluate_model_zoo_with_weights(model_zoo, reg_weights)

AUROC val 0.9006227811204777
AUROC individual [0.8623622047244094, 0.909375, 0.9361607142857142, 0.8656417112299465, 0.9295742753623188]
Mean accuracy 0.8158415841584159


[0.8623622047244094,
 0.909375,
 0.9361607142857142,
 0.8656417112299465,
 0.9295742753623188]

In [32]:
evaluate_model_zoo_with_weights(model_zoo, bce_weights)

AUROC val 0.8970193131756142
AUROC individual [0.8619422572178478, 0.9034926470588235, 0.9266369047619047, 0.8680926916221035, 0.9249320652173914]
Mean accuracy 0.8069306930693069


[0.8619422572178478,
 0.9034926470588235,
 0.9266369047619047,
 0.8680926916221035,
 0.9249320652173914]

In [33]:
evaluate_model_zoo_with_weights(model_zoo, log_weights, log_reg=True, intercepts=log_intercepts)

AUROC val 0.8979755296223871
AUROC individual [0.8616272965879266, 0.9056985294117648, 0.9342261904761904, 0.8646390374331551, 0.9236865942028986]
Mean accuracy 0.7237623762376237


[0.8616272965879266,
 0.9056985294117648,
 0.9342261904761904,
 0.8646390374331551,
 0.9236865942028986]

In [15]:
del model_zoo2
#evaluate_model_zoo_with_weights(model_zoo, np.array([[1/6]*5]*6))

In [16]:
model_dir ='forward'

model_zoo2 = []
model = None
model = DenseNet121(classes, False).cuda()
model = torch.nn.DataParallel(model).cuda()
checkpoint = torch.load('best_models/forward121/m-37050_0-Copy1.897.pth.tar')
model.load_state_dict(checkpoint['state_dict'])
del checkpoint
model_zoo2.append(model)

model = None
model = DenseNet121(classes, False).cuda()
model = torch.nn.DataParallel(model).cuda()
checkpoint = torch.load('forward/m-8370_0.893.pth.tar')
model.load_state_dict(checkpoint['state_dict'])
del checkpoint
model_zoo2.append(model)

# model = None
# model = DenseNet121(classes, False).cuda()
# model = torch.nn.DataParallel(model).cuda()
# checkpoint = torch.load('best_models/forward121/m-14340_0-Copy1.891.pth.tar')
# model.load_state_dict(checkpoint['state_dict'])
# del checkpoint
# model_zoo2.append(model)

model = None
model = DenseNet169(classes, False).cuda()
model = torch.nn.DataParallel(model).cuda()
checkpoint = torch.load('best_models/forward169/m-26280_0.892.pth.tar')
checkpoint['state_dict']['module.densenet169.classifier.0.weight'] = checkpoint['state_dict']['module.densenet169.classifier.weight']
checkpoint['state_dict']['module.densenet169.classifier.0.bias'] = checkpoint['state_dict']['module.densenet169.classifier.bias']
checkpoint['state_dict'].pop('module.densenet169.classifier.weight', None)
checkpoint['state_dict'].pop('module.densenet169.classifier.bias', None)
model.load_state_dict(checkpoint['state_dict'])
del checkpoint
model_zoo2.append(model)

### auroc val 0.9029 without the following model:
model = None
model = DenseNet169(classes, False).cuda()
model = torch.nn.DataParallel(model).cuda()
checkpoint = torch.load('best_models/forward169/m-8370_0.887.pth.tar')
checkpoint['state_dict']['module.densenet169.classifier.0.weight'] = checkpoint['state_dict']['module.densenet169.classifier.weight']
checkpoint['state_dict']['module.densenet169.classifier.0.bias'] = checkpoint['state_dict']['module.densenet169.classifier.bias']
checkpoint['state_dict'].pop('module.densenet169.classifier.weight', None)
checkpoint['state_dict'].pop('module.densenet169.classifier.bias', None)
model.load_state_dict(checkpoint['state_dict'])
del checkpoint
model_zoo2.append(model)
###
model_zoo = model_zoo2

In [14]:
evaluate_model_zoo_with_weights(model_zoo2, np.array([[1/4]*5]*4))

AUROC val 0.9048359991969152
AUROC individual [0.8608923884514437, 0.9069852941176471, 0.9395833333333333, 0.8801247771836007, 0.9365942028985508]
Mean accuracy 0.8198019801980199


[0.8608923884514437,
 0.9069852941176471,
 0.9395833333333333,
 0.8801247771836007,
 0.9365942028985508]