# Deep Ensemble Calibration for Uncertainty Estimates

# Imports

In [39]:
# system
import os 

# data
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2 as cv
from pprint import pprint
from sklearn.metrics import accuracy_score, balanced_accuracy_score

# deep learning 
import torch.backends.cudnn as cudnn
import torch
import torchvision
import torch.nn as nn
from torch.utils.data import DataLoader 
import torch.nn.functional as F

# custom helpers
import trainer
import uncertainty 
import process
import inference

import warnings
warnings.filterwarnings('ignore')

# Generics (paths, device, loader settings, model)

In [40]:
# Directory holding the ensemble checkpoints; every file in it is loaded as a
# state_dict by the evaluation helpers below.
path = './balanced_ensembles'
# Prefer the first GPU, but fall back to the CPU so the notebook still runs on
# machines without CUDA instead of failing at `model.to(device)`.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# DataLoader settings shared by the evaluation helpers.
num_workers = 1
batch_size = 128
# Number of output classes (size of the replaced classification head).
n_cls = 7
# ResNet-18 backbone without pretrained weights; the final fully connected
# layer is swapped for an `n_cls`-way linear head. The actual weights come
# from the checkpoints loaded later.
model = torchvision.models.resnet18(pretrained=False)
in_ftr = model.fc.in_features
model.fc = nn.Linear(in_ftr, n_cls, bias=True)
model = model.to(device)

# Helpers

In [41]:
def loadPickle(fp):
    """Deserialize and return the object stored in the pickle file at ``fp``.

    The file is opened in binary mode and closed again before returning.
    Unpickling can execute arbitrary code, so only call this on files from a
    trusted source.
    """
    with open(fp, mode='rb') as stream:
        payload = pickle.load(stream)
    return payload

In [42]:
def compute(model, fp, data):
    """Evaluate every checkpoint found in ``fp`` on the validation split.

    Parameters
    ----------
    model : torch.nn.Module
        Network whose weights are overwritten by each checkpoint in turn. On
        return it is in eval mode and holds the last checkpoint's weights.
    fp : str
        Directory of checkpoint files. Every entry is loaded, in sorted
        (lexicographic) filename order.
    data : dict
        Must provide a ``'validation'`` entry accepted by ``process.SkinSet``.

    Returns
    -------
    tuple of np.ndarray
        ``(predictions, labels)``: softmax outputs with shape
        (n_checkpoints, n_samples, n_classes) and the matching targets with
        shape (n_checkpoints, n_samples).

    Notes
    -----
    Relies on the notebook-level ``device``, ``batch_size`` and
    ``num_workers`` settings.
    """
    # The validation set and its loader do not depend on the checkpoint, so
    # build them once instead of once per model.
    val_set = process.SkinSet(data['validation'])
    val_loader = DataLoader(val_set, batch_size=batch_size, num_workers=num_workers)

    pred_outputs = []
    label_outputs = []

    for f in sorted(os.listdir(fp)):
        full_model_path = os.path.join(fp, f)
        # map_location keeps loading robust when a checkpoint was saved from a
        # different device than the one in use now.
        model.load_state_dict(torch.load(full_model_path, map_location=device))

        labels = []
        predictions = []

        model.eval()
        with torch.no_grad():
            for inputs, targets in val_loader:
                # Only the inputs are needed on the device; targets are just
                # collected on the host.
                inputs = inputs.to(device)
                preds = F.softmax(model(inputs), dim=1)
                labels.extend(targets.cpu().numpy())
                predictions.extend(preds.cpu().numpy())

        pred_outputs.append(predictions)
        label_outputs.append(labels)

    return np.array(pred_outputs), np.array(label_outputs)

# Compute Mean/Var

In [43]:
# Load the pickled experiment dictionary, then run every checkpoint under
# `path` over its 'validation' split.
data = loadPickle('./balanced_exp.pickle')
# preds: per-model softmax outputs; labels: the matching targets per model.
preds, labels = compute(model, path, data)

In [44]:
# Sanity check: both arrays carry one leading entry per ensemble member.
print(preds.shape, labels.shape)

(6, 998, 7) (6, 998)


In [45]:
# Per-model accuracy and balanced accuracy on the validation split.
# Every model is evaluated with an unshuffled loader over the same data, so
# the first model's label row serves as the reference for all of them.
y_true = labels[0]

acc = []
bal_acc = []
for m in preds:
    # Hard prediction = class with the highest softmax probability per sample.
    p = np.argmax(m, axis=1)
    acc.append(accuracy_score(y_true, p))
    bal_acc.append(balanced_accuracy_score(y_true, p))

# Report models with 1-based numbering.
for c, v in enumerate(acc, start=1):
    print("Model Acc", c, ':', v)
for c, v in enumerate(bal_acc, start=1):
    print("Model Bal Acc", c, ':', v)

print("Mean Acc:", np.mean(acc))
print("Mean Bal Acc:", np.mean(bal_acc))

Model Acc 1 : 0.8196392785571143
Model Acc 2 : 0.7805611222444889
Model Acc 3 : 0.843687374749499
Model Acc 4 : 0.8276553106212425
Model Acc 5 : 0.8246492985971944
Model Acc 6 : 0.814629258517034
Model Bal Acc 1 : 0.766710621991389
Model Bal Acc 2 : 0.6850030397585104
Model Bal Acc 3 : 0.7703654929810695
Model Bal Acc 4 : 0.7309029073842401
Model Bal Acc 5 : 0.7990184831691344
Model Bal Acc 6 : 0.8135194690111399
Mean Acc: 0.8184702738810955
Mean Bal Acc: 0.7609200023825805


In [46]:
# Ensemble statistics across models (axis 0 indexes the ensemble members).
mean = np.mean(preds, 0)    # per-sample, per-class mean probability
r_var = np.var(preds, 0)    # per-sample, per-class variance across models
# Average the per-sample variance over samples; reuse r_var rather than
# recomputing the variance a second time.
var = np.mean(r_var, 0)

print(mean.shape)
print(var.shape)
print(var)

(998, 7)
(7,)
[0.0113876  0.015099   0.01973732 0.00275105 0.02780673 0.02693587
 0.00160483]


In [47]:
# Ensemble prediction: argmax over classes of the model-averaged probabilities.
p = np.argmax(mean, axis=1)
print("Ensemble Accuracy:", accuracy_score(labels[0], p))
print("Ensemble Balanced Accuracy:", balanced_accuracy_score(labels[0], p))

Ensemble Accuracy: 0.8817635270541082
Ensemble Balanced Accuracy: 0.8730295552851854


# Remove the bottom 2 models (lowest balanced accuracy)

In [48]:
def computeExclude(model, fp, data, exclude):
    """Evaluate the checkpoints in ``fp`` on the validation split, skipping some.

    Parameters
    ----------
    model : torch.nn.Module
        Network whose weights are overwritten by each checkpoint in turn. On
        return it is in eval mode and holds the last loaded checkpoint.
    fp : str
        Directory of checkpoint files, visited in sorted (lexicographic)
        filename order.
    data : dict
        Must provide a ``'validation'`` entry accepted by ``process.SkinSet``.
    exclude : container of int
        0-based positions, in the sorted file listing, of checkpoints to skip.

    Returns
    -------
    tuple of np.ndarray
        ``(predictions, labels)`` for the checkpoints that were kept: softmax
        outputs with shape (n_kept, n_samples, n_classes) and the matching
        targets with shape (n_kept, n_samples).

    Notes
    -----
    Relies on the notebook-level ``device``, ``batch_size`` and
    ``num_workers`` settings.
    """
    # The validation set and its loader do not depend on the checkpoint, so
    # build them once instead of once per model.
    val_set = process.SkinSet(data['validation'])
    val_loader = DataLoader(val_set, batch_size=batch_size, num_workers=num_workers)

    pred_outputs = []
    label_outputs = []

    # `model_idx` is kept distinct from any inner loop variable so the
    # exclusion index can never be clobbered by the batch loop.
    for model_idx, f in enumerate(sorted(os.listdir(fp))):
        if model_idx in exclude:
            continue
        full_model_path = os.path.join(fp, f)
        # map_location keeps loading robust when a checkpoint was saved from a
        # different device than the one in use now.
        model.load_state_dict(torch.load(full_model_path, map_location=device))

        labels = []
        predictions = []

        model.eval()
        with torch.no_grad():
            for inputs, targets in val_loader:
                # Only the inputs are needed on the device; targets are just
                # collected on the host.
                inputs = inputs.to(device)
                preds = F.softmax(model(inputs), dim=1)
                labels.extend(targets.cpu().numpy())
                predictions.extend(preds.cpu().numpy())

        pred_outputs.append(predictions)
        label_outputs.append(labels)

    return np.array(pred_outputs), np.array(label_outputs)

In [49]:
# Skip the checkpoints at sorted-file positions 1 and 3, i.e. "Model 2" and
# "Model 4" in the per-model report above: the two lowest balanced accuracies.
# Note: this rebinds `preds` / `labels`, replacing the full-ensemble arrays.
preds, labels = computeExclude(model, path, data, [1,3])

In [50]:
# Same ensemble statistics as before, now over the reduced set of models.
mean = np.mean(preds, 0)    # per-sample, per-class mean probability
r_var = np.var(preds, 0)    # per-sample, per-class variance across models
# Reuse r_var rather than computing the variance a second time.
var = np.mean(r_var, 0)

In [51]:
# Reduced-ensemble prediction: argmax over classes of the averaged probabilities.
p = np.argmax(mean, axis=1)
print("Ensemble Accuracy:", accuracy_score(labels[0], p))
print("Ensemble Balanced Accuracy:", balanced_accuracy_score(labels[0], p))

Ensemble Accuracy: 0.8707414829659319
Ensemble Balanced Accuracy: 0.8790445834880132


# Testing

In [52]:
# Keys of the 'test' split; each key is later used both as the image file stem
# and to look up that sample's stored 'label' / 'image' entries.
IDs = list(data['test'].keys())

In [54]:
# Class index -> human-readable diagnosis name.
inverse_mapping = {
    0: "Bowen's Disease",
    1: "Basal Cell Carcinoma",
    2: 'Benign Keratosis-like Lesions',
    3: 'Dermatofibroma',
    4: 'Melanoma',
    5: 'Melanocytic Nevi',
    6: 'Vascular Lesions',
}

# Draw one test sample uniformly at random (unseeded, so it differs per run).
ID = IDs[np.random.randint(len(IDs))]
image_path = '/usr/local/data/ezimmer/implementAI2020/dl/data/HAM/images/' + ID + '.jpg'

# Run the inference helper on the raw JPEG using the checkpoints under `path`.
results = inference.infer(image_path, path)
pprint(results)

# Ground-truth diagnosis of the sampled image (displayed as the cell output).
inverse_mapping[data['test'][ID]['label']]

{'conf': False, 'disease': 'Melanocytic Nevi', 'prob': 0.62132996}


'Melanoma'

In [16]:
# Load one specific ensemble member for a single-model check. map_location
# keeps this working when the checkpoint was saved from a different device
# than the one in use now.
model.load_state_dict(torch.load(os.path.join(path, 'model_2.pth.tar'), map_location=device))

<All keys matched successfully>

In [17]:
# Single-model class probabilities for the stored (preprocessed) array of the
# sampled test image.
# eval() is set explicitly rather than relying on an earlier cell having left
# the model in eval mode; no_grad() avoids building an autograd graph for
# what is pure inference.
model.eval()
with torch.no_grad():
    x = F.softmax(model(torch.tensor(np.load(data['test'][ID]['image'])).unsqueeze(0).to(device)), dim=1)

In [18]:
# Show the class probabilities together with the index of the top class.
print(x, x.argmax(dim=1))

tensor([[1.7803e-05, 9.9263e-06, 4.5048e-03, 5.1321e-06, 7.8747e-01, 2.0799e-01,
         4.6164e-07]], device='cuda:0', grad_fn=<SoftmaxBackward>) tensor([4], device='cuda:0')


In [19]:
# NOTE(review): these imports would normally live in the import cell at the
# top of the notebook; they are kept here so this cell stays self-contained.
import skimage.io as io
from skimage.transform import resize

# Rebuild the model input directly from the raw JPEG of the sampled test image.
img = io.imread('/usr/local/data/ezimmer/implementAI2020/dl/data/HAM/images/'+ID+'.jpg').astype(np.float32)
print(img.shape)
# Resize to 225x300, move channels first (HWC -> CHW) and scale by 1/255.
img = resize(img, (225,300,3)).transpose(2,0,1) / 255.0
# Pin float32 explicitly: the dtype returned by `resize` may differ between
# scikit-image versions, and a float64 tensor would not match the model's
# default float32 weights.
img = torch.tensor(img, dtype=torch.float32).unsqueeze(0).to(device)
print(img.size())


(450, 600, 3)
torch.Size([1, 3, 225, 300])


In [20]:
# Single-model class probabilities for the image tensor rebuilt from the raw
# JPEG. eval() is set explicitly rather than relying on an earlier cell, and
# no_grad() avoids building an autograd graph for pure inference.
model.eval()
with torch.no_grad():
    x = F.softmax(model(img), dim=1)

In [21]:
# Class probabilities for the raw-JPEG input, for comparison with the
# probabilities printed for the stored array.
print(x)

tensor([[1.7803e-05, 9.9263e-06, 4.5048e-03, 5.1321e-06, 7.8747e-01, 2.0799e-01,
         4.6164e-07]], device='cuda:0', grad_fn=<SoftmaxBackward>)


In [22]:
# Shape of the stored preprocessed array for the same test image, for
# comparison with the tensor built from the raw JPEG.
np.load(data['test'][ID]['image']).shape

(3, 225, 300)