In [1]:
import copy
import json
import math
import os
import random
import shutil
import time
import pandas as pd

import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.nn.functional as F
import torch.optim
import torch.utils.data

import sklearn
import copy

import gc
from torch.utils.data import DataLoader


import torchvision.transforms as transforms
from PIL import Image

  warn(f"Failed to load image Python extension: {e}")


In [2]:
#import Verma.main_increase_experts_hard_coded as verm
import Verma.experts as vexp
import Verma.losses as vlos
from Verma.utils import AverageMeter, accuracy
import Verma.resnet50 as vres
from AL.utils import *
from AL.metrics import *
from AL.neural_network import NetSimple

import NIH.Dataset as ds

In [4]:
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

In [5]:
class Expert:
    def __init__(self, dataset, labeler_id, modus="perfect", param=None, nLabels=800, prob=0.5):
        self.labelerId = labeler_id
        self.dataset = dataset
        self.data = dataset.getData()[["Image ID", str(self.labelerId)]]
        self.nLabels = nLabels
        self.param = param
        self.prob = prob
        self.modus = modus

        if self.modus == "perfect":
            self.predictions = self.data

    def predict(self, img, target, fnames):
        """
        img: the input image
        target: the GT label
        fname: filename (id for the image)
        """
        return np.array([self.predictions[self.predictions["Image ID"] == image_id][str(self.labelerId)].values for image_id in fnames]).ravel()

    def setModel(self, model):
        self.model = model
        
    def predictModel(self, img, target, fnames):
        if len(img.shape) == 3:
            img = img.unsqueeze(0) 
        outputs = self.model(img)
        _, predicted = torch.max(outputs.data, 1)
        return predicted
    
    def predictImage(self, img):
        return self.predictModel(img, None, None)
    
    def getModel(self):
        return self.model
    
    def saveModel(self, path, name):
        torch.save(self.model, PATH + "/" + name + "_" + str(labeler_id))
        
    def loadModel(self, path, name):
        self.model = torch.load(path + "/" + name + "_" + str(labeler_id))
        model.eval()
        
    def predictWithModel(self, img, target, filename):
        """
        Checks with the model if the expert would be correct
        If it predicts 1 than it returns the true label
        If it predicts 0 than is returns the opposit label
        """
        predicted = self.predictModel(img, target, filename)
        result = []
        for i, pred in enumerate(predicted):
            if pred == 1:
                result.append(target.cpu().detach().numpy()[i])
            else:
                if target.cpu().detach().numpy()[i] == 1:
                    result.append(0)
                else:
                    result.append(1)
        return result

In [6]:
class NIHExpertDataset():
    def __init__(self, images, filenames, targets, expert_fn, labeled, indices = None, expert_preds = None):
        """
        Original cifar dataset
        images: images
        targets: labels
        expert_fn: expert function
        labeled: indicator array if images is labeled
        indices: indices in original CIFAR dataset (if this subset is subsampled)
        expert_preds: used if expert_fn or have different expert model
        """
        self.images = images
        self.filenames = filenames
        self.targets = np.array(targets)
        self.expert_fn = expert_fn
        self.labeled = np.array(labeled)
        normalize = transforms.Normalize(mean=[x / 255.0 for x in [125.3]],
                                         std=[x / 255.0 for x in [63.0]])
        self.transform_test = transforms.Compose([transforms.Resize(128),transforms.ToTensor(), normalize])
        if expert_preds is not None:
            self.expert_preds = expert_preds
        else:
            self.expert_preds = np.array(expert_fn(self.images, torch.FloatTensor(targets), self.filenames))
        for i in range(len(self.expert_preds)):
            if self.labeled[i] == 0:
                self.expert_preds[i] = -1 # not labeled by expert
        if indices is not None:
            self.indices = indices
        else:
            self.indices = np.array(list(range(len(self.targets))))
    def __getitem__(self, index):
        """Take the index of item and returns the image, label, expert prediction and index in original dataset"""
        label = self.targets[index]
        image = self.transform_test(self.images[index])
        filename = self.filenames[index]
        expert_pred = self.expert_preds[index]
        indice = self.indices[index]
        labeled = self.labeled[index]
        return torch.FloatTensor(image), label, expert_pred, indice, labeled

    def __len__(self):
        return len(self.targets)
    
class NIHExpertDatasetMemory():
    def __init__(self, images, filenames, targets, expert_fn, labeled, indices = None, expert_preds = None, param=None):
        """
        Original cifar dataset
        images: images
        targets: labels
        expert_fn: expert function
        labeled: indicator array if images is labeled
        indices: indices in original CIFAR dataset (if this subset is subsampled)
        expert_preds: used if expert_fn or have different expert model
        """
        self.images = images
        self.filenames = filenames
        self.targets = np.array(targets)
        self.expert_fn = expert_fn
        self.labeled = np.array(labeled)
        
        self.image_ids = filenames
        self.preload = False
        self.PATH = param["PATH"]
        
        normalize = transforms.Normalize(mean=[x / 255.0 for x in [125.3]],
                                         std=[x / 255.0 for x in [63.0]])
        self.transform_test = transforms.Compose([transforms.Resize(128), transforms.ToTensor(), normalize])
        if expert_preds is not None:
            self.expert_preds = expert_preds
        else:
            self.expert_preds = np.array(expert_fn(self.images, torch.FloatTensor(targets), fnames = self.filenames))
        for i in range(len(self.expert_preds)):
            if self.labeled[i] == 0:
                self.expert_preds[i] = -1 # not labeled by expert
        if indices is not None:
            self.indices = indices
        else:
            self.indices = np.array(list(range(len(self.targets))))
            
    def loadImage(self, idx):
        """
        Load one single image
        """
        return Image.open(self.PATH + "images/" + self.image_ids[idx]).convert("RGB").resize((244,244))
            
    def getImage(self, idx):
        """
        Returns the image from index idx
        """
        if self.preload:
            return self.images[idx]
        else:
            return self.loadImage(idx)
    
    
    def __getitem__(self, index):
        """Take the index of item and returns the image, label, expert prediction and index in original dataset"""
        label = self.targets[index]
        img = self.getImage(index)
        image = self.transform_test(img)
        #image = self.transform_test(self.images[index])
        filename = self.filenames[index]
        expert_pred = self.expert_preds[index]
        indice = self.indices[index]
        labeled = self.labeled[index]
        return torch.FloatTensor(image), label, expert_pred, indice, labeled, filename

    def __len__(self):
        return len(self.targets)

In [41]:
from scipy.stats import entropy

def getQbQPoints(expert_models, data_loader, budget):
    """
    Selects n (budget) points with query by committee
    """
    # Get Predictions for all points for all experts
    global prediction_matrix
    prediction_matrix = []
    indices_all = []
    for data in data_loader:
        images, labels, _, indices, _, filenames = data
        experts_preds = []
        for j, expert_model in enumerate(expert_models):
            images = images.to(device)
            outputs_exp = expert_model(images)
            preds = []
            for i in range(outputs_exp.size()[0]):
                pred_exp = outputs_exp.data[i].cpu().numpy()
                pred_exp = pred_exp[1]
                preds.append(round(pred_exp))
                if (j == 0): #Add the indices only the first time
                    indices_all.append(indices[i].item())
            experts_preds.append(np.array(preds))
        prediction_matrix.append(np.swapaxes(np.array(experts_preds), 0, 1))
    predictions_matrix = np.vstack(np.ravel(np.array(prediction_matrix)))

    #Get where the experts disagree
    qbq = []
    for row in predictions_matrix:
        dis = False
        start = None
        for element in row:
            if start == None:
                start = element
            else: 
                if element != start:
                    dis = True
        qbq.append(dis)
    #Get indices
    ids = []
    for i, el in enumerate(qbq):
        if el:
            ids.append(indices_all[i])
    return random.sample(ids, budget)



def getExpertModels(experts, train_dataset, val_dataset, test_dataset, param=None):
    
    # initialize data, Erhält alle Indizes der Daten
    all_indices = list(range(len(train_dataset.getAllIndices())))
    train_dataset.getAllImagesNP().shape
    all_data_x = train_dataset.getAllImagesNP()[all_indices]
    all_data_filenames = np.array(train_dataset.getAllFilenames())[all_indices]
    all_data_y = np.array(train_dataset.getAllTargets())[all_indices]
    
    print("Complete data generation")

    # Bestimmt die Indizes, welche gelabelt und welche ungelabelt sind
    
    Intial_random_set = random.sample(all_indices, param["INITIAL_SIZE"])
    indices_labeled  = Intial_random_set
    indices_unlabeled= list(set(all_indices) - set(indices_labeled))

    gc.collect()

    # train expert model on labeled data
    # Expertenmodell variabel
    expert_models = []
    for i, expert in enumerate(experts):

        print("Starting with expert " + str(i))

        # Lädt die Datasets für die beschrifteten und unbeschrifteten Daten
        dataset_train_labeled = NIHExpertDatasetMemory(None, all_data_filenames[indices_labeled], all_data_y[indices_labeled], expert.predict , [1]*len(indices_labeled), indices_labeled, param=param)
        dataset_train_unlabeled = NIHExpertDatasetMemory(None, all_data_filenames[indices_unlabeled], all_data_y[indices_unlabeled], expert.predict , [0]*len(indices_unlabeled), indices_unlabeled, param=param)
    
    
        # Lädt die Dataloaders
        dataLoaderTrainLabeled = DataLoader(dataset=dataset_train_labeled, batch_size=param["BATCH_SIZE"], shuffle=True,  num_workers=0, pin_memory=False)
        dataLoaderTrainUnlabeled = DataLoader(dataset=dataset_train_unlabeled, batch_size=param["BATCH_SIZE"], shuffle=True,  num_workers=0, pin_memory=False)
    
        print("Complete dataloader generation")

        gc.collect()
        
        expert_models.append(NetSimple(2, 3, 100, 100, 1000,500).to(device)) 
        run_expert(expert_models[-1], param["EPOCH_TRAIN"], dataLoaderTrainLabeled, dataLoaderTrainLabeled, param=param) 

    print("Experts trained")

    data_sizes = []
    error_confidence = []
    data_sizes.append(param["INITIAL_SIZE"])
    
    print("Starting with AL")
    for round in range(param["MAX_ROUNDS"]):

        print(f'\n \n Round {round} \n \n')

        # get points where expert model is least confident on
        #indices_confidence =  random.sample(indices_unlabeled, BATCH_SIZE_AL)
        #indices_confidence = get_least_confident_points(model_expert, dataLoaderTrainUnlabeled, param["BATCH_SIZE_AL"])

        #Try to get better Points
        indices_qbq = getQbQPoints(expert_models, dataLoaderTrainUnlabeled, param["BATCH_SIZE_AL"])
        
        #indices_labeled  = indices_labeled + list(indices_confidence) 
        indices_labeled  = indices_labeled + list(indices_qbq) 
        indices_unlabeled= list(set(all_indices) - set(indices_labeled))
        
        # train model on labeled data
        for j, expert in enumerate(experts):

            dataset_train_labeled = NIHExpertDatasetMemory(None, all_data_filenames[indices_labeled], all_data_y[indices_labeled], expert.predict , [1]*len(indices_labeled), indices_labeled, param=param)
            dataset_train_unlabeled = NIHExpertDatasetMemory(None, all_data_filenames[indices_unlabeled], all_data_y[indices_unlabeled], expert.predict , [0]*len(indices_unlabeled), indices_unlabeled, param=param)
        
            dataLoaderTrainLabeled = DataLoader(dataset=dataset_train_labeled, batch_size=param["BATCH_SIZE"], shuffle=True,  num_workers=0, pin_memory=True)
            dataLoaderTrainUnlabeled = DataLoader(dataset=dataset_train_unlabeled, batch_size=param["BATCH_SIZE"], shuffle=True,  num_workers=0, pin_memory=True)
            
            run_expert(expert_models[j], param["EPOCH_TRAIN"], dataLoaderTrainLabeled, dataLoaderTrainLabeled, param=param)

    print("AL finished")
    return expert_models

In [8]:
def train_expert_confidence(train_loader, model, optimizer, scheduler, epoch, apply_softmax, param=None):
    """Train for one epoch the model to predict expert agreement with label"""
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, label, expert_pred, _, _, filenames ) in enumerate(train_loader):
        #print(input)
        #print(label)
        expert_pred = expert_pred.long()
        expert_pred = (expert_pred == label) *1
        target = expert_pred.to(device)
        input = input.to(device)
        # compute output
        output = model(input)

        # compute loss
        
        if apply_softmax:
            loss = my_CrossEntropyLossWithSoftmax(output, target)
        else:
            #loss = my_CrossEntropyLoss(output, target)
            loss = my_CrossEntropyLoss(output, target, cost=param["COST"])
        
        # measure accuracy and record loss
        prec1 = accuracy(output.data, target, topk=(1,))[0]
        losses.update(loss.data.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % 10 == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                epoch, i, len(train_loader), batch_time=batch_time,
                loss=losses, top1=top1))
            

def run_expert(model, epochs, train_loader, val_loader, apply_softmax = False, param=param):
    '''
    train expert model to predict disagreement with label
    model: WideResNet model or pytorch model (2 outputs)
    epochs: number of epochs to train
    '''
    # get the number of model parameters
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # define loss function (criterion) and optimizer
    #optimizer = torch.optim.AdamW(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.01, amsgrad=False)

    optimizer = torch.optim.SGD(model.parameters(), 0.001, #0.001
                                momentum=0.9, nesterov=True,
                                weight_decay=5e-4)
    # cosine learning rate
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, len(train_loader) * epochs)

    for epoch in range(0, epochs):
        # train for one epoch
        train_expert_confidence(train_loader, model, optimizer, scheduler, epoch, apply_softmax, param=param)
        if epoch % 10 == 0:
            metrics_print_expert(model, val_loader)
            
    metrics_print_expert(model, val_loader)

def metrics_print_expert(model, data_loader, defer_net = False):
    '''
    Computes metrics for expert model error prediction
    model: model
    data_loader: data loader
    '''
    correct = 0
    total = 0
    
    label_list = np.empty(0)
    predictions_list = np.empty(0)
    # again no gradients needed
    with torch.no_grad():
        for data in data_loader:
            images, label, expert_pred, _ ,_, filenames = data
            expert_pred = expert_pred.long()
            expert_pred = (expert_pred == label) *1
            images, labels = images.to(device), expert_pred.to(device)
            outputs = model(images)
            _, predictions = torch.max(outputs.data, 1) # maybe no .data

            total += labels.size(0)
            correct += (predictions == labels).sum().item()
            
            label_list = np.concatenate((label_list, labels.cpu().numpy()), axis=0)
            predictions_list = np.concatenate((predictions_list, predictions.cpu().numpy()), axis=0)

    print('Accuracy of the network on the %d test images: %.3f %%' % (total,
        100 * correct / total))
    
    tn, fp, fn, tp = sklearn.metrics.confusion_matrix(label_list, predictions_list).ravel()
    
    print("Confusion Matrix:")
    print(sklearn.metrics.confusion_matrix(label_list, predictions_list))

In [14]:
def increase_experts(param):
    
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    al_param = param["AL"]
    al_param["PATH"] = param["PATH"]
    al_param["TRAIN REJECTOR"] = False
    l2d_param = param["L2D"]

    use_AL = True

    basic_Dataset = ds.BasicDataset(param["PATH"], param["TARGET"])
    
    for seed in [958, 625, 436]:
        print("run for seed {}".format(seed))
        if seed != "":
            set_seed(seed)
        log = {"selected_experts": [], "selected_expert_fns": []}
        
        #Use new Dataset
        nih_dataloader = ds.NIH_K_Fold_Dataloader(
            dataset = basic_Dataset,
            k = param["K"],
            labelerIds = param["LABELER_IDS"],
            train_batch_size = param["TRAIN_BATCH_SIZE"],
            test_batch_size = param["TEST_BATCH_SIZE"],
            seed = seed,
            #maxLabels = maxL,
            preload = False,
            prebuild = False,
            param = param
        )
            
        for fold_idx in range(param["K"]):
            print(f'Running fold {fold_idx+1} out of {param["K"]}')

            expert_train, expert_val, expert_test = nih_dataloader.get_dataset_for_folder(fold_idx)
            expert_train_dataset = ds.NIHDataset(expert_train, preload=False, preprocess=False, param=param)
            expert_val_dataset = ds.NIHDataset(expert_val, preload=False, preprocess=False, param=param)
            expert_test_dataset = ds.NIHDataset(expert_test, preload=False, preprocess=False, param=param)

            expert_fns = []
            experts = []
            for labelerId in list(param["LABELER_IDS"]):
                #nih_expert = ex.Expert(dataset = basic_Dataset, labeler_id=labelerId)
                nih_expert = Expert(dataset = basic_Dataset, labeler_id=labelerId)
                experts.append(nih_expert)
                expert_fns.append(nih_expert.predictWithModel)
            if (use_AL):
                expert_models = getExpertModels(experts, expert_train_dataset, expert_val_dataset, expert_test_dataset, al_param)
                for ex, ex_model in enumerate(expert_models):
                    experts[ex].setModel(ex_model)
                    
            
            num_experts = len(expert_fns)

            #Use new Expert
            #expert_fns = [experts[j] for j in range(n)]
            
            model = model = vres.ResNet50_defer(int(param["n_classes"]) + num_experts)
            # print(model)
            #trainD = GalaxyZooDataset()
            #valD = GalaxyZooDataset(split="val")
            
            train_loader, val_loader, test_loader = nih_dataloader.get_data_loader_for_fold(fold_idx)
            
            train(model, train_loader, val_loader, test_loader, expert_fns, param, seed=seed, experts=experts)

In [15]:
def train(model, train_loader, valid_loader, test_loader, expert_fns, config, seed="", experts=None):

    print("Start L2D Training")
    
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    
    n_classes = config["n_classes"] + len(expert_fns)
    kwargs = {"num_workers": 0, "pin_memory": True}

    model = model.to(device)
    cudnn.benchmark = True
    optimizer = torch.optim.Adam(
        model.parameters(), config["lr"], weight_decay=config["weight_decay"]
    )
    criterion = vlos.Criterion()
    loss_fn = getattr(criterion, config["loss_type"])
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, len(train_loader) * config["epochs"]
    )
    best_validation_loss = np.inf
    patience = 0
    iters = 0
    warmup_iters = config["warmup_epochs"] * len(train_loader)
    lrate = config["lr"]

    for epoch in range(0, config["epochs"]):
        iters, train_loss = train_epoch(
            iters,
            warmup_iters,
            lrate,
            train_loader,
            model,
            optimizer,
            scheduler,
            epoch,
            expert_fns,
            loss_fn,
            n_classes,
            config["alpha"],
            config,
        )

        experts_fns_eval = []
        for expert in experts:
            experts_fns_eval.append(expert.predict)
        #metrics = evaluate(model, expert_fns, loss_fn, n_classes, valid_loader, config)
        metrics = evaluate(model, experts_fns_eval, loss_fn, n_classes, valid_loader, config)

        validation_loss = metrics["validation_loss"]

        if validation_loss < best_validation_loss:
            """best_validation_loss = validation_loss
            print(
                "Saving the model with classifier accuracy {}".format(
                    metrics["classifier_accuracy"]
                ),
                flush=True,
            )
            save_path = os.path.join(
                config["ckp_dir"],
                config["experiment_name"]
                + "_"
                + str(len(expert_fns))
                + "_experts"
                + "_seed_"
                + str(seed),
            )"""
            #torch.save(model.state_dict(), save_path + ".pt")
            # Additionally save the whole config dict
            #with open(save_path + ".json", "w") as f:
            #    json.dump(config, f)
            patience = 0
        else:
            patience += 1

        if patience >= config["patience"]:
            print("Early Exiting Training.", flush=True)
            break
            
    print("Evaluate on Test Data")
    metrics = evaluate(model, expert_fns, loss_fn, n_classes, test_loader, config)

In [16]:
def train_epoch(
    iters,
    warmup_iters,
    lrate,
    train_loader,
    model,
    optimizer,
    scheduler,
    epoch,
    expert_fns,
    loss_fn,
    n_classes,
    alpha,
    config,
):
    """ Train for one epoch """
    
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    model.train()
    end = time.time()

    epoch_train_loss = []

    for i, (input, target, hpred) in enumerate(train_loader):
        if iters < warmup_iters:
            lr = lrate * float(iters) / warmup_iters
            #print(iters, lr)
            for param_group in optimizer.param_groups:
                param_group["lr"] = lr

        target = target.to(device)
        input = input.to(device)
        hpred = hpred

        # compute output
        output = model(input)

        if config["loss_type"] == "softmax":
            output = F.softmax(output, dim=1)

        # get expert  predictions and costs
        batch_size = output.size()[0]  # batch_size
        collection_Ms = []
        # We only support \alpha=1
        for _, fn in enumerate(expert_fns):
            # We assume each expert function has access to the extra metadata, even if they don't use it.
            m = fn(input, target, hpred)
            #m = fn(hpred)
            m2 = [0] * batch_size
            for j in range(0, batch_size):
                if m[j] == target[j].item():
                    m[j] = 1
                    m2[j] = alpha
                else:
                    m[j] = 0
                    m2[j] = 1
            m = torch.tensor(m)
            m2 = torch.tensor(m2)
            m = m.to(device)
            m2 = m2.to(device)
            collection_Ms.append((m, m2))

        # compute loss
        loss = loss_fn(output, target, collection_Ms, n_classes)
        epoch_train_loss.append(loss.item())

        # measure accuracy and record loss
        prec1 = accuracy(output.data, target, topk=(1,))[0]
        losses.update(loss.data.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if not iters < warmup_iters:
            scheduler.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        iters += 1

        if i % 10 == 0:
            print(
                "Epoch: [{0}][{1}/{2}]\t"
                "Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t"
                "Loss {loss.val:.4f} ({loss.avg:.4f})\t"
                "Prec@1 {top1.val:.3f} ({top1.avg:.3f})".format(
                    epoch,
                    i,
                    len(train_loader),
                    batch_time=batch_time,
                    loss=losses,
                    top1=top1,
                ),
                flush=True,
            )

    return iters, np.average(epoch_train_loss)

In [17]:
def evaluate(model, expert_fns, loss_fn, n_classes, data_loader, config):
    """
    Computes metrics for deferal
    -----
    Arguments:
    net: model
    expert_fn: expert model
    n_classes: number of classes
    loader: data loader
    """
    
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    
    correct = 0
    correct_sys = 0
    exp = 0
    exp_total = 0
    total = 0
    real_total = 0
    alone_correct = 0
    #  === Individual Expert Accuracies === #
    expert_correct_dic = {k: 0 for k in range(len(expert_fns))}
    expert_total_dic = {k: 0 for k in range(len(expert_fns))}
    #  === Individual  Expert Accuracies === #
    alpha = config["alpha"]
    losses = []
    with torch.no_grad():
        for data in data_loader:
            images, labels, hpred = data
            images, labels, hpred = images.to(device), labels.to(device), hpred
            outputs = model(images)
            if config["loss_type"] == "softmax":
                outputs = F.softmax(outputs, dim=1)
            if config["loss_type"] == "ova":
                ouputs = F.sigmoid(outputs)

            _, predicted = torch.max(outputs.data, 1)
            batch_size = outputs.size()[0]  # batch_size

            expert_predictions = []
            collection_Ms = []  # a collection of 3-tuple
            for i, fn in enumerate(expert_fns, 0):
                exp_prediction1 = fn(images, labels, hpred)
                #exp_prediction1 = fn(hpred)
                m = [0] * batch_size
                m2 = [0] * batch_size
                for j in range(0, batch_size):
                    if exp_prediction1[j] == labels[j].item():
                        m[j] = 1
                        m2[j] = alpha
                    else:
                        m[j] = 0
                        m2[j] = 1

                m = torch.tensor(m)
                m2 = torch.tensor(m2)
                m = m.to(device)
                m2 = m2.to(device)
                collection_Ms.append((m, m2))
                expert_predictions.append(exp_prediction1)

            loss = loss_fn(outputs, labels, collection_Ms, n_classes)
            losses.append(loss.item())

            for i in range(0, batch_size):
                r = predicted[i].item() >= n_classes - len(expert_fns)
                prediction = predicted[i]
                if predicted[i] >= n_classes - len(expert_fns):
                    max_idx = 0
                    # get second max
                    for j in range(0, n_classes - len(expert_fns)):
                        if outputs.data[i][j] >= outputs.data[i][max_idx]:
                            max_idx = j
                    prediction = max_idx
                else:
                    prediction = predicted[i]
                alone_correct += (prediction == labels[i]).item()
                if r == 0:
                    total += 1
                    correct += (predicted[i] == labels[i]).item()
                    correct_sys += (predicted[i] == labels[i]).item()
                if r == 1:
                    deferred_exp = (predicted[i] - (n_classes - len(expert_fns))).item()
                    # cdeferred_exp = ((n_classes - 1) - predicted[i]).item()  # reverse order, as in loss function
                    exp_prediction = expert_predictions[deferred_exp][i]
                    #
                    # Deferral accuracy: No matter expert ===
                    exp += exp_prediction == labels[i].item()
                    exp_total += 1
                    # Individual Expert Accuracy ===
                    expert_correct_dic[deferred_exp] += (
                        exp_prediction == labels[i].item()
                    )
                    expert_total_dic[deferred_exp] += 1
                    #
                    correct_sys += exp_prediction == labels[i].item()
                real_total += 1
    cov = str(total) + str(" out of") + str(real_total)

    #  === Individual Expert Accuracies === #
    expert_accuracies = {
        "expert_{}".format(str(k)): 100
        * expert_correct_dic[k]
        / (expert_total_dic[k] + 0.0002)
        for k in range(len(expert_fns))
    }
    # Add expert accuracies dict
    to_print = {
        "coverage": cov,
        "system_accuracy": 100 * correct_sys / real_total,
        "expert_accuracy": 100 * exp / (exp_total + 0.0002),
        "classifier_accuracy": 100 * correct / (total + 0.0001),
        "alone_classifier": 100 * alone_correct / real_total,
        "validation_loss": np.average(losses),
        "n_experts": len(expert_fns),
        **expert_accuracies,
    }
    print(to_print, flush=True)
    return to_print

In [None]:
increase_experts(param)

run for seed 958
Running fold 1 out of 10




Complete data generation
Starting with expert 0
Complete dataloader generation
Number of model parameters: 84860202
Epoch: [0][0/4]	Time 0.168 (0.168)	Loss 2.2444 (2.2444)	Prec@1 25.000 (25.000)
Accuracy of the network on the 32 test images: 25.000 %
Confusion Matrix:
[[ 4  0]
 [24  4]]
Epoch: [1][0/4]	Time 0.165 (0.165)	Loss 1.5866 (1.5866)	Prec@1 25.000 (25.000)
Epoch: [2][0/4]	Time 0.161 (0.161)	Loss 0.9495 (0.9495)	Prec@1 100.000 (100.000)
Epoch: [3][0/4]	Time 0.167 (0.167)	Loss 2.1040 (2.1040)	Prec@1 100.000 (100.000)
Epoch: [4][0/4]	Time 0.159 (0.159)	Loss 1.4771 (1.4771)	Prec@1 75.000 (75.000)
Epoch: [5][0/4]	Time 0.165 (0.165)	Loss 0.9119 (0.9119)	Prec@1 100.000 (100.000)
Epoch: [6][0/4]	Time 0.161 (0.161)	Loss 1.9660 (1.9660)	Prec@1 100.000 (100.000)
Epoch: [7][0/4]	Time 0.162 (0.162)	Loss 1.3675 (1.3675)	Prec@1 100.000 (100.000)
Accuracy of the network on the 32 test images: 100.000 %
Confusion Matrix:
[[ 4  0]
 [ 0 28]]
Starting with expert 1
Complete dataloader generation
N

  predictions_matrix = np.vstack(np.ravel(np.array(prediction_matrix)))


Number of model parameters: 84860202
Epoch: [0][0/5]	Time 0.167 (0.167)	Loss 0.9030 (0.9030)	Prec@1 100.000 (100.000)
Accuracy of the network on the 40 test images: 97.500 %
Confusion Matrix:
[[ 4  0]
 [ 1 35]]
Epoch: [1][0/5]	Time 0.160 (0.160)	Loss 1.2697 (1.2697)	Prec@1 100.000 (100.000)
Epoch: [2][0/5]	Time 0.166 (0.166)	Loss 1.0250 (1.0250)	Prec@1 100.000 (100.000)
Epoch: [3][0/5]	Time 0.164 (0.164)	Loss 1.0049 (1.0049)	Prec@1 100.000 (100.000)
Epoch: [4][0/5]	Time 0.163 (0.163)	Loss 0.4067 (0.4067)	Prec@1 100.000 (100.000)
Epoch: [5][0/5]	Time 0.159 (0.159)	Loss 0.4182 (0.4182)	Prec@1 100.000 (100.000)
Epoch: [6][0/5]	Time 0.168 (0.168)	Loss 0.2544 (0.2544)	Prec@1 100.000 (100.000)
Epoch: [7][0/5]	Time 0.162 (0.162)	Loss 0.4361 (0.4361)	Prec@1 100.000 (100.000)
Accuracy of the network on the 40 test images: 100.000 %
Confusion Matrix:
[[ 4  0]
 [ 0 36]]
Number of model parameters: 84860202
Epoch: [0][0/5]	Time 0.158 (0.158)	Loss 1.1194 (1.1194)	Prec@1 37.500 (37.500)
Accuracy of 

  predictions_matrix = np.vstack(np.ravel(np.array(prediction_matrix)))


Number of model parameters: 84860202
Epoch: [0][0/6]	Time 0.164 (0.164)	Loss 0.1399 (0.1399)	Prec@1 100.000 (100.000)
Accuracy of the network on the 48 test images: 100.000 %
Confusion Matrix:
[[ 5  0]
 [ 0 43]]
Epoch: [1][0/6]	Time 0.162 (0.162)	Loss 0.2925 (0.2925)	Prec@1 100.000 (100.000)
Epoch: [2][0/6]	Time 0.156 (0.156)	Loss 0.3409 (0.3409)	Prec@1 87.500 (87.500)
Epoch: [3][0/6]	Time 0.154 (0.154)	Loss 0.3594 (0.3594)	Prec@1 75.000 (75.000)
Epoch: [4][0/6]	Time 0.159 (0.159)	Loss 0.0730 (0.0730)	Prec@1 100.000 (100.000)
Epoch: [5][0/6]	Time 0.163 (0.163)	Loss 0.0638 (0.0638)	Prec@1 100.000 (100.000)
Epoch: [6][0/6]	Time 0.157 (0.157)	Loss 0.0029 (0.0029)	Prec@1 100.000 (100.000)
Epoch: [7][0/6]	Time 0.163 (0.163)	Loss 0.0662 (0.0662)	Prec@1 100.000 (100.000)
Accuracy of the network on the 48 test images: 100.000 %
Confusion Matrix:
[[ 5  0]
 [ 0 43]]
Number of model parameters: 84860202
Epoch: [0][0/6]	Time 0.162 (0.162)	Loss 0.6242 (0.6242)	Prec@1 87.500 (87.500)
Accuracy of the

  predictions_matrix = np.vstack(np.ravel(np.array(prediction_matrix)))


Number of model parameters: 84860202
Epoch: [0][0/7]	Time 0.163 (0.163)	Loss 0.0214 (0.0214)	Prec@1 100.000 (100.000)
Accuracy of the network on the 56 test images: 96.429 %
Confusion Matrix:
[[ 5  0]
 [ 2 49]]
Epoch: [1][0/7]	Time 0.156 (0.156)	Loss 0.2223 (0.2223)	Prec@1 100.000 (100.000)
Epoch: [2][0/7]	Time 0.166 (0.166)	Loss 0.0248 (0.0248)	Prec@1 100.000 (100.000)
Epoch: [3][0/7]	Time 0.168 (0.168)	Loss 0.4131 (0.4131)	Prec@1 87.500 (87.500)
Epoch: [4][0/7]	Time 0.154 (0.154)	Loss 1.1651 (1.1651)	Prec@1 62.500 (62.500)
Epoch: [5][0/7]	Time 0.161 (0.161)	Loss 0.0166 (0.0166)	Prec@1 100.000 (100.000)
Epoch: [6][0/7]	Time 0.159 (0.159)	Loss 0.0571 (0.0571)	Prec@1 100.000 (100.000)
Epoch: [7][0/7]	Time 0.160 (0.160)	Loss 0.1295 (0.1295)	Prec@1 100.000 (100.000)
Accuracy of the network on the 56 test images: 100.000 %
Confusion Matrix:
[[ 5  0]
 [ 0 51]]
Number of model parameters: 84860202
Epoch: [0][0/7]	Time 0.161 (0.161)	Loss 0.3265 (0.3265)	Prec@1 100.000 (100.000)
Accuracy of th

  predictions_matrix = np.vstack(np.ravel(np.array(prediction_matrix)))


Number of model parameters: 84860202
Epoch: [0][0/8]	Time 0.156 (0.156)	Loss 0.1905 (0.1905)	Prec@1 87.500 (87.500)
Accuracy of the network on the 64 test images: 100.000 %
Confusion Matrix:
[[ 5  0]
 [ 0 59]]
Epoch: [1][0/8]	Time 0.165 (0.165)	Loss 0.0268 (0.0268)	Prec@1 100.000 (100.000)
Epoch: [2][0/8]	Time 0.155 (0.155)	Loss 1.3723 (1.3723)	Prec@1 87.500 (87.500)
Epoch: [3][0/8]	Time 0.167 (0.167)	Loss 1.2147 (1.2147)	Prec@1 87.500 (87.500)
Epoch: [4][0/8]	Time 0.171 (0.171)	Loss 0.2522 (0.2522)	Prec@1 100.000 (100.000)
Epoch: [5][0/8]	Time 0.166 (0.166)	Loss 0.2223 (0.2223)	Prec@1 100.000 (100.000)
Epoch: [6][0/8]	Time 0.158 (0.158)	Loss 0.0899 (0.0899)	Prec@1 100.000 (100.000)
Epoch: [7][0/8]	Time 0.164 (0.164)	Loss 0.0660 (0.0660)	Prec@1 100.000 (100.000)
Accuracy of the network on the 64 test images: 100.000 %
Confusion Matrix:
[[ 5  0]
 [ 0 59]]
Number of model parameters: 84860202
Epoch: [0][0/8]	Time 0.154 (0.154)	Loss 1.6023 (1.6023)	Prec@1 87.500 (87.500)
Accuracy of the n

  predictions_matrix = np.vstack(np.ravel(np.array(prediction_matrix)))


Number of model parameters: 84860202
Epoch: [0][0/5]	Time 0.162 (0.162)	Loss 0.3077 (0.3077)	Prec@1 100.000 (100.000)
Accuracy of the network on the 40 test images: 87.500 %
Confusion Matrix:
[[ 4  0]
 [ 5 31]]
Epoch: [1][0/5]	Time 0.162 (0.162)	Loss 0.4724 (0.4724)	Prec@1 87.500 (87.500)
Epoch: [2][0/5]	Time 0.155 (0.155)	Loss 0.6830 (0.6830)	Prec@1 87.500 (87.500)
Epoch: [3][0/5]	Time 0.163 (0.163)	Loss 1.2993 (1.2993)	Prec@1 87.500 (87.500)
Epoch: [4][0/5]	Time 0.164 (0.164)	Loss 0.3275 (0.3275)	Prec@1 100.000 (100.000)
Epoch: [5][0/5]	Time 0.155 (0.155)	Loss 0.1408 (0.1408)	Prec@1 100.000 (100.000)
Epoch: [6][0/5]	Time 0.163 (0.163)	Loss 0.2236 (0.2236)	Prec@1 87.500 (87.500)
Epoch: [7][0/5]	Time 0.162 (0.162)	Loss 0.1305 (0.1305)	Prec@1 100.000 (100.000)
Accuracy of the network on the 40 test images: 95.000 %
Confusion Matrix:
[[ 4  0]
 [ 2 34]]
Number of model parameters: 84860202
Epoch: [0][0/5]	Time 0.161 (0.161)	Loss 2.3646 (2.3646)	Prec@1 50.000 (50.000)
Accuracy of the netwo

  predictions_matrix = np.vstack(np.ravel(np.array(prediction_matrix)))


Number of model parameters: 84860202
Epoch: [0][0/6]	Time 0.175 (0.175)	Loss 0.7831 (0.7831)	Prec@1 87.500 (87.500)
Accuracy of the network on the 48 test images: 95.833 %
Confusion Matrix:
[[ 4  0]
 [ 2 42]]
Epoch: [1][0/6]	Time 0.166 (0.166)	Loss 0.5680 (0.5680)	Prec@1 100.000 (100.000)
Epoch: [2][0/6]	Time 0.160 (0.160)	Loss 0.2241 (0.2241)	Prec@1 100.000 (100.000)
Epoch: [3][0/6]	Time 0.160 (0.160)	Loss 0.4746 (0.4746)	Prec@1 100.000 (100.000)
Epoch: [4][0/6]	Time 0.159 (0.159)	Loss 0.6739 (0.6739)	Prec@1 100.000 (100.000)
Epoch: [5][0/6]	Time 0.168 (0.168)	Loss 0.3462 (0.3462)	Prec@1 87.500 (87.500)
Epoch: [6][0/6]	Time 0.154 (0.154)	Loss 0.0053 (0.0053)	Prec@1 100.000 (100.000)
Epoch: [7][0/6]	Time 0.157 (0.157)	Loss 0.1123 (0.1123)	Prec@1 100.000 (100.000)
Accuracy of the network on the 48 test images: 95.833 %
Confusion Matrix:
[[ 4  0]
 [ 2 42]]
Number of model parameters: 84860202
Epoch: [0][0/6]	Time 0.160 (0.160)	Loss 1.8146 (1.8146)	Prec@1 50.000 (50.000)
Accuracy of the n

  predictions_matrix = np.vstack(np.ravel(np.array(prediction_matrix)))


Number of model parameters: 84860202
Epoch: [0][0/7]	Time 0.165 (0.165)	Loss 0.3365 (0.3365)	Prec@1 100.000 (100.000)
Accuracy of the network on the 56 test images: 94.643 %
Confusion Matrix:
[[ 4  0]
 [ 3 49]]
Epoch: [1][0/7]	Time 0.157 (0.157)	Loss 0.5162 (0.5162)	Prec@1 87.500 (87.500)
Epoch: [2][0/7]	Time 0.164 (0.164)	Loss 0.3533 (0.3533)	Prec@1 87.500 (87.500)
Epoch: [3][0/7]	Time 0.165 (0.165)	Loss 0.5809 (0.5809)	Prec@1 75.000 (75.000)
Epoch: [4][0/7]	Time 0.170 (0.170)	Loss 0.1830 (0.1830)	Prec@1 100.000 (100.000)
Epoch: [5][0/7]	Time 0.162 (0.162)	Loss 0.0034 (0.0034)	Prec@1 100.000 (100.000)
Epoch: [6][0/7]	Time 0.166 (0.166)	Loss 0.1844 (0.1844)	Prec@1 87.500 (87.500)
Epoch: [7][0/7]	Time 0.160 (0.160)	Loss 0.0478 (0.0478)	Prec@1 100.000 (100.000)
Accuracy of the network on the 56 test images: 98.214 %
Confusion Matrix:
[[ 4  0]
 [ 1 51]]
Number of model parameters: 84860202
Epoch: [0][0/7]	Time 0.168 (0.168)	Loss 0.5291 (0.5291)	Prec@1 75.000 (75.000)
Accuracy of the netwo

  predictions_matrix = np.vstack(np.ravel(np.array(prediction_matrix)))


Number of model parameters: 84860202
Epoch: [0][0/8]	Time 0.169 (0.169)	Loss 0.1433 (0.1433)	Prec@1 100.000 (100.000)
Accuracy of the network on the 64 test images: 79.688 %
Confusion Matrix:
[[ 4  0]
 [13 47]]
Epoch: [1][0/8]	Time 0.157 (0.157)	Loss 0.4458 (0.4458)	Prec@1 87.500 (87.500)
Epoch: [2][0/8]	Time 0.153 (0.153)	Loss 1.5726 (1.5726)	Prec@1 37.500 (37.500)
Epoch: [3][0/8]	Time 0.162 (0.162)	Loss 0.4052 (0.4052)	Prec@1 100.000 (100.000)
Epoch: [4][0/8]	Time 0.159 (0.159)	Loss 0.2741 (0.2741)	Prec@1 87.500 (87.500)
Epoch: [5][0/8]	Time 0.167 (0.167)	Loss 0.1711 (0.1711)	Prec@1 100.000 (100.000)
Epoch: [6][0/8]	Time 0.158 (0.158)	Loss 0.1859 (0.1859)	Prec@1 100.000 (100.000)
Epoch: [7][0/8]	Time 0.171 (0.171)	Loss 0.1595 (0.1595)	Prec@1 100.000 (100.000)
Accuracy of the network on the 64 test images: 100.000 %
Confusion Matrix:
[[ 4  0]
 [ 0 60]]
Number of model parameters: 84860202
Epoch: [0][0/8]	Time 0.160 (0.160)	Loss 0.2451 (0.2451)	Prec@1 87.500 (87.500)
Accuracy of the ne

In [28]:
import copy

def run_experiment(param):
    al_param = param["AL"]
    al_param["PATH"] = param["PATH"]
    l2d_param = param["L2D"]
    
    run_param = copy.deepcopy(param)

    

    #basic_Dataset = ds.BasicDataset(param["PATH"], param["TARGET"])

    for labeler_ids in param["LABELER_IDS"]:
        run_param["LABELER_IDS"] = labeler_ids

        for init_size in param["AL"]["INITIAL_SIZE"]:
            run_param["AL"]["INITIAL_SIZE"] = init_size

            for batch_size_al in param["AL"]["BATCH_SIZE_AL"]:
                run_param["AL"]["BATCH_SIZE_AL"] = batch_size_al

                for max_rounds in param["AL"]["MAX_ROUNDS"]:
                    run_param["AL"]["MAX_ROUNDS"] = max_rounds

                    for cost in param["AL"]["COST"]:
                        run_param["AL"]["COST"] = cost

                        run = neptune.init_run(
                            project=config_neptune["project"],
                            api_token=config_neptune["api_token"],
                            #custom_run_id="AL_" + 
                        )

                        for mod in ["confidence", "disagreement"]:
                            run_param["mod"] = mod

                            run["param"] = run_param

                            increase_experts(run_param)

In [26]:
param = {
    "AL": { #Parameter for Active Learning
        "INITIAL_SIZE": [4, 8, 16, 32, 64], #
        "EPOCH_TRAIN": 12, #
        "n_dataset": 2, #Number Classes
        "BATCH_SIZE": 8,
        "MAX_ROUNDS": [2, 4, 8],
        "BATCH_SIZE_AL": [4, 8],
        #"EPOCHS_DEFER": 5,
        "COST": [(0, 0), (2, 0), (4, 0), (8, 0), (10, 0)], #Cost for Cost sensitiv learning
        #"TRAIN REJECTOR": False,
    },
    "L2D": { # Parameter for Learning to defer
        "maxLabels": 16,
    },
    "TARGET": "Airspace_Opacity",
    "PATH": "../Datasets/NIH/",
    "K": 10, #Number of folds
    "LABELER_IDS": [[4323195249, 4295232296]],
    
    "batch_size": 64,
    "alpha": 1.0, #scaling parameter for the loss function, default=1.0
    "epochs": 50,
    "patience": 15, #number of patience steps for early stopping the training
    "expert_type": "MLPMixer", #specify the expert type. For the type of experts available, see-> models -> experts. defualt=predict
    "n_classes": 2, #K for K class classification
    "k": 0, #
    "n_experts": 2, #
    "lr": 0.001, #learning rate
    "weight_decay": 5e-4, #
    "warmup_epochs": 5, #
    "loss_type": "softmax", #surrogate loss type for learning to defer
    "ckp_dir": "./Models", #directory name to save the checkpoints
    "experiment_name": "multiple_experts", #specify the experiment name. Checkpoints will be saved with this name
    #
    "TRAIN_BATCH_SIZE": 64,
    "TEST_BATCH_SIZE": 64,
    "NUM_EXPERTS": 2,

    "GT": True, # Determines if the classifier gets all data with GT Label or only the labeld data
    "Modes": ["Random", "Confidence", "Disagreement"]
    
}

In [None]:
import neptune

import json

with open('neptune_config.json', 'r') as f:
    config = json.load(f)

config_neptune = config["neptune"]
