In [1]:
import copy
import json
import math
import os
import random
import shutil
import time
import pandas as pd

import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.nn.functional as F
import torch.optim
import torch.utils.data

import sklearn
import copy

import gc
from torch.utils.data import DataLoader


import torchvision.transforms as transforms
from PIL import Image

import Verma.experts as vexp
import Verma.losses as vlos
from Verma.utils import AverageMeter, accuracy
import Verma.resnet50 as vres
from AL.utils import *
from AL.metrics import *
from AL.neural_network import NetSimple

import Dataset.Dataset as ds

ModuleNotFoundError: No module named 'NIH'

In [None]:
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

#torch.multiprocessing.set_start_method('spawn')

In [None]:
class Expert:
    def __init__(self, dataset, labeler_id, modus="perfect", param=None, nLabels=800, prob=0.5):
        self.labelerId = labeler_id
        self.dataset = dataset
        self.data = dataset.getData()[["Image ID", str(self.labelerId)]]
        #self.data["Image ID"] = self.data["Image ID"].astype('category')
        self.nLabels = nLabels
        self.param = param
        self.prob = prob
        self.modus = modus
        
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

        if self.modus == "perfect":
            self.predictions = self.data
            self.predictions["Image ID"] = self.predictions["Image ID"].astype('category')
            
        self.prebuild_predictions = []
        self.prebuild_filenames = []

    def predict(self, img, target, fnames):
        """
        img: the input image
        target: the GT label
        fname: filename (id for the image)
        """
        #return np.array([self.predictions[self.predictions["Image ID"] == image_id][str(self.labelerId)].values for image_id in fnames]).ravel()
        return np.array([self.predictions.loc[self.predictions["Image ID"] == image_id, str(self.labelerId)].values[0] for image_id in fnames])

    
    def setModel(self, model):
        self.model = model
        
    def predictModel(self, img, target, fnames):
        #if len(img.shape) == 3:
        if img.dim() == 3:
            img = img.unsqueeze(0)
        with torch.no_grad():
            outputs = self.model(img)
            _, predicted = torch.max(outputs.data, 1)
        #return predicted
        return predicted
    
    def predictImage(self, img):
        return self.predictModel(img, None, None)
    
    def getModel(self):
        return self.model
    
    def saveModel(self, path, name):
        torch.save(self.model, path + "/" + name + "_" + str(labeler_id))
        
    def loadModel(self, path, name):
        self.model = torch.load(path + "/" + name + "_" + str(labeler_id))
        model.eval()
        
    def predictWithModel(self, img, target, filename):
        """
        Checks with the model if the expert would be correct
        If it predicts 1 than it returns the true label
        If it predicts 0 than is returns the opposit label
        """
        predicted = self.predictModel(img, target, filename)
        result = []
        target = target.cpu().detach().numpy()
        for i, pred in enumerate(predicted):
            if pred == 1:
                result.append(target[i])
            else:
                result.append(1 - target[i])
        return result
    
    def init_model_predictions(self, train_dataloader):
        for i, (input, target, hpred) in enumerate(train_dataloader):
            result = self.predictWithModel(input.to(self.device), target, hpred)
            self.prebuild_predictions += result
            self.prebuild_filenames += hpred
    
    def predict_model_predefined(self, img, target, filenames):
        return [self.prebuild_predictions[self.prebuild_filenames.index(filename)] for filename in filenames]


In [None]:
class NIHExpertDatasetMemory():
    def __init__(self, images, filenames, targets, expert_fn, labeled, indices = None, expert_preds = None, param=None, preload=False, preprocess=False, image_container=None):
        """
        Original cifar dataset
        images: images
        targets: labels
        expert_fn: expert function
        labeled: indicator array if images is labeled
        indices: indices in original CIFAR dataset (if this subset is subsampled)
        expert_preds: used if expert_fn or have different expert model
        """
        self.preprocess = preprocess
        self.filenames = filenames
        self.targets = np.array(targets)
        self.expert_fn = expert_fn
        self.labeled = np.array(labeled)
        
        self.image_ids = filenames
        self.preload = False
        self.PATH = param["PATH"]
        
        normalize = transforms.Normalize(mean=[x / 255.0 for x in [125.3]],
                                         std=[x / 255.0 for x in [63.0]])
        self.transform_test = transforms.Compose([transforms.Resize(128), transforms.ToTensor(), normalize])

        self.image_container = image_container

        self.images = images
        if images is not None:
            self.images = images
            self.preload = True
        else:
            self.preload = preload
            if self.preload:
                self.images = []
                self.loadImages()
        
        if expert_preds is not None:
            self.expert_preds = expert_preds
        else:
            self.expert_preds = np.array(expert_fn(self.images, torch.FloatTensor(targets), fnames = self.filenames))
        for i in range(len(self.expert_preds)):
            if self.labeled[i] == 0:
                self.expert_preds[i] = -1 # not labeled by expert
        if indices is not None:
            self.indices = indices
        else:
            self.indices = np.array(list(range(len(self.targets))))
            
    def loadImage(self, idx):
        """
        Load one single image
        """
        if self.image_container is not None:
            return self.image_container.get_image_from_name(self.image_ids[idx])
        else:
            return Image.open(self.PATH + "images/" + self.image_ids[idx]).convert("RGB").resize((244,244))
            
    def getImage(self, idx):
        """
        Returns the image from index idx
        """
        if self.preload:
            return self.images[idx]
        else:
            return self.loadImage(idx)

    def loadImages(self):
        """
        Load all images
        """
        if self.image_container is not None:
            self.images = self.image_container.get_images_from_name(self.image_ids)
            if self.preprocess:
                print("Preprocessed")
                #self.images = [self.transformImage(img) for img in self.images]
        else:
            for idx in range(len(self.image_ids)):
                if self.preprocess:
                    self.images.append(self.transformImage(self.loadImage(idx)))
                else:
                    self.images.append(self.loadImage(idx))

    def transformImage(self, img):
        return self.transform_test(img)
    
    
    def __getitem__(self, index):
        """Take the index of item and returns the image, label, expert prediction and index in original dataset"""
        label = self.targets[index]
        img = self.getImage(index)
        if self.preprocess:
            image = img
        else:
            image = self.transformImage(img)
        #image = self.transform_test(self.images[index])
        filename = self.filenames[index]
        expert_pred = self.expert_preds[index]
        indice = self.indices[index]
        labeled = self.labeled[index]
        return torch.FloatTensor(image), label, expert_pred, indice, labeled, str(filename)

    def __len__(self):
        return len(self.targets)

In [None]:
def sampleIndices(n, k, all_indices, experten, seed = None):
    if seed is not None:
        set_seed(seed)
    same_indices = random.sample(all_indices, k)
    diff_indices = []
    used_indices = same_indices
    indices = []
    if k == n:
        for expert in experten:
            indices.append(same_indices)
    if k < n:
        for expert in experten:
            temp_indices = []
            count = 0 # To avoid infinity loop
            while len(temp_indices) < (n - k):
                count += 1
                temp = random.sample(all_indices, 1)
                if temp not in used_indices:
                    temp_indices = temp_indices + temp
                    used_indices = used_indices + temp
                if count >= 1000:
                    temp = random.sample(used_indices, n-k-len(temp_indices))
                    if isinstance(temp, list):
                        temp_indices = temp_indices + temp
                    else:
                        temp_indices.append(temp)
                    break
            indices.append(same_indices + temp_indices)
    return indices

def getIndicesWithoutLabel(all_indices, labeled_indices):
    temp = all_indices
    for indices in labeled_indices:
        temp = [x for x in temp if x not in indices]
    return temp

In [None]:
from scipy.stats import entropy

def get_least_confident_points(model, data_loader, budget):
    '''
    based on entropy score get points, can chagnge, but make sure to get max or min accordingly
    '''
    uncertainty_estimates = []
    indices_all = []
    for data in data_loader:
        images, labels, expert_preds, indices, _, filenames = data
        with torch.no_grad():
            images, labels, expert_preds = images.to(device), labels.to(device), expert_preds.to(device)
            outputs = model(images)
            batch_size = outputs.size()[0]  
            for i in range(0, batch_size):
                output_i =  outputs.data[i].cpu().numpy()
                entropy_i = entropy(output_i)
                #entropy_i = 1 - max(output_i)
                uncertainty_estimates.append(entropy_i)
                indices_all.append(indices[i].item())
    indices_all = np.array(indices_all)
    top_budget_indices = np.argsort(uncertainty_estimates)[-budget:]
    actual_indices = indices_all[top_budget_indices]
    uncertainty_estimates = np.array(uncertainty_estimates)
    return actual_indices

def disagree(array):
    start = array[0]
    for el in array[1:]:
        if start != el:
            return start != el
    return False

def getQbQPoints(expert_models, data_loader, budget):
    """
    Selects n (budget) points with query by committee
    """
    # Get Predictions for all points for all experts
    #global prediction_matrix
    prediction_matrix = None
    indices_all = []
    for data in data_loader:
        images, labels, _, indices, _, filenames = data
        experts_preds = []
        for j, expert_model in enumerate(expert_models):
            with torch.no_grad():
                images = images.to(device)
                outputs_exp = expert_model(images)
                preds = []
                for i in range(outputs_exp.size()[0]):
                    pred_exp = outputs_exp.data[i].cpu().numpy()
                    pred_exp = pred_exp[1]
                    #preds.append(round(pred_exp))
                    preds.append(pred_exp)
                    if (j == 0): #Add the indices only the first time
                        indices_all.append(indices[i].item())
            experts_preds.append(np.array(preds))

        if prediction_matrix is None:
            prediction_matrix = np.swapaxes(np.array(experts_preds), 0, 1)
        else:
            prediction_matrix = np.concatenate((prediction_matrix, np.swapaxes(np.array(experts_preds), 0, 1)), axis=0)
    predictions_matrix = prediction_matrix

    #Get where the experts disagree

    print(predictions_matrix.shape)

    matrixx = [disagree(np.round(row)) for row in predictions_matrix]
    ids = np.array(indices_all)[matrixx]
    
    print("Disagreement on " + str(len(ids)) + " Points")
    if NEPTUNE:
        run["Disagreement Points"].append(len(ids))

    ids = ids[:budget].tolist()

    if len(ids) < budget:
        matrixx = [row for row in predictions_matrix if not disagree(np.round(row))]
        points = np.array([np.sum(np.abs(row - 0.5)) for row in matrixx])

        for row in np.array(matrixx)[points.argsort()[:(budget - len(ids))].tolist()]:
            ids.append(indices_all[np.argwhere(predictions_matrix == row)[0][0]])

    print(ids)

    return random.sample(ids, min(budget, len(ids)))

def getQbQPointsDifference(expert_models, data_loader, budget):
    """
    Selects n (budget) points with query by committee
    """
    # Get Predictions for all points for all experts
    #global prediction_matrix
    prediction_matrix = None
    indices_all = []
    for data in data_loader:
        images, labels, _, indices, _, filenames = data
        experts_preds = []
        for j, expert_model in enumerate(expert_models):
            with torch.no_grad():
                images = images.to(device)
                outputs_exp = expert_model(images)
                preds = []
                for i in range(outputs_exp.size()[0]):
                    pred_exp = outputs_exp.data[i].cpu().numpy()
                    pred_exp = pred_exp[1]
                    #preds.append(round(pred_exp))
                    preds.append(pred_exp)
                    if (j == 0): #Add the indices only the first time
                        indices_all.append(indices[i].item())
            experts_preds.append(np.array(preds))

        if prediction_matrix is None:
            prediction_matrix = np.swapaxes(np.array(experts_preds), 0, 1)
        else:
            prediction_matrix = np.concatenate((prediction_matrix, np.swapaxes(np.array(experts_preds), 0, 1)), axis=0)
    predictions_matrix = prediction_matrix

    #Get where the experts disagree
    print(predictions_matrix.shape)

    matrixx = [row for row in predictions_matrix if disagree(np.round(row))]
    points = np.array([np.sum(np.abs(row - 0.5)) for row in matrixx])

    print("Disagreement on " + str(len(points)) + " Points")
    if NEPTUNE:
        run["Disagreement Points"].append(len(points))

    ids = []
    for row in np.array(matrixx)[points.argsort()[:budget].tolist()]:
        ids.append(indices_all[np.argwhere(predictions_matrix == row)[0][0]])

    if len(ids) < budget:
        matrixx = [row for row in predictions_matrix if not disagree(np.round(row))]
        points = np.array([np.sum(np.abs(row - 0.5)) for row in matrixx])

        for row in np.array(matrixx)[points.argsort()[:(budget - len(ids))].tolist()]:
            ids.append(indices_all[np.argwhere(predictions_matrix == row)[0][0]])

    print(ids)
    
    #print("Disagreement on " + str(len(ids)) + " Points")
    return ids[:budget]



def getExpertModels(indices, experts, train_dataset, val_dataset, test_dataset, param=None, seed=None, fold=None, mod="", image_container=None):
    
    # initialize data, Erhält alle Indizes der Daten
    all_indices = list(range(len(train_dataset.getAllIndices())))
    train_dataset.getAllImagesNP().shape
    #all_data_x = train_dataset.getAllImagesNP()[all_indices]
    all_data_filenames = np.array(train_dataset.getAllFilenames())[all_indices]
    all_data_y = np.array(train_dataset.getAllTargets())[all_indices]
    
    print("Complete data generation")

    # Bestimmt die Indizes, welche gelabelt und welche ungelabelt sind

    set_seed(seed)

    #Samples the indices with k same and n-k different images
    """if k is not None:
        indices = sampleIndices(n = param["INITIAL_SIZE"], k = k, all_indices = all_indices, experten = experts, seed = seed)
    else:
        Intial_random_set = random.sample(all_indices, param["INITIAL_SIZE"])
        indices_labeled  = Intial_random_set
        indices_unlabeled= list(set(all_indices) - set(indices_labeled))"""

    gc.collect()

    # train expert model on labeled data
    # Expertenmodell variabel
    
    expert_models = []
    for i, expert in enumerate(experts):

        print("Starting with expert " + str(i))

        Intial_random_set = indices[i]
        indices_labeled  = Intial_random_set

        # Lädt die Datasets für die beschrifteten und unbeschrifteten Daten
        dataset_train_labeled = NIHExpertDatasetMemory(None, all_data_filenames[indices_labeled], all_data_y[indices_labeled], expert.predict , [1]*len(indices_labeled), indices_labeled, param=param, preload=param["PRELOAD"], image_container=image_container)
        #dataset_train_unlabeled = NIHExpertDatasetMemory(None, all_data_filenames[indices_unlabeled], all_data_y[indices_unlabeled], expert.predict , [0]*len(indices_unlabeled), indices_unlabeled, param=param, preload=param["PRELOAD"])
        #dataset_val_unlabeled = NIHExpertDatasetMemory(None, all_data_filenames[indices_unlabeled], all_data_y[indices_unlabeled], expert.predict , [1]*len(indices_unlabeled), indices_unlabeled, param=param, preload=param["PRELOAD"])
        # Lädt die Dataloaders
        dataLoaderTrainLabeled = DataLoader(dataset=dataset_train_labeled, batch_size=param["BATCH_SIZE"], shuffle=True, num_workers=4, pin_memory=True)
        #dataLoaderTrainUnlabeled = DataLoader(dataset=dataset_train_unlabeled, batch_size=param["BATCH_SIZE"], shuffle=True,  num_workers=0, pin_memory=True)

        dataset_val_unlabeled = NIHExpertDatasetMemory(None, val_dataset.getAllFilenames(), np.array(val_dataset.getAllTargets()), expert.predict , [1]*len(val_dataset.getAllIndices()), val_dataset.getAllIndices(), param=param, preload=param["PRELOAD"], image_container=image_container)
        dataLoaderValUnlabeled = DataLoader(dataset=dataset_val_unlabeled, batch_size=param["BATCH_SIZE"], shuffle=True, num_workers=4, pin_memory=True)

        print("Complete dataloader generation")

        gc.collect()
        
        expert_models.append(NetSimple(2, 3, 100, 100, 1000,500).to(device)) 
        run_expert(expert_models[-1], param["EPOCH_TRAIN"], dataLoaderTrainLabeled, dataLoaderValUnlabeled, param=param, id=expert.labelerId, seed=seed, fold=fold, n_images=param["INITIAL_SIZE"]) 

    print("Experts trained")

    #Returns all indices without any used label
    indices_unlabeled = getIndicesWithoutLabel(all_indices = all_indices, labeled_indices = indices)
    indices_labeled = list(set(all_indices) - set(indices_unlabeled))

    dataset_train_unlabeled = NIHExpertDatasetMemory(None, all_data_filenames[indices_unlabeled], all_data_y[indices_unlabeled], expert.predict , [0]*len(indices_unlabeled), indices_unlabeled, param=param, preload=param["PRELOAD"], image_container=image_container)
    dataLoaderTrainUnlabeled = DataLoader(dataset=dataset_train_unlabeled, batch_size=param["BATCH_SIZE"], shuffle=True, num_workers=4, pin_memory=True)
    
    data_sizes = []
    error_confidence = []
    data_sizes.append(param["INITIAL_SIZE"])
    
    print("Starting with AL")
    for round in range(param["MAX_ROUNDS"]):

        print(f'\n \n Round {round} \n \n')

        # get points where expert model is least confident on
        #indices_confidence =  random.sample(indices_unlabeled, BATCH_SIZE_AL)
        #indices_confidence = get_least_confident_points(model_expert, dataLoaderTrainUnlabeled, param["BATCH_SIZE_AL"])

        #Try to get better Points
        if mod == "disagreement":
            indices_qbq = getQbQPoints(expert_models, dataLoaderTrainUnlabeled, param["BATCH_SIZE_AL"])
        if mod == "disagreement_diff":
            indices_qbq = getQbQPointsDifference(expert_models, dataLoaderTrainUnlabeled, param["BATCH_SIZE_AL"])
        
        #indices_labeled  = indices_labeled + list(indices_confidence) 
        indices_labeled  = indices_labeled + list(indices_qbq) 
        indices_unlabeled= list(set(all_indices) - set(indices_labeled))
        
        # train model on labeled data
        for j, expert in enumerate(experts):

            dataset_train_labeled = NIHExpertDatasetMemory(None, all_data_filenames[indices_labeled], all_data_y[indices_labeled], expert.predict , [1]*len(indices_labeled), indices_labeled, param=param, preload=param["PRELOAD"], image_container=image_container)
            

            #dataset_val_unlabeled = NIHExpertDatasetMemory(None, all_data_filenames[indices_unlabeled], all_data_y[indices_unlabeled], expert.predict , [1]*len(indices_unlabeled), indices_unlabeled, param=param, preload=param["PRELOAD"])
            #dataset_val_unlabeled = NIHExpertDatasetMemory(None, val_dataset.getAllFilenames(), np.array(val_dataset.getAllTargets()), expert.predict , [1]*len(val_dataset.getAllIndices()), val_dataset.getAllIndices(), param=param, preload=param["PRELOAD"])
            dataLoaderTrainLabeled = DataLoader(dataset=dataset_train_labeled, batch_size=param["BATCH_SIZE"], shuffle=True, num_workers=4, pin_memory=True)
            
            #dataLoaderValUnlabeled = DataLoader(dataset=dataset_val_unlabeled, batch_size=param["BATCH_SIZE"], shuffle=True,  num_workers=0, pin_memory=True)

            dataset_val_unlabeled = NIHExpertDatasetMemory(None, val_dataset.getAllFilenames(), np.array(val_dataset.getAllTargets()), expert.predict , [1]*len(val_dataset.getAllIndices()), val_dataset.getAllIndices(), param=param, preload=param["PRELOAD"], image_container=image_container)
            dataLoaderValUnlabeled = DataLoader(dataset=dataset_val_unlabeled, batch_size=param["BATCH_SIZE"], shuffle=True, num_workers=4, pin_memory=True)
            
            run_expert(expert_models[j], param["EPOCH_TRAIN"], dataLoaderTrainLabeled, dataLoaderValUnlabeled, param=param, id=expert.labelerId, seed=seed, fold=fold, n_images=param["INITIAL_SIZE"] + (round+1)*param["BATCH_SIZE_AL"])

        dataset_train_unlabeled = NIHExpertDatasetMemory(None, all_data_filenames[indices_unlabeled], all_data_y[indices_unlabeled], expert.predict , [0]*len(indices_unlabeled), indices_unlabeled, param=param, preload=param["PRELOAD"], image_container=image_container)
        dataLoaderTrainUnlabeled = DataLoader(dataset=dataset_train_unlabeled, batch_size=param["BATCH_SIZE"], shuffle=True, num_workers=4, pin_memory=True)
    
    print("Test Data:")
    dataset_test_unlabeled = NIHExpertDatasetMemory(None, test_dataset.getAllFilenames(), np.array(test_dataset.getAllTargets()), expert.predict , [1]*len(test_dataset.getAllIndices()), test_dataset.getAllIndices(), param=param, preload=param["PRELOAD"], image_container=image_container)
    dataLoaderVal = DataLoader(dataset=dataset_test_unlabeled, batch_size=param["BATCH_SIZE"], shuffle=True, num_workers=4, pin_memory=True)
    met = {}
    for j, expert in enumerate(experts):
        temp = metrics_print_expert(expert_models[j], dataLoaderVal, id=expert.labelerId, seed=seed, fold=fold, n_images=param["INITIAL_SIZE"] + (param["MAX_ROUNDS"] + 5)*param["BATCH_SIZE_AL"], test=True)
        met[expert.labelerId] = temp
    print("AL finished")
    return expert_models, met

In [None]:
#for trial in range(MAX_TRIALS):
def getExpertModel(indices, train_dataset, val_dataset, test_dataset, expert, param=None, seed=None, fold=None, image_container=None):
    
    error_confidence_trials_LCE = []
    
    # initialize data, Erhält alle Indizes der Daten
    all_indices = list(range(len(train_dataset.getAllIndices())))
    train_dataset.getAllImagesNP().shape
    #all_data_x = train_dataset.getAllImagesNP()[all_indices]
    all_data_filenames = np.array(train_dataset.getAllFilenames())[all_indices]
    all_data_y = np.array(train_dataset.getAllTargets())[all_indices]
    
    print("Complete first data generation")

    gc.collect()

    # Bestimmt die Indizes, welche gelabelt und welche ungelabelt sind

    set_seed(seed)
    
    Intial_random_set = indices
    indices_labeled  = Intial_random_set
    indices_unlabeled= list(set(all_indices) - set(indices_labeled))

    gc.collect()

    # Lädt die Datasets für die beschrifteten und unbeschrifteten Daten
    dataset_train_labeled = NIHExpertDatasetMemory(None, all_data_filenames[indices_labeled], all_data_y[indices_labeled], expert.predict , [1]*len(indices_labeled), indices_labeled, param=param, preload=param["PRELOAD"], image_container=image_container)
    dataset_train_unlabeled = NIHExpertDatasetMemory(None, all_data_filenames[indices_unlabeled], all_data_y[indices_unlabeled], expert.predict , [0]*len(indices_unlabeled), indices_unlabeled, param=param, preload=param["PRELOAD"], image_container=image_container)

    dataset_val_unlabeled = NIHExpertDatasetMemory(None, val_dataset.getAllFilenames(), np.array(val_dataset.getAllTargets()), expert.predict , [1]*len(val_dataset.getAllIndices()), val_dataset.getAllIndices(), param=param, preload=param["PRELOAD"], image_container=image_container)
    
    # Lädt die Dataloaders
    dataLoaderTrainLabeled = DataLoader(dataset=dataset_train_labeled, batch_size=param["BATCH_SIZE"], shuffle=True, num_workers=4, pin_memory=True)
    dataLoaderTrainUnlabeled = DataLoader(dataset=dataset_train_unlabeled, batch_size=param["BATCH_SIZE"], shuffle=True, num_workers=4, pin_memory=True)
    
    dataLoaderValUnlabeled = DataLoader(dataset=dataset_val_unlabeled, batch_size=param["BATCH_SIZE"], shuffle=True, num_workers=4, pin_memory=True)
    
    print("Complete dataloader generation")

    gc.collect()

    # train expert model on labeled data
    # Expertenmodell variabel
    model_expert = NetSimple(2, 3, 100, 100, 1000,500).to(device)
    # Trainier Modell um Experten vorherzusagen
    print("Labeler: " + str(expert.labelerId))
    
    run_expert(model_expert, param["EPOCH_TRAIN"], dataLoaderTrainLabeled, dataLoaderValUnlabeled, param=param, id=expert.labelerId, seed=seed, fold=fold, n_images=param["INITIAL_SIZE"]) 
    
    print("Expert trained")


    data_sizes = []
    error_confidence = []
    data_sizes.append(param["INITIAL_SIZE"])
        
    gc.collect()

    #Trainiere Rejector nur noch, wenn notwendig
    
    print("Starting with AL")
    for round in range(param["MAX_ROUNDS"]):

        print(f'\n \n Round {round} \n \n')

        # get points where expert model is least confident on
        #indices_confidence =  random.sample(indices_unlabeled, BATCH_SIZE_AL)
        indices_confidence = get_least_confident_points(model_expert, dataLoaderTrainUnlabeled, param["BATCH_SIZE_AL"])
        indices_labeled  = indices_labeled + list(indices_confidence) 
        indices_unlabeled= list(set(all_indices) - set(indices_labeled))

        dataset_train_labeled = NIHExpertDatasetMemory(None, all_data_filenames[indices_labeled], all_data_y[indices_labeled], expert.predict , [1]*len(indices_labeled), indices_labeled, param=param, preload=param["PRELOAD"], image_container=image_container)
        dataset_train_unlabeled = NIHExpertDatasetMemory(None, all_data_filenames[indices_unlabeled], all_data_y[indices_unlabeled], expert.predict , [0]*len(indices_unlabeled), indices_unlabeled, param=param, preload=param["PRELOAD"], image_container=image_container)

        #dataset_val_unlabeled = NIHExpertDatasetMemory(None, all_data_filenames[indices_unlabeled], all_data_y[indices_unlabeled], expert.predict , [1]*len(indices_unlabeled), indices_unlabeled, param=param, preload=param["PRELOAD"])
        #dataset_val_unlabeled = NIHExpertDatasetMemory(None, val_dataset.getAllFilenames(), np.array(val_dataset.getAllTargets()), expert.predict , [1]*len(val_dataset.getAllIndices()), val_dataset.getAllIndices(), param=param, preload=param["PRELOAD"])
        
        dataLoaderTrainLabeled = DataLoader(dataset=dataset_train_labeled, batch_size=param["BATCH_SIZE"], shuffle=True, num_workers=4, pin_memory=True)
        dataLoaderTrainUnlabeled = DataLoader(dataset=dataset_train_unlabeled, batch_size=param["BATCH_SIZE"], shuffle=True, num_workers=4, pin_memory=True)

        #dataLoaderValUnlabeled = DataLoader(dataset=dataset_val_unlabeled, batch_size=param["BATCH_SIZE"], shuffle=True,  num_workers=0, pin_memory=True)
        
        # train model on labeled data
        run_expert(model_expert, param["EPOCH_TRAIN"], dataLoaderTrainLabeled, dataLoaderValUnlabeled, param=param, id=expert.labelerId, seed=seed, fold=fold, n_images=param["INITIAL_SIZE"] + (round+1)*param["BATCH_SIZE_AL"])


    print("Test Data:")
    dataset_test_unlabeled = NIHExpertDatasetMemory(None, test_dataset.getAllFilenames(), np.array(test_dataset.getAllTargets()), expert.predict , [1]*len(test_dataset.getAllIndices()), test_dataset.getAllIndices(), param=param, preload=param["PRELOAD"], image_container=image_container)
    dataLoaderVal = DataLoader(dataset=dataset_test_unlabeled, batch_size=param["BATCH_SIZE"], shuffle=True, num_workers=4, pin_memory=True)
    met = metrics_print_expert(model_expert, dataLoaderVal, id=expert.labelerId, seed=seed, fold=fold, n_images=param["INITIAL_SIZE"] + (param["MAX_ROUNDS"] + 5)*param["BATCH_SIZE_AL"], test=True)
    print("AL finished")
    return model_expert, met

In [None]:
def train_expert_confidence(train_loader, model, optimizer, scheduler, epoch, apply_softmax, param=None, id=""):
    """Train for one epoch the model to predict expert agreement with label"""
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, label, expert_pred, _, _, filenames ) in enumerate(train_loader):
        expert_pred = expert_pred.long()
        expert_pred = (expert_pred == label) *1
        #expert_pred = (expert_pred == label).int()
        target = expert_pred.to(device)
        input = input.to(device)
        
        # compute output
        output = model(input)

        # compute loss
        
        if apply_softmax:
            loss = my_CrossEntropyLossWithSoftmax(output, target)
        else:
            #loss = my_CrossEntropyLoss(output, target)
            loss = my_CrossEntropyLoss(output, target, cost=param["COST"])
        
        # measure accuracy and record loss
        prec1 = accuracy(output.data, target, topk=(1,))[0]
        #losses.update(loss.data.item(), input.size(0))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % 10 == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                epoch, i, len(train_loader), batch_time=batch_time,
                loss=losses, top1=top1))
            

def run_expert(model, epochs, train_loader, val_loader, apply_softmax = False, param=None, id=0, seed=None, fold=None, n_images=None):
    '''
    train expert model to predict disagreement with label
    model: WideResNet model or pytorch model (2 outputs)
    epochs: number of epochs to train
    '''
    # get the number of model parameters
    print("Expert: " + str(id))

    # define loss function (criterion) and optimizer
    #optimizer = torch.optim.AdamW(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.01, amsgrad=False)

    optimizer = torch.optim.SGD(model.parameters(), 0.001, #0.001
                                momentum=0.9, nesterov=True,
                                weight_decay=5e-4)
    # cosine learning rate
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, len(train_loader) * epochs)

    for epoch in range(0, epochs):
        # train for one epoch
        train_expert_confidence(train_loader, model, optimizer, scheduler, epoch, apply_softmax, param=param)
        if epoch % 10 == 0:
            pass
            #metrics_print_expert(model, val_loader, id, seed=seed, fold=fold)
            
    metrics_print_expert(model, val_loader, id=id, seed=seed, fold=fold, n_images=n_images)

def metrics_print_expert(model, data_loader, defer_net = False, id=0, seed=None, fold=None, n_images=None, test=False):
    '''
    Computes metrics for expert model error prediction
    model: model
    data_loader: data loader
    '''
    correct = 0
    total = 0
    
    #label_list = np.empty(0)
    #predictions_list = np.empty(0)
    label_list = []
    predictions_list = []
    # again no gradients needed
    with torch.no_grad():
        for data in data_loader:
            images, label, expert_pred, _ ,_, filenames = data
            expert_pred = expert_pred.long()
            expert_pred = (expert_pred == label) *1
            #expert_pred = (expert_pred == label).int()
            images, labels = images.to(device), expert_pred.to(device)
            outputs = model(images)
            #_, predictions = torch.max(outputs.data, 1) # maybe no .data
            _, predictions = torch.max(outputs, 1)

            total += labels.size(0)
            correct += (predictions == labels).sum().item()

            label_list.extend(labels.cpu().numpy())
            predictions_list.extend(predictions.cpu().numpy())
            
            #label_list = np.concatenate((label_list, labels.cpu().numpy()), axis=0)
            #predictions_list = np.concatenate((predictions_list, predictions.cpu().numpy()), axis=0)

    label_list = np.array(label_list)
    predictions_list = np.array(predictions_list)
    
    print('Accuracy of the network on the %d test images: %.3f %%' % (total,
        100 * correct / total))
    
    tn, fp, fn, tp = sklearn.metrics.confusion_matrix(label_list, predictions_list, labels=[0, 1]).ravel()

    f1 = sklearn.metrics.f1_score(label_list, predictions_list)

    ac_balanced = sklearn.metrics.balanced_accuracy_score(label_list, predictions_list)

    met = {
        "tn": tn,
        "fp": fp,
        "fn": fn,
        "tp": tp,
        "f1": f1,
        "accurancy_balanced": ac_balanced,
    }

    if NEPTUNE:
        if test:
            run[f"Test_Seed_{seed}_Fold_{fold}_expert_{id}" + "/tn"].append(tn)
            run[f"Test_Seed_{seed}_Fold_{fold}_expert_{id}" + "/fp"].append(fp)
            run[f"Test_Seed_{seed}_Fold_{fold}_expert_{id}" + "/fn"].append(fn)
            run[f"Test_Seed_{seed}_Fold_{fold}_expert_{id}" + "/tp"].append(tp)

            run[f"Test_Seed_{seed}_Fold_{fold}_expert_{id}" + "/accuracy"].append(100 * correct / total)

            run[f"Test_Seed_{seed}_Fold_{fold}_expert_{id}" + "/f1"].append(f1)

            run[f"Test_Seed_{seed}_Fold_{fold}_expert_{id}" + "/accuracy_balanced"].append(ac_balanced)

        else:
            run[f"Seed_{seed}_Fold_{fold}_expert_{id}" + "/tn"].append(tn, step=n_images)
            run[f"Seed_{seed}_Fold_{fold}_expert_{id}" + "/fp"].append(fp, step=n_images)
            run[f"Seed_{seed}_Fold_{fold}_expert_{id}" + "/fn"].append(fn, step=n_images)
            run[f"Seed_{seed}_Fold_{fold}_expert_{id}" + "/tp"].append(tp, step=n_images)

            run[f"Seed_{seed}_Fold_{fold}_expert_{id}" + "/accuracy"].append(100 * correct / total, step=n_images)

            run[f"Seed_{seed}_Fold_{fold}_expert_{id}" + "/f1"].append(f1, step=n_images)

            run[f"Seed_{seed}_Fold_{fold}_expert_{id}" + "/accuracy_balanced"].append(ac_balanced)
    
    print("Confusion Matrix:")
    print(sklearn.metrics.confusion_matrix(label_list, predictions_list, labels=[0, 1]))
    print("F1 Score: " + str(f1))

    print("Accuracy balanced")
    print(ac_balanced)

    if test:
        return met

In [None]:
def increase_experts(param, basic_Dataset, seeds, n_folds):
    
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    al_param = param["AL"]
    al_param["PATH"] = param["PATH"]
    al_param["TRAIN REJECTOR"] = False
    l2d_param = param["L2D"]

    use_AL = True

    all_system_accuracy = []
    all_expert_accuracy = []
    all_classifier_accuracy = []
    all_alone_classifier = []
    all_validation_loss = []
    all_cov_classifier = []

    all_system_accuracy_full = []
    all_expert_accuracy_full = []
    all_classifier_accuracy_full = []
    all_alone_classifier_full = []
    all_validation_loss_full = []
    all_cov_classifier_full = []

    all_f1_experts = {}
    all_ac_b = {}

    for labelerId in list(param["LABELER_IDS"]):
        all_f1_experts[labelerId] = []
        all_ac_b[labelerId] = []
    
    for seed in seeds:
        print("run for seed {}".format(seed))
        if seed != "":
            set_seed(seed)
        log = {"selected_experts": [], "selected_expert_fns": []}
        
        nih_dataloader = n_folds[seed]

        system_accuracy = []
        expert_accuracy = []
        classifier_accuracy = []
        alone_classifier = []
        validation_loss = []
        cov_classifier = []

        system_accuracy_full = []
        expert_accuracy_full = []
        classifier_accuracy_full = []
        alone_classifier_full = []
        validation_loss_full = []
        cov_classifier_full = []

        f1_experts = {}
        ac_b = {}

        for labelerId in list(param["LABELER_IDS"]):
            f1_experts[labelerId] = []
            ac_b[labelerId] = []

        for fold_idx in range(param["K"]):
        #for fold_idx in range(1):
            print(f'Running fold {fold_idx+1} out of {param["K"]}')

            expert_train, expert_val, expert_test = nih_dataloader.get_dataset_for_folder(fold_idx)
            image_container = nih_dataloader.get_ImageContainer()
            expert_train_dataset = ds.NIHDataset(expert_train, preload=False, preprocess=False, param=param, image_container=image_container)
            expert_val_dataset = ds.NIHDataset(expert_val, preload=False, preprocess=False, param=param, image_container=image_container)
            expert_test_dataset = ds.NIHDataset(expert_test, preload=False, preprocess=False, param=param, image_container=image_container)

            expert_fns = []
            experts = []
            #Get init labeled indices with k same images and n-k different images
            #k=None means random indieces
            k = param["OVERLAP"]
            all_indices = list(range(len(expert_train_dataset.getAllIndices())))
            #If no k is set than it selects one randomly
            if k is None:
                k = random.randint(0, al_param["INITIAL_SIZE"])
                print("Random k: ", str(k))
                if NEPTUNE:
                    run["param/overlap_random_k"] = k
            else:
                k = round(al_param["INITIAL_SIZE"]*k/100)
                if NEPTUNE:
                    run["param/overlap_random_k"] = k
            indices = sampleIndices(n = al_param["INITIAL_SIZE"], k = k, all_indices = all_indices, experten = list(param["LABELER_IDS"]), seed = seed)

            if NEPTUNE:
                run["param/overlap_k"] = k

            print("Random indices:")
            print(indices)

            #Create the experts
            for i, labelerId in enumerate(list(param["LABELER_IDS"])):
                nih_expert = Expert(dataset = basic_Dataset, labeler_id=labelerId)
                experts.append(nih_expert)
                #expert_fns.append(nih_expert.predictWithModel)
                expert_fns.append(nih_expert.predict_model_predefined)
                
                if param["MOD"] == "confidence":
                    mod, met = getExpertModel(indices[i], expert_train_dataset, expert_val_dataset, expert_test_dataset, nih_expert, al_param, seed, fold_idx, image_container=image_container)
                    nih_expert.setModel(mod)
                    f1_experts[labelerId].append(met["f1"])
                    ac_b[labelerId].append(met["accurancy_balanced"])
            if param["MOD"] == "disagreement" or param["MOD"]== "disagreement_diff":
                expert_models, met = getExpertModels(indices, experts, expert_train_dataset, expert_val_dataset, expert_test_dataset, al_param, seed, fold_idx, mod=param["MOD"], image_container=image_container)
                for ex, ex_model in enumerate(expert_models):
                    experts[ex].setModel(ex_model)
                for labelerId in list(param["LABELER_IDS"]):
                    f1_experts[labelerId].append(met[labelerId]["f1"])
                    ac_b[labelerId].append(met[labelerId]["accurancy_balanced"])
            
                    
            num_experts = len(expert_fns)

            #Use new Expert
            #expert_fns = [experts[j] for j in range(n)]
            
            model = model = vres.ResNet50_defer(int(param["n_classes"]) + num_experts)
            # print(model)
            #trainD = GalaxyZooDataset()
            #valD = GalaxyZooDataset(split="val")
            
            train_loader, val_loader, test_loader = nih_dataloader.get_data_loader_for_fold(fold_idx)
            full_dataloader = nih_dataloader.getFullDataloader()
            
            #Mayby only the train and validation set for speed up
            for expert in experts:
                expert.init_model_predictions(full_dataloader)
            
            metrics, metrics_full = train(model, train_loader, val_loader, test_loader, expert_fns, param, seed=seed, experts=experts, fold=fold_idx, full_dataloader=full_dataloader)

            #Metrics for this fold within this seed
            system_accuracy.append(metrics["system_accuracy"])
            expert_accuracy.append(metrics["expert_accuracy"])
            classifier_accuracy.append(metrics["classifier_accuracy"])
            alone_classifier.append(metrics["alone_classifier"])
            validation_loss.append(metrics["validation_loss"])
            cov_classifier.append(metrics["cov_classifier"])

            system_accuracy_full.append(metrics_full["system_accuracy"])
            expert_accuracy_full.append(metrics_full["expert_accuracy"])
            classifier_accuracy_full.append(metrics_full["classifier_accuracy"])
            alone_classifier_full.append(metrics_full["alone_classifier"])
            validation_loss_full.append(metrics_full["validation_loss"])
            cov_classifier_full.append(metrics_full["cov_classifier"])

            if NEPTUNE:
                run[f"test_{seed}/system_accuracy"].append(system_accuracy, step=fold_idx)
                run[f"test_{seed}/expert_accuracy"].append(expert_accuracy, step=fold_idx)
                run[f"test_{seed}/classifier_accuracy"].append(classifier_accuracy, step=fold_idx)
                run[f"test_{seed}/alone_classifier"].append(alone_classifier, step=fold_idx)
                run[f"test_{seed}/validation_loss"].append(validation_loss, step=fold_idx)
                run[f"test_{seed}/classifier"].append(cov_classifier, step=fold_idx)

                run[f"full_{seed}/full_system_accuracy"].append(system_accuracy_full, step=fold_idx)
                run[f"full_{seed}/full_expert_accuracy"].append(expert_accuracy_full, step=fold_idx)
                run[f"full_{seed}/full_classifier_accuracy"].append(classifier_accuracy_full, step=fold_idx)
                run[f"full_{seed}/full_alone_classifier"].append(alone_classifier_full, step=fold_idx)
                run[f"full_{seed}/full_validation_loss"].append(validation_loss_full, step=fold_idx)
                run[f"full_{seed}/full_classifier"].append(cov_classifier_full, step=fold_idx)

        #Mean of all folds
        all_system_accuracy.append(np.mean(system_accuracy))
        all_expert_accuracy.append(np.mean(expert_accuracy))
        all_classifier_accuracy.append(np.mean(classifier_accuracy))
        all_alone_classifier.append(np.mean(alone_classifier))
        all_validation_loss.append(np.mean(validation_loss))
        all_cov_classifier.append(np.mean(cov_classifier))

        all_system_accuracy_full.append(np.mean(system_accuracy_full))
        all_expert_accuracy_full.append(np.mean(expert_accuracy_full))
        all_classifier_accuracy_full.append(np.mean(classifier_accuracy_full))
        all_alone_classifier_full.append(np.mean(alone_classifier_full))
        all_validation_loss_full.append(np.mean(validation_loss_full))
        all_cov_classifier_full.append(np.mean(cov_classifier_full))

        for labelerId in list(param["LABELER_IDS"]):
            all_f1_experts[labelerId].append(np.mean(f1_experts[labelerId]))
            all_ac_b[labelerId].append(np.mean(ac_b[labelerId]))

        if NEPTUNE:
            run[f"test/mean_system_accuracy"].append(np.mean(system_accuracy), step=seed)
            run[f"test/mean_expert_accuracy"].append(np.mean(expert_accuracy), step=seed)
            run[f"test/mean_classifier_accuracy"].append(np.mean(classifier_accuracy), step=seed)
            run[f"test/mean_alone_classifier"].append(np.mean(alone_classifier), step=seed)
            run[f"test/mean_validation_loss"].append(np.mean(validation_loss), step=seed)
            run[f"test/cov_classifier"].append(np.mean(cov_classifier), step=seed)

            run[f"full/mean_system_accuracy"].append(np.mean(system_accuracy_full), step=seed)
            run[f"full/mean_expert_accuracy"].append(np.mean(expert_accuracy_full), step=seed)
            run[f"full/mean_classifier_accuracy"].append(np.mean(classifier_accuracy_full), step=seed)
            run[f"full/mean_alone_classifier"].append(np.mean(alone_classifier_full), step=seed)
            run[f"full/mean_validation_loss"].append(np.mean(validation_loss_full), step=seed)
            run[f"full/cov_classifier"].append(np.mean(cov_classifier_full), step=seed)

            for labelerId in list(param["LABELER_IDS"]):
                run[f"test/expert_{labelerId}/f1"].append(np.mean(np.mean(f1_experts[labelerId])), step=seed)
                run[f"test/expert_{labelerId}/accurancy_balanced"].append(np.mean(np.mean(ac_b[labelerId])), step=seed)

            
    if NEPTUNE:
        run["mean_test/mean_system_accuracy"].append(np.mean(all_system_accuracy))
        run["mean_test/mean_expert_accuracy"].append(np.mean(all_expert_accuracy))
        run["mean_test/mean_classifier_accuracy"].append(np.mean(all_classifier_accuracy))
        run["mean_test/mean_alone_classifier"].append(np.mean(all_alone_classifier))
        run["mean_test/mean_validation_loss"].append(np.mean(all_validation_loss))
        run["mean_test/cov_classifier"].append(np.mean(all_cov_classifier))

        run["mean_full/mean_system_accuracy"].append(np.mean(all_system_accuracy_full))
        run["mean_full/mean_expert_accuracy"].append(np.mean(all_expert_accuracy_full))
        run["mean_full/mean_classifier_accuracy"].append(np.mean(all_classifier_accuracy_full))
        run["mean_full/mean_alone_classifier"].append(np.mean(all_alone_classifier_full))
        run["mean_full/mean_validation_loss"].append(np.mean(all_validation_loss_full))
        run["mean_full/cov_classifier"].append(np.mean(all_cov_classifier_full))

        for labelerId in list(param["LABELER_IDS"]):
            run[f"mean_test/expert_{labelerId}/f1"].append(np.mean(np.mean(all_f1_experts[labelerId])), step=seed)
            run[f"mean_test/expert_{labelerId}/accurancy_balanced"].append(np.mean(np.mean(all_ac_b[labelerId])), step=seed)


In [None]:
def train(model, train_loader, valid_loader, test_loader, expert_fns, config, seed="", experts=None, fold=None, full_dataloader=None):

    print("Start L2D Training")
    
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    
    n_classes = config["n_classes"] + len(expert_fns)
    kwargs = {"num_workers": 0, "pin_memory": True}

    model = model.to(device)
    cudnn.benchmark = True
    optimizer = torch.optim.Adam(
        model.parameters(), config["lr"], weight_decay=config["weight_decay"]
    )
    criterion = vlos.Criterion()
    loss_fn = getattr(criterion, config["loss_type"])
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, len(train_loader) * config["epochs"]
    )
    best_validation_loss = np.inf
    patience = 0
    iters = 0
    warmup_iters = config["warmup_epochs"] * len(train_loader)
    lrate = config["lr"]

    for epoch in range(0, config["epochs"]):
        iters, train_loss = train_epoch(
            iters,
            warmup_iters,
            lrate,
            train_loader,
            model,
            optimizer,
            scheduler,
            epoch,
            expert_fns,
            loss_fn,
            n_classes,
            config["alpha"],
            config,
        )

        experts_fns_eval = []
        for expert in experts:
            experts_fns_eval.append(expert.predict)
        #metrics = evaluate(model, expert_fns, loss_fn, n_classes, valid_loader, config)

        metrics_train = evaluate(model, experts_fns_eval, loss_fn, n_classes, train_loader, config, print_m=False)
        if NEPTUNE:
            run[f"train_{seed}_{fold}/system_accuracy"].append(metrics_train["system_accuracy"])
            run[f"train_{seed}_{fold}/expert_accuracy"].append(metrics_train["expert_accuracy"])
            run[f"train_{seed}_{fold}/classifier_accuracy"].append(metrics_train["classifier_accuracy"])
            run[f"train_{seed}_{fold}/alone_classifier"].append(metrics_train["alone_classifier"])
            run[f"train_{seed}_{fold}/validation_loss"].append(metrics_train["validation_loss"])
            run[f"train_{seed}_{fold}/cov_classifier"].append(metrics_train["cov_classifier"])
        
        metrics = evaluate(model, experts_fns_eval, loss_fn, n_classes, valid_loader, config)

        if NEPTUNE:
            run[f"val_{seed}_{fold}/system_accuracy"].append(metrics["system_accuracy"])
            run[f"val_{seed}_{fold}/expert_accuracy"].append(metrics["expert_accuracy"])
            run[f"val_{seed}_{fold}/classifier_accuracy"].append(metrics["classifier_accuracy"])
            run[f"val_{seed}_{fold}/alone_classifier"].append(metrics["alone_classifier"])
            run[f"val_{seed}_{fold}/validation_loss"].append(metrics["validation_loss"])
            run[f"val_{seed}_{fold}/cov_classifier"].append(metrics["cov_classifier"])

        validation_loss = metrics["validation_loss"]

        if validation_loss < best_validation_loss:
            """best_validation_loss = validation_loss
            print(
                "Saving the model with classifier accuracy {}".format(
                    metrics["classifier_accuracy"]
                ),
                flush=True,
            )
            save_path = os.path.join(
                config["ckp_dir"],
                config["experiment_name"]
                + "_"
                + str(len(expert_fns))
                + "_experts"
                + "_seed_"
                + str(seed),
            )"""
            #torch.save(model.state_dict(), save_path + ".pt")
            # Additionally save the whole config dict
            #with open(save_path + ".json", "w") as f:
            #    json.dump(config, f)
            patience = 0
        else:
            patience += 1

        if patience >= config["patience"]:
            print("Early Exiting Training.", flush=True)
            break
            
    print("Evaluate on Test Data")
    metrics = evaluate(model, experts_fns_eval, loss_fn, n_classes, test_loader, config)
    if NEPTUNE:
        run[f"test_{seed}_{fold}/system_accuracy"].append(metrics["system_accuracy"])
        run[f"test_{seed}_{fold}/expert_accuracy"].append(metrics["expert_accuracy"])
        run[f"test_{seed}_{fold}/classifier_accuracy"].append(metrics["classifier_accuracy"])
        run[f"test_{seed}_{fold}/alone_classifier"].append(metrics["alone_classifier"])
        run[f"test_{seed}_{fold}/validation_loss"].append(metrics["validation_loss"])
        run[f"test_{seed}_{fold}/cov_classifier"].append(metrics["cov_classifier"])

    metrics_full = None
    if full_dataloader is not None:
        print("Test on all Data")
        metrics_full = evaluate(model, experts_fns_eval, loss_fn, n_classes, full_dataloader, config)
        if NEPTUNE:
            run[f"test_{seed}_{fold}/system_accuracy_all"].append(metrics_full["system_accuracy"])
            run[f"test_{seed}_{fold}/expert_accuracy_all"].append(metrics_full["expert_accuracy"])
            run[f"test_{seed}_{fold}/classifier_accuracy_all"].append(metrics_full["classifier_accuracy"])
            run[f"test_{seed}_{fold}/alone_classifier_all"].append(metrics_full["alone_classifier"])
            run[f"test_{seed}_{fold}/validation_loss_all"].append(metrics_full["validation_loss"])
            run[f"test_{seed}_{fold}/cov_classifier_all"].append(metrics_full["cov_classifier"])
        
    return metrics, metrics_full

In [None]:
def train_epoch(
    iters,
    warmup_iters,
    lrate,
    train_loader,
    model,
    optimizer,
    scheduler,
    epoch,
    expert_fns,
    loss_fn,
    n_classes,
    alpha,
    config,
):
    """ Train for one epoch """
    
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    model.train()
    end = time.time()

    epoch_train_loss = []

    for i, (input, target, hpred) in enumerate(train_loader):
        if iters < warmup_iters:
            lr = lrate * float(iters) / warmup_iters
            #print(iters, lr)
            for param_group in optimizer.param_groups:
                param_group["lr"] = lr

        target = target.to(device)
        input = input.to(device)
        hpred = hpred

        # compute output
        output = model(input)

        if config["loss_type"] == "softmax":
            output = F.softmax(output, dim=1)

        # get expert  predictions and costs
        #batch_size = output.size()[0]  # batch_size
        batch_size = output.size(0)
        collection_Ms = []
        # We only support \alpha=1
        for _, fn in enumerate(expert_fns):
            # We assume each expert function has access to the extra metadata, even if they don't use it.
            m = fn(input, target, hpred)
            #m = fn(hpred)
            
            m2 = [0] * batch_size
            for j in range(0, batch_size):
                if m[j] == target[j].item():
                    m[j] = 1
                    m2[j] = alpha
                else:
                    m[j] = 0
                    m2[j] = 1
            m = torch.tensor(m)
            m2 = torch.tensor(m2)
            m = m.to(device)
            m2 = m2.to(device)
            #Optimization
            #m2 = torch.where(m == target, alpha, 1)
            #m = torch.where(m == target, 1, 0)
            
            collection_Ms.append((m, m2))

        # compute loss
        loss = loss_fn(output, target, collection_Ms, n_classes)
        epoch_train_loss.append(loss.item())

        # measure accuracy and record loss
        prec1 = accuracy(output.data, target, topk=(1,))[0]
        losses.update(loss.data.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if not iters < warmup_iters:
            scheduler.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        iters += 1

        if i % 10 == 0:
            print(
                "Epoch: [{0}][{1}/{2}]\t"
                "Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t"
                "Loss {loss.val:.4f} ({loss.avg:.4f})\t"
                "Prec@1 {top1.val:.3f} ({top1.avg:.3f})".format(
                    epoch,
                    i,
                    len(train_loader),
                    batch_time=batch_time,
                    loss=losses,
                    top1=top1,
                ),
                flush=True,
            )

    return iters, np.average(epoch_train_loss)

In [None]:
def evaluate(model, expert_fns, loss_fn, n_classes, data_loader, config, print_m=True):
    """
    Computes metrics for deferal
    -----
    Arguments:
    net: model
    expert_fn: expert model
    n_classes: number of classes
    loader: data loader
    """
    
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    
    correct = 0
    correct_sys = 0
    exp = 0
    exp_total = 0
    total = 0
    real_total = 0
    alone_correct = 0
    #  === Individual Expert Accuracies === #
    expert_correct_dic = {k: 0 for k in range(len(expert_fns))}
    expert_total_dic = {k: 0 for k in range(len(expert_fns))}
    #  === Individual  Expert Accuracies === #
    alpha = config["alpha"]
    losses = []
    with torch.no_grad():
        for data in data_loader:
            images, labels, hpred = data
            images, labels, hpred = images.to(device), labels.to(device), hpred
            outputs = model(images)
            if config["loss_type"] == "softmax":
                outputs = F.softmax(outputs, dim=1)
            elif config["loss_type"] == "ova":
                ouputs = F.sigmoid(outputs)

            _, predicted = torch.max(outputs.data, 1)
            #batch_size = outputs.size()[0]  # batch_size
            batch_size = outputs.size(0)

            expert_predictions = []
            collection_Ms = []  # a collection of 3-tuple
            for i, fn in enumerate(expert_fns, 0):
                exp_prediction1 = fn(images, labels, hpred)
                #exp_prediction1 = fn(hpred)
                m = [0] * batch_size
                m2 = [0] * batch_size
                for j in range(0, batch_size):
                    if exp_prediction1[j] == labels[j].item():
                        m[j] = 1
                        m2[j] = alpha
                    else:
                        m[j] = 0
                        m2[j] = 1

                m = torch.tensor(m)
                m2 = torch.tensor(m2)
                m = m.to(device)
                m2 = m2.to(device)
                #m = torch.tensor([1 if pred == label.item() else 0 for pred, label in zip(exp_prediction1, labels)])
                #m2 = torch.tensor([alpha if pred == label.item() else 1 for pred, label in zip(exp_prediction1, labels)])

                collection_Ms.append((m.to(device), m2.to(device)))
                expert_predictions.append(exp_prediction1)
                #End of optimization
                
                collection_Ms.append((m, m2))
                expert_predictions.append(exp_prediction1)

            loss = loss_fn(outputs, labels, collection_Ms, n_classes)
            losses.append(loss.item())

            for i in range(batch_size):
                r = predicted[i].item() >= n_classes - len(expert_fns)
                prediction = predicted[i]
                if predicted[i] >= n_classes - len(expert_fns):
                    max_idx = 0
                    # get second max
                    for j in range(0, n_classes - len(expert_fns)):
                        if outputs.data[i][j] >= outputs.data[i][max_idx]:
                            max_idx = j
                    prediction = max_idx
                else:
                    prediction = predicted[i]
                alone_correct += (prediction == labels[i]).item()
                if r == 0:
                    total += 1
                    correct += (predicted[i] == labels[i]).item()
                    correct_sys += (predicted[i] == labels[i]).item()
                elif r == 1:
                    deferred_exp = (predicted[i] - (n_classes - len(expert_fns))).item()
                    # cdeferred_exp = ((n_classes - 1) - predicted[i]).item()  # reverse order, as in loss function
                    exp_prediction = expert_predictions[deferred_exp][i]
                    #
                    # Deferral accuracy: No matter expert ===
                    exp += exp_prediction == labels[i].item()
                    exp_total += 1
                    # Individual Expert Accuracy ===
                    expert_correct_dic[deferred_exp] += (
                        exp_prediction == labels[i].item()
                    )
                    expert_total_dic[deferred_exp] += 1
                    #
                    correct_sys += exp_prediction == labels[i].item()
                real_total += 1
    cov = str(total) + str(" out of") + str(real_total)

    #  === Individual Expert Accuracies === #
    expert_accuracies = {
        "expert_{}".format(str(k)): 100
        * expert_correct_dic[k]
        / (expert_total_dic[k] + 0.0002)
        for k in range(len(expert_fns))
    }
    # Add expert accuracies dict
    to_print = {
        "coverage": cov,
        "system_accuracy": 100 * correct_sys / real_total,
        "expert_accuracy": 100 * exp / (exp_total + 0.0002),
        "classifier_accuracy": 100 * correct / (total + 0.0001),
        "alone_classifier": 100 * alone_correct / real_total,
        "validation_loss": np.average(losses),
        "n_experts": len(expert_fns),
        **expert_accuracies,
    }
    if print_m:
        print(to_print, flush=True)
    to_print["cov_classifier"] = total
    return to_print

In [None]:
import copy

def run_experiment(param):
    al_param = param["AL"]
    al_param["PATH"] = param["PATH"]
    l2d_param = param["L2D"]
    
    run_param = copy.deepcopy(param)

    basic_Dataset = ds.BasicDataset(param["PATH"], param["TARGET"])

    for labeler_ids in param["LABELER_IDS"]:
        run_param["LABELER_IDS"] = labeler_ids

        n_folds = {}
        for seed in SEEDS:
            #Use new Dataset
            n_folds[seed] = ds.NIH_K_Fold_Dataloader(
                dataset = basic_Dataset,
                k = param["K"],
                labelerIds = labeler_ids,
                train_batch_size = param["TRAIN_BATCH_SIZE"],
                test_batch_size = param["TEST_BATCH_SIZE"],
                seed = seed,
                #maxLabels = maxL,
                preprocess = False,
                preload = param["PRELOAD"],
                prebuild = param["PREBUILD"],
                param = param
            )

        for init_size in param["AL"]["INITIAL_SIZE"]:
            run_param["AL"]["INITIAL_SIZE"] = init_size

            for batch_size_al in param["AL"]["BATCH_SIZE_AL"]:
                run_param["AL"]["BATCH_SIZE_AL"] = batch_size_al

                for max_rounds in param["AL"]["MAX_ROUNDS"]:
                    run_param["AL"]["MAX_ROUNDS"] = max_rounds

                    if (init_size == 64) and (max_rounds >= 12) and (batch_size_al >= 8):
                        continue
                    #if (max_rounds * batch_size_al >= 64):
                    #    continue
                    if (init_size + max_rounds * batch_size_al >= 150):
                        continue

                    for cost in param["AL"]["COST"]:
                        run_param["AL"]["COST"] = cost
                        run_param["AL"]["cost"] = convert_cost_to_string(cost)
                        
                        for mod in param["MOD"]:
                            run_param["MOD"] = mod

                            for overlap in param["AL"]["OVERLAP"]:
                                run_param["OVERLAP"] = overlap

                                #if (mod == "confidence") and (overlap is not None):
                                #    continue
                                    
                                #if (mod != "confidence") and (overlap is None):
                                #    continue

                            
                                if NEPTUNE:
                                    global run
                                    run = neptune.init_run(
                                        project=config_neptune["project"],
                                        api_token=config_neptune["api_token"],
                                        #custom_run_id="AL_" + 
                                    )
                                    run["param"] = run_param

                                print("/n /n NEW RUN /n")
                                print("Initial size: " + str(init_size))
                                print("Batch size: " + str(batch_size_al))
                                print("Max rounds: " + str(max_rounds))
                                print("Cost: " + str(cost))
                                print("Mod: " + str(mod))
                                if overlap is not None:
                                    print("Overlap: " + str(overlap))
                                else:
                                    print("Overlap: None (random)")

                                increase_experts(run_param, basic_Dataset, SEEDS, n_folds)

                                if NEPTUNE:
                                    run.stop()

In [None]:
param = {
    "AL": { #Parameter for Active Learning
        "INITIAL_SIZE": [8, 16, 32], #
        "EPOCH_TRAIN": 10, #
        "n_dataset": 2, #Number Classes
        "BATCH_SIZE": 4,
        "MAX_ROUNDS": [2, 4, 8],
        "BATCH_SIZE_AL": [4, 8, 16],
        #"EPOCHS_DEFER": 5,
        "COST": [(10, 0)], #Cost for Cost sensitiv learning
        #"TRAIN REJECTOR": False,
        "PRELOAD": True,
        "PREPROCESS": True,
        "OVERLAP": [0, 100]
    },
    "L2D": { # Parameter for Learning to defer
        "maxLabels": 16,
    },
    "TARGET": "Airspace_Opacity",
    "PATH": "../Datasets/NIH/",
    "K": 10, #Number of folds
    "LABELER_IDS": [[4323195249, 4295232296]],
    
    "batch_size": 64,
    "alpha": 1.0, #scaling parameter for the loss function, default=1.0
    #"epochs": 50,
    "epochs": 50,
    "patience": 15, #number of patience steps for early stopping the training
    "expert_type": "MLPMixer", #specify the expert type. For the type of experts available, see-> models -> experts. defualt=predict
    "n_classes": 2, #K for K class classification
    "k": 0, #
    "n_experts": 2, #
    "lr": 0.001, #learning rate
    "weight_decay": 5e-4, #
    "warmup_epochs": 5, #
    "loss_type": "softmax", #surrogate loss type for learning to defer
    "ckp_dir": "./Models", #directory name to save the checkpoints
    "experiment_name": "multiple_experts", #specify the experiment name. Checkpoints will be saved with this name
    #
    "TRAIN_BATCH_SIZE": 64,
    "TEST_BATCH_SIZE": 64,
    "NUM_EXPERTS": 2,

    "GT": True, # Determines if the classifier gets all data with GT Label or only the labeld data
    "MOD": ["confidence", "disagreement", "disagreement_diff"],

    "PRELOAD": True,
    "PREBUILD": False,
}
SEEDS = [42]

In [None]:
import neptune

import json

with open('neptune_config.json', 'r') as f:
    config = json.load(f)

config_neptune = config["neptune"]

NEPTUNE = True

In [None]:
def convert_cost_to_string(tp):
    return "(" + str(tp[0]) + ", " + str(tp[1]) + ")"

In [None]:
def convert_list_to_string(li):
    result = "["
    for el in li[:-2]:
        result = result + str(el)
    result = result + "]"
    return 

In [None]:
run_experiment(param)

{1: 2, 2: 4, 3: 6}

In [None]:
sklearn.metrics.confusion_matrix(label_list, predictions_list, labels=[0, 1]).ravel()

In [18]:
sklearn.metrics.confusion_matrix([0, 0, 1, 0, 1], [0, 0, 1, 1, 1], labels=[0, 1])

array([[2, 1],
       [0, 2]])

In [18]:
param = {
    "AL": { #Parameter for Active Learning
        "INITIAL_SIZE": [8], #
        "EPOCH_TRAIN": 12, #
        "n_dataset": 2, #Number Classes
        "BATCH_SIZE": 8,
        "MAX_ROUNDS": [4],
        "BATCH_SIZE_AL": [8],
        #"EPOCHS_DEFER": 5,
        "COST": [(10, 0)], #Cost for Cost sensitiv learning
        #"TRAIN REJECTOR": False,
        "PRELOAD": True,
        "PREPROCESS": False,
        "OVERLAP": [50]
    },
    "L2D": { # Parameter for Learning to defer
        "maxLabels": 16,
    },
    "TARGET": "Airspace_Opacity",
    "PATH": "../Datasets/NIH/",
    "K": 10, #Number of folds
    "LABELER_IDS": [[4323195249, 4295232296]],
    
    "batch_size": 64,
    "alpha": 1.0, #scaling parameter for the loss function, default=1.0
    #"epochs": 50,
    "epochs": 50,
    "patience": 15, #number of patience steps for early stopping the training
    "expert_type": "MLPMixer", #specify the expert type. For the type of experts available, see-> models -> experts. defualt=predict
    "n_classes": 2, #K for K class classification
    "k": 0, #
    "n_experts": 2, #
    "lr": 0.001, #learning rate
    "weight_decay": 5e-4, #
    "warmup_epochs": 5, #
    "loss_type": "softmax", #surrogate loss type for learning to defer
    "ckp_dir": "./Models", #directory name to save the checkpoints
    "experiment_name": "multiple_experts", #specify the experiment name. Checkpoints will be saved with this name
    #
    "TRAIN_BATCH_SIZE": 64,
    "TEST_BATCH_SIZE": 64,
    "NUM_EXPERTS": 2,

    "GT": True, # Determines if the classifier gets all data with GT Label or only the labeld data
    #"MOD": ["confidence", "disagreement", "disagreement_diff"]
    #"MOD": ["disagreement", "disagreement_diff"],
    "MOD": ["disagreement_diff"],

    "PRELOAD": True,
    "PREBUILD": True,
}
SEEDS = [42]
NEPTUNE = False

In [19]:
CUDA_LAUNCH_BLOCKING=1

In [21]:
run_experiment(param)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.predictions["Image ID"] = self.predictions["Image ID"].astype('category')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.predictions["Image ID"] = self.predictions["Image ID"].astype('category')


Loaded set number 0
Loaded set number 1
Loaded set number 2
Loaded set number 3
Loaded set number 4
Loaded set number 5
Loaded set number 6
Loaded set number 7
Loaded set number 8
Loaded set number 9
/n /n NEW RUN /n
Initial size: 8
Batch size: 8
Max rounds: 4
Cost: (10, 0)
Mod: disagreement_diff
Overlap: 50
run for seed 42
Running fold 1 out of 10
Random indices:
[[114, 25, 281, 250, 228, 142, 104, 558], [114, 25, 281, 250, 89, 432, 32, 30]]
Complete data generation
Starting with expert 0
Complete dataloader generation
Expert: 4323195249
Epoch: [0][0/1]	Time 0.197 (0.197)	Loss 1.0003 (1.0003)	Prec@1 50.000 (50.000)
Epoch: [1][0/1]	Time 0.167 (0.167)	Loss 0.9312 (0.9312)	Prec@1 100.000 (100.000)
Epoch: [2][0/1]	Time 0.186 (0.186)	Loss 0.8501 (0.8501)	Prec@1 100.000 (100.000)
Epoch: [3][0/1]	Time 0.190 (0.190)	Loss 0.7644 (0.7644)	Prec@1 100.000 (100.000)
Epoch: [4][0/1]	Time 0.186 (0.186)	Loss 0.6758 (0.6758)	Prec@1 100.000 (100.000)
Epoch: [5][0/1]	Time 0.191 (0.191)	Loss 0.5873 (0.58

In [19]:
%prun run_experiment(param)

/n /n NEW RUN /n
Initial size: 8
Batch size: 8
Max rounds: 4
Cost: (10, 0)
Mod: disagreement_diff
Overlap: 50
run for seed 42
Running fold 1 out of 10




2
Random indices:
[[114, 25, 281, 250, 228, 142, 104, 558], [114, 25, 281, 250, 89, 432, 32, 30]]
Complete data generation
Starting with expert 0
Complete dataloader generation
Expert: 4323195249
Epoch: [0][0/1]	Time 0.436 (0.436)	Loss 2.2495 (2.2495)	Prec@1 37.500 (37.500)
Epoch: [1][0/1]	Time 0.178 (0.178)	Loss 2.1297 (2.1297)	Prec@1 12.500 (12.500)
Epoch: [2][0/1]	Time 0.168 (0.168)	Loss 2.0430 (2.0430)	Prec@1 25.000 (25.000)
Epoch: [3][0/1]	Time 0.177 (0.177)	Loss 1.9525 (1.9525)	Prec@1 37.500 (37.500)
Epoch: [4][0/1]	Time 0.170 (0.170)	Loss 1.8437 (1.8437)	Prec@1 50.000 (50.000)
Epoch: [5][0/1]	Time 0.174 (0.174)	Loss 1.7323 (1.7323)	Prec@1 50.000 (50.000)
Epoch: [6][0/1]	Time 0.172 (0.172)	Loss 1.6165 (1.6165)	Prec@1 62.500 (62.500)
Epoch: [7][0/1]	Time 0.171 (0.171)	Loss 1.5066 (1.5066)	Prec@1 75.000 (75.000)
Epoch: [8][0/1]	Time 0.173 (0.173)	Loss 1.4160 (1.4160)	Prec@1 75.000 (75.000)
Epoch: [9][0/1]	Time 0.173 (0.173)	Loss 1.3479 (1.3479)	Prec@1 75.000 (75.000)
Epoch: [10][0/

         606391518 function calls (594553974 primitive calls) in 2647.356 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
    23180  735.012    0.032  735.077    0.032 {method 'poll' of 'select.poll' objects}
    11560  310.787    0.027  312.930    0.027 {built-in method posix.fork}
   834576  187.415    0.000  187.415    0.000 {pandas._libs.ops.scalar_compare}
    60556  137.833    0.002  137.833    0.002 {method 'cpu' of 'torch._C._TensorBase' objects}
  4303826  130.149    0.000  130.149    0.000 {method 'item' of 'torch._C._TensorBase' objects}
   283723  102.956    0.000  102.956    0.000 {method 'acquire' of '_thread.lock' objects}
     9560   82.743    0.009   82.743    0.009 {method 'run_backward' of 'torch._C._EngineBase' objects}
     1020   80.197    0.079 1238.169    1.214 2106997364.py:1(evaluate)
   119644   79.967    0.001   79.969    0.001 {method 'to' of 'torch._C._TensorBase' objects}
   663384   37.550   