In [1]:
import copy
import json
import math
import os
import random
import shutil
import time
import pandas as pd

import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.nn.functional as F
import torch.optim
import torch.utils.data

import sklearn

import torchvision.transforms as transforms
from PIL import Image

  warn(f"Failed to load image Python extension: {e}")


In [2]:
#import Verma.main_increase_experts_hard_coded as verm
import Verma.experts as vexp
import Verma.losses as vlos
from Verma.utils import AverageMeter, accuracy
import Verma.resnet50 as vres
from AL.utils import *
from AL.metrics import *

import NIH.Dataset as ds

In [3]:
# Experiment configuration shared by the cells below.
param = {
    "batch_size": 64,
    "alpha": 1.0, #scaling parameter for the loss function, default=1.0
    "epochs": 150,
    "patience": 50, #number of patience steps for early stopping the training
    "expert_type": "MLPMixer", #specify the expert type. For the type of experts available, see-> models -> experts. default=predict
    "n_classes": 2, #K for K class classification
    "k": 0, #
    "n_experts": 2, #
    "lr": 0.001, #learning rate
    "weight_decay": 5e-4, #
    "warmup_epochs": 5, #
    "loss_type": "softmax", #surrogate loss type for learning to defer
    "ckp_dir": "./Models", #directory name to save the checkpoints
    "experiment_name": "multiple_experts", #specify the experiment name. Checkpoints will be saved with this name
    #
    "TRAIN_BATCH_SIZE": 64,
    "TEST_BATCH_SIZE": 64,
    "NUM_EXPERTS": 2,
    "K": 10,
    "TARGET": "Airspace_Opacity",
    "LABELER_IDS": [4323195249, 4295232296],
    #
    "maxLabels": 16,
    # Dataset root; NIHExpertDatasetMemory loads images from <PATH>images/.
    # The trailing comma was missing here, which made the whole dict a syntax error.
    "PATH": "../Datasets/NIH/",
    #
    # Passed as `cost=` to my_CrossEntropyLoss when training the expert model.
    "Cost": (9, 0)
}

In [4]:
def set_seed(seed):
    """Seed the Python, NumPy and PyTorch random number generators.

    seed: value handed to every generator. The CUDA generators are only
    seeded when CUDA is available.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if not torch.cuda.is_available():
        return

    for cuda_seed_fn in (torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        cuda_seed_fn(seed)

In [5]:
class Expert:
    """One human labeler's annotations, optionally paired with a trained model.

    The labeler's stored labels are read from ``dataset.getData()`` (columns
    "Image ID" and ``str(labeler_id)``). A PyTorch model can be attached with
    ``setModel``/``loadModel`` and queried with the ``predict*Model`` methods.
    """

    def __init__(self, dataset, labeler_id, modus="perfect", param=None, nLabels=800, prob=0.5):
        """
        dataset: object exposing getData(), which must return a frame that can be
                 indexed by the columns "Image ID" and str(labeler_id)
        labeler_id: id of the labeler; its string form is the label column name
        modus: only "perfect" is handled here; in that mode the labeler's stored
               labels are used as predictions. For any other value
               ``self.predictions`` is not set and ``predict`` cannot be used.
        param, nLabels, prob: stored on the instance; not used by the methods below
        """
        self.labelerId = labeler_id
        self.dataset = dataset
        self.data = dataset.getData()[["Image ID", str(self.labelerId)]]
        self.nLabels = nLabels
        self.param = param
        self.prob = prob
        self.modus = modus

        if self.modus == "perfect":
            self.predictions = self.data

    def predict(self, img, target, fnames):
        """
        Look up the labeler's stored label for every image id in ``fnames``.

        img: the input image (unused)
        target: the GT label (unused)
        fnames: iterable of image ids (filenames)

        Returns a flat numpy array with the matching label values.
        """
        return np.array([self.predictions[self.predictions["Image ID"] == image_id][str(self.labelerId)].values for image_id in fnames]).ravel()

    def setModel(self, model):
        """Attach the model used by the predict*Model methods."""
        self.model = model

    def predictModel(self, img, target, fnames):
        """
        Run the attached model and return the arg-max class per sample.

        img: image tensor; a 3-dimensional tensor is treated as a single image
             and gets a leading batch dimension
        target, fnames: unused
        """
        if len(img.shape) == 3:
            img = img.unsqueeze(0)
        outputs = self.model(img)
        _, predicted = torch.max(outputs.data, 1)
        return predicted

    def predictImage(self, img):
        """Convenience wrapper around predictModel for callers that only have an image."""
        return self.predictModel(img, None, None)

    def getModel(self):
        """Return the attached model."""
        return self.model

    def saveModel(self, path, name):
        """Save the attached model to ``<path>/<name>_<labelerId>``."""
        # Fixed: the original referenced the undefined names PATH and labeler_id.
        torch.save(self.model, path + "/" + name + "_" + str(self.labelerId))

    def loadModel(self, path, name):
        """Load the model from ``<path>/<name>_<labelerId>`` and switch it to eval mode."""
        # Fixed: the original referenced the undefined names labeler_id and model.
        # NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
        self.model = torch.load(path + "/" + name + "_" + str(self.labelerId))
        self.model.eval()

    def predictWithModel(self, img, target, filename):
        """
        Checks with the model if the expert would be correct.
        If the model predicts 1, the true label is returned.
        If the model predicts 0, the opposite (binary) label is returned.
        """
        # Fixed: the original passed `self` twice and used the undefined name `fnames`.
        predicted = self.predictModel(img, target, filename)
        if predicted == 1:
            return target
        else:
            if target == 1:
                return 0
            else:
                return 1

In [6]:
class NIHExpertDataset():
    """Dataset over in-memory images that also carries one expert's predictions."""

    def __init__(self, images, filenames, targets, expert_fn, labeled, indices = None, expert_preds = None):
        """
        images: images, indexable by position
        filenames: image ids, aligned with images
        targets: labels
        expert_fn: expert function, called as expert_fn(images, targets, filenames)
                   when expert_preds is not given
        labeled: indicator array, 0 where the expert did not label the image
        indices: indices in the original dataset (if this subset is subsampled);
                 defaults to 0..len(targets)-1
        expert_preds: precomputed expert predictions; used instead of expert_fn
        """
        self.images = images
        self.filenames = filenames
        self.targets = np.array(targets)
        self.expert_fn = expert_fn
        self.labeled = np.array(labeled)
        normalize = transforms.Normalize(mean=[x / 255.0 for x in [125.3]],
                                         std=[x / 255.0 for x in [63.0]])
        self.transform_test = transforms.Compose([transforms.Resize(128),transforms.ToTensor(), normalize])
        if expert_preds is not None:
            # Copy: the masking below must not overwrite the caller's array.
            self.expert_preds = np.array(expert_preds)
        else:
            self.expert_preds = np.array(expert_fn(self.images, torch.FloatTensor(targets), self.filenames))
        # -1 marks samples that were not labeled by the expert.
        unlabeled_mask = self.labeled[:len(self.expert_preds)] == 0
        self.expert_preds[unlabeled_mask] = -1
        if indices is not None:
            self.indices = indices
        else:
            self.indices = np.array(list(range(len(self.targets))))

    def __getitem__(self, index):
        """Take the index of item and returns the image, label, expert prediction, index in original dataset and labeled flag"""
        label = self.targets[index]
        image = self.transform_test(self.images[index])
        expert_pred = self.expert_preds[index]
        indice = self.indices[index]
        labeled = self.labeled[index]
        return torch.FloatTensor(image), label, expert_pred, indice, labeled

    def __len__(self):
        """Number of samples (length of targets)."""
        return len(self.targets)
    
class NIHExpertDatasetMemory():
    """Dataset that loads images from disk on access and carries one expert's predictions."""

    def __init__(self, images, filenames, targets, expert_fn, labeled, indices = None, expert_preds = None, param=None):
        """
        images: preloaded images; only read when self.preload is True (it is set
                to False here, so images are loaded from disk by default)
        filenames: image ids; also used as file names below <PATH>/images/
        targets: labels
        expert_fn: expert function, called as
                   expert_fn(images, targets, fnames=filenames) when expert_preds
                   is not given
        labeled: indicator array, 0 where the expert did not label the image
        indices: indices in the original dataset (if this subset is subsampled);
                 defaults to 0..len(targets)-1
        expert_preds: precomputed expert predictions; used instead of expert_fn
        param: mapping that must contain "PATH", the dataset root directory

        Raises ValueError when param is missing or has no "PATH" entry.
        """
        if param is None or "PATH" not in param:
            # Fail early with a clear message instead of a TypeError on None["PATH"].
            raise ValueError('param must be a mapping with a "PATH" entry (dataset root directory)')

        self.images = images
        self.filenames = filenames
        self.targets = np.array(targets)
        self.expert_fn = expert_fn
        self.labeled = np.array(labeled)

        self.image_ids = filenames
        self.preload = False
        self.PATH = param["PATH"]

        normalize = transforms.Normalize(mean=[x / 255.0 for x in [125.3]],
                                         std=[x / 255.0 for x in [63.0]])
        self.transform_test = transforms.Compose([transforms.Resize(128), transforms.ToTensor(), normalize])
        if expert_preds is not None:
            # Copy: the masking below must not overwrite the caller's array.
            self.expert_preds = np.array(expert_preds)
        else:
            self.expert_preds = np.array(expert_fn(self.images, torch.FloatTensor(targets), fnames = self.filenames))
        # -1 marks samples that were not labeled by the expert.
        unlabeled_mask = self.labeled[:len(self.expert_preds)] == 0
        self.expert_preds[unlabeled_mask] = -1
        if indices is not None:
            self.indices = indices
        else:
            self.indices = np.array(list(range(len(self.targets))))

    def loadImage(self, idx):
        """
        Load one single image from <PATH>/images/<image id>, converted to RGB
        and resized to 244x244.
        """
        # os.path.join works whether or not PATH ends with a separator.
        image_path = os.path.join(self.PATH, "images", self.image_ids[idx])
        return Image.open(image_path).convert("RGB").resize((244,244))

    def getImage(self, idx):
        """
        Returns the image from index idx: the preloaded one when self.preload
        is True, otherwise freshly loaded from disk.
        """
        if self.preload:
            return self.images[idx]
        else:
            return self.loadImage(idx)


    def __getitem__(self, index):
        """Take the index of item and returns the image, label, expert prediction, index in original dataset, labeled flag and filename"""
        label = self.targets[index]
        img = self.getImage(index)
        image = self.transform_test(img)
        filename = self.filenames[index]
        expert_pred = self.expert_preds[index]
        indice = self.indices[index]
        labeled = self.labeled[index]
        return torch.FloatTensor(image), label, expert_pred, indice, labeled, filename

    def __len__(self):
        """Number of samples (length of targets)."""
        return len(self.targets)

In [7]:
from scipy.stats import entropy

def get_least_confident_points(model, data_loader, budget):
    '''
    Select the samples on which the model is least confident.

    Confidence is measured by the entropy of each model output row (higher
    entropy = less confident). The score can be changed, but make sure to take
    the max or min accordingly.

    model: callable applied to a batch of images; returns one row of scores per sample
    data_loader: yields 6-tuples (images, labels, expert_preds, indices, labeled, filenames)
    budget: number of samples to select; budget <= 0 selects nothing

    Returns a numpy array with the dataset indices (taken from the batches'
    `indices` entry) of the `budget` highest-entropy samples, ordered by
    increasing entropy.
    '''
    if budget <= 0:
        # Guard: a slice of [-0:] would select every sample instead of none.
        return np.array([], dtype=int)

    uncertainty_estimates = []
    indices_all = []
    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        for images, _, _, indices, _, _ in data_loader:
            outputs = model(images.to(device))
            batch_scores = outputs.detach().cpu().numpy()
            # One entropy value per row of the batch.
            uncertainty_estimates.extend(entropy(batch_scores, axis=1))
            indices_all.extend(int(idx) for idx in indices)

    indices_all = np.array(indices_all)
    top_budget_indices = np.argsort(uncertainty_estimates)[-budget:]
    return indices_all[top_budget_indices]
import copy
# Number of epochs for the pseudo-label deferral training; passed as `epochs`
# to run_reject_pseudo inside getExpertModel.
EPOCHS_DEFER = 10





#for trial in range(MAX_TRIALS):
def getExpertModel(train_dataset, val_dataset, test_dataset, expert, param):
    """
    Train a model that predicts whether `expert` agrees with the label, using
    active learning, and return that model.

    Steps, as implemented below:
      1. Draw INITIAL_SIZE random training indices as the initially labeled set.
      2. Train `model_expert` on the labeled set with run_expert.
      3. Train `model_lce` as a plain classifier with run_reject_class and keep
         a copy of its weights.
      4. Pseudo-label the unlabeled samples with `model_expert`, then train
         `model_lce` with run_reject_pseudo and record its 'system accuracy'.
      5. For MAX_ROUNDS rounds: move the BATCH_SIZE_AL highest-entropy unlabeled
         samples (get_least_confident_points) into the labeled set and repeat
         steps 2 and 4.

    train_dataset, val_dataset, test_dataset: objects providing getAllIndices(),
        getAllFilenames() and getAllTargets(); train_dataset additionally
        provides getAllImagesNP()
    expert: object providing predict(img, target, fnames)
    param: forwarded to NIHExpertDatasetMemory (which reads param["PATH"])

    Returns model_expert. The collected accuracies (error_confidence) and
    data_sizes are built up but not returned.

    Relies on names that are not defined in this function: INITIAL_SIZE,
    BATCH_SIZE, BATCH_SIZE_AL, MAX_ROUNDS, EPOCH_TRAIN, EPOCHS_DEFER, n_dataset,
    device, NetSimple, DataLoader and gc.
    """
    
    error_confidence_trials_LCE = []
    
    #print(f'\n \n \n Trial {trial} \n \n \n ')
    # initialize data: collect the indices of all training samples
    all_indices = list(range(len(train_dataset.getAllIndices())))
    # NOTE(review): the result of this expression is discarded
    train_dataset.getAllImagesNP().shape
    all_data_x = train_dataset.getAllImagesNP()[all_indices]
    all_data_filenames = np.array(train_dataset.getAllFilenames())[all_indices]
    all_data_y = np.array(train_dataset.getAllTargets())[all_indices]
    
    print("Complete first data generation")

    gc.collect()

    # Determine which indices are labeled and which are unlabeled
    
    Intial_random_set = random.sample(all_indices, INITIAL_SIZE)
    indices_labeled  = Intial_random_set
    indices_unlabeled= list(set(all_indices) - set(indices_labeled))

    gc.collect()

    # Build the datasets for the labeled and the unlabeled data
    #dataset_train_labeled = NIHExpertDataset(all_data_x[indices_labeled], all_data_filenames[indices_labeled], all_data_y[indices_labeled], expert.predict , [1]*len(indices_labeled), indices_labeled)
    #dataset_train_unlabeled = NIHExpertDataset(all_data_x[indices_unlabeled], all_data_filenames[indices_unlabeled], all_data_y[indices_unlabeled], expert.predict , [0]*len(indices_unlabeled), indices_unlabeled)
    dataset_train_labeled = NIHExpertDatasetMemory(None, all_data_filenames[indices_labeled], all_data_y[indices_labeled], expert.predict , [1]*len(indices_labeled), indices_labeled, param=param)
    dataset_train_unlabeled = NIHExpertDatasetMemory(None, all_data_filenames[indices_unlabeled], all_data_y[indices_unlabeled], expert.predict , [0]*len(indices_unlabeled), indices_unlabeled, param=param)
    
    
    # Build the data loaders
    dataLoaderTrainLabeled = DataLoader(dataset=dataset_train_labeled, batch_size=BATCH_SIZE, shuffle=True,  num_workers=0, pin_memory=False)
    dataLoaderTrainUnlabeled = DataLoader(dataset=dataset_train_unlabeled, batch_size=BATCH_SIZE, shuffle=True,  num_workers=0, pin_memory=False)
    
    print("Complete dataloader generation")

    gc.collect()

    # train expert model on labeled data
    # Expert model (the architecture arguments can be varied)
    model_expert = NetSimple(2, 3, 100, 100, 1000,500).to(device)
    #model_expert = NetSimple(1, 3, 50, 50, 500,256).to(device)
    # Train the model to predict the expert
    
    # Report the model size (parameters + buffers) in MB
    param_size = 0
    for paramn in model_expert.parameters():
        param_size += paramn.nelement() * paramn.element_size()
    buffer_size = 0
    for buffer in model_expert.buffers():
        buffer_size += buffer.nelement() * buffer.element_size()

    size_all_mb = (param_size + buffer_size) / 1024**2
    print('model size: {:.3f}MB'.format(size_all_mb))
    
    # The labeled loader is used both for training and for the periodic metrics
    run_expert(model_expert, EPOCH_TRAIN, dataLoaderTrainLabeled, dataLoaderTrainLabeled) 
    
    print("Expert trained")

    data_sizes = []
    error_confidence = []
    data_sizes.append(INITIAL_SIZE)
    # train model to do classification & Rejector
    #model_lce = NetSimple(n_dataset + 1, 3, 100, 100, 1000,500).to(device)
    model_lce = NetSimple(n_dataset + 1, 3, 50, 50, 500,256).to(device)

    #TODO: create the data loaders
    gc.collect()
    
    train_indices = list(range(len(train_dataset.getAllIndices())))
    val_indices = list(range(len(val_dataset.getAllIndices())))
    test_indices = list(range(len(test_dataset.getAllIndices())))

    #dataset_train = NIHExpertDataset(train_dataset.getAllImagesNP()[train_indices], np.array(train_dataset.getAllFilenames())[train_indices], np.array(train_dataset.getAllTargets())[train_indices], Expert.predict , [1]*len(train_indices))
    #dataset_val = NIHExpertDataset(train_dataset.getAllImagesNP()[val_indices], np.array(train_dataset.getAllFilenames())[val_indices], np.array(train_dataset.getAllTargets())[val_indices], Expert.predict , [1]*len(val_indices))
    dataset_train = NIHExpertDatasetMemory(None, np.array(train_dataset.getAllFilenames()), np.array(train_dataset.getAllTargets()), expert.predict , [1]*len(train_indices), param=param)
    dataset_val = NIHExpertDatasetMemory(None, np.array(val_dataset.getAllFilenames()), np.array(val_dataset.getAllTargets()), expert.predict , [1]*len(val_indices), param=param)
    dataset_test = NIHExpertDatasetMemory(None, np.array(test_dataset.getAllFilenames()), np.array(test_dataset.getAllTargets()), expert.predict , [1]*len(test_indices), param=param)
    
    dataLoaderTrain = DataLoader(dataset=dataset_train, batch_size=BATCH_SIZE, shuffle=True,  num_workers=0, pin_memory=True)
    dataLoaderVal = DataLoader(dataset=dataset_val, batch_size=BATCH_SIZE, shuffle=False,  num_workers=0, pin_memory=True)
    dataLoaderTest = DataLoader(dataset=dataset_test, batch_size=BATCH_SIZE, shuffle=False,  num_workers=0, pin_memory=True)
    ##
    
    run_reject_class(model_lce, EPOCH_TRAIN, dataLoaderTrain, dataLoaderVal)
    # Snapshot of the classifier-only weights; every AL round restarts from it
    model_lce_saved = copy.deepcopy(model_lce.state_dict())
    
    print("Rejector trained")

    gc.collect()
    
    # get expert model predictions on unlabeled data
    # (unshuffled, so the predictions stay aligned with indices_unlabeled)
    dataLoaderTrainUnlabeledUnshuffled = DataLoader(dataset=dataset_train_unlabeled, batch_size=BATCH_SIZE, shuffle=False,  num_workers=0, pin_memory=True)
    expert_preds_arr = []
    for data in dataLoaderTrainUnlabeledUnshuffled:
        images, labels, _, _, _, filenames = data
        images = images.to(device)
        outputs_exp = model_expert(images)
        for i in range(outputs_exp.size()[0]):
            #pred_exp = np.argmax(outputs_exp.data[i].cpu().numpy())
            pred_exp = outputs_exp.data[i].cpu().numpy()
            # keep the model output at index 1 as the (soft) pseudo label
            pred_exp = pred_exp[1]
            expert_preds_arr.append(pred_exp)
    expert_preds_unlabeled = np.array(expert_preds_arr)
    expert_preds_labeled = np.array(expert.predict(all_data_x[indices_labeled], torch.FloatTensor(all_data_y[indices_labeled]), all_data_filenames[indices_labeled]))
    # 1 where the expert's label equals the target, 0 otherwise
    expert_preds_labeled = ( expert_preds_labeled == all_data_y[indices_labeled]) * 1
    # labeled first, then unlabeled: same order as the concatenations below
    expert_preds_combined = np.concatenate(( expert_preds_labeled, expert_preds_unlabeled))
    
    print("Got predictions for all data")
    
    # create pseudo-labeled dataset
    """dataset_train_pseudolabeled = NIHExpertDataset(np.concatenate((all_data_x[indices_labeled], all_data_x[indices_unlabeled])),
                                                     np.concatenate((all_data_filenames[indices_labeled], all_data_filenames[indices_unlabeled])),
                                                        np.concatenate((all_data_y[indices_labeled] , all_data_y[indices_unlabeled])), 
                                                     expert.predict , [1]*(len(indices_labeled) + len(indices_unlabeled)), None,
                                                        expert_preds_combined)"""
    dataset_train_pseudolabeled = NIHExpertDatasetMemory(None,
                                                     np.concatenate((all_data_filenames[indices_labeled], all_data_filenames[indices_unlabeled])),
                                                        np.concatenate((all_data_y[indices_labeled] , all_data_y[indices_unlabeled])), 
                                                     expert.predict , [1]*(len(indices_labeled) + len(indices_unlabeled)), None,
                                                        expert_preds_combined, param=param)
    
    dataLoaderTrainPseudoLabeled = DataLoader(dataset=dataset_train_pseudolabeled, batch_size=BATCH_SIZE, shuffle=True,  num_workers=0, pin_memory=True)

    # train model on pseudo-labeled data
    run_reject_pseudo(model_lce, n_dataset, expert.predict, EPOCHS_DEFER, 1, dataLoaderTrainPseudoLabeled, dataLoaderTrainLabeled)
    
    print("Model with pseudo labels trained")
    
    metrics_confidence = metrics_print(model_lce, expert.predict, n_dataset, dataLoaderTest)
    error_confidence.append(metrics_confidence['system accuracy'])
    
    print("Starting with AL")
    # NOTE(review): the loop variable `round` shadows the builtin of the same name
    for round in range(MAX_ROUNDS):
        # restart from the classifier-only weights saved above
        model_lce.load_state_dict(model_lce_saved)
        # get points where expert model is least confident on
        print(f'\n \n Round {round} \n \n')
        #indices_confidence =  random.sample(indices_unlabeled, BATCH_SIZE_AL)#
        indices_confidence = get_least_confident_points(model_expert, dataLoaderTrainUnlabeled, BATCH_SIZE_AL)
        indices_labeled  = indices_labeled + list(indices_confidence) 
        indices_unlabeled= list(set(all_indices) - set(indices_labeled))
        
        dataset_train_labeled = NIHExpertDatasetMemory(None, all_data_filenames[indices_labeled], all_data_y[indices_labeled], expert.predict , [1]*len(indices_labeled), indices_labeled, param=param)
        dataset_train_unlabeled = NIHExpertDatasetMemory(None, all_data_filenames[indices_unlabeled], all_data_y[indices_unlabeled], expert.predict , [0]*len(indices_unlabeled), indices_unlabeled, param=param)
        
        dataLoaderTrainLabeled = DataLoader(dataset=dataset_train_labeled, batch_size=BATCH_SIZE, shuffle=True,  num_workers=0, pin_memory=True)
        dataLoaderTrainUnlabeled = DataLoader(dataset=dataset_train_unlabeled, batch_size=BATCH_SIZE, shuffle=True,  num_workers=0, pin_memory=True)
        # train model on labeled data
        run_expert(model_expert, EPOCH_TRAIN, dataLoaderTrainLabeled, dataLoaderTrainLabeled)
        # get expert predictions on unlabeled data
        dataLoaderTrainUnlabeledUnshuffled = DataLoader(dataset=dataset_train_unlabeled, batch_size=BATCH_SIZE, shuffle=False,  num_workers=0, pin_memory=True)
        expert_preds_arr = []
        for data in dataLoaderTrainUnlabeledUnshuffled:
            images, labels, _, _, _, filenames = data
            images = images.to(device)
            outputs_exp = model_expert(images)
            for i in range(outputs_exp.size()[0]):
                #pred_exp = np.argmax(outputs_exp.data[i].cpu().numpy())
                pred_exp = outputs_exp.data[i].cpu().numpy()
                # keep the model output at index 1 as the (soft) pseudo label
                pred_exp = pred_exp[1]
                expert_preds_arr.append(pred_exp)
        expert_preds_unlabeled = np.array(expert_preds_arr)
        expert_preds_labeled = np.array(expert.predict (all_data_x[indices_labeled], torch.FloatTensor(all_data_y[indices_labeled]), all_data_filenames[indices_labeled]))
        # 1 where the expert's label equals the target, 0 otherwise
        expert_preds_labeled = ( expert_preds_labeled == all_data_y[indices_labeled]) * 1
        expert_preds_combined = np.concatenate(( expert_preds_labeled, expert_preds_unlabeled))
        # create pseudo-labeled dataset
        
        """dataset_train_pseudolabeled = NIHExpertDataset(np.concatenate((all_data_x[indices_labeled] , all_data_x[indices_unlabeled])),
                                                         np.concatenate((all_data_filenames[indices_labeled] , all_data_filenames[indices_unlabeled])),
                                                         np.concatenate((all_data_y[indices_labeled] , all_data_y[indices_unlabeled])), expert.predict , [1]*(len(indices_labeled) + len(indices_unlabeled))  , None,
                                                         expert_preds_combined)"""
        
        dataset_train_pseudolabeled = NIHExpertDatasetMemory(None,
                                                     np.concatenate((all_data_filenames[indices_labeled], all_data_filenames[indices_unlabeled])),
                                                        np.concatenate((all_data_y[indices_labeled] , all_data_y[indices_unlabeled])), 
                                                     expert.predict , [1]*(len(indices_labeled) + len(indices_unlabeled)), None,
                                                        expert_preds_combined, param=param)
        
        dataLoaderTrainPseudoLabeled = DataLoader(dataset=dataset_train_pseudolabeled, batch_size=BATCH_SIZE, shuffle=True,  num_workers=0, pin_memory=True)

        # train model on pseudo labeled data
        best_score = 0
        best_model = None
        # only a single alpha value is tried here
        for alpha in [1]:
            print(f'alpha {alpha}')
            model_lce.load_state_dict(model_lce_saved)
            # NOTE(review): the test loader is passed as the validation loader, so the
            # checkpoint is selected on the same data the accuracy is reported on
            model_dict_alpha = run_reject_pseudo(model_lce, n_dataset, expert.predict, EPOCHS_DEFER, 1, dataLoaderTrainPseudoLabeled, dataLoaderTest, True, EPOCHS_DEFER-1)
            model_lce.load_state_dict(model_dict_alpha)
            score = metrics_print(model_lce, expert.predict, n_dataset, dataLoaderTest)['system accuracy']
            if score >= best_score:
                best_score =  score
                best_model = model_dict_alpha
        model_lce.load_state_dict(best_model)

        #run_reject(model_lce, 10, Expert.predict, EPOCHS_DEFER, 1, dataLoaderTrainPseudoLabeled, dataLoaderTrainLabeled)
        metrics_confidence = metrics_print(model_lce, expert.predict, n_dataset, dataLoaderTest)
        error_confidence.append(metrics_confidence['system accuracy'])
        data_sizes.append((round+1)*BATCH_SIZE_AL + INITIAL_SIZE)
    
    error_confidence_trials_LCE.append(error_confidence)
    return model_expert

In [8]:
def train_expert_confidence(train_loader, model, optimizer, scheduler, epoch, apply_softmax, param=None):
    """Train for one epoch the model to predict expert agreement with label

    train_loader: yields 6-tuples (input, label, expert_pred, index, labeled, filenames)
    model: network being trained
    optimizer, scheduler: both are stepped once per batch
    epoch: epoch number, only used in the progress output
    apply_softmax: if True use my_CrossEntropyLossWithSoftmax, else my_CrossEntropyLoss
    param: optional mapping; when it contains "Cost", that value is passed as
           `cost=` to my_CrossEntropyLoss. Without it the loss is called
           without a cost.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # `param` is optional (run_expert calls this function without it), so it
    # must not be subscripted unconditionally.
    cost = param["Cost"] if param is not None and "Cost" in param else None

    # switch to train mode
    model.train()

    end = time.time()
    for i, (inputs, label, expert_pred, _, _, _) in enumerate(train_loader):
        # Training target: 1 where the expert's prediction equals the label, else 0.
        expert_pred = expert_pred.long()
        expert_pred = (expert_pred == label) *1
        target = expert_pred.to(device)
        inputs = inputs.to(device)
        # compute output
        output = model(inputs)

        # compute loss
        if apply_softmax:
            loss = my_CrossEntropyLossWithSoftmax(output, target)
        elif cost is None:
            loss = my_CrossEntropyLoss(output, target)
        else:
            loss = my_CrossEntropyLoss(output, target, cost=cost)

        # measure accuracy and record loss
        prec1 = accuracy(output.data, target, topk=(1,))[0]
        losses.update(loss.data.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % 10 == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                epoch, i, len(train_loader), batch_time=batch_time,
                loss=losses, top1=top1))
            

def run_expert(model, epochs, train_loader, val_loader, apply_softmax = False, param=None):
    '''
    train expert model to predict disagreement with label
    model: WideResNet model or pytorch model (2 outputs)
    epochs: number of epochs to train
    train_loader: loader passed to train_expert_confidence each epoch
    val_loader: loader passed to metrics_print_expert (every 10th epoch and once at the end)
    apply_softmax: forwarded to train_expert_confidence
    param: optional configuration forwarded to train_expert_confidence (it reads
           param["Cost"] there); defaults to None so existing callers are unaffected
    '''
    # get the number of model parameters
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # define loss function (criterion) and optimizer
    #optimizer = torch.optim.AdamW(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.01, amsgrad=False)

    optimizer = torch.optim.SGD(model.parameters(), 0.001, #0.001
                                momentum=0.9, nesterov=True,
                                weight_decay=5e-4)
    # cosine learning rate; the scheduler is stepped per batch, hence T_max in batches
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, len(train_loader) * epochs)

    for epoch in range(0, epochs):
        # train for one epoch
        train_expert_confidence(train_loader, model, optimizer, scheduler, epoch, apply_softmax, param=param)
        if epoch % 10 == 0:
            metrics_print_expert(model, val_loader)
    metrics_print_expert(model, val_loader)
    


In [9]:
def run_reject_class(model, epochs, train_loader, val_loader, apply_softmax = False):
    '''
    Train the classifier only (no deferral) for a fixed number of epochs.

    model: network to train
    epochs: number of epochs to train
    train_loader: loader handed to train_reject_class each epoch
    val_loader: accepted for interface compatibility; not used here
    apply_softmax: forwarded to train_reject_class
    '''
    # report the model size
    n_parameters = 0
    for weights in model.parameters():
        n_parameters += weights.data.nelement()
    print('Number of model parameters: {}'.format(n_parameters))

    # optimizer plus a cosine schedule spanning every batch of every epoch
    adamw_settings = dict(lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.01, amsgrad=False)
    optimizer = torch.optim.AdamW(model.parameters(), **adamw_settings)
    total_steps = len(train_loader) * epochs
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, total_steps)

    for epoch in range(epochs):
        train_reject_class(train_loader, model, optimizer, scheduler, epoch, apply_softmax)

def train_reject_class(train_loader, model, optimizer, scheduler, epoch, apply_softmax):
    """Run one training epoch of the plain classifier (no deferral).

    train_loader: yields 6-tuples whose first two entries are inputs and targets
    optimizer, scheduler: both are stepped once per batch
    epoch: only used in the progress output
    apply_softmax: boolean to apply softmax, if model last layer doesn't have softmax
    """
    batch_time, losses, top1 = AverageMeter(), AverageMeter(), AverageMeter()

    # switch to train mode
    model.train()

    n_batches = len(train_loader)
    end = time.time()
    for step, batch in enumerate(train_loader):
        images, target = batch[0].to(device), batch[1].to(device)
        n_samples = images.size(0)

        # forward pass and loss
        output = model(images)
        loss_fn = my_CrossEntropyLossWithSoftmax if apply_softmax else my_CrossEntropyLoss
        loss = loss_fn(output, target)

        # bookkeeping: accuracy and running loss
        prec1 = accuracy(output.data, target, topk=(1,))[0]
        losses.update(loss.data.item(), n_samples)
        top1.update(prec1.item(), n_samples)

        # backward pass and parameter / learning-rate update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        # per-batch timing
        batch_time.update(time.time() - end)
        end = time.time()

        if step % 10 != 0:
            continue
        print('Epoch: [{0}][{1}/{2}]\t'
              'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
              'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
              'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
            epoch, step, n_batches, batch_time=batch_time,
            loss=losses, top1=top1))

def train_reject_pseudo(train_loader, model, optimizer, scheduler, epoch, expert_fn, n_classes, alpha):
    """Train for one epoch on the training set with deferral with pseudo labels

    train_loader: yields 6-tuples (input, target, expert, index, labeled, filenames);
                  `expert` is used as the per-sample weight `m` of the deferral term
    model: network whose output has a column at index `n_classes` (read by
           reject_CrossEntropyLoss as the deferral output)
    optimizer, scheduler: both are stepped once per batch
    epoch: only used in the progress output
    expert_fn, alpha: accepted for interface compatibility; not used here
    n_classes: number of classes, forwarded to reject_CrossEntropyLoss
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (inputs, target, expert, _, _, _) in enumerate(train_loader):
        target = target.to(device)
        inputs = inputs.to(device)
        # compute output
        output = model(inputs)

        # Weights of the two loss terms. They are constants with respect to the
        # model, so no gradient is tracked for them (requiring one would also
        # fail for integer expert labels).
        m = expert.to(device).detach()
        batch_size = output.size()[0]
        # The classifier term is weighted by 1 for every sample. The original
        # built a Python list here and then called .clone() on it, which raised.
        m2 = torch.ones(batch_size, dtype=output.dtype, device=output.device)

        # compute loss
        loss = reject_CrossEntropyLoss(output, m, target, m2, n_classes)

        # measure accuracy and record loss
        prec1 = accuracy(output.data, target, topk=(1,))[0]
        losses.update(loss.data.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % 10 == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                epoch, i, len(train_loader), batch_time=batch_time,
                loss=losses, top1=top1))


def run_reject_pseudo(model, n_dataset, expert_fn, epochs, alpha, train_loader, val_loader, best_on_val = False, epoch_freq = 10):
    '''
    This trains the model with labeled and pseudo labeled data, same mechanics as run_reject

    model: network to train (moved to `device`)
    n_dataset: number of classes, forwarded to train_reject_pseudo and metrics_print
    expert_fn: expert function, forwarded to train_reject_pseudo and metrics_print
    epochs: number of epochs to train
    alpha: forwarded to train_reject_pseudo
    train_loader: training data
    val_loader: data on which 'system accuracy' is evaluated every `epoch_freq` epochs
    best_on_val: if True, return the state dict with the best evaluated
                 'system accuracy'; otherwise return None
    epoch_freq: evaluate on val_loader when epoch % epoch_freq == 0

    Side effect: sets cudnn.benchmark = True.
    '''
    # get the number of model parameters
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # for training on multiple GPUs.
    # Use CUDA_VISIBLE_DEVICES=0,1 to specify which GPUs to use
    # model = torch.nn.DataParallel(model).cuda()
    model = model.to(device)

    cudnn.benchmark = True

    # define loss function (criterion) and optimizer
    optimizer = torch.optim.AdamW(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.01, amsgrad=False)

    # cosine learning rate; the scheduler is stepped per batch, hence T_max in batches
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, len(train_loader) * epochs)

    # Start from the incoming weights so a state dict is always available.
    best_model = copy.deepcopy(model.state_dict())
    best_val_score = 0
    for epoch in range(0, epochs):
        # train for one epoch
        train_reject_pseudo(train_loader, model, optimizer, scheduler, epoch, expert_fn, n_dataset, alpha)
        if epoch % epoch_freq == 0:
            score = metrics_print(model, expert_fn, n_dataset, val_loader)['system accuracy']
            if score > best_val_score:
                # Remember the new best score. Without this update every later
                # evaluation with a positive score replaced the checkpoint.
                best_val_score = score
                best_model = copy.deepcopy(model.state_dict())
    if best_on_val:
        return best_model
    return None
    
def reject_CrossEntropyLoss(outputs, m, labels, m2, n_classes):
    '''
    The L_{CE} deferral loss.
    ----
    outputs: network outputs; column `n_classes` is read as the deferral output
    m: per-sample weight applied to the log of the deferral output
    labels: target class index per sample
    m2: per-sample weight applied to the log of the target-class output
    n_classes: number of classes (also the index of the deferral column)
    ----
    Returns the mean over the batch of
    -m * log2(outputs[i, n_classes]) - m2 * log2(outputs[i, labels[i]]).
    '''
    n_rows = outputs.size()[0]
    rows = range(n_rows)
    defer_col = [n_classes] * n_rows

    # Weighted negative log-score of the deferral output ...
    defer_term = -m * torch.log2(outputs[rows, defer_col])
    # ... and weighted log-score of the ground-truth class output.
    label_term = m2 * torch.log2(outputs[rows, labels])

    per_sample = defer_term - label_term
    return torch.mean(per_sample)

def metrics_print(net, expert_fn, n_classes, loader):
    '''
    Computes and prints metrics for deferral (L_{CE} loss method)
    -----
    Arguments:
    net: model; output index `n_classes` is treated as "defer to the expert"
    expert_fn: expert callable, invoked as expert_fn(images, labels, filenames)
    n_classes: number of classes
    loader: data loader yielding 6-tuples; items 0, 1 and 5 are used as
            images, labels and filenames
    -----
    Returns: dict with keys "coverage", "system accuracy", "expert accuracy",
    "classifier accuracy" and "alone classifier".

    Reads the module-level `device` and `cifar_classes` (class-name lookup
    indexed by label).
    '''
    correct = 0
    correct_sys = 0
    exp = 0
    exp_total = 0
    total = 0
    real_total = 0
    alone_correct = 0
    correct_pred = {classname: 0 for classname in cifar_classes}
    total_pred = {classname: 0 for classname in cifar_classes}
    with torch.no_grad():
        for data in loader:
            images, labels, _, _, _, filenames = data
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            batch_size = outputs.size()[0]  # batch_size
            exp_prediction = expert_fn(images, labels, filenames)
            for i in range(0, batch_size):
                deferred = (predicted[i].item() == n_classes)
                if deferred:
                    # Classifier-alone prediction: best score among the real
                    # classes only (ties go to the highest index because of >=).
                    max_idx = 0
                    for j in range(0, n_classes):
                        if outputs.data[i][j] >= outputs.data[i][max_idx]:
                            max_idx = j
                    prediction = max_idx
                else:
                    prediction = predicted[i]
                alone_correct += (prediction == labels[i]).item()
                if deferred:
                    # The system answer is the expert's prediction.
                    final_pred = exp_prediction[i]
                    exp += (exp_prediction[i] == labels[i].item())
                    correct_sys += (exp_prediction[i] == labels[i].item())
                    exp_total += 1
                else:
                    # The system answer is the classifier's prediction.
                    total += 1
                    final_pred = predicted[i]
                    correct += (predicted[i] == labels[i]).item()
                    correct_sys += (predicted[i] == labels[i]).item()
                real_total += 1
                if labels[i].item() == final_pred:
                    correct_pred[cifar_classes[labels[i].item()]] += 1
                total_pred[cifar_classes[labels[i].item()]] += 1
    cov = str(total) + str(" out of") + str(real_total)
    # max(..., 1) only matters for an empty loader, where every counter is 0;
    # it turns a ZeroDivisionError into a 0 score.
    seen = max(real_total, 1)
    to_print = {"coverage": cov, "system accuracy": 100 * correct_sys / seen,
                "expert accuracy": 100 * exp / (exp_total + 0.0002),
                "classifier accuracy": 100 * correct / (total + 0.0001),
                "alone classifier": 100 * alone_correct / seen}
    print(to_print)
    for classname, correct_count in correct_pred.items():
        if total_pred[classname] == 0:
            # A class that never occurs in `loader` has no defined accuracy.
            print("Accuracy for class {:5s} is: n/a (no samples)".format(classname))
            continue
        class_accuracy = 100 * float(correct_count) / total_pred[classname]
        print("Accuracy for class {:5s} is: {:.3f} %".format(classname,
                                                    class_accuracy))
    return to_print

def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k

    output: 2-D score tensor, one row per sample and one column per class
    target: 1-D tensor of ground-truth class indices
    topk: iterable of k values to evaluate

    Returns a list with one 0-dim tensor per k: the percentage of samples whose
    target is among the k highest-scoring classes.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    # pred becomes (maxk, batch): row r holds every sample's (r+1)-th best class
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # reshape instead of view: the slice of the transposed tensor is not
        # contiguous for k > 1, so view(-1) raises a RuntimeError there.
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res

def metrics_print_expert(model, data_loader, defer_net = False):
    '''
    Computes and prints metrics for expert model error prediction
    -----
    model: model predicting, per image, whether the expert agrees with the label
    data_loader: data loader yielding 6-tuples; items 0, 1 and 2 are used as
                 images, ground-truth label and expert prediction
    defer_net: unused; kept so existing callers keep working
    -----
    The target for `model` is 1 where the expert prediction equals the label
    and 0 otherwise. Prints the accuracy and the confusion matrix; returns nothing.
    Reads the module-level `device`.
    '''
    # Imported here because a bare `import sklearn` does not guarantee that the
    # `sklearn.metrics` submodule has been loaded.
    from sklearn.metrics import confusion_matrix

    correct = 0
    total = 0

    label_list = np.empty(0)
    predictions_list = np.empty(0)
    # again no gradients needed
    with torch.no_grad():
        for data in data_loader:
            images, label, expert_pred, _, _, filenames = data
            expert_pred = expert_pred.long()
            # binary target: 1 = expert agrees with the label, 0 = expert is wrong
            expert_pred = (expert_pred == label) * 1
            images, labels = images.to(device), expert_pred.to(device)
            outputs = model(images)
            _, predictions = torch.max(outputs.data, 1)

            total += labels.size(0)
            correct += (predictions == labels).sum().item()

            label_list = np.concatenate((label_list, labels.cpu().numpy()), axis=0)
            predictions_list = np.concatenate((predictions_list, predictions.cpu().numpy()), axis=0)

    print('Accuracy of the network on the %d test images: %.3f %%' % (total,
        100 * correct / total))

    # Computed once. The former `tn, fp, fn, tp = ....ravel()` unpack was unused
    # and raised ValueError whenever only one class was present (1x1 matrix).
    matrix = confusion_matrix(label_list, predictions_list)

    print("Confusion Matrix:")
    print(matrix)

In [10]:
# Later cells refer to the configuration under both names, so keep the alias.
params = param

# Base dataset for the target finding configured in `params` (previously the
# target name was repeated here as a string literal).
basic_Dataset = ds.BasicDataset(params["PATH"], params["TARGET"])

# K-fold loader; seed, maxLabels, preload and prebuild are not passed here,
# so the loader's own defaults apply.
nih_dataloader = ds.NIH_K_Fold_Dataloader(
    dataset=basic_Dataset,
    k=params["K"],
    labelerIds=params["LABELER_IDS"],
    train_batch_size=params["TRAIN_BATCH_SIZE"],
    test_batch_size=params["TEST_BATCH_SIZE"],
    param=params,
)

# Train / validation / test splits of fold 1, each wrapped as an NIHDataset.
expert_train, expert_val, expert_test = nih_dataloader.get_dataset_for_folder(1)
expert_train_dataset = ds.NIHDataset(expert_train, preload=False, preprocess=False, param=params)
expert_val_dataset = ds.NIHDataset(expert_val, preload=False, preprocess=False, param=params)
expert_test_dataset = ds.NIHDataset(expert_test, preload=False, preprocess=False, param=params)



In [11]:
import gc

In [12]:
# Use the first CUDA GPU when one is available, otherwise the CPU.
# Functions defined in earlier cells (e.g. run_reject_pseudo) read this
# module-level `device`, so this cell has to run before they are called.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [13]:
# Expert wrapper for a single labeler; the id is the first entry of param["LABELER_IDS"].
nih_expert = Expert(dataset = basic_Dataset, labeler_id=4323195249)

In [14]:
# Run-level constants for the cells below.
# NOTE(review): their exact roles are only suggested by the names (sizes, epoch
# counts, number of rounds) — confirm against the helpers that consume them.
INITIAL_SIZE = 64
EPOCH_TRAIN = 5
n_dataset = 2
BATCH_SIZE = 16
MAX_ROUNDS = 4
BATCH_SIZE_AL = 8
EPOCHS_DEFER = 5

# Drop the notebook-level name for the base dataset and run a garbage collection.
# After this cell the global `basic_Dataset` no longer exists, so nothing that
# runs later may rely on it.
del basic_Dataset
gc.collect()

0

In [15]:
from torch.utils.data import DataLoader
from AL.neural_network import NetSimple

In [16]:
# Debugging aid: ask CUDA to run kernel launches synchronously so errors are
# reported at the failing call.
# NOTE(review): earlier cells have already touched CUDA; presumably this must be
# set before CUDA is initialised to have any effect — confirm.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [17]:
# Build the expert model from the train / val / test datasets and attach it to the expert.
# NOTE(review): getExpertModel is defined outside this part of the notebook; judging
# by the log below it trains the model before returning it — confirm.
nih_expert.setModel(getExpertModel(expert_train_dataset, expert_val_dataset, expert_test_dataset, nih_expert))

Complete first data generation
Complete dataloader generation
model size: 323.716MB
Number of model parameters: 84860202
Epoch: [0][0/4]	Time 1.611 (1.611)	Loss 0.9828 (0.9828)	Prec@1 100.000 (100.000)
Accuracy of the network on the 64 test images: 95.312 %
Confusion Matrix:
[[ 0  3]
 [ 0 61]]
Epoch: [1][0/4]	Time 0.343 (0.343)	Loss 0.8828 (0.8828)	Prec@1 93.750 (93.750)
Epoch: [2][0/4]	Time 0.333 (0.333)	Loss 0.4901 (0.4901)	Prec@1 100.000 (100.000)
Epoch: [3][0/4]	Time 0.337 (0.337)	Loss 0.3438 (0.3438)	Prec@1 100.000 (100.000)
Epoch: [4][0/4]	Time 0.340 (0.340)	Loss 0.8467 (0.8467)	Prec@1 87.500 (87.500)
Accuracy of the network on the 64 test images: 95.312 %
Confusion Matrix:
[[ 0  3]
 [ 0 61]]
Expert trained
Number of model parameters: 21220877
Epoch: [0][0/38]	Time 0.326 (0.326)	Loss 1.5545 (1.5545)	Prec@1 56.250 (56.250)
Epoch: [0][10/38]	Time 0.319 (0.324)	Loss 1.2042 (1.7487)	Prec@1 50.000 (54.545)
Epoch: [0][20/38]	Time 0.331 (0.324)	Loss 0.9553 (1.3873)	Prec@1 50.000 (56.250

  m = torch.tensor(m)


Epoch: [0][0/38]	Time 0.324 (0.324)	Loss 13.2608 (13.2608)	Prec@1 75.000 (75.000)
Epoch: [0][10/38]	Time 0.318 (0.322)	Loss 2.9569 (4.2669)	Prec@1 68.750 (58.523)
Epoch: [0][20/38]	Time 0.316 (0.322)	Loss 2.9499 (3.6457)	Prec@1 0.000 (55.655)
Epoch: [0][30/38]	Time 0.330 (0.321)	Loss 2.8366 (3.4112)	Prec@1 0.000 (37.702)
{'coverage': '0 out of64', 'system accuracy': 95.3125, 'expert accuracy': 95.31220214936828, 'classifier accuracy': 0.0, 'alone classifier': 57.8125}
Accuracy for class GT 0  is: 96.296 %
Accuracy for class GT 1  is: 94.595 %
Epoch: [1][0/38]	Time 0.330 (0.330)	Loss 2.8838 (2.8838)	Prec@1 0.000 (0.000)
Epoch: [1][10/38]	Time 0.312 (0.324)	Loss 2.8624 (2.8702)	Prec@1 0.000 (0.000)
Epoch: [1][20/38]	Time 0.329 (0.324)	Loss 2.7478 (2.8586)	Prec@1 0.000 (0.000)
Epoch: [1][30/38]	Time 0.321 (0.323)	Loss 2.8863 (2.8530)	Prec@1 0.000 (0.000)
Epoch: [2][0/38]	Time 0.329 (0.329)	Loss 2.8431 (2.8431)	Prec@1 0.000 (0.000)
Epoch: [2][10/38]	Time 0.325 (0.324)	Loss 2.8521 (2.8500)	

  m = torch.tensor(m)


Epoch: [0][0/38]	Time 0.325 (0.325)	Loss 14.8673 (14.8673)	Prec@1 62.500 (62.500)
Epoch: [0][10/38]	Time 0.318 (0.325)	Loss 3.0831 (4.5391)	Prec@1 50.000 (56.250)
Epoch: [0][20/38]	Time 0.314 (0.323)	Loss 3.0792 (3.8439)	Prec@1 43.750 (56.548)
Epoch: [0][30/38]	Time 0.320 (0.322)	Loss 3.0065 (3.5803)	Prec@1 0.000 (38.306)
{'coverage': '0 out of80', 'system accuracy': 91.25, 'expert accuracy': 91.2497718755703, 'classifier accuracy': 0.0, 'alone classifier': 53.75}
Accuracy for class GT 0  is: 94.595 %
Accuracy for class GT 1  is: 88.372 %
Epoch: [1][0/38]	Time 0.335 (0.335)	Loss 2.9744 (2.9744)	Prec@1 0.000 (0.000)
Epoch: [1][10/38]	Time 0.322 (0.324)	Loss 2.9167 (2.9442)	Prec@1 0.000 (0.000)
Epoch: [1][20/38]	Time 0.325 (0.324)	Loss 2.8539 (2.9306)	Prec@1 0.000 (0.000)
Epoch: [1][30/38]	Time 0.317 (0.323)	Loss 2.9330 (2.9287)	Prec@1 0.000 (0.000)
Epoch: [2][0/38]	Time 0.314 (0.314)	Loss 2.9063 (2.9063)	Prec@1 0.000 (0.000)
Epoch: [2][10/38]	Time 0.322 (0.324)	Loss 2.9151 (2.9286)	Prec

  m = torch.tensor(m)


Epoch: [0][0/38]	Time 0.328 (0.328)	Loss 14.9479 (14.9479)	Prec@1 87.500 (87.500)
Epoch: [0][10/38]	Time 0.322 (0.323)	Loss 3.1199 (4.5995)	Prec@1 50.000 (59.659)
Epoch: [0][20/38]	Time 0.320 (0.323)	Loss 3.0711 (3.8775)	Prec@1 0.000 (55.952)
Epoch: [0][30/38]	Time 0.315 (0.322)	Loss 3.0021 (3.6021)	Prec@1 0.000 (37.903)
{'coverage': '0 out of80', 'system accuracy': 91.25, 'expert accuracy': 91.2497718755703, 'classifier accuracy': 0.0, 'alone classifier': 53.75}
Accuracy for class GT 0  is: 94.595 %
Accuracy for class GT 1  is: 88.372 %
Epoch: [1][0/38]	Time 0.330 (0.330)	Loss 2.9623 (2.9623)	Prec@1 0.000 (0.000)
Epoch: [1][10/38]	Time 0.326 (0.324)	Loss 2.8725 (2.9382)	Prec@1 0.000 (0.000)
Epoch: [1][20/38]	Time 0.322 (0.323)	Loss 2.9153 (2.9387)	Prec@1 0.000 (0.000)
Epoch: [1][30/38]	Time 0.316 (0.322)	Loss 2.9140 (2.9398)	Prec@1 0.000 (0.000)
Epoch: [2][0/38]	Time 0.332 (0.332)	Loss 2.9297 (2.9297)	Prec@1 0.000 (0.000)
Epoch: [2][10/38]	Time 0.320 (0.324)	Loss 2.9129 (2.9355)	Prec@

  m = torch.tensor(m)


Epoch: [0][0/38]	Time 0.320 (0.320)	Loss 14.1952 (14.1952)	Prec@1 75.000 (75.000)
Epoch: [0][10/38]	Time 0.321 (0.321)	Loss 3.1377 (4.5373)	Prec@1 37.500 (57.386)
Epoch: [0][20/38]	Time 0.313 (0.322)	Loss 3.0948 (3.8570)	Prec@1 0.000 (49.702)
Epoch: [0][30/38]	Time 0.324 (0.322)	Loss 3.0635 (3.5992)	Prec@1 0.000 (33.669)
{'coverage': '0 out of80', 'system accuracy': 91.25, 'expert accuracy': 91.2497718755703, 'classifier accuracy': 0.0, 'alone classifier': 53.75}
Accuracy for class GT 0  is: 94.595 %
Accuracy for class GT 1  is: 88.372 %
Epoch: [1][0/38]	Time 0.322 (0.322)	Loss 3.0262 (3.0262)	Prec@1 0.000 (0.000)
Epoch: [1][10/38]	Time 0.337 (0.324)	Loss 2.9307 (2.9701)	Prec@1 0.000 (0.000)
Epoch: [1][20/38]	Time 0.319 (0.322)	Loss 2.9322 (2.9621)	Prec@1 0.000 (0.000)
Epoch: [1][30/38]	Time 0.332 (0.323)	Loss 3.0007 (2.9590)	Prec@1 0.000 (0.000)
Epoch: [2][0/38]	Time 0.317 (0.317)	Loss 2.9834 (2.9834)	Prec@1 0.000 (0.000)
Epoch: [2][10/38]	Time 0.322 (0.325)	Loss 2.9821 (2.9603)	Prec@

  m = torch.tensor(m)


Epoch: [0][0/38]	Time 0.327 (0.327)	Loss 15.2134 (15.2134)	Prec@1 75.000 (75.000)
Epoch: [0][10/38]	Time 0.323 (0.321)	Loss 3.1475 (4.6276)	Prec@1 50.000 (63.636)
Epoch: [0][20/38]	Time 0.329 (0.322)	Loss 3.1009 (3.9069)	Prec@1 0.000 (54.464)
Epoch: [0][30/38]	Time 0.328 (0.323)	Loss 3.0447 (3.6331)	Prec@1 0.000 (36.895)
{'coverage': '0 out of80', 'system accuracy': 91.25, 'expert accuracy': 91.2497718755703, 'classifier accuracy': 0.0, 'alone classifier': 53.75}
Accuracy for class GT 0  is: 94.595 %
Accuracy for class GT 1  is: 88.372 %
Epoch: [1][0/38]	Time 0.316 (0.316)	Loss 3.0004 (3.0004)	Prec@1 0.000 (0.000)
Epoch: [1][10/38]	Time 0.329 (0.321)	Loss 3.0052 (2.9668)	Prec@1 0.000 (0.000)
Epoch: [1][20/38]	Time 0.310 (0.321)	Loss 3.0745 (2.9578)	Prec@1 0.000 (0.000)
Epoch: [1][30/38]	Time 0.326 (0.323)	Loss 2.9886 (2.9654)	Prec@1 0.000 (0.000)
Epoch: [2][0/38]	Time 0.332 (0.332)	Loss 2.9513 (2.9513)	Prec@1 0.000 (0.000)
Epoch: [2][10/38]	Time 0.326 (0.324)	Loss 2.9486 (2.9532)	Prec@

In [None]:
# Display the model currently attached to the expert.
nih_expert.model

In [33]:
# Take the first item of the training dataset; it unpacks to (image, target, filename).
img, target, filename = next(iter(expert_train_dataset))

In [35]:
# A 3-dimensional image gets a leading batch dimension before the forward pass.
# NOTE(review): unlike the In [61] cell, `img` is not moved to `device` here —
# confirm the model and the input live on the same device.
if len(img.shape) == 3:
    img = img.unsqueeze(0) 
outputs = nih_expert.model(img)
# index of the highest output along dim 1
_, predicted = torch.max(outputs.data, 1)

3

In [61]:
# Forward the first training item through the expert model on `device`.
img, target, filename = next(iter(expert_train_dataset))
img = img.to(device).unsqueeze(0) 
outputs = nih_expert.model(img)
_, predicted = torch.max(outputs.data, 1)
# Only the last expression of a cell is displayed: `predicted` is evaluated
# but not shown, `target` is.
predicted
target

1

In [25]:
# `targets` is evaluated but not displayed; the cell shows element 1 of item 21
# (the target, given the (image, target, filename) unpacking used in other cells).
expert_train_dataset.targets
expert_train_dataset.__getitem__(21)[1]

0

In [28]:
# Show the filename of the first training item.
img, target, filename = next(iter(expert_train_dataset))
filename

'00002763_027.png'

In [29]:
# In-memory expert dataset built from the training split: filenames, targets, the
# expert's predict function and a list of ones (one per training index).
# NOTE(review): NIHExpertDatasetMemory is not defined in this part of the notebook;
# the meaning of the leading None and of the list of ones should be confirmed there.
dataset_train = NIHExpertDatasetMemory(None, np.array(expert_train_dataset.getAllFilenames()), np.array(expert_train_dataset.getAllTargets()), nih_expert.predict , [1]*len(expert_train_dataset.getAllIndices()), param=param)
    
# Shuffled loader over that dataset, loading in the main process (num_workers=0).
dataLoaderTrain = DataLoader(dataset=dataset_train, batch_size=BATCH_SIZE, shuffle=True,  num_workers=0, pin_memory=True)

In [60]:
# Shape of whatever `img` currently holds (depends on which cell assigned it last).
img.shape

torch.Size([32, 3, 244, 244])

In [32]:
# Pull the first batch from the loader; each batch is a 6-tuple of which the
# image, target and filename entries are kept.
# NOTE(review): the saved output has 32 targets although BATCH_SIZE is 16 in this
# notebook, so the output presumably comes from an earlier run — confirm.
i, (img, target, _, _, _, filename) = next(enumerate(dataLoaderTrain))
target
#img = img.to(device)
#nih_expert.model(img)

tensor([1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0,
        1, 1, 1, 1, 0, 0, 1, 0])

In [40]:
# NOTE(review): `a` is not assigned in any of the cells shown here, so this cell
# relies on leftover kernel state and will fail on a fresh Restart & Run All.
a[1]

tensor([1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1,
        1, 1, 0, 1, 1, 0, 0, 0])

# L2D: Learning to Defer

In [None]:
import expert as ex

In [None]:
def increase_experts(param):
    """
    Runs the multi-expert learning-to-defer experiment for several seeds and all folds.

    For each seed a K-fold loader is built; for each fold one expert per labeler id
    is created, a ResNet50_defer model with n_classes + n_experts outputs is
    instantiated, and `train` is called on the fold's loaders.
    -----
    Arguments:
    param: configuration dict; reads "PATH", "TARGET", "K", "LABELER_IDS",
           "TRAIN_BATCH_SIZE", "TEST_BATCH_SIZE", "maxLabels" and "n_classes",
           and is passed on to the data loader and to `train`.
    """
    # Everything is taken from the `param` argument. The function used to read the
    # notebook globals `params`, `basic_Dataset` and `maxL`; `basic_Dataset` is
    # deleted by an earlier cell and `maxL` is never assigned in the cells shown,
    # so the call failed with NameError.
    basic_dataset = ds.BasicDataset(param["PATH"], param["TARGET"])

    # for seed in ['', 948,  625,  436,  791]:
    for seed in [948, 625, 436]:
        print("run for seed {}".format(seed))
        if seed != "":
            set_seed(seed)

        #Use new Dataset
        nih_dataloader = ds.NIH_K_Fold_Dataloader(
            dataset = basic_dataset,
            k = param["K"],
            labelerIds = param["LABELER_IDS"],
            train_batch_size = param["TRAIN_BATCH_SIZE"],
            test_batch_size = param["TEST_BATCH_SIZE"],
            seed = seed,
            # NOTE(review): assumes param["maxLabels"] is the value the undefined
            # `maxL` was meant to carry — confirm.
            maxLabels = param["maxLabels"],
            preload = True,
            prebuild = True,
            param = param
        )

        for fold_idx in range(param["K"]):
            print(f'Running fold {fold_idx+1} out of {param["K"]}')

            # One expert (and its predict function) per labeler id, rebuilt for every fold.
            expert_fns = []
            for labelerId in list(param["LABELER_IDS"]):
                nih_expert = ex.Expert(dataset = basic_dataset, labeler_id=labelerId)
                expert_fns.append(nih_expert.predict)

            num_experts = len(expert_fns)

            # One output per class plus one deferral output per expert.
            model = vres.ResNet50_defer(int(param["n_classes"]) + num_experts)

            train_loader, val_loader, test_loader = nih_dataloader.get_data_loader_for_fold(fold_idx)

            train(model, train_loader, val_loader, test_loader, expert_fns, param, seed=seed)

In [None]:
def train(model, train_loader, valid_loader, test_loader, expert_fns, config, seed=""):
    """
    Trains a deferral model with early stopping on the validation loss, then
    evaluates it on the test data.
    -----
    Arguments:
    model: network with config["n_classes"] + len(expert_fns) outputs
    train_loader, valid_loader, test_loader: data loaders for the three splits
    expert_fns: list of expert callables, one deferral output per expert
    config: dict; reads "n_classes", "lr", "weight_decay", "loss_type", "epochs",
            "warmup_epochs", "alpha" and "patience"
    seed: only used by the (currently disabled) checkpoint naming; kept for callers
    -----
    Returns nothing; metrics are printed by `evaluate`.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # total number of model outputs: real classes + one deferral output per expert
    n_classes = config["n_classes"] + len(expert_fns)

    model = model.to(device)
    cudnn.benchmark = True
    optimizer = torch.optim.Adam(
        model.parameters(), config["lr"], weight_decay=config["weight_decay"]
    )
    criterion = vlos.Criterion()
    # the surrogate loss is looked up by name on the Criterion object
    loss_fn = getattr(criterion, config["loss_type"])
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, len(train_loader) * config["epochs"]
    )
    best_validation_loss = np.inf
    patience = 0
    iters = 0
    warmup_iters = config["warmup_epochs"] * len(train_loader)
    lrate = config["lr"]

    for epoch in range(0, config["epochs"]):
        iters, train_loss = train_epoch(
            iters,
            warmup_iters,
            lrate,
            train_loader,
            model,
            optimizer,
            scheduler,
            epoch,
            expert_fns,
            loss_fn,
            n_classes,
            config["alpha"],
            config,
        )
        metrics = evaluate(model, expert_fns, loss_fn, n_classes, valid_loader, config)

        validation_loss = metrics["validation_loss"]

        if validation_loss < best_validation_loss:
            # This update used to sit inside a triple-quoted string together with
            # the disabled checkpoint code, so the best loss stayed at +inf, the
            # patience counter was reset every epoch and early stopping never fired.
            best_validation_loss = validation_loss
            patience = 0
            # Checkpointing (torch.save of the state dict plus a JSON dump of
            # `config` under config["ckp_dir"]) stays disabled, as before.
        else:
            patience += 1

        if patience >= config["patience"]:
            print("Early Exiting Training.", flush=True)
            break

    print("Evaluate on Test Data")
    metrics = evaluate(model, expert_fns, loss_fn, n_classes, test_loader, config)

In [None]:
def train_epoch(
    iters,
    warmup_iters,
    lrate,
    train_loader,
    model,
    optimizer,
    scheduler,
    epoch,
    expert_fns,
    loss_fn,
    n_classes,
    alpha,
    config,
):
    """ Train for one epoch

    -----
    Arguments:
    iters: number of optimisation steps done before this epoch
    warmup_iters: number of initial steps with linear learning-rate warm-up
    lrate: learning rate reached at the end of the warm-up
    train_loader: loader yielding (input, target, hpred) batches
    model, optimizer, scheduler: training objects; the scheduler only steps
        once the warm-up is over
    epoch: epoch index, used for logging only
    expert_fns: expert callables, each invoked as fn(input, target, hpred)
    loss_fn: callable loss_fn(output, target, collection_Ms, n_classes)
    n_classes: forwarded to loss_fn
    alpha: value written to m2 where the expert is correct
    config: dict; "loss_type" decides whether softmax is applied to the output
    -----
    Returns: (iters after this epoch, average of the per-batch losses)
    """

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    model.train()
    end = time.time()

    epoch_train_loss = []

    for i, (input, target, hpred) in enumerate(train_loader):
        if iters < warmup_iters:
            # linear warm-up from 0 to lrate over the first warmup_iters steps
            lr = lrate * float(iters) / warmup_iters
            for param_group in optimizer.param_groups:
                param_group["lr"] = lr

        target = target.to(device)
        input = input.to(device)

        # compute output
        output = model(input)

        if config["loss_type"] == "softmax":
            output = F.softmax(output, dim=1)

        # get expert  predictions and costs
        batch_size = output.size()[0]  # batch_size
        collection_Ms = []
        # We only support \alpha=1
        for fn in expert_fns:
            # We assume each expert function has access to the extra metadata, even if they don't use it.
            expert_pred = fn(input, target, hpred)
            # Fresh lists, as in `evaluate`: the container returned by the expert
            # is no longer overwritten in place.
            m = [0] * batch_size
            m2 = [0] * batch_size
            for j in range(0, batch_size):
                if expert_pred[j] == target[j].item():
                    m[j] = 1
                    m2[j] = alpha
                else:
                    m[j] = 0
                    m2[j] = 1
            m = torch.tensor(m)
            m2 = torch.tensor(m2)
            m = m.to(device)
            m2 = m2.to(device)
            collection_Ms.append((m, m2))

        # compute loss
        loss = loss_fn(output, target, collection_Ms, n_classes)
        epoch_train_loss.append(loss.item())

        # measure accuracy and record loss
        prec1 = accuracy(output.data, target, topk=(1,))[0]
        losses.update(loss.data.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))

        # compute gradient and do the optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if not iters < warmup_iters:
            scheduler.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        iters += 1

        if i % 10 == 0:
            print(
                "Epoch: [{0}][{1}/{2}]\t"
                "Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t"
                "Loss {loss.val:.4f} ({loss.avg:.4f})\t"
                "Prec@1 {top1.val:.3f} ({top1.avg:.3f})".format(
                    epoch,
                    i,
                    len(train_loader),
                    batch_time=batch_time,
                    loss=losses,
                    top1=top1,
                ),
                flush=True,
            )

    return iters, np.average(epoch_train_loss)

In [None]:
def evaluate(model, expert_fns, loss_fn, n_classes, data_loader, config):
    """
    Computes metrics for deferral
    -----
    Arguments:
    model: model; its first n_classes - len(expert_fns) outputs are class scores,
           the remaining outputs mean "defer to expert k"
    expert_fns: list of expert callables, each invoked as fn(images, labels, hpred)
                and returning one prediction per sample
    loss_fn: callable loss_fn(outputs, labels, collection_Ms, n_classes) returning
             a tensor on which .item() is called
    n_classes: total number of model outputs (classes + experts)
    data_loader: iterable of (images, labels, hpred) batches
    config: dict; reads "alpha" and "loss_type"
    -----
    Returns: dict with "coverage", "system_accuracy", "expert_accuracy",
    "classifier_accuracy", "alone_classifier", "validation_loss", "n_experts"
    and one "expert_<k>" accuracy per expert. The dict is also printed.
    """

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    n_experts = len(expert_fns)
    n_real_classes = n_classes - n_experts

    correct = 0
    correct_sys = 0
    exp = 0
    exp_total = 0
    total = 0
    real_total = 0
    alone_correct = 0
    #  === Individual Expert Accuracies === #
    expert_correct_dic = {k: 0 for k in range(n_experts)}
    expert_total_dic = {k: 0 for k in range(n_experts)}
    #  === Individual  Expert Accuracies === #
    alpha = config["alpha"]
    losses = []
    with torch.no_grad():
        for data in data_loader:
            images, labels, hpred = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            if config["loss_type"] == "softmax":
                outputs = F.softmax(outputs, dim=1)
            # For every other loss type (including "ova") the raw outputs are used,
            # exactly as train_epoch feeds them to the loss. The old
            # `ouputs = F.sigmoid(outputs)` line assigned to a mistyped name and
            # never had any effect; sigmoid would not change the arg-max anyway.

            _, predicted = torch.max(outputs.data, 1)
            batch_size = outputs.size()[0]  # batch_size

            expert_predictions = []
            collection_Ms = []  # one (m, m2) pair per expert
            for fn in expert_fns:
                exp_prediction1 = fn(images, labels, hpred)
                m = [0] * batch_size
                m2 = [0] * batch_size
                for j in range(0, batch_size):
                    if exp_prediction1[j] == labels[j].item():
                        m[j] = 1
                        m2[j] = alpha
                    else:
                        m[j] = 0
                        m2[j] = 1

                m = torch.tensor(m)
                m2 = torch.tensor(m2)
                m = m.to(device)
                m2 = m2.to(device)
                collection_Ms.append((m, m2))
                expert_predictions.append(exp_prediction1)

            loss = loss_fn(outputs, labels, collection_Ms, n_classes)
            losses.append(loss.item())

            for i in range(0, batch_size):
                deferred = predicted[i].item() >= n_real_classes
                if deferred:
                    # Classifier-alone prediction: best score among the real
                    # classes only (ties go to the highest index because of >=).
                    max_idx = 0
                    for j in range(0, n_real_classes):
                        if outputs.data[i][j] >= outputs.data[i][max_idx]:
                            max_idx = j
                    prediction = max_idx
                else:
                    prediction = predicted[i]
                alone_correct += (prediction == labels[i]).item()
                if deferred:
                    # index of the expert the model deferred to
                    deferred_exp = (predicted[i] - n_real_classes).item()
                    exp_prediction = expert_predictions[deferred_exp][i]
                    expert_is_right = exp_prediction == labels[i].item()
                    # Deferral accuracy: No matter expert ===
                    exp += expert_is_right
                    exp_total += 1
                    # Individual Expert Accuracy ===
                    expert_correct_dic[deferred_exp] += expert_is_right
                    expert_total_dic[deferred_exp] += 1
                    correct_sys += expert_is_right
                else:
                    classifier_is_right = (predicted[i] == labels[i]).item()
                    total += 1
                    correct += classifier_is_right
                    correct_sys += classifier_is_right
                real_total += 1
    cov = str(total) + str(" out of") + str(real_total)

    #  === Individual Expert Accuracies === #
    expert_accuracies = {
        "expert_{}".format(str(k)): 100
        * expert_correct_dic[k]
        / (expert_total_dic[k] + 0.0002)
        for k in range(n_experts)
    }
    # max(..., 1) only matters for an empty loader, where every counter is 0;
    # it turns a ZeroDivisionError into a 0 score.
    seen = max(real_total, 1)
    # Add expert accuracies dict
    to_print = {
        "coverage": cov,
        "system_accuracy": 100 * correct_sys / seen,
        "expert_accuracy": 100 * exp / (exp_total + 0.0002),
        "classifier_accuracy": 100 * correct / (total + 0.0001),
        "alone_classifier": 100 * alone_correct / seen,
        "validation_loss": np.average(losses) if losses else float("nan"),
        "n_experts": n_experts,
        **expert_accuracies,
    }
    print(to_print, flush=True)
    return to_print

In [None]:
increase_experts(param)