In [1]:
import copy
import json
import math
import os
import random
import shutil
import time
import pandas as pd
import time

import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.nn.functional as F
import torch.optim
import torch.utils.data

from scipy.stats import entropy

import sklearn
import copy

import gc
from torch.utils.data import DataLoader


import torchvision.transforms as transforms
from PIL import Image

import Verma.experts as vexp
import Verma.losses as vlos
from Verma.utils import AverageMeter, accuracy
import Verma.resnet50 as vres
from AL.utils import *
from AL.metrics import *

import Dataset.Dataset as ds

import ssl_functions as ssl
import active_learning as al
from active_learning import NIHExpertDatasetMemory

import expert as expert_module
import verma as verm
import hemmer as hm

2023-07-13 10:54:22.948122: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def set_seed(seed):
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        seed: Value handed unchanged to every seeding function.

    The CUDA generators are only seeded when ``torch.cuda.is_available()``
    reports a usable device.
    """
    # CPU-side generators, seeded in the same order as before.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if not torch.cuda.is_available():
        return

    # Current device first, then every visible device.
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

In [3]:
import neptune

import json

# Load the notebook configuration from 'neptune_config.json', resolved relative
# to the current working directory.
with open('neptune_config.json', 'r') as f:
    config = json.load(f)

# Only the "neptune" section of the file is kept under its own name.
config_neptune = config["neptune"]

In [4]:
import shutil
def cleanTrainDir(path):
    """Recursively delete the directory tree rooted at ``path``.

    Args:
        path: Directory to remove. Errors raised by ``shutil.rmtree``
            (for example when the directory does not exist) propagate
            to the caller.
    """
    shutil.rmtree(path, ignore_errors=False)

In [5]:
def testExpert(expert, dataset, image_container, param, mod, prediction_type, seed, fold, data_name):
    """Evaluate an expert on ``dataset`` and optionally log the result to Neptune.

    Args:
        expert: Expert object; its ``predict`` callable and ``labelerId`` are used.
        dataset: Dataset exposing ``getAllFilenames``, ``getAllTargets`` and
            ``getAllIndices``.
        image_container: Image container forwarded to the in-memory dataset.
        param: Run configuration. ``param["NEPTUNE"]["NEPTUNE"]`` switches logging
            on; ``param["NEPTUNE"]["RUN"]`` is then used as the run handle.
        mod: Forwarded to ``al.metrics_print_expert``.
        prediction_type: Forwarded to ``al.metrics_print_expert``.
        seed: Seed, used only to build the Neptune path.
        fold: Fold index, used only to build the Neptune path.
        data_name: Split label (callers in this file pass "Val" or "Test");
            prefixes the Neptune series name.

    Returns:
        The metrics dict returned by ``al.metrics_print_expert``.
    """
    final_dataset = al.NIHExpertDatasetMemory(
        None,
        dataset.getAllFilenames(),
        np.array(dataset.getAllTargets()),
        expert.predict,
        [1] * len(dataset.getAllIndices()),
        dataset.getAllIndices(),
        param=param,
        preload=True,
        image_container=image_container,
    )

    data_loader = DataLoader(dataset=final_dataset, batch_size=128, shuffle=True, num_workers=4, pin_memory=True)

    neptune_enabled = param["NEPTUNE"]["NEPTUNE"]
    run = None
    if neptune_enabled:
        # The run handle is detached while copying so that deepcopy never sees it,
        # and is restored even if the copy fails.
        run = param["NEPTUNE"]["RUN"]
        param["NEPTUNE"]["RUN"] = None
        try:
            param_neptune_off = copy.deepcopy(param)
        finally:
            param["NEPTUNE"]["RUN"] = run
        # The metric helper gets a config with logging disabled; logging is done below.
        param_neptune_off["NEPTUNE"]["NEPTUNE"] = False
    else:
        param_neptune_off = copy.deepcopy(param)

    metrics = al.metrics_print_expert(model=None, data_loader=data_loader, expert=expert, id=expert.labelerId, mod=mod, prediction_type=prediction_type, param=param_neptune_off, print_result=False)

    if neptune_enabled:
        # Fix: the path previously interpolated the builtin ``id`` function instead
        # of the expert's labeler id.
        base = f"Seed_{seed}/Fold_{fold}/Expert_{expert.labelerId}/" + data_name + "_Start_End"
        # (Neptune series name, key in the metrics dict); the metrics dict spells
        # its accuracy keys "accurancy".
        series = (
            ("tn", "tn"),
            ("fp", "fp"),
            ("fn", "fn"),
            ("tp", "tp"),
            ("accuracy", "accurancy"),
            ("f1", "f1"),
            ("accuracy_balanced", "accurancy_balanced"),
        )
        for series_name, metric_key in series:
            run[base + "/" + series_name].append(metrics[metric_key])

    return metrics

In [6]:
def getExpertModelSSL_AL(dataManager, expert, labelerId, param=None, seed=None, fold=None, learning_mod="ssl", prediction_type="target"):
    """Run confidence-based active-learning rounds for a single SSL expert.

    The labeled pool starts with the training images whose filenames the SSL
    dataset reports as labeled for ``labelerId``. Each round adds the points
    returned by ``al.get_least_confident_points`` and retrains via ``run_expert``.

    Args:
        dataManager: Provides ``getKFoldDataloader(seed)`` and ``getSSLDataset(seed)``.
        expert: Expert to refine; ``expert.predict`` and ``expert.labelerId`` are used.
        labelerId: Labeler whose SSL-labeled filenames seed the labeled pool.
        param: Run configuration; the ``param["AL"]`` section is read.
        seed: Seed used for data selection and ``set_seed``.
        fold: Fold index.
        learning_mod: Passed as ``mod`` to ``al.testExpert``.
        prediction_type: Passed to ``al.testExpert``.

    Returns:
        Tuple ``(met_test, metrics)``: the final test metrics from
        ``al.metrics_print_expert`` and a dict holding the "Val"/"Test"
        start/end metrics plus per-round train/val metrics keyed by image count.
    """
    nih_dataloader = dataManager.getKFoldDataloader(seed)

    expert_train, expert_val, expert_test = nih_dataloader.get_dataset_for_folder(fold)
    image_container = nih_dataloader.get_ImageContainer()
    train_dataset = ds.NIHDataset(expert_train, preload=False, preprocess=False, param=param, image_container=image_container)
    val_dataset = ds.NIHDataset(expert_val, preload=False, preprocess=False, param=param, image_container=image_container)
    test_dataset = ds.NIHDataset(expert_test, preload=False, preprocess=False, param=param, image_container=image_container)

    def _memory_dataset(filenames, targets, predict, labeled_flag, indices):
        # In-memory expert dataset; every sample gets the same labeled flag.
        return NIHExpertDatasetMemory(None, filenames, targets, predict, [labeled_flag] * len(indices), indices, param=param, preload=param["AL"]["PRELOAD"], image_container=image_container)

    def _loader(dataset):
        # All loaders in this function share the same settings.
        return DataLoader(dataset=dataset, batch_size=param["AL"]["BATCH_SIZE"], shuffle=True, num_workers=4, pin_memory=True)

    sslDataset = dataManager.getSSLDataset(seed)
    # A set keeps the membership test below cheap.
    usedFilenames = set(sslDataset.getLabeledFilenames(labelerId, fold))

    # Positions, filenames and targets of every training sample.
    all_indices = list(range(len(train_dataset.getAllIndices())))
    all_data_filenames = np.array(train_dataset.getAllFilenames())[all_indices]
    all_data_y = np.array(train_dataset.getAllTargets())[all_indices]

    used_indices = [index for index in all_indices if all_data_filenames[index] in usedFilenames]

    print("Len overlapping used indices: " + str(len(used_indices)))

    metrics = {}

    # Fix: these calls used the undefined name ``mod``; the parameter is ``learning_mod``.
    met = al.testExpert(expert, val_dataset, image_container, param, learning_mod, prediction_type, seed, fold, data_name="Val")
    metrics["Val"] = {"Start": met}

    met = al.testExpert(expert, test_dataset, image_container, param, learning_mod, prediction_type, seed, fold, data_name="Test")
    metrics["Test"] = {"Start": met}

    set_seed(seed)

    indices_labeled = used_indices
    indices_unlabeled = list(set(all_indices) - set(indices_labeled))

    gc.collect()

    # Initial unlabeled pool (built without a predict function) and the validation set.
    dataset_train_unlabeled = _memory_dataset(all_data_filenames[indices_unlabeled], all_data_y[indices_unlabeled], None, 0, indices_unlabeled)
    dataset_val_unlabeled = _memory_dataset(val_dataset.getAllFilenames(), np.array(val_dataset.getAllTargets()), expert.predict, 1, val_dataset.getAllIndices())

    dataLoaderTrainUnlabeled = _loader(dataset_train_unlabeled)
    dataLoaderValUnlabeled = _loader(dataset_val_unlabeled)

    gc.collect()

    print("Starting with AL")
    # ``round_idx`` avoids shadowing the builtin ``round``.
    for round_idx in range(param["AL"]["ROUNDS"]):

        print(f'\n \n Round {round_idx} \n \n')

        # Query the points the expert model is least confident on.
        indices_confidence = al.get_least_confident_points(expert, dataLoaderTrainUnlabeled, param["AL"]["LABELS_PER_ROUND"], mod="ssl")
        indices_labeled = indices_labeled + list(indices_confidence)
        indices_unlabeled = list(set(all_indices) - set(indices_labeled))

        dataset_train_labeled = _memory_dataset(all_data_filenames[indices_labeled], all_data_y[indices_labeled], expert.predict, 1, indices_labeled)
        dataset_train_unlabeled = _memory_dataset(all_data_filenames[indices_unlabeled], all_data_y[indices_unlabeled], expert.predict, 0, indices_unlabeled)

        dataLoaderTrainLabeled = _loader(dataset_train_labeled)
        dataLoaderTrainUnlabeled = _loader(dataset_train_unlabeled)

        # Train on the labeled pool, validate on the validation split.
        dataloaders = (dataLoaderTrainLabeled, dataLoaderValUnlabeled)
        n_images = param["AL"]["INITIAL_SIZE"] + (round_idx + 1) * param["AL"]["LABELS_PER_ROUND"]
        train_metrics, val_metrics = run_expert(model=None, expert=expert, epochs=param["AL"]["EPOCH_TRAIN"], dataloaders=dataloaders, param=param, id=expert.labelerId, seed=seed, fold=fold, n_images=n_images, mod="ssl", prediction_type="target")

        metrics[n_images] = {"train_metrics": train_metrics, "val_metrics": val_metrics}

    dataset_test_unlabeled = _memory_dataset(test_dataset.getAllFilenames(), np.array(test_dataset.getAllTargets()), expert.predict, 1, test_dataset.getAllIndices())
    dataLoaderVal = _loader(dataset_test_unlabeled)
    # NOTE(review): the "+ 5" offset in n_images is kept from the original code;
    # its purpose is not visible here.
    met_test = al.metrics_print_expert(model=None, expert=expert, data_loader=dataLoaderVal, id=expert.labelerId, seed=seed, fold=fold, n_images=param["AL"]["INITIAL_SIZE"] + (param["AL"]["ROUNDS"] + 5) * param["AL"]["LABELS_PER_ROUND"], step="Test", param=param, mod="ssl", prediction_type="target")

    met = al.testExpert(expert, val_dataset, image_container, param, learning_mod, prediction_type, seed, fold, data_name="Val")
    metrics["Val"]["End"] = met

    met = al.testExpert(expert, test_dataset, image_container, param, learning_mod, prediction_type, seed, fold, data_name="Test")
    metrics["Test"]["End"] = met

    print("AL finished")
    return met_test, metrics

In [7]:
def getExpertModelsSSL_AL(dataManager, experts, param, seed, fold, mod="ssl", prediction_type="target", learning_mod=None):
    """Run disagreement-based active-learning rounds jointly for several SSL experts.

    The labeled pool starts with every training image that the SSL dataset reports
    as labeled for any labeler in ``param["LABELER_IDS"]``. Each round queries new
    points via ``al.getQbQPoints`` / ``al.getQbQPointsDifference`` (chosen by
    ``param["MOD"]``) and retrains every expert with ``run_expert``.

    Args:
        dataManager: Provides ``getKFoldDataloader(seed)`` and ``getSSLDataset(seed)``.
        experts: Dict mapping labeler id to expert object.
        param: Run configuration; ``param["MOD"]`` must be "disagreement" or
            "disagreement_diff".
        seed: Seed used for data selection and ``set_seed``.
        fold: Fold index.
        mod: Passed to ``al.testExpert``.
        prediction_type: Passed to ``al.testExpert``.
        learning_mod: Optional alias for ``mod``, accepted because a caller in this
            file passes that keyword; when given it overrides ``mod``.

    Returns:
        Tuple ``(experts, met_test, metrics)``: the (in-place trained) experts dict,
        the final test metrics keyed by ``expert.labelerId`` and the metrics
        history keyed by labeler id.

    Raises:
        ValueError: If ``param["MOD"]`` is not a supported disagreement strategy.
    """
    if learning_mod is not None:
        mod = learning_mod

    # Fix: an unsupported MOD used to surface later as a NameError on ``indices_qbq``.
    if param["MOD"] not in ("disagreement", "disagreement_diff"):
        raise ValueError(f'Unsupported param["MOD"] for disagreement-based AL: {param["MOD"]!r}')

    nih_dataloader = dataManager.getKFoldDataloader(seed)

    expert_train, expert_val, expert_test = nih_dataloader.get_dataset_for_folder(fold)
    image_container = nih_dataloader.get_ImageContainer()
    train_dataset = ds.NIHDataset(expert_train, preload=False, preprocess=False, param=param, image_container=image_container)
    val_dataset = ds.NIHDataset(expert_val, preload=False, preprocess=False, param=param, image_container=image_container)
    test_dataset = ds.NIHDataset(expert_test, preload=False, preprocess=False, param=param, image_container=image_container)

    def _memory_dataset(filenames, targets, predict, labeled_flag, indices):
        # In-memory expert dataset; every sample gets the same labeled flag.
        return NIHExpertDatasetMemory(None, filenames, targets, predict, [labeled_flag] * len(indices), indices, param=param, preload=param["AL"]["PRELOAD"], image_container=image_container)

    def _loader(dataset):
        # All loaders in this function share the same settings.
        return DataLoader(dataset=dataset, batch_size=param["AL"]["BATCH_SIZE"], shuffle=True, num_workers=4, pin_memory=True)

    sslDataset = dataManager.getSSLDataset(seed)
    # Fix: the original loop rebuilt ``temp`` from an empty list on every iteration,
    # so only the last labeler's filenames survived.
    usedFilenames = set()
    for labelerId in param["LABELER_IDS"]:
        usedFilenames.update(sslDataset.getLabeledFilenames(labelerId, fold))

    # Positions, filenames and targets of every training sample.
    all_indices = list(range(len(train_dataset.getAllIndices())))
    all_data_filenames = np.array(train_dataset.getAllFilenames())[all_indices]
    all_data_y = np.array(train_dataset.getAllTargets())[all_indices]

    unused_indices = [index for index in all_indices if all_data_filenames[index] not in usedFilenames]

    metrics = {}
    for labelerId, expert in experts.items():
        metrics[labelerId] = {}

        met = al.testExpert(expert, val_dataset, image_container, param, mod, prediction_type, seed, fold, data_name="Val")
        metrics[labelerId]["Val"] = {"Start": met}

        met = al.testExpert(expert, test_dataset, image_container, param, mod, prediction_type, seed, fold, data_name="Test")
        metrics[labelerId]["Test"] = {"Start": met}

    set_seed(seed)

    gc.collect()

    indices_unlabeled = unused_indices
    indices_labeled = list(set(all_indices) - set(indices_unlabeled))

    # The unlabeled pool is always built with the first labeler's predict function.
    # The original rebuilt it inside the loop with whichever expert the preceding
    # ``for`` happened to leave behind.
    reference_predict = experts[param["LABELER_IDS"][0]].predict
    dataLoaderTrainUnlabeled = _loader(_memory_dataset(all_data_filenames[indices_unlabeled], all_data_y[indices_unlabeled], reference_predict, 0, indices_unlabeled))

    print("Starting with AL")
    # ``round_idx`` avoids shadowing the builtin ``round``.
    for round_idx in range(param["AL"]["ROUNDS"]):

        print(f'\n \n Round {round_idx} \n \n')

        # Query the points selected by the configured disagreement strategy.
        if param["MOD"] == "disagreement":
            indices_qbq = al.getQbQPoints(experts, dataLoaderTrainUnlabeled, param["AL"]["LABELS_PER_ROUND"], mod="ssl", param=param)
        else:
            indices_qbq = al.getQbQPointsDifference(experts, dataLoaderTrainUnlabeled, param["AL"]["LABELS_PER_ROUND"], mod="ssl", param=param)

        indices_labeled = indices_labeled + list(indices_qbq)
        indices_unlabeled = list(set(all_indices) - set(indices_labeled))

        # Retrain every expert on the enlarged labeled pool.
        for labelerId, expert in experts.items():
            dataLoaderTrainLabeled = _loader(_memory_dataset(all_data_filenames[indices_labeled], all_data_y[indices_labeled], expert.predict, 1, indices_labeled))
            dataLoaderValUnlabeled = _loader(_memory_dataset(val_dataset.getAllFilenames(), np.array(val_dataset.getAllTargets()), expert.predict, 1, val_dataset.getAllIndices()))

            dataloaders = (dataLoaderTrainLabeled, dataLoaderValUnlabeled)
            n_images = param["AL"]["INITIAL_SIZE"] + (round_idx + 1) * param["AL"]["LABELS_PER_ROUND"]
            train_metrics, val_metrics = run_expert(model=None, expert=expert, epochs=param["AL"]["EPOCH_TRAIN"], dataloaders=dataloaders, param=param, id=expert.labelerId, seed=seed, fold=fold, n_images=n_images, mod="ssl", prediction_type="target")

            metrics[labelerId][n_images] = {"train_metrics": train_metrics, "val_metrics": val_metrics}

        dataLoaderTrainUnlabeled = _loader(_memory_dataset(all_data_filenames[indices_unlabeled], all_data_y[indices_unlabeled], reference_predict, 0, indices_unlabeled))

    print("Test Data:")
    met_test = {}
    for labelerId, expert in experts.items():
        # Fix: the test loader is built per expert. The original built one loader
        # from a leaked loop variable, so every expert was scored against the
        # last expert's labels.
        dataLoaderVal = _loader(_memory_dataset(test_dataset.getAllFilenames(), np.array(test_dataset.getAllTargets()), expert.predict, 1, test_dataset.getAllIndices()))
        # NOTE(review): the "+ 5" offset in n_images is kept from the original code;
        # its purpose is not visible here.
        met_test[expert.labelerId] = al.metrics_print_expert(model=None, expert=expert, data_loader=dataLoaderVal, id=expert.labelerId, seed=seed, fold=fold, n_images=param["AL"]["INITIAL_SIZE"] + (param["AL"]["ROUNDS"] + 5) * param["AL"]["LABELS_PER_ROUND"], step="Test", param=param, mod="ssl", prediction_type="target")

        # Fix: "End" is added next to "Start" instead of replacing the whole entry.
        met = al.testExpert(expert, val_dataset, image_container, param, mod, prediction_type, seed, fold, data_name="Val")
        metrics[labelerId]["Val"]["End"] = met

        met = al.testExpert(expert, test_dataset, image_container, param, mod, prediction_type, seed, fold, data_name="Test")
        metrics[labelerId]["Test"]["End"] = met

    print("AL finished")
    # Fix: ``expert_models`` was never defined. The experts dict is returned in
    # its place so callers can still unpack three values.
    return experts, met_test, metrics

In [8]:
def getExpertsSSL_AL(dataManager, param, fold, seed):
    """Create SSL experts and refine them with active learning.

    Args:
        dataManager: Provides ``getSSLDataset(seed)`` and ``getBasicDataset()``.
        param: Run configuration. ``param["MOD"]`` selects the strategy:
            "confidence" (each expert refined on its own) or
            "disagreement" / "disagreement_diff" (experts refined jointly).
        fold: Fold index.
        seed: Seed used for the labeled-index sampling and the AL routines.

    Returns:
        Tuple ``(experts, metrics)`` where ``experts`` maps labeler id to expert
        and ``metrics`` maps labeler id to that expert's metrics.

    Raises:
        ValueError: If ``param["MOD"]`` is not one of the supported strategies.
    """
    joint_mods = ("disagreement", "disagreement_diff")
    # Fix: an unknown MOD used to fall through to an unbound ``metrics`` at the
    # return statement. It is now rejected before any expensive work starts.
    if param["MOD"] != "confidence" and param["MOD"] not in joint_mods:
        raise ValueError(f'Unsupported param["MOD"] for SSL_AL: {param["MOD"]!r}')

    mod = "ssl"
    prediction_type = param["EXPERT_PREDICT"]

    sslDataset = dataManager.getSSLDataset(seed)

    sslDataset.createLabeledIndices(labelerIds=param["LABELER_IDS"], n_L=param["AL"]["INITIAL_SIZE"], k=round(param["AL"]["INITIAL_SIZE"]*param["OVERLAP"]/100), seed=seed, sample_equal=param["SAMPLE_EQUAL"])

    train_dataloader, val_dataloader, test_dataloader = sslDataset.get_data_loader_for_fold(fold)
    dataloaders = (train_dataloader, val_dataloader, test_dataloader)

    embedded_model = ssl.create_embedded_model(dataloaders, param, param["NEPTUNE"])

    experts = {}
    for labelerId in param["LABELER_IDS"]:
        nih_expert = expert_module.Expert(dataset = dataManager.getBasicDataset(), labeler_id=labelerId)
        emb_model, model = ssl.getExpertModelSSL(labelerId=labelerId, sslDataset=sslDataset, seed=seed, fold_idx=fold, n_labeled=None, embedded_model=embedded_model, param=param, neptune_param=param["NEPTUNE"])
        nih_expert.setModel(expert_module.SSLModel(emb_model, model), mod="SSL")
        experts[labelerId] = nih_expert

    if param["MOD"] == "confidence":
        # Fix: metrics are collected per labeler. The original overwrote them on
        # every iteration and returned only the last labeler's metrics.
        metrics = {}
        for labelerId in param["LABELER_IDS"]:
            _, metrics[labelerId] = getExpertModelSSL_AL(dataManager=dataManager, expert=experts[labelerId], labelerId=labelerId, param=param, seed=seed, fold=fold, learning_mod=mod, prediction_type=prediction_type)
    else:
        # Fix: ``== ("disagreement" or "disagreement_diff")`` only ever matched
        # "disagreement". The keyword is also ``mod`` now, as in the callee's
        # signature; the original passed ``learning_mod``.
        _, _, metrics = getExpertModelsSSL_AL(dataManager, experts, param, seed, fold, mod=mod, prediction_type=prediction_type)

    return experts, metrics

In [9]:
def getExpertsSSL(dataManager, param, fold, seed):
    """Create one SSL expert per labeler and evaluate each on the val and test splits.

    Args:
        dataManager: Provides ``getSSLDataset``, ``getBasicDataset`` and
            ``getKFoldDataloader``.
        param: Run configuration; ``LABELER_IDS``, ``LABELED``, ``OVERLAP``,
            ``SAMPLE_EQUAL``, ``EXPERT_PREDICT`` and ``NEPTUNE`` are read.
        fold: Fold index.
        seed: Seed used for the labeled-index sampling.

    Returns:
        Tuple ``(experts, metrics)``; ``metrics[labelerId]["Val"|"Test"]["End"]``
        holds the result of ``al.testExpert`` for that expert and split.
    """
    sslDataset = dataManager.getSSLDataset(seed)

    learning_mode = "ssl"
    expert_prediction = param["EXPERT_PREDICT"]

    # k is the overlap percentage applied to the number of labeled images.
    sslDataset.createLabeledIndices(
        labelerIds=param["LABELER_IDS"],
        n_L=param["LABELED"],
        k=round(param["LABELED"]*param["OVERLAP"]/100),
        seed=seed,
        sample_equal=param["SAMPLE_EQUAL"],
    )

    fold_loaders = tuple(sslDataset.get_data_loader_for_fold(fold))
    train_dataloader, val_dataloader, test_dataloader = fold_loaders
    embedded_model = ssl.create_embedded_model((train_dataloader, val_dataloader, test_dataloader), param, param["NEPTUNE"])

    # One expert per labeler; all of them share the embedding model.
    experts = {}
    for labelerId in param["LABELER_IDS"]:
        current_expert = expert_module.Expert(dataset=dataManager.getBasicDataset(), labeler_id=labelerId)
        emb_model, head_model = ssl.getExpertModelSSL(
            labelerId=labelerId,
            sslDataset=sslDataset,
            seed=seed,
            fold_idx=fold,
            n_labeled=None,
            embedded_model=embedded_model,
            param=param,
            neptune_param=param["NEPTUNE"],
        )
        current_expert.setModel(expert_module.SSLModel(emb_model, head_model), mod="SSL")
        experts[labelerId] = current_expert

    kfold_loader = dataManager.getKFoldDataloader(seed)
    _train_split, val_split, test_split = kfold_loader.get_dataset_for_folder(fold)
    image_container = kfold_loader.get_ImageContainer()

    val_dataset = ds.NIHDataset(val_split, preload=False, preprocess=False, param=param, image_container=image_container)
    test_dataset = ds.NIHDataset(test_split, preload=False, preprocess=False, param=param, image_container=image_container)

    metrics = {}
    for labelerId, expert in experts.items():
        expert_result = {}
        metrics[labelerId] = expert_result

        # Validation first, then test, as before.
        val_result = al.testExpert(expert, val_dataset, image_container, param, learning_mode, expert_prediction, seed, fold, data_name="Val")
        expert_result["Val"] = {"End": val_result}

        test_result = al.testExpert(expert, test_dataset, image_container, param, learning_mode, expert_prediction, seed, fold, data_name="Test")
        expert_result["Test"] = {"End": test_result}

    return experts, metrics

In [10]:
def getExpertsAL(dataManager, param, fold_idx, seed):
    """Create experts trained with active learning for one fold.

    Args:
        dataManager: Provides ``getKFoldDataloader`` and ``getBasicDataset``.
        param: Run configuration. ``param["MOD"]`` selects per-expert training
            ("confidence") or joint training ("disagreement" / "disagreement_diff").
        fold_idx: Fold index.
        seed: Seed used for sampling the initial indices.

    Returns:
        Tuple ``(experts, f1_experts, ac_b, metrics)``: experts by labeler id,
        lists of F1 and balanced-accuracy values by labeler id, and the metrics
        returned by the ``al`` training routine.
    """
    kfold_loader = dataManager.getKFoldDataloader(seed)
    train_split, val_split, test_split = kfold_loader.get_dataset_for_folder(fold_idx)
    image_container = kfold_loader.get_ImageContainer()

    def _as_dataset(split):
        # All three splits are wrapped with identical settings.
        return ds.NIHDataset(split, preload=False, preprocess=False, param=param, image_container=image_container)

    expert_train_dataset = _as_dataset(train_split)
    expert_val_dataset = _as_dataset(val_split)
    expert_test_dataset = _as_dataset(test_split)

    # OVERLAP is a percentage of the initial labeled set size.
    overlap_percent = param["OVERLAP"]
    all_indices = list(range(len(expert_train_dataset.getAllIndices())))
    overlap_k = round(param["AL"]["INITIAL_SIZE"]*overlap_percent/100)
    if param["NEPTUNE"]["NEPTUNE"]:
        # NOTE(review): ``run`` is not defined in this function; presumably a
        # notebook-level Neptune run object — confirm it exists before this runs.
        run["param/overlap_k"] = overlap_k
    indices = al.sampleIndices(n=param["AL"]["INITIAL_SIZE"], k=overlap_k, all_indices=all_indices, experten=list(param["LABELER_IDS"]), seed=seed)

    if param["NEPTUNE"]["NEPTUNE"]:
        run[f"Seed_{seed}/Fold_{fold_idx}/Experts/Indices"] = indices

    print("Random indices:")
    print(indices)

    f1_experts = {labeler: [] for labeler in list(param["LABELER_IDS"])}
    ac_b = {labeler: [] for labeler in f1_experts}

    experts = {}
    metrics = {}
    # Create the experts; in "confidence" mode each one is trained right away.
    for labelerId in list(param["LABELER_IDS"]):
        nih_expert = expert_module.Expert(dataset=dataManager.getBasicDataset(), labeler_id=labelerId)
        experts[labelerId] = nih_expert

        if param["MOD"] != "confidence":
            continue

        expert_model, met, metric = al.getExpertModel(indices[labelerId], expert_train_dataset, expert_val_dataset, expert_test_dataset, nih_expert, param, seed, fold_idx, image_container=image_container, learning_mod="al", prediction_type=param["EXPERT_PREDICT"])
        nih_expert.setModel(expert_model, mod="AL")
        f1_experts[labelerId].append(met["f1"])
        ac_b[labelerId].append(met["accurancy_balanced"])
        metrics[labelerId] = metric

    if param["MOD"] in ("disagreement", "disagreement_diff"):
        # Joint training: all experts are handed over together.
        expert_models, met, metrics = al.getExpertModels(indices, experts, expert_train_dataset, expert_val_dataset, expert_test_dataset, param, seed, fold_idx, mod=param["MOD"], image_container=image_container, learning_mod="al", prediction_type=param["EXPERT_PREDICT"])
        for labelerId, expert in experts.items():
            expert.setModel(expert_models[labelerId], mod="AL")
        for labelerId in list(param["LABELER_IDS"]):
            labeler_result = met[labelerId]
            f1_experts[labelerId].append(labeler_result["f1"])
            ac_b[labelerId].append(labeler_result["accurancy_balanced"])

    return experts, f1_experts, ac_b, metrics

In [11]:
def getExpertsNormal(dataManager, param, fold_idx, seed):
    """Create experts trained via ``al.getExpertModelNormal`` for one fold.

    Args:
        dataManager: Provides ``getKFoldDataloader`` and ``getBasicDataset``.
        param: Run configuration; ``LABELED``, ``OVERLAP``, ``LABELER_IDS``,
            ``EXPERT_PREDICT`` and ``NEPTUNE`` are read.
        fold_idx: Fold index.
        seed: Seed used for sampling the labeled indices.

    Returns:
        Tuple ``(experts, f1_experts, ac_b, metrics)``: experts by labeler id,
        lists of F1 and balanced-accuracy values by labeler id, and the
        per-labeler metrics returned by the training routine.
    """
    kfold_loader = dataManager.getKFoldDataloader(seed)
    train_split, val_split, test_split = kfold_loader.get_dataset_for_folder(fold_idx)
    image_container = kfold_loader.get_ImageContainer()

    def _as_dataset(split):
        # All three splits are wrapped with identical settings.
        return ds.NIHDataset(split, preload=False, preprocess=False, param=param, image_container=image_container)

    expert_train_dataset = _as_dataset(train_split)
    expert_val_dataset = _as_dataset(val_split)
    expert_test_dataset = _as_dataset(test_split)

    # OVERLAP is a percentage of the number of labeled images.
    overlap_percent = param["OVERLAP"]
    all_indices = list(range(len(expert_train_dataset.getAllIndices())))
    overlap_k = round(param["LABELED"]*overlap_percent/100)
    if param["NEPTUNE"]["NEPTUNE"]:
        # NOTE(review): ``run`` is not defined in this function; presumably a
        # notebook-level Neptune run object — confirm it exists before this runs.
        run["param/overlap_k"] = overlap_k
    indices = al.sampleIndices(n=param["LABELED"], k=overlap_k, all_indices=all_indices, experten=list(param["LABELER_IDS"]), seed=seed)

    if param["NEPTUNE"]["NEPTUNE"]:
        run[f"Seed_{seed}/Fold_{fold_idx}/Experts/Indices"] = indices

    print("Random indices:")
    print(indices)

    f1_experts = {labeler: [] for labeler in list(param["LABELER_IDS"])}
    ac_b = {labeler: [] for labeler in f1_experts}

    experts = {}
    metrics = {}
    # Create and train one expert per labeler.
    for labelerId in list(param["LABELER_IDS"]):
        nih_expert = expert_module.Expert(dataset=dataManager.getBasicDataset(), labeler_id=labelerId)
        experts[labelerId] = nih_expert

        trained_model, met, metric = al.getExpertModelNormal(indices[labelerId], expert_train_dataset, expert_val_dataset, expert_test_dataset, nih_expert, param, seed, fold_idx, image_container=image_container, learning_mod="al", prediction_type=param["EXPERT_PREDICT"])
        nih_expert.setModel(trained_model, mod="AL")

        f1_experts[labelerId].append(met["f1"])
        ac_b[labelerId].append(met["accurancy_balanced"])
        metrics[labelerId] = metric

    return experts, f1_experts, ac_b, metrics

In [12]:
def getExperts(dataManager, param, seed, fold):
    """Create the expert models for the setting chosen in ``param["SETTING"]``.

    Args:
        dataManager: Data manager forwarded to the setting-specific factory.
        param: Run configuration; ``param["SETTING"]`` must be one of
            "AL", "SSL", "SSL_AL" or "NORMAL".
        seed: Seed forwarded to the factory.
        fold: Fold index forwarded to the factory.

    Returns:
        Tuple ``(experts, metrics)`` as produced by the selected factory. The
        extra F1 / balanced-accuracy values of the AL and NORMAL factories are
        dropped.

    Raises:
        ValueError: If ``param["SETTING"]`` is not a known setting.
    """
    setting = param["SETTING"]

    if setting == "AL":
        experts, _, _, metrics = getExpertsAL(dataManager, param, fold, seed)
    elif setting == "SSL":
        experts, metrics = getExpertsSSL(dataManager, param, fold, seed)
    elif setting == "SSL_AL":
        experts, metrics = getExpertsSSL_AL(dataManager, param, fold, seed)
    elif setting == "NORMAL":
        experts, _, _, metrics = getExpertsNormal(dataManager, param, fold, seed)
    else:
        # Fix: an unknown setting used to end in an UnboundLocalError on ``experts``.
        raise ValueError(f'Unknown param["SETTING"]: {setting!r}')

    print("Experts")
    print(experts)

    return experts, metrics

In [13]:
def L2D_Verma(train_loader, val_loader, test_loader, full_dataloader, expert_fns, param, seed, fold_idx, experts):
    """Train the deferral model from the ``verm`` module and return its metrics.

    The network is a ``vres.ResNet50_defer`` with one output per class in
    ``param["n_classes"]`` plus one output per expert function.

    Returns:
        Tuple ``(metrics_train_all, metrics_val_all, metrics_test, metrics_full)``
        exactly as returned by ``verm.train``.
    """
    n_outputs = int(param["n_classes"]) + len(expert_fns)
    model = vres.ResNet50_defer(n_outputs)

    # NOTE(review): ``param`` is passed both positionally and as ``param=``; this
    # only works if the sixth positional parameter of ``verm.train`` has a
    # different name — confirm against its signature.
    training_result = verm.train(model, train_loader, val_loader, test_loader, expert_fns, param, seed=seed, experts=experts,
                                 fold=fold_idx, full_dataloader=full_dataloader, param=param)
    metrics_train_all, metrics_val_all, metrics_test, metrics_full = training_result

    return metrics_train_all, metrics_val_all, metrics_test, metrics_full

In [14]:
def one_run(dataManager, run_param):
    """Train experts and both learning-to-defer systems for every configured seed.

    For each seed in ``run_param["SEEDS"]`` (currently fold 0 only) the experts are
    created with ``getExperts``, their predictions are initialised on the full
    dataloader, and the Verma and Hemmer systems are trained.

    Args:
        dataManager: Provides ``getKFoldDataloader(seed=...)``.
        run_param: Run configuration; ``SEEDS`` and ``SETTING`` are read here.

    Returns:
        Tuple ``(experts, expert_metrics, metrics_full, system_accuracy,
        classifier_coverage, all_train_metrics, all_val_metrics,
        all_test_metrics)``. ``expert_metrics`` is keyed by seed and fold; every
        other element comes from the last seed/fold processed.

    Raises:
        ValueError: If ``run_param["SEEDS"]`` is empty.
    """
    seeds = list(run_param["SEEDS"])
    if not seeds:
        # Fix: an empty seed list used to end in an UnboundLocalError at ``return``.
        raise ValueError('run_param["SEEDS"] must contain at least one seed')

    # SETTING -> (mode for init_model_predictions, name of the expert's predict method).
    prediction_setup = {
        "AL": ("AL", "predict_model_predefined_al"),
        "SSL": ("SSL", "predict_model_predefined_ssl"),
        "SSL_AL": ("SSL", "predict_model_predefined_ssl"),
        "NORMAL": ("AL", "predict_model_predefined_al"),
    }

    expert_metrics = {}

    for seed in seeds:
        print("Seed: " + str(seed))
        if seed != "":
            set_seed(seed)

        expert_metrics[seed] = {}

        # Only the first fold is evaluated; the full sweep would be
        # range(run_param["K"]).
        for fold_idx in range(1):

            # Fix: the original printed the literal "/n" instead of a newline.
            print("\n")
            print(f"Seed: {seed} - Fold: {fold_idx} \n")

            experts, expert_metric = getExperts(dataManager, run_param, seed, fold_idx)
            expert_metrics[seed][fold_idx] = expert_metric

            nih_dataloader = dataManager.getKFoldDataloader(seed=seed)

            train_loader, val_loader, test_loader = nih_dataloader.get_data_loader_for_fold(fold_idx)
            full_dataloader = nih_dataloader.getFullDataloader()

            expert_fns = []
            print(run_param["SETTING"])
            setup = prediction_setup.get(run_param["SETTING"])
            if setup is not None:
                prediction_mode, predict_name = setup
                for labelerId, expert in experts.items():
                    # Initialise the predictions first, then collect the bound
                    # predict method.
                    expert.init_model_predictions(full_dataloader, mod=prediction_mode)
                    expert_fns.append(getattr(expert, predict_name))

            metrics_train_all, metrics_val_all, metrics_test, metrics_full = L2D_Verma(train_loader, val_loader, test_loader, full_dataloader, expert_fns, run_param, seed, fold_idx, experts=experts)
            system_accuracy, classifier_coverage, all_train_metrics, all_val_metrics, all_test_metrics = hm.L2D_Hemmer(train_loader, val_loader, test_loader, full_dataloader, expert_fns, run_param, seed, fold_idx, experts)

    return experts, expert_metrics, metrics_full, system_accuracy, classifier_coverage, all_train_metrics, all_val_metrics, all_test_metrics
            

In [15]:
def _is_valid_run_combination(setting, mod, expert_predict, cost, cost_options):
    """Return True if this point of the experiment grid should be executed.

    The rules are the same filters the grid search has always applied:
      * settings "AL" and "SSL_AL" only run with mod "confidence",
        "disagreement" or "disagreement_diff";
      * setting "SSL" only runs with mod "ssl", "NORMAL" only with mod "normal";
      * settings "SSL" and "SSL_AL" never run with expert_predict "right";
      * expert_predict "target" is run for the first configured cost only.
    """
    if setting in ("AL", "SSL_AL") and mod not in ("confidence", "disagreement", "disagreement_diff"):
        return False
    if setting == "SSL" and mod != "ssl":
        return False
    if setting == "NORMAL" and mod != "normal":
        return False
    if setting in ("SSL", "SSL_AL") and expert_predict == "right":
        return False
    if expert_predict == "target" and cost != cost_options[0]:
        return False
    return True


def run_experiment(param):
    """Run every valid configuration of the experiment grid and collect its metrics.

    For each entry of ``param["LABELER_IDS"]`` a ``ds.DataManager`` is created once.
    Then the cartesian product of ``param["AL"]["INITIAL_SIZE"]``,
    ``["AL"]["LABELS_PER_ROUND"]``, ``["AL"]["ROUNDS"]``, ``["AL"]["COST"]``,
    ``param["OVERLAP"]``, ``["SETTING"]``, ``["MOD"]``, ``["EXPERT_PREDICT"]`` and
    ``["SAMPLE_EQUAL"]`` is walked in that nesting order. Combinations with
    ``INITIAL_SIZE + ROUNDS * LABELS_PER_ROUND >= 128`` or rejected by
    ``_is_valid_run_combination`` are skipped; every remaining one is passed to
    ``one_run``.

    Args:
        param: Experiment configuration dict. It is not modified; a deep copy is
            specialised for each run.

    Returns:
        A list with one dict per executed run. Each dict holds the run's
        configuration plus the keys "expert metrics", "verma" and "hemmer".

    Side effects:
        When ``param["NEPTUNE"]["NEPTUNE"]`` is truthy, a Neptune run is opened
        for every executed configuration and bound to the module-level name ``run``.
    """
    global run  # notebook-level handle of the most recent Neptune run
    import itertools  # function-scope import: the notebook's import cell is outside this block

    run_param = copy.deepcopy(param)
    expert_metrics_all = []

    #Every pair of labeler ids
    for labeler_ids in param["LABELER_IDS"]:
        run_param["LABELER_IDS"] = labeler_ids
        run_param["labeler_ids"] = convert_ids_to_string(labeler_ids)

        # The data manager is constructed with this very dict object, so run_param is
        # updated in place below instead of being rebuilt for every run.
        dataManager = ds.DataManager(path=param["PATH"], target=param["TARGET"], param=run_param, seeds=param["SEEDS"])
        dataManager.createData()

        al_grid = param["AL"]

        # Same iteration order as the former nested loops (right-most factor varies fastest).
        grid = itertools.product(
            al_grid["INITIAL_SIZE"],
            al_grid["LABELS_PER_ROUND"],
            al_grid["ROUNDS"],
            al_grid["COST"],
            param["OVERLAP"],
            param["SETTING"],
            param["MOD"],
            param["EXPERT_PREDICT"],
            param["SAMPLE_EQUAL"],
        )

        for (init_size, labels_per_round, rounds, cost, overlap,
             setting, mod, expert_predict, sample_equal) in grid:

            labeled = init_size + rounds * labels_per_round
            if labeled >= 128: #Prevents from large amount of data
                continue

            if not _is_valid_run_combination(setting, mod, expert_predict, cost, al_grid["COST"]):
                continue

            run_param["AL"]["INITIAL_SIZE"] = init_size
            run_param["AL"]["LABELS_PER_ROUND"] = labels_per_round
            run_param["AL"]["ROUNDS"] = rounds
            run_param["LABELED"] = labeled
            run_param["AL"]["COST"] = cost
            # Recomputed for every run: "target" runs are labelled with cost (0, 0), and
            # previously that string leaked into all following non-"target" runs.
            reported_cost = (0, 0) if expert_predict == "target" else cost
            run_param["AL"]["cost"] = convert_cost_to_string(reported_cost)
            run_param["OVERLAP"] = overlap
            run_param["SETTING"] = setting
            run_param["MOD"] = mod
            run_param["EXPERT_PREDICT"] = expert_predict
            run_param["SAMPLE_EQUAL"] = sample_equal

            use_neptune = param["NEPTUNE"]["NEPTUNE"]
            if use_neptune:
                run = neptune.init_run(
                    project=config_neptune["project"],
                    api_token=config_neptune["api_token"],
                    #custom_run_id="AL_" + 
                )
                # Drop the handle of the previous (already stopped) run so that every
                # run logs the same plain parameter dict as the first one did.
                run_param["NEPTUNE"].pop("RUN", None)
                run["param"] = run_param
                run_param["NEPTUNE"]["RUN"] = run

            try:
                print("\n #####################################################################################")
                print("\n \n \n NEW RUN \n")
                print("Initial size: " + str(init_size))
                print("Batch size: " + str(labels_per_round))
                print("Max rounds: " + str(rounds))
                print("Labeled: " + str(labeled))
                print("Cost: " + str(cost))
                print("Setting: " + str(setting))
                print("Mod: " + str(mod))
                print("Overlap: " + str(overlap))
                print("Prediction Type " + str(expert_predict))
                print("Sample equal " + str(sample_equal))

                metrics_save = {
                    "labeler_ids": labeler_ids,
                    "init_size": init_size,
                    "labels_per_round": labels_per_round,
                    "rounds": rounds,
                    "labeled": labeled,
                    "cost": cost,
                    "overlap": overlap,
                    "setting": setting,
                    "mod": mod,
                    "expert_predict": expert_predict,
                    "sample_equal": sample_equal,
                }

                start_time = time.time()
                # one_run returns 8 values; only the metric containers are stored here.
                (_, expert_metrics, metrics_full, _, _,
                 all_train_metrics, all_val_metrics, all_test_metrics) = one_run(dataManager, run_param)
                print("--- %s seconds ---" % (time.time() - start_time))

                metrics_save["expert metrics"] = expert_metrics
                metrics_save["verma"] = metrics_full
                metrics_save["hemmer"] = {
                    "train": all_train_metrics,
                    "val": all_val_metrics,
                    "test": all_test_metrics,
                }
                expert_metrics_all.append(metrics_save)

                if use_neptune:
                    run["metrics"] = metrics_save
            finally:
                # Close the Neptune run even when the experiment raises, so a failed
                # configuration does not leave a run open.
                if use_neptune:
                    run.stop()

    return expert_metrics_all

In [16]:
def convert_cost_to_string(tp):
    """Format the first two entries of a cost pair as the text ``"(a, b)"``."""
    first, second = tp[0], tp[1]
    return "(%s, %s)" % (first, second)

def convert_ids_to_string(ids):
    """Join the first two labeler ids into the text ``"id0, id1"``."""
    return "{}, {}".format(ids[0], ids[1])

def convert_list_to_string(li):
    """Render a sequence as the text ``"[a, b, c]"``.

    The previous implementation always returned ``None`` (bare ``return``),
    skipped the last two elements and put no separator between the rest.

    Args:
        li: Iterable of values; each one is converted with ``str``.

    Returns:
        The elements joined by ", " and wrapped in square brackets;
        ``"[]"`` for an empty input.
    """
    return "[" + ", ".join(str(el) for el in li) + "]"

In [17]:
# Experiment configuration consumed by run_experiment.
# List-valued entries that run_experiment iterates over (LABELER_IDS, MOD, OVERLAP,
# SAMPLE_EQUAL, SETTING, EXPERT_PREDICT and AL.INITIAL_SIZE / ROUNDS / LABELS_PER_ROUND /
# COST) span the experiment grid: one run is started per valid combination.
param = {
    "PATH": "../Datasets/NIH/",
    "TARGET": "Airspace_Opacity",
    "LABELER_IDS": [[4323195249, 4295232296]],  # each entry is one pair of labeler ids
    "K": 10, # Number of folds
    "SEEDS": [42], # Seeds for the experiments
    "GT": True, # Determines if the classifier gets all data with GT label or only the labeled data
    # Experiment modes. run_experiment only pairs "AL"/"SSL_AL" with the first three
    # modes, "SSL" with "ssl" and "NORMAL" with "normal".
    "MOD": ["confidence", "disagreement", "disagreement_diff", "ssl", "normal"],
    #"MOD": ["normal"],

    "OVERLAP": [100],
    "SAMPLE_EQUAL": [True],
    #"INITAL_SIZE": [8, 16, 32],
    #"ROUNDS": [2, 4, 8],
    #"LABELS_PER_ROUND": [4, 8, 16],

    "SETTING": ["AL", "SSL", "SSL_AL", "NORMAL"],
    #"SETTING": ["SSL"],

    "NUM_EXPERTS": 2,
    "NUM_CLASSES": 2,

    # run_experiment skips "right" for the SSL settings and runs "target" only for the first COST entry
    "EXPERT_PREDICT": ["right", "target"],

    "AL": { # Parameters for active learning
        # run_experiment computes labeled = INITIAL_SIZE + ROUNDS * LABELS_PER_ROUND
        # and skips combinations where that reaches 128 or more
        "INITIAL_SIZE": [8], #
        "EPOCH_TRAIN": 2, #
        "n_dataset": 2, # Number of classes
        "BATCH_SIZE": 4,
        "BATCH_SIZE_VAL": 32,
        "ROUNDS": [2],
        "LABELS_PER_ROUND": [4],
        "EPOCHS_DEFER": 5,
        "COST": [(3,0)], # Costs for cost-sensitive learning
        #"TRAIN REJECTOR": False,
        "PRELOAD": True,
        "PREPROCESS": True,
        
    },
    "SSL": {
        "PREBUILD": False,
        #"TRAIN_BATCH_SIZE": 128,
        # NOTE(review): 254 looks like a typo for 256 — confirm
        "TRAIN_BATCH_SIZE": 254,
        "TEST_BATCH_SIZE": 64,
        "N_EPOCHS": 3, # number of training epochs
        "BATCHSIZE": 8, # train batch size of labeled samples
        #"N_IMGS_PER_EPOCH": 32768, #number of training images for each epoch
        "N_IMGS_PER_EPOCH": 4381*1, # number of training images for each epoch
    },
    "L2D": { # Parameters for learning to defer
        "TRAIN_BATCH_SIZE": 128,
        "TEST_BATCH_SIZE": 64,
        "PRELOAD": True,
        "PREBUILD": True,
        "EPOCHS": 50,
        "VERMA": {},
        "HEMMER": {
            "EPOCHS": 5,
            "LR": 5e-3,
            "USE_LR_SCHEDULER": False,
            "DROPOUT": 0.00,
            "NUM_HIDDEN_UNITS": 30,
        },
        
    },
    "NEPTUNE": {
        "NEPTUNE": False, # when truthy, run_experiment opens one Neptune run per configuration
    },
    "EMBEDDED": {
        "ARGS": {
            'dataset': "nih",
            'model': "resnet50",
            'num_classes': 2,
            'batch': 64,
            'lr': 0.001,
        },
        "EPOCHS": 16,
    },
    
    
    
    "batch_size": 64,
    "alpha": 1.0, # scaling parameter for the loss function, default=1.0
    #"epochs": 50,
    "epochs": 50,
    "patience": 15, # number of patience steps for early stopping the training
    "expert_type": "MLPMixer", # specify the expert type. For the type of experts available, see -> models -> experts. default=predict
    "n_classes": 2, # K for K-class classification
    "k": 0, #
    "n_experts": 2, #
    "lr": 0.001, # learning rate
    "weight_decay": 5e-4, #
    "warmup_epochs": 5, #
    "loss_type": "softmax", # surrogate loss type for learning to defer
    "ckp_dir": "./Models", # directory name to save the checkpoints
    "experiment_name": "multiple_experts", # specify the experiment name. Checkpoints will be saved with this name
}

In [18]:
# CUDA_LAUNCH_BLOCKING is read by the CUDA runtime from the process environment; a bare
# Python assignment never reaches it. Export the value (the notebook-level name is kept).
# Debug aid only: synchronous kernel launches make CUDA errors surface at the failing
# call but slow training down — set the value to 0 for timing runs. It has to be
# exported before the first CUDA call of the kernel session to take effect.
CUDA_LAUNCH_BLOCKING = 1
os.environ["CUDA_LAUNCH_BLOCKING"] = str(CUDA_LAUNCH_BLOCKING)

# Let cuDNN benchmark its convolution algorithms and pick the fastest one.
torch.backends.cudnn.benchmark = True

In [None]:
# Run the whole experiment grid described by `param`; the returned list holds one
# metrics dict per executed configuration and is pickled further down.
expert_metrics_all = run_experiment(param)

Number of images of the whole dataset: 4381
Loaded image number: 0
Loaded image number: 200
Loaded image number: 400
Loaded image number: 600
Loaded image number: 800
Loaded image number: 1000
Loaded image number: 1200
Loaded image number: 1400
Loaded image number: 1600
Loaded image number: 1800
Loaded image number: 2000
Loaded image number: 2200
Loaded image number: 2400
Loaded image number: 2600
Loaded image number: 2800
Loaded image number: 3000
Loaded image number: 3200
Loaded image number: 3400
Loaded image number: 3600
Loaded image number: 3800
Loaded image number: 4000
Loaded image number: 4200
Full length: 4381
Loaded image number: 0
Loaded image number: 200
Loaded image number: 400
Loaded image number: 600
Loaded image number: 800


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.predictions["Image ID"] = self.predictions["Image ID"].astype('category').copy()


Length of train + test + val: 852
Length of train + test + val: 852
Length of train + test + val: 852
Length of train + test + val: 852
Length of train + test + val: 852
Length of train + test + val: 852
Length of train + test + val: 852
Length of train + test + val: 852
Length of train + test + val: 852
Length of train + test + val: 852
Loaded set number 0
Loaded set number 1
Loaded set number 2
Loaded set number 3
Loaded set number 4
Loaded set number 5
Loaded set number 6
Loaded set number 7
Loaded set number 8
Loaded set number 9
Added
Added
Added
Added
Added
Added
Added
Added
Added
Added

 #####################################################################################

 
 
 NEW RUN 

Initial size: 8
Batch size: 4
Max rounds: 2
Labeled: 16
Cost: (3, 0)
Setting: AL
Mod: confidence
Overlap: 100
Prediction Type right
Sample equal True
Seed: 42
/n
Seed: 42 - Fold: 0 

Random indices:
{4323195249: [114, 25, 281, 250, 228, 142, 104, 558], 4295232296: [114, 25, 281, 250, 228, 142, 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.predictions["Image ID"] = self.predictions["Image ID"].astype('category').copy()


load Resnet-50 checkpoint for expert
None
Run Expert
Epoch: [0][0/2]	Time 0.289 (0.289)	Loss 2.9479 (2.9479)	Prec@1 100.000 (100.000)
Epoch: [1][0/2]	Time 0.257 (0.257)	Loss 1.8052 (1.8052)	Prec@1 50.000 (50.000)
al
right
Accuracy of the network on the 163 test images: 21.472 %
Confusion Matrix:
[[ 33   0]
 [128   2]]
F1 Score: 0.030303030303030307
Accuracy balanced
0.5076923076923077
Starting with AL

 
 Round 0 
 

Run Expert
Epoch: [0][0/3]	Time 0.299 (0.299)	Loss 2.8356 (2.8356)	Prec@1 50.000 (50.000)
Epoch: [1][0/3]	Time 0.280 (0.280)	Loss 1.6865 (1.6865)	Prec@1 50.000 (50.000)
al
right
Accuracy of the network on the 163 test images: 20.245 %
Confusion Matrix:
[[ 33   0]
 [130   0]]
F1 Score: 0.0
Accuracy balanced
0.5

 
 Round 1 
 

Run Expert
Epoch: [0][0/4]	Time 0.300 (0.300)	Loss 1.7313 (1.7313)	Prec@1 25.000 (25.000)
Epoch: [1][0/4]	Time 0.277 (0.277)	Loss 1.9482 (1.9482)	Prec@1 50.000 (50.000)
al
right
Accuracy of the network on the 163 test images: 20.245 %
Confusion Matrix

 60%|████████████████████████████████████████████████████████████████████████████████████████████▍                                                             | 3/5 [00:10<00:06,  3.31s/it]

In [20]:
# NOTE(review): this import belongs in the notebook's import cell at the top.
import pickle

# Store the list of per-run metric dicts returned by run_experiment (serialize)
with open('Metrics.pickle', 'wb') as handle:
    pickle.dump(expert_metrics_all, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Load data (deserialize)
#with open('filename.pickle', 'rb') as handle:
#    unserialized_data = pickle.load(handle)

In [22]:
# Reload the metrics written by the pickle.dump cell.
# NOTE: pickle.load can execute arbitrary code from the file — only load files
# produced by this notebook.
with open('Metrics.pickle', 'rb') as handle:
    data = pickle.load(handle)

In [27]:
# One row per executed run; the three nested metric containers are left out so that
# only the configuration columns are shown.
pd.DataFrame(data).drop(columns=["expert metrics", "verma", "hemmer"])

Unnamed: 0,labeler_ids,init_size,labels_per_round,rounds,labeled,cost,overlap,setting,mod,expert_predict,sample_equal
0,"[4323195249, 4295232296]",8,4,2,16,"(3, 0)",100,AL,confidence,right,True
1,"[4323195249, 4295232296]",8,4,2,16,"(0, 0)",100,AL,confidence,target,True
2,"[4323195249, 4295232296]",8,4,2,16,"(0, 0)",100,AL,disagreement,right,True
3,"[4323195249, 4295232296]",8,4,2,16,"(0, 0)",100,AL,disagreement_diff,right,True
4,"[4323195249, 4295232296]",8,4,2,16,"(0, 0)",100,NORMAL,normal,right,True


In [32]:
# Validation metrics of one expert from the first run; the index path is
# ["expert metrics"][seed 42][fold 0][labeler id 4323195249]["Val"].
pd.DataFrame(data[0]["expert metrics"][42][0][4323195249]["Val"])

Unnamed: 0,End
accurancy,8.588957
accurancy_balanced,0.5
f1,0.0
fn,149.0
fp,0.0
tn,14.0
tp,0.0


In [24]:
# Raw metrics dict of the first executed run (configuration keys plus
# "expert metrics", "verma" and "hemmer").
data[0]

{'labeler_ids': [4323195249, 4295232296],
 'init_size': 8,
 'labels_per_round': 4,
 'rounds': 2,
 'labeled': 16,
 'cost': (3, 0),
 'overlap': 100,
 'setting': 'AL',
 'mod': 'confidence',
 'expert_predict': 'right',
 'sample_equal': True,
 'expert metrics': {42: {0: {4323195249: {'Val': {'End': {'tn': 14,
       'fp': 0,
       'fn': 149,
       'tp': 0,
       'accurancy': 8.588957055214724,
       'f1': 0.0,
       'accurancy_balanced': 0.5}},
     'Test': {'End': {'tn': 8,
       'fp': 0,
       'fn': 80,
       'tp': 0,
       'accurancy': 9.090909090909092,
       'f1': 0.0,
       'accurancy_balanced': 0.5}}},
    4295232296: {'Val': {'End': {'tn': 33,
       'fp': 0,
       'fn': 130,
       'tp': 0,
       'accurancy': 20.245398773006134,
       'f1': 0.0,
       'accurancy_balanced': 0.5}},
     'Test': {'End': {'tn': 18,
       'fp': 0,
       'fn': 70,
       'tp': 0,
       'accurancy': 20.454545454545453,
       'f1': 0.0,
       'accurancy_balanced': 0.5}}}}}},
 'verma': {

In [19]:
# NOTE(review): second, out-of-order invocation whose return value is discarded. The
# saved output below reports "Cost: (10, 0)" and "Overlap: 0", which the `param` defined
# in this notebook (COST [(3,0)], OVERLAP [100]) cannot produce, so it stems from an
# older configuration. Consider removing this cell.
run_experiment(param)

Number of images of the whole dataset: 4381
Loaded image number: 0
Loaded image number: 200
Loaded image number: 400
Loaded image number: 600
Loaded image number: 800
Loaded image number: 1000
Loaded image number: 1200
Loaded image number: 1400
Loaded image number: 1600
Loaded image number: 1800
Loaded image number: 2000
Loaded image number: 2200
Loaded image number: 2400
Loaded image number: 2600
Loaded image number: 2800
Loaded image number: 3000
Loaded image number: 3200
Loaded image number: 3400
Loaded image number: 3600
Loaded image number: 3800
Loaded image number: 4000
Loaded image number: 4200
Full length: 4381
Loaded image number: 0
Loaded image number: 200
Loaded image number: 400
Loaded image number: 600
Loaded image number: 800




Length of train + test + val: 852
Length of train + test + val: 852
Length of train + test + val: 852
Length of train + test + val: 852
Length of train + test + val: 852
Length of train + test + val: 852
Length of train + test + val: 852
Length of train + test + val: 852
Length of train + test + val: 852
Length of train + test + val: 852
Loaded set number 0
Loaded set number 1
Loaded set number 2
Loaded set number 3
Loaded set number 4
Loaded set number 5
Loaded set number 6
Loaded set number 7
Loaded set number 8
Loaded set number 9
Added
Added
Added
Added
Added
Added
Added
Added
Added
Added
/n /n NEW RUN /n
Initial size: 8
Batch size: 4
Max rounds: 2
Labeled: 16
Cost: (10, 0)
Setting: SSL_AL
Mod: confidence
Overlap: 0
Seed: 42
{'NEPTUNE': False}
Train dir: /home/joli/Masterarbeit/SSL_Working/NIH/emb_net@dataset-nih-model-resnet50-num_classes-2/
load Resnet-50 checkpoint
load Resnet-50 pretrained on ImageNet
Loaded Model resnet50
48
No Checkpoint found
Starting new from epoch 1
ii 23 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.predictions["Image ID"] = self.predictions["Image ID"].astype('category').copy()


load Resnet-50 checkpoint
None
Loaded Model resnet50
2023-07-06 07:33:10,865 - INFO - train -   Total params: 4.33M
Index: 0
Labels: 8
Index: 0
Index: 0
No Checkpoint found at SSL_Working/NIH/EmbeddingCM_bin/ex4323195249_xNone_seed42/ckp.latest
Starting new from epoch 1
2023-07-06 07:33:11,468 - INFO - train -   -----------start training--------------
2023-07-06 07:33:25,684 - INFO - train -   NIH-xNone-s42, EmbeddingCM_bin | epoch:0, iter: 64. loss_u: 0.254. loss_x: 0.052. loss_c: 3.857. n_correct_u: 15.56/39.55. Mask:0.706. num_pos: 25.6. LR: 0.030. Time: 14.21
2023-07-06 07:33:38,693 - INFO - train -   NIH-xNone-s42, EmbeddingCM_bin | epoch:0, iter: 128. loss_u: 0.196. loss_x: 0.030. loss_c: 3.822. n_correct_u: 15.66/39.00. Mask:0.696. num_pos: 25.7. LR: 0.030. Time: 13.00
2023-07-06 07:33:51,577 - INFO - train -   NIH-xNone-s42, EmbeddingCM_bin | epoch:0, iter: 192. loss_u: 0.167. loss_x: 0.022. loss_c: 3.846. n_correct_u: 14.70/36.23. Mask:0.647. num_pos: 25.5. LR: 0.030. Time: 12

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.predictions["Image ID"] = self.predictions["Image ID"].astype('category').copy()


load Resnet-50 checkpoint
None
Loaded Model resnet50
2023-07-06 07:38:52,958 - INFO - train -   Total params: 4.33M
Index: 0
Labels: 8
Index: 0
Index: 0
No Checkpoint found at SSL_Working/NIH/EmbeddingCM_bin/ex4295232296_xNone_seed42/ckp.latest
Starting new from epoch 1
2023-07-06 07:38:53,561 - INFO - train -   -----------start training--------------
2023-07-06 07:39:07,808 - INFO - train -   NIH-xNone-s42, EmbeddingCM_bin | epoch:0, iter: 64. loss_u: 0.191. loss_x: 0.125. loss_c: 3.972. n_correct_u: 5.53/41.39. Mask:0.739. num_pos: 37.4. LR: 0.030. Time: 14.25
2023-07-06 07:39:20,569 - INFO - train -   NIH-xNone-s42, EmbeddingCM_bin | epoch:0, iter: 128. loss_u: 0.171. loss_x: 0.072. loss_c: 3.980. n_correct_u: 4.94/39.72. Mask:0.709. num_pos: 36.8. LR: 0.030. Time: 12.76
2023-07-06 07:39:33,577 - INFO - train -   NIH-xNone-s42, EmbeddingCM_bin | epoch:0, iter: 192. loss_u: 0.154. loss_x: 0.049. loss_c: 3.986. n_correct_u: 4.76/39.05. Mask:0.697. num_pos: 36.8. LR: 0.030. Time: 13.01

         93390953 function calls (89555646 primitive calls) in 1111.098 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
   536037  372.437    0.001  372.437    0.001 {method 'item' of 'torch._C._TensorBase' objects}
    13268  182.562    0.014  182.562    0.014 {method 'cuda' of 'torch._C._TensorBase' objects}
    54377   66.511    0.001   66.511    0.001 {method 'acquire' of '_thread.lock' objects}
     1028   53.971    0.053   53.976    0.053 {method 'poll' of 'select.poll' objects}
   248690   49.118    0.000   49.118    0.000 {method 'decode' of 'ImagingDecoder' objects}
     1028   45.296    0.044   45.432    0.044 {built-in method posix.fork}
     4286   45.282    0.011   45.282    0.011 {method 'run_backward' of 'torch._C._EngineBase' objects}
     4242   43.275    0.010   43.275    0.010 {method 'cpu' of 'torch._C._TensorBase' objects}
     9858   33.819    0.003   33.819    0.003 {built-in method torch.cat}
     52

In [26]:
# Display the Neptune run handle that run_experiment publishes through `global run`;
# the name is only bound after a run with param["NEPTUNE"]["NEPTUNE"] enabled.
run

<neptune.metadata_containers.run.Run at 0x7fa88e10de80>

In [20]:
# NOTE(review): leftover scratch cell. The recorded output shows this call failing with
# a TypeError because Expert requires `dataset` and `labeler_id`; `ex` is also not among
# the imports visible at the top of the notebook — confirm and remove the cell.
ex.Expert()

TypeError: __init__() missing 2 required positional arguments: 'dataset' and 'labeler_id'