In [12]:
import sys, os

from torch.utils.tensorboard import SummaryWriter
from absl import flags
from absl import app

from SSL.feature_extractor.utils import save_to_logs, get_train_dir
from SSL.feature_extractor.emb_model_lib import EmbeddingModel

import Dataset.Dataset as ds
import torch

In [13]:
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

In [14]:
param = {
    "PATH": "../Datasets/NIH/",
    "TARGET": "Airspace_Opacity",
    "LABELER_IDS": [4323195249, 4295232296],
    "K": 10, #Number of folds
    "SEEDS": [1, 2, 3, 4, 42], #Seeds for the experiments
    "GT": True, # Determines if the classifier gets all data with GT Label or only the labeld data
    "MOD": ["confidence", "disagreement", "disagreement_diff", "ssl"], #Determines the experiment modus

    "OVERLAP": 100,
    "INITAL_SIZE": [8, 16, 32],
    "ROUNDS": [2, 4, 8],
    "LABELS_PER_ROUND": [4, 8, 16],
    "LABELED": 32,

    "SETTING": ["AL", "SSL", "SSL_AL"],

    "AL": { #Parameter for Active Learning
        "INITIAL_SIZE": [8, 16, 32], #
        "EPOCH_TRAIN": 10, #
        "n_dataset": 2, #Number Classes
        "BATCH_SIZE": 4,
        "MAX_ROUNDS": [2, 4, 8],
        "BATCH_SIZE_AL": [4, 8, 16],
        #"EPOCHS_DEFER": 5,
        "COST": [(10, 0)], #Cost for Cost sensitiv learning
        #"TRAIN REJECTOR": False,
        "PRELOAD": True,
        "PREPROCESS": True,
        
    },
    "SSL": {
        "PREBUILD": False,
        "TRAIN_BATCH_SIZE": 64,
        "TEST_BATCH_SIZE": 64,
    },
    "L2D": { # Parameter for Learning to defer
        "TRAIN_BATCH_SIZE": 64,
        "TEST_BATCH_SIZE": 64,
        "PRELOAD": True,
        "PREBUILD": True,
        "VERMA": {},
        "HEMMER": {}
        
    },
    "NEPTUNE": {
        "NEPTUNE": False,
    },
    "EMBEDDED": {
        "ARGS": {
            'dataset': "nih",
            'model': "resnet18",
            'num_classes': 2,
            'batch': 64,
            'lr': 0.001,
        },
    },
}

In [15]:
dataManager = ds.DataManager(path = param["PATH"], target = param["TARGET"], param=param, seeds=[0])
dataManager.createData()
sslDataset = dataManager.getSSLDataset(0)


Number of images of the whole dataset: 4381
Loaded image number: 0
Loaded image number: 200
Loaded image number: 400
Loaded image number: 600
Loaded image number: 800
Loaded image number: 1000
Loaded image number: 1200
Loaded image number: 1400
Loaded image number: 1600
Loaded image number: 1800
Loaded image number: 2000
Loaded image number: 2200
Loaded image number: 2400
Loaded image number: 2600
Loaded image number: 2800
Loaded image number: 3000
Loaded image number: 3200
Loaded image number: 3400
Loaded image number: 3600
Loaded image number: 3800
Loaded image number: 4000
Loaded image number: 4200
Full length: 4381
Loaded image number: 0
Loaded image number: 200
Loaded image number: 400
Loaded image number: 600
Loaded image number: 800
Length of train + test + val: 852
Length of train + test + val: 852
Length of train + test + val: 852
Length of train + test + val: 852
Length of train + test + val: 852
Length of train + test + val: 852
Length of train + test + val: 852
Length of tr



In [6]:
round(param["LABELED"]*param["OVERLAP"]/100)

32

In [16]:
sslDataset.createLabeledIndices(labelerIds=param["LABELER_IDS"], n_L=param["LABELED"], k=round(param["LABELED"]*param["OVERLAP"]/100), seed=0)
train_dataloader, val_dataloader, test_dataloader = sslDataset.get_data_loader_for_fold(0)
#train_dataloader, val_dataloader, test_dataloader = nih_dataloader.get_data_loader_for_fold(1)
dataloaders = (train_dataloader, val_dataloader, test_dataloader)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [8]:
args = {
    'dataset': "nih",
    'model': "resnet18",
    'num_classes': 2,
    'batch': 64,
    'lr': 0.001,
}

path = "../../../Datasets/NIH/"

In [9]:
import shutil
def cleanTrainDir(path):
    shutil.rmtree(path)

In [10]:
cleanTrainDir("SSL_Working")

FileNotFoundError: [Errno 2] No such file or directory: 'SSL_Working'

In [21]:
def create_embedded(dataloaders, param, neptune_param):
    args = param["EMBEDDED"]["ARGS"]
    path = param["PATH"]
    neptune_param = neptune_param

    wkdir = os.getcwd() + "/SSL_Working"
    sys.path.append(wkdir)

    SAVE = True

    # get training directory
    train_dir = get_train_dir(wkdir, args, 'emb_net')

    print("Train dir: " + train_dir)

    NEPTUNE = neptune_param["NEPTUNE"]

    writer = None

    if SAVE:
        # initialize summary writer for tensorboard
        writer = SummaryWriter(train_dir + 'logs/')

    # initialize base model
    emb_model = EmbeddingModel(args, wkdir, writer, dataloaders, param, neptune_param)
    # try to load previous training runs
    start_epoch = emb_model.load_from_checkpoint(mode='latest')
    # train model
    for epoch in range(start_epoch, 100):
        # train one epoch
        loss = emb_model.train_one_epoch(epoch)
        # get validation accuracy
        valid_acc = emb_model.get_test_accuracy(return_acc=True)
        print(f'loss: {loss}')

        if NEPTUNE:
            run[f'embedded/seed{neptune_param["seed"]}_fold{neptune_param["fold"]}/loss/accuracy'].append(loss)
            run[f'embedded/seed{neptune_param["seed"]}_fold{neptune_param["fold"]}/val/accuracy'].append(valid_acc)
        # save logs to json
        if SAVE:
            save_to_logs(train_dir, valid_acc, loss.item())
            # save model to checkpoint
            emb_model.save_to_checkpoint(epoch, loss, valid_acc)
    # get test accuracy
    acc = emb_model.get_test_accuracy()

    if NEPTUNE:
        run["embedded/test/accuracy"].append(acc)

    return emb_model

In [22]:
create_embedded(dataloaders, param, param["NEPTUNE"])

Train dir: /home/joli/Masterarbeit/SSL_Working/NIH/emb_net@dataset-nih-model-resnet18-num_classes-2/
load Resnet-18 checkpoint
load Resnet-18 pretrained on ImageNet
Loaded Model resnet18




No Checkpoint found
Starting new from epoch 1
Train Epoch 1: |███████████████████████████████████████-| 100.0% Complete
Test-Accuracy: 0.7809307604994324 
Test-Acc-Class [0.82735426 0.73333333]
loss: 0.6101260781288147
Train Epoch 2: |███████████████████████████████████████-| 100.0% Complete
Test-Accuracy: 0.8024971623155505 
Test-Acc-Class [0.80493274 0.8       ]
loss: 0.5630812048912048
Train Epoch 3: |███████████████████████████████████████-| 100.0% Complete
Test-Accuracy: 0.8036322360953462 
Test-Acc-Class [0.80717489 0.8       ]
loss: 0.4447552561759949
Train Epoch 4: |███████████████████████████████████████-| 100.0% Complete
Test-Accuracy: 0.8047673098751419 
Test-Acc-Class [0.80717489 0.80229885]
loss: 0.47221580147743225
Train Epoch 5: |███████████████████████████████████████-| 100.0% Complete
Test-Accuracy: 0.8047673098751419 
Test-Acc-Class [0.81390135 0.7954023 ]
loss: 0.46554216742515564
Train Epoch 6: |███████████████████████████████████████-| 100.0% Complete
Test-Accuracy

<SSL.feature_extractor.emb_model_lib.EmbeddingModel at 0x7f08a96f5c10>

In [18]:
from __future__ import print_function
import random

import time
import os
import sys
import json
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.tensorboard.writer import SummaryWriter

from SSL.LinearModel import LinearNN
from SSL.utils import accuracy, setup_default_logging, AverageMeter, WarmupCosineLrScheduler
from SSL.utils import load_from_checkpoint
from SSL.Expert import CIFAR100Expert, NIHExpert
from SSL.feature_extractor.embedding_model import EmbeddingModel

In [4]:
def set_model(args):
    """Initialize models

    Lineare Modelle, welche später die extrahierten Features übergeben bekommen

    :param args: training arguments
    :return: tuple
        - model: Initialized model
        - criteria_x: Supervised loss function
        - ema_model: Initialized ema model
    """
    if args["dataset"].lower() == 'cifar100':
        feature_dim = 1280
    elif args["dataset"].lower() == 'nih':
        feature_dim = 512
    else:
        print(f'Dataset {args["dataset"]} not defined')
        sys.exit()
    model = LinearNN(num_classes=args["n_classes"], feature_dim=feature_dim, proj=True)

    model.train()
    model.cuda()  
    
    if args["eval_ema"]:
        ema_model = LinearNN(num_classes=args["n_classes"], feature_dim=feature_dim, proj=True)
        for param_q, param_k in zip(model.parameters(), ema_model.parameters()):
            param_k.data.copy_(param_q.detach().data)  # initialize
            param_k.requires_grad = False  # not update by gradient for eval_net
        ema_model.cuda()  
        ema_model.eval()
    else:
        ema_model = None
        
    criteria_x = nn.CrossEntropyLoss().cuda()
    return model, criteria_x, ema_model

In [5]:
def train_one_epoch(epoch,
                    model,
                    ema_model,
                    emb_model,
                    prob_list,
                    criteria_x,
                    optim,
                    lr_schdlr,
                    dltrain_x,
                    dltrain_u,
                    args,
                    n_iters,
                    logger,
                    queue_feats,
                    queue_probs,
                    queue_ptr,
                    ):
    """Train one epoch on the train set

    :param epoch: Current epoch
    :param model: Model
    :param ema_model: EMA-Model
    :param emb_model: Embedding model
    :param prob_list: List of probabilities
    :param criteria_x: Supervised loss function
    :param optim: Optimizer
    :param lr_schdlr: Learning rate scheduler
    :param dltrain_x: Data loader for the labeled training instances
    :param dltrain_u: Data loader for the unlabeled training instances
    :param args: Training arguments
    :param n_iters: Number of iterations per epoch
    :param logger: Logger
    :param queue_feats: Memory bank feature vectors
    :param queue_probs: Memory bank probabilities
    :param queue_ptr: Memory bank ptr
    :return: tuple
        - Average supervised loss
        - Average unsupervised loss
        - Average contrastive loss
        - Average mask
        - Average number of edges in the pseudo label graph
        - Percentage of correct pseudo labels
        - Memory bank feature vectors
        - Memory bank probabilities
        - Memory bank ptr
        - List of probabilities
    """

    model.train()
    loss_x_meter = AverageMeter()
    loss_u_meter = AverageMeter()
    loss_contrast_meter = AverageMeter()
    # the number of correct pseudo-labels
    n_correct_u_lbs_meter = AverageMeter()
    # the number of confident unlabeled data
    n_strong_aug_meter = AverageMeter()
    mask_meter = AverageMeter()
    # the number of edges in the pseudo-label graph
    pos_meter = AverageMeter()

    
    epoch_start = time.time()  # start time
    dl_x, dl_u = iter(dltrain_x), iter(dltrain_u)
    for it in range(n_iters):
        ims_x_weak, lbs_x, im_id = next(dl_x)
        (ims_u_weak, ims_u_strong0, ims_u_strong1), lbs_u_real, im_id = next(dl_u)

        lbs_x = lbs_x.type(torch.LongTensor) 
        lbs_x = lbs_x.cuda()
        lbs_u_real = lbs_u_real.cuda()

        # --------------------------------------
        bt = ims_x_weak.size(0)
        btu = ims_u_weak.size(0)

        imgs = torch.cat([ims_x_weak, ims_u_weak, ims_u_strong0, ims_u_strong1], dim=0).cuda()
        embedding = emb_model.get_embedding(batch=imgs)
        logits, features = model(embedding)

        logits_x = logits[:bt]
        logits_u_w, logits_u_s0, logits_u_s1 = torch.split(logits[bt:], btu)
        
        feats_x = features[:bt]
        feats_u_w, feats_u_s0, feats_u_s1 = torch.split(features[bt:], btu)

        
        loss_x = criteria_x(logits_x, lbs_x)

        with torch.no_grad():
            logits_u_w = logits_u_w.detach()
            feats_x = feats_x.detach()
            feats_u_w = feats_u_w.detach()
            
            probs = torch.softmax(logits_u_w, dim=1)            
            # DA
            prob_list.append(probs.mean(0))
            if len(prob_list)>32:
                prob_list.pop(0)
            prob_avg = torch.stack(prob_list, dim=0).mean(0)
            probs = probs / prob_avg
            probs = probs / probs.sum(dim=1, keepdim=True)   

            probs_orig = probs.clone()
            
            if epoch>0 or it>args["queue_batch"]: # memory-smoothing 
                A = torch.exp(torch.mm(feats_u_w, queue_feats.t())/args["temperature"])       
                A = A/A.sum(1,keepdim=True)                    
                probs = args["alpha"]*probs + (1-args["alpha"])*torch.mm(A, queue_probs)               
            
            scores, lbs_u_guess = torch.max(probs, dim=1)
            mask = scores.ge(args["thr"]).float() 
                   
            feats_w = torch.cat([feats_u_w,feats_x],dim=0)   
            onehot = torch.zeros(bt,args["n_classes"]).cuda().scatter(1,lbs_x.view(-1,1),1)
            probs_w = torch.cat([probs_orig,onehot],dim=0)
            
            # update memory bank
            n = bt+btu   
            queue_feats[queue_ptr:queue_ptr + n,:] = feats_w
            queue_probs[queue_ptr:queue_ptr + n,:] = probs_w      
            queue_ptr = (queue_ptr+n)%args["queue_size"]

            
        # embedding similarity
        sim = torch.exp(torch.mm(feats_u_s0, feats_u_s1.t())/args["temperature"]) 
        sim_probs = sim / sim.sum(1, keepdim=True)
        
        # pseudo-label graph with self-loop
        Q = torch.mm(probs, probs.t())       
        Q.fill_diagonal_(1)    
        pos_mask = (Q>=args["contrast_th"]).float()
            
        Q = Q * pos_mask
        Q = Q / Q.sum(1, keepdim=True)
        
        # contrastive loss
        loss_contrast = - (torch.log(sim_probs + 1e-7) * Q).sum(1)
        loss_contrast = loss_contrast.mean()  
        
        # unsupervised classification loss
        loss_u = - torch.sum((F.log_softmax(logits_u_s0,dim=1) * probs),dim=1) * mask                
        loss_u = loss_u.mean()
        
        loss = loss_x + args["lam_u"] * loss_u + args["lam_c"] * loss_contrast
        
        optim.zero_grad()
        loss.backward()
        optim.step()
        lr_schdlr.step()

        if args["eval_ema"]:
            with torch.no_grad():
                ema_model_update(model, ema_model, args["ema_m"])
                
        loss_x_meter.update(loss_x.item())
        loss_u_meter.update(loss_u.item())
        loss_contrast_meter.update(loss_contrast.item())
        mask_meter.update(mask.mean().item())       
        pos_meter.update(pos_mask.sum(1).float().mean().item())
        
        corr_u_lb = (lbs_u_guess == lbs_u_real).float() * mask
        n_correct_u_lbs_meter.update(corr_u_lb.sum().item())
        n_strong_aug_meter.update(mask.sum().item())

        if (it + 1) % 64 == 0:
            t = time.time() - epoch_start

            lr_log = [pg['lr'] for pg in optim.param_groups]
            lr_log = sum(lr_log) / len(lr_log)

            logger.info("{}-x{}-s{}, {} | epoch:{}, iter: {}. loss_u: {:.3f}. loss_x: {:.3f}. loss_c: {:.3f}. "
                        "n_correct_u: {:.2f}/{:.2f}. Mask:{:.3f}. num_pos: {:.1f}. LR: {:.3f}. Time: {:.2f}".format(
                args["dataset"], args["n_labeled"], args["seed"], args["exp_dir"], epoch, it + 1, loss_u_meter.avg, loss_x_meter.avg, loss_contrast_meter.avg, n_correct_u_lbs_meter.avg, n_strong_aug_meter.avg, mask_meter.avg, pos_meter.avg, lr_log, t))
            epoch_start = time.time()

    return loss_x_meter.avg, loss_u_meter.avg, loss_contrast_meter.avg, mask_meter.avg, pos_meter.avg, n_correct_u_lbs_meter.avg/n_strong_aug_meter.avg, queue_feats, queue_probs, queue_ptr, prob_list

In [6]:
def evaluate(model, ema_model, emb_model, dataloader):
    """Evaluate model on train or validation set

    :param model: Model
    :param ema_model: EMA-Model
    :param emb_model: Embedding model
    :param dataloader: Data loader for the evaluation set
    :return: tuple
        - Accuracy of the model
        - Accuracy of the ema_model
    """
    
    model.eval()
    preds = []
    targets = []
    top1_meter = AverageMeter()
    ema_top1_meter = AverageMeter()

    with torch.no_grad():
        for ims, lbs, im_id in dataloader:
            ims = ims.cuda()
            lbs = lbs.cuda()

            embedding = emb_model.get_embedding(batch=ims)
            logits, _ = model(embedding)
            scores = torch.softmax(logits, dim=1)
            preds += torch.argmax(scores, dim=1).cpu().tolist()
            targets += lbs.cpu().tolist()
            top1 = accuracy(scores, lbs, (1, ))
            top1_meter.update(top1.item())
            
            if ema_model is not None:
                embedding = emb_model.get_embedding(batch=ims)
                logits, _ = ema_model(embedding)
                scores = torch.softmax(logits, dim=1)
                top1 = accuracy(scores, lbs, (1, ))
                ema_top1_meter.update(top1.item())
    return top1_meter.avg, ema_top1_meter.avg

In [7]:
@torch.no_grad()
def ema_model_update(model, ema_model, ema_m):
    """Momentum update of evaluation model (exponential moving average)

    :param model: Model
    :param ema_model: EMA-Model
    :param ema_m: Ema parameter
    :return:
    """
    for param_train, param_eval in zip(model.parameters(), ema_model.parameters()):
        param_eval.copy_(param_eval * ema_m + param_train.detach() * (1-ema_m))

    for buffer_train, buffer_eval in zip(model.buffers(), ema_model.buffers()):
        buffer_eval.copy_(buffer_train)

In [8]:
class exper:
    def __init__(self, id):
        self.labeler_id = id

In [22]:
def getExpertModelSSL(labelerId, sslDataset, seed, fold_idx, n_labeled, embedded_model=None, param=None, neptune_param=None):
    args = {
        "dataset": "NIH", #
        "wresnet_k": 2, #width factor of wide resnet
        "wresnet_n": 28, #depth of wide resnet
        "n_classes": 2, #number of classes in dataset
        "n_epoches": 10, #number of training epoches
        "batchsize": 16, #train batch size of labeled samples
        "mu": 7, #factor of train batch size of unlabeled samples
        "n_imgs_per_epoch": 32768, #number of training images for each epoch
        #"n_imgs_per_epoch": 4381,
        "eval_ema": True, #whether to use ema model for evaluation
        "ema_m": 0.999, #
        "lam_u": 1., #coefficient of unlabeled loss
        "lr": 0.03, #learning rate for training
        "weight_decay": 5e-4, #weight decay
        "momentum": 0.9, #momentum for optimizer
        "temperature": 0.2, #softmax temperature
        "low_dim": 64, #
        "lam_c": 1, #coefficient of contrastive loss
        "contrast_th": 0.8, #pseudo label graph threshold
        "thr": 0.95, #pseudo label threshold
        "alpha": 0.9, #
        "queue_batch": 5, #number of batches stored in memory bank
        "exp_dir": "EmbeddingCM_bin", #experiment id
        #"ex_strength": 4323195249, #Strength of the expert 
        #"ex_strength": 4295232296
    }

    arg["labelerId"] = labelerId
    arg["n_labeled"] = n_labeled
    path = param["PATH"]

    #Setzt Logger fest
    logger, output_dir = setup_default_logging("SSL_Working/", args)
    logger.info(dict(args))
    
    tb_logger = SummaryWriter(output_dir)

    set_seed(seed)

    #Calculates number of iterations
    n_iters_per_epoch = args["n_imgs_per_epoch"] // args["batchsize"]  # 1024
    n_iters_all = n_iters_per_epoch * args["n_epoches"]  # 1024 * 200

    #Erstellt das Modell
    model, criteria_x, ema_model = set_model(args)
    #Lädt das trainierte eingebettete Modell
    emb_model = EmbeddingModel(os.getcwd() + "/SSL_Working", args["dataset"])
    logger.info("Total params: {:.2f}M".format(
        sum(p.numel() for p in model.parameters()) / 1e6))


    if 'nih' in args["dataset"].lower(): #Erstellt den Experten mit seiner ID
        exp = exper(int(args["labelerId"]))
        
        dltrain_x, dltrain_u = sslDataset.get_train_loader_interface( 
            exp, args["batchsize"], args["mu"], n_iters_per_epoch, L=args["n_labeled"], method='comatch')
        dlval = sslDataset.get_val_loader_interface(exp, batch_size=64, num_workers=4, fold_idx=fold_idx)
        dlval = sslDataset.get_test_loader_interface(exp, batch_size=64, num_workers=4, fold_idx=fold_idx)

    wd_params, non_wd_params = [], []
    for name, param in model.named_parameters():
        if 'bn' in name:
            non_wd_params.append(param)  
        else:
            wd_params.append(param)
    param_list = [
        {'params': wd_params}, {'params': non_wd_params, 'weight_decay': 0}]
    optim = torch.optim.SGD(param_list, lr=args["lr"], weight_decay=args["weight_decay"],
                            momentum=args["momentum"], nesterov=True)

    lr_schdlr = WarmupCosineLrScheduler(optim, n_iters_all, warmup_iter=0)
    
    model, ema_model, optim, lr_schdlr, start_epoch, metrics, prob_list, queue = \
        load_from_checkpoint(output_dir, model, ema_model, optim, lr_schdlr)

    # memory bank
    args["queue_size"] = args["queue_batch"]*(args["mu"]+1)*args["batchsize"]
    if queue is not None:
        queue_feats = queue['queue_feats']
        queue_probs = queue['queue_probs']
        queue_ptr = queue['queue_ptr']
    else:
        queue_feats = torch.zeros(args["queue_size"], args["low_dim"]).cuda()
        queue_probs = torch.zeros(args["queue_size"], args["n_classes"]).cuda()
        queue_ptr = 0

    train_args = dict(
        model=model,
        ema_model=ema_model,
        emb_model=emb_model,
        prob_list=prob_list,
        criteria_x=criteria_x,
        optim=optim,
        lr_schdlr=lr_schdlr,
        dltrain_x=dltrain_x,
        dltrain_u=dltrain_u,
        args=args,
        n_iters=n_iters_per_epoch,
        logger=logger
    )
    
    best_acc = -1
    best_epoch = 0

    if metrics is not None:
        best_acc = metrics['best_acc']
        best_epoch = metrics['best_epoch']
    logger.info('-----------start training--------------')
    for epoch in range(start_epoch, args["n_epoches"]):
        
        loss_x, loss_u, loss_c, mask_mean, num_pos, guess_label_acc, queue_feats, queue_probs, queue_ptr, prob_list = \
        train_one_epoch(epoch, **train_args, queue_feats=queue_feats,queue_probs=queue_probs,queue_ptr=queue_ptr)

        top1, ema_top1 = evaluate(model, ema_model, emb_model, dlval)


        tb_logger.add_scalar('loss_x', loss_x, epoch)
        tb_logger.add_scalar('loss_u', loss_u, epoch)
        tb_logger.add_scalar('loss_c', loss_c, epoch)
        tb_logger.add_scalar('guess_label_acc', guess_label_acc, epoch)
        tb_logger.add_scalar('test_acc', top1, epoch)
        tb_logger.add_scalar('test_ema_acc', ema_top1, epoch)
        tb_logger.add_scalar('mask', mask_mean, epoch)
        tb_logger.add_scalar('num_pos', num_pos, epoch)

        if best_acc < top1:
            best_acc = top1
            best_epoch = epoch

        logger.info("Epoch {}. Acc: {:.4f}. Ema-Acc: {:.4f}. best_acc: {:.4f} in epoch{}".
                    format(epoch, top1, ema_top1, best_acc, best_epoch))
        
        save_obj = {
            'model': model.state_dict(),
            'ema_model': ema_model.state_dict(),
            'optimizer': optim.state_dict(),
            'lr_scheduler': lr_schdlr.state_dict(),
            'prob_list': prob_list,
            'queue': {'queue_feats':queue_feats, 'queue_probs':queue_probs, 'queue_ptr':queue_ptr},
            'metrics': {'best_acc': best_acc, 'best_epoch': best_epoch},
            'epoch': epoch,
        }
        #torch.save(save_obj, os.path.join(output_dir, 'ckp.latest'))
    _, _ = evaluate(model, ema_model, emb_model, dlval)

    return emb_model, model

In [23]:
emb_model, model = getExpertModelSSL(sslDataset=sslDataset, fold_idx=0, embedded_model=None, param=None, neptune_param=None)

NIH
2023-07-03 12:06:02,666 - INFO - train -   {'root': '', 'dataset': 'NIH', 'wresnet_k': 2, 'wresnet_n': 28, 'n_classes': 2, 'n_labeled': 12, 'n_epoches': 10, 'batchsize': 16, 'mu': 7, 'n_imgs_per_epoch': 32768, 'eval_ema': True, 'ema_m': 0.999, 'lam_u': 1.0, 'lr': 0.03, 'weight_decay': 0.0005, 'momentum': 0.9, 'seed': 2, 'temperature': 0.2, 'low_dim': 64, 'lam_c': 1, 'contrast_th': 0.8, 'thr': 0.95, 'alpha': 0.9, 'queue_batch': 5, 'exp_dir': 'EmbeddingCM_bin', 'ex_strength': 4295232296}
2




load Resnet-18 checkpoint
None
Loaded Model resnet18
2023-07-03 12:06:02,973 - INFO - train -   Total params: 0.30M
Index: 0
Labels: 32
Index: 0
Index: 0
No Checkpoint found at SSL_Working/NIH/EmbeddingCM_bin/ex4295232296_x12_seed2/ckp.latest
Starting new from epoch 1
2023-07-03 12:06:03,449 - INFO - train -   -----------start training--------------
2023-07-03 12:06:26,918 - INFO - train -   NIH-x12-s2, EmbeddingCM_bin | epoch:0, iter: 64. loss_u: 0.558. loss_x: 1.951. loss_c: 4.719. n_correct_u: 11.75/98.02. Mask:0.875. num_pos: 93.5. LR: 0.030. Time: 23.47
2023-07-03 12:06:48,416 - INFO - train -   NIH-x12-s2, EmbeddingCM_bin | epoch:0, iter: 128. loss_u: 0.428. loss_x: 0.976. loss_c: 4.718. n_correct_u: 11.84/100.08. Mask:0.894. num_pos: 95.1. LR: 0.030. Time: 21.49
2023-07-03 12:07:09,779 - INFO - train -   NIH-x12-s2, EmbeddingCM_bin | epoch:0, iter: 192. loss_u: 0.372. loss_x: 0.652. loss_c: 4.718. n_correct_u: 11.73/98.90. Mask:0.883. num_pos: 93.4. LR: 0.030. Time: 21.36
2023-0