In [1]:
from __future__ import print_function
import random

import time
import os
import sys
import json
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.tensorboard.writer import SummaryWriter

from LinearModel import LinearNN
import datasets.cifar as cifar
import datasets.nih as nih
from utils import accuracy, setup_default_logging, AverageMeter, WarmupCosineLrScheduler
from utils import load_from_checkpoint
from Expert import CIFAR100Expert, NIHExpert
from feature_extractor.embedding_model import EmbeddingModel

2023-06-27 20:18:14.912909: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def set_seed(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch's
    generator with the same value; when CUDA is available the CUDA generators
    are seeded as well.

    :param seed: integer seed passed unchanged to each generator
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if not torch.cuda.is_available():
        return

    # Seed the current device first, then all visible devices.
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

In [3]:
import Dataset as ds

# Configuration handed to the DataManager. Every key is listed exactly once;
# the original literal defined "K" twice (with the same value), which Python
# silently collapses into a single entry.
param = {
    "TARGET": "Airspace_Opacity",
    "PATH": "../../Datasets/NIH/",
    "K": 10,  # number of folds
    "LABELER_IDS": [4323195249, 4295232296],

    "batch_size": 64,
    "alpha": 1.0,  # scaling parameter for the loss function, default=1.0
    "epochs": 50,
    "patience": 15,  # number of patience steps for early stopping the training
    "expert_type": "MLPMixer",  # expert type; for the available types see models -> experts, default=predict
    "n_classes": 2,  # number of classes for the classification task

    "TRAIN_BATCH_SIZE": 64,
    "TEST_BATCH_SIZE": 64,
    "NUM_EXPERTS": 2,

    "PRELOAD": True,
    "PREBUILD": True,

    "OVERLAP K": 8,
    "NUMBER LABELS": 8,
}

# Build the data for a single seed (0) and fetch the dataset object that
# main() later uses to create its train/test loaders.
dataManager = ds.DataManager(path=param["PATH"], target=param["TARGET"], param=param, seeds=[0])
dataManager.createData()
sslDataset = dataManager.getSSLDataset(0)
#train_dataloader, val_dataloader, test_dataloader = sslDataset.get_data_loader_for_fold(0)
#train_dataloader, val_dataloader, test_dataloader = nih_dataloader.get_data_loader_for_fold(1)
#dataloaders = (train_dataloader, val_dataloader, test_dataloader)
#sslDataset.get_train_loader_interface(expert=exper(4323195249), batch_size=64, mu = 5, n_iters_per_epoch = 10, L=8, method='comatch', imsize=128, fold_idx=0)

Loaded image number: 0
Loaded image number: 200
Loaded image number: 400
Loaded image number: 600
Loaded image number: 800
Loaded image number: 1000
Loaded image number: 1200
Loaded image number: 1400
Loaded image number: 1600
Loaded image number: 1800
Loaded image number: 2000
Loaded image number: 2200
Loaded image number: 2400
Loaded image number: 2600
Loaded image number: 2800
Loaded image number: 3000
Loaded image number: 3200
Loaded image number: 3400
Loaded image number: 3600
Loaded image number: 3800
Loaded image number: 4000
Loaded image number: 4200
Full length: 4381
Loaded image number: 0
Loaded image number: 200
Loaded image number: 400
Loaded image number: 600
Loaded image number: 800




Length of train + test + val: 852
Length of train + test + val: 852
Length of train + test + val: 852
Length of train + test + val: 852
Length of train + test + val: 852
Length of train + test + val: 852
Length of train + test + val: 852
Length of train + test + val: 852
Length of train + test + val: 852
Length of train + test + val: 852
Loaded set number 0
Loaded set number 1
Loaded set number 2
Loaded set number 3
Loaded set number 4
Loaded set number 5
Loaded set number 6
Loaded set number 7
Loaded set number 8
Loaded set number 9
Added
Added
Added
Added
Added
Added
Added
Added
Added
Added


In [4]:
def set_model(args):
    """Initialize the model, its optional EMA copy and the supervised loss.

    The models are linear networks (``LinearNN``) that are later fed the
    extracted features.

    :param args: training arguments; this function reads ``args["dataset"]``,
        ``args["n_classes"]`` and ``args["eval_ema"]``
    :return: tuple
        - model: Initialized model, in train mode and moved to CUDA
        - criteria_x: Supervised loss function (cross entropy)
        - ema_model: Initialized ema model in eval mode, or ``None`` when
          ``args["eval_ema"]`` is falsy
    :raises SystemExit: if ``args["dataset"]`` is neither ``cifar100`` nor
        ``nih`` (case-insensitive)
    """
    # Feature dimension fed to the linear model, per supported dataset.
    feature_dims = {'cifar100': 1280, 'nih': 512}
    dataset = args["dataset"].lower()
    if dataset not in feature_dims:
        # Passing the message to sys.exit() makes the failure visible and
        # yields a non-zero exit status; a bare sys.exit() reports success.
        sys.exit(f'Dataset {args["dataset"]} not defined')
    feature_dim = feature_dims[dataset]

    model = LinearNN(num_classes=args["n_classes"], feature_dim=feature_dim, proj=True)
    model.train()
    model.cuda()

    if args["eval_ema"]:
        ema_model = LinearNN(num_classes=args["n_classes"], feature_dim=feature_dim, proj=True)
        for param_q, param_k in zip(model.parameters(), ema_model.parameters()):
            param_k.data.copy_(param_q.detach().data)  # initialize from the trained model's weights
            param_k.requires_grad = False  # the EMA copy is never updated by gradients
        ema_model.cuda()
        ema_model.eval()
    else:
        ema_model = None

    criteria_x = nn.CrossEntropyLoss().cuda()
    return model, criteria_x, ema_model

In [5]:
class exper:
    """Minimal expert stand-in that only carries a labeler id.

    Instances expose a single attribute, ``labeler_id``, holding the value
    given to the constructor.
    """

    def __init__(self, id):
        """Store ``id`` unchanged as ``self.labeler_id``."""
        self.labeler_id = id

In [6]:
def main():
    """Train the linear model on embedded features and export its predictions.

    Steps, in order:

    1. Build the argument dict, set up logging / TensorBoard and seed the RNGs.
    2. Create the linear model (plus EMA copy) and the embedding model.
    3. Build the NIH data loaders from the module-level ``sslDataset``.
    4. Resume from the latest checkpoint found in the output directory.
    5. Train for ``args["n_epoches"]`` epochs, logging and checkpointing after
       every epoch.
    6. Write the predictions as JSON into ``./artificial_expert_labels/`` and
       into the sibling ``Learning-to-Defer-Algs`` checkout.

    ``train_one_epoch``, ``evaluate``, ``predict_nih`` and ``predict_cifar`` are
    not defined in this cell and must already exist in the notebook namespace.

    :raises NotImplementedError: if the configured dataset has no data loaders
        wired up (only NIH is supported here)
    """
    args = {
        "root": "",  # dataset directory
        "dataset": "nih",
        "wresnet_k": 2,  # width factor of wide resnet
        "wresnet_n": 28,  # depth of wide resnet
        "n_classes": 2,  # number of classes in dataset
        "n_labeled": 40,  # number of labeled samples for training
        "n_epoches": 10,  # number of training epochs
        "batchsize": 16,  # train batch size of labeled samples
        "mu": 7,  # factor of train batch size of unlabeled samples
        "n_imgs_per_epoch": 32768,  # number of training images for each epoch
        "eval_ema": True,  # whether to use ema model for evaluation
        "ema_m": 0.999,
        "lam_u": 1.,  # coefficient of unlabeled loss
        "lr": 0.03,  # learning rate for training
        "weight_decay": 5e-4,  # weight decay
        "momentum": 0.9,  # momentum for optimizer
        "seed": 1,  # seed for random behaviors, no seed if negative
        "temperature": 0.2,  # softmax temperature
        "low_dim": 64,
        "lam_c": 1,  # coefficient of contrastive loss
        "contrast_th": 0.8,  # pseudo label graph threshold
        "thr": 0.95,  # pseudo label threshold
        "alpha": 0.9,
        # Key uses an underscore so it matches the lookup used for the memory
        # bank size below (it was spelled "queue-batch", causing a KeyError).
        "queue_batch": 5,  # number of batches stored in memory bank
        "exp_dir": "EmbeddingCM_bin",  # experiment id
        # NOTE(review): labelled "strength of the expert", but the value is used
        # as a labeler id below -- confirm the intended meaning.
        "ex_strength": 4323195249,
    }

    # Set up the logger and the TensorBoard writer used in the epoch loop.
    logger, output_dir = setup_default_logging(args)
    tb_logger = SummaryWriter(output_dir)

    print(args["seed"])

    # Seed the RNGs unless a negative seed disables seeding.
    if args["seed"] >= 0:
        set_seed(args["seed"])

    # Number of iterations per epoch and over the whole training run.
    n_iters_per_epoch = args["n_imgs_per_epoch"] // args["batchsize"]  # 32768 // 16 = 2048
    n_iters_all = n_iters_per_epoch * args["n_epoches"]  # 2048 * 10

    # Create the linear model(s) and the loss.
    model, criteria_x, ema_model = set_model(args)
    # Load the trained embedding model.
    emb_model = EmbeddingModel(os.getcwd(), args["dataset"])

    if 'nih' in args["dataset"].lower():
        # Create the expert from its id; `exp` is the lightweight id holder
        # passed to the train loader interface.
        expert = NIHExpert(int(args["ex_strength"]), 2)
        exp = exper(int(args["ex_strength"]))

        dltrain_x, dltrain_u = sslDataset.get_train_loader_interface(
            exp, args["batchsize"], args["mu"], n_iters_per_epoch, L=args["n_labeled"], method='comatch')
        # Evaluation runs on the test loader. A validation loader used to be
        # built into the same variable and was overwritten before any use.
        dlval = sslDataset.get_test_loader_interface(expert, batch_size=64, num_workers=4)
    else:
        # The CIFAR loader code is disabled in this notebook, so no loaders
        # exist for any other dataset; fail clearly instead of with a NameError.
        raise NotImplementedError(
            f'No data loaders are set up for dataset {args["dataset"]}')

    # Parameters whose name contains 'bn' are excluded from weight decay.
    wd_params, non_wd_params = [], []
    for name, param in model.named_parameters():
        if 'bn' in name:
            non_wd_params.append(param)
        else:
            wd_params.append(param)
    param_list = [
        {'params': wd_params}, {'params': non_wd_params, 'weight_decay': 0}]
    optim = torch.optim.SGD(param_list, lr=args["lr"], weight_decay=args["weight_decay"],
                            momentum=args["momentum"], nesterov=True)

    lr_schdlr = WarmupCosineLrScheduler(optim, n_iters_all, warmup_iter=0)

    model, ema_model, optim, lr_schdlr, start_epoch, metrics, prob_list, queue = \
        load_from_checkpoint(output_dir, model, ema_model, optim, lr_schdlr)

    # Memory bank: restored from the checkpoint when present, else zero-filled.
    args["queue_size"] = args["queue_batch"] * (args["mu"] + 1) * args["batchsize"]
    if queue is not None:
        queue_feats = queue['queue_feats']
        queue_probs = queue['queue_probs']
        queue_ptr = queue['queue_ptr']
    else:
        queue_feats = torch.zeros(args["queue_size"], args["low_dim"]).cuda()
        queue_probs = torch.zeros(args["queue_size"], args["n_classes"]).cuda()
        queue_ptr = 0

    train_args = dict(
        model=model,
        ema_model=ema_model,
        emb_model=emb_model,
        prob_list=prob_list,
        criteria_x=criteria_x,
        optim=optim,
        lr_schdlr=lr_schdlr,
        dltrain_x=dltrain_x,
        dltrain_u=dltrain_u,
        args=args,
        n_iters=n_iters_per_epoch,
        logger=logger
    )

    best_acc = -1
    best_epoch = 0

    if metrics is not None:
        best_acc = metrics['best_acc']
        best_epoch = metrics['best_epoch']
    logger.info('-----------start training--------------')
    for epoch in range(start_epoch, args["n_epoches"]):

        loss_x, loss_u, loss_c, mask_mean, num_pos, guess_label_acc, queue_feats, queue_probs, queue_ptr, prob_list = \
            train_one_epoch(epoch, **train_args, queue_feats=queue_feats, queue_probs=queue_probs, queue_ptr=queue_ptr)

        top1, ema_top1 = evaluate(model, ema_model, emb_model, dlval)

        tb_logger.add_scalar('loss_x', loss_x, epoch)
        tb_logger.add_scalar('loss_u', loss_u, epoch)
        tb_logger.add_scalar('loss_c', loss_c, epoch)
        tb_logger.add_scalar('guess_label_acc', guess_label_acc, epoch)
        tb_logger.add_scalar('test_acc', top1, epoch)
        tb_logger.add_scalar('test_ema_acc', ema_top1, epoch)
        tb_logger.add_scalar('mask', mask_mean, epoch)
        tb_logger.add_scalar('num_pos', num_pos, epoch)

        if best_acc < top1:
            best_acc = top1
            best_epoch = epoch

        logger.info("Epoch {}. Acc: {:.4f}. Ema-Acc: {:.4f}. best_acc: {:.4f} in epoch{}".
                    format(epoch, top1, ema_top1, best_acc, best_epoch))

        save_obj = {
            'model': model.state_dict(),
            # set_model() returns no EMA model when eval_ema is disabled.
            'ema_model': ema_model.state_dict() if ema_model is not None else None,
            'optimizer': optim.state_dict(),
            'lr_scheduler': lr_schdlr.state_dict(),
            'prob_list': prob_list,
            'queue': {'queue_feats': queue_feats, 'queue_probs': queue_probs, 'queue_ptr': queue_ptr},
            'metrics': {'best_acc': best_acc, 'best_epoch': best_epoch},
            'epoch': epoch,
        }
        torch.save(save_obj, os.path.join(output_dir, 'ckp.latest'))

    # Flush pending TensorBoard events now that training is done.
    tb_logger.close()

    _, _ = evaluate(model, ema_model, emb_model, dlval)
    if 'cifar' in args["dataset"].lower():
        predictions = predict_cifar(model, ema_model, emb_model, dltrain_x, dltrain_u, dlval)
    elif 'nih' in args["dataset"].lower():
        predictions = predict_nih(model, ema_model, emb_model, dltrain_x, dltrain_u, dlval)

    logger.info("***** Generate Predictions *****")
    pred_file = f'{args["exp_dir"]}_{args["dataset"].lower()}_expert{args["ex_strength"]}.{args["seed"]}@{args["n_labeled"]}_predictions.json'
    # The predictions are written twice: next to this notebook and into the
    # sibling Learning-to-Defer-Algs directory. The sibling is resolved from
    # the parent of the working directory rather than by slicing a fixed
    # number of characters off the path, which only worked for one exact
    # directory name.
    target_dirs = [
        './artificial_expert_labels/',
        os.path.join(os.path.dirname(os.getcwd()), 'Learning-to-Defer-Algs', 'artificial_expert_labels'),
    ]
    for target_dir in target_dirs:
        os.makedirs(target_dir, exist_ok=True)
        with open(os.path.join(target_dir, pred_file), 'w') as f:
            json.dump(predictions, f)

In [7]:
main()

AttributeError: 'dict' object has no attribute 'dataset'

TypeError: 'set' object is not subscriptable