In [None]:
import os
import sys
# Prefer an installed `peal`; when the import fails, register the parent
# directory on the module search path instead.
try:
    import peal
except ImportError:
    # peal is not installed: add the absolute path of the parent directory
    # to sys.path (only once) so a locally downloaded project can be found
    module_path = os.path.abspath(os.pardir)
    if module_path not in sys.path:
        sys.path.append(module_path)

# import basic libraries needed for sure and set the device depending on whether cuda is available or not
import torch
from peal.utils import request
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# set autoreload for more convinient development
%load_ext autoreload
%autoreload 2

# check and set that the right gpu is used
if device == 'cuda':
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    !nvidia-smi
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    print('Currently used device: ' + str(os.environ["CUDA_VISIBLE_DEVICES"]))
    os.environ["CUDA_VISIBLE_DEVICES"] = request(
        'cuda_visible_devices', default="0")
    torch.cuda.set_device(int(os.environ["CUDA_VISIBLE_DEVICES"]))
    import math
    import nvidia_smi
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
    gigabyte_vram = info.total / math.pow(10, 9)
    print("Total memory:", gigabyte_vram)

else:
    gigabyte_vram = None


In [None]:
# only needed if the dataset has to be created; otherwise this cell can be skipped
from peal.data.dataset_generators import ArtificialConfounderTabularDatasetGenerator




In [None]:
# build the unpoisoned datasets and a poisoned variant derived from the same config
import copy

from peal.data.datasets import get_datasets
from peal.data.dataset_generators import ArtificialConfounderTabularDatasetGenerator
from peal.utils import load_yaml_config

unpoisened_dataset_config = load_yaml_config(
    "$PEAL/configs/data/artificial_symbolic.yaml"
)

# every generator parameter except the dataset name is read from the yaml config
dg = ArtificialConfounderTabularDatasetGenerator(
    dataset_name="artificial_symbolic",
    num_samples=unpoisened_dataset_config["num_samples"],
    input_size=unpoisened_dataset_config["input_size"][0],
    label_noise=unpoisened_dataset_config["label_noise"],
    seed=unpoisened_dataset_config["seed"],
)
dg.generate_dataset()

# train / validation / test splits for the unmodified config
(
    unpoisened_dataset_train,
    unpoisened_dataset_val,
    unpoisened_dataset_test,
) = get_datasets(config=unpoisened_dataset_config, base_dir=dg.label_dir)

# the poisoned config is an independent deep copy that uses half as many samples
poisened_dataset_config = copy.deepcopy(unpoisened_dataset_config)
poisened_dataset_config["num_samples"] = int(
    unpoisened_dataset_config["num_samples"] / 2
)

# the requested value (default '100') is divided by 100 before it is stored
confounder_probability = request("confounder_probability", "100")
poisened_dataset_config["confounder_probability"] = (
    float(confounder_probability) / 100
)

# train / validation / test splits based on the modified config
(
    poisened_dataset_train,
    poisened_dataset_val,
    poisened_dataset_test,
) = get_datasets(config=poisened_dataset_config, base_dir=dg.label_dir)

In [None]:
# Either train a VAE generator on the poisoned datasets or load a stored one.
# NOTE(review): if request() can return a string such as 'False', the branch
# below would always be taken because non-empty strings are truthy -- confirm.
is_train_generator = request('is_train_generator', True)

# Default run name shared by both branches. str() keeps the concatenation
# valid when confounder_probability was supplied as a number instead of a string.
# NOTE(review): the default mentions 'mnist_0vs8' although this notebook uses
# the artificial_symbolic dataset -- confirm the name is intended.
generator_run_name = 'mnist_0vs8_' + str(confounder_probability) + '_generator'

if is_train_generator:
    # if you want the generator getting trained from scratch
    from peal.generators.variational_autoencoders import VAE
    from peal.training.trainers import ModelTrainer
    generator_config = load_yaml_config(
        '$PEAL/configs/models/default_generator.yaml')
    # the generator config takes its data section from the poisoned training set
    generator_config['data'] = poisened_dataset_train.config
    generator = VAE(generator_config).to(device)

    generator_trainer = ModelTrainer(
        config=generator_config,
        model=generator,
        datasource=(poisened_dataset_train, poisened_dataset_val),
        model_name=request('generator_model_name', generator_run_name),
        gigabyte_vram=gigabyte_vram
    )
    generator_trainer.fit()

else:
    # if you want to use loaded generator
    generator_path = request(
        'generator_path',
        'peal_runs/' + generator_run_name + '/model.cpl'
    )
    # map_location lets a generator that was stored from a GPU be loaded on a
    # CPU-only machine; without it torch.load tries to restore the tensors on
    # the device they were saved from.
    # SECURITY: torch.load unpickles the file -- only load files you trust.
    generator = torch.load(generator_path, map_location=device).to(device)
