# VAE

In [1]:
import os
import sys
import time
from pathlib import Path

sys.path.append( '..' )
from VAE.vae import *
from VAE.VAE_smac import *
from helpers.pc_stats import *
import mlflow

## Parameter definition

In [2]:
data_dir  = os.path.normpath(os.path.join(os.getcwd(), "../../runs/FIA/Com8_grown_together/oms"))
run_dir = os.path.normpath(os.path.join(os.getcwd(), "../../runs/VAE/hyperparameter_optimization"))
results_dir = os.path.normpath(os.path.join(os.getcwd(), "../../runs/VAE/results"))
test_configuration = False
overwrite = False
verbosity = 1
framework = "pytorch_jupyter"
outdir = Path(os.path.normpath(os.path.join(run_dir, f"smac_vae_{framework}")))

# Logging (time and steps)
last_timestamp = time.time()
step = 0
runtimes = {}


time_step(message="Setup loaded", verbosity=verbosity)

Setup loaded (0.014248371124267578s)


In [3]:
X = read_data(data_dir, verbosity=verbosity)

configuration_space = ConfigurationSpace(name="LD", seed=42)
hyperparameters = [
    Constant(       "original_dim",             X.shape[1]),
    Float(          "input_dropout",            (0.0, 0.5), default=0.25),
    Integer(        "intermediate_layers",      (1, 5), default=2),
    Integer(        "intermediate_dimension",   (1, 5), log=True, default=4),
    Categorical(    "intermediate_activation",  ["relu", "selu", "tanh", "leakyrelu"], default="leakyrelu"),
    Integer(        "latent_dimension",         (1, 2), log=False, default=2),
    Categorical(    "solver",                   ["nadam"], default="nadam"),
    Float(          "learning_rate",            (1e-4, 1e-2), log=True, default=1e-3)
]
configuration_space.add_hyperparameters(hyperparameters)
forbidden_clauses = [
    ForbiddenGreaterThanRelation(configuration_space["latent_dimension"], configuration_space["intermediate_dimension"])
]
configuration_space.add_forbidden_clauses(forbidden_clauses)
if verbosity > 0: 
    print(f"Configuration space defined with estimated {configuration_space.estimate_size()} possible combinations.\n")

Data loaded (2.688612222671509s)
Configuration space defined with estimated inf possible combinations.



## Optimization definition

In [4]:
class FIA_VAE_tune:
    """
    Class for running the SMAC3 tuning
    """
    def __init__(self, data, test_size:float, configuration_space:ConfigurationSpace, model_builder,
                 log_dir:str, batch_size:int=16, verbosity:int=0, gpu:bool=False, name:str="smac_vae"):
        self.configuration_space = configuration_space
        self.model_builder = model_builder
        self.data = data
        self.training_data, self.test_data = train_test_split(data, test_size=test_size)
        self.batch_size = batch_size
        self.log_dir = log_dir
        self.verbosity = verbosity
        self.gpu = gpu
        self.name = name
        self.count = 0

    def train(self, config:Configuration, seed:int=0, budget:int=25) -> float:
        """
        Method to train the model

        Args:
            config: Configuration to be trained upon
            seed: initializing seed
            budget: number of epochs to be used in training
        
        Returns:
            Average loss of the model
        """
        time_step("Start", verbosity=self.verbosity, min_verbosity=2)
        keras.utils.set_random_seed(seed)

        # Definition
        model = self.model_builder(config)
        if self.verbosity >= 3:
            model.summary()
            print_utilization(gpu=self.gpu)
        time_step("Model built", verbosity=self.verbosity, min_verbosity=2)

        # Fitting
        callbacks = []
        with mlflow.start_run(run_name=f"fia_vae_hptune_{self.count}", nested=True):
            mlflow.set_tag("test_identifier", f"child_{self.count}")
            model.fit(x=self.training_data, y=self.training_data, validation_split=0.2,
                      batch_size=self.batch_size, epochs=int(budget),
                      callbacks=callbacks, verbose=self.verbosity)

            if self.verbosity >= 3:
                print("After training utilization:")
                print_utilization(gpu=self.gpu)
            time_step("Model trained", verbosity=self.verbosity, min_verbosity=2)

            # Evaluation
            loss, recon_loss, kl_loss = model.evaluate(self.test_data, self.test_data,
                                                    batch_size=self.batch_size, verbose=self.verbosity)
            
            mlflow.log_params(config)
            mlflow.log_metrics({"eval-loss": loss, "eval-reconstruction_loss": recon_loss, "eval-kl_loss": kl_loss},
                            step=int(budget) + 1)
        time_step("Model evaluated", verbosity=self.verbosity, min_verbosity=2)        
        
        # Clearing model parameters
        keras.backend.clear_session()
        self.count += 1
        time_step("Session cleared", verbosity=self.verbosity, min_verbosity=2)
                
        return loss

In [5]:
fia_vae_tune = FIA_VAE_tune( X, test_size=0.2, configuration_space=configuration_space, model_builder=FIA_VAE,
                                batch_size=64, log_dir=os.path.join(outdir, "log"), verbosity=verbosity, gpu=False )

In [6]:
scenario = Scenario( fia_vae_tune.configuration_space, deterministic=True,
                     n_trials=20, min_budget=2, max_budget=100,
                     n_workers=1, output_directory=outdir,
                     walltime_limit=np.inf, cputime_limit=np.inf, trial_memory_limit=None )   # Max RAM in Bytes (not MB)
                    
initial_design = MultiFidelityFacade.get_initial_design(scenario, n_configs=10)
intensifier = Hyperband(scenario, incumbent_selection="highest_budget")
facade = MultiFidelityFacade( scenario, fia_vae_tune.train, 
                              initial_design=initial_design, intensifier=intensifier,
                              overwrite=overwrite, logging_level=30-verbosity*10 )
time_step(message=f"SMAC defined. Overwriting: {overwrite}", verbosity=verbosity)

[INFO][abstract_initial_design.py:147] Using 5 initial design configurations and 0 additional configurations.
SMAC defined. Overwriting: False (2.118586301803589s)


### Search

In [7]:
from pathlib import Path

In [8]:
mlflow.set_tracking_uri(Path("../../runs/VAE/hyperparameter_optimization/smac_vae_test/mlruns"))
mlflow.set_experiment(f"FIA_VAE_hptune_test")
mlflow.autolog(log_datasets=False, log_models=False, silent=self.verbosity <= 2)
with mlflow.start_run(run_name=f"fia_vae_hptune_test"):
    mlflow.set_tag("test_identifier", "parent")
    incumbent = run_optimization(facade=facade, smac_model=fia_vae_tune, verbose_steps=10, verbosity=1)
mlflow.end_run()

NameError: name 'self' is not defined