# Optimising the GAN to produce flow duration and flow size using Optuna

This notebook follows three examples:

1. https://towardsdatascience.com/hyperparameter-tuning-of-neural-networks-with-optuna-and-pytorch-22e179efc837
This is a simple, easy-to-follow example.

2. https://github.com/optuna/optuna-examples/blob/main/multi_objective/pytorch_simple.py
This example is more complicated but demonstrates how to have multi-objective optimisation, which is key in optimising the GAN, where we need to minimise loss for both neural networks.

3. https://gitlab.com/hpo-uq/applications/gan4hep/-/blob/main/gan4hep/train_gan_2angles.py?ref_type=heads
This example comes from the 'gan4hep' module of the HYPPO paper.

In [1]:
import time
from datetime import datetime
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from tqdm.notebook import tqdm
import pandas as pd
import numpy as np
import optuna
import functools

## Constants

In [2]:
# Number of training samples taken from the training tensor.
TRAINING_DATA_LENGTH = 3030487
# Path of the serialised training tensor handed to torch.load().
TRAINING_DATA = "training_data_tensor.pt"
# Minimum number of completed trials required before pruning is considered.
MIN_TRIALS = 10
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Seed passed to torch.manual_seed() during training.
RANDOM_SEED = 77

## Discriminator Class

In [3]:
class Discriminator(nn.Module):
    """Fully connected classifier for 2-feature samples.

    Layer widths are 2 -> 128 -> 64 -> 32 -> 1. Every hidden layer is followed
    by ReLU and dropout (p=0.3); the single output neuron goes through a
    sigmoid so that it can be read as a probability.
    """

    def __init__(self):
        super().__init__()
        layers = [
            # Input is 2-D; the first hidden layer has 128 neurons with ReLU.
            nn.Linear(2, 128),
            nn.ReLU(),
            # Dropout is used to avoid overfitting.
            nn.Dropout(0.3),
            # Second and third hidden layers have 64 and 32 neurons.
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Dropout(0.3),
            # Single output neuron with sigmoid activation (a probability).
            nn.Linear(32, 1),
            nn.Sigmoid(),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the network and return the sigmoid output."""
        return self.model(x)

## Generator Class

In [4]:
class Generator(nn.Module):
    """Fully connected network mapping a 2-D input to a 2-D output.

    Layer widths are 2 -> 16 -> 32 -> 2 with ReLU after each hidden layer and
    no activation on the output layer.
    """

    def __init__(self):
        super().__init__()
        layers = [
            nn.Linear(2, 16),
            nn.ReLU(),
            nn.Linear(16, 32),
            nn.ReLU(),
            # Linear output: generated values are not squashed.
            nn.Linear(32, 2),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the network and return the generated samples."""
        return self.model(x)

## Pruner function


In [5]:
def evaluate_trial(study, current_g_loss, current_d_loss):
    """Decide whether the current trial should be pruned.

    A trial is pruned when either loss is exactly zero (one of the networks
    is no longer learning) or when either loss is worse (higher) than the
    mean of that loss over all completed trials of ``study``.

    Args:
        study: Optuna study whose completed trials form the baseline.
        current_g_loss: Current generator loss (float or one-element tensor).
        current_d_loss: Current discriminator loss (float or one-element tensor).

    Returns:
        bool: True if the trial should be pruned, False otherwise. False is
        also returned when the study has no completed trial to compare with.
    """
    # Work on plain floats so comparisons never yield tensors / numpy bools.
    current_g_loss = float(current_g_loss)
    current_d_loss = float(current_d_loss)

    # One of the networks is no longer learning.
    if current_g_loss == 0.0 or current_d_loss == 0.0:
        return True

    # Only completed trials carry final objective values.
    completed = [
        trial for trial in study.trials
        if trial.state == optuna.trial.TrialState.COMPLETE
    ]

    # Without a baseline there is nothing to compare against; previously this
    # path crashed because the mean losses were never assigned.
    if not completed:
        return False

    # values[0] is the generator loss, values[1] the discriminator loss.
    mean_g_loss = np.mean([trial.values[0] for trial in completed])
    mean_d_loss = np.mean([trial.values[1] for trial in completed])

    return bool(current_g_loss > mean_g_loss or current_d_loss > mean_d_loss)

    
def prune(study, min_trials, current_g_loss, current_d_loss,
          min_epochs=None, current_epoch=None):
    """Prune the running trial if it compares badly with completed trials.

    Args:
        study: Optuna study holding the trial history.
        min_trials: Minimum number of completed trials required before a
            comparison is attempted.
        current_g_loss: Current generator loss of the running trial.
        current_d_loss: Current discriminator loss of the running trial.
        min_epochs: Optional warm-up length. When given together with
            ``current_epoch``, no pruning happens before this epoch.
        current_epoch: Optional index of the epoch that has just finished.

    Raises:
        optuna.TrialPruned: If ``evaluate_trial`` decides the trial should
            be stopped.
    """
    # Still inside the warm-up window: too early to judge this trial.
    if (min_epochs is not None and current_epoch is not None
            and current_epoch < min_epochs):
        return

    # Count how many finished trials are available as a baseline.
    completed_trials = sum(
        1 for trial in study.trials
        if trial.state == optuna.trial.TrialState.COMPLETE
    )

    # Not enough history yet to make a meaningful comparison.
    if completed_trials < min_trials:
        return

    if evaluate_trial(study, current_g_loss, current_d_loss):
        print("Trial has been pruned")
        raise optuna.TrialPruned()
        

## Load data

In [6]:
def load_data():
    """Load the tensor stored at ``TRAINING_DATA`` and return it as float32."""
    return torch.load(TRAINING_DATA).to(torch.float32)

In [7]:
def train_and_optimise(params, generator, discriminator, study):
    """Train the GAN for one trial and return the final losses.

    Args:
        params: Dict with the keys ``"learning_rate"``, ``"batch_size"`` and
            ``"epochs"``.
        generator: Generator network; fed 2-D latent noise.
        discriminator: Discriminator network; fed real and generated samples.
        study: Optuna study, used for the mid-training pruning check.

    Returns:
        tuple[float, float]: ``(generator_loss, discriminator_loss)`` of the
        last batch of the last epoch.

    Raises:
        ValueError: If ``epochs`` is below 1 or the data cannot fill a batch.
        optuna.TrialPruned: Propagated from ``prune`` half-way through.
    """
    batch_size = params["batch_size"]
    epochs = params["epochs"]
    if epochs < 1:
        raise ValueError("params['epochs'] must be at least 1")

    # Seed once per trial. Re-seeding inside the batch loop would replay the
    # same latent noise on every step and the same shuffle order every epoch.
    torch.manual_seed(RANDOM_SEED)

    # Load the data and move it to the GPU if available. A TensorDataset
    # avoids building a Python list with one tuple per sample.
    train_data = load_data()[:TRAINING_DATA_LENGTH].to(DEVICE)
    train_loader = DataLoader(
        torch.utils.data.TensorDataset(train_data),
        batch_size=batch_size,
        shuffle=True,
        drop_last=True,
    )
    if len(train_loader) == 0:
        raise ValueError("training data holds fewer samples than one batch")
    last_batch = len(train_loader) - 1

    discriminator = discriminator.to(DEVICE)
    generator = generator.to(DEVICE)
    loss_function = nn.BCELoss().to(DEVICE)
    discriminator_optimiser = optim.Adam(discriminator.parameters(), lr=params["learning_rate"])
    generator_optimiser = optim.Adam(generator.parameters(), lr=params["learning_rate"])

    # drop_last=True guarantees full batches, so labels are built only once.
    real_samples_labels = torch.ones((batch_size, 1), device=DEVICE)
    generated_samples_labels = torch.zeros((batch_size, 1), device=DEVICE)
    all_samples_labels = torch.cat((real_samples_labels, generated_samples_labels))

    warmup_epochs = epochs // 2

    start_time = time.time()
    for epoch in range(epochs):

        for n, (real_samples,) in enumerate(tqdm(train_loader)):

            # DATA FOR DISCRIMINATOR
            latent_space_samples = torch.randn((batch_size, 2), device=DEVICE)
            # Detach: the discriminator step must not backpropagate into the generator.
            generated_samples = generator(latent_space_samples).detach()
            all_samples = torch.cat((real_samples, generated_samples))

            # TRAINING DISCRIMINATOR
            discriminator.zero_grad()
            output_discriminator = discriminator(all_samples)
            discriminator_loss = loss_function(output_discriminator, all_samples_labels)
            discriminator_loss.backward()
            discriminator_optimiser.step()

            # DATA FOR GENERATOR
            latent_space_samples = torch.randn((batch_size, 2), device=DEVICE)

            # TRAINING GENERATOR
            # The generator is rewarded when its samples are labelled "real".
            generator.zero_grad()
            generated_samples = generator(latent_space_samples)
            output_discriminator_generated = discriminator(generated_samples)
            generator_loss = loss_function(output_discriminator_generated, real_samples_labels)
            generator_loss.backward()
            generator_optimiser.step()

            # Report once per epoch, after the final batch.
            if n == last_batch:
                print(f"Epoch: {epoch} | G. Loss: {generator_loss} | D. Loss: {discriminator_loss}")

        # Pruning
        if epoch == warmup_epochs:
            # The trial is half way through completion, now checking whether to prune or not
            prune(study=study, min_trials=MIN_TRIALS,
                  current_g_loss=generator_loss.item(),
                  current_d_loss=discriminator_loss.item())

    end_time = time.time()
    run_time = round(end_time - start_time, 2)
    print(f"Trial Complete!\nRun time for this trial was {run_time} seconds.\n")

    # Plain floats: no autograd graph is kept alive.
    return generator_loss.item(), discriminator_loss.item()

## Objective function
The aim of this function is to define a set of hyperparameter values, build the model, train the model, and evaluate the loss of both the generator and discriminator. 

In [8]:
def objective(trial, additional_arg):
    """Optuna objective: sample hyperparameters, train a fresh GAN, return losses.

    Args:
        trial: Optuna trial used to suggest the hyperparameter values.
        additional_arg: Forwarded unchanged to ``train_and_optimise`` as its
            ``study`` argument.

    Returns:
        tuple: ``(generator_loss, discriminator_loss)`` from training.
    """
    # Suggestions are made in a fixed order: learning rate, batch size, epochs.
    learning_rate = trial.suggest_float("learning_rate", low=1e-5, high=1e-1, log=True)
    batch_size = trial.suggest_int("batch_size", low=500, high=2000, step=100)
    epochs = trial.suggest_int("epochs", low=5, high=50, step=5)
    params = {
        "learning_rate": learning_rate,
        "batch_size": batch_size,
        "epochs": epochs,
    }

    # Fresh, untrained networks for every trial (discriminator is built first).
    discriminator = Discriminator()
    generator = Generator()

    generator_loss, discriminator_loss = train_and_optimise(
        params, generator, discriminator, additional_arg
    )
    return generator_loss, discriminator_loss

## Display all trials and best trial

In [10]:
def display_all_trials(study):
    """Print how many trials ran / completed / were pruned, then a summary table.

    Args:
        study: Optuna study providing ``trials_dataframe()`` and ``trials``.
    """
    source_columns = ['number', 'values_0', 'values_1', 'params_batch_size',
                      'params_epochs', 'params_learning_rate', 'state']
    display_columns = ["Trial #", "G Loss", "D Loss", "Batch Size",
                       "Epochs", "Learning Rate", "State"]

    # Keep only the columns of interest and give them readable headings.
    table = pd.DataFrame(study.trials_dataframe(), columns=source_columns)
    table = table.rename(columns=dict(zip(source_columns, display_columns)))
    table["Trial #"] += 1  # Adjust the trial numbers

    trials = study.trials
    complete = sum(
        1 for trial in trials
        if trial.state == optuna.trial.TrialState.COMPLETE
    )
    pruned = sum(
        1 for trial in trials
        if trial.state == optuna.trial.TrialState.PRUNED
    )
    total = len(table)

    print(f"{total} TOTAL TRIALS \n{complete} COMPLETED TRIALS \n{pruned} PRUNED TRIALS\n")
    print(table)
    print("\n")

def display_best_trial(study):
    """Print number, losses and hyperparameters of the first best trial.

    ``study.best_trials`` may hold several trials; only the first entry is
    shown. When the list is empty a short notice is printed instead of
    failing with an IndexError.

    Args:
        study: Optuna study exposing ``best_trials``.
    """
    best_trials = study.best_trials
    if not best_trials:
        print("No best trial available: the study has no completed trials.")
        return

    best_trial = best_trials[0]
    best_trial_params = best_trial.params
    best_trial_values = best_trial.values
    print("~Best trial~")
    # Trial numbers are zero-based internally; display them starting from 1.
    print(f"Trial #: {best_trial.number+1}")
    print(f"G Loss: {best_trial_values[0]}\nD Loss: {best_trial_values[1]}")
    print(f"Batch Size: {best_trial_params['batch_size']}\nEpochs: {best_trial_params['epochs']}\nLearning Rate: {best_trial_params['learning_rate']}")

def save_trials(study):
    """Write all trials of ``study`` to a timestamped CSV file.

    The file is written to the ``Optimisation Trials`` directory, which is
    created if it does not exist yet.

    Args:
        study: Optuna study exposing ``trials_dataframe()``.
    """
    import os  # local import: only needed to create the output directory

    df = study.trials_dataframe()

    # str(datetime.now()) looks like "YYYY-MM-DD HH:MM:SS.ffffff".
    date, clock = str(datetime.now()).split(" ")
    # Colons are not allowed in Windows file names.
    clock = clock.replace(":", ".")

    # Without this, to_csv fails when the directory is missing.
    os.makedirs("Optimisation Trials", exist_ok=True)
    df.to_csv(f"Optimisation Trials/gan_optimisation_{date}_{clock}.csv", index=False)
    print(f"Trials saved to: gan_optimisation_{date}_{clock}.csv\n")
    

In [11]:
def convert_time(seconds):
    """Format a duration in seconds as ``"H hours MM mins SS secs"``.

    Hours are not wrapped at 24, so runs longer than a day are reported in
    full (e.g. 90000 seconds -> "25 hours 00 mins 00 secs").

    Args:
        seconds: Non-negative duration in seconds (int or float).

    Returns:
        str: The formatted duration; fractional seconds are truncated.
    """
    minutes, seconds = divmod(seconds, 60)
    hour, minutes = divmod(minutes, 60)

    return "%d hours %02d mins %02d secs" % (hour, minutes, seconds)

## Main program
The study that is created provides a multi-objective optimisation, so that it can optimise more than one value - generator loss and discriminator loss. 

In [12]:
if __name__ == "__main__":

    # Create the optimisation study, or reload it from the SQLite storage if
    # a study with this name already exists.
    # NOTE(review): the second objective (discriminator loss) is maximised
    # here, while the notes above talk about minimising both losses and the
    # pruner treats a higher discriminator loss as worse - confirm intent.
    study = optuna.create_study(directions=["minimize", "maximize"],
                                study_name = "GAN-Optimiser",
                                sampler = optuna.samplers.NSGAIISampler(),
                                storage = "sqlite:///demonstration_optimisation.db",
                                load_if_exists=True)

    # The study is handed to the objective so training can run pruning checks.
    additional_arg = study
    wrapped_objective = functools.partial(objective, additional_arg = additional_arg)

    # Checking if an optimisation loop was halted and there are incomplete
    # trials; their parameters are queued so that they are run again.
    # NOTE(review): failed trials keep their FAIL state, so they are queued
    # again on every restart - confirm this is intended.
    interrupted = False
    for trial in study.trials:
        if trial.state in (optuna.trial.TrialState.FAIL, optuna.trial.TrialState.WAITING):
            interrupted = True
            study.enqueue_trial(trial.params)

    # Decide how many trials still have to run.
    if interrupted: # carrying on from previously started optimisation loop
        complete_trials = sum(
            1 for trial in study.trials
            if trial.state in (optuna.trial.TrialState.PRUNED, optuna.trial.TrialState.COMPLETE)
        )
        print("Continuing previous optimisation loop...")
        # Never request a negative number of trials.
        n_trials = max(0, 200 - complete_trials)
    else: # starting a completely new study
        print("Starting new optimisation loop...")
        n_trials = 150 # Number of trials to test with different values

    # Optimise the objective function. Timing wraps both branches; before,
    # the timestamps were only set for a new study and resuming crashed.
    start_time = time.time()
    study.optimize(wrapped_objective, n_trials = n_trials)
    end_time = time.time()

    optimisation_run_time = convert_time(round(end_time - start_time, 2))

    print(f"Optimsation complete!\nOptimisation run time was {optimisation_run_time}\n")
    print("Number of finished trials: ", len(study.trials))

    save_trials(study)
    display_all_trials(study)
    display_best_trial(study)

[I 2024-04-08 13:36:18,594] A new study created in RDB with name: GAN-Optimiser


Starting new optimisation loop...


  0%|          | 0/1515 [00:00<?, ?it/s]

[W 2024-04-08 13:37:32,423] Trial 0 failed with parameters: {'learning_rate': 0.00046574321034245544, 'batch_size': 2000, 'epochs': 30} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "C:\Users\rrema\anaconda3\envs\torch\lib\site-packages\optuna\study\_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\rrema\AppData\Local\Temp\ipykernel_7688\2797391794.py", line 12, in objective
    g_loss, d_loss = train_and_optimise(params, generator, discriminator, additional_arg)
  File "C:\Users\rrema\AppData\Local\Temp\ipykernel_7688\1547163394.py", line 41, in train_and_optimise
    output_discriminator = discriminator(all_samples)
  File "C:\Users\rrema\anaconda3\envs\torch\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "C:\Users\rrema\anaconda3\envs\torch\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
 

KeyboardInterrupt: 