In [None]:
import json
import optuna
import numpy as np
import tensorflow as tf
from tensorflow import keras

### Data loading

In [None]:
# CHANGE ME
# Path of the JSON file holding the samples (opened by the data-loading cell)
dataset_path = "../Dataset/Streamlines signals/streamlines_signals_2.json"
# Key selecting which entry of every sample's "features" dictionary is used as model input
flow_quantity = "U"

In [None]:
# Names of the NACA parameters; the length of this list sets the size of the model output
naca_numbers = ['maximum_camber', 'maximum_camber_position', 'maximum_thickness']

# Reading every sample of the JSON file and keeping only the signal of the
# selected flow quantity (features) and the NACA values (labels)
with open(dataset_path, 'r') as dataset_file:
  samples = json.load(dataset_file)

dataset = [
    {
        "features": sample["features"][flow_quantity],
        # NOTE(review): the label order follows the key order of the JSON object,
        # not the `naca_numbers` list above - confirm that the two agree
        "labels": list(sample["naca_numbers"].values())
    }
    for sample in samples
]

### Shuffling the dataset

In [None]:
# Shuffling the dataset in place with a fixed seed, so that a fresh
# "Restart & Run All" always produces the same training/test split.
# NOTE(review): only the shuffle is seeded here; the Keras weight initialisation
# and the Optuna sampler are not seeded by this cell.
SHUFFLE_SEED = 42
np.random.default_rng(SHUFFLE_SEED).shuffle(dataset)

### Training and test set

In [None]:
# Fraction of the samples assigned to the training set
split_percentage = 0.8

# Position of the cut: the training share of the dataset, rounded down to a whole sample
num_training_samples = int(np.floor(len(dataset) * split_percentage))

# Samples before the cut are used for training, the remaining ones for testing
training_set = dataset[:num_training_samples]
test_set = dataset[num_training_samples:]

In [None]:
# Helper collecting one field ("features" or "labels") of every sample into a NumPy array
def _stack_field(samples, field):
    return np.array([sample[field] for sample in samples])

# Training features and labels
train_features = _stack_field(training_set, "features")
train_labels = _stack_field(training_set, "labels")

# Test features and labels
test_features = _stack_field(test_set, "features")
test_labels = _stack_field(test_set, "labels")

### Data normalization

In [None]:
# Computing the mean and standard deviation of the training features
# (statistics are taken along axis 0, i.e. across the training samples)
mean = train_features.mean(axis=0)
std = train_features.std(axis=0)

# A feature that is constant over the training set has a zero standard deviation;
# dividing by it would produce NaN/inf values, so such entries are replaced by 1
# (the feature is then only centred, not rescaled)
std = np.where(std == 0, 1.0, std)

In [None]:
# Standardizing both sets with the statistics of the training set only,
# so that the scaling does not depend on the test samples
normalized_train_data, normalized_test_data = (
    (features - mean) / std
    for features in (train_features, test_features)
)

In [None]:
# Inserting a new axis of size 1 at position 2 of the training and test features;
# the model input layer reads its shape from axes 1 and 2 of the training array
normalized_train_features = normalized_train_data[:, :, np.newaxis]
normalized_test_features = normalized_test_data[:, :, np.newaxis]

### Creating the study cases

In [None]:
# Maximum number of training epochs of every trial
epochs = 100

# Early stopping on the validation loss with a patience of 10 epochs
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10
)

In [None]:
# Function to create a testing model
def createModel(trial):
    """Build and compile a 1D convolutional regressor sampled from an Optuna trial.

    Hyperparameters requested from `trial` (in this order):
      - "dropout_rate": 0.01 to 0.2 in steps of 0.01, shared by every dropout layer
      - "kernel_size": integer in [1, 8], shared by every convolutional layer
      - "pool_size": integer in [2, 8], shared by every pooling layer
      - "num_conv_layers": integer in [1, 5], followed by one
        "num_filters__conv_layer_<k>" per convolutional layer
      - "num_fc_layers": integer in [1, 5], followed by one
        "num_units__fc_layer_<k>" per fully connected layer
    Filters and units are picked among the multiples of 16 from 16 to 256.

    The input shape is read from the module-level `normalized_train_features`
    and the output size from the module-level `naca_numbers`.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        A `keras.Sequential` model compiled with MSE loss, the Adam optimizer
        and MAE as metric.
    """
    network = keras.Sequential()

    # Hyperparameters shared by all the layers of the network
    drop_probability = trial.suggest_discrete_uniform("dropout_rate", 0.01, 0.2, 0.01)
    conv_kernel = trial.suggest_int("kernel_size", 1, 8)
    pool_width = trial.suggest_int("pool_size", 2, 8)

    # Candidate numbers of filters/units: 16, 32, ..., 256
    width_choices = [16 * multiple for multiple in range(1, 17)]

    # Input layer, shaped after a single training sample
    sample_shape = (normalized_train_features.shape[1], normalized_train_features.shape[2])
    network.add(keras.layers.InputLayer(input_shape=sample_shape))

    # Convolutional part: convolution -> average pooling -> dropout, repeated
    conv_depth = trial.suggest_int("num_conv_layers", 1, 5)
    for layer_number in range(1, conv_depth + 1):
        filters = trial.suggest_categorical(f"num_filters__conv_layer_{layer_number}", width_choices)
        network.add(keras.layers.Conv1D(filters=filters, kernel_size=conv_kernel, activation=tf.nn.relu, padding="same"))
        network.add(keras.layers.AveragePooling1D(pool_size=pool_width, padding="same"))
        network.add(keras.layers.Dropout(rate=drop_probability))

    # Flattening the convolutional output before the fully connected part
    network.add(keras.layers.Flatten())

    # Fully connected part: dense -> dropout, repeated
    dense_depth = trial.suggest_int("num_fc_layers", 1, 5)
    for layer_number in range(1, dense_depth + 1):
        units = trial.suggest_categorical(f"num_units__fc_layer_{layer_number}", width_choices)
        network.add(keras.layers.Dense(units, activation=tf.nn.relu))
        network.add(keras.layers.Dropout(rate=drop_probability))

    # Linear output layer with one unit per NACA parameter
    network.add(keras.layers.Dense(len(naca_numbers)))

    network.compile(loss='mse', optimizer="adam", metrics=['mae'])
    return network

In [None]:
# Function to train the model
def train(model, trial):
    """Fit `model` on the normalized training set.

    Training runs for at most `epochs` epochs with 20% of the training data
    held out for validation. Two callbacks are attached: the module-level
    `early_stopping` and an Optuna pruning callback bound to `trial` that
    monitors 'val_mae'.

    Args:
        model: compiled Keras model to be trained (modified in place).
        trial: Optuna trial the pruning callback is attached to.

    Returns:
        None.
    """
    pruning_callback = optuna.integration.TFKerasPruningCallback(trial, 'val_mae')

    fit_options = dict(
        epochs=epochs,
        validation_split=0.2,
        verbose=0,
        callbacks=[early_stopping, pruning_callback],
    )
    model.fit(normalized_train_features, train_labels, **fit_options)

In [None]:
# Function to evaluate the model
def evaluate(model):
    """Evaluate `model` on the normalized test set.

    Args:
        model: trained Keras model, compiled with a single metric.

    Returns:
        The tuple (loss, mae) measured on the test features and labels.
    """
    test_loss, test_mae = model.evaluate(
        x=normalized_test_features,
        y=test_labels,
        verbose=0,
    )
    return test_loss, test_mae

In [None]:
# Objective function to be minimized
def objective(trial):
    """Build, train and score one model configuration for the Optuna study.

    Args:
        trial: Optuna trial providing the hyperparameters and receiving the
            intermediate values used for pruning.

    Returns:
        The mean absolute error returned by `evaluate` for the trained model.
    """
    # Every trial creates a brand-new model: clearing the Keras session first
    # releases the state left behind by the previous trials, which would
    # otherwise keep accumulating over the whole study
    keras.backend.clear_session()

    # Building the model
    model = createModel(trial)

    # Training the model
    train(model, trial)

    # Evaluating the model
    _, mae = evaluate(model)

    # NOTE(review): `evaluate` scores the model on the test set, so the
    # hyperparameters are selected on the same data later used to report the
    # final error - consider optimizing a validation metric instead
    return mae

### Evaluating the hyperparameters

In [None]:
# Median pruner configured with 5 startup trials and 20 warm-up steps
median_pruner = optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=20)

# Creating the study object: the value returned by the objective is minimized
study = optuna.create_study(direction="minimize", pruner=median_pruner)

# Running the study for 100 trials
study.optimize(objective, n_trials=100)

In [None]:
# Extracting the pruned and complete trials
# (TrialState is read from `optuna.trial`: the `optuna.structs` alias is
# deprecated and no longer available in recent Optuna releases)
trial_state = optuna.trial.TrialState
pruned_trials = [t for t in study.trials if t.state == trial_state.PRUNED]
complete_trials = [t for t in study.trials if t.state == trial_state.COMPLETE]

# Displaying the study statistics
print("STUDY STATISTICS")
print(f"Number of finished trials --> {len(study.trials)}")
print(f"Number of pruned trials --> {len(pruned_trials)}")
print(f"Number of complete trials --> {len(complete_trials)}")

In [None]:
# Best trial found by the study
trial = study.best_trial

# Objective value reached by the best trial
best_mae = trial.value
print("BEST TRIAL")
print(f"Mean Absolute Error --> {best_mae}\n")

# Hyperparameters sampled by the best trial, one per line
print("BEST HYPERPARAMETERS")
best_params = trial.params
for key, value in best_params.items():
    print(f"{key}: {value}")