In [None]:
import random
import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold

### Constants and hyperparameters

In [None]:
# Number of folds used by the k-fold cross validation
k = 5

# Mini-batch size used while fitting the model
batch_size = 24

# How many samples are added from one experiment to the next
step_size = 101

# Upper bound on the number of training epochs
epochs = 500

# Training and test set splitting percentage
# NOTE(review): not referenced by any of the visible cells — confirm it is still needed
split_percentage = 0.8

# Fraction of each training fold held out for validation during fitting
validation_split = 0.2

# Number of epochs without improvement tolerated before early stopping triggers
early_stopping_patience = 15

# NACA numbers to predict (the model has one output per entry)
naca_numbers = [
    'maximum_camber',
    'maximum_camber_position',
    'maximum_thickness',
]

In [None]:
# CHANGE ME

# Path of the .npz archive the dataset is loaded from
dataset_path = "../Dataset/Regional averages/regional_averages_2.npz"

# Indices of the sections to extract along the last feature axis
# (the experiment loop keeps only the first of the selected sections)
section_indices = [1]

# Key of the flow quantity, inside the archive, to be used as feature
flow_quantity = "p"

### Data loading

In [None]:
# Loading the data
# The archive is opened in a `with` block so that its file handle is closed
# as soon as the two arrays have been read into memory.
with np.load(dataset_path) as archive:
    raw_features = archive[flow_quantity]
    raw_labels = archive["naca_numbers"]

# Pairing each feature array with its NACA numbers: one (features, labels)
# tuple per sample, kept in a list so it can be shuffled and sampled later.
dataset = list(zip(raw_features, raw_labels))

### Shuffling the dataset

In [None]:
# Shuffling the dataset (the list is reordered in place)
# NOTE(review): no random seed is set in any of the visible cells, so the
# order produced here — and every result that depends on it — differs between
# runs; re-running this cell also reshuffles the already shuffled list.
# Confirm whether that is intended.
np.random.shuffle(dataset)

### Building the model

In [None]:
# BEST MODEL FOR THE 1D FEATURES
def buildModel(input_shape):
  """Create and compile the fully connected regression network.

  The network is an input layer, three tanh hidden layers of 30, 20 and 10
  units, and a linear output layer with one unit per entry of `naca_numbers`.
  It is compiled with the MSE loss, the Adam optimizer and MAE as metric.

  Args:
    input_shape: shape of a single sample, i.e. without the batch dimension.

  Returns:
    The compiled `keras.Sequential` model.
  """
  # Widths of the hidden layers, from the input side to the output side
  hidden_units = (30, 20, 10)

  # Stacking the layers in order: input, tanh hidden layers, linear output
  layers = [keras.layers.InputLayer(input_shape=input_shape)]
  for units in hidden_units:
    layers.append(keras.layers.Dense(units, activation=tf.nn.tanh))
  layers.append(keras.layers.Dense(len(naca_numbers)))

  # Assembling and compiling the model
  network = keras.Sequential(layers)
  network.compile(loss='mse', optimizer='adam', metrics=['mae'])

  return network

### Metrics computation

In [None]:
# Function to compute the classification accuracy
def classificationMetrics(predictions, labels):
    """Per-output accuracy after rounding both inputs to the nearest integer.

    A sample counts as correctly classified for a given output when its
    rounded prediction equals its rounded label. The number of outputs is
    taken from the inputs themselves, so the function does not depend on any
    notebook-level variable.

    Args:
        predictions: 2-D array-like, one row per sample and one column per
            predicted quantity.
        labels: 2-D array-like of target values with the same shape as
            `predictions`.

    Returns:
        1-D float array with one entry per column: the fraction of samples
        whose rounded prediction matches the rounded label.

    Raises:
        ValueError: if the inputs are not 2-D or their shapes differ.
    """
    predictions = np.asarray(predictions, dtype=float)
    labels = np.asarray(labels, dtype=float)

    if predictions.ndim != 2 or predictions.shape != labels.shape:
        raise ValueError(
            "predictions and labels must be 2-D arrays of the same shape, "
            f"got {predictions.shape} and {labels.shape}"
        )

    # Converting the values to the closest integer on the whole array at once.
    # np.rint rounds halves to the nearest even integer, like the built-in
    # round(); a NaN stays NaN and never compares equal, so a NaN prediction
    # is simply counted as misclassified.
    correctly_classified = np.rint(predictions) == np.rint(labels)

    # The mean of the boolean matches along the samples axis is the accuracy
    # of each output
    return correctly_classified.mean(axis=0)

In [None]:
# Function to compute the regression MSE and MAE
def regressionMetrics(predictions, labels):
    """Per-output Mean Squared Error and Mean Absolute Error.

    The number of outputs is taken from the inputs themselves, so the
    function does not depend on any notebook-level variable.

    Args:
        predictions: 2-D array-like, one row per sample and one column per
            predicted quantity.
        labels: 2-D array-like of target values with the same shape as
            `predictions`.

    Returns:
        Tuple `(mses, maes)` of 1-D float arrays with one entry per column.

    Raises:
        ValueError: if the inputs are not 2-D or their shapes differ.
    """
    predictions = np.asarray(predictions, dtype=float)
    labels = np.asarray(labels, dtype=float)

    if predictions.ndim != 2 or predictions.shape != labels.shape:
        raise ValueError(
            "predictions and labels must be 2-D arrays of the same shape, "
            f"got {predictions.shape} and {labels.shape}"
        )

    # Element-wise prediction errors, shared by both metrics
    errors = predictions - labels

    # Averaging along the samples axis gives one value per output
    mses = np.mean(errors ** 2, axis=0)
    maes = np.mean(np.absolute(errors), axis=0)

    return mses, maes

### Experiments

In [None]:
# Total number of samples available
dataset_size = len(dataset)

# Total number of experiments: one per step_size-sized increment, rounded up
# so that a final, partial increment still gets its own experiment
num_experiments = (dataset_size + step_size - 1) // step_size

In [None]:
# Creating empty arrays to store the experiments' results.
# Each array has one row per experiment: column 0 receives the number of
# samples used and column 1 the metric averaged over folds and NACA numbers.
results_shape = [num_experiments, 2]
mses = np.zeros(results_shape)
maes = np.zeros(results_shape)
accuracies = np.zeros(results_shape)

In [None]:
# Iterating over the number of the experiments.
# Each experiment draws a random subset of the dataset, runs a k-fold cross
# validation on it and stores the fold-averaged metrics.
for idx in range(num_experiments):
  # Number of samples to use for the i-th experiment: grows by step_size at
  # every experiment and is capped at the dataset size
  num_samples = min((idx + 1) * step_size, dataset_size)

  # Displaying status
  print(f'Experiment {idx+1}/{num_experiments} started | Number of samples: {num_samples}')

  # Drawing (without replacement) the samples to be used in the experiment
  experiment_dataset = random.sample(dataset, num_samples)

  # Extracting the features and the labels from the dataset
  X, Y = zip(*experiment_dataset)
  X, Y = np.array(X), np.array(Y)

  # Extracting a single X section from the dataset: the selected sections are
  # taken along the last axis and only the first of them is kept; when no
  # index is given the features are used as they are
  section_X = X[:, :, section_indices][:, :, 0] if len(section_indices) > 0 else X

  # Creating empty arrays to store the folds' results
  # (one row per fold, one column per NACA number)
  fold_mses = np.zeros([k, len(naca_numbers)])
  fold_maes = np.zeros([k, len(naca_numbers)])
  fold_accuracies = np.zeros([k, len(naca_numbers)])

  # Iterating over the folds
  for fold, (train_index, test_index) in enumerate(KFold(k).split(experiment_dataset)):
    # A new model is built for every fold of every experiment: clearing the
    # Keras session releases the global state left behind by the previous
    # models, which would otherwise keep accumulating in memory
    keras.backend.clear_session()

    # Training and test set
    X_train, X_test = section_X[train_index], section_X[test_index]
    Y_train, Y_test = Y[train_index], Y[test_index]

    # Normalization statistics, computed on the training fold only
    mean = X_train.mean(axis=0)
    std = X_train.std(axis=0)

    # A feature that is constant over the training fold has a zero standard
    # deviation; dividing by it would fill the inputs with NaN/inf, so such
    # features are only centred
    std = np.where(std == 0, 1.0, std)

    # Normalizing the training and test features w.r.t. the training statistics
    normalized__X_train = (X_train - mean) / std
    normalized__X_test = (X_test - mean) / std

    # Building the model
    model = buildModel(input_shape=np.shape(normalized__X_train)[1:])

    # Early stopping with a predefined patience
    early_stopping = keras.callbacks.EarlyStopping(
        monitor='val_loss', 
        patience=early_stopping_patience,
        restore_best_weights=True,
        verbose=False
    )

    # Training the model
    history = model.fit(
        normalized__X_train, 
        Y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=validation_split,
        shuffle=True,
        verbose=0,
        callbacks=[early_stopping]
    )

    # Computing the predictions of the test set
    predictions = model.predict(normalized__X_test)

    # Computing the regression Mean Squared Error and Mean Absolute Error
    fold_mse, fold_mae = regressionMetrics(predictions, Y_test)

    # Computing the classification accuracy
    fold_accuracy = classificationMetrics(predictions, Y_test)

    # Adding the metrics to the results arrays
    fold_mses[fold, :] = fold_mse
    fold_maes[fold, :] = fold_mae
    fold_accuracies[fold, :] = fold_accuracy

    # Displaying status
    print(f'    • Fold {fold+1}/{k} completed --> Train - Test: [{len(X_train)} - {len(X_test)}] | MSE: {np.mean(fold_mse)} | MAE: {np.mean(fold_mae)} | Accuracy: {np.mean(fold_accuracy)}')

  # Computing the experiments metrics by averaging the results obtained by each fold
  experiment_mse = fold_mses.mean(axis = 0)
  experiment_mae = fold_maes.mean(axis = 0)
  experiment_accuracy = fold_accuracies.mean(axis = 0)

  # Adding the metrics to the results arrays: column 0 is the number of
  # samples, column 1 the metric averaged over the NACA numbers
  mses[idx, :] = [int(num_samples), np.mean(experiment_mse)]
  maes[idx, :] = [int(num_samples), np.mean(experiment_mae)]
  accuracies[idx, :] = [num_samples, np.mean(experiment_accuracy)]

  # Displaying status
  print(f'Experiment {idx + 1}/{num_experiments} completed | Regression MSE: {mses[idx, 1]} | Regression MAE: {maes[idx, 1]} | Classification Accuracy: {accuracies[idx, 1]}\n')

### Results

In [None]:
# Printing results, one semicolon-separated line per experiment:
# number of samples; MSE; MAE; accuracy
for mse_row, mae_row, accuracy_row in zip(mses, maes, accuracies):
    print(f'{int(mse_row[0])};{mse_row[1]};{mae_row[1]};{accuracy_row[1]}')

In [None]:
# Function to compute the moving average of an array
def movingAverage(data, window):
    """Unweighted moving average over fully overlapping windows.

    Args:
        data: 1-D array-like of numbers.
        window: number of consecutive values averaged together (positive
            integer).

    Returns:
        1-D float array of length `len(data) - window + 1`; it is empty when
        `data` holds fewer than `window` values.

    Raises:
        ValueError: if `window` is smaller than 1.
    """
    if window < 1:
        raise ValueError(f"window must be a positive integer, got {window}")

    data = np.asarray(data, dtype=float)

    # In 'valid' mode np.convolve swaps its operands when the kernel is longer
    # than the data and returns values that are not averages of `window`
    # elements, so that case is answered explicitly with an empty result.
    if window > data.size:
        return np.empty(0)

    moving_average = np.convolve(data, np.ones(window), 'valid') / window
    return moving_average

In [None]:
# Function to plot the results
def plot_results(data, y_label, window=3):
    """Scatter a metric against the number of samples, with its moving average.

    Args:
        data: 2-D array whose column 0 is plotted on the x axis and column 1
            on the y axis.
        y_label: label of the y axis.
        window: number of points averaged by the moving average (positive
            integer, 3 by default).
    """
    x = data[:,0]
    y = data[:,1]

    # The moving average needs at least `window` points; with fewer points
    # only the scatter is drawn instead of failing on mismatched x/y lengths
    has_moving_average = len(y) >= window

    # Plotting the results
    if has_moving_average:
        # Computing the moving average of the obtained results; each averaged
        # value is drawn at the x position of the last point of its window
        moving_average = movingAverage(y, window)
        plt.plot(x[(window-1):], moving_average, color="red", label=f'Moving Average {window}')
    plt.scatter(x, y, color="blue")

    # The legend only describes the moving-average line, so it is shown only
    # when that line exists
    if has_moving_average:
        plt.legend(loc='upper right')
    plt.xlabel("Training set size")
    plt.ylabel(y_label)
    plt.grid()

    plt.show()

In [None]:
# One figure per metric, in the order: MSE, MAE, accuracy
for metric_results, metric_label in (
    (mses, "Regression - Mean Square Error"),
    (maes, "Regression - Mean Absolute Error"),
    (accuracies, "Classification - Accuracy"),
):
    plot_results(metric_results, metric_label)