In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import KFold

### Constants and hyperparameters

In [None]:
# Cross-validation and training schedule
k = 5  # folds used by the K-fold cross validation
batch_size = 24  # batch size passed to model.fit
epochs = 500  # upper bound on the training epochs of each fold
# Training and test set splitting percentage
# NOTE(review): not referenced by any visible cell (KFold does the splitting) - confirm before relying on it
split_percentage = 0.8
validation_split = 0.2  # fraction of the training data held out as validation set
early_stopping_patience = 15  # epochs of patience before early stopping triggers

# NACA numbers to predict
naca_numbers = [
    'maximum_camber',
    'maximum_camber_position',
    'maximum_thickness',
]

In [None]:
# CHANGE ME: settings that select the experiment

# Dataset path
dataset_path = "../Dataset/Definitivo/NACA prediction/Arrival times/arrival_times.npz"

# Flow quantity to be used as feature (key of the array read from the dataset)
flow_quantity = "distribution_statistics"

### Data Loading

In [None]:
# Loading the data.
# np.load on an .npz archive returns a lazy NpzFile that keeps the archive open;
# the context manager closes it as soon as the two arrays have been read into memory.
with np.load(dataset_path) as npz_file:
    features = npz_file[flow_quantity]
    labels = npz_file["naca_numbers"]

# zip() silently truncates to the shorter input, which would hide a malformed dataset
if len(features) != len(labels):
    raise ValueError(
        f"'{flow_quantity}' has {len(features)} samples but 'naca_numbers' has {len(labels)}"
    )

# One (features, NACA numbers) pair per sample
dataset = list(zip(features, labels))

### Shuffling the dataset

In [None]:
# Shuffling the dataset in place.
# KFold(k) is used without shuffling, so the folds are taken from this sample order:
# a fixed seed keeps the folds identical from one run to the next. A dedicated
# generator is used so the global NumPy random state is left untouched.
shuffle_seed = 42
np.random.default_rng(shuffle_seed).shuffle(dataset)

### Features and labels

In [None]:
# Un-zipping the (features, labels) pairs and turning each column into a NumPy array
X, Y = map(np.array, zip(*dataset))

### Building the model

In [None]:
# BEST MODEL FOR THE 1D FEATURES
def buildModel(input_shape):
    """Build and compile the fully connected regression network.

    The network stacks three tanh Dense layers (30, 20 and 10 units) followed by a
    Dense layer without activation that has one unit per entry of `naca_numbers`.

    Args:
        input_shape: shape of a single sample, forwarded to the input layer.

    Returns:
        The Sequential model, compiled with the 'mse' loss, the 'adam' optimizer
        and the 'mae' metric.
    """
    model = keras.Sequential()
    model.add(keras.layers.InputLayer(input_shape=input_shape))

    # Hidden layers, from the widest to the narrowest
    for units in (30, 20, 10):
        model.add(keras.layers.Dense(units, activation=tf.nn.tanh))

    # Output layer: one value per NACA number
    model.add(keras.layers.Dense(len(naca_numbers)))

    model.compile(loss='mse', optimizer='adam', metrics=['mae'])
    return model

### Metrics computation

In [None]:
# Function to compute the classification accuracy
def classificationMetrics(predictions, labels, num_outputs=None):
    """Return, for each output column, the fraction of correctly classified samples.

    A sample is correctly classified for a column when its prediction and its label
    round to the same integer (ties round to the nearest even integer, as with the
    built-in round()).

    Args:
        predictions: 2D array-like (samples x outputs) with the predicted values.
        labels: 2D array-like (samples x outputs) with the true values.
        num_outputs: number of leading columns to evaluate. Defaults to
            len(naca_numbers).

    Returns:
        1D float array of length num_outputs with the accuracy of each column.

    Raises:
        ValueError: if `labels` contains no samples.
        IndexError: if an input has fewer than num_outputs columns.
    """
    if num_outputs is None:
        num_outputs = len(naca_numbers)

    predictions = np.asarray(predictions, dtype=float)
    labels = np.asarray(labels, dtype=float)

    if min(predictions.shape[1], labels.shape[1]) < num_outputs:
        raise IndexError(f"predictions and labels need at least {num_outputs} columns")
    if len(labels) == 0:
        raise ValueError("Cannot compute the classification accuracy without samples")

    # np.rint rounds whole arrays at once and, unlike round(), lets NaN/inf through:
    # a non-finite prediction is simply counted as misclassified instead of raising
    rounded_predictions = np.rint(predictions[:, :num_outputs])
    rounded_labels = np.rint(labels[:, :num_outputs])

    # The mean of the boolean matches over the samples is the per-column accuracy
    return np.mean(rounded_predictions == rounded_labels, axis=0)

In [None]:
def regressionMetrics(predictions, labels, num_outputs=None):
    """Compute the Mean Squared Error and the Mean Absolute Error of each output column.

    Args:
        predictions: 2D array-like (samples x outputs) with the predicted values.
        labels: 2D array-like (samples x outputs) with the true values.
        num_outputs: number of leading columns to evaluate. Defaults to
            len(naca_numbers).

    Returns:
        Tuple (mses, maes) of 1D float arrays of length num_outputs.

    Raises:
        IndexError: if an input has fewer than num_outputs columns.
    """
    if num_outputs is None:
        num_outputs = len(naca_numbers)

    # Working in float64 so the errors are accumulated in double precision
    # whatever the dtype of the inputs
    predictions = np.asarray(predictions, dtype=float)
    labels = np.asarray(labels, dtype=float)

    if min(predictions.shape[1], labels.shape[1]) < num_outputs:
        raise IndexError(f"predictions and labels need at least {num_outputs} columns")

    # Per-sample, per-column prediction errors
    errors = predictions[:, :num_outputs] - labels[:, :num_outputs]

    # Averaging over the samples (axis 0) leaves one value per column
    mses = np.mean(errors ** 2, axis=0)
    maes = np.mean(np.absolute(errors), axis=0)

    return mses, maes

### K-fold cross-validation

In [None]:
# One zero-filled (folds x NACA numbers) array for each metric collected during cross validation
mse, mae, accuracy = (np.zeros((k, len(naca_numbers))) for _ in range(3))

In [None]:
# Iterating over the experiments (folds); enumerate supplies the fold index
for experiment, (train_index, test_index) in enumerate(KFold(k).split(X)):
    # Training and test set
    X_train, X_test = X[train_index], X[test_index]
    Y_train, Y_test = Y[train_index], Y[test_index]

    # Normalization statistics, computed on the training fold only
    mean = X_train.mean(axis=0)
    std = X_train.std(axis=0)

    # A feature that is constant over the training fold has std == 0: dividing by it
    # would fill the inputs with NaN/inf, so such features are only centered
    std = np.where(std == 0, 1.0, std)

    # Normalizing the training and test features w.r.t. the training statistics
    normalized__X_train = (X_train - mean) / std
    normalized__X_test = (X_test - mean) / std

    # Building a fresh model for this fold
    model = buildModel(input_shape=np.shape(normalized__X_train)[1:])

    # Early stopping with a predefined patience; the weights of the epoch with the
    # best validation loss are restored when training stops
    early_stopping = keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=early_stopping_patience,
        restore_best_weights=True,
        verbose=0
    )

    # Training the model
    history = model.fit(
        normalized__X_train,
        Y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=validation_split,
        shuffle=True,
        verbose=0,
        callbacks=[early_stopping]
    )

    # Computing the predictions of the test set
    predictions = model.predict(normalized__X_test)

    # Computing the regression Mean Squared Error and Mean Absolute Error
    experiment_mse, experiment_mae = regressionMetrics(predictions, Y_test)

    # Computing the classification accuracy
    experiment_accuracy = classificationMetrics(predictions, Y_test)

    # Storing the metrics of this fold in its row of the results arrays
    mse[experiment, :] = experiment_mse
    mae[experiment, :] = experiment_mae
    accuracy[experiment, :] = experiment_accuracy

    # Displaying status
    print(f'Experiment {experiment+1}/{k} | Number of train samples: {len(X_train)} | Number of test samples: {len(X_test)} | Regression MSE: {np.mean(experiment_mse)} | Regression MAE: {np.mean(experiment_mae)} | Classification Accuracy: {np.mean(experiment_accuracy)}')

### Results

In [None]:
# Displaying results: for each metric, the mean over all folds and NACA numbers
# followed by the mean over the folds of each single NACA number
metric_reports = (
    ("Mean Square Error (Loss) --> ", mse),
    ("\nMean Absolute Error --> ", mae),
    ("\nClassification Accuracy --> ", accuracy),
)

for heading, fold_values in metric_reports:
    print(f"{heading}{np.mean(fold_values.flatten())}")
    for column, naca_number in enumerate(naca_numbers):
        print(f"  • {naca_number}: {np.mean(fold_values[:,column])}")