In [None]:
import json
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import KFold

### Constants and hyperparameters

In [None]:
# Number of folds used by the K-fold cross validation
k = 5

# Maximum number of training epochs (training may stop earlier, see early stopping below)
epochs = 500

# Training and test set splitting percentage
# NOTE(review): not referenced by any of the cells visible here — confirm it is still needed.
split_percentage = 0.8

# Fraction of the training data handed to Keras as validation set
validation_split = 0.2

# Number of epochs without improvement of the monitored quantity before training is stopped
early_stopping_patience = 15

# Early stopping callback watching the validation loss
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=early_stopping_patience
)

In [None]:
# CHANGE ME
# dataset_path: JSON file that the data loading cell opens and parses.
# feature_name: key looked up inside each sample's "features" object; the value
#               found there becomes the network input for that sample.
dataset_path = "../Dataset/Flow signals/ND_signals/1d_flow_signals_-1c.json"
feature_name = "p"

### Data Loading

In [None]:
# Names of the regression targets, in the order used when reporting the results
naca_numbers = ['maximum_camber', 'maximum_camber_position', 'maximum_thickness']

# Parsing the whole JSON file in one go
with open(dataset_path, 'r') as dataset_file:
  samples = json.load(dataset_file)

# Keeping, for every sample, only the selected feature and its labels.
# NOTE(review): the label order is whatever key order each sample's
# "naca_numbers" object has in the file; presumably it matches the
# `naca_numbers` list above — confirm against the dataset.
dataset = [
  {
    "features": sample["features"][feature_name],
    "labels": list(sample["naca_numbers"].values())
  }
  for sample in samples
]

### Shuffling the dataset

In [None]:
# Shuffling the dataset in place.
# A dedicated, seeded generator is used so that the sample order (and therefore
# the composition of every cross-validation fold) is identical on each
# "Restart & Run All"; change the seed to draw a different permutation.
shuffle_seed = 42
np.random.default_rng(shuffle_seed).shuffle(dataset)

### Building the model

In [None]:
# BEST MODEL FOR 1D SIGNALS
def buildModel(input_shape):
  """Create and compile the 1D convolutional regression network.

  The network stacks two Conv1D -> AveragePooling1D -> Dropout stages, flattens
  the result and feeds it to two tanh Dense layers followed by a linear Dense
  layer with one unit per entry of `naca_numbers`.

  Args:
    input_shape: shape of a single sample (batch dimension excluded), passed
      as-is to the InputLayer.

  Returns:
    The Sequential model, compiled with MSE loss, the Adam optimizer and MAE
    as additional metric.
  """
  tanh = tf.nn.tanh

  # Input and convolutional feature extractor
  convolutional_part = [
    keras.layers.InputLayer(input_shape=input_shape),
    keras.layers.Conv1D(filters=30, kernel_size=3, activation=tanh),
    keras.layers.AveragePooling1D(pool_size=2),
    keras.layers.Dropout(0.01),
    keras.layers.Conv1D(filters=20, kernel_size=3, activation=tanh),
    keras.layers.AveragePooling1D(pool_size=2),
    keras.layers.Dropout(0.01),
  ]

  # Fully connected head ending with one linear output per NACA number
  dense_part = [
    keras.layers.Flatten(),
    keras.layers.Dense(20, activation=tanh),
    keras.layers.Dense(10, activation=tanh),
    keras.layers.Dense(len(naca_numbers)),
  ]

  # Sequential model - CNN 1D
  network = keras.Sequential(convolutional_part + dense_part)

  # Compiling the model
  network.compile(optimizer='adam', loss='mse', metrics=['mae'])

  return network

### Metrics computation

In [None]:
# Function to compute the classification accuracy
def computeAccuracy(predictions, test_labels):
    """Compute, per output column, the fraction of exactly matching rounded values.

    Predictions and labels are both rounded to the closest integer (halves are
    rounded to the nearest even integer, the same rule as the built-in `round`)
    and compared element-wise. Non-finite predictions such as NaN never match
    a finite label, so they simply count as misclassified.

    The number of outputs is taken from the arrays themselves, so the function
    does not depend on any module-level variable.

    Args:
        predictions: array-like of shape (n_samples, n_outputs).
        test_labels: array-like with the same shape as `predictions`.

    Returns:
        A float array of shape (n_outputs,) holding the accuracy of each column.

    Raises:
        ValueError: if the inputs are not 2-D, their shapes differ, or they
            contain no samples (the accuracy would be undefined).
    """
    predictions = np.asarray(predictions, dtype=float)
    test_labels = np.asarray(test_labels, dtype=float)

    if predictions.ndim != 2 or predictions.shape != test_labels.shape:
        raise ValueError(
            "predictions and test_labels must be 2-D arrays with the same shape, "
            f"got {predictions.shape} and {test_labels.shape}"
        )
    if predictions.shape[0] == 0:
        raise ValueError("cannot compute the accuracy of an empty set of samples")

    # Converting the NACA values to the closest integer and marking the
    # samples correctly classified, for all the columns at once
    correctly_classified = np.rint(predictions) == np.rint(test_labels)

    # Averaging the boolean matches over the samples gives the accuracy of each NACA number
    return correctly_classified.mean(axis=0)

In [None]:
def computeMAE(predictions, test_labels):
    """Compute the Mean Absolute Error of every output column.

    The number of outputs is taken from the arrays themselves, so the function
    does not depend on any module-level variable.

    Args:
        predictions: array-like of shape (n_samples, n_outputs).
        test_labels: array-like with the same shape as `predictions`.

    Returns:
        A float array of shape (n_outputs,) holding the MAE of each column.

    Raises:
        ValueError: if the inputs are not 2-D, their shapes differ, or they
            contain no samples (the mean would be undefined).
    """
    predictions = np.asarray(predictions, dtype=float)
    test_labels = np.asarray(test_labels, dtype=float)

    if predictions.ndim != 2 or predictions.shape != test_labels.shape:
        raise ValueError(
            "predictions and test_labels must be 2-D arrays with the same shape, "
            f"got {predictions.shape} and {test_labels.shape}"
        )
    if predictions.shape[0] == 0:
        raise ValueError("cannot compute the MAE of an empty set of samples")

    # Averaging the absolute errors over the samples, one value per NACA number
    return np.absolute(predictions - test_labels).mean(axis=0)

### K fold cross validation

In [None]:
# Splitting the list of samples into two aligned arrays:
# row i of `features` and row i of `labels` come from the same sample
feature_rows, label_rows = [], []
for entry in dataset:
    feature_rows.append(entry["features"])
    label_rows.append(entry["labels"])

features = np.array(feature_rows)
labels = np.array(label_rows)

In [None]:
# Per-fold results: one row per experiment (fold), one column per NACA number
mae, accuracy = np.zeros([k, len(naca_numbers)]), np.zeros([k, len(naca_numbers)])

# Each fold is used once as test set while the remaining folds form the training set
for experiment, (train_index, test_index) in enumerate(KFold(k).split(features)):
    # Training and test set
    train_features, test_features = features[train_index], features[test_index]
    train_labels, test_labels = labels[train_index], labels[test_index]

    # Normalization statistics, computed on the training set only
    mean = train_features.mean(axis=0)
    std = train_features.std(axis=0)

    # A position that is constant over the whole training set has zero standard
    # deviation; dividing by it would put NaN/inf in the network input, so such
    # positions are left unscaled (they become 0 after removing the mean)
    std = np.where(std == 0, 1.0, std)

    # Normalizing the training and test features w.r.t. the training statistics
    normalized_train_features = (train_features - mean) / std
    normalized_test_features = (test_features - mean) / std

    # Building a fresh model, so that no weights are carried over between folds
    model = buildModel(input_shape=np.shape(normalized_train_features)[1:])

    # Training the model (part of the training data is held out by Keras as
    # validation set and monitored by the early stopping callback)
    history = model.fit(
        normalized_train_features,
        train_labels,
        epochs=epochs,
        validation_split=validation_split,
        verbose=0,
        callbacks=[early_stopping]
    )

    # Computing the predictions of the test set
    predictions = model.predict(normalized_test_features)

    # Computing the regression Mean Absolute Error
    experiment_mae = computeMAE(predictions, test_labels)

    # Computing the classification accuracy
    experiment_accuracy = computeAccuracy(predictions, test_labels)

    # Adding the metrics to the results arrays
    mae[experiment, :] = experiment_mae
    accuracy[experiment, :] = experiment_accuracy

    # Displaying status
    print(f'Experiment {experiment+1}/{k} | Number of train samples: {len(train_features)} | Number of test samples: {len(test_features)} | Regression MAE: {np.mean(experiment_mae)} | Classification Accuracy: {np.mean(experiment_accuracy)}')

### Results

In [None]:
# Regression summary: global MAE over all folds and NACA numbers,
# followed by the MAE of each NACA number averaged over the folds
print("REGRESSION")
print(f" - Mean Absolute Error --> {np.mean(mae.flatten())}")
for naca_name, fold_errors in zip(naca_numbers, mae.T):
    print(f"   • {naca_name} --> {np.mean(fold_errors)}")


# Classification summary, same layout as the regression one
print("\nCLASSIFICATION")
print(f" - Accuracy --> {np.mean(accuracy.flatten())}")
for naca_name, fold_accuracies in zip(naca_numbers, accuracy.T):
    print(f"   • {naca_name} --> {np.mean(fold_accuracies)}")