In [None]:
import json
import random
import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt

### Data loading

In [None]:
# CHANGE ME: settings to edit before running the notebook
# Path of the JSON dataset file opened by the data-loading cell
dataset_path = "../Dataset/Regional averages/ND_regional_averages/1d_regional_averages_-1c.json"
# Key selected inside the "features" entry of every sample
feature_name = "p"

In [None]:
# Names of the NACA numbers to predict (their count sets the size of the model output)
naca_numbers = ['maximum_camber', 'maximum_camber_position', 'maximum_thickness']

# Parsing the JSON file, then keeping only the selected feature and the NACA labels of each sample
dataset = []
with open(dataset_path, 'r') as dataset_file:
  samples = json.load(dataset_file)
  dataset.extend(
    {
      "features": sample["features"][feature_name],
      # Labels follow the key order of the "naca_numbers" object stored in the file
      "labels": list(sample["naca_numbers"].values())
    }
    for sample in samples
  )

### Building the model

In [None]:
# BEST MODEL FOR REGIONAL AVERAGES
def buildModel(input_shape):
  """Build and compile the fully connected regression network.

  The network flattens its input, applies three tanh hidden layers of
  30, 20 and 10 units, and ends with a linear layer holding one unit per
  entry of ``naca_numbers``.

  Args:
    input_shape: Shape of a single input sample (without the batch axis).

  Returns:
    The Keras Sequential model, compiled with the MSE loss, the Adam
    optimizer and the MAE metric.
  """
  hidden_layer_sizes = (30, 20, 10)

  # Stacking the layers one at a time
  model = keras.Sequential()
  model.add(keras.layers.Flatten(input_shape=input_shape))
  for units in hidden_layer_sizes:
    model.add(keras.layers.Dense(units, activation=tf.nn.tanh))

  # Linear output layer: one regression target per NACA number
  model.add(keras.layers.Dense(len(naca_numbers)))

  # Compiling the model
  model.compile(loss='mse', optimizer='adam', metrics=['mae'])

  return model

### Metrics computation

In [None]:
# Function to compute the classification accuracy
def computeAccuracy(predictions, test_labels):
    """Compute, for every NACA number, the fraction of samples classified correctly.

    Predictions and labels are rounded to the nearest integer (ties go to the
    even integer, exactly like the built-in ``round``); a sample counts as
    correct when the two rounded values are equal.

    Args:
        predictions: NumPy array indexed as ``[sample, naca_number]`` with the
            model outputs.
        test_labels: NumPy array with the same layout holding the true values.

    Returns:
        Float array of length ``len(naca_numbers)`` with the accuracy of each
        NACA number.

    Raises:
        ZeroDivisionError: If the arrays contain no samples.
    """
    # Creating an array to save the results
    accuracy = np.zeros(len(naca_numbers))

    for idx in range(len(naca_numbers)):
        # Converting the NACA values to the closest integer with one vectorized call.
        # Unlike round(), np.rint propagates NaN instead of raising, so a diverged
        # prediction is counted as misclassified rather than aborting the evaluation.
        naca_predictions = np.rint(predictions[:, idx])
        naca_labels = np.rint(test_labels[:, idx])

        # Counting the samples correctly classified (NaN never compares equal)
        num_correct = int(np.count_nonzero(naca_predictions == naca_labels))

        # Computing the classification accuracy of the current NACA number
        accuracy[idx] = num_correct / len(naca_labels)

    return accuracy

In [None]:
def computeMAE(predictions, test_labels):
    """Compute the Mean Absolute Error of every NACA number.

    Args:
        predictions: NumPy array indexed as ``[sample, naca_number]`` with the
            model outputs.
        test_labels: NumPy array with the same layout holding the true values.

    Returns:
        Float array of length ``len(naca_numbers)`` with one MAE per NACA number.
    """
    # One mean absolute difference per output column, in the order of naca_numbers
    column_errors = [
        np.mean(np.absolute(predictions[:, column] - test_labels[:, column]))
        for column in range(len(naca_numbers))
    ]

    # Packing the per-column errors into a float array
    return np.array(column_errors, dtype=float)

### Experiments

In [None]:
epochs = 500 # Maximum number of training epochs (early stopping can end training sooner)
step_size = 100 # Increment of samples per experiment
split_percentage = 0.8 # Fraction of each experiment's samples used for training (the rest is the test set)
validation_split = 0.2 # Fraction of the training data that model.fit holds out for validation
dataset_size = len(dataset) # Total number of samples available
num_experiments = int(np.ceil(dataset_size / step_size)) # Total number of experiments
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=15) # Early stopping on the validation loss with a patience of 15 epochs

In [None]:
# Result tables: one row per experiment holding [number of samples, metric]
experiments_mae, experiments_accuracy = np.zeros((num_experiments, 2)), np.zeros((num_experiments, 2))

# Iterating over the number of the experiments
for idx in range(num_experiments):
  # Extracting the number of samples to use for the i-th experiment (capped at the dataset size)
  num_samples = min((idx + 1) * step_size, dataset_size)

  # Extracting the samples to be used in the experiment (random subset, in random order)
  experiment_dataset = random.sample(dataset, num_samples)

  # Computing the number of training samples according to the splitting percentage
  num_training_samples = int(np.floor(split_percentage * len(experiment_dataset)))

  # Extracting the training and test set of the current experiment
  training_set, test_set = experiment_dataset[:num_training_samples], experiment_dataset[num_training_samples:]

  # Extracting the training features and labels
  train_features = np.array([sample["features"] for sample in training_set])
  train_labels = np.array([sample["labels"] for sample in training_set])

  # Extracting the test features and labels
  test_features = np.array([sample["features"] for sample in test_set])
  test_labels = np.array([sample["labels"] for sample in test_set])

  # Normalizing the data using the statistics of the training set only
  mean = train_features.mean(axis=0)
  std = train_features.std(axis=0)

  # A feature that is constant over the training set has a zero standard deviation:
  # dividing by it would fill the inputs with NaN/inf, so such a feature is left unscaled
  std = np.where(std == 0, 1.0, std)

  normalized_train_features = (train_features - mean) / std
  normalized_test_features = (test_features - mean) / std

  # Releasing the global Keras state left by the previous experiment, otherwise
  # building a new model at every iteration keeps increasing the memory usage
  keras.backend.clear_session()

  # Building the model
  model = buildModel(input_shape=np.shape(normalized_train_features)[1:])

  # Training the model using the samples of the i-th experiment
  model.fit(
    normalized_train_features, 
    train_labels,
    epochs=epochs,
    validation_split=validation_split,
    verbose=0,
    callbacks=[early_stopping]
  )

  # Computing the predictions of the test set (silently, consistently with the training call)
  predictions = model.predict(normalized_test_features, verbose=0)

  # Computing the regression Mean Absolute Error, averaged over the NACA numbers
  mae = computeMAE(predictions, test_labels)
  mae = np.mean(mae)

  # Computing the classification accuracy, averaged over the NACA numbers
  accuracy = computeAccuracy(predictions, test_labels)
  accuracy = np.mean(accuracy)

  # Display progress
  print(f'Experiment {idx + 1}/{num_experiments} | Number of samples: {len(experiment_dataset)} | Regression MAE: {mae} | Classification Accuracy: {accuracy}')

  # Adding the experiment's results to the results list
  experiments_mae[idx, :] = [num_samples, mae]
  experiments_accuracy[idx, :]= [num_samples, accuracy]

### Results

In [None]:
# Showing both result tables, one after the other
print(experiments_mae, experiments_accuracy, sep='\n')

In [None]:
# Function to compute the moving average of an array
def movingAverage(data, window):
    """Compute the simple moving average of a 1-D sequence.

    Args:
        data: 1-D sequence (list or NumPy array) of numbers.
        window: Number of consecutive values averaged together (>= 1).

    Returns:
        NumPy array with ``len(data) - window + 1`` averages, or an empty
        array when ``data`` holds fewer than ``window`` values.

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be a positive integer")

    # np.convolve swaps its operands when the kernel is longer than the data, so in
    # 'valid' mode it would return values that are not averages over `window` samples
    # (and it raises on empty data). Fewer samples than the window means no average.
    if window > np.size(data):
        return np.empty(0)

    moving_average = np.convolve(data, np.ones(window), 'valid') / window
    return moving_average

In [None]:
# Function to plot the results
def plot_results(data, y_label, window=3):
    """Scatter-plot the experiment results together with their moving average.

    Args:
        data: 2-D NumPy array whose first column holds the x values and whose
            second column holds the metric to plot.
        y_label: Label of the y axis.
        window: Number of points averaged by the moving-average line.
    """
    x = data[:,0]
    y = data[:,1]

    # The moving average only exists when there are at least `window` points;
    # with fewer points the line is skipped instead of failing on mismatched lengths
    has_moving_average = len(y) >= window

    # Plotting the results
    if has_moving_average:
        # Computing the moving average of the obtained results; each average is
        # drawn at the x position of the last point of its window
        moving_average = movingAverage(y, window)
        plt.plot(x[(window-1):], moving_average, color="red", label=f'Moving Average {window}')
    plt.scatter(x, y, color="blue")

    # The label is bound to the line itself, so the legend cannot end up
    # describing the scatter points when the line is not drawn
    if has_moving_average:
        plt.legend(loc='upper right')
    plt.xlabel("Training set size")
    plt.ylabel(y_label)

    plt.show()

In [None]:
# Plotting the regression error and the classification accuracy collected by the experiments
plot_results(experiments_mae, "Regression - Mean Absolute Error")
plot_results(experiments_accuracy, "Classification - Accuracy")