In [None]:
import json
import random
import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt

### Data loading

In [None]:
# CHANGE ME
# Key used to pick one signal out of each sample's "features" entry
flow_quantity = "U"
# Location of the JSON file holding the samples
dataset_path = "../Dataset/Streamlines signals/streamlines_signals_2.json"

In [None]:
# Names of the regression targets; their count sets the width of the models' output layer
naca_numbers = ['maximum_camber', 'maximum_camber_position', 'maximum_thickness']

# Parse the whole JSON file first so the file handle is released before any processing
with open(dataset_path, 'r') as dataset_file:
  samples = json.load(dataset_file)

# Keep, for every sample, only the selected flow-quantity signal and its target values.
# NOTE(review): the label order is the key order of each sample's "naca_numbers" object in
# the file; presumably it matches the order of `naca_numbers` above - confirm.
dataset = [
  {
    "features": sample["features"][flow_quantity],
    "labels": list(sample["naca_numbers"].values())
  }
  for sample in samples
]

### Building the model

In [None]:
# BEST MODEL FOR FLOW SIGNALS
def buildModel1(input_shape):
  """Build and compile the 1D CNN labelled in this notebook as best for flow signals.

  Args:
    input_shape: shape of a single input sample, forwarded to the InputLayer.

  Returns:
    A compiled keras.Sequential model whose last Dense layer has one unit per
    entry of `naca_numbers` and no activation.
  """
  dropout_rate = 0.01

  # Convolutional part: two Conv1D -> AveragePooling1D -> Dropout stages
  stack = [keras.layers.InputLayer(input_shape=input_shape)]
  for num_filters in (48, 128):
    stack += [
      keras.layers.Conv1D(filters=num_filters, kernel_size=7, activation=tf.nn.relu),
      keras.layers.AveragePooling1D(pool_size=2),
      keras.layers.Dropout(dropout_rate),
    ]
  stack.append(keras.layers.Flatten())

  # Fully connected part: every hidden Dense layer is followed by Dropout
  for num_units in (176, 112, 192, 64, 80):
    stack += [
      keras.layers.Dense(num_units, activation=tf.nn.relu),
      keras.layers.Dropout(dropout_rate),
    ]

  # Output layer: one value per target
  stack.append(keras.layers.Dense(len(naca_numbers)))

  # Assemble and compile (MSE loss, Adam optimizer, MAE reported as metric)
  model = keras.Sequential(stack)
  model.compile(loss='mse', optimizer='adam', metrics=['mae'])
  return model

In [None]:
# BEST MODEL FOR STREAMLINES SIGNALS
def buildModel2(input_shape):
  """Build and compile the 1D CNN labelled in this notebook as best for streamlines signals.

  Args:
    input_shape: shape of a single input sample, forwarded to the InputLayer.

  Returns:
    A compiled keras.Sequential model whose last Dense layer has one unit per
    entry of `naca_numbers` and no activation.
  """
  dropout_rate = 0.03

  # Convolutional part: two Conv1D -> AveragePooling1D -> Dropout stages
  stack = [keras.layers.InputLayer(input_shape=input_shape)]
  for num_filters in (16, 208):
    stack += [
      keras.layers.Conv1D(filters=num_filters, kernel_size=6, activation=tf.nn.relu),
      keras.layers.AveragePooling1D(pool_size=2),
      keras.layers.Dropout(dropout_rate),
    ]
  stack.append(keras.layers.Flatten())

  # Fully connected part: every hidden Dense layer is followed by Dropout
  for num_units in (128, 48, 208, 224):
    stack += [
      keras.layers.Dense(num_units, activation=tf.nn.relu),
      keras.layers.Dropout(dropout_rate),
    ]

  # Output layer: one value per target
  stack.append(keras.layers.Dense(len(naca_numbers)))

  # Assemble and compile (MSE loss, Adam optimizer, MAE reported as metric)
  model = keras.Sequential(stack)
  model.compile(loss='mse', optimizer='adam', metrics=['mae'])
  return model

### Experiments

In [None]:
# Number of training epochs
epochs = 100
# Increment of samples per experiment
step_size = 100
# Fraction of each experiment's samples that goes to the training set (the rest is the test set)
split_percentage = 0.8
# Total number of samples available
dataset_size = len(dataset)
# Total number of experiments: dataset_size / step_size rounded up (integer ceiling division)
num_experiments = -(-dataset_size // step_size)
# Early stopping on the validation loss with a patience of 10 epochs
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

In [None]:
# Result tables: each row holds [number of samples used, metric value on the test set]
losses = np.zeros((num_experiments, 2))
maes = np.zeros((num_experiments, 2))

# Iterating over the experiments
for idx in range(num_experiments):
  # Number of samples to use for the i-th experiment, capped at the dataset size
  num_samples = min((idx + 1) * step_size, dataset_size)

  # Drawing the samples of the experiment at random, without replacement
  experiment_dataset = random.sample(dataset, num_samples)

  # Computing the number of training samples according to the splitting percentage
  num_training_samples = int(np.floor(split_percentage * len(experiment_dataset)))

  # Extracting the training and test set of the current experiment
  training_set = experiment_dataset[:num_training_samples]
  test_set = experiment_dataset[num_training_samples:]

  # Extracting the training features and labels
  train_features = np.array([sample["features"] for sample in training_set])
  train_labels = np.array([sample["labels"] for sample in training_set])

  # Extracting the test features and labels
  test_features = np.array([sample["features"] for sample in test_set])
  test_labels = np.array([sample["labels"] for sample in test_set])

  # Normalizing the data with statistics computed on the training set only
  mean = train_features.mean(axis=0)
  std = train_features.std(axis=0)
  # A feature that is constant over the training set has a zero standard deviation;
  # dividing by it would put NaN/inf into the inputs. Using 1 instead leaves such a
  # feature as (value - mean).
  std = np.where(std == 0, 1.0, std)

  normalized_train_data = (train_features - mean) / std
  normalized_test_data = (test_features - mean) / std

  # Adding a trailing axis of size 1 to the training and test features
  normalized_train_features = np.expand_dims(normalized_train_data, axis=2)
  normalized_test_features = np.expand_dims(normalized_test_data, axis=2)

  # A new model is created on every iteration: clearing Keras' global state first keeps
  # memory from growing with the models left over from previous experiments
  keras.backend.clear_session()

  # Building the model
  input_shape = [np.shape(normalized_train_features)[1], np.shape(normalized_train_features)[2]]
  model = buildModel2(input_shape=input_shape)

  # Training the model using the samples of the i-th experiment
  model.fit(
      normalized_train_features,
      train_labels,
      epochs=epochs,
      validation_split=0.2,
      verbose=0,
      callbacks=[early_stopping]
  )

  # Evaluating on the test set: returns the loss (MSE) and the mean absolute error
  loss, mae = model.evaluate(normalized_test_features, test_labels, verbose=0)

  # Display progress
  print(f'Experiment {idx + 1}/{num_experiments} | Number of samples: {len(experiment_dataset)} | Loss (MSE): {loss} | MAE: {mae}')

  # Adding the results obtained to the arrays
  losses[idx, :] = [num_samples, loss]
  maes[idx, :] = [num_samples, mae]

### Results

In [None]:
# Function to compute the moving average of an array
def movingAverage(data, window):
    """Return the simple moving average of a 1-D sequence.

    Args:
        data: 1-D sequence of numbers.
        window: number of consecutive elements averaged together (positive integer).

    Returns:
        A NumPy array of length ``len(data) - window + 1`` whose i-th element is the
        mean of ``data[i:i + window]``. The array is empty when ``data`` has fewer
        than ``window`` elements.

    Raises:
        ValueError: if ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be a positive integer")

    data = np.asarray(data)

    # np.convolve swaps its operands when the kernel is longer than the data and rejects
    # empty input, so without this guard a too-short input would either raise or return
    # values that are not a moving average.
    if data.size < window:
        return np.empty(0)

    moving_average = np.convolve(data, np.ones(window), 'valid') / window
    return moving_average

In [None]:
# Function to plot the results
def plot_results(data, y_label):
    """Scatter the results and overlay their moving average.

    Args:
        data: 2-D array; column 0 is plotted on the x axis and column 1 on the y axis.
        y_label: label of the y axis.
    """
    x = data[:,0]
    y = data[:,1]

    # Size of the moving-average window
    window = 3

    # The moving average only exists when there are at least `window` points; with fewer
    # points the x and y arrays of the line would differ in length and plotting would fail.
    has_moving_average = len(y) >= window
    if has_moving_average:
        moving_average = movingAverage(y, window)
        # The k-th average covers points k .. k + window - 1 and is drawn at the x of the last one
        plt.plot(x[(window-1):], moving_average, color="red", label=f'Moving Average {window}')

    # Plotting the raw results
    plt.scatter(x, y, color="blue")

    # Only the moving-average line carries a label, so the legend is shown only when it exists
    if has_moving_average:
        plt.legend(loc='upper right')

    # NOTE(review): the arrays filled by the experiment loop store the total number of
    # samples of each experiment (training + test) in column 0 - confirm that
    # "Training set size" is the intended axis label.
    plt.xlabel("Training set size")
    plt.ylabel(y_label)

    plt.show()

In [None]:
# One figure per metric collected during the experiments
for results, axis_label in ((losses, "Loss"), (maes, "Mean Absolute Error")):
  plot_results(results, axis_label)