# 0. Import libraries

The libraries used to create the data are in the following cell

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    tf.config.experimental.set_visible_devices(physical_devices[0], 'GPU')

2023-09-20 12:52:49.333186: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-09-20 12:52:49.370515: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-09-20 12:52:49.371411: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-09-20 12:52:51.547300: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-09-20 12:52:51.547869: W tensorflow/core/common_runtime/gpu/gpu_device.

# 1. Define constants

This section defines constants to be used in the notebook

In [2]:
# Path constants
DATA_FOLDER_PATH = "./Data"
FEATURES_FILENAME = "surfaces.npy"
LABELS_FILENAME = "coefficients.npy"

FEATURES_FILE_PATH = f"{DATA_FOLDER_PATH}/{FEATURES_FILENAME}"
LABELS_FILE_PATH = f"{DATA_FOLDER_PATH}/{LABELS_FILENAME}"

# Column names
SURFACE_POINTS = "surface_points"
ZERNIKE_COEFFICIENTS = "zernike_coefficients"

# Neural network architecture size
INPUT_SIZE = 60
OUTPUT_SIZE = 10

# Neural network training parameters
N_EPOCHS = 100
BATCH_SIZE = 4096
LEARNING_RATE = 0.01
ACTIVATION = 'relu'
N_HIDDEN = [64, 32, 16]
REGULARIZER = keras.regularizers.L1L2(l1=0.001,l2=0.1)
INITIALIZER = keras.initializers.he_normal(seed=None)
LOSS = tf.keras.losses.MeanSquaredError()
OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE, beta_1=0.9, beta_2=0.999)
METRICS = tf.keras.metrics.MeanSquaredError()

# 2. Compile functions

In [3]:
def get_features_and_labels_from_df(df):
    """
    Converts the columns of a surface dataframe into a list of features and labels in the form of list of numpy arrays
    Input:
        df (pd.DataFrame): The surface dataframe with columns 'surface_points'(features) and 'zernike_coefficients'(labels)
    
    Output:
        features (np.array): A numpy array with the z values of the surface at sampled points
        labels (np.array): A numpy array with the Zernike coefficients of the surface
    """
    features = convert_column_into_numpy_array_list(df, SURFACE_POINTS)
    labels = convert_column_into_numpy_array_list(df, ZERNIKE_COEFFICIENTS)
    return features, labels
    

def convert_column_into_numpy_array_list(df, column_name):
    """
    Converts the specified column of the dataframe into a list of numpy arrays
    Input:
        df (pd.DataFrame): The surface dataframe with columns 'surface_points' and 'zernike_coefficients'
        column_name (string): The name of the column to convert to list of numpy arrays
        
    Returns:
        numpy_list (tensor): The list of numpy arrays converted from the column
    """
    numpy_series = df[column_name].apply(lambda x: np.fromstring(x[1: -1], dtype=float, sep=' ')).to_list()
    numpy_list = tf.data.Dataset.from_tensor_slices(numpy_series)
    return numpy_series


def create_architecture():
    model = keras.Sequential(name="SurfaceReconstructor")
    model.add(keras.layers.InputLayer(input_shape=(INPUT_SIZE,),
                                      batch_size=None))
    
    for neurons in N_HIDDEN:
        model.add(keras.layers.Dense(neurons,
                                     kernel_regularizer=REGULARIZER,
                                     kernel_initializer=keras.initializers.HeNormal(seed=None),
                                     use_bias=False
                                     ))
        model.add(keras.layers.BatchNormalization())
        model.add(keras.layers.Activation(ACTIVATION))
        
    model.add(keras.layers.Dense(OUTPUT_SIZE,
                                 activation="softmax"
                                ))
    
    model_name = "final"
    return model, model_name


def compile_model(my_model):
    my_model.compile(loss=LOSS,
                     optimizer=OPTIMIZER,
                     metrics=[METRICS])
    

# 2. Read data

First read the data

In [4]:
features = np.load(FEATURES_FILE_PATH, allow_pickle=True)
labels = np.load(LABELS_FILE_PATH, allow_pickle=True)

Split the data:
- **80%** training
- **10%** validation/dev test
- **10%** final test

In [5]:
# Calculate the number of rows for each split
total_rows = len(features)
train_size = int(0.8 * total_rows)
val_size = int(0.1 * total_rows)

# Split the DataFrame into training (80%), validation (10%), and test (10%)
train_features = features[:train_size]
train_labels = labels[:train_size]

val_features = features[train_size:train_size + val_size]
val_labels = labels[train_size:train_size + val_size]

test_features = features[train_size + val_size:]
test_labels = labels[train_size + val_size:]

# 3. Train neural network

Create the neural network architecture and compile it

In [6]:
surface_reconstruction_model, model_name = create_architecture()
compile_model(surface_reconstruction_model)

In [None]:
history = surface_reconstruction_model.fit(train_features,
                                           train_labels,
                                           batch_size=BATCH_SIZE,
                                           epochs=N_EPOCHS,
                                           validation_data= (val_features, val_labels),
                                           verbose=1)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
 1/12 [=>............................] - ETA: 0s - loss: 32.2378 - mean_squared_error: 32.1731

In [None]:


def string_to_array(string):
    return np.array([float(x) for x in string.split(',')])

In [None]:
# Generate some example data
np.random.seed(0)
X = np.random.rand(100, 10)  # 100 samples, 10 features
y = np.random.randint(0, 2, size=100)  # Binary classification labels (0 or 1)

# Split the data into training and validation sets
split_ratio = 0.8
split_index = int(len(X) * split_ratio)

X_train, X_val = X[:split_index], X[split_index:]
y_train, y_val = y[:split_index], y[split_index:]

# Define the neural network model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(32, activation='relu', input_shape=(10,)),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')  # Binary classification output
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val))

# Evaluate the model on the validation data
val_loss, val_accuracy = model.evaluate(X_val, y_val)
print(f"Validation Loss: {val_loss}, Validation Accuracy: {val_accuracy}")


In [None]:
results=pd.DataFrame(history.history)
results.plot(figsize=(8, 5))
plt.grid(True)
plt.xlabel ("Epochs")
plt.ylabel ("Mean Squared Error")
plt.gca().set_ylim(0, 1) # set the vertical range to [0-1]
plt.show()