# 0. Import libraries

The libraries used throughout this notebook are imported in the following cell

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras

2023-09-19 13:09:59.655203: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


# 1. Define constants

This section defines constants to be used in the notebook

In [None]:
# --- Data location ---
DATA_FOLDER_PATH = "./Data"
DATA_FILENAME = "surfaces.csv"
DATA_FILE_PATH = DATA_FOLDER_PATH + "/" + DATA_FILENAME

# --- Dataframe column names ---
SURFACE_POINTS = "surface_points"  # features column
ZERNIKE_COEFFICIENTS = "zernike_coefficients"  # labels column

# --- Neural network dimensions ---
INPUT_SIZE = 60  # width of the input layer
OUTPUT_SIZE = 10  # width of the output layer

# --- Neural network training hyperparameters ---
N_EPOCHS = 5000
BATCH_SIZE = 4096
LEARNING_RATE = 0.01
ACTIVATION = 'relu'
N_HIDDEN = [64, 32, 16]  # units of each hidden Dense layer, in order
REGULARIZER = keras.regularizers.L1L2(l1=0.001, l2=0.1)
INITIALIZER = keras.initializers.he_normal(seed=None)
LOSS = keras.losses.MeanSquaredError()
OPTIMIZER = keras.optimizers.Adam(
    learning_rate=LEARNING_RATE,
    beta_1=0.9,
    beta_2=0.999,
)
METRICS = [keras.metrics.MeanSquaredError()]

# 2. Compile functions

In [None]:
def get_features_and_labels_from_df(df):
    """
    Extracts the features and the labels of a surface dataframe by parsing its two text columns
    Input:
        df (pd.DataFrame): The surface dataframe with columns 'surface_points'(features) and 'zernike_coefficients'(labels)

    Output:
        features (np.array): One parsed array per row of the 'surface_points' column, as returned by
                             convert_column_into_numpy_array_list
        labels (np.array): One parsed array per row of the 'zernike_coefficients' column, as returned by
                           convert_column_into_numpy_array_list
    """
    # Features column is parsed first, labels column second
    features, labels = (
        convert_column_into_numpy_array_list(df, column)
        for column in (SURFACE_POINTS, ZERNIKE_COEFFICIENTS)
    )
    return features, labels
    

def convert_column_into_numpy_array_list(df, column_name):
    """
    Parses every cell of a text column of the dataframe into a numpy array of floats
    Input:
        df (pd.DataFrame): The dataframe holding the column to parse
        column_name (string): The name of the column whose cells are space-separated numbers
                              wrapped in one leading and one trailing character (e.g. "[1. 2. 3.]")

    Returns:
        parsed_rows (np.array): A 1-D object array with one float numpy array per dataframe row
    """
    def _parse_cell(cell_text):
        # Drop the first and last character (the enclosing brackets) before parsing the numbers
        inner_text = cell_text[1: -1]
        return np.fromstring(inner_text, dtype=float, sep=' ')

    parsed_rows = df[column_name].apply(_parse_cell)
    return parsed_rows.to_numpy()


def create_architecture():
    """
    Builds the (uncompiled) fully connected network used to predict Zernike coefficients from surface points
    Architecture:
        - Input layer of INPUT_SIZE values
        - For every entry of N_HIDDEN: Dense (no bias, He-normal initialization, REGULARIZER on the kernel)
          followed by BatchNormalization and the ACTIVATION non-linearity
        - Dense output layer of OUTPUT_SIZE units with a linear activation

    Output:
        model (keras.Sequential): The uncompiled model, named "SurfaceReconstructor"
        model_name (string): The label associated with this architecture ("final")
    """
    model = keras.Sequential(name="SurfaceReconstructor")
    model.add(keras.layers.InputLayer(input_shape=(INPUT_SIZE,),
                                      batch_size=None))

    for neurons in N_HIDDEN:
        # The bias is disabled because the BatchNormalization layer that follows
        # already learns its own offset, which makes a Dense bias redundant
        model.add(keras.layers.Dense(neurons,
                                     kernel_regularizer=REGULARIZER,
                                     kernel_initializer=keras.initializers.HeNormal(seed=None),
                                     use_bias=False
                                     ))
        model.add(keras.layers.BatchNormalization())
        model.add(keras.layers.Activation(ACTIVATION))

    # Linear output: the network is trained as a regressor with a mean squared error loss.
    # A softmax here would force the outputs into (0, 1) and make them sum to 1,
    # so the predicted coefficients could never match arbitrary real-valued targets
    model.add(keras.layers.Dense(OUTPUT_SIZE,
                                 activation="linear"
                                ))

    model_name = "final"
    return model, model_name


def compile_model(model):
    """
    Compiles the given model in place with the notebook-level LOSS, OPTIMIZER and METRICS constants
    Input:
        model (keras.Model): The model to compile

    Output:
        None: The model passed as argument is configured in place
    """
    # Use the 'model' argument rather than a global variable, so the function
    # works on a fresh kernel and compiles exactly the model it was given
    model.compile(loss=LOSS,
                  optimizer=OPTIMIZER,
                  metrics=METRICS)
    

# 2. Read data

First read the data

In [None]:
# Load the surfaces CSV located at DATA_FILE_PATH into a dataframe
surface_df = pd.read_csv(filepath_or_buffer=DATA_FILE_PATH)

Split the data:
- **80%** training
- **10%** validation (dev) set
- **10%** final test

In [None]:
# Row counts of the splits; int() truncates, so any leftover rows end up in the test split
total_rows = surface_df.shape[0]
train_size = int(total_rows * 0.8)
val_size = int(total_rows * 0.1)

# Contiguous, order-preserving slices: training (80%) | validation (10%) | test (remaining rows)
train_df, val_df, test_df = (
    surface_df.iloc[:train_size],
    surface_df.iloc[train_size:train_size + val_size],
    surface_df.iloc[train_size + val_size:],
)

# 3. Train neural network

Obtain features and labels from train and validation sets

In [None]:
# Parse the training split first, then the validation split
features_train, labels_train = get_features_and_labels_from_df(df=train_df)
features_val, labels_val = get_features_and_labels_from_df(df=val_df)

Create the neural network architecture

In [None]:
# create_architecture() returns a (model, model_name) pair: unpack it so that
# the Keras model itself, and not the tuple, is what gets compiled
surface_reconstruction_model, surface_reconstruction_model_name = create_architecture()
compile_model(surface_reconstruction_model)