## CROSS VALIDATION for pressure


This code performs the cross validation of the neural network for the pressure. It is used to:
- assess the performance of the different models implemented
- fine-tune some hyperparameters (learning rate, $\gamma$ coefficient for the exponential scheduler, $\lambda$ coefficient for $L^1$ regularization)


In [None]:
# General setups and imports
from utils import *

# Device configuration: use the first GPU when CUDA is available, otherwise the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Initialization and seeds setup: the seeds are fixed for reproducibility.
# The device is tested through its `.type` attribute: comparing a torch.device
# object directly with the string "cuda:0" does not reliably evaluate to True,
# which would silently skip the CUDA-specific initialization below.
if device.type == "cuda":
    torch.cuda.init()
    # release cached GPU memory left over from previous runs of the notebook
    torch.cuda.empty_cache()
    torch.cuda.manual_seed(42)

np.random.seed(42)
torch.manual_seed(42)

# Rescale every input parameter to the [0, 1] range using its lower/upper bound
# column order: u1 u2 u3 u4 u5 u6
mins = np.array([4.0, 0.1, -0.1, -0.5, -0.5, -0.3])
maxs = np.array([8.0, 0.3, 0.5, 0.5, 0.5, 0.0])
for col in range(params.shape[1]):
    lower = mins[col]
    params[:, col] = (params[:, col] - lower) / (maxs[col] - lower)


# Time is handled as one more input parameter: u1 u2 u3 u4 u5 u6 t
# normalized time grid of the simulations, in the [0, 1] range
times = np.linspace(0, 1, 300)
# training times: times[::] keeps every time step of the simulation,
# times[::5] keeps one time step every 5
times_train = times[::]
# the test set always uses all the time steps
times_test = times

# Random order of the simulations: it makes the splitting of the dataset
# performed by the cross validation function defined below a random one
idx = np.random.permutation(params.shape[0])
params = params[idx]


# Expand the pressure in time through a matrix multiplication, then reorder
# the simulations with the same permutation applied to the parameters
pressure_time = (solutions['pressure'] @ basis_time['pressure'].T)[idx]

# setting the Neural Network: all the modifications to the number of hidden layers, batch normalization layers 
# and activation functions can be implemented into "Net" defined below

class Net(torch.nn.Module):
    """Fully connected network with 5 hidden layers, each followed by batch normalization.

    Every hidden stage is ``Linear -> activation -> BatchNorm1d``; the
    activations alternate ReLU, Tanh, ReLU, Tanh, ReLU. A final linear layer
    maps the last hidden representation to the outputs.

    In this notebook the network is built with 7 inputs (u1 u2 u3 u4 u5 u6 t),
    128 neurons per hidden layer and 7 outputs (the POD coefficients of the
    pressure solution).
    """

    # activation used by each hidden stage, in order
    _ACTIVATIONS = (torch.nn.ReLU, torch.nn.Tanh, torch.nn.ReLU, torch.nn.Tanh, torch.nn.ReLU)

    def __init__(self, input_size, hidden_size, output_size):
        """Create the layers.

        input_size: number of input features
        hidden_size: number of neurons of every hidden layer
        output_size: number of predicted values
        """
        super().__init__()
        in_features = input_size
        # the sub-modules are registered as fc<i>, F<i>, batch_norm<i> (i = 1..5)
        for stage, activation in enumerate(self._ACTIVATIONS, start=1):
            setattr(self, f"fc{stage}", torch.nn.Linear(in_features, hidden_size))
            setattr(self, f"F{stage}", activation())
            setattr(self, f"batch_norm{stage}", torch.nn.BatchNorm1d(hidden_size))
            in_features = hidden_size
        # output layer
        self.fc6 = torch.nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Apply the 5 hidden stages in order and return the output of the last linear layer."""
        for stage in range(1, len(self._ACTIVATIONS) + 1):
            linear = getattr(self, f"fc{stage}")
            activation = getattr(self, f"F{stage}")
            normalization = getattr(self, f"batch_norm{stage}")
            x = normalization(activation(linear(x)))
        return self.fc6(x)

For each fold, the network is trained for `n_epoch` epochs. The relative error, the absolute error and the test loss are then computed on the held-out fold. The process is repeated `k_fold` times, and the mean values of the relative error, the absolute error and the test loss over all the folds are computed at the end.

In [None]:
# K-fold cross validation
def _append_time_column(sim_params, time_grid):
    """Pair every simulation row of ``sim_params`` with every instant of ``time_grid``.

    Each parameter row is repeated ``len(time_grid)`` times and the time grid
    is tiled once per simulation; the time is stored in an extra last column.
    """
    repeated = np.repeat(sim_params, len(time_grid), axis=0)
    time_column = np.tile(time_grid, len(sim_params)).reshape(-1, 1)
    return np.hstack((repeated, time_column))


def _flatten_in_time(coefficients, n_times):
    """Reshape a (simulation, coefficient, time) array into (simulation * n_times, coefficient)."""
    n_sim, n_coeff = coefficients.shape[0], coefficients.shape[1]
    return coefficients.transpose(0, 2, 1).reshape(n_sim * n_times, n_coeff)


# K-fold cross validation
def cross_validation(k_indices, k, n_epoch):
    """Train a new ``Net`` on all folds but the k-th one and evaluate it on the k-th fold.

    The function reads the module-level names ``params``, ``pressure_time``,
    ``times_train``, ``times_test``, ``device`` and ``Net``.

    input:
        k_indices: 2-D array with one row of simulation indices per fold
        k: index of the fold used as test set
        n_epoch: number of epochs for training

    output:
        mean relative error, mean absolute error (rescaled by the average norm
        of the reference solution of each simulation) and MSE test loss,
        all computed on the test fold
    """
    # print the current k considered as test set
    print ("Cross validation, k = ", k)
    # indexes of the k-th fold, used for test
    test_indices = k_indices[k]
    # indexes of all the remaining folds, used for training
    train_indices = k_indices[np.arange(len(k_indices)) != k].flatten()

    # Training set
    # parameters (simulations) of the training folds, with time added as last column
    params_time_train = _append_time_column(params[train_indices], times_train)
    # corresponding pressure values
    pressure_time_train = pressure_time[train_indices]
    # sample the pressure values. if [:,:,::] is set, all times are considered.
    # otherwise, with [:,:,::5] the solution is sampled every 5 time steps
    # (this must match the sampling chosen for times_train)
    pressure_model_train = pressure_time_train[:, :, ::]
    # one row of POD coefficients of the pressure solution for each simulation and time
    pressure_model_train = _flatten_in_time(pressure_model_train, len(times_train))

    # Testing set (all times are considered in this case)
    params_time_test = _append_time_column(params[test_indices], times_test)
    pressure_model_test = _flatten_in_time(pressure_time[test_indices], len(times_test))

    # Convert to tensor
    params_time_train = torch.tensor(params_time_train, dtype=torch.float32).to(device)
    params_time_test = torch.tensor(params_time_test, dtype=torch.float32).to(device)
    pressure_model_train = torch.tensor(pressure_model_train, dtype=torch.float32).to(device)
    # the test targets stay on the CPU: the test metrics are computed there
    pressure_target_test = torch.tensor(pressure_model_test, dtype=torch.float32)

    # Train the network

    # sizes are read from the data: u1 u2 u3 u4 u5 u6 t as inputs and the
    # POD coefficients of the pressure as outputs (7 and 7 in this notebook)
    input_size = params_time_train.shape[1]
    hidden_size = 128  # parameter to choose the hidden size (same for each hidden layer)
    output_size = pressure_model_train.shape[1]

    # Create the network
    net = Net(input_size=input_size, hidden_size=hidden_size, output_size=output_size).to(device)

    # Define the loss function
    loss_fn = torch.nn.MSELoss()

    # choice of learning rate
    learning_rate = .01
    # factor by which the learning rate is multiplied at each epoch by the
    # exponential scheduler
    gamma = 0.9995

    # choice of the optimizer
    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
    #optimizer = torch.optim.AdamW(net.parameters(), lr=learning_rate)

    # Create the ExponentialLR scheduler
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=gamma)

    # training mode: batch normalization uses the statistics of the current batch
    net.train()
    for t in range(n_epoch):
        # Forward pass on the whole training set (full-batch training)
        y_pred = net(params_time_train)

        # Compute the training loss
        loss_train = loss_fn(y_pred, pressure_model_train)

        # print loss every 100 epochs
        if t % 100 == 0:
            print("Epoch: ", t, "Loss: ", loss_train.item())

        # reset the gradients, backpropagate and update the weights
        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()

        # apply the learning rate scheduler
        scheduler.step()

    # Test the network
    # evaluation mode: batch normalization must use the running statistics
    # learned during training instead of the statistics of the test set, and
    # no gradient is needed for the prediction
    net.eval()
    with torch.no_grad():
        y_pred_cpu = net(params_time_test).to("cpu")
    # convert the output of the NN to numpy array
    y_pred_numpy = y_pred_cpu.numpy()

    # Compute the loss on the test set
    loss_t = loss_fn(y_pred_cpu, pressure_target_test)

    # print the test loss
    print("Test loss: ", loss_t.item())

    # norm of the prediction error and of the reference solution for each
    # (simulation, time) pair
    error_norm = np.linalg.norm(y_pred_numpy - pressure_model_test, axis=1)
    reference_norm = np.linalg.norm(pressure_model_test, axis=1)

    # Compute relative error:
    rel_error = error_norm / reference_norm

    # Compute absolute error rescaled by average values:
    # average norm of the reference solution within each simulation ...
    mean_reference = np.mean(reference_norm.reshape(-1, len(times_test)), axis=1)
    # ... repeated for every time step of that simulation
    abs_error = error_norm / np.repeat(mean_reference, len(times_test))

    print("Relative error: ", np.mean(rel_error))
    print("Absolute error: ", np.mean(abs_error))
    return np.mean(rel_error), np.mean(abs_error), loss_t.item()

Run the cross validation

In [None]:
# Driver of the K-fold cross validation

# number of folds and number of training epochs for each fold
k_fold = 5
n_epoch = 3000

# number of simulations in each fold
interval = params.shape[0] // k_fold
# one row of simulation indexes per fold: consecutive slices of idx
# (simulations exceeding k_fold * interval are not assigned to any fold)
k_indices = idx[:k_fold * interval].reshape(k_fold, interval)

# metrics collected fold by fold
rel_errors, abs_errors, test_losses = [], [], []
rel_err = abs_err = test_loss = 0

for k in range(k_fold):
    # train on every fold but the k-th one, test on the k-th one
    rel_err, abs_err, test_loss = cross_validation(k_indices, k, n_epoch)
    rel_errors.append(rel_err)
    abs_errors.append(abs_err)
    test_losses.append(test_loss)

# averages over all the folds
print("Mean relative error: ", np.mean(rel_errors))
print("Mean absolute error: ", np.mean(abs_errors))
print("Mean test loss: ", np.mean(test_losses))