## CROSS VALIDATION for velocity

Code used for the cross validation of the neural network for the velocity. It is used to find the optimal value of the regularization parameter $\lambda$, and assess the performance of the network.



In [None]:
# General setups and imports
from utils import *

# Device configuration: use the first GPU when available, otherwise the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Initialization and seeds setup: the seeds are fixed for reproducibility.
# The check is done on device.type because comparing a torch.device object
# with the string "cuda:0" does not match, which would skip the CUDA setup.
if device.type == "cuda":
    torch.cuda.init()
    torch.cuda.empty_cache()
    torch.cuda.manual_seed(42)

np.random.seed(42)
torch.manual_seed(42)

In [None]:
# Rescale every input parameter to the [0, 1] range using its bounds.
# Column order: u1 u2 u3 u4 u5 u6
maxs = np.array([8.0, 0.3, 0.5, 0.5, 0.5, 0.0])
mins = np.array([4.0, 0.1, -0.1, -0.5, -0.5, -0.3])
for i in range(params.shape[1]):
    column = params[:, i]
    params[:, i] = (column - mins[i]) / (maxs[i] - mins[i])

# Time is treated as one more input parameter: u1 u2 u3 u4 u5 u6 t
times = np.linspace(0, 1, 300)
# The test set always uses every time instant
times_test = times
# Training times: times[::] keeps every time instant of the simulation,
# times[::5] would keep one instant every 5 timesteps
times_train = times[::]

# Random order of the simulations, so that the folds built in the cross
# validation below correspond to a random splitting of the dataset
idx = np.random.permutation(params.shape[0])
params = params[idx]

# Expand the velocity in time through matrix multiplication, then reorder the
# simulations with the same permutation applied to the parameters
velocity_time = (solutions['velocity'] @ basis_time['velocity'].T)[idx]

# Neural network definition: any change to the number of hidden layers, to the
# batch normalization layers or to the activation functions is made in "Net".

class Net(torch.nn.Module):
    """Fully connected network with 5 hidden blocks.

    Each hidden block is Linear -> Tanh -> BatchNorm1d, all with `hidden_size`
    neurons, followed by a final Linear layer producing `output_size` values.

    In this notebook the network is used with 7 inputs (u1 u2 u3 u4 u5 u6 t)
    and 40 outputs (the POD, i.e. reduced, coefficients of the velocity
    solution), with 128 neurons per hidden layer.
    """

    # Indices of the hidden blocks; they are used to name the layers
    # fc1..fc5, F1..F5 and batch_norm1..batch_norm5.
    _HIDDEN_BLOCKS = range(1, 6)

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        in_features = input_size
        for i in self._HIDDEN_BLOCKS:
            # Registration order (linear, activation, batch norm) is kept per block
            setattr(self, f"fc{i}", torch.nn.Linear(in_features, hidden_size))
            setattr(self, f"F{i}", torch.nn.Tanh())
            setattr(self, f"batch_norm{i}", torch.nn.BatchNorm1d(hidden_size))
            # Only the first block reads the raw inputs
            in_features = hidden_size
        self.fc6 = torch.nn.Linear(hidden_size, output_size)

    def forward(self, x):
        for i in self._HIDDEN_BLOCKS:
            linear = getattr(self, f"fc{i}")
            activation = getattr(self, f"F{i}")
            batch_norm = getattr(self, f"batch_norm{i}")
            x = batch_norm(activation(linear(x)))
        return self.fc6(x)

For each fold, the network is trained for `n_epoch` epochs. The relative error, the absolute error and the test loss are then computed on the held-out fold. The process is repeated `k_fold` times, and the mean values of the relative error, the absolute error and the test loss over all the folds are computed at the end.

In [None]:
def _with_time_column(sim_params, time_grid):
    # Pair every simulation (row of sim_params) with every instant of time_grid:
    # rows are ordered simulation by simulation, time is appended as last column.
    repeated = np.repeat(sim_params, len(time_grid), axis=0)
    time_column = np.tile(time_grid, len(sim_params)).reshape(-1, 1)
    return np.hstack((repeated, time_column))


def _flatten_snapshots(coefficients, time_grid, n_modes):
    # Keep only the instants of the global `times` grid that belong to time_grid,
    # then reorder (simulation, coefficient, time) into one row of n_modes
    # coefficients per (simulation, time) pair, matching _with_time_column.
    time_mask = np.isin(times, time_grid)
    sampled = coefficients[:, :, time_mask]
    n_rows = coefficients.shape[0] * len(time_grid)
    # The explicit row count makes the reshape fail if time_grid is not a
    # subset of `times`, instead of silently misaligning inputs and targets.
    return sampled.transpose(0, 2, 1).reshape(n_rows, n_modes)


# K-fold cross validation
def cross_validation(k_indices, k, n_epoch, l1_lambda, *,
                     hidden_size=128, learning_rate=0.01, gamma=0.9995):
    """Train the network on all folds but the k-th one and test it on the k-th.

    The data are read from the module-level variables `params`,
    `velocity_time`, `times`, `times_train` and `times_test`; the network is
    the module-level class `Net` and it is run on `device`.

    Input:
        k_indices: 2D array with one row of simulation indices per fold
        k: index of the fold used as test set
        n_epoch: number of (full-batch) training epochs
        l1_lambda: weight of the L1 regularization term; the term is added to
            the loss only when l1_lambda > 0
        hidden_size: number of neurons of each hidden layer
        learning_rate: initial learning rate of the Adam optimizer
        gamma: factor multiplying the learning rate after every epoch

    Output:
        Tuple (relative error, absolute error, test loss) on the test fold:
        the two errors are averaged over all the test samples, the test loss
        is the MSE between predicted and true coefficients.
    """
    # print the fold currently used as test set
    print("Cross validation, k = ", k)
    # indices of the k-th fold, used for testing
    test_indices = k_indices[k]
    # indices of all the remaining folds, used for training
    train_indices = k_indices[np.arange(len(k_indices)) != k].flatten()

    # number of POD coefficients of the velocity (40 in this notebook)
    n_modes = velocity_time.shape[1]

    def to_tensor(array):
        return torch.tensor(array, dtype=torch.float32).to(device)

    # Training set: parameters + time as inputs, POD coefficients as targets.
    # The targets are sampled at the instants of times_train, so switching to
    # e.g. times[::5] only requires changing times_train.
    params_time_train = to_tensor(_with_time_column(params[train_indices], times_train))
    velocity_model_train = to_tensor(
        _flatten_snapshots(velocity_time[train_indices], times_train, n_modes))

    # Testing set: same construction on the instants of times_test
    params_time_test = to_tensor(_with_time_column(params[test_indices], times_test))
    velocity_model_test = to_tensor(
        _flatten_snapshots(velocity_time[test_indices], times_test, n_modes))

    # Network: one input per parameter plus time (u1 u2 u3 u4 u5 u6 t),
    # one output per POD coefficient of the velocity
    input_size = params_time_train.shape[1]
    output_size = n_modes
    net = Net(input_size=input_size, hidden_size=hidden_size, output_size=output_size).to(device)

    # Loss function
    loss_fn = torch.nn.MSELoss()

    # Alternative (weighted) loss: each POD coefficient is weighted by the
    # square root of its normalized singular value. To use it, uncomment the
    # lines below in place of the MSELoss above.
    # sv_weights = sv_space['velocity'] / np.sum(sv_space['velocity'])
    # sv_weights = to_tensor(sv_weights).reshape(-1, 1)
    # def loss_fn(y_p, y_true):
    #     return torch.mean(torch.mm((y_p - y_true) ** 2, torch.sqrt(sv_weights)))

    # Optimizer and exponential decay of the learning rate (one step per epoch)
    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
    #optimizer = torch.optim.AdamW(net.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=gamma)

    # Parameters entering the L1 term: every parameter whose name does not
    # contain 'bias' (this includes the batch normalization weights)
    l1_weights = [weights for name, weights in net.named_parameters() if 'bias' not in name]
    nweights = sum(weights.numel() for weights in l1_weights)
    if l1_lambda > 0:
        print(f'Total number of weights in the model = {nweights}')

    # Train the network (full batch: the whole training set at every epoch)
    net.train()
    for t in range(n_epoch):
        # Forward pass and data loss
        y_pred = net(params_time_train)
        loss_train = loss_fn(y_pred, velocity_model_train)

        # L1 regularization: mean absolute value of the selected weights
        if l1_lambda > 0:
            L1_term = sum(torch.sum(torch.abs(weights)) for weights in l1_weights) / nweights
            loss_train = loss_train + L1_term * l1_lambda

        if t % 100 == 0:
            print("Epoch: ", t, "Loss: ", loss_train.item())

        # Reset the gradients, backpropagate and update the weights
        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()

        # Step the scheduler at the end of each epoch
        scheduler.step()

    # Test the network. Evaluation mode makes the batch normalization layers
    # use the statistics accumulated during training instead of the statistics
    # of the test batch; no gradients are needed here.
    net.eval()
    with torch.no_grad():
        y_pred = net(params_time_test)
        loss_t = loss_fn(y_pred, velocity_model_test)

    print("Test loss: ", loss_t.item())

    # Move predictions and targets to numpy for the error metrics
    y_pred_numpy = y_pred.cpu().numpy()
    velocity_model_test_numpy = velocity_model_test.cpu().numpy()

    # Euclidean norms over the POD coefficients, one value per (simulation, time)
    error_norm = np.linalg.norm(y_pred_numpy - velocity_model_test_numpy, axis=1)
    true_norm = np.linalg.norm(velocity_model_test_numpy, axis=1)

    # Relative error: error norm divided by the norm of the true coefficients
    rel_error = error_norm / true_norm

    # Absolute error rescaled by the average (over time) norm of the true
    # coefficients within each simulation
    mean_vel = np.mean(true_norm.reshape(-1, len(times_test)), axis=1)
    mean_vel = np.repeat(mean_vel, len(times_test))
    abs_error = error_norm / mean_vel

    print("Relative error: ", np.mean(rel_error))
    print("Absolute error: ", np.mean(abs_error))
    return np.mean(rel_error), np.mean(abs_error), loss_t.item()

Run the cross validation for a given value of the regularization parameter $\lambda$; change `l1_lambda` in the cell below and re-run it to test other values.

In [None]:
# number of folds
k_fold = 5
# number of simulations in each fold
interval = params.shape[0] // k_fold
# one row of simulation indices per fold, taken consecutively from idx
k_indices = idx[:k_fold * interval].reshape(k_fold, interval)

# number of training epochs for every fold
n_epoch = 2000

# L1 regularization parameter
l1_lambda = 10 ** (-11)  # change the value of lambda here, tested from 10^(-1) to 10^(-12)
print("Lambda:", l1_lambda)

# Train and test once per fold, collecting the three metrics of every fold
fold_metrics = []
for k in range(k_fold):
    rel_err, abs_err, test_loss = cross_validation(k_indices, k, n_epoch, l1_lambda)
    fold_metrics.append((rel_err, abs_err, test_loss))

# Regroup the per-fold results into one list per metric
rel_errors, abs_errors, test_losses = (list(values) for values in zip(*fold_metrics))

# Averages over all the folds
print("Mean relative error: ", np.mean(rel_errors))
print("Mean absolute error: ", np.mean(abs_errors))
print("Mean test loss: ", np.mean(test_losses))