**First, clone the GitHub repository**

In [None]:
#%cd /content
#! git clone https://github.com/Tikquuss/lwd
#%cd lwd/scripts

# **Set-up** 
*Once this section is configured, you can (optionally) comment out the previous cell and run everything at once (Runtime -> Run all).*  
**Once everything has been executed, a CSV file is automatically created containing the test losses of every model.**

* Uncomment the line corresponding to your function and choose the bounds of the intervals from which the training and test data will be (uniformly) generated. The values at the end of each line are the bounds used in the original ["Sobolev Training"](https://arxiv.org/abs/1706.04859) paper (pages 14-15).
* The steps are used to plot the curves, so they can be chosen according to the desired precision.

In [None]:
f_name = "Styblinski-Tang" # https://www.sfu.ca/~ssurjano/stybtang.html (-5, 5), (-5, 5)
#f_name = "Ackley" # http://www.sfu.ca/~ssurjano/ackley.html (-5, 5), (-5, 5)
#f_name = "Beale" # https://www.sfu.ca/~ssurjano/beale.html (-4.5, 4.5), (-4.5, 4.5)
#f_name = "Booth" # https://www.sfu.ca/~ssurjano/booth.html (-10, 10), (-10, 10)
#f_name = "Bukin" # https://www.sfu.ca/~ssurjano/bukin6.html (-15, -5), (-3, 3)
#f_name = "McCormick" # https://www.sfu.ca/~ssurjano/mccorm.html (-1.5, 4), (-3, 4)
#f_name = "Rosenbrock" # https://www.sfu.ca/~ssurjano/rosen.html (-2, 2), (-2, 2)

(min_x, max_x), (min_y, max_y) = (-5, 5), (-5, 5)
step_x, step_y = 0.25, 0.25

In [None]:
try:
    %tensorflow_version 1.x
    %matplotlib inline
except Exception:
    pass

import tensorflow as tf
import torch
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import random
import os

from utils import gradient, genData, plotFunction, plotGrad, get_data_loader, plot_stat, global_stat, reshape, to_csv
from utils import forward, backprop, MLP, Siren, train, test
from twin_net_tf import graph, Generator, test as twin_net_tf_test 
from functions import * # Styblinski-Tang (ST), Ackley, Beale, Booth, Bukin, McCormick, Rosenbrock

In [None]:
# Global training configuration shared by every experiment in this notebook.
max_epoch = 1000 # maximum number of epochs
# The three lists below are zipped together by the training helpers: the i-th
# experiment uses nTrains[i] samples, nTests[i] test samples and batch_sizes[i].
batch_sizes = [20, 100, 1000, 1024, 1024] # batch_size
nTrains = [20, 100, 1000, 10000, 100000] # number of training examples
nTests = [10000]*5 # number of test examples

train_seed, test_seed = 0, 1 # for reproducibility

learning_rate = 3e-5 # learning rate

# Passed as-is to the training code when a schedule is requested.
# Presumably a list of (training progress in [0, 1], learning rate) breakpoints — TODO confirm.
learning_rate_schedule = [(0.0, 1.0e-8), (0.2, 0.1), (0.6, 0.01), (0.9, 1.0e-6), (1.0, 1.0e-8)]

improving_limit = float("inf") # Stop training if the training loss does not decrease n times (no limit here)
twin_net_tf_improving_limit = 10 # same criterion, for the TensorFlow twin_net
INPUT_DIM = 2 
HIDDEN_DIM = 256
N_HIDDEN = 4 # number of hidden layers
OUTPUT_DIM = 1
# Seed used to initialize the model parameters, so that all models start from the same values.
params_seed = 0 
init_weights = True
# NOTE(review): "input_dim" is set to OUTPUT_DIM (1), not INPUT_DIM (2) — confirm this is intended.
tf_config = {"init_weights" : init_weights, "input_dim" : OUTPUT_DIM}

# Hyperparameters of the loss functions expressing the tradeoff between the y loss and the dydx loss.
# Leave None and None instead of 1 and 1: None values are not forwarded to the training config.
loss_config = {'alpha': None, "beta" : None} # loss = alpha * loss_y + beta * loss_dydx

# MLP
# Activation g and its analytic derivative dg, based on the tanh approximation of GELU:
#   g(x)  = x * part(x) / 2,  part(x) = 1 + tanh(a * (x + b * x^3))
#   dg(x) = part(x) / 2 + x * dpart(x) / 2
import math
a = math.sqrt(2/math.pi)
b = 0.044715
part = lambda x : 1 + torch.tanh(a*(x + b*(x**3)))
# d(part)/dx, using d/dx tanh(u) = (1 - tanh(u)^2) * du/dx
dpart = lambda x : (a*(1 + 3*b*(x)**2))*(1 - torch.tanh(a*(x + b*(x**3)))**2)
# NOTE(review): F.gelu uses the exact (erf) form by default, while dg below is the
# derivative of the tanh approximation — confirm the small mismatch is acceptable.
g = F.gelu # or lambda x : x * part(x) / 2
dg = lambda x : part(x) / 2 + x * dpart(x) / 2
activation_function, deriv_activation_function = g, dg

# twin_net tf: softplus activation, whose derivative is the sigmoid
tf_config.update({"activation_function" : tf.nn.softplus, "deriv_activation_function" : tf.nn.sigmoid})

# Keyword arguments used to build every PyTorch MLP model.
mlp_model_kwargs = {"in_features" : INPUT_DIM, # depends on the function
                    "hidden_features" : HIDDEN_DIM, 
                    "hidden_layers" : N_HIDDEN, 
                    "out_features": OUTPUT_DIM, 
                    "activation_function" : activation_function, 
                    "deriv_activation_function" : deriv_activation_function,
                    "init_weights" : init_weights,
                    "params_seed" : params_seed
                   }
# Siren
first_omega_0 = 30.
hidden_omega_0 = 30.
outermost_linear = True

# Keyword arguments used to build every PyTorch Siren model.
siren_model_kwargs = {"in_features" : INPUT_DIM, 
                      "hidden_features" : HIDDEN_DIM, 
                      "hidden_layers" : N_HIDDEN, 
                      "out_features": OUTPUT_DIM, 
                      "outermost_linear" : outermost_linear, 
                      "first_omega_0" : first_omega_0, 
                      "hidden_omega_0" : hidden_omega_0,
                      "init_weights" : init_weights,
                      "params_seed" : params_seed
                      }

# twin_net tf: network size forwarded to the TensorFlow implementation
generator_kwargs = {"hidden_units" : HIDDEN_DIM, "hidden_layers" : N_HIDDEN}

# **To avoid repeating the same code too much**

In [None]:
# Supported benchmark functions: name -> (function, analytic gradient).
_BENCHMARKS = {
    "Styblinski-Tang" : (STFunction, STDeriv),
    "Ackley" : (AckleyFunction, AckleyDeriv),
    "Beale" : (BealeFunction, BealeDeriv),
    "Booth" : (BoothFunction, BoothDeriv),
    "Bukin" : (BukinFunction, BukinDeriv),
    "McCormick" : (McCormickFunction, McCormickDeriv),
    "Rosenbrock" : (RosenbrockFunction, RosenbrockDeriv),
}

# Fail here, with a clear message, rather than later with a NameError on
# `callable_function` when f_name contains a typo.
if f_name not in _BENCHMARKS :
    raise ValueError("Unknown f_name %r, expected one of: %s" % (f_name, ", ".join(sorted(_BENCHMARKS))))

callable_function, callable_function_deriv = _BENCHMARKS[f_name]
function = "%s Function" % f_name  # title used for the plots

# Directory receiving the csv/pkl outputs. Keep a value defined in an earlier
# cell; otherwise fall back to ./results so that "Run all" works on a fresh kernel.
# NOTE(review): the fallback location is a choice made here — adjust if needed.
try:
    main_path
except NameError:
    main_path = os.path.join(os.getcwd(), "results")

os.makedirs(main_path, exist_ok = True)
csv_path = os.path.join(main_path, f_name + ".csv")

grad = "Gradient Field of %s" % function

In [None]:
def run_train(name, model_class, model_kwargs, with_derivative, name_function, name_grad,  
              normalize = False, learning_rate_schedule = None):
    """Train, evaluate and plot one PyTorch model per training-set size.

    For every (nTrain, nTest, batch_size) triple taken from the notebook-level
    lists, the function generates the data, trains a fresh `model_class`
    instance, evaluates it on the test set and plots the learned function and
    gradient field.

    Reads these notebook globals: callable_function, callable_function_deriv,
    INPUT_DIM, nTrains, nTests, batch_sizes, train_seed, test_seed,
    learning_rate, max_epoch, improving_limit, the plotting bounds/steps,
    loss_config, f_name and main_path.

    Parameters:
        name: "net" or "twin_net"; selects how the derivative is computed.
        model_class: class instantiated as `model_class(**model_kwargs)`.
        model_kwargs: keyword arguments of the model constructor.
        with_derivative: if True, the derivatives are given to the data loaders.
        name_function: title of the learned-function plot.
        name_grad: title of the gradient-field plot.
        normalize: forwarded to the training data loader.
        learning_rate_schedule: stored in the training config (None = disabled).

    Returns:
        (models, losses, stats): three lists with one entry per training-set size.
    """
    trained_models, test_losses, training_stats = [], [], []

    for n_train, n_test, bs in zip(nTrains, nTests, batch_sizes) :

        print("========== nTrain %d ===========" % n_train)

        # Train and test sets come from the same generator, with different seeds.
        train_samples = genData(function = callable_function, deriv_function = callable_function_deriv,
                                dim_x = INPUT_DIM, min_x = min_x, max_x = max_x,
                                num_samples = n_train, random_seed = train_seed)
        x_train, y_train, dydx_train = zip(*train_samples)

        test_samples = genData(function = callable_function, deriv_function = callable_function_deriv,
                               dim_x = INPUT_DIM, min_x = min_x, max_x = max_x,
                               num_samples = n_test, random_seed = test_seed)
        x_test, y_test, dydx_test = zip(*test_samples)

        # The derivatives are only handed to the loaders when they are part of the training.
        train_dataloader, config = get_data_loader(x = x_train, y = y_train,
                                                   dydx = dydx_train if with_derivative else None,
                                                   batch_size = bs, normalize = normalize)
        test_dataloader, _ = get_data_loader(x = x_test, y = y_test,
                                             dydx = dydx_test if with_derivative else None,
                                             batch_size = bs)

        # Model name: "normal"/"sobolev" for a plain net, otherwise `name`,
        # followed by the optional "-norm" and "-lrs" suffixes.
        if name == "net" :
            base_name = "sobolev" if with_derivative else "normal"
        else :
            base_name = name # e.g. 'twin_net'
        suffix = ("-norm" if normalize else "") + ("-lrs" if learning_rate_schedule else "")

        config["learning_rate_schedule"] = learning_rate_schedule
        config.update({k : v for k, v in loss_config.items() if v})
        config["dump_path"] = main_path
        config["function_name"] = f_name
        config["model_name"] = base_name + suffix
        config["nTrain"] = n_train
        config["batch_size"] = bs

        model = model_class(**model_kwargs)
        criterion = torch.nn.MSELoss()
        optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

        model, stats, best_loss = train(name, model, train_dataloader, optimizer, criterion, config,
                                        with_derivative, max_epoch = max_epoch, improving_limit = improving_limit)

        plot_stat(stats, with_derivative = with_derivative)

        test_metrics, test_outputs = test(name, model, test_dataloader, criterion, config, with_derivative)
        test_loss, r_y, r_dydx = test_metrics
        x_list, y_list, dydx_list, y_pred_list, dydx_pred_list = test_outputs

        # Normalization statistics; identity scaling when they are absent from the config.
        x_mean, x_std = config.get("x_mean", 0.), config.get("x_std", 1.)
        y_mean, y_std = config.get("y_mean", 0.), config.get("y_std", 1.)

        def predict(x):
            """Model prediction at x, mapped back to the original scale."""
            x_scaled = (torch.tensor(x) - x_mean) / x_std
            y_scaled = model(x = x_scaled.float())
            return (y_mean + y_std * y_scaled).detach().squeeze().numpy()

        def partial_derivative(index):
            """Return a function giving the index-th component of the predicted gradient."""
            def component(x) :
                x = torch.tensor(x, requires_grad = True)
                x_scaled = (x - x_mean) / x_std
                if name == "net" :
                    y_scaled = model(x = x_scaled.float())
                    dydx_scaled = gradient(y_scaled, x_scaled)
                elif name == "twin_net" :
                    y_scaled, zs = forward(net = model.net, x = x_scaled.float(), return_layers = True)
                    dydx_scaled = backprop(net = model.net, y = y_scaled, zs = zs)
                # Chain rule of the two affine rescalings.
                dydx = y_std / x_std * dydx_scaled
                return dydx.detach().squeeze().numpy()[index]
            return component

        plotFunction(name = name_function, function = predict,
                     min_x = min_x, max_x = max_x, step_x = step_x,
                     min_y = min_y, max_y = max_y, step_y = step_y)

        plotGrad(name = name_grad, deriv_function = partial_derivative,
                 min_x = min_x, max_x = max_x, step_x = step_x,
                 min_y = min_y, max_y = max_y, step_y = step_y)

        # Without a separate y loss, the total test loss takes its place.
        if r_y :
            loss_entry = (r_y, r_dydx, test_loss)
        else :
            loss_entry = (test_loss, r_dydx, None)

        trained_models.append(model)
        test_losses.append(loss_entry)
        training_stats.append(stats)

    return trained_models, test_losses, training_stats

def run_diffML_train(name, generator, generator_kwargs, name_function, name_grad, siren = False, 
                     normalize = True, learning_rate_schedule = None):
    """Train, evaluate and plot the TensorFlow twin_net for every training-set size.

    Reads these notebook globals: nTrains, nTests, batch_sizes, train_seed,
    test_seed, learning_rate, max_epoch, twin_net_tf_improving_limit, the
    plotting bounds/steps, the Siren settings, tf_config, loss_config,
    main_path, f_name and with_derivative.

    Parameters:
        name: prefix of the titles of the per-axis graphs.
        generator: data generator handed to `twin_net_tf_test`.
        generator_kwargs: keyword arguments forwarded with the generator.
        name_function: title of the learned-function plot.
        name_grad: title of the gradient-field plot.
        siren: if True, add the Siren settings and sin/cos activations to the config.
        normalize: forwarded to `twin_net_tf_test`.
        learning_rate_schedule: stored in the config (None = disabled).

    Returns:
        (regressors, losses): `regressors` has one entry per training-set size;
        `losses` maps "normal" and "differential" to lists of
        (y loss, dydx loss, their sum) tuples, one per training-set size.
    """
    config = {}
    config["learning_rate_schedule"] = learning_rate_schedule
    config["learning_rate"] = learning_rate
    config.update({key : value for key, value in loss_config.items() if value})
    config.update(tf_config)
    config["dump_path"] = main_path
    config["function_name"] = f_name  
    model_name = ""
    model_name += "-norm" if normalize else ""
    model_name += "-lrs" if learning_rate_schedule else ""
    config["model_name"] = model_name

    regressor_list = []
    # Two distinct lists: a chained assignment (x = y = []) would bind both keys
    # to the same list and mix the normal and differential losses together.
    losses = {"normal" : [], "differential" : []}

    for nTrain, nTest, batch_size in zip(nTrains, nTests, batch_sizes) :
        print("========== nTrain %d ===========" % nTrain)

        config["nTrain"] = nTrain
        config["batch_size"] = batch_size

        if siren :
            config.update({"first_omega_0" : first_omega_0, 
                           "hidden_omega_0": hidden_omega_0, 
                           "outermost_linear" : outermost_linear})
            
            config["activation_function"] = tf.math.sin
            config["deriv_activation_function"] = tf.math.cos
            
        dic_loss, regressor, dtrain, dtest, dydxTest, values, deltas = twin_net_tf_test(
                  generator, [nTrain], 
                  nTrain, nTest, 
                  trainSeed = train_seed, testSeed = test_seed, weightSeed = 0, 
                  deltidx=0,
                  generator_kwargs = generator_kwargs,
                  epochs = max_epoch,
                  normalize = normalize,
                  improving_limit = twin_net_tf_improving_limit, 
                  min_batch_size = batch_size,
                  config = config
              )
        
        # NOTE(review): `with_derivative` is not a parameter; it is the notebook-level
        # variable last set by a previous section (True under "Run all"). Running this
        # function on a fresh kernel without those sections raises a NameError.
        plot_stat(regressor.stats["normal"], with_derivative = with_derivative)
        plot_stat(regressor.stats["differential"], with_derivative = with_derivative)

        plotFunction(name = name_function, function =  lambda x : regressor.predict_values([x])[0][0], 
                    min_x = min_x, max_x = max_x, step_x = step_x, 
                    min_y = min_y, max_y = max_y, step_y = step_y)

        plotGrad(name = name_grad, 
                deriv_function = lambda index : lambda x : regressor.predict_values_and_derivs([x])[1][0][index], 
                min_x = min_x, max_x = max_x, step_x = step_x, 
                min_y = min_y, max_y = max_y, step_y = step_y)

        # Per-axis graphs: predictions and derivatives against each of the two inputs.
        yTest = dtest[1]
        for i in range(2) :
            xAxis  = np.array([[x[i]] for x in dtest[0]])
            # show predictions
            graph("%s x%d vs y" % (name, (i+1)), values, xAxis, "", "values", yTest, [nTrain], True)
            # show deltas
            graph("%s x%d vs dxdy" % (name, (i+1)), deltas, xAxis, "", "deltas", dydxTest, [nTrain], True)
      
        # Keep the last recorded value of each loss, plus their sum.
        y_loss = dic_loss['standard_loss']["yloss"][-1]
        dy_loss = dic_loss['standard_loss']["dyloss"][-1]
        losses["normal"].append((y_loss, dy_loss, y_loss + dy_loss))

        y_loss = dic_loss['differential_loss']["yloss"][-1]
        dy_loss = dic_loss['differential_loss']["dyloss"][-1]
        losses["differential"].append((y_loss, dy_loss, y_loss + dy_loss))

        regressor_list.append(regressor) 

    return regressor_list, losses

# **Ground Truth**

In [None]:
plotFunction(name = function, function = callable_function, 
             min_x = min_x, max_x = max_x, step_x = step_x, 
             min_y = min_y, max_y = max_y, step_y = step_y)

plotGrad(name = grad, deriv_function = callable_function_deriv, 
         min_x = min_x, max_x = max_x, step_x = step_x, 
         min_y = min_y, max_y = max_y, step_y = step_y)

# **Data**

In [None]:
stats_dic = {}
tests_loss = {}

# **1) Normal Training**

In [None]:
name = "net"
with_derivative = False
key1 = "normal_training"
name_function = '%s %s' % (function, key1)
name_grad = '%s %s' % (grad, key1)
stats_dic[key1] = {}
tests_loss[key1] = {}

## **1.1) with MLP**

In [None]:
model_class = MLP
key2 = "mlp"
stats_dic[key1][key2] = {}
tests_loss[key1][key2] = {}

### **1.1.1) normalize = False**

In [None]:
key3 = "no_normalize"
stats_dic[key1][key2][key3] = [None, None]
tests_loss[key1][key2][key3] = [None, None]

In [None]:
model, tests_loss[key1][key2][key3][0], stats_dic[key1][key2][key3][0] = run_train(name = name, 
          model_class = model_class, 
          model_kwargs = mlp_model_kwargs, 
          with_derivative = with_derivative, 
          name_function = name_function, 
          name_grad = name_grad,  
          normalize = False,
          learning_rate_schedule = None
        )

In [None]:
model, tests_loss[key1][key2][key3][1], stats_dic[key1][key2][key3][1] = run_train(name = name, 
          model_class = model_class, 
          model_kwargs = mlp_model_kwargs, 
          with_derivative = with_derivative, 
          name_function = name_function, 
          name_grad = name_grad,  
          normalize = False,
          learning_rate_schedule = learning_rate_schedule
        )

### **1.1.2) normalize = True**

In [None]:
key3 = "normalize"
stats_dic[key1][key2][key3] = [None, None]
tests_loss[key1][key2][key3] = [None, None]

In [None]:
model, tests_loss[key1][key2][key3][0], stats_dic[key1][key2][key3][0] = run_train(name = name, 
          model_class = model_class, 
          model_kwargs = mlp_model_kwargs, 
          with_derivative = with_derivative, 
          name_function = name_function, 
          name_grad = name_grad,  
          normalize = True,
          learning_rate_schedule = None
        )

In [None]:
model, tests_loss[key1][key2][key3][1], stats_dic[key1][key2][key3][1] = run_train(name = name, 
          model_class = model_class, 
          model_kwargs = mlp_model_kwargs, 
          with_derivative = with_derivative, 
          name_function = name_function, 
          name_grad = name_grad,  
          normalize = True,
          learning_rate_schedule = learning_rate_schedule
        )

## **1.2) with Siren**

In [None]:
model_class = Siren
key2 = "siren"
stats_dic[key1][key2] = {}
tests_loss[key1][key2] = {}

### **1.2.1) normalize = False**

In [None]:
key3 = "no_normalize"
stats_dic[key1][key2][key3] = [None, None]
tests_loss[key1][key2][key3] = [None, None]

In [None]:
model, tests_loss[key1][key2][key3][0], stats_dic[key1][key2][key3][0] = run_train(name = name, 
          model_class = model_class, 
          model_kwargs = siren_model_kwargs, 
          with_derivative = with_derivative, 
          name_function = name_function, 
          name_grad = name_grad,  
          normalize = False,
          learning_rate_schedule = None
        )

In [None]:
model, tests_loss[key1][key2][key3][1], stats_dic[key1][key2][key3][1] = run_train(name = name, 
          model_class = model_class, 
          model_kwargs = siren_model_kwargs, 
          with_derivative = with_derivative, 
          name_function = name_function, 
          name_grad = name_grad,  
          normalize = False,
          learning_rate_schedule = learning_rate_schedule
        )

### **1.2.2) normalize = True**

In [None]:
key3 = "normalize"
stats_dic[key1][key2][key3] = [None, None]
tests_loss[key1][key2][key3] = [None, None]

In [None]:
model, tests_loss[key1][key2][key3][0], stats_dic[key1][key2][key3][0] = run_train(name = name, 
          model_class = model_class, 
          model_kwargs = siren_model_kwargs, 
          with_derivative = with_derivative, 
          name_function = name_function, 
          name_grad = name_grad,  
          normalize = True,
          learning_rate_schedule = None
        )

In [None]:
model, tests_loss[key1][key2][key3][1], stats_dic[key1][key2][key3][1] = run_train(name = name, 
          model_class = model_class, 
          model_kwargs = siren_model_kwargs, 
          with_derivative = with_derivative, 
          name_function = name_function, 
          name_grad = name_grad,  
          normalize = True,
          learning_rate_schedule = learning_rate_schedule
        )

# **2) Sobolev Training**

In [None]:
name = "net"
with_derivative = True
key1 = "sobolev_training"
name_function = '%s %s' % (function, key1)
name_grad = '%s %s' % (grad, key1)
stats_dic[key1] = {}
tests_loss[key1] = {}

## **2.1) with MLP**

In [None]:
model_class = MLP
key2 = "mlp"
stats_dic[key1][key2] = {}
tests_loss[key1][key2] = {}

### **2.1.1) normalize = False**

In [None]:
key3 = "no_normalize"
stats_dic[key1][key2][key3] = [None, None]
tests_loss[key1][key2][key3] = [None, None]

In [None]:
model, tests_loss[key1][key2][key3][0], stats_dic[key1][key2][key3][0] = run_train(name = name, 
          model_class = model_class, 
          model_kwargs = mlp_model_kwargs, 
          with_derivative = with_derivative, 
          name_function = name_function, 
          name_grad = name_grad,  
          normalize = False,
          learning_rate_schedule = None
        )

In [None]:
model, tests_loss[key1][key2][key3][1], stats_dic[key1][key2][key3][1] = run_train(name = name, 
          model_class = model_class, 
          model_kwargs = mlp_model_kwargs, 
          with_derivative = with_derivative, 
          name_function = name_function, 
          name_grad = name_grad,  
          normalize = False,
          learning_rate_schedule = learning_rate_schedule
        )

### **2.1.2) normalize = True**

In [None]:
key3 = "normalize"
stats_dic[key1][key2][key3] = [None, None]
tests_loss[key1][key2][key3] = [None, None]

In [None]:
model, tests_loss[key1][key2][key3][0], stats_dic[key1][key2][key3][0] = run_train(
          name = name, 
          model_class = model_class, 
          model_kwargs = mlp_model_kwargs, 
          with_derivative = with_derivative, 
          name_function = name_function, 
          name_grad = name_grad,  
          normalize = True,
          learning_rate_schedule = None
        )

In [None]:
model, tests_loss[key1][key2][key3][1], stats_dic[key1][key2][key3][1] = run_train(
          name = name, 
          model_class = model_class, 
          model_kwargs = mlp_model_kwargs, 
          with_derivative = with_derivative, 
          name_function = name_function, 
          name_grad = name_grad,  
          normalize = True,
          learning_rate_schedule = learning_rate_schedule
        )

## **2.2) with Siren**

In [None]:
model_class = Siren
key2 = "siren"
stats_dic[key1][key2] = {}
tests_loss[key1][key2] = {}

### **2.2.1) normalize = False**

In [None]:
key3 = "no_normalize"
stats_dic[key1][key2][key3] = [None, None]
tests_loss[key1][key2][key3] = [None, None]

In [None]:
model, tests_loss[key1][key2][key3][0], stats_dic[key1][key2][key3][0] = run_train(name = name, 
          model_class = model_class, 
          model_kwargs = siren_model_kwargs, 
          with_derivative = with_derivative, 
          name_function = name_function, 
          name_grad = name_grad,  
          normalize = False,
          learning_rate_schedule = None
        )

In [None]:
model, tests_loss[key1][key2][key3][1], stats_dic[key1][key2][key3][1] = run_train(name = name, 
          model_class = model_class, 
          model_kwargs = siren_model_kwargs, 
          with_derivative = True, 
          name_function = name_function, 
          name_grad = name_grad,  
          normalize = False,
          learning_rate_schedule = learning_rate_schedule
        )

### **2.2.2) normalize = True**

In [None]:
key3 = "normalize"
stats_dic[key1][key2][key3] = [None, None]
tests_loss[key1][key2][key3] = [None, None]

In [None]:
model, tests_loss[key1][key2][key3][0], stats_dic[key1][key2][key3][0] = run_train(name = name, 
          model_class = model_class, 
          model_kwargs = siren_model_kwargs, 
          with_derivative = with_derivative, 
          name_function = name_function, 
          name_grad = name_grad,  
          normalize = True,
          learning_rate_schedule = None
        )

In [None]:
model, tests_loss[key1][key2][key3][1], stats_dic[key1][key2][key3][1] = run_train(name = name, 
          model_class = model_class, 
          model_kwargs = siren_model_kwargs, 
          with_derivative = with_derivative, 
          name_function = name_function, 
          name_grad = name_grad,  
          normalize = True,
          learning_rate_schedule = learning_rate_schedule
        )

# **3) twin_net tensorflow**

In [None]:
generator = Generator(callable_function = callable_function, 
                      callable_function_deriv = callable_function_deriv, 
                      dim_x = INPUT_DIM,
                      min_x = min_x, max_x = max_x)

key1 = "twin_net_tf"
name_function = '%s %s' % (function, key1)
name_grad = '%s %s' % (grad, key1)
key1_1 = "%s_normal" % key1
key1_2 = "%s_differential" % key1
stats_dic[key1_1] = {}
stats_dic[key1_2] = {}
tests_loss[key1_1] = {}
tests_loss[key1_2] = {}

## **3.1) with MLP**

In [None]:
key2 = "mlp"
stats_dic[key1_1][key2] = {}
stats_dic[key1_2][key2] = {}
tests_loss[key1_1][key2] = {}
tests_loss[key1_2][key2] = {}


### **3.1.1) normalize = False**

In [None]:
key3 = "no_normalize"
stats_dic[key1_1][key2][key3] = [None, None]
stats_dic[key1_2][key2][key3] = [None, None]
tests_loss[key1_1][key2][key3] = [None, None]
tests_loss[key1_2][key2][key3] = [None, None]

In [None]:
graph_name = "%s %s %s" % (name_function, key2, key3)
regressor, loss = run_diffML_train(graph_name, generator, generator_kwargs, name_function, name_grad, 
                                   normalize = False)

stats_dic[key1_1][key2][key3][0] = [r.stats["normal"] for r in regressor]
stats_dic[key1_2][key2][key3][0] = [r.stats["differential"] for r in regressor]
tests_loss[key1_1][key2][key3][0] = loss["normal"]
tests_loss[key1_2][key2][key3][0] = loss["differential"]

In [None]:
graph_name = "%s %s %s" % (name_function, key2, key3)
regressor, loss = run_diffML_train(graph_name, generator, generator_kwargs, name_function, name_grad, 
                                   normalize = False, 
                                   learning_rate_schedule = learning_rate_schedule 
                                   )

stats_dic[key1_1][key2][key3][1] = [r.stats["normal"] for r in regressor]
stats_dic[key1_2][key2][key3][1] = [r.stats["differential"] for r in regressor]
tests_loss[key1_1][key2][key3][1] = loss["normal"]
tests_loss[key1_2][key2][key3][1] = loss["differential"]

### **3.1.2) normalize = True**

In [None]:
key3 = "normalize"
stats_dic[key1_1][key2][key3] = [None, None]
stats_dic[key1_2][key2][key3] = [None, None]
tests_loss[key1_1][key2][key3] = [None, None]
tests_loss[key1_2][key2][key3] = [None, None]

In [None]:
graph_name = "%s %s %s" % (name_function, key2, key3)
regressor, loss = run_diffML_train(graph_name, generator, generator_kwargs, name_function, name_grad, 
                                   normalize = True)

stats_dic[key1_1][key2][key3][0] = [r.stats["normal"] for r in regressor]
stats_dic[key1_2][key2][key3][0] = [r.stats["differential"] for r in regressor]
tests_loss[key1_1][key2][key3][0] = loss["normal"]
tests_loss[key1_2][key2][key3][0] = loss["differential"]

In [None]:
graph_name = "%s %s %s" % (name_function, key2, key3)
regressor, loss = run_diffML_train(graph_name, generator, generator_kwargs, name_function, name_grad, 
                                   normalize = True,
                                   learning_rate_schedule = learning_rate_schedule)

stats_dic[key1_1][key2][key3][1] = [r.stats["normal"] for r in regressor]
stats_dic[key1_2][key2][key3][1] = [r.stats["differential"] for r in regressor]
tests_loss[key1_1][key2][key3][1] = loss["normal"]
tests_loss[key1_2][key2][key3][1] = loss["differential"]

## **3.2) with Siren**

In [None]:
key2 = "siren"
stats_dic[key1_1][key2] = {}
stats_dic[key1_2][key2] = {}
tests_loss[key1_1][key2] = {}
tests_loss[key1_2][key2] = {}

### **3.2.1) normalize = False**

In [None]:
key3 = "no_normalize"
stats_dic[key1_1][key2][key3] = [None, None]
stats_dic[key1_2][key2][key3] = [None, None]
tests_loss[key1_1][key2][key3] = [None, None]
tests_loss[key1_2][key2][key3] = [None, None]

In [None]:
graph_name = "%s %s %s" % (name_function, key2, key3)
regressor, loss = run_diffML_train(graph_name, generator, generator_kwargs, name_function, name_grad, 
                                   siren = True, normalize = False)

stats_dic[key1_1][key2][key3][0] = [r.stats["normal"] for r in regressor]
stats_dic[key1_2][key2][key3][0] = [r.stats["differential"] for r in regressor]
tests_loss[key1_1][key2][key3][0] = loss["normal"]
tests_loss[key1_2][key2][key3][0] = loss["differential"]

In [None]:
graph_name = "%s %s %s" % (name_function, key2, key3)
regressor, loss = run_diffML_train(graph_name, generator, generator_kwargs, name_function, name_grad, 
                                   siren = True, normalize = False,
                                   learning_rate_schedule = learning_rate_schedule)

stats_dic[key1_1][key2][key3][1] = [r.stats["normal"] for r in regressor]
stats_dic[key1_2][key2][key3][1] = [r.stats["differential"] for r in regressor]
tests_loss[key1_1][key2][key3][1] = loss["normal"]
tests_loss[key1_2][key2][key3][1] = loss["differential"]

### **3.2.2) normalize = True**

In [None]:
key3 = "normalize"
stats_dic[key1_1][key2][key3] = [None, None]
stats_dic[key1_2][key2][key3] = [None, None]
tests_loss[key1_1][key2][key3] = [None, None]
tests_loss[key1_2][key2][key3] = [None, None]

In [None]:
graph_name = "%s %s %s" % (name_function, key2, key3)
regressor, loss = run_diffML_train(graph_name, generator, generator_kwargs, name_function, name_grad, 
                                   siren = True, normalize = True)

stats_dic[key1_1][key2][key3][0] = [r.stats["normal"] for r in regressor]
stats_dic[key1_2][key2][key3][0] = [r.stats["differential"] for r in regressor]
tests_loss[key1_1][key2][key3][0] = loss["normal"]
tests_loss[key1_2][key2][key3][0] = loss["differential"]

In [None]:
graph_name = "%s %s %s" % (name_function, key2, key3)
regressor, loss = run_diffML_train(graph_name, generator, generator_kwargs, name_function, name_grad, 
                                   siren = True, normalize = True, learning_rate_schedule = learning_rate_schedule)

stats_dic[key1_1][key2][key3][1] = [r.stats["normal"] for r in regressor]
stats_dic[key1_2][key2][key3][1] = [r.stats["differential"] for r in regressor]
tests_loss[key1_1][key2][key3][1] = loss["normal"]
tests_loss[key1_2][key2][key3][1] = loss["differential"]

# **4) twin_net pytorch**

In [None]:
name = "twin_net"
with_derivative = True
key1 = "twin_net_pytorch"
name_function = '%s %s' % (function, key1)
name_grad = '%s %s' % (grad, key1)
stats_dic[key1] = {}
tests_loss[key1] = {}

## **4.1) with MLP**

In [None]:
model_class = MLP
key2 = "mlp"
stats_dic[key1][key2] = {}
tests_loss[key1][key2] = {}

### **4.1.1) normalize = False**

In [None]:
key3 = "no_normalize"
stats_dic[key1][key2][key3] = [None, None]
tests_loss[key1][key2][key3] = [None, None]

In [None]:
model, tests_loss[key1][key2][key3][0], stats_dic[key1][key2][key3][0] = run_train(
          name = name, 
          model_class = model_class, 
          model_kwargs = mlp_model_kwargs, 
          with_derivative = True, 
          name_function = name_function, 
          name_grad = name_grad,  
          normalize = False,
          learning_rate_schedule = None
        )

In [None]:
model, tests_loss[key1][key2][key3][1], stats_dic[key1][key2][key3][1] = run_train(
          name = name, 
          model_class = model_class, 
          model_kwargs = mlp_model_kwargs, 
          with_derivative = True, 
          name_function = name_function, 
          name_grad = name_grad,  
          normalize = False,
          learning_rate_schedule = learning_rate_schedule
        )

### **4.1.2) normalize = True**

In [None]:
key3 = "normalize"
stats_dic[key1][key2][key3] = [None, None]
tests_loss[key1][key2][key3] = [None, None]

In [None]:
model, tests_loss[key1][key2][key3][0], stats_dic[key1][key2][key3][0] = run_train(
          name = name, 
          model_class = model_class, 
          model_kwargs = mlp_model_kwargs, 
          with_derivative = with_derivative, 
          name_function = name_function, 
          name_grad = name_grad,  
          normalize = True,
          learning_rate_schedule = None
        )

In [None]:
model, tests_loss[key1][key2][key3][1], stats_dic[key1][key2][key3][1] = run_train(name = name, 
          model_class = model_class, 
          model_kwargs = mlp_model_kwargs, 
          with_derivative = with_derivative, 
          name_function = name_function, 
          name_grad = name_grad,  
          normalize = True,
          learning_rate_schedule = learning_rate_schedule
        )

## **4.2) with Siren**

In [None]:
model_class = Siren
key2 = "siren"
stats_dic[key1][key2] = {}
tests_loss[key1][key2] = {}

### **4.2.1) normalize = False**

In [None]:
key3 = "no_normalize"
stats_dic[key1][key2][key3] = [None, None]
tests_loss[key1][key2][key3] = [None, None]

In [None]:
model, tests_loss[key1][key2][key3][0], stats_dic[key1][key2][key3][0] = run_train(name = name, 
          model_class = model_class, 
          model_kwargs = siren_model_kwargs, 
          with_derivative = True, 
          name_function = name_function, 
          name_grad = name_grad,  
          normalize = False,
          learning_rate_schedule = None
        )

In [None]:
model, tests_loss[key1][key2][key3][1], stats_dic[key1][key2][key3][1] = run_train(name = name, 
          model_class = model_class, 
          model_kwargs = siren_model_kwargs, 
          with_derivative = True, 
          name_function = name_function, 
          name_grad = name_grad,  
          normalize = False,
          learning_rate_schedule = learning_rate_schedule
        )

### **4.2.2) normalize = True**

In [None]:
key3 = "normalize"
stats_dic[key1][key2][key3] = [None, None]
tests_loss[key1][key2][key3] = [None, None]

In [None]:
model, tests_loss[key1][key2][key3][0], stats_dic[key1][key2][key3][0] = run_train(name = name, 
          model_class = model_class, 
          model_kwargs = siren_model_kwargs, 
          with_derivative = with_derivative, 
          name_function = name_function, 
          name_grad = name_grad,  
          normalize = True,
          learning_rate_schedule = None
        )

In [None]:
model, tests_loss[key1][key2][key3][1], stats_dic[key1][key2][key3][1] = run_train(name = name, 
          model_class = model_class, 
          model_kwargs = siren_model_kwargs, 
          with_derivative = with_derivative, 
          name_function = name_function, 
          name_grad = name_grad,  
          normalize = True,
          learning_rate_schedule = learning_rate_schedule
        )

# **5) Global Stats**

In [None]:
import os
import pickle

# Plot the statistics of every model for each training-set size, then save
# them to <main_path>/<nTrain>.pkl.
for nTrain, sd in reshape(dic = stats_dic, nTrains = nTrains).items() :
    
    print("nTrain %d" % nTrain)

    global_stat(stats_dic = sd, suptitle = function)

    file_path = os.path.join(main_path, str(nTrain) + ".pkl")
    # The context manager closes (and flushes) the file even if the dump fails.
    with open(file_path, 'wb') as pkl_file :
        pickle.dump(sd, pkl_file)
    # To reload later: with open(file_path, 'rb') as pkl_file : sd = pickle.load(pkl_file)

In [None]:
# Write the test losses of every model to the csv file, one call per training-set size.
# NOTE(review): the file is opened with mode 'a+', so re-running this cell presumably
# appends duplicate rows to an existing csv — confirm against to_csv.
for nTrain, tl in reshape(dic = tests_loss, nTrains = nTrains).items() :
    print("nTrain %d" % nTrain)
    rows, result = to_csv(dico = tl, csv_path = csv_path, n_samples = str(nTrain), mode='a+')