In [1]:
import os
from datetime import datetime

from core.runner import Runner
from core.environment import SingleVertexSpinFoam, StarModelSpinFoam


def train_gfn(
    spin_j,
    sf_model,
    main_layer_hidden_nodes,
    branch1_hidden_nodes,
    branch2_hidden_nodes,
    activation,
    exploration_rate,
    training_fraction_from_back_traj,
    learning_rate,
    batch_size,
    n_iterations,
    evaluation_batch_size,
    generate_samples_every_m_training_samples,
):
    """Build a spinfoam environment and a Runner, train the agent, return its losses.

    Args:
        spin_j: Forwarded as ``spin_j`` to the selected spinfoam class; also
            embedded in the output directory name.
        sf_model: Either ``"single_vertex_model"`` (uses ``SingleVertexSpinFoam``)
            or ``"star_model"`` (uses ``StarModelSpinFoam``).
        main_layer_hidden_nodes, branch1_hidden_nodes, branch2_hidden_nodes,
        activation, exploration_rate, training_fraction_from_back_traj,
        learning_rate: Forwarded unchanged to ``Runner`` as keyword arguments
            of the same name.
        batch_size, n_iterations, evaluation_batch_size,
        generate_samples_every_m_training_samples: Forwarded positionally, in
            this order, to ``Runner.train_agent``.

    Returns:
        Whatever ``Runner.train_agent`` returns (called ``ave_losses`` by the
        original author).

    Raises:
        ValueError: If ``sf_model`` is not one of the two supported names.
    """
    # Guard clause: reject unsupported model names before building anything.
    if sf_model != "single_vertex_model" and sf_model != "star_model":
        raise ValueError(
            "Spinfoam model not yet implemented. Custom Spinfoam class can be made."
        )

    environment_class = (
        SingleVertexSpinFoam if sf_model == "single_vertex_model" else StarModelSpinFoam
    )
    environment = environment_class(spin_j=spin_j)

    # Output location handed to train_agent as its last argument. Only a subset
    # of the hyperparameters is encoded in the path (branch sizes, activation and
    # training_fraction_from_back_traj are not part of it).
    samples_dir = (
        f"./data/GFlowNet/{sf_model}"
        f"/j_{spin_j}"
        f"/n_iterations_{n_iterations}"
        f"/main_layer_hid_nodes_{main_layer_hidden_nodes}"
        f"/exploration_rate_{exploration_rate}"
        f"/learning_rate_{learning_rate}"
        f"/batch_size_{batch_size}"
    )

    agent_runner = Runner(
        spinfoam_model=environment,
        main_layer_hidden_nodes=main_layer_hidden_nodes,
        branch1_hidden_nodes=branch1_hidden_nodes,
        branch2_hidden_nodes=branch2_hidden_nodes,
        activation=activation,
        exploration_rate=exploration_rate,
        training_fraction_from_back_traj=training_fraction_from_back_traj,
        learning_rate=learning_rate,
    )

    return agent_runner.train_agent(
        batch_size,
        n_iterations,
        evaluation_batch_size,
        generate_samples_every_m_training_samples,
        samples_dir,
    )

2023-05-07 14:43:45.278368: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-07 14:43:45.408542: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-05-07 14:43:45.994543: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/frisus/Scrivania/sl2cfoam-next-dev/lib:/home/frisus/anaconda3/envs/tf/lib/
2023-05-07 14:43:45.994602: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvi

## Vertex Amplitude

In [2]:
# Hyperparameters for the single-vertex run; the next cell passes every one of
# these names to train_gfn.

# Environment selection
spin_j = 3.5
sf_model = "single_vertex_model"

# Hidden-node tuples and activation forwarded to Runner
main_layer_hidden_nodes = (30, 20)
branch1_hidden_nodes = ()
branch2_hidden_nodes = ()
activation = "swish"

# Remaining Runner settings
exploration_rate = 0.5
training_fraction_from_back_traj = 0.0
learning_rate = 5e-4

# Arguments forwarded to Runner.train_agent
batch_size = 50
n_iterations = 500
evaluation_batch_size = 5_000
generate_samples_every_m_training_samples = 5_000

In [3]:
# Run training with the hyperparameters from the previous cell. Arguments are
# passed by keyword so each value is visibly tied to its train_gfn parameter.
ave_losses = train_gfn(
    spin_j=spin_j,
    sf_model=sf_model,
    main_layer_hidden_nodes=main_layer_hidden_nodes,
    branch1_hidden_nodes=branch1_hidden_nodes,
    branch2_hidden_nodes=branch2_hidden_nodes,
    activation=activation,
    exploration_rate=exploration_rate,
    training_fraction_from_back_traj=training_fraction_from_back_traj,
    learning_rate=learning_rate,
    batch_size=batch_size,
    n_iterations=n_iterations,
    evaluation_batch_size=evaluation_batch_size,
    generate_samples_every_m_training_samples=generate_samples_every_m_training_samples,
)

2023-05-07 14:43:46.634966: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-07 14:43:46.641337: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-07 14:43:46.641812: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-07 14:43:46.642336: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

Nth iteration: 100 Trained on K samples: 5000 Average Loss: 7.8366326117182723
Nth iteration: 200 Trained on K samples: 10000 Average Loss: 7.9035970357317975
Nth iteration: 300 Trained on K samples: 15000 Average Loss: 3.9587579854827286
Nth iteration: 400 Trained on K samples: 20000 Average Loss: 3.6611270087984678
Nth iteration: 500 Trained on K samples: 25000 Average Loss: 6.194059498610188


# TODO list

- Write a script to run on the Compute Canada clusters (possibly with GPU offloading) that tests the algorithm (at least with the 4-simplex, i.e. vertex amplitude, model) over a sweep of hyperparameters, such as:
  
  NODES AND LAYERS (neural network architectures):
  (160, 160, 80, 80, 40, 40),
  (160, 160, 40, 40),
  (80, 80, 40, 40),
  (80, 40).
  
  EXPLORATION RATES:
  0, 0.25, 0.5.
  
  LEARNING RATES:
  0.01, 0.001, 0.0001.

  BATCH SIZE: 
  ...
  
  NUMBER OF ITERATIONS: 
  ...

  EVALUATION BATCH SIZE:
  ...
