# Initialization

## Importing libs and setting plot style

In [1]:
import itertools
import multiprocessing as mp
import os
import random
import threading
import time
import warnings
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from atpbar import atpbar, flush
from matplotlib import rc

In [2]:
# Plot style: seaborn defaults, "paper" context with larger fonts and thicker
# lines, LaTeX text rendering, and a white grid with inward ticks on all sides.

# Overrides applied on top of seaborn's "whitegrid" style.
whitegrid_overrides = {
    # axes
    "axes.edgecolor": "black",
    "axes.grid": True,
    "axes.axisbelow": True,
    "axes.labelcolor": ".15",
    # grid lines
    "grid.color": "0.9",
    "grid.linestyle": "-",
    # ticks: pointing inward, drawn on every side of the axes
    "xtick.direction": "in",
    "ytick.direction": "in",
    "xtick.bottom": True,
    "xtick.top": True,
    "ytick.left": True,
    "ytick.right": True,
    # fonts
    "font.family": ["sans-serif"],
    "font.sans-serif": ["Liberation Sans", "Bitstream Vera Sans", "sans-serif"],
}

sns.set()
sns.set_context("paper", font_scale=1.5, rc={"lines.linewidth": 2.0})

# Render all figure text through LaTeX.
rc("text", usetex=True)

# NOTE(review): the "whitegrid" call right after "ticks" presumably supersedes
# it; both calls are kept in their original order.
sns.set_style("ticks")
sns.set_style("whitegrid", whitegrid_overrides)

## Global variables

In [3]:
# Root folder of the stored data
data_folder = "../../data"

# GFlowNet parameters.
# One entry per run; the lists are consumed together (entry k of every list
# describes run k), so they should all have the same length.
spin_list = [2.0]
iteration_list = [10_000]
main_layer_hidden_nodes_list = [(30, 20)]
evaluation_batch_size_list = [1_000_000]
training_batch_size_list = [1_000]
generate_samples_every_training_samples_list = [1_000_000]

# Settings shared by every run
model = "single_vertex_model"
branch1_hidden_nodes = ()
branch2_hidden_nodes = ()
activation = "swish"
exploration_rate = 0.5
training_fraction_from_back_traj = 0.0
learning_rate = 0.0005

# Use as many threads as the number of CPUs reported by multiprocessing
optimal_number_of_threads = mp.cpu_count()
print(f"optimal number of threads: {optimal_number_of_threads}")

optimal number of threads: 12


# Angles

## Computation

In [4]:
# takes an intertwiner and returns the corresponding angle eigenvalue

def from_intertwiner_to_angle(matrix_element, spin):
    """Map an intertwiner value to the corresponding angle eigenvalue.

    Computes ``(m * (m + 1) - 2 * j * (j + 1)) / (2 * j * (j + 1))`` where
    ``m`` is ``matrix_element`` and ``j`` is ``spin``. Only arithmetic
    operators are used, so anything supporting them element-wise (e.g. a
    pandas Series) can be passed as ``matrix_element``.
    """
    denominator = 2 * spin * (spin + 1)
    numerator = matrix_element * (matrix_element + 1) - denominator
    return numerator / denominator

In [5]:
def from_draws_to_angles(
    folder_prefix,
    spin,
    sample_name,
    dihedral_angle_path,
    n,
    name,
):
    """Convert one stored sample batch into average cosine angles.

    Reads ``{folder_prefix}/samples/{sample_name}``, converts the first five
    columns from intertwiner values to angle eigenvalues with
    ``from_intertwiner_to_angle``, averages every column, and writes the
    averages to ``{dihedral_angle_path}/{name}`` as a CSV with a single
    ``"cosine angle avg"`` column indexed by the sample's column names.

    Parameters
    ----------
    folder_prefix : path-like
        Folder that contains the ``samples`` sub-folder.
    spin : float
        Spin passed to ``from_intertwiner_to_angle``.
    sample_name : path-like
        File name of the sample CSV inside ``{folder_prefix}/samples``.
    dihedral_angle_path : path-like
        Existing folder the result CSV is written to.
    n : int
        Number of steps of the progress bar (see the note in the body).
    name : str
        Label of the progress bar and file name of the result CSV.
    """
    # NOTE: this loop only animates the atpbar progress bar; it does not track
    # the work done below and merely sleeps for roughly n * 0.1 ms.
    for _ in atpbar(range(n), name=name):
        time.sleep(0.0001)

    sample_path = f"{folder_prefix}/samples/{sample_name}"

    # load in memory the stored draws
    df = pd.read_csv(sample_path, low_memory=False)
    df.columns = df.columns.str.replace("intertwiner ", "node ", regex=True)

    # From intertwiners to angles.
    # The converted values are floats. Writing them back through
    # ``df.iloc[:, :5] = ...`` would set them in place into the existing
    # (presumably integer) columns, an upcast that recent pandas versions
    # deprecate. Building a new frame from the converted columns plus the
    # untouched remainder avoids any in-place dtype change.
    angles = df.iloc[:, :5].apply(from_intertwiner_to_angle, args=(spin,))
    df = pd.concat([angles, df.iloc[:, 5:]], axis=1)

    ##########################################################
    # Computing exp values (avg)
    ##########################################################

    df_final = df.mean().to_frame(name="cosine angle avg")

    angle_exp_values_path_batch = f"{dihedral_angle_path}/{name}"
    df_final.to_csv(angle_exp_values_path_batch, index=True)

In [12]:
# Converts multiple samples into dihedral angles.
# Stores the result for each batch, then combines all batches into another CSV file.

def angles_compute(
    data_folder,
    model,
    spin,
    total_iterations,
    main_layer_hidden_nodes,
    evaluation_batch_size,
    generate_samples_every_training_samples,
    batch_size,
    activation_function="swish",
    exploration_rate=0.5,
    training_fraction_from_back_traj=0.0,
    learning_rate=0.0005,
    number_of_threads=optimal_number_of_threads,
):
    """Compute average cosine angles for every stored sample batch of a run.

    The run folder is derived from ``data_folder``, ``model``, ``spin``,
    ``total_iterations``, ``main_layer_hidden_nodes``, ``exploration_rate``,
    ``learning_rate`` and ``batch_size``. For each training iteration whose
    cumulative number of training samples is a multiple of
    ``generate_samples_every_training_samples``, the matching sample CSV is
    looked up in ``<run folder>/samples``; missing files only trigger a
    warning. Each sample found is processed by ``from_draws_to_angles`` in its
    own thread, with at most ``number_of_threads`` threads running at once.
    The per-batch results are then concatenated column-wise and written to
    ``<run folder>/operators/angles/batches_assembled_<count>.csv``.

    ``evaluation_batch_size``, ``activation_function`` and
    ``training_fraction_from_back_traj`` are accepted but not used here.

    Returns ``None``. If no sample file is found, a warning is issued and
    nothing is written.
    """
    folder_prefix = Path(
        f"{data_folder}/",
        f"GFlowNet/{model}/j_{spin}/n_iterations_{total_iterations}/",
        f"main_layer_hid_nodes_{main_layer_hidden_nodes}/exploration_rate_{exploration_rate}/learning_rate_{learning_rate}/",
        f"batch_size_{batch_size}/",
    )

    # Collect the sample files that are expected for this run and exist on disk.
    batch_sample_path_collection = []

    for epoch in range(1, total_iterations + 1):
        trained_on_k_samples = epoch * batch_size

        if trained_on_k_samples % generate_samples_every_training_samples != 0:
            continue

        sample_name = Path(
            f"Gen_samples_epoch_#{epoch}"
            f"_after_learn_from_{trained_on_k_samples}"
            "_train_samples.csv",
        )

        if os.path.isfile(f"{folder_prefix}/samples/{sample_name}"):
            batch_sample_path_collection.append(sample_name)
        else:
            warnings.warn(f"Warning: the sample {sample_name} was not found")

    batches_to_assemble = len(batch_sample_path_collection)

    if batches_to_assemble == 0:
        warnings.warn("I can't compute angles since there are no samples available")
        return

    print(f"{batches_to_assemble} sample batches to process")

    dihedral_angle_path = f"{folder_prefix}/operators/angles"

    os.makedirs(f"{dihedral_angle_path}", exist_ok=True)

    print(
        f"\nComputing exp. values and autocorrelations of {batches_to_assemble} sample batches, using {number_of_threads} threads...\n"
    )

    # Cap the number of concurrently running workers at ``number_of_threads``:
    # a slot is taken before each thread starts and given back when its worker
    # finishes, whether it succeeded or raised.
    worker_slots = threading.BoundedSemaphore(max(1, number_of_threads))

    def run_batch(sample_name, n, name):
        try:
            from_draws_to_angles(
                folder_prefix,
                spin,
                sample_name,
                dihedral_angle_path,
                n,
                name,
            )
        finally:
            worker_slots.release()

    threads = []

    for sample_name in batch_sample_path_collection:
        name = f"angles_{sample_name}"
        # length of the progress bar shown by the worker
        n = random.randint(number_of_threads, 10000)

        worker_slots.acquire()
        t = threading.Thread(target=run_batch, args=(sample_name, n, name))
        threads.append(t)
        t.start()

    # wait for the threads to complete
    for t in threads:
        t.join()

    flush()

    print("All samples in all batches have been processed")

    print(f"\nAssembling {batches_to_assemble} batches...")

    ##########################################################
    ### Assembling expectation values
    ##########################################################

    DF_list = [
        pd.read_csv(
            f"{dihedral_angle_path}/angles_{sample_name}",
            index_col=0,
            low_memory=False,
        )
        for sample_name in batch_sample_path_collection
    ]

    # One column per batch, labelled by the sample file name.
    df_all_batches = pd.concat(DF_list, axis=1, keys=batch_sample_path_collection)

    # Drop the inner column level so only the sample name remains as label.
    df_all_batches.columns = df_all_batches.columns.droplevel(-1)

    df_all_batches.to_csv(
        f"{dihedral_angle_path}/batches_assembled_{batches_to_assemble}.csv",
        index=True,
    )

    print("Done")

In [13]:
# Run the angle computation once per parameter set. Entry k of every list in
# the "Global variables" cell describes run k. The shared settings are taken
# from the globals defined there instead of being repeated as literals, so
# editing those globals actually affects the runs.
for (
    spin,
    total_iterations,
    main_layer_hidden_nodes,
    evaluation_batch_size,
    training_batch_size,
    generate_samples_every_training_samples,
) in zip(
    spin_list,
    iteration_list,
    main_layer_hidden_nodes_list,
    evaluation_batch_size_list,
    training_batch_size_list,
    generate_samples_every_training_samples_list,
):
    angles_compute(
        data_folder,
        model,
        spin,
        total_iterations,
        main_layer_hidden_nodes,
        evaluation_batch_size,
        generate_samples_every_training_samples,
        training_batch_size,
        activation_function=activation,
        exploration_rate=exploration_rate,
        training_fraction_from_back_traj=training_fraction_from_back_traj,
        learning_rate=learning_rate,
        number_of_threads=optimal_number_of_threads,
    )

10 sample batches to process

Computing exp. values and autocorrelations of 10 sample batches, using 12 threads...



VBox()

All samples in all batches have been processed

Assembling 10 batches...
        Gen_samples_epoch_#1000_after_learn_from_1000000_train_samples.csv  \
node 5                                          -0.729320                    
node 4                                          -0.351705                    
node 3                                          -0.326573                    
node 2                                          -0.745431                    
node 1                                          -0.084806                    

        Gen_samples_epoch_#2000_after_learn_from_2000000_train_samples.csv  \
node 5                                          -0.524196                    
node 4                                          -0.357558                    
node 3                                          -0.264184                    
node 2                                          -0.666719                    
node 1                                          -0.184463           

# TODO: continue from here