In [1]:
%%bash
# Install the local torchgfn checkout located at ../torchgfn (relative to the
# notebook's working directory) into the active Python environment.
# Fail fast: without this, a failed `cd` would still let `pip install .` run
# against whatever directory the cell happens to start in.
set -euo pipefail
cd ../torchgfn
pip install .

Processing /Users/erostrate9/Desktop/CSI5340 DL/Project/code/GFNEval/torchgfn
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Building wheels for collected packages: torchgfn
  Building wheel for torchgfn (pyproject.toml): started
  Building wheel for torchgfn (pyproject.toml): finished with status 'done'
  Created wheel for torchgfn: filename=torchgfn-1.1.1-py3-none-any.whl size=82716 sha256=cac2034c12dc936edfe94d9d8037806983eabc7b12fb455cc769b7584e03fa90
  Stored in directory: /private/var/folders/c_/9pzrss116732p7dxch3kn_bc0000gn/T/pip-ephem-wheel-cache-7ike197q/wheels/56/de/11/edbaf478c4bdb3bf4d2dadfda48c78d0790413f2f66eee7a21
Successfully built torchgfn
Installing collected packages: torchgfn
  Attemptin

In [2]:
import numpy as np
import torch
from scipy.stats import spearmanr
from tqdm import tqdm

from gfn.env import DiscreteEnv
from gfn.gflownet import GFlowNet, TBGFlowNet, SubTBGFlowNet, FMGFlowNet, DBGFlowNet
from gfn.gym import HyperGrid2, HyperGrid
from gfn.modules import DiscretePolicyEstimator, ScalarEstimator
from gfn.samplers import Sampler
from gfn.states import States, DiscreteStates
from gfn.utils.evaluation import get_random_test_set, get_sampled_test_set, evaluate_GFNEvalS, evaluate_GFNEvalS_with_monte_carlo
from gfn.utils.modules import MLP

# Demo

In [None]:
# 0 - Detect whether a GPU is available.
# NOTE: this demo runs entirely on the CPU: the environment below is created
# with device_str='cpu' and the networks are never moved to another device, so
# `device` is informational only in this cell.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# print(f"Using device: {device}")

# 1 - Define the environment (the environment seed is drawn at random here)
# Alternative environment:
# env = HyperGrid(ndim=4, height=8, R0=0.01)
env = HyperGrid2(ndim=4, height=8, ncenters=4,
                 seed=torch.randint(0, 10000, (1,)).item(),
                 device_str='cpu')

# 2 - Define the neural network modules
# The forward policy has one output per action; the backward policy has one
# output fewer and reuses the forward policy's trunk.
module_PF = MLP(input_dim=env.preprocessor.output_dim, output_dim=env.n_actions)
module_PB = MLP(input_dim=env.preprocessor.output_dim, output_dim=env.n_actions - 1, trunk=module_PF.trunk)

# 3 - Define the estimators
pf_estimator = DiscretePolicyEstimator(module_PF, env.n_actions, is_backward=False, preprocessor=env.preprocessor)
pb_estimator = DiscretePolicyEstimator(module_PB, env.n_actions, is_backward=True, preprocessor=env.preprocessor)

# 4 - Define the GFlowNet
gfn = TBGFlowNet(logZ=0., pf=pf_estimator, pb=pb_estimator)

# 5 - Define the sampler and optimizer
# The logZ parameters get their own, larger learning rate.
sampler = Sampler(estimator=pf_estimator)
optimizer = torch.optim.Adam(gfn.pf_pb_parameters(), lr=1e-3)
optimizer.add_param_group({"params": gfn.logz_parameters(), "lr": 1e-1})

# 6 - Train the GFlowNet
for i in (pbar := tqdm(range(1000))):
    trajectories = sampler.sample_trajectories(env=env, n=16)
    optimizer.zero_grad()
    # The loss is used where it was computed. It was previously copied to
    # `device` after the fact, which is a no-op on CPU-only hosts and a
    # needless cross-device transfer on CUDA hosts (env and networks are on CPU).
    loss = gfn.loss(env, trajectories)
    loss.backward()
    optimizer.step()
    if i % 25 == 0:
        pbar.set_postfix({"loss": loss.item()})

 37%|███▋      | 373/1000 [00:04<00:07, 83.32it/s, loss=0.271]

In [None]:
# Number of states requested for each evaluation set.
n_tests = 100

# Evaluation set built from the trained `gfn` together with `env`.
test_states_sample, test_rewards_sample = get_sampled_test_set(gfn, env, n=n_tests)

# Evaluation set built from `env` alone.
test_states_random, test_rewards_random = get_random_test_set(env, n=n_tests)

In [None]:
# Evaluate the trained GFlowNet on the random and on the sampled test set.
# Each call returns three values, all of which are discarded here; the metric
# is reported through the function's own printed output.
_, _, _ = evaluate_GFNEvalS(gfn, env, test_states_random, test_rewards_random)
_, _, _ = evaluate_GFNEvalS(gfn, env, test_states_sample, test_rewards_sample)

# FIXME(review): the recorded output of this cell ends with
#   TypeError: evaluate_GFNEvalS_with_monte_carlo() got an unexpected keyword argument 'n_samples'
# so the two Monte-Carlo calls below do not run as written. The accepted
# keyword is not visible from this notebook; check the function's signature in
# gfn.utils.evaluation and rename the argument accordingly.
n_samples = 20 * env.n_states
_, _, _ = evaluate_GFNEvalS_with_monte_carlo(gfn, env, test_states_random, test_rewards_random, n_samples=n_samples)
_, _, _ = evaluate_GFNEvalS_with_monte_carlo(gfn, env, test_states_sample, test_rewards_sample, n_samples=n_samples)

Evaluating test set...: 100%|██████████| 100/100 [00:00<00:00, 119.37it/s]


Spearman's Rank Correlation (Modified GFNEvalS, including termination actions): 0.9502670267026702. Runtime: 0.8506119251251221 seconds.
Function 'evaluate_GFNEvalS' executed in 0.8511 seconds


Evaluating test set...: 100%|██████████| 100/100 [00:00<00:00, 110.60it/s]


Spearman's Rank Correlation (Modified GFNEvalS, including termination actions): 0.943971673768229. Runtime: 0.9086501598358154 seconds.
Function 'evaluate_GFNEvalS' executed in 0.9092 seconds


TypeError: evaluate_GFNEvalS_with_monte_carlo() got an unexpected keyword argument 'n_samples'

# Experiments

In [None]:
def experiment_setup(env: DiscreteEnv, algo: "type[GFlowNet]"):
    """Build a GFlowNet, its on-policy sampler and its optimizer for ``env``.

    The forward and backward policies are MLPs that share a trunk; every
    module is moved to ``env.device``.

    Args:
        env: Discrete environment. Its ``preprocessor.output_dim``,
            ``n_actions`` and ``device`` attributes are read.
        algo: The GFlowNet *class* (not an instance) to instantiate.
            Supported: ``TBGFlowNet`` and ``SubTBGFlowNet``.

    Returns:
        A ``(gfn, sampler, optimizer)`` tuple.

    Raises:
        NotImplementedError: If ``algo`` is not one of the supported classes.
            (Previously this silently returned ``(None, None, None)``, which
            only failed later, far from the cause.)
    """
    if algo is not TBGFlowNet and algo is not SubTBGFlowNet:
        # TODO: initialize parameterizations of FMGFlowNet and DBGFlowNet
        raise NotImplementedError(
            f"experiment_setup does not support {getattr(algo, '__name__', algo)!r} yet; "
            "expected TBGFlowNet or SubTBGFlowNet."
        )

    # The environment has a preprocessor attribute, which is used to preprocess
    # the state before feeding it to the policy estimators.
    feature_dim = env.preprocessor.output_dim

    # Forward policy network: as many outputs as there are actions.
    module_PF = MLP(
        input_dim=feature_dim,
        output_dim=env.n_actions,
    ).to(env.device)
    # Backward policy network: shares all parameters with P_F except the last layer.
    module_PB = MLP(
        input_dim=feature_dim,
        output_dim=env.n_actions - 1,
        trunk=module_PF.trunk,
    ).to(env.device)

    pf_estimator = DiscretePolicyEstimator(
        module_PF, env.n_actions, is_backward=False, preprocessor=env.preprocessor
    ).to(env.device)
    pb_estimator = DiscretePolicyEstimator(
        module_PB, env.n_actions, is_backward=True, preprocessor=env.preprocessor
    ).to(env.device)

    if algo is TBGFlowNet:
        gfn = TBGFlowNet(logZ=0., pf=pf_estimator, pb=pb_estimator).to(env.device)
        # logZ gets a dedicated, higher learning rate.
        extra_param_group = {"params": gfn.logz_parameters(), "lr": 1e-1}
    else:  # SubTBGFlowNet
        module_logF = MLP(
            input_dim=feature_dim,
            output_dim=1,  # Important for ScalarEstimators!
        ).to(env.device)
        logF_estimator = ScalarEstimator(
            module=module_logF, preprocessor=env.preprocessor
        ).to(env.device)
        gfn = SubTBGFlowNet(
            pf=pf_estimator, pb=pb_estimator, logF=logF_estimator, lamda=0.9
        ).to(env.device)
        # log F gets a dedicated learning rate (typically higher).
        extra_param_group = {"params": gfn.logF_parameters(), "lr": 1e-2}

    # On-policy sampler, based on the forward policy.
    sampler = Sampler(estimator=pf_estimator)

    # Different parameter groups can have their own learning rate.
    optimizer = torch.optim.Adam(gfn.pf_pb_parameters(), lr=1e-3)
    optimizer.add_param_group(extra_param_group)

    return gfn, sampler, optimizer

def training(gfn: GFlowNet, sample: Sampler, optimizer, num_epochs: int = 1000,
             env: "DiscreteEnv | None" = None, batch_size: int = 16) -> Sampler:
    """Train ``gfn`` on trajectories drawn with the given sampler.

    Args:
        gfn: The GFlowNet whose ``loss`` is minimised.
        sample: The sampler used to draw trajectories. (The parameter keeps its
            original name for backward compatibility.) Earlier versions ignored
            this argument and used the notebook-level ``sampler`` instead.
        optimizer: Optimizer holding the parameters of ``gfn``.
        num_epochs: Number of optimisation steps.
        env: Environment to sample from. When omitted, the notebook-level
            ``env`` is used, which matches the previous behaviour.
        batch_size: Number of trajectories sampled per step.

    Returns:
        The sampler that was passed in.

    Raises:
        NameError: If ``env`` is omitted and no notebook-level ``env`` exists.
    """
    if env is None:
        # Backward compatibility: older calls relied on the global `env`.
        env = globals().get("env")
        if env is None:
            raise NameError("name 'env' is not defined; pass env=... to training()")

    for i in (pbar := tqdm(range(num_epochs))):
        trajectories = sample.sample_trajectories(env=env, n=batch_size)
        optimizer.zero_grad()
        loss = gfn.loss(env, trajectories)
        loss.backward()
        optimizer.step()
        if i % 25 == 0:
            # Refresh the progress-bar readout only every 25 steps.
            pbar.set_postfix({"loss": loss.item()})
    return sample

# TODO: this function is unfinished; `num_samples` and `num_epochs` are
# accepted but not used yet.
def testing(env: DiscreteEnv, gfn: GFlowNet, num_samples: int = 10000, num_epochs: int = 250,
            n_tests: int = 100):
    """Evaluate ``gfn`` on a freshly built sampled test set.

    Args:
        env: Environment the GFlowNet was trained on.
        gfn: The GFlowNet to evaluate.
        num_samples: Currently unused (see TODO above).
        num_epochs: Currently unused (see TODO above).
        n_tests: Number of test states to request. This used to be read from a
            notebook-level variable; it is now an explicit argument.

    Returns:
        Whatever ``evaluate_GFNEvalS`` returns (elsewhere in this notebook it
        is unpacked into three values). Previously the result was discarded.
    """
    test_states_sample, test_rewards_sample = get_sampled_test_set(gfn, env, n=n_tests)
    return evaluate_GFNEvalS(gfn, env, test_states_sample, test_rewards_sample)



In [13]:
#@title Hyper-parameters
# --- Reproducibility -------------------------------------------------------
# Seed PyTorch's CPU generator and the CUDA generators.
seed = 1234
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

# Ask cuDNN for deterministic algorithms and turn off its benchmark mode.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# --- Experiment grid -------------------------------------------------------
ndims = [2, 4, 8]
heights = [8, 16]
ncenters = [2, 4, 8, 16, 32]
# Full list, once FMGFlowNet / DBGFlowNet are supported:
# algos = [TBGFlowNet, SubTBGFlowNet, FMGFlowNet, DBGFlowNet]
algos = [TBGFlowNet, SubTBGFlowNet]