In [1]:
import argparse
import glob
import os
import pickle
import time

import numpy as np
import pandas as pd
from tqdm import tqdm

import ray
from ray.tune.registry import register_env
from ray.tune import Analysis
import ray.rllib.agents.ppo as ppo
from ray.rllib.models import ModelCatalog

In [3]:
PCA_EMBED_SIZE = 1000

def env_creator(_):
    import tensorflow as tf

    gpus = tf.config.list_physical_devices('GPU')
    if gpus:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)

    from examples.gym.mol_fp_gym.molecule_graph_problem import MoleculeGraphProblem
    from rlmolecule.graph_gym.graph_gym_env import GraphGymEnv
    from rlmolecule.molecule.builder.builder import MoleculeBuilder

    # Load the PCA model for embedding
    import pickle
    with open(os.path.join("pca-{}.p".format(PCA_EMBED_SIZE)), "rb") as f:
        pca = pickle.load(f)

    result = GraphGymEnv(
        MoleculeGraphProblem(
            MoleculeBuilder(max_atoms=6, min_atoms=1),
            pca
        )
    )
    return result

from rlmolecule.graph_gym.graph_gym_model import GraphGymModel

class ThisModel(GraphGymModel):
    def __init__(self,
                    obs_space,
                    action_space,
                    num_outputs,
                    model_config,
                    name,
                    **kwargs):
        
        import tensorflow as tf
        gpus = tf.config.list_physical_devices('GPU')
        if gpus:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)

        from examples.gym.mol_fp_gym.policy import policy_model

        super(ThisModel, self).__init__(
            obs_space, action_space, num_outputs, model_config, name,
            policy_model,
            **kwargs)

In [4]:
ray.init(_node_ip_address="127.0.0.1", num_cpus=1)

# values that get used more than once
restore_dir = '../log/PPO/PPO_mol_fp_pca=1000_c1d28_00000_0_lr=0.0001,num_sgd_iter=10_2021-09-30_14-26-56/'
chkpt = '000357'

a = Analysis(restore_dir)
config = a.get_best_config("episode_reward_mean", mode="max")
config["num_workers"] = 1
config["num_gpus"] = 0

env_name = 'mol_fp_pca={}'.format(PCA_EMBED_SIZE)
_ = register_env(env_name, env_creator)

ModelCatalog.register_custom_model('molecule_graph_problem_model', ThisModel)



In [None]:
for reload_try in range(1):
    trainer = ppo.PPOTrainer(env=env_name, config=config)
    checkpoint = os.path.join(
        restore_dir, 
        "checkpoint_{}/checkpoint-{}".format(
            chkpt, int(chkpt)))
    trainer.restore(checkpoint)
    policy = trainer.get_policy()

Required resources for this actor or task: {CPU: 1.000000}
Available resources on this node: {0.000000/1.000000 CPU, 383.523247 GiB/383.523247 GiB memory, 1.000000/1.000000 GPU, 168.358488 GiB/168.358488 GiB object_store_memory, 1.000000/1.000000 node:10.60.1.122, 1.000000/1.000000 accelerator_type:GV100}
In total there are 0 pending tasks and 1 pending actors on this node.


In [None]:
trainer.get_weights()