In [1]:
import os
import torch
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from irelease.utils import generate_smiles
from .nb_utils import initialize, data_provider, agent_net_hidden_states_func, evaluate, device

ModuleNotFoundError: No module named 'irelease'

In [10]:
# Probe CUDA first so the device string always names hardware that is present.
use_cuda = torch.cuda.is_available()
if use_cuda and torch.cuda.device_count() > 1:
    # Preferred choice of this notebook: the second GPU.
    device = 'cuda:1'
elif use_cuda:
    # Only one GPU is visible, so index 1 would be invalid.
    device = 'cuda:0'
else:
    device = 'cpu'

In [2]:
def default_hparams(args=None):
    """Return the default hyperparameter dictionary for the experiment.

    Parameters
    ----------
    args : object, optional
        Namespace-like object that may expose the boolean attributes
        ``use_true_reward``, ``use_attention`` and ``no_smiles_validity_flag``.
        When omitted, a module-level ``args`` is used if one is defined.
        Any attribute that cannot be found is treated as ``False``.

    Returns
    -------
    dict
        Top-level settings plus the nested ``reward_params``, ``agent_params``,
        ``critic_params`` and ``expert_model_params`` dictionaries.
    """
    if args is None:
        # Backward compatibility: earlier versions read a global ``args``.
        # In a notebook that global usually does not exist, so fall back to None.
        args = globals().get('args')

    use_true_reward = getattr(args, 'use_true_reward', False)
    use_attention = getattr(args, 'use_attention', False)
    # Logical negation is required here: ``~`` on a bool yields -1 / -2, which are
    # both truthy, so the validity flag could never be switched off.
    use_validity_flag = not getattr(args, 'no_smiles_validity_flag', False)

    return {'d_model': 1500,
            'dropout': 0.0,
            'monte_carlo_N': 5,
            'use_monte_carlo_sim': True,
            'no_mc_fill_val': 0.0,
            'gamma': 0.97,
            'episodes_to_train': 10,
            'gae_lambda': 0.95,
            'ppo_eps': 0.2,
            'ppo_batch': 1,
            'ppo_epochs': 5,
            'entropy_beta': 0.01,
            'use_true_reward': use_true_reward,
            'reward_params': {'num_layers': 2,
                              'd_model': 512,
                              'unit_type': 'gru',
                              'demo_batch_size': 32,
                              'irl_alg_num_iter': 5,
                              'dropout': 0.2,
                              'use_attention': use_attention,
                              'use_validity_flag': use_validity_flag,
                              'bidirectional': True,
                              'optimizer': 'adadelta',
                              'optimizer__global__weight_decay': 0.0005,
                              'optimizer__global__lr': 0.001, },
            'agent_params': {'unit_type': 'gru',
                             'num_layers': 2,
                             'stack_width': 1500,
                             'stack_depth': 200,
                             'optimizer': 'adadelta',
                             'optimizer__global__weight_decay': 0.0000,
                             'optimizer__global__lr': 0.001},
            'critic_params': {'num_layers': 2,
                              'd_model': 256,
                              'dropout': 0.2,
                              'unit_type': 'lstm',
                              'optimizer': 'adadelta',
                              'optimizer__global__weight_decay': 0.00005,
                              'optimizer__global__lr': 0.001},
            'expert_model_params': {'model_dir': './model_dir/expert_rnn_reg',
                                    'd_model': 128,
                                    'rnn_num_layers': 2,
                                    'dropout': 0.8,
                                    'is_bidirectional': False,
                                    'unit_type': 'lstm'}
            }

In [7]:
def data_provider():
    """Create the data generators consumed by the rest of the notebook.

    Returns a dict with a ``GeneratorData`` under ``'demo_data'`` (read from
    ``logp_smiles_biased.smi``), another under ``'unbiased_data'`` (read from
    ``unbiased_smiles.smi``), and ``None`` under ``'prior_data'``.
    """
    vocab = get_default_tokens()

    def _load(smiles_path):
        # Both generators share every setting except the file they read.
        return GeneratorData(training_data_path=smiles_path,
                             delimiter='\t',
                             cols_to_read=[0],
                             keep_header=True,
                             pad_symbol=' ',
                             max_len=120,
                             tokens=vocab,
                             use_cuda=use_cuda)

    providers = {}
    providers['demo_data'] = _load('../../data/logp_smiles_biased.smi')
    providers['unbiased_data'] = _load('../../data/unbiased_smiles.smi')
    providers['prior_data'] = None
    return providers

In [11]:
def _move_to_device(module, target_device, label):
    """Best-effort ``module.to(target_device)``; warn and return the module as-is on failure."""
    import warnings
    try:
        return module.to(target_device)
    except Exception as exc:
        # Keep the best-effort semantics (no crash), but make the failure visible:
        # a silently unmoved model later clashes with the ``device`` handed to the
        # agent / PPO / IRL objects.
        warnings.warn('Could not move {} to device {!r}: {!r}'.format(label, target_device, exc),
                      RuntimeWarning)
        return module


def initialize(hparams, demo_data_gen, unbiased_data_gen, *args, **kwargs):
    """Build the agent, critic, reward function and learning algorithms.

    Parameters
    ----------
    hparams : dict
        Hyperparameters; reads the top-level keys plus the nested
        ``agent_params``, ``critic_params``, ``reward_params`` and
        ``expert_model_params`` dictionaries.
    demo_data_gen
        Demonstration data generator. Its ``n_characters``, ``char2idx``,
        ``pad_symbol`` and ``all_characters`` attributes are read, and
        ``set_batch_size`` is called on it.
    unbiased_data_gen
        Passed through unchanged into the returned dictionary.
    *args, **kwargs
        Accepted but unused.

    Returns
    -------
    dict
        Keys: ``agent``, ``probs_reg``, ``drl_alg``, ``irl_alg``, ``reward_func``,
        ``gamma``, ``episodes_to_train``, ``expert_model``, ``demo_data_gen``,
        ``unbiased_data_gen`` and ``gen_args``.

    Notes
    -----
    Uses the module-level ``device``. A single ``Encoder`` instance is shared
    by the agent, critic and reward networks.
    """
    # Embeddings provider
    encoder = Encoder(vocab_size=demo_data_gen.n_characters, d_model=hparams['d_model'],
                      padding_idx=demo_data_gen.char2idx[demo_data_gen.pad_symbol],
                      dropout=hparams['dropout'], return_tuple=True)

    # Agent entities
    rnn_layers = []
    has_stack = True
    for i in range(1, hparams['agent_params']['num_layers'] + 1):
        rnn_layers.append(StackRNN(layer_index=i,
                                   input_size=hparams['d_model'],
                                   hidden_size=hparams['d_model'],
                                   has_stack=has_stack,
                                   unit_type=hparams['agent_params']['unit_type'],
                                   stack_width=hparams['agent_params']['stack_width'],
                                   stack_depth=hparams['agent_params']['stack_depth'],
                                   k_mask_func=encoder.k_padding_mask))
        # For multi-layer agents, dropout and layer norm follow every recurrent layer.
        if hparams['agent_params']['num_layers'] > 1:
            rnn_layers.append(StackedRNNDropout(hparams['dropout']))
            rnn_layers.append(StackedRNNLayerNorm(hparams['d_model']))
    agent_net = nn.Sequential(encoder,
                              *rnn_layers,
                              StackRNNLinear(out_dim=demo_data_gen.n_characters,
                                             hidden_size=hparams['d_model'],
                                             bidirectional=False,
                                             bias=True))
    agent_net = _move_to_device(agent_net, device, 'agent_net')
    optimizer_agent_net = parse_optimizer(hparams['agent_params'], agent_net)
    selector = MolEnvProbabilityActionSelector(actions=demo_data_gen.all_characters)
    probs_reg = StateActionProbRegistry()
    # Arguments handed to ``agent_net_hidden_states_func`` by the agent, PPO and IRL objects.
    init_state_args = {'num_layers': hparams['agent_params']['num_layers'],
                       'hidden_size': hparams['d_model'],
                       'stack_depth': hparams['agent_params']['stack_depth'],
                       'stack_width': hparams['agent_params']['stack_width'],
                       'unit_type': hparams['agent_params']['unit_type']}
    agent = PolicyAgent(model=agent_net,
                        action_selector=selector,
                        states_preprocessor=seq2tensor,
                        initial_state=agent_net_hidden_states_func,
                        initial_state_args=init_state_args,
                        apply_softmax=True,
                        probs_registry=probs_reg,
                        device=device)
    critic = nn.Sequential(encoder,
                           CriticRNN(hparams['d_model'], hparams['critic_params']['d_model'],
                                     unit_type=hparams['critic_params']['unit_type'],
                                     dropout=hparams['critic_params']['dropout'],
                                     num_layers=hparams['critic_params']['num_layers']))
    critic = _move_to_device(critic, device, 'critic')
    optimizer_critic_net = parse_optimizer(hparams['critic_params'], critic)
    drl_alg = PPO(actor=agent_net, actor_opt=optimizer_agent_net,
                  critic=critic, critic_opt=optimizer_critic_net,
                  initial_states_func=agent_net_hidden_states_func,
                  initial_states_args=init_state_args,
                  device=device,
                  gamma=hparams['gamma'],
                  gae_lambda=hparams['gae_lambda'],
                  ppo_eps=hparams['ppo_eps'],
                  ppo_epochs=hparams['ppo_epochs'],
                  ppo_batch=hparams['ppo_batch'],
                  entropy_beta=hparams['entropy_beta'])

    # Reward function entities
    reward_net = nn.Sequential(encoder,
                               RewardNetRNN(input_size=hparams['d_model'],
                                            hidden_size=hparams['reward_params']['d_model'],
                                            num_layers=hparams['reward_params']['num_layers'],
                                            bidirectional=hparams['reward_params']['bidirectional'],
                                            use_attention=hparams['reward_params']['use_attention'],
                                            dropout=hparams['reward_params']['dropout'],
                                            unit_type=hparams['reward_params']['unit_type'],
                                            use_smiles_validity_flag=hparams['reward_params']['use_validity_flag']))
    reward_net = _move_to_device(reward_net, device, 'reward_net')
    expert_model = RNNPredictor(hparams['expert_model_params'], device)
    reward_function = RewardFunction(reward_net, mc_policy=agent, actions=demo_data_gen.all_characters,
                                     device=device, use_mc=hparams['use_monte_carlo_sim'],
                                     mc_max_sims=hparams['monte_carlo_N'],
                                     expert_func=expert_model,
                                     use_true_reward=hparams['use_true_reward'],
                                     true_reward_func=get_logp_reward,
                                     no_mc_fill_val=hparams['no_mc_fill_val'])
    optimizer_reward_net = parse_optimizer(hparams['reward_params'], reward_net)
    demo_data_gen.set_batch_size(hparams['reward_params']['demo_batch_size'])
    irl_alg = GuidedRewardLearningIRL(reward_net, optimizer_reward_net, demo_data_gen,
                                      k=hparams['reward_params']['irl_alg_num_iter'],
                                      agent_net=agent_net,
                                      agent_net_init_func=agent_net_hidden_states_func,
                                      agent_net_init_func_args=init_state_args,
                                      device=device)

    init_args = {'agent': agent,
                 'probs_reg': probs_reg,
                 'drl_alg': drl_alg,
                 'irl_alg': irl_alg,
                 'reward_func': reward_function,
                 'gamma': hparams['gamma'],
                 'episodes_to_train': hparams['episodes_to_train'],
                 'expert_model': expert_model,
                 'demo_data_gen': demo_data_gen,
                 'unbiased_data_gen': unbiased_data_gen,
                 'gen_args': {'num_layers': hparams['agent_params']['num_layers'],
                              'hidden_size': hparams['d_model'],
                              'num_dir': 1,
                              'stack_depth': hparams['agent_params']['stack_depth'],
                              'stack_width': hparams['agent_params']['stack_width'],
                              'has_stack': has_stack,
                              'has_cell': hparams['agent_params']['unit_type'] == 'lstm',
                              'device': device}}
    return init_args


In [14]:
# Build the data generators once; later cells look them up by key in this dict.
generators = data_provider()
# Display the available generator names as the cell output.
generators.keys()

dict_keys(['demo_data', 'unbiased_data', 'prior_data'])

In [None]:
# Assemble every model/algorithm object from the default hyperparameters
# and the demonstration / unbiased data generators created above.
model_els = initialize(
    default_hparams(),
    generators['demo_data'],
    generators['unbiased_data'],
)