In [3]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell execution and edits to local .py files are picked up.
%load_ext autoreload
%autoreload 2

##### Please run this code with the kernel reinvent.v3.2

In [4]:
# load dependencies
import sys
import pickle
import os
import shutil
import json
import pandas as pd
import numpy as np
import rdkit.Chem as Chem
from numpy.random import default_rng
import torch
from ast import literal_eval
from torch import nn, optim
from rdkit import Chem
from rdkit.Chem import AllChem
from tdc import Oracle
import subprocess

### First, we need to install the custom reinvent scoring package to support the Bradley-Terry model

In [5]:
# Report the interpreter version the current kernel is running on.
import sys

print("Python version: {}".format(sys.version))

Python version: 3.7.6 | packaged by conda-forge | (default, Jun  1 2020, 18:57:50) 
[GCC 7.5.0]


In [6]:
# Show metadata (version, install location) of the currently installed reinvent_scoring package.
! pip show reinvent_scoring

Name: reinvent-scoring
Version: 0.0.73
Summary: Scoring functions for Reinvent
Home-page: https://github.com/MolecularAI/reinvent-scoring.git
Author: MolecularAI
Author-email: patronov@gmail.com
License: UNKNOWN
Location: /home/springnuance/reinvent-hitl/reinvent-scoring
Requires: 
Required-by: 


In [7]:
# ! conda install scikit-learn=0.21.3

##### If reinvent_scoring is already installed, uninstall it first

In [8]:
# Remove any existing reinvent_scoring installation; -y skips the confirmation prompt.
! pip uninstall -y reinvent_scoring

Found existing installation: reinvent-scoring 0.0.73
Uninstalling reinvent-scoring-0.0.73:
  Successfully uninstalled reinvent-scoring-0.0.73


##### Now we install the custom reinvent scoring package
##### The flag -e means that the package is installed in editable mode, so that changes to the code will be immediately available without reinstalling the package. All package info is stored in the setup.py file.

In [9]:
# Install the three local REINVENT packages (scoring, chemistry, models) in editable mode
# from their checkouts under /home/springnuance/reinvent-hitl.
! pip install -e "/home/springnuance/reinvent-hitl/reinvent-scoring"
! pip install -e "/home/springnuance/reinvent-hitl/reinvent-chemistry"
! pip install -e "/home/springnuance/reinvent-hitl/reinvent-models"

Obtaining file:///home/springnuance/reinvent-hitl/reinvent-scoring
  Preparing metadata (setup.py) ... [?25ldone
[?25hInstalling collected packages: reinvent-scoring
  Running setup.py develop for reinvent-scoring
Successfully installed reinvent-scoring-0.0.73
Obtaining file:///home/springnuance/reinvent-hitl/reinvent-chemistry
  Preparing metadata (setup.py) ... [?25ldone
[?25hInstalling collected packages: reinvent-chemistry
  Attempting uninstall: reinvent-chemistry
    Found existing installation: reinvent-chemistry 0.0.51
    Uninstalling reinvent-chemistry-0.0.51:
      Successfully uninstalled reinvent-chemistry-0.0.51
  Running setup.py develop for reinvent-chemistry
Successfully installed reinvent-chemistry-0.0.51
Obtaining file:///home/springnuance/reinvent-hitl/reinvent-models
  Preparing metadata (setup.py) ... [?25ldone
[?25hInstalling collected packages: reinvent-models
  Attempting uninstall: reinvent-models
    Found existing installation: reinvent-models 0.0.15rc

In [10]:
# ! conda install -y scikit-learn=0.21.3
! pip list | grep reinvent_scoring

In [18]:
from training_Bradley_Terry_model.bradley_terry import BradleyTerryModel
from training_Rank_ListNet_model.rank_listnet import RankListNetModel
from training_Score_Regression_model.score_regression import ScoreRegressionModel
from helper import load_drd2_dataset, write_REINVENT_config, change_config_json, \
                    read_scaffold_result, load_feedback_model, smiles_human_score, \
                    compute_fingerprints, retrain_feedback_model,\
                    create_drd2_dataset, combine_drd2_dataset, save_drd2_dataset
                        
from scripts.acquisition import select_query_feedback

def check_create(path):
    """
    Make sure the directory `path` exists, creating it (and any missing
    parent directories) when necessary.

    Uses `os.makedirs(..., exist_ok=True)` instead of a separate
    "exists?" check followed by a create, so the call cannot fail when the
    directory appears between the check and the creation.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If `path` already exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)
        
def run_HITL_classify(
        seed, reinvent_dir, reinvent_env, output_dir,
        feedback_type, # scoring, comparing, ranking
        base_training_dataset_path, # Path to the base training dataset of the feedback model
        base_testing_dataset_path, # Path to the base testing dataset of the feedback model (not read by this function)
        model_pretrained_path, # Path to the pretrained model before REINVENT_round_1
        model_pretrained_name, # File name under which the model is stored in every HITL_iteration folder
        num_rounds, # number of rounds, corresponding to R in the paper
        num_iters, # number of iterations of showing molecules to the human for feedback at each round, corresponding to T in the paper
        num_queries, # number of molecules shown to the simulated chemist at each iteration
        REINVENT_n_steps, # number of REINVENT optimization steps. This is not related to the HITL but on the REINVENT side
        batch_size, # batch size of the reinforcement learning model, or size of scaffold_memory.csv
        acquisition, # acquisition: 'uncertainty', 'random', 'thompson', 'greedy'
        sigma_noise, # noise level for simulated chemist's responses
        choose_top_smiles, # number of top scoring molecules to choose for feedback
        training_epochs # number of epochs for training the model in each HITL iteration
        ):
    """
    Run the human-in-the-loop (HITL) fine-tuning loop of a feedback model
    around REINVENT.

    For every REINVENT round the function writes a REINVENT configuration
    pointing at the current feedback model, runs REINVENT as a subprocess,
    reads the resulting scaffold memory, and then performs `num_iters`
    HITL iterations. Each iteration selects molecules with the chosen
    acquisition strategy, scores them with the simulated chemist, appends
    the new data to the training set, retrains the feedback model and
    stores its state dict where the next iteration / round (or the final
    output) expects it.

    Args:
        seed: Seed for numpy's legacy global RNG, the numpy Generator passed
            to the acquisition function, and torch's global RNG.
        reinvent_dir: Directory that contains REINVENT's `input.py`.
        reinvent_env: Root of the Python environment used to run REINVENT
            (`{reinvent_env}/bin/python` is executed).
        output_dir: Root directory for all results; also used as the job id.
        feedback_type: "scoring", "comparing" or "ranking".
        base_training_dataset_path: Path of the base training dataset loaded
            with `load_drd2_dataset`.
        base_testing_dataset_path: Accepted for interface compatibility; it
            is not read by this function.
        model_pretrained_path: Path of the pretrained feedback model file.
        model_pretrained_name: File name used for the model inside every
            `HITL_iteration_*` folder and for the final model.
        num_rounds: Number of REINVENT rounds (R).
        num_iters: Number of HITL iterations per round (T).
        num_queries: Number of queries requested per HITL iteration (capped
            at the number of available SMILES).
        REINVENT_n_steps: Number of REINVENT optimization steps.
        batch_size: Batch size written to the REINVENT configuration.
        acquisition: Acquisition strategy name forwarded to
            `select_query_feedback`.
        sigma_noise: Noise level of the simulated chemist.
        choose_top_smiles: Number of top molecules read from the scaffold
            memory.
        training_epochs: Number of retraining epochs per HITL iteration.

    Raises:
        ValueError: If `num_rounds` or `num_iters` is smaller than 1.
        RuntimeError: If the REINVENT subprocess exits with a non-zero code.
    """
    if num_rounds < 1 or num_iters < 1:
        raise ValueError(
            f"num_rounds and num_iters must both be >= 1, got num_rounds={num_rounds}, num_iters={num_iters}"
        )

    np.random.seed(seed)
    # Retraining runs through torch, so seed its global RNG as well;
    # otherwise the run is not reproducible from `seed` alone.
    torch.manual_seed(seed)
    rng = default_rng(seed)

    ################################################
    # DEFINING REINVENT JOBNAME, JOBID, OUTPUT_DIR #
    ################################################

    jobname = "fine-tune predictive component HITL"
    jobid = output_dir
    conf_filename = "config.json"

    # create root output dir
    check_create(output_dir)

    # create HITL round folders to store results
    for REINVENT_round in range(1, num_rounds + 1):
        check_create(f"{output_dir}/REINVENT_round_{REINVENT_round}")
        for HITL_iteration in range(1, num_iters + 1):
            check_create(f"{output_dir}/REINVENT_round_{REINVENT_round}/HITL_iteration_{HITL_iteration}")

    # Copy the pretrained model to the first REINVENT round/ HITL_iteration_1.
    # The destination file name is given explicitly because the model is later
    # loaded as `model_pretrained_name`, which may differ from the basename of
    # `model_pretrained_path`.
    shutil.copy2(f"{model_pretrained_path}",
                 f"{output_dir}/REINVENT_round_1/HITL_iteration_1/{model_pretrained_name}")

    # multi-parameter optimization (MPO) loop
    print(f"\nRunning DRD2 (one objective) with rounds {num_rounds}, iters {num_iters}, queries {num_queries}, seed {seed}")
    print(f"Results will be saved at {output_dir}")

    base_training_dataset_outputs = load_drd2_dataset(feedback_type=feedback_type,
                                                      data_path=base_training_dataset_path)

    print("Loading initial training and testing datasets successfully")

    # Key used to report dataset sizes: "scoring" datasets store single
    # molecules, the other feedback types store molecule tuples.
    size_key = "smiles" if feedback_type == "scoring" else "smiles_1"

    # ########################### REINVENT rounds ######################################

    for REINVENT_round in range(1, num_rounds + 1):

        print("=====================================")
        print(f"REINVENT round = {REINVENT_round}")

        REINVENT_round_output_dir = f"{output_dir}/REINVENT_round_{REINVENT_round}"

        configuration_JSON_path = write_REINVENT_config(feedback_type, reinvent_dir, jobid, jobname,
                                                        REINVENT_round_output_dir, conf_filename)

        print(f"Creating config file: {configuration_JSON_path}.")

        # Read the generated configuration with a context manager so the file
        # handle is closed before the file is rewritten below.
        with open(configuration_JSON_path) as f:
            configuration = json.load(f)

        current_model_path = f"{REINVENT_round_output_dir}/HITL_iteration_1/{model_pretrained_name}"

        configuration = change_config_json(configuration, REINVENT_n_steps, batch_size, current_model_path)

        # write the updated configuration file
        with open(configuration_JSON_path, 'w') as f:
            json.dump(configuration, f, indent=4, sort_keys=True)

        print("Run REINVENT")
        returncode = _run_reinvent(reinvent_env, reinvent_dir, configuration_JSON_path,
                                   REINVENT_round_output_dir)
        print("Exit code:", returncode)
        if returncode != 0:
            # Continuing would read a missing or stale scaffold_memory.csv.
            raise RuntimeError(
                f"REINVENT exited with code {returncode} in round {REINVENT_round}; "
                f"see {REINVENT_round_output_dir}/run.err"
            )

        #############################################################################
        # REINVENT HAS OUTPUT THE RESULT in path f"{REINVENT_round_output_dir}/results" #
        #############################################################################

        # Get the high scoring molecules
        output_high_score = read_scaffold_result(f"{REINVENT_round_output_dir}/results/scaffold_memory.csv",
                                                 choose_top_smiles=choose_top_smiles)

        scaffold_df = output_high_score["scaffold_df"]
        smiles = output_high_score["smiles"]

        # store molecule indexes selected for feedback
        selected_feedback = np.empty(0).astype(int)

        ########################### HITL_iteration in each REINVENT round #####################

        for HITL_iteration in range(1, num_iters + 1): # T number of HITL_iterations

            print("----------------------------------")
            print(f"HITL iteration = {HITL_iteration}")

            iteration_dir = f"{REINVENT_round_output_dir}/HITL_iteration_{HITL_iteration}"

            # Loading feedback model
            feedback_model_path = f"{iteration_dir}/{model_pretrained_name}"
            feedback_model = load_feedback_model(feedback_type, feedback_model_path)

            ########################################################
            # Select queries number of smiles with Active Learning #
            ########################################################

            # Never ask for more queries than there are candidate molecules.
            n_queries = min(num_queries, len(smiles))
            new_queried_smiles_indices = select_query_feedback(feedback_type, feedback_model,
                                                               scaffold_df, n_queries, list(smiles),
                                                               selected_feedback, acquisition, rng)

            print(f"Feedback idx at HITL iteration {HITL_iteration}: {new_queried_smiles_indices}")

            new_queried_smiles = [smiles[i] for i in new_queried_smiles_indices]

            selected_feedback = np.hstack((selected_feedback, new_queried_smiles_indices))

            new_queried_smiles_human_score = smiles_human_score(new_queried_smiles, sigma_noise)

            print(f"Human score at HITL iteration {HITL_iteration}: {new_queried_smiles_human_score}")

            # use the augmented training data to retrain the model
            new_queried_fps = np.array([compute_fingerprints(smi) for smi in new_queried_smiles])

            iteration_training_dataset_outputs = create_drd2_dataset(feedback_type,
                                                                     new_queried_smiles,
                                                                     new_queried_smiles_human_score,
                                                                     new_queried_fps)

            print(f"New queried dataset size: {len(iteration_training_dataset_outputs[size_key])}")

            # combining the base training dataset with the new queried dataset
            base_training_dataset_outputs = combine_drd2_dataset(feedback_type, base_training_dataset_outputs,
                                                                 iteration_training_dataset_outputs)

            print(f"Combined dataset size: {len(base_training_dataset_outputs[size_key])}")

            # save augmented training data
            save_drd2_dataset(feedback_type, iteration_training_dataset_outputs, f"{iteration_dir}/iteration_queried_data.csv")
            save_drd2_dataset(feedback_type, base_training_dataset_outputs, f"{iteration_dir}/iteration_combined_data.csv")

            print(f"Saved augmented training data at {iteration_dir}/iteration_combined_data.csv")

            # Retraining the feedback model using the augmented train set
            retrained_feedback_model = retrain_feedback_model(feedback_type, feedback_model,
                                                              base_training_dataset_outputs,
                                                              training_epochs)

            # Store the retrained weights where the next step will load them from.
            feedback_model_saving_path = _feedback_model_saving_path(
                output_dir, model_pretrained_name,
                REINVENT_round, HITL_iteration, num_rounds, num_iters)
            torch.save(retrained_feedback_model.state_dict(), feedback_model_saving_path)

            print(f"Saved retrained feedback model at {feedback_model_saving_path}")


def _run_reinvent(reinvent_env, reinvent_dir, configuration_JSON_path, round_output_dir):
    """Run REINVENT's input.py on the given config, logging to run.out / run.err; return the exit code."""
    command = f"{reinvent_env}/bin/python"
    script = f"{reinvent_dir}/input.py"
    stderr_file = f"{round_output_dir}/run.err"
    stdout_file = f"{round_output_dir}/run.out"

    cmd = [command, script, configuration_JSON_path]
    # Redirect both streams to files inside the round folder.
    with open(stderr_file, 'w') as ferr, open(stdout_file, 'w') as fout:
        result = subprocess.run(cmd, text=True, stdout=fout, stderr=ferr)
    return result.returncode


def _feedback_model_saving_path(output_dir, model_pretrained_name,
                                REINVENT_round, HITL_iteration, num_rounds, num_iters):
    """Return the path where the model retrained at (round, iteration) is stored for the next step."""
    if HITL_iteration < num_iters:
        # Moving on to the next iteration at the current round
        return f"{output_dir}/REINVENT_round_{REINVENT_round}/HITL_iteration_{HITL_iteration + 1}/{model_pretrained_name}"
    if REINVENT_round < num_rounds:
        # Moving to the first iteration at the next round
        return f"{output_dir}/REINVENT_round_{REINVENT_round + 1}/HITL_iteration_1/{model_pretrained_name}"
    # Last iteration at the last round, we save to the final model
    return f"{output_dir}/final_{model_pretrained_name}"

In [12]:
# Show the kernel's working directory; relative paths such as the run cells'
# output_dir are resolved against it.
print(os.getcwd())

/home/springnuance/reinvent-hitl/Base-Code-Binh


### Running score regression model

In [2]:
! pip install -e "/home/springnuance/reinvent-hitl/reinvent-scoring"

seed = 42
restart = False # If restart is True, we would rerun everything
                # If restart is False, we would continue from the latest found HITL_iteration
                
# change these path variables as required
reinvent_dir = "/home/springnuance/reinvent-hitl/Reinvent" # We must use absolute path
reinvent_env = "/home/springnuance/miniconda3/envs/ReinventCommunity" # We must use absolute path

# the performance of the initial model should not be good. Specifically, it should work at 0.5 accuracy 
# If the model is too good, retrain the model to become weaker, we are trying to make the model to learn via HITL

feedback_type = "scoring" # scoring, comparing, ranking

# feedback type as scoring:
# Given a molecule, what is the probability that the molecule is active regarding DRD2?  

base_training_dataset_path = "/home/springnuance/reinvent-hitl/Base-Code-Binh/training_Score_Regression_model/small_drd2_training_data.csv"
base_testing_dataset_path = "/home/springnuance/reinvent-hitl/Base-Code-Binh/training_Score_Regression_model/small_drd2_testing_data.csv"
model_pretrained_path = "/home/springnuance/reinvent-hitl/Base-Code-Binh/training_Score_Regression_model/score_regression_model.pth"
model_pretrained_name = "score_regression_model.pth"

num_rounds = 2 # number of rounds, corresponding to R in the paper
num_iters = 3 # number of iterations of showing molecules to the human for feedback at each round
REINVENT_n_steps = 50 # number of REINVENT optimization steps
batch_size = 256 # batch size of the reinforcement learning model, or size of scaffold_memory.csv

# Please look at the thompson sampling code and fix it!
acquisition = "greedy" # acquisition: 'random', 'uncertainty', 'thompson', 'greedy' 

sigma_noise = 0.1 # noise level for simulated chemist's responses

num_queries = 10 # number of molecules, pairs or a set of molecules, dependig on the task, 
                 # shown to the simulated chemist at each HITL_iteration

choose_top_smiles = 50 # number of top molecules to choose from scaffold. 

training_epochs = 5 # number of epochs for training the model in each HITL iteration

output_dir = f"output_score_regression_R{num_rounds}_T{num_iters}_Q{num_queries}_{acquisition}_s{sigma_noise}"

run_HITL_classify(
        seed, reinvent_dir, reinvent_env, output_dir,
        feedback_type, # scoring, comparing, ranking
        base_training_dataset_path, # Path to the pretrained model before REINVENT_round_1/HITL_iteration_1
        base_testing_dataset_path, # Name of the pretrained model before REINVENT_round_1/HITL_iteration_1
        model_pretrained_path, # Path to the pretrained model before REINVENT_round_1
        model_pretrained_name, # Name of the pretrained model before REINVENT_round_1
        num_rounds, # number of rounds, corresponding to R in the paper
        num_iters, # number of molecules shown at each HITL_iteration to the human for feedback, corresponding to T in the paper
        num_queries, # number of molecules shown to the simulated chemist at each HITL_iteration
        REINVENT_n_steps, # number of REINVENT optimization steps
        batch_size, # batch size of the reinforcement learning model, or size of scaffold_memory.csv
        acquisition, # acquisition: 'uncertainty', 'random', 'thompson', 'greedy' (if None run with no human interaction)
        sigma_noise, # noise level for simulated chemist's responses
        choose_top_smiles, # number of top scoring molecules to choose for feedback
        training_epochs, # number of epochs for training the model in each HITL iteration
)


Obtaining file:///home/springnuance/reinvent-hitl/reinvent-scoring
  Preparing metadata (setup.py) ... [?25ldone
[?25hInstalling collected packages: reinvent-scoring
  Attempting uninstall: reinvent-scoring
    Found existing installation: reinvent-scoring 0.0.73
    Uninstalling reinvent-scoring-0.0.73:
      Successfully uninstalled reinvent-scoring-0.0.73
  Running setup.py develop for reinvent-scoring
Successfully installed reinvent-scoring-0.0.73


NameError: name 'run_HITL_classify' is not defined

### Running Bradley Terry model

In [13]:
! pip install -e "/home/springnuance/reinvent-hitl/reinvent-scoring"

seed = 42
restart = False # If restart is True, we would rerun everything
                # If restart is False, we would continue from the latest found HITL_iteration
                    
# change these path variables as required
reinvent_dir = "/home/springnuance/reinvent-hitl/Reinvent" # We must use absolute path
reinvent_env = "/home/springnuance/miniconda3/envs/ReinventCommunity" # We must use absolute path

# the performance of the initial model should not be good. Specifically, it should work at 0.5 accuracy 
# If the model is too good, retrain the model to become weaker, we are trying to make the model to learn via HITL

feedback_type = "comparing" # scoring, comparing, ranking

# feedback type as comparing:
# Given two molecules, what is the probability that the first molecule is more active than the second molecule regarding DRD2?

base_training_dataset_path = "/home/springnuance/reinvent-hitl/Base-Code-Binh/training_Bradley_Terry_model/small_drd2_training_data.csv"
base_testing_dataset_path = "/home/springnuance/reinvent-hitl/Base-Code-Binh/training_Bradley_Terry_model/small_drd2_testing_data.csv"
model_pretrained_path = "/home/springnuance/reinvent-hitl/Base-Code-Binh/training_Bradley_Terry_model/bradley_terry_model.pth"
model_pretrained_name = "bradley_terry_model.pth"

num_rounds = 2 # number of rounds, corresponding to R in the paper
num_iters = 3 # number of iterations of showing molecules to the human for feedback at each round

REINVENT_n_steps = 5 # number of REINVENT optimization steps
batch_size = 32 # batch size of the reinforcement learning model, or size of scaffold_memory.csv

# Please look at the thompson sampling code and fix it!
acquisition = "random" # acquisition: 'uncertainty', 'random', 'thompson', 'greedy' 

sigma_noise = 0.1 # noise level for simulated chemist's responses

num_queries = 10 # number of molecules, pairs or a set of molecules, dependig on the task, 
                 # shown to the simulated chemist at each HITL_iteration
choose_top_smiles = 100 # number of top molecules to choose from scaffold. 

training_epochs = 5 # number of epochs for training the model in each HITL iteration

output_dir = f"output_bradley_terry_R{num_rounds}_T{num_iters}_Q{num_queries}_{acquisition}_s{sigma_noise}"

run_HITL_classify(
        seed, reinvent_dir, reinvent_env, output_dir,
        feedback_type, # scoring, comparing, ranking
        base_training_dataset_path, # Path to the pretrained model before REINVENT_round_1/HITL_iteration_1
        base_testing_dataset_path, # Name of the pretrained model before REINVENT_round_1/HITL_iteration_1
        model_pretrained_path, # Path to the pretrained model before REINVENT_round_1
        model_pretrained_name, # Name of the pretrained model before REINVENT_round_1
        num_rounds, # number of rounds, corresponding to R in the paper
        num_iters, # number of molecules shown at each HITL_iteration to the human for feedback, corresponding to T in the paper
        num_queries, # number of molecules shown to the simulated chemist at each HITL_iteration
        REINVENT_n_steps, # number of REINVENT optimization steps
        batch_size, # batch size of the reinforcement learning model, or size of scaffold_memory.csv
        acquisition, # acquisition: 'uncertainty', 'random', 'thompson', 'greedy' (if None run with no human interaction)
        sigma_noise, # noise level for simulated chemist's responses
        choose_top_smiles, # number of top scoring molecules to choose for feedback
        training_epochs, # number of epochs for training the model in each HITL iteration
)

Obtaining file:///home/springnuance/reinvent-hitl/reinvent-scoring
  Preparing metadata (setup.py) ... [?25ldone
[?25hInstalling collected packages: reinvent-scoring
  Attempting uninstall: reinvent-scoring
    Found existing installation: reinvent-scoring 0.0.73
    Uninstalling reinvent-scoring-0.0.73:
      Successfully uninstalled reinvent-scoring-0.0.73
  Running setup.py develop for reinvent-scoring
Successfully installed reinvent-scoring-0.0.73

Running DRD2 (one objective) with rounds 2, iters 3, queries 10, seed 42
Results will be saved at output_bradley_terry_R2_T3_Q10_random_s0.1
Loading initial training and testing datasets successfully
REINVENT round = 1
Creating config file: output_bradley_terry_R2_T3_Q10_random_s0.1/REINVENT_round_1/config.json.
Run REINVENT


Found local copy...


Exit code: 0
Number of SMILES in scaffold_memory.csv:  124
----------------------------------
HITL iteration = 1
Loading Bradley Terry model successfully from output_bradley_terry_R2_T3_Q10_random_s0.1/REINVENT_round_1/HITL_iteration_1/bradley_terry_model.pth
Feedback idx at HITL iteration 1: [96 71  8 60 41 94 68  9 19 82]
Human score at HITL iteration 1: [0.07038732416895124, 0.0, 0.06710522471843096, 0.15469819153594322, 0.0, 0.0, 0.15811997926963975, 0.07932702198016026, 0.6791328819885362, 0.055336551097420125]
New queried dataset size: 45
Combined dataset size: 69
Saved augmented training data at output_bradley_terry_R2_T3_Q10_random_s0.1/REINVENT_round_1/HITL_iteration_1/iteration_combined_data.csv


KeyboardInterrupt: 

### Running Rank ListNet model

In [17]:
! pip install -e "/home/springnuance/reinvent-hitl/reinvent-scoring"

seed = 42
restart = False # If restart is True, we would rerun everything
                # If restart is False, we would continue from the latest found HITL_iteration
                
# change these path variables as required
reinvent_dir = os.path.expanduser("/home/springnuance/reinvent-hitl/Reinvent") # We must use absolute path
reinvent_env = os.path.expanduser("/home/springnuance/miniconda3/envs/ReinventCommunity") # We must use absolute path

# the performance of the initial model should not be good. Specifically, it should work at 0.5 accuracy 
# If the model is too good, retrain the model to become weaker, we are trying to make the model to learn via HITL

feedback_type = "ranking" # scoring, comparing, ranking

# feedback type as ranking:
# Given N molecules, what are the orders of preference of these molecules regarding DRD2?

base_training_dataset_path = "/home/springnuance/reinvent-hitl/Base-Code-Binh/training_Rank_ListNet_model/small_drd2_training_data.csv"
base_testing_dataset_path = "/home/springnuance/reinvent-hitl/Base-Code-Binh/training_Rank_ListNet_model/small_drd2_testing_data.csv"
model_pretrained_path = "/home/springnuance/reinvent-hitl/Base-Code-Binh/training_Rank_ListNet_model/rank_listnet_model.pth"
model_pretrained_name = "rank_listnet_model.pth"

num_rounds = 10 # number of rounds, corresponding to R in the paper
num_iters = 5 # number of iterations of showing molecules to the human for feedback at each round
REINVENT_n_steps = 25 # number of REINVENT optimization steps

# WARNING: CHOOSING LARGER BATCH SIZE WOULD EXPONENTIALLY INCREASE THE NUMBER OF COMBINATIONS
# BETTER KEEP IT AT 32 OR 64
# For example, REINVENT would fail and return exit code -9 if it is 80

batch_size = 32 # batch size of the reinforcement learning model, or size of scaffold_memory.csv

# Please look at the thompson sampling code and fix it!
acquisition = "random" # acquisition: 'uncertainty', 'random', 'thompson', 'greedy' 

sigma_noise = 0.0 # noise level for simulated chemist's responses

num_queries = 20 # number of molecules, pairs or a set of molecules, depending on the task, 
                 # shown to the simulated chemist at each HITL_iteration
choose_top_smiles = 200 # number of top molecules to choose from scaffold. 

training_epochs = 1 # number of epochs for training the model in each HITL iteration

output_dir = f"output_rank_listnet_R{num_rounds}_T{num_iters}_Q{num_queries}_{acquisition}_s{sigma_noise}"

run_HITL_classify(
        seed, reinvent_dir, reinvent_env, output_dir, 
        feedback_type, # scoring, comparing, ranking
        base_training_dataset_path, # Path to the pretrained model before REINVENT_round_1/HITL_iteration_1
        base_testing_dataset_path, # Name of the pretrained model before REINVENT_round_1/HITL_iteration_1
        model_pretrained_path, # Path to the pretrained model before REINVENT_round_1/HITL_iteration_1
        model_pretrained_name, # Name of the pretrained model before REINVENT_round_1/HITL_iteration_1
        num_rounds, # number of rounds, corresponding to R in the paper
        num_iters, # number of iterations of showing molecules to the human for feedback at each round, corresponding to T in the paper
        num_queries, # number of molecules shown to the simulated chemist at each HITL_iteration
        REINVENT_n_steps, # number of REINVENT optimization steps
        batch_size, # batch size of the reinforcement learning model, or size of scaffold_memory.csv
        acquisition, # acquisition: 'uncertainty', 'random', 'thompson', 'greedy' (if None run with no human interaction)
        sigma_noise, # noise level for simulated chemist's responses
        choose_top_smiles, # number of top molecules to choose from scaffold
        training_epochs, # number of epochs for training the model in each HITL iteration
)


Obtaining file:///home/springnuance/reinvent-hitl/reinvent-scoring
  Preparing metadata (setup.py) ... [?25ldone
[?25hInstalling collected packages: reinvent-scoring
  Attempting uninstall: reinvent-scoring
    Found existing installation: reinvent-scoring 0.0.73
    Uninstalling reinvent-scoring-0.0.73:
      Successfully uninstalled reinvent-scoring-0.0.73
  Running setup.py develop for reinvent-scoring
Successfully installed reinvent-scoring-0.0.73

Running DRD2 (one objective) with rounds 10, iters 5, queries 20, seed 42
Results will be saved at output_rank_listnet_R10_T5_Q20_random_s0.0
Loading initial training and testing datasets successfully
REINVENT round = 1
Creating config file: output_rank_listnet_R10_T5_Q20_random_s0.0/REINVENT_round_1/config.json.
Run REINVENT


Found local copy...


Exit code: 0
Number of SMILES in scaffold_memory.csv:  785
----------------------------------
HITL iteration = 1
Loading Rank ListNet model successfully from output_rank_listnet_R10_T5_Q20_random_s0.0/REINVENT_round_1/HITL_iteration_1/rank_listnet_model.pth
Feedback idx at HITL iteration 1: [140 139  38 187 167 147  25  16 119 101 184 186  90 159  17 142 154  80
 131 100]
Human score at HITL iteration 1: [0.9542954948344743, 0.0007585751838657166, 0.005540676267494359, 0.09403863127628723, 0.0005101371629612877, 0.013774972117641537, 0.029814977165815697, 0.016238625203295918, 0.33938953299934427, 0.000276966127419286, 0.01033759570646093, 0.01858239063911722, 0.028733176897448576, 0.006037345870299654, 0.01802089221013777, 0.0016668209762802162, 0.012454092941693409, 0.001028840726939133, 0.005633241529576334, 0.0023144929840674695]
New queried dataset size: 1140
Combined dataset size: 1164
Saved augmented training data at output_rank_listnet_R10_T5_Q20_random_s0.0/REINVENT_round_1/HI

Found local copy...


Epoch 1, Loss: 9.281193342758343e-05
Epoch 1, Loss: 9.437937114853412e-05
Epoch 1, Loss: 9.523719927528873e-05
Epoch 1, Loss: 9.571758710080758e-05
Epoch 1, Loss: 9.665907418821007e-05
Epoch 1, Loss: 0.0005180555034106268
Saved retrained feedback model at output_rank_listnet_R10_T5_Q20_random_s0.0/REINVENT_round_1/HITL_iteration_2/rank_listnet_model.pth
----------------------------------
HITL iteration = 2
Loading Rank ListNet model successfully from output_rank_listnet_R10_T5_Q20_random_s0.0/REINVENT_round_1/HITL_iteration_2/rank_listnet_model.pth
Feedback idx at HITL iteration 2: [124 133  11 166  86 146 123  29  78  36 109  30 143  62   7  93 179 199
 105  67]
Human score at HITL iteration 2: [0.011835926908550665, 0.00038767474392169466, 0.030635647302016754, 0.0006883672019101086, 0.6083159306205319, 0.11450057743134232, 0.026277949463692673, 0.059476468414362625, 0.2900909327011289, 9.522609014828154e-05, 0.009053170570335325, 0.008518026765702336, 0.00887749090305092, 0.02351741

Found local copy...


Epoch 1, Loss: 3.0040762794669718e-05
Epoch 1, Loss: 3.033857501577586e-05
Epoch 1, Loss: 3.0539551516994834e-05
Epoch 1, Loss: 3.0586736102122813e-05
Epoch 1, Loss: 3.0651994165964425e-05
Epoch 1, Loss: 3.069406375288963e-05
Saved retrained feedback model at output_rank_listnet_R10_T5_Q20_random_s0.0/REINVENT_round_1/HITL_iteration_3/rank_listnet_model.pth
----------------------------------
HITL iteration = 3
Loading Rank ListNet model successfully from output_rank_listnet_R10_T5_Q20_random_s0.0/REINVENT_round_1/HITL_iteration_3/rank_listnet_model.pth
Feedback idx at HITL iteration 3: [ 18  54 115 155  71 175 152  47 112  28 126  55 144 158  95 134  73 163
 169  77]
Human score at HITL iteration 3: [0.038920079618572676, 0.015695640207131503, 0.0012488808788619508, 0.0005387039705822871, 0.024418029067050925, 0.00064007590052303, 0.007598157147197996, 0.019454175387893412, 0.0003409379768917956, 0.0024589959667501255, 0.005707544952711055, 0.04432963002249314, 0.0001737636935651253, 0

Found local copy...


Epoch 1, Loss: 1.2517819413915277e-05
Epoch 1, Loss: 1.2538061127997935e-05
Epoch 1, Loss: 1.2637079635169357e-05
Epoch 1, Loss: 1.558479536137943e-05
Saved retrained feedback model at output_rank_listnet_R10_T5_Q20_random_s0.0/REINVENT_round_1/HITL_iteration_4/rank_listnet_model.pth
----------------------------------
HITL iteration = 4
Loading Rank ListNet model successfully from output_rank_listnet_R10_T5_Q20_random_s0.0/REINVENT_round_1/HITL_iteration_4/rank_listnet_model.pth
Feedback idx at HITL iteration 4: [ 97  12  14  56  99  40 192 102 196  19 198 138 114 111 113  70 103 157
  79  21]
Human score at HITL iteration 4: [0.17309724522279868, 0.00043325890630916954, 0.5378792533974456, 0.0101685600140103, 0.37199252299450686, 0.038691518493632567, 0.0054119920174428284, 0.003763831035210668, 0.0033591208029725827, 0.018090441804134712, 0.00031015422318228926, 0.004892617566822852, 0.3761756441050509, 0.02149562392181323, 0.7266077037500958, 0.010599348814571087, 7.438445718548502e

Found local copy...


Epoch 1, Loss: 3.263377816438151e-05
Saved retrained feedback model at output_rank_listnet_R10_T5_Q20_random_s0.0/REINVENT_round_1/HITL_iteration_5/rank_listnet_model.pth
----------------------------------
HITL iteration = 5
Loading Rank ListNet model successfully from output_rank_listnet_R10_T5_Q20_random_s0.0/REINVENT_round_1/HITL_iteration_5/rank_listnet_model.pth
Feedback idx at HITL iteration 5: [ 20  31 180   2 141  52  84  57 135  85 176 116 150  64  81  13  66 151
 132 190]
Human score at HITL iteration 5: [0.017053239791503652, 0.014559150434579064, 0.023105683837224614, 0.012811871001834927, 0.001076212635586073, 0.06650456390058648, 0.024076602195205417, 0.000330445465903566, 0.22872489551043132, 0.014275512750084452, 0.01209183126164367, 0.0380640496778885, 9.156529602969416e-05, 0.006864462645423188, 0.05350245785202419, 0.26752134692112883, 0.005040761919874857, 0.0013775080228978717, 0.0007275481423545311, 0.0031725928805782936]
New queried dataset size: 1140
Combined da

Found local copy...


Exit code: 0
Number of SMILES in scaffold_memory.csv:  783
----------------------------------
HITL iteration = 1
Loading Rank ListNet model successfully from output_rank_listnet_R10_T5_Q20_random_s0.0/REINVENT_round_2/HITL_iteration_1/rank_listnet_model.pth
Feedback idx at HITL iteration 1: [132 147  15  51 146 195 187 177  50 183 141  21  86 169 156 151  93  89
  62  68]
Human score at HITL iteration 1: [0.013420163637304455, 0.006106257070046325, 0.9310344185076902, 0.007749198537811534, 0.01626872962027841, 0.003018904982753186, 0.005487979418633843, 0.0089328532548344, 0.0024692251674823837, 0.1403191971893395, 0.008128088211439668, 0.9323606507340716, 0.7878713850778384, 0.0016620902237753229, 0.0008143191556980437, 0.06703016014350026, 0.001747437996615119, 0.02605595764477854, 0.18468142241698865, 0.01768891633104311]
New queried dataset size: 1140
Combined dataset size: 6864
Saved augmented training data at output_rank_listnet_R10_T5_Q20_random_s0.0/REINVENT_round_2/HITL_iterat

Found local copy...


Loading Rank ListNet model successfully from output_rank_listnet_R10_T5_Q20_random_s0.0/REINVENT_round_2/HITL_iteration_2/rank_listnet_model.pth
Feedback idx at HITL iteration 2: [150  54  85  33  25 107  96  64 113 119 116  76   7  14  17  35 125 142
 117  78]
Human score at HITL iteration 2: [0.0007699963549256928, 0.030614792488563157, 0.0020992303362393364, 0.016756547902519833, 0.016072311088749312, 0.0077887353173807855, 0.12533911815159868, 0.1709580558960121, 0.0018870904752294527, 0.0009751117148078926, 0.006939657232626914, 0.0003767667932179672, 0.0586341281836162, 0.5, 0.008989734079547642, 0.4634129548575005, 0.0015517251374365193, 0.0007540575351041633, 0.014604307862653682, 0.00019762562208785176]
New queried dataset size: 1140
Combined dataset size: 8004
Saved augmented training data at output_rank_listnet_R10_T5_Q20_random_s0.0/REINVENT_round_2/HITL_iteration_2/iteration_combined_data.csv
Epoch 1, Loss: 2.377877535764128e-06
Epoch 1, Loss: 4.176989023108035e-06
Epoch 1

Found local copy...


Epoch 1, Loss: 2.4070788640528917e-05
Epoch 1, Loss: 2.427584695396945e-05
Epoch 1, Loss: 0.00038863334339112043
Saved retrained feedback model at output_rank_listnet_R10_T5_Q20_random_s0.0/REINVENT_round_2/HITL_iteration_3/rank_listnet_model.pth
----------------------------------
HITL iteration = 3
Loading Rank ListNet model successfully from output_rank_listnet_R10_T5_Q20_random_s0.0/REINVENT_round_2/HITL_iteration_3/rank_listnet_model.pth
Feedback idx at HITL iteration 3: [ 10 191 190  52 105  83  84  73 180  91  30 162 196 171 143  48 110 127
  94  13]
Human score at HITL iteration 3: [0.9951812669033241, 0.023677730528116184, 0.00042840248256400675, 0.056679358714425636, 0.00911201079632099, 0.0077544785414524545, 0.0033270395636325776, 0.005042865669023987, 0.00027656320469815997, 0.5382024353100184, 0.5737244521343007, 0.012153581393010784, 0.0010484620078877285, 0.03718672905869477, 0.018819103569982402, 0.0021397776218778485, 0.6865777173079818, 0.003824164621477794, 0.0013409

Found local copy...


Saved retrained feedback model at output_rank_listnet_R10_T5_Q20_random_s0.0/REINVENT_round_2/HITL_iteration_4/rank_listnet_model.pth
----------------------------------
HITL iteration = 4
Loading Rank ListNet model successfully from output_rank_listnet_R10_T5_Q20_random_s0.0/REINVENT_round_2/HITL_iteration_4/rank_listnet_model.pth
Feedback idx at HITL iteration 4: [ 27 184 115  20 109 178  45 172  42  70 118 139 188  40 131 198  41  57
   5 170]
Human score at HITL iteration 4: [0.00022879861781488465, 0.0015227659807934327, 0.28018663173655395, 0.006167969746132676, 0.06033200187601354, 0.07090317700870749, 0.0036262794360667327, 0.00024241019867442948, 0.05519375273838244, 0.2600560516188085, 0.20377485465703268, 0.007625346607357865, 0.4860258840897437, 0.1178674727602919, 0.03207191376121947, 0.0022322928919826486, 0.04866453357174371, 0.05930571447389981, 0.0015296741717380688, 0.009436505447703568]
New queried dataset size: 1140
Combined dataset size: 10284
Saved augmented traini

Found local copy...


Epoch 1, Loss: 3.772873606067151e-05
Epoch 1, Loss: 5.49561463365287e-05
Saved retrained feedback model at output_rank_listnet_R10_T5_Q20_random_s0.0/REINVENT_round_2/HITL_iteration_5/rank_listnet_model.pth
----------------------------------
HITL iteration = 5
Loading Rank ListNet model successfully from output_rank_listnet_R10_T5_Q20_random_s0.0/REINVENT_round_2/HITL_iteration_5/rank_listnet_model.pth
Feedback idx at HITL iteration 5: [153 185  72  99  26   2  66  60 135 155   4 164 168 161  39  49  90 158
  12  22]
Human score at HITL iteration 5: [0.021493644523574787, 0.012875641562294857, 0.023040252613232767, 0.11763151074059475, 0.09844090925361695, 0.059256555720282024, 0.00504227699641893, 0.01571471718542263, 0.5129612932443736, 0.0014178918021000614, 0.13827291684984752, 0.0025378083861121495, 0.004801889919236833, 0.0001214497749467147, 0.0030604217905346814, 0.039125027712201776, 0.0013377862611739662, 0.00030752663772786473, 0.9270456847675056, 0.012345386885349162]
New q

Found local copy...


Exit code: 0
Number of SMILES in scaffold_memory.csv:  787
----------------------------------
HITL iteration = 1
Loading Rank ListNet model successfully from output_rank_listnet_R10_T5_Q20_random_s0.0/REINVENT_round_3/HITL_iteration_1/rank_listnet_model.pth
Feedback idx at HITL iteration 1: [ 61  36  97 149  98  85 185  53   8  58  28   2  32 188 190 135  67 162
 133 110]
Human score at HITL iteration 1: [0.8878436561003321, 0.2628122024472388, 0.009944762667618356, 0.011495445967819387, 0.9829070330707427, 0.005669686922993082, 0.1144187001515168, 0.02305858909693144, 0.0007731793435334822, 0.1168045526195333, 0.004943923751930015, 6.659543676185125e-05, 0.00039746796088365507, 0.002582139312125012, 0.06332260703407937, 0.869059096966333, 0.0019022346277832172, 0.00023257557657258688, 0.004653245168433133, 5.851077330530557e-05]
New queried dataset size: 1140
Combined dataset size: 12564
Saved augmented training data at output_rank_listnet_R10_T5_Q20_random_s0.0/REINVENT_round_3/HITL_

Found local copy...


Epoch 1, Loss: 0.00022472521759482335
Saved retrained feedback model at output_rank_listnet_R10_T5_Q20_random_s0.0/REINVENT_round_3/HITL_iteration_2/rank_listnet_model.pth
----------------------------------
HITL iteration = 2
Loading Rank ListNet model successfully from output_rank_listnet_R10_T5_Q20_random_s0.0/REINVENT_round_3/HITL_iteration_2/rank_listnet_model.pth
Feedback idx at HITL iteration 2: [ 95 184  23 151 101 134 105  76  22  59 187  25  15 107  63 128 173 130
 191  81]
Human score at HITL iteration 2: [0.004365836895902415, 0.000202147565573763, 0.000641826054799399, 0.011290344020938209, 9.388946186224868e-05, 0.11398919161583092, 0.24880497979835076, 0.8008672481070902, 6.426617030847283e-05, 0.007562254759261019, 0.0007049215410369902, 0.06347552377797011, 0.012297955006044442, 7.482046191253079e-05, 0.028110739674265646, 0.001396706195605443, 0.8114713075565998, 0.0024496518682156927, 0.002093711758001343, 0.0022369285662179285]
New queried dataset size: 1140
Combined

Found local copy...


Loading Rank ListNet model successfully from output_rank_listnet_R10_T5_Q20_random_s0.0/REINVENT_round_3/HITL_iteration_3/rank_listnet_model.pth
Feedback idx at HITL iteration 3: [146 132 158  68 118 108 125  33 145  45 159 165  86  78   3  54  47  90
  19 156]
Human score at HITL iteration 3: [0.02627046582450695, 0.0034771538695879877, 0.005005813824014194, 0.0014245106681679003, 0.008895890102583584, 0.003141030401033358, 0.002850085616142974, 0.0981545205955175, 0.0038414241005798014, 0.004060165998143026, 0.012283624808402201, 0.044367812240061866, 0.9413119806227995, 0.8997606981277597, 0.7002131121040605, 0.10959259800159014, 0.19236336689618536, 0.005213221680772978, 0.000834108531664901, 0.22640262707012987]
New queried dataset size: 1140
Combined dataset size: 14844
Saved augmented training data at output_rank_listnet_R10_T5_Q20_random_s0.0/REINVENT_round_3/HITL_iteration_3/iteration_combined_data.csv
Epoch 1, Loss: 1.6366393538191915e-06
Epoch 1, Loss: 2.6208508643321693e-06

Found local copy...


Epoch 1, Loss: 6.476912337044875e-05
Saved retrained feedback model at output_rank_listnet_R10_T5_Q20_random_s0.0/REINVENT_round_3/HITL_iteration_4/rank_listnet_model.pth
----------------------------------
HITL iteration = 4
Loading Rank ListNet model successfully from output_rank_listnet_R10_T5_Q20_random_s0.0/REINVENT_round_3/HITL_iteration_4/rank_listnet_model.pth
Feedback idx at HITL iteration 4: [167  69  27 150 198 115  17  41 137  89 153  44 181 177  38 183  11 164
  62 176]
Human score at HITL iteration 4: [0.0027918714983138065, 0.004861064704245512, 0.025881765437378357, 0.0010180986653650363, 0.03847737304155601, 0.0962525879468358, 0.0070405901087830375, 0.004126609211081756, 0.0036520772715325465, 0.05907877715604976, 0.024389186644517762, 0.984190748159112, 0.0010843426326949456, 0.042837858836568946, 0.3311103985359444, 0.06880520645645927, 0.005830188255276822, 0.03244266825730293, 0.04015282189170953, 8.891235089551587e-05]
New queried dataset size: 1140
Combined datas

Found local copy...


Epoch 1, Loss: 5.162915476830676e-05
Epoch 1, Loss: 5.205912020755932e-05
Epoch 1, Loss: 5.2179348131176084e-05
Epoch 1, Loss: 5.237026198301464e-05
Epoch 1, Loss: 5.2807838073931634e-05
Epoch 1, Loss: 5.2873023378197104e-05
Epoch 1, Loss: 5.304151272866875e-05
Epoch 1, Loss: 7.081993518909258e-05
Saved retrained feedback model at output_rank_listnet_R10_T5_Q20_random_s0.0/REINVENT_round_3/HITL_iteration_5/rank_listnet_model.pth
----------------------------------
HITL iteration = 5
Loading Rank ListNet model successfully from output_rank_listnet_R10_T5_Q20_random_s0.0/REINVENT_round_3/HITL_iteration_5/rank_listnet_model.pth
Feedback idx at HITL iteration 5: [100  88 161  71 178 102  35  56 131  94 174 144  73 116 143   7  92 163
  48 139]
Human score at HITL iteration 5: [0.0049682161807580675, 0.049338805672513614, 0.0015639027418600574, 0.46348893138716946, 0.020501166232387215, 0.0471147762899373, 0.007074101640968472, 0.020065508509959393, 0.0032330964970033642, 0.00200782325779579

KeyboardInterrupt: 