In [16]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


##### Please run this code with the kernel reinvent.v3.2

In [17]:
# load dependencies
import sys
import pickle
import os
import shutil
import json
import pandas as pd
import numpy as np
import rdkit.Chem as Chem
from numpy.random import default_rng
import torch
from ast import literal_eval
from torch import nn, optim
from rdkit import Chem
from rdkit.Chem import AllChem
from tdc import Oracle
import subprocess

### First, we need to install the custom reinvent scoring package to support the Bradley-Terry model

In [18]:
# Report which Python interpreter this kernel is running.
import sys

# sys.version is the full version string, including build and compiler details.
print(f"Python version: {sys.version}")

Python version: 3.7.6 | packaged by conda-forge | (default, Jun  1 2020, 18:57:50) 
[GCC 7.5.0]


In [19]:
! pip show reinvent_scoring

[0m

In [20]:
# ! conda install scikit-learn=0.21.3

##### If reinvent_scoring is already installed, uninstall it first

In [21]:
! pip uninstall -y reinvent_scoring

[0m

##### Now we install the custom reinvent scoring package
##### The flag -e means that the package is installed in editable mode, so that changes to the code will be immediately available without reinstalling the package. All package info is stored in the setup.py file.

In [22]:
! pip install -e "/home/springnuance/reinvent-hitl/reinvent-scoring"
! pip install -e "/home/springnuance/reinvent-hitl/reinvent-chemistry"
! pip install -e "/home/springnuance/reinvent-hitl/reinvent-models"

Obtaining file:///home/springnuance/reinvent-hitl/reinvent-scoring
  Preparing metadata (setup.py) ... [?25ldone
[?25hInstalling collected packages: reinvent-scoring
  Running setup.py develop for reinvent-scoring
Successfully installed reinvent-scoring-0.0.73
Obtaining file:///home/springnuance/reinvent-hitl/reinvent-chemistry
  Preparing metadata (setup.py) ... [?25ldone
[?25hInstalling collected packages: reinvent-chemistry
  Attempting uninstall: reinvent-chemistry
    Found existing installation: reinvent-chemistry 0.0.51
    Uninstalling reinvent-chemistry-0.0.51:
      Successfully uninstalled reinvent-chemistry-0.0.51
  Running setup.py develop for reinvent-chemistry
Successfully installed reinvent-chemistry-0.0.51
Obtaining file:///home/springnuance/reinvent-hitl/reinvent-models
  Preparing metadata (setup.py) ... [?25ldone
[?25hInstalling collected packages: reinvent-models
  Attempting uninstall: reinvent-models
    Found existing installation: reinvent-models 0.0.15rc

In [23]:
# ! conda install -y scikit-learn=0.21.3
! pip list | grep reinvent_scoring

In [26]:
from training_Bradley_Terry_model.bradley_terry import BradleyTerryModel
from training_Rank_ListNet_model.rank_listnet import RankListNetModel
from training_Score_Regression_model.score_regression import ScoreRegressionModel
from scripts.helper import load_drd2_dataset, write_REINVENT_config, change_config_json, \
                    read_scaffold_result, load_feedback_model, smiles_human_score, \
                    compute_fingerprints, retrain_feedback_model,\
                    create_drd2_dataset, combine_drd2_dataset, save_drd2_dataset
                        
from scripts.acquisition import select_query_feedback
from scripts.predict import predict_proba_from_model

def check_create(path):
    """
    Ensure that the directory `path` exists, creating it if necessary.

    Missing parent directories are created as well. Calling this on a
    directory that already exists is a no-op.

    Parameters
    ----------
    path : str or os.PathLike
        Directory to create.

    Raises
    ------
    FileExistsError
        If `path` already exists but is not a directory (for example, a
        regular file with the same name).
    """
    # exist_ok=True removes the check-then-create race of the
    # "if not os.path.exists(path): os.makedirs(path)" pattern.
    os.makedirs(path, exist_ok=True)
        
def run_HITL_classify(
        seed, reinvent_dir, reinvent_env, output_dir,
        feedback_type, # scoring, comparing, ranking
        base_training_dataset_path, # path to the base training dataset
        model_pretrained_path, # Path to the pretrained model before REINVENT_round_1
        model_pretrained_name, # Name of the pretrained model before REINVENT_round_1
        num_rounds, # number of rounds, corresponding to R in the paper
        num_iters, # number of iterations of showing molecules to the human for feedback at each round, corresponding to T in the paper
        num_queries, # number of molecules shown to the simulated chemist at each iteration
        REINVENT_n_steps, # number of REINVENT optimization steps. This is not related to the HITL but on the REINVENT side
        batch_size, # batch size of the reinforcement learning model, or size of scaffold_memory.csv
        acquisition, # acquisition: 'uncertainty', 'random', 'thompson', 'greedy'
        sigma_noise, # noise level for simulated chemist's responses
        choose_top_smiles, # number of top scoring molecules to choose for feedback
        training_epochs, # number of epochs for training the model in each HITL iteration
        learning_rate, # learning rate for training the model in each HITL iteration
        ):
    """
    Run the full human-in-the-loop (HITL) experiment.

    For each of `num_rounds` rounds, REINVENT is run once with the current
    feedback model, the top molecules are read from its
    `results/scaffold_memory.csv`, and `num_iters` HITL iterations follow.
    Each iteration selects molecules with the `acquisition` strategy, scores
    them with `smiles_human_score`, appends them to the training data and
    retrains the feedback model.

    Files written under `output_dir`:
      - `REINVENT_round_<r>/config.json`, `run.out`, `run.err`
        (via `run_REINVENT_round`)
      - `REINVENT_round_<r>/HITL_iteration_<t>/iteration_queried_data.csv`
      - `REINVENT_round_<r>/HITL_iteration_<t>/iteration_combined_data.csv`
      - `REINVENT_round_<r>/HITL_iteration_<t>/<model_pretrained_name>`
        (state dict of the retrained model)

    Parameters
    ----------
    seed : int
        Seed for NumPy's global RNG and for the generator passed to the
        acquisition function.
    reinvent_dir, reinvent_env : str
        REINVENT checkout (must contain `input.py`) and the environment whose
        `bin/python` runs it.
    output_dir : str
        Root directory for all results; also used as the REINVENT job id.
    feedback_type : str
        One of "scoring", "comparing", "ranking".
    base_training_dataset_path : str
        Path to the initial training dataset.
    model_pretrained_path : str
        Path to the model used before round 1 / iteration 1.
    model_pretrained_name : str
        File name under which each retrained model is saved.
    num_rounds, num_iters, num_queries : int
        Number of REINVENT rounds, HITL iterations per round, and molecules
        queried per iteration (capped at the number of available molecules).
    REINVENT_n_steps, batch_size : int
        Passed to `change_config_json` for the REINVENT run.
    acquisition : str
        Acquisition strategy name forwarded to `select_query_feedback`.
    sigma_noise : float
        Noise level forwarded to `smiles_human_score`.
    choose_top_smiles : int
        Forwarded to `read_scaffold_result`.
    training_epochs, learning_rate
        Forwarded to `retrain_feedback_model`.

    Returns
    -------
    None
    """
    np.random.seed(seed)
    rng = default_rng(seed)
    # NOTE(review): torch's RNG is not seeded here. If retrain_feedback_model
    # relies on torch randomness (shuffling, dropout, ...), runs may not be
    # fully reproducible -- confirm and seed torch if needed.

    # Create the root output directory and every round/iteration folder up front.
    check_create(output_dir)
    for REINVENT_round in range(1, num_rounds + 1):
        check_create(f"{output_dir}/REINVENT_round_{REINVENT_round}")
        for HITL_iteration in range(1, num_iters + 1):
            check_create(f"{output_dir}/REINVENT_round_{REINVENT_round}/HITL_iteration_{HITL_iteration}")

    print(f"\nRunning DRD2 (one objective) with rounds {num_rounds}, iters {num_iters}, queries {num_queries}, seed {seed}")
    print(f"Results will be saved at {output_dir}")

    base_training_dataset_outputs = load_drd2_dataset(feedback_type=feedback_type,
                                                      data_path=base_training_dataset_path)

    print("Loading initial training datasets successfully")

    # Key holding the SMILES column used to report dataset sizes.
    size_key = "smiles" if feedback_type == "scoring" else "smiles_1"

    # ########################### REINVENT rounds ######################################

    for REINVENT_round in range(1, num_rounds + 1):
        round_dir = f"{output_dir}/REINVENT_round_{REINVENT_round}"

        if REINVENT_round == 1:
            current_model_path = model_pretrained_path
        else:
            # The last iteration's model from the previous round
            current_model_path = f"{output_dir}/REINVENT_round_{REINVENT_round - 1}/HITL_iteration_{num_iters}/{model_pretrained_name}"

        # Write the config, launch REINVENT and report its exit code. This used
        # to be an inline copy of run_REINVENT_round; delegating keeps the two
        # code paths identical.
        run_REINVENT_round(REINVENT_round,
                           reinvent_dir, reinvent_env, output_dir,
                           feedback_type,
                           current_model_path,
                           REINVENT_n_steps,
                           batch_size)

        # REINVENT is expected to have written its results to
        # f"{round_dir}/results"; read the high scoring molecules from there.
        output_high_score = read_scaffold_result(f"{round_dir}/results/scaffold_memory.csv",
                                                 choose_top_smiles=choose_top_smiles)

        scaffold_df = output_high_score["scaffold_df"]
        smiles = output_high_score["smiles"]

        # Indexes of the molecules already selected for feedback in this round.
        selected_feedback = np.empty(0).astype(int)

        ########################### HITL_iteration in each REINVENT round #####################

        for HITL_iteration in range(1, num_iters + 1): # T number of HITL_iterations
            iteration_dir = f"{round_dir}/HITL_iteration_{HITL_iteration}"

            print("----------------------------------")
            print(f"HITL iteration = {HITL_iteration}")

            if HITL_iteration == 1:
                # Same model REINVENT just used: the pretrained model in round 1,
                # otherwise the last model of the previous round.
                feedback_model_path = current_model_path
            else:
                # The previous HITL iteration's model from the current round
                feedback_model_path = f"{round_dir}/HITL_iteration_{HITL_iteration - 1}/{model_pretrained_name}"

            feedback_model = load_feedback_model(feedback_type, feedback_model_path)

            ########################################################
            # Select queries number of smiles with Active Learning #
            ########################################################

            # Never ask for more molecules than REINVENT produced.
            n_to_query = min(num_queries, len(smiles))
            new_queried_smiles_indices = select_query_feedback(feedback_type, feedback_model,
                                                               scaffold_df, n_to_query, list(smiles),
                                                               selected_feedback, acquisition, rng)

            new_queried_smiles = [smiles[i] for i in new_queried_smiles_indices]

            selected_feedback = np.hstack((selected_feedback, new_queried_smiles_indices))

            new_queried_smiles_human_score = smiles_human_score(new_queried_smiles, sigma_noise)

            print(f"Human score at HITL iteration {HITL_iteration}: {new_queried_smiles_human_score}")

            # Fingerprints of the newly queried molecules, used for retraining.
            new_queried_fps = np.array([compute_fingerprints(smi) for smi in new_queried_smiles])

            iteration_training_dataset_outputs = create_drd2_dataset(feedback_type,
                                                                     new_queried_smiles,
                                                                     new_queried_smiles_human_score,
                                                                     new_queried_fps)

            print(f"New queried dataset size: {len(iteration_training_dataset_outputs[size_key])}")

            # The combined dataset keeps growing across iterations and rounds.
            base_training_dataset_outputs = combine_drd2_dataset(feedback_type, base_training_dataset_outputs,
                                                                 iteration_training_dataset_outputs)

            print(f"Combined dataset size: {len(base_training_dataset_outputs[size_key])}")

            # save augmented training data
            save_drd2_dataset(feedback_type, iteration_training_dataset_outputs, f"{iteration_dir}/iteration_queried_data.csv")
            save_drd2_dataset(feedback_type, base_training_dataset_outputs, f"{iteration_dir}/iteration_combined_data.csv")

            print(f"Saved augmented training data at {iteration_dir}/iteration_combined_data.csv")

            # Retraining the feedback model using the augmented train set
            retrained_feedback_model = retrain_feedback_model(feedback_type, feedback_model,
                                                              base_training_dataset_outputs,
                                                              training_epochs, learning_rate)

            # save the retrained feedback model
            feedback_model_saving_path = f"{iteration_dir}/{model_pretrained_name}"
            torch.save(retrained_feedback_model.state_dict(), feedback_model_saving_path)

            print(f"Saved retrained feedback model at {feedback_model_saving_path}")

def run_REINVENT_round(REINVENT_round,
        reinvent_dir, reinvent_env, output_dir,
        feedback_type, # scoring, comparing, ranking
        current_model_path, # Path to the pretrained model
        REINVENT_n_steps, # number of REINVENT optimization steps. This is not related to the HITL but on the REINVENT side
        batch_size, # batch size of the reinforcement learning model, or size of scaffold_memory.csv
    ):
    """
    Configure and run a single REINVENT round as a subprocess.

    The round directory `<output_dir>/REINVENT_round_<REINVENT_round>` is
    created if missing. A config file is obtained from
    `write_REINVENT_config`, updated by `change_config_json` and written back.
    `<reinvent_env>/bin/python <reinvent_dir>/input.py <config>` is then run
    with stdout and stderr redirected to `run.out` and `run.err` in the round
    directory.

    Parameters
    ----------
    REINVENT_round : int
        Round number, used to name the round directory.
    reinvent_dir, reinvent_env : str
        REINVENT checkout (must contain `input.py`) and the environment whose
        `bin/python` runs it.
    output_dir : str
        Root results directory; also used as the REINVENT job id.
    feedback_type : str
        One of "scoring", "comparing", "ranking".
    current_model_path : str
        Path to the feedback model REINVENT should use in this round.
    REINVENT_n_steps, batch_size : int
        Forwarded to `change_config_json`.

    Returns
    -------
    None
        The exit code is printed; a non-zero code additionally prints a
        warning pointing at the stderr log, but no exception is raised.
    """
    ################################################
    # DEFINING REINVENT JOBNAME, JOBID, OUTPUT_DIR #
    ################################################

    jobname = "fine-tune predictive component HITL"
    jobid = output_dir
    conf_filename = "config.json"
    round_dir = f"{output_dir}/REINVENT_round_{REINVENT_round}"

    # create root output dir and the folder for this round
    check_create(output_dir)
    check_create(round_dir)

    print("=====================================")
    print(f"REINVENT round = {REINVENT_round}")

    configuration_JSON_path = write_REINVENT_config(feedback_type, reinvent_dir, jobid, jobname,
                            REINVENT_round_output_dir=round_dir,
                            conf_filename=conf_filename)

    print(f"Creating config file: {configuration_JSON_path}.")

    # Read the config through a context manager so the handle is closed
    # before the same file is reopened for writing below.
    with open(f"{configuration_JSON_path}") as f:
        configuration = json.load(f)

    configuration = change_config_json(configuration, REINVENT_n_steps, batch_size, current_model_path)

    # write the updated configuration file
    with open(configuration_JSON_path, 'w') as f:
        json.dump(configuration, f, indent=4, sort_keys=True)

    print("Run REINVENT")
    command = f"{reinvent_env}/bin/python"
    script = f"{reinvent_dir}/input.py"
    stderr_file = f"{round_dir}/run.err"
    stdout_file = f"{round_dir}/run.out"

    # Argument list (no shell), with both output streams redirected to files.
    cmd = [command, script, configuration_JSON_path]
    with open(stderr_file, 'w') as ferr, open(stdout_file, 'w') as fout:
        result = subprocess.run(cmd, text=True, stdout=fout, stderr=ferr)

    print("Exit code:", result.returncode)
    if result.returncode != 0:
        # Make the failure visible next to the exit code instead of leaving it
        # to surface later as a missing results file.
        print(f"WARNING: REINVENT exited with a non-zero code; see {stderr_file} for details.")

In [27]:
# Show the working directory: the relative output_* directories used by the experiment cells resolve against it.
print(os.getcwd())

/home/springnuance/reinvent-hitl/Base-Code-Binh


### Running score regression model

In [30]:
! pip install -e "/home/springnuance/reinvent-hitl/reinvent-scoring"

from training_Bradley_Terry_model.bradley_terry import BradleyTerryModel
from training_Rank_ListNet_model.rank_listnet import RankListNetModel
from training_Score_Regression_model.score_regression import ScoreRegressionModel
from scripts.helper import load_drd2_dataset, write_REINVENT_config, change_config_json, \
                    read_scaffold_result, load_feedback_model, smiles_human_score, \
                    compute_fingerprints, retrain_feedback_model,\
                    create_drd2_dataset, combine_drd2_dataset, save_drd2_dataset
                        
from scripts.acquisition import select_query_feedback
from scripts.predict import predict_proba_from_model

# ---------------------------------------------------------------------------
# Score regression experiment: configuration
# ---------------------------------------------------------------------------
seed = 42

# Both locations must be absolute paths; adjust them to the local installation.
reinvent_dir = "/home/springnuance/reinvent-hitl/Reinvent"
reinvent_env = "/home/springnuance/miniconda3/envs/ReinventCommunity"

# The initial model is meant to be weak (around 0.5 accuracy) so that there is
# something left to learn through HITL; retrain a weaker one if it is too good.

# "scoring" feedback: given a molecule, what is the probability that it is
# active regarding DRD2? (Other feedback types: comparing, ranking.)
feedback_type = "scoring"

base_training_dataset_path = "/home/springnuance/reinvent-hitl/Base-Code-Binh/training_Score_Regression_model/small_drd2_training_data.csv"

model_pretrained_path = "/home/springnuance/reinvent-hitl/Base-Code-Binh/training_Score_Regression_model/score_regression_model.pth"
model_pretrained_name = "score_regression_model.pth"

num_rounds = 3  # R in the paper: number of REINVENT rounds
num_iters = 5  # HITL iterations (feedback sessions) per round
REINVENT_n_steps = 50  # REINVENT optimization steps per round
batch_size = 32  # RL batch size, or size of scaffold_memory.csv

# Number of molecules (or pairs / sets, depending on the task) shown to the
# simulated chemist at each HITL iteration.
num_queries = 20

choose_top_smiles = 200  # number of top molecules to choose from scaffold

training_epochs = 25  # epochs per HITL retraining
learning_rate = 0.000001  # learning rate per HITL retraining

# Settings of the final REINVENT round, run with the last feedback model.
REINVENT_n_steps_last = 200
batch_size_last = 64

# ---------------------------------------------------------------------------
# Experiment grid: every acquisition strategy with and without feedback noise
# (the six cases random/uncertainty/greedy x sigma_noise 0.0/0.1).
# ---------------------------------------------------------------------------
for acquisition, sigma_noise in [
    (acq, noise)
    for acq in ["random", "uncertainty", "greedy"]
    for noise in [0.0, 0.1]
]:
    print("\n*******************************************************************")
    print(f"Running HITL with {acquisition} acquisition and noise {sigma_noise}")
    output_dir = f"output_score_regression/R{num_rounds}_T{num_iters}_Q{num_queries}_acq_{acquisition}_noise_{sigma_noise}"

    # An existing output directory is treated as an already finished HITL run.
    if not os.path.exists(output_dir):
        run_HITL_classify(
            seed=seed,
            reinvent_dir=reinvent_dir,
            reinvent_env=reinvent_env,
            output_dir=output_dir,
            feedback_type=feedback_type,
            base_training_dataset_path=base_training_dataset_path,
            model_pretrained_path=model_pretrained_path,
            model_pretrained_name=model_pretrained_name,
            num_rounds=num_rounds,
            num_iters=num_iters,
            num_queries=num_queries,
            REINVENT_n_steps=REINVENT_n_steps,
            batch_size=batch_size,
            acquisition=acquisition,
            sigma_noise=sigma_noise,
            choose_top_smiles=choose_top_smiles,
            training_epochs=training_epochs,
            learning_rate=learning_rate,
        )

    # The final round is always regenerated from scratch.
    if os.path.exists(f"{output_dir}/REINVENT_round_{num_rounds+1}"):
        shutil.rmtree(f"{output_dir}/REINVENT_round_{num_rounds+1}")

    # Run the last REINVENT round using the last feedback model.
    current_model_path = f"{output_dir}/REINVENT_round_{num_rounds}/HITL_iteration_{num_iters}/{model_pretrained_name}"
    run_REINVENT_round(
        REINVENT_round=num_rounds + 1,
        reinvent_dir=reinvent_dir,
        reinvent_env=reinvent_env,
        output_dir=output_dir,
        feedback_type=feedback_type,
        current_model_path=current_model_path,
        REINVENT_n_steps=REINVENT_n_steps_last,
        batch_size=batch_size_last,
    )


Obtaining file:///home/springnuance/reinvent-hitl/reinvent-scoring
  Preparing metadata (setup.py) ... [?25ldone
[?25hInstalling collected packages: reinvent-scoring
  Attempting uninstall: reinvent-scoring
    Found existing installation: reinvent-scoring 0.0.73
    Uninstalling reinvent-scoring-0.0.73:
      Successfully uninstalled reinvent-scoring-0.0.73
  Running setup.py develop for reinvent-scoring
Successfully installed reinvent-scoring-0.0.73

*******************************************************************
Running HITL with random acquisition and noise 0.0
REINVENT round = 4
Creating config file: output_score_regression/R3_T5_Q20_acq_random_noise_0.0/REINVENT_round_4/config.json.
Run REINVENT
Exit code: 0

*******************************************************************
Running HITL with random acquisition and noise 0.1
REINVENT round = 4
Creating config file: output_score_regression/R3_T5_Q20_acq_random_noise_0.1/REINVENT_round_4/config.json.
Run REINVENT
Exit code: 

### Running Bradley Terry model

In [31]:
! pip install -e "/home/springnuance/reinvent-hitl/reinvent-scoring"

# ---------------------------------------------------------------------------
# Bradley-Terry experiment: configuration
# ---------------------------------------------------------------------------
seed = 42

# Both locations must be absolute paths; adjust them to the local installation.
reinvent_dir = "/home/springnuance/reinvent-hitl/Reinvent"
reinvent_env = "/home/springnuance/miniconda3/envs/ReinventCommunity"

# The initial model is meant to be weak (around 0.5 accuracy) so that there is
# something left to learn through HITL; retrain a weaker one if it is too good.

# "comparing" feedback: given two molecules, what is the probability that the
# first is more active than the second regarding DRD2?
# (Other feedback types: scoring, ranking.)
feedback_type = "comparing"

base_training_dataset_path = "/home/springnuance/reinvent-hitl/Base-Code-Binh/training_Bradley_Terry_model/small_drd2_training_data.csv"

model_pretrained_path = "/home/springnuance/reinvent-hitl/Base-Code-Binh/training_Bradley_Terry_model/bradley_terry_model.pth"
model_pretrained_name = "bradley_terry_model.pth"

num_rounds = 3  # R in the paper: number of REINVENT rounds
num_iters = 5  # HITL iterations (feedback sessions) per round
REINVENT_n_steps = 50  # REINVENT optimization steps per round
batch_size = 32  # RL batch size, or size of scaffold_memory.csv

# Number of molecules selected at each HITL iteration. Every ordered pair is
# compared, so the simulated chemist actually sees
# num_queries x (num_queries - 1) pairs: 5 x 4 = 20 per iteration.
num_queries = 5

choose_top_smiles = 100  # number of top molecules to choose from scaffold

training_epochs = 25  # epochs per HITL retraining
learning_rate = 0.000001  # learning rate per HITL retraining

# Settings of the final REINVENT round, run with the last feedback model.
REINVENT_n_steps_last = 200
batch_size_last = 64

# ---------------------------------------------------------------------------
# Experiment grid: every acquisition strategy with and without feedback noise
# (the six cases random/uncertainty/greedy x sigma_noise 0.0/0.1).
# ---------------------------------------------------------------------------
for acquisition, sigma_noise in [
    (acq, noise)
    for acq in ["random", "uncertainty", "greedy"]
    for noise in [0.0, 0.1]
]:
    print("\n*******************************************************************")
    print(f"Running HITL with {acquisition} acquisition and noise {sigma_noise}")
    output_dir = f"output_bradley_terry/R{num_rounds}_T{num_iters}_Q{num_queries}_acq_{acquisition}_noise_{sigma_noise}"

    # An existing output directory is treated as an already finished HITL run.
    if not os.path.exists(output_dir):
        run_HITL_classify(
            seed=seed,
            reinvent_dir=reinvent_dir,
            reinvent_env=reinvent_env,
            output_dir=output_dir,
            feedback_type=feedback_type,
            base_training_dataset_path=base_training_dataset_path,
            model_pretrained_path=model_pretrained_path,
            model_pretrained_name=model_pretrained_name,
            num_rounds=num_rounds,
            num_iters=num_iters,
            num_queries=num_queries,
            REINVENT_n_steps=REINVENT_n_steps,
            batch_size=batch_size,
            acquisition=acquisition,
            sigma_noise=sigma_noise,
            choose_top_smiles=choose_top_smiles,
            training_epochs=training_epochs,
            learning_rate=learning_rate,
        )

    # The final round is always regenerated from scratch.
    if os.path.exists(f"{output_dir}/REINVENT_round_{num_rounds+1}"):
        shutil.rmtree(f"{output_dir}/REINVENT_round_{num_rounds+1}")

    # Run the last REINVENT round using the last feedback model.
    current_model_path = f"{output_dir}/REINVENT_round_{num_rounds}/HITL_iteration_{num_iters}/{model_pretrained_name}"
    run_REINVENT_round(
        REINVENT_round=num_rounds + 1,
        reinvent_dir=reinvent_dir,
        reinvent_env=reinvent_env,
        output_dir=output_dir,
        feedback_type=feedback_type,
        current_model_path=current_model_path,
        REINVENT_n_steps=REINVENT_n_steps_last,
        batch_size=batch_size_last,
    )


Obtaining file:///home/springnuance/reinvent-hitl/reinvent-scoring
  Preparing metadata (setup.py) ... [?25ldone
[?25hInstalling collected packages: reinvent-scoring
  Attempting uninstall: reinvent-scoring
    Found existing installation: reinvent-scoring 0.0.73
    Uninstalling reinvent-scoring-0.0.73:
      Successfully uninstalled reinvent-scoring-0.0.73
  Running setup.py develop for reinvent-scoring
Successfully installed reinvent-scoring-0.0.73

*******************************************************************
Running HITL with random acquisition and noise 0.0
REINVENT round = 4
Creating config file: output_bradley_terry/R3_T5_Q5_acq_random_noise_0.0/REINVENT_round_4/config.json.
Run REINVENT
Exit code: 0

*******************************************************************
Running HITL with random acquisition and noise 0.1
REINVENT round = 4
Creating config file: output_bradley_terry/R3_T5_Q5_acq_random_noise_0.1/REINVENT_round_4/config.json.
Run REINVENT
Exit code: 0

*****

### Running Rank ListNet model

In [32]:
! pip install -e "/home/springnuance/reinvent-hitl/reinvent-scoring"

# ---------------------------------------------------------------------------
# Rank ListNet experiment: configuration
# ---------------------------------------------------------------------------
seed = 42

# Both locations must be absolute paths; adjust them to the local installation.
reinvent_dir = os.path.expanduser("/home/springnuance/reinvent-hitl/Reinvent")
reinvent_env = os.path.expanduser("/home/springnuance/miniconda3/envs/ReinventCommunity")

# The initial model is meant to be weak (around 0.5 accuracy) so that there is
# something left to learn through HITL; retrain a weaker one if it is too good.

# "ranking" feedback: given N molecules, what is their order of preference
# regarding DRD2? (Other feedback types: scoring, comparing.)
feedback_type = "ranking"

base_training_dataset_path = "/home/springnuance/reinvent-hitl/Base-Code-Binh/training_Rank_ListNet_model/small_drd2_training_data.csv"

model_pretrained_path = "/home/springnuance/reinvent-hitl/Base-Code-Binh/training_Rank_ListNet_model/rank_listnet_model.pth"
model_pretrained_name = "rank_listnet_model.pth"

num_rounds = 3  # R in the paper: number of REINVENT rounds
num_iters = 5  # HITL iterations (feedback sessions) per round
REINVENT_n_steps = 50  # REINVENT optimization steps per round

# Careful: the matrix size grows with binom(num_queries, 3) and can become
# very large; a batch_size of 80 could potentially exhaust memory.
batch_size = 32  # RL batch size, or size of scaffold_memory.csv

# Number of molecules selected at each HITL iteration. Every set of 3 is
# ranked, so the simulated chemist actually sees binom(num_queries, 3) sets:
# binom(6, 3) = 20 per iteration.
num_queries = 6

choose_top_smiles = 60  # number of top molecules to choose from scaffold

training_epochs = 25  # epochs per HITL retraining
learning_rate = 0.000001  # learning rate per HITL retraining

# Settings of the final REINVENT round, run with the last feedback model.
REINVENT_n_steps_last = 200
batch_size_last = 64

# ---------------------------------------------------------------------------
# Experiment grid: every acquisition strategy with and without feedback noise
# (the six cases random/uncertainty/greedy x sigma_noise 0.0/0.1).
# ---------------------------------------------------------------------------
for acquisition, sigma_noise in [
    (acq, noise)
    for acq in ["random", "uncertainty", "greedy"]
    for noise in [0.0, 0.1]
]:
    print("\n*******************************************************************")
    print(f"Running HITL with {acquisition} acquisition and noise {sigma_noise}")

    output_dir = f"output_rank_listnet/R{num_rounds}_T{num_iters}_Q{num_queries}_acq_{acquisition}_noise_{sigma_noise}"

    # An existing output directory is treated as an already finished HITL run.
    if not os.path.exists(output_dir):
        run_HITL_classify(
            seed=seed,
            reinvent_dir=reinvent_dir,
            reinvent_env=reinvent_env,
            output_dir=output_dir,
            feedback_type=feedback_type,
            base_training_dataset_path=base_training_dataset_path,
            model_pretrained_path=model_pretrained_path,
            model_pretrained_name=model_pretrained_name,
            num_rounds=num_rounds,
            num_iters=num_iters,
            num_queries=num_queries,
            REINVENT_n_steps=REINVENT_n_steps,
            batch_size=batch_size,
            acquisition=acquisition,
            sigma_noise=sigma_noise,
            choose_top_smiles=choose_top_smiles,
            training_epochs=training_epochs,
            learning_rate=learning_rate,
        )

    # The final round is always regenerated from scratch.
    if os.path.exists(f"{output_dir}/REINVENT_round_{num_rounds+1}"):
        shutil.rmtree(f"{output_dir}/REINVENT_round_{num_rounds+1}")

    # Run the last REINVENT round using the last feedback model.
    current_model_path = f"{output_dir}/REINVENT_round_{num_rounds}/HITL_iteration_{num_iters}/{model_pretrained_name}"
    run_REINVENT_round(
        REINVENT_round=num_rounds + 1,
        reinvent_dir=reinvent_dir,
        reinvent_env=reinvent_env,
        output_dir=output_dir,
        feedback_type=feedback_type,
        current_model_path=current_model_path,
        REINVENT_n_steps=REINVENT_n_steps_last,
        batch_size=batch_size_last,
    )

Obtaining file:///home/springnuance/reinvent-hitl/reinvent-scoring
  Preparing metadata (setup.py) ... [?25ldone
[?25hInstalling collected packages: reinvent-scoring
  Attempting uninstall: reinvent-scoring
    Found existing installation: reinvent-scoring 0.0.73
    Uninstalling reinvent-scoring-0.0.73:
      Successfully uninstalled reinvent-scoring-0.0.73
  Running setup.py develop for reinvent-scoring
Successfully installed reinvent-scoring-0.0.73

*******************************************************************
Running HITL with random acquisition and noise 0.0
REINVENT round = 4
Creating config file: output_rank_listnet/R3_T5_Q6_acq_random_noise_0.0/REINVENT_round_4/config.json.
Run REINVENT
Exit code: 0

*******************************************************************
Running HITL with random acquisition and noise 0.1
REINVENT round = 4
Creating config file: output_rank_listnet/R3_T5_Q6_acq_random_noise_0.1/REINVENT_round_4/config.json.
Run REINVENT
Exit code: 0

*******