In [11]:
import pandas as pd
from glob import glob
import os
import json
import importlib
import multiprocessing as mp

Read the reaction table (reaction ID and reaction SMARTS) from CSV

In [12]:
# Load the reaction table, then trim surrounding whitespace from the IDs so
# they can be embedded cleanly in the file names built further down.
rxn_df = pd.read_csv("../reactions/reactions.csv")
rxn_df["rxn_id"] = rxn_df["rxn_id"].str.strip()

Prepare TS input files: one JSON per reaction, built from template_TS.json

In [13]:
# Generate one TS input JSON per reaction, starting from a shared template.
json_filename = "template_TS.json"
with open(json_filename, 'r') as ifs:
    input_dict = json.load(ifs)

# Each row of rxn_df is unpacked positionally as (rxn_id, smarts), so the
# frame must hold exactly those two columns in that order.
for rxn_id, smarts in rxn_df.values:
    # NOTE: this counts "."-separated pieces of the whole reaction SMARTS
    # string (product side included), not only the reactant side.
    num_components = len(smarts.split("."))

    # Reactions with more than two components get a larger iteration budget.
    iterations = 18500 if num_components > 2 else 10000

    # One reagent file per component, indexed from 0.
    reagent_file_list = [
        f'../reactions/{num_components}_component/{rxn_id}_reagent_{i}.smi'
        for i in range(num_components)
    ]

    # The same dict is reused for every reaction; every reaction-specific key
    # is overwritten on each pass, so nothing leaks between iterations.
    input_dict['reagent_file_list'] = reagent_file_list
    input_dict['evaluator_arg']['ref_colname'] = "query_001"
    input_dict['evaluator_arg']['ref_filename'] = f"../parquet/{num_components}_component/{rxn_id}.parquet"
    input_dict["num_ts_iterations"] = iterations
    input_dict['reaction_smarts'] = smarts

    # Make sure the output directory exists so the cell also works on a
    # fresh checkout instead of failing with FileNotFoundError.
    output_dir = f"{num_components}_component"
    os.makedirs(output_dir, exist_ok=True)
    with open(f"{output_dir}/{rxn_id}_TS.json", "w") as ofs:
        json.dump(input_dict, ofs, indent=4)

Prepare RWS input files: one JSON per reaction, built from template_RWS.json

In [14]:
def read_input(json_filename: str, ref_colname) -> dict:
    """
    Load run parameters from a JSON file and back-fill missing defaults.

    :param json_filename: path of the JSON file to read
    :param ref_colname: accepted for the caller's convenience but never read
        inside this function
    :return: the parsed dictionary; keys present in the file keep their
        values, and any default key absent from the file is appended with
        its default value
    """
    fallback_settings = {
        "nprocesses": 1,
        "num_warmup_trials": 3,
        "percent_of_library": 0.001,
        "min_cpds_per_core": 50,
        "scaling": 1,
        "stop": 6000,
        "hide_progress": True,
        "log_filename": "./logs.txt",
        "results_filename": "./results.csv"
    }

    with open(json_filename, 'r') as handle:
        params = json.load(handle)

    # Only fill gaps: a value supplied by the file always wins.
    for key, value in fallback_settings.items():
        if key in params:
            continue
        params[key] = value
    return params

In [15]:
# Preview the RWS template after the defaults have been filled in.
read_input(json_filename="template_RWS.json", ref_colname="query_001")

{'reagent_file_list': ['../REAGENTS/rxn102a_reagent_0.smi',
  '../REAGENTS/rxn102a_reagent_1.smi'],
 'reaction_smarts': '[#6;a;$(c1:[c,n]:[c,n]:[c,n]:[c,n]:[c,n]:1):1][#35;A;D1].[#7;A;$(N[#6])!$(N=*)!$([N-])!$(N#*)!$([ND3])!$([ND4])!$(N[O,N])!$(N[C,S]=[S,O,N]):2]>>[#6:1]-[#7:2]',
 'nprocesses': 1,
 'min_cpds_per_core': 50,
 'num_warmup_trials': 5,
 'percent_of_library': 0.02,
 'scaling': 1,
 'stop': 6000,
 'evaluator_class_name': 'LookupEvaluator',
 'evaluator_arg': {'ref_filename': '../PARQUET/rxn102a.parquet',
  'ref_colname': 'query_001'},
 'log_filename': 'rws_amide_rocs_logs.txt',
 'results_filename': 'rws_amide_rocs_logs.csv',
 'hide_progress': True}

In [16]:
# Generate one RWS input JSON per reaction, starting from the RWS template
# with defaults filled in by read_input.
input_dict = read_input("template_RWS.json","query_001")

# NOTE(review): log_filename / results_filename are not overridden below, so
# every generated file carries whatever the template (or the read_input
# defaults) provides — confirm that sharing those names is intended.

# Each row of rxn_df is unpacked positionally as (rxn_id, smarts), so the
# frame must hold exactly those two columns in that order.
for rxn_id, smarts in rxn_df.values:
    # NOTE: this counts "."-separated pieces of the whole reaction SMARTS
    # string (product side included), not only the reactant side.
    num_components = len(smarts.split("."))

    # One reagent file per component, indexed from 0.
    reagent_file_list = [
        f'../reactions/{num_components}_component/{rxn_id}_reagent_{i}.smi'
        for i in range(num_components)
    ]

    # The same dict is reused for every reaction; every reaction-specific key
    # is overwritten on each pass.
    input_dict['reaction_smarts'] = smarts
    input_dict['reagent_file_list'] = reagent_file_list
    input_dict['evaluator_arg']['ref_colname'] = "query_001"
    input_dict['evaluator_arg']['ref_filename'] = f"../parquet/{num_components}_component/{rxn_id}.parquet"

    # Make sure the output directory exists so the cell also works on a
    # fresh checkout instead of failing with FileNotFoundError.
    output_dir = f"{num_components}_component"
    os.makedirs(output_dir, exist_ok=True)
    with open(f"{output_dir}/{rxn_id}_RWS.json", "w") as ofs:
        json.dump(input_dict, ofs, indent=4)