In [1]:
import sys
import pandas as pd
import logging
sys.path.append('../../')

from SynRBL.preprocess import preprocess
from SynRBL.postprocess import Validator
from SynRBL.rule_based import RuleBasedMethod
from SynRBL.mcs import MCS
from SynRBL.SynMCSImputer.model import MCSBasedMethod
from SynRBL.confidence_prediction import ConfidencePredictor

logger = logging.getLogger("SynRBL")

df = pd.read_csv('../../Data/Validation_set/USPTO_50K.csv')
df = df.iloc[:100, :]

In [2]:
df = df.to_dict("records")

In [3]:
from SynRBL import Balancer
test = ['[C]=O.C1=CC2CC1C1CC3C4C=CC(C4)C3C21.C1=CC2C(C1)C1CC2C2C3C=CC(C3)C12>>']
synrbl = Balancer(reaction_col="reactions", id_col="id")

results = synrbl.rebalance(reactions=test, output_dict=True)


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


UnboundLocalError: cannot access local variable 'max_condition' where it is not associated with a value

In [5]:
from SynRBL import Balancer

synrbl = Balancer(reaction_col="reactions", id_col="id")

results = synrbl.rebalance(reactions=df, output_dict=False)
print(results[0])


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


COC(=O)[C@H](CCCCNC(=O)OCc1ccccc1)NC(=O)Nc1cc(OC)cc(C(C)(C)C)c1O.O>>COC(=O)[C@H](CCCCN)NC(=O)Nc1cc(OC)cc(C(C)(C)C)c1O.O=C(O)OCc1ccccc1


# 1. Preprocess

In [5]:
input_validator = Validator("reactions", "input-balanced")
rb_validator = Validator(
    "reactions", "rule-based", check_carbon_balance=False
)

stats = {}

reactions = preprocess(
    df, "reactions", "id", "solved",
)
l = len(reactions)
input_validator.check(reactions)
reactions[0]

{'id': '0',
 'class': 6,
 'reactions': 'COC(=O)[C@H](CCCCNC(=O)OCc1ccccc1)NC(=O)Nc1cc(OC)cc(C(C)(C)C)c1O>>COC(=O)[C@H](CCCCN)NC(=O)Nc1cc(OC)cc(C(C)(C)C)c1O',
 'solved': False,
 'reactants': 'COC(=O)[C@H](CCCCNC(=O)OCc1ccccc1)NC(=O)Nc1cc(OC)cc(C(C)(C)C)c1O',
 'products': 'COC(=O)[C@H](CCCCN)NC(=O)Nc1cc(OC)cc(C(C)(C)C)c1O',
 'input_reaction': 'COC(=O)[C@H](CCCCNC(=O)OCc1ccccc1)NC(=O)Nc1cc(OC)cc(C(C)(C)C)c1O>>COC(=O)[C@H](CCCCN)NC(=O)Nc1cc(OC)cc(C(C)(C)C)c1O',
 'carbon_balance_check': 'products',
 'unbalance': 'Products'}

# 2. RuleBasedMethod

In [7]:
rb_method = RuleBasedMethod(id_col="id", reaction_col='reactions', output_col='reactions', 
                            rules_path= "../../Data/Rules/rules_manager.json.gz")

rb_method.run(reactions, stats=stats)
rb_validator.check(reactions)
reactions[1]

{'id': '1',
 'class': 2,
 'reactions': 'Nc1cccc2cnccc12.O=C(O)c1cc([N+](=O)[O-])c(Sc2c(Cl)cncc2Cl)s1>>O=C(Nc1cccc2cnccc12)c1cc([N+](=O)[O-])c(Sc2c(Cl)cncc2Cl)s1.O',
 'solved': True,
 'reactants': 'Nc1cccc2cnccc12.O=C(O)c1cc([N+](=O)[O-])c(Sc2c(Cl)cncc2Cl)s1',
 'products': 'O=C(Nc1cccc2cnccc12)c1cc([N+](=O)[O-])c(Sc2c(Cl)cncc2Cl)s1.O',
 'input_reaction': 'Nc1cccc2cnccc12.O=C(O)c1cc([N+](=O)[O-])c(Sc2c(Cl)cncc2Cl)s1>>O=C(Nc1cccc2cnccc12)c1cc([N+](=O)[O-])c(Sc2c(Cl)cncc2Cl)s1',
 'carbon_balance_check': 'balanced',
 'unbalance': 'Balance',
 'solved_by': 'rule-based'}

# 3. MCSBasedMethod

In [8]:
mcs = MCS(id_col="id", mcs_data_col="mcs")
mcs_method = MCSBasedMethod(
            reaction_col='reactions', output_col='reactions', mcs_data_col='mcs'
        )
mcs_validator = Validator('reactions', "mcs-based")

In [9]:
mcs.find(reactions)
mcs_method.run(reactions, stats=stats)
mcs_validator.check(reactions) 
reactions[0]

{'id': '0',
 'class': 6,
 'reactions': 'COC(=O)[C@H](CCCCNC(=O)OCc1ccccc1)NC(=O)Nc1cc(OC)cc(C(C)(C)C)c1O>>COC(=O)[C@H](CCCCN)NC(=O)Nc1cc(OC)cc(C(C)(C)C)c1O.O=C(O)OCc1ccccc1',
 'solved': False,
 'reactants': 'COC(=O)[C@H](CCCCNC(=O)OCc1ccccc1)NC(=O)Nc1cc(OC)cc(C(C)(C)C)c1O',
 'products': 'COC(=O)[C@H](CCCCN)NC(=O)Nc1cc(OC)cc(C(C)(C)C)c1O.O=C(O)OCc1ccccc1',
 'input_reaction': 'COC(=O)[C@H](CCCCNC(=O)OCc1ccccc1)NC(=O)Nc1cc(OC)cc(C(C)(C)C)c1O>>COC(=O)[C@H](CCCCN)NC(=O)Nc1cc(OC)cc(C(C)(C)C)c1O',
 'carbon_balance_check': 'balanced',
 'unbalance': 'Reactants',
 'mcs': {'smiles': ['O=COCc1ccccc1'],
  'boundary_atoms_products': [[{'C': 1}]],
  'nearest_neighbor_products': [[{'N': 9}]],
  'issue': '',
  'Certainty': True,
  'sorted_reactants': ['COC(=O)[C@H](CCCCNC(=O)OCc1ccccc1)NC(=O)Nc1cc(OC)cc(C(C)(C)C)c1O'],
  'mcs_results': ['[#6]-&!@[#8]-&!@[#6](=&!@[#8])-&!@[#6](-&!@[#6]-&!@[#6]-&!@[#6]-&!@[#6]-&!@[#7])-&!@[#7]-&!@[#6](=&!@[#8])-&!@[#7]-&!@[#6]1:&@[#6]:&@[#6](-&!@[#8]-&!@[#6]):&@[#6]:&@[#

In [10]:
rb_method.run(reactions)
mcs_validator.check(reactions)
reactions[0]

{'id': '0',
 'class': 6,
 'reactions': 'COC(=O)[C@H](CCCCNC(=O)OCc1ccccc1)NC(=O)Nc1cc(OC)cc(C(C)(C)C)c1O.O>>COC(=O)[C@H](CCCCN)NC(=O)Nc1cc(OC)cc(C(C)(C)C)c1O.O=C(O)OCc1ccccc1',
 'solved': True,
 'reactants': 'COC(=O)[C@H](CCCCNC(=O)OCc1ccccc1)NC(=O)Nc1cc(OC)cc(C(C)(C)C)c1O.O',
 'products': 'COC(=O)[C@H](CCCCN)NC(=O)Nc1cc(OC)cc(C(C)(C)C)c1O.O=C(O)OCc1ccccc1',
 'input_reaction': 'COC(=O)[C@H](CCCCNC(=O)OCc1ccccc1)NC(=O)Nc1cc(OC)cc(C(C)(C)C)c1O>>COC(=O)[C@H](CCCCN)NC(=O)Nc1cc(OC)cc(C(C)(C)C)c1O',
 'carbon_balance_check': 'balanced',
 'unbalance': 'Balance',
 'mcs': {'smiles': ['O=COCc1ccccc1'],
  'boundary_atoms_products': [[{'C': 1}]],
  'nearest_neighbor_products': [[{'N': 9}]],
  'issue': '',
  'Certainty': True,
  'sorted_reactants': ['COC(=O)[C@H](CCCCNC(=O)OCc1ccccc1)NC(=O)Nc1cc(OC)cc(C(C)(C)C)c1O'],
  'mcs_results': ['[#6]-&!@[#8]-&!@[#6](=&!@[#8])-&!@[#6](-&!@[#6]-&!@[#6]-&!@[#6]-&!@[#6]-&!@[#7])-&!@[#7]-&!@[#6](=&!@[#8])-&!@[#7]-&!@[#6]1:&@[#6]:&@[#6](-&!@[#8]-&!@[#6]):&@[#6]:&@[

# 4. Confidence Level

In [11]:
conf_predictor = ConfidencePredictor(model_path='../../Data/scoring_function.dump', reaction_col='reactions')
conf_predictor.predict(reactions, stats=stats, threshold=0)
reactions[0]

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


{'id': '0',
 'class': 6,
 'reactions': 'COC(=O)[C@H](CCCCNC(=O)OCc1ccccc1)NC(=O)Nc1cc(OC)cc(C(C)(C)C)c1O.O>>COC(=O)[C@H](CCCCN)NC(=O)Nc1cc(OC)cc(C(C)(C)C)c1O.O=C(O)OCc1ccccc1',
 'solved': True,
 'reactants': 'COC(=O)[C@H](CCCCNC(=O)OCc1ccccc1)NC(=O)Nc1cc(OC)cc(C(C)(C)C)c1O',
 'products': 'COC(=O)[C@H](CCCCN)NC(=O)Nc1cc(OC)cc(C(C)(C)C)c1O',
 'input_reaction': 'COC(=O)[C@H](CCCCNC(=O)OCc1ccccc1)NC(=O)Nc1cc(OC)cc(C(C)(C)C)c1O>>COC(=O)[C@H](CCCCN)NC(=O)Nc1cc(OC)cc(C(C)(C)C)c1O',
 'carbon_balance_check': 'balanced',
 'unbalance': 'Balance',
 'mcs': {'smiles': ['O=COCc1ccccc1'],
  'boundary_atoms_products': [[{'C': 1}]],
  'nearest_neighbor_products': [[{'N': 9}]],
  'issue': '',
  'Certainty': True,
  'sorted_reactants': ['COC(=O)[C@H](CCCCNC(=O)OCc1ccccc1)NC(=O)Nc1cc(OC)cc(C(C)(C)C)c1O'],
  'mcs_results': ['[#6]-&!@[#8]-&!@[#6](=&!@[#8])-&!@[#6](-&!@[#6]-&!@[#6]-&!@[#6]-&!@[#6]-&!@[#7])-&!@[#7]-&!@[#6](=&!@[#8])-&!@[#7]-&!@[#6]1:&@[#6]:&@[#6](-&!@[#8]-&!@[#6]):&@[#6]:&@[#6](:&@[#6]:&@1-&!@