# Basic Usage

This notebook walks through the basic functionality of the replicated model. Running it end to end serves as a check that the core implementation works as expected.

## 1. Reaction Property Prediction

### 1.1 Reactivity Prediction

In [8]:
import os
import warnings

from rxngraphormer.eval import reaction_prediction

# Silence all warnings for the rest of the session to keep cell output readable.
warnings.filterwarnings("ignore")

# Change the working directory to the parent of the directory the notebook was
# started in; the later cells use paths relative to it ("./model_path/...").
#
# The start directory is captured only the first time this cell runs in a
# kernel. Re-running the cell therefore re-enters the same parent directory
# instead of climbing one level higher on every execution.
if "_notebook_start_dir" not in globals():
    _notebook_start_dir = os.getcwd()
cur_dir = _notebook_start_dir
father_dir = os.path.abspath(os.path.join(cur_dir, '..'))
os.chdir(father_dir)

In [9]:
# Reactivity prediction with the Buchwald-Hartwig model (seed0).
bh_model_path = "./model_path/buchwald_hartwig/seed0"

# Reaction SMILES are written as "reactants>>product"; the two examples share
# the same product and differ in the aryl halide (iodide vs. chloride) and the
# isoxazole additive.
bh_rxn_aryl_iodide = "CC(C)c1cc(C(C)C)c(-c2ccccc2P(C2CCCCC2)C2CCCCC2)c(C(C)C)c1.CN(C)C(=NC(C)(C)C)N(C)C.Cc1ccc(N)cc1.FC(F)(F)c1ccc(I)cc1.c1ccc(-c2ccon2)cc1>>Cc1ccc(Nc2ccc(C(F)(F)F)cc2)cc1"
bh_rxn_aryl_chloride = "CC(C)c1cc(C(C)C)c(-c2ccccc2P(C2CCCCC2)C2CCCCC2)c(C(C)C)c1.CCOC(=O)c1cc(C)on1.CN(C)C(=NC(C)(C)C)N(C)C.Cc1ccc(N)cc1.FC(F)(F)c1ccc(Cl)cc1>>Cc1ccc(Nc2ccc(C(F)(F)F)cc2)cc1"
rxn_smiles_lst = [bh_rxn_aryl_iodide, bh_rxn_aryl_chloride]

bh_react_preds = reaction_prediction(
    bh_model_path,
    rxn_smiles_lst,
    task_type="reactivity",
)
# Trailing expression: the notebook displays the predictions as the cell value.
bh_react_preds

Model loaded successfully!


Processing...
100%|██████████| 2/2 [00:00<00:00, 189.38it/s]
Done!
Processing...
100%|██████████| 2/2 [00:00<00:00, 682.11it/s]
Done!
Processing...
100%|██████████| 2/2 [00:00<00:00, 572.48it/s]

[INFO] 2 Saving...
[INFO] 2 Saving...
[INFO] 2 Saving...
Done!



Done!


array([[36.664085],
       [19.065788]], dtype=float32)

### 1.2 Selectivity Prediction


In [10]:
# Selectivity prediction with the thiol-addition model (seed0).
thiol_add_model_path = "./model_path/thiol_addition/seed0"

# Reaction SMILES are written as "reactants>>product"; one example per thiol
# (thiophenol and cyclohexanethiol).
thiol_rxn_thiophenol = "COCc1cccc(-c2cc3c(c4c2OP(=O)(O)Oc2c(-c5cccc(COC)c5)cc5c(c2-4)CCCC5)CCCC3)c1.O=C(/N=C/c1ccccc1)c1ccccc1.Sc1ccccc1>>O=C(NC(Sc1ccccc1)c1ccccc1)c1ccccc1"
thiol_rxn_cyclohexanethiol = "COCc1cccc(-c2cc3ccccc3c3c2OP(=O)(O)Oc2c(-c4cccc(COC)c4)cc4ccccc4c2-3)c1.O=C(/N=C/c1cccc2ccccc12)c1ccccc1.SC1CCCCC1>>O=C(NC(SC1CCCCC1)c1cccc2ccccc12)c1ccccc1"
rxn_smiles_lst = [thiol_rxn_thiophenol, thiol_rxn_cyclohexanethiol]

thiol_add_sel_preds = reaction_prediction(
    thiol_add_model_path,
    rxn_smiles_lst,
    task_type="selectivity",
)
# Trailing expression: the notebook displays the predictions as the cell value.
thiol_add_sel_preds

Model loaded successfully!


Processing...
100%|██████████| 2/2 [00:00<00:00, 208.21it/s]
Done!
Processing...
100%|██████████| 2/2 [00:00<00:00, 549.46it/s]
Done!
Processing...
100%|██████████| 2/2 [00:00<00:00, 494.15it/s]

[INFO] 2 Saving...
[INFO] 2 Saving...
[INFO] 2 Saving...
Done!



Done!


tensor([[1.0733],
        [0.5368]])

## 2. Synthesis Planning
### 2.1 Retro-synthesis Planning


In [11]:
# Retro-synthesis with the USPTO-50k model: product SMILES in, ranked reactant
# predictions out.
uspto_50k_model_path = "./model_path/USPTO_50k"

# NOTE(review): the third SMILES opens ring bond `2` ("-c2nc...") but never
# closes it, so it is not a valid SMILES as written. The inputs may have been
# truncated at some point - verify them against the source dataset.
pdt_smiles_lst = [
    'COC(=O)[C@H]NC(=O)Nc1cc(OC)cc(C(C)(C)C)c1O',
    'O=C(Nc1cccc2cnccc12)c1cc([O-])c(Sc2c(Cl)cncc2Cl)s1',
    'CCN(CC)Cc1ccc(-c2nc(C)c(COc3ccc([C@H]N4C(=O)OC[C@@H]4Cc4ccccc4)c4ccon4)c3)cc1'
]
rct_preds = reaction_prediction(uspto_50k_model_path, pdt_smiles_lst, task_type="retro-synthesis")

# End on the bare expression rather than print(): the notebook then renders the
# result with its rich display, matching the property-prediction cells.
rct_preds

Processing...
100%|██████████| 3/3 [00:00<00:00, 657.17it/s]

Model loaded successfully!
[INFO] 2 Saving...



Done!


Done!
                                               0  \
Top-1   COC(=O)NN.COc1cc(N=C=O)c(O)c(C(C)(C)C)c1   
Top-2        COC(=O)NN.COc1cc(NCOC)OC.COc1cc(N=O   
Top-3                      COC(=O)C(C(=O)Cl)Nc1c   
Top-4                 COC(=O)NN.COc1cc(NC)c1(C)C   
Top-5               COC(=O)C(C(=O)O)Nc1(C)c1cc(O   
Top-6        COC(=O)NN.COc1cc(NCOC)OC.COc1cc(N=N   
Top-7                    COC(=O)C(C(=O)O)Nc1(C)O   
Top-8                               COC(=O)Cl)=O   
Top-9               COC(=O)NN.COc1cc(NCOC)c)C)c1   
Top-10    COC(=O)NN.COc1cc(NCOC)OC)c1cc(=O)N)c1O   

                                                        1  
Top-1   Nc1cccc2cnccc12.O=C(O)c1cc([N+](=O)[O-])c(Sc2c...  
Top-2   Nc1cccc2cnccc12.O=C(OO)c1cc(=O)[O-])c(Sc2c(Cl)...  
Top-3   Nc1cccc2cnccc12.O=C(O)c1cc([O-])s3)c(Sc2c(Cl)c...  
Top-4   Nc1cccc2cnccc12.O=C(O)c1cc(=O)[O-])c(Sc2c(Cl)c...  
Top-5         Nc1cccc2cnccc12.O=C(Cl)c1cc([N+](=O)[O-])s1  
Top-6           Nc1cccc2cnccc12.O=C(O)c1cc2cnc2c1)c1cc1Cl  
T

### 2.2 Forward-synthesis Planning


In [12]:
# Forward synthesis with the USPTO-480k model: reactant SMILES in, ranked
# product predictions out.
uspto_480k_model_path = "./model_path/USPTO_480k"

# Each entry lists the reactants/reagents of one reaction, separated by ".".
rct_smiles_lst = [
    'C1CCOC1.CC(C)C[Mg+].CON(C)C(=O)c1ccc(O)nc1.[Cl-]',
    'CN.O.O=C(O)c1ccc(Cl)c([O-])c1',
    'CCn1cc(C(=O)O)c(=O)c2cc(F)c(-c3ccc(N)cc3)cc21.O=CO'
]

pdt_preds = reaction_prediction(
    uspto_480k_model_path,
    rct_smiles_lst,
    task_type="forward-synthesis"
)

# End on the bare expression rather than print(): the notebook then renders the
# result with its rich display, matching the property-prediction cells.
pdt_preds

Processing...
100%|██████████| 3/3 [00:00<00:00, 497.17it/s]

Model loaded successfully!
[INFO] 3 Saving...



Done!


Done!
                                0                        1  \
Top-1      CC(C)CC(=O)c1ccc(O)nc1   CNC(=O)c1ccc(Cl)c(O)c1   
Top-2                   CC(C)CC(O        CNc1ccc(Cl)c(O)c1   
Top-3      CC(C)CC(Oc1ccc(C=O)cn1        NCc1ccc(Cl)c(O)c1   
Top-4   CC(C)CC(=O)c1ccc([O-])nc1       NCCc1ccc(Cl)c(O)c1   
Top-5                 CC(C)Cc1C(O  NC(=O.O=C([O-])c1ccc(Cl   
Top-6               CC(C)CCc1)CCC           NCc1ccc(Cl)cc1   
Top-7      CC(C)CCc1)CC(=O)CC(C)C    NCCC(=O)O)c1ccc2c(O)c   
Top-8            CC(C)CCc1)CC(=O)                    CN(=O   
Top-9      CC(C)CCc1)CC1O)C(=O)c1                   O=C(=O   
Top-10                      [Cl-]   NCCC(=O)O)c1cccC(Cl)CO   

                                                        2  
Top-1    CCn1cc(C(=O)O)c(=O)c2cc(F)c(-c3ccc(NC=O)cc3)cc21  
Top-2       CCn1cc(C(=O)O)c(=O)c2cc(F)c(-c3ccc(N)cc3)cc21  
Top-3     CCn1cc(C(=O)O)c(=O)c2cc(F)c(-c3ccccc3C=O)cc3)cc  
Top-4    CCn1cc(C(=O)O)c(=O)c2cc(F)c(-c3ccc(NC)c4cc3)cc21  
Top-5      

## 3. Using Pretrained Models
### 3.1 Using Pretrained Models


In [13]:
from rxngraphormer.rxn_emb import RXNEMB

# Reaction embeddings from the pretrained classification model.
pretrain_model_path = "./model_path/pretrained_classification_model"
rxnemb_calc_pretrained = RXNEMB(pretrained_model_path=pretrain_model_path, model_type="classifier")

# Corrected reaction SMILES list (<FileRef> tags removed).
# NOTE(review): the tag removal may have dropped characters as well - e.g. the
# second reaction has a bare `[O-]` substituent where the product carries an
# amine. Confirm the strings against the original data.
rxn_emb_pretrained = rxnemb_calc_pretrained.gen_rxn_emb([
    "C1CCCCC1.CCO.CS(=O)(=O)N1CCN(Cc2ccccc2)CC1.[OH-].[OH-].[Pd+2]>>CS(=O)(=O)N1CCNCC1",
    "CCOC(C)=O.Cc1cc([O-])ccc1NC(=O)c1ccccc1.Cl[Sn]Cl.O.O.O=C([O-])O.[Na+]>>Cc1cc(N)ccc1NC(=O)c1ccccc1",
    "COc1ccc(-c2coc3ccc(-c4nnc(S)o4)cc23)cc1.COc1ccc(CCl)cc1F>>COc1ccc(-c2coc3ccc(-c4nnc(SCc5ccc(OC)c(F)c5)o4)cc23)cc1"
])

# End on the bare expression rather than print() so the embeddings are shown as
# the cell's value, matching the property-prediction cells.
rxn_emb_pretrained

Processing...
100%|██████████| 3/3 [00:00<00:00, 356.11it/s]
Done!
Processing...
100%|██████████| 3/3 [00:00<00:00, 657.31it/s]
Done!
100%|██████████| 1/1 [00:00<00:00, 61.72it/s]

[INFO] There are 1 data files in total
[INFO] All data 1 files will be used
[INFO] ./rxn_emb_tmp/rct_smiles_0.csv is processing...
[INFO] 3 data index 0 is saving...
[INFO] There are 1 data files in total
[INFO] All data 1 files will be used
[INFO] ./rxn_emb_tmp/pdt_smiles_0.csv is processing...
[INFO] 3 data index 0 is saving...
[INFO] Generating reaction embedding...
tensor([[-0.8374, -1.2715, -2.1796,  ..., -1.1740, -0.9580, -0.3386],
        [-1.3530,  0.0987, -1.7155,  ..., -0.0869,  0.0523, -1.0340],
        [-1.2583,  0.0305, -1.0679,  ..., -0.7572, -0.3230, -1.2023]])





### 3.2 Using Fine-tuned Models


In [14]:
from rxngraphormer.rxn_emb import RXNEMB

# Reaction embeddings from the fine-tuned Buchwald-Hartwig model (seed0),
# loaded as a regressor.
finetune_model_path = "./model_path/buchwald_hartwig/seed0"
rxnemb_calc_finetuned = RXNEMB(pretrained_model_path=finetune_model_path, model_type="regressor")

# One reaction SMILES ("reactants>>product") per embedding row.
rxn_emb_finetuned = rxnemb_calc_finetuned.gen_rxn_emb([
    "CC(C)c1cc(C(C)C)c(-c2ccccc2P(C2CCCCC2)C2CCCCC2)c(C(C)C)c1.CN(C)C(=NC(C)(C)C)N(C)C.Cc1ccc(N)cc1.FC(F)(F)c1ccc(I)cc1.c1ccc(-c2ccon2)cc1>>Cc1ccc(Nc2ccc(C(F)(F)F)cc2)cc1",
    "CCN=P(N=P(N(C)C)(N(C)C)N(C)C)(N(C)C)N(C)C.COc1ccc(OC)c(P([C@H]23C[C@H]4C2C3)[C@]23C[C@H]4C2C3)c1-c1c(C(C)C)cc(C(C)C)cc1C(C)C.Cc1ccc(N)cc1.Ic1cccnc1.c1ccc(-c2ccon2)cc1>>Cc1ccc(Nc2cccnc2)cc1",
    "CC(C)c1cc(C(C)C)c(-c2ccccc2P(C2CCCCC2)C2CCCCC2)c(C(C)C)c1.CCc1ccc(I)cc1.CN1CCCN2CCCN=C12.Cc1ccc(N)cc1.c1ccc2oncc2c1>>CCc1ccc(Nc2ccc(C)cc2)cc1"
])

# End on the bare expression rather than print() so the embeddings are shown as
# the cell's value, matching the property-prediction cells.
rxn_emb_finetuned

Processing...
100%|██████████| 3/3 [00:00<00:00, 230.87it/s]
Done!
Processing...
100%|██████████| 3/3 [00:00<00:00, 767.48it/s]
Done!
100%|██████████| 1/1 [00:00<00:00, 64.02it/s]

[INFO] There are 1 data files in total
[INFO] All data 1 files will be used
[INFO] ./rxn_emb_tmp/rct_smiles_0.csv is processing...
[INFO] 3 data index 0 is saving...
[INFO] There are 1 data files in total
[INFO] All data 1 files will be used
[INFO] ./rxn_emb_tmp/pdt_smiles_0.csv is processing...
[INFO] 3 data index 0 is saving...
[INFO] Generating reaction embedding...
tensor([[ 0.8051, -0.3321, -1.2536,  ..., -0.2433,  0.1513, -0.1720],
        [-0.4195,  1.3716,  1.0248,  ..., -0.2249,  0.1240, -0.1813],
        [-0.8056,  0.8913, -0.4347,  ..., -0.2383,  0.1632, -0.1786]])



