This notebook is a tutorial for generating all molecular descriptors used in the paper *Towards Data-driven Design of Asymmetric Hydrogenation of Olefins: Database and Hierarchical Learning*

# load dependence

In [12]:
import numpy as np
import pandas as pd
import glob,os
from ase import Atoms
from gendesc import generate2Ddesc,generate3Ddesc,getusidx,getmorganfp
from dscribe.descriptors import MBTR
from rdkit import Chem
from rdkit.Chem.rdMolDescriptors import GetMorganFingerprintAsBitVect

# Download dataset

In [13]:
if not os.path.exists('./hierarchical_learning.zip'):
    ! wget http://spmsgen.net:8000/download/hierarchical_learning.zip
else:
    ! unzip ./hierarchical_learning.zip
    ! mv ./hierarchical_learning/ ./data/

Archive:  ./hierarchical_learning.zip
   creating: hierarchical_learning/
  inflating: hierarchical_learning/cat_mbtr_desc.csv  
  inflating: hierarchical_learning/cat_mf_desc.csv  
  inflating: hierarchical_learning/hierarchical_learning.npz  
  inflating: hierarchical_learning/hl_cat_deom_name_smi_map.csv  
   creating: hierarchical_learning/hl_cat_geoms_demo/
   creating: hierarchical_learning/hl_cat_geoms_demo/cat_0/
 extracting: hierarchical_learning/hl_cat_geoms_demo/cat_0/.xtboptok  
  inflating: hierarchical_learning/hl_cat_geoms_demo/cat_0/LOG  
  inflating: hierarchical_learning/hl_cat_geoms_demo/cat_0/cat_0.xyz  
  inflating: hierarchical_learning/hl_cat_geoms_demo/cat_0/charges  
  inflating: hierarchical_learning/hl_cat_geoms_demo/cat_0/md.inp  
  inflating: hierarchical_learning/hl_cat_geoms_demo/cat_0/mdrestart  
  inflating: hierarchical_learning/hl_cat_geoms_demo/cat_0/wbo  
  inflating: hierarchical_learning/hl_cat_geoms_demo/cat_0/xtb.trj  
 extracting: hierarchical_

  inflating: hierarchical_learning/hl_cat_geoms_demo/cat_3/xtbopt.log  
  inflating: hierarchical_learning/hl_cat_geoms_demo/cat_3/xtbopt.xyz  
  inflating: hierarchical_learning/hl_cat_geoms_demo/cat_3/xtbrestart  
  inflating: hierarchical_learning/hl_cat_geoms_demo/cat_3/xtbtopo.mol  
   creating: hierarchical_learning/hl_cat_geoms_demo/cat_30/
 extracting: hierarchical_learning/hl_cat_geoms_demo/cat_30/.xtboptok  
  inflating: hierarchical_learning/hl_cat_geoms_demo/cat_30/LOG  
  inflating: hierarchical_learning/hl_cat_geoms_demo/cat_30/cat_30.xyz  
  inflating: hierarchical_learning/hl_cat_geoms_demo/cat_30/charges  
  inflating: hierarchical_learning/hl_cat_geoms_demo/cat_30/md.inp  
  inflating: hierarchical_learning/hl_cat_geoms_demo/cat_30/mdrestart  
  inflating: hierarchical_learning/hl_cat_geoms_demo/cat_30/wbo  
  inflating: hierarchical_learning/hl_cat_geoms_demo/cat_30/xtb.trj  
 extracting: hierarchical_learning/hl_cat_geoms_demo/cat_30/xtbmdok  
  inflat

  inflating: hierarchical_learning/hl_cat_geoms_demo/cat_47/xtbopt.log  
  inflating: hierarchical_learning/hl_cat_geoms_demo/cat_47/xtbopt.xyz  
  inflating: hierarchical_learning/hl_cat_geoms_demo/cat_47/xtbrestart  
  inflating: hierarchical_learning/hl_cat_geoms_demo/cat_47/xtbtopo.mol  
   creating: hierarchical_learning/hl_cat_geoms_demo/cat_48/
 extracting: hierarchical_learning/hl_cat_geoms_demo/cat_48/.xtboptok  
  inflating: hierarchical_learning/hl_cat_geoms_demo/cat_48/LOG  
  inflating: hierarchical_learning/hl_cat_geoms_demo/cat_48/cat_48.xyz  
  inflating: hierarchical_learning/hl_cat_geoms_demo/cat_48/charges  
  inflating: hierarchical_learning/hl_cat_geoms_demo/cat_48/md.inp  
  inflating: hierarchical_learning/hl_cat_geoms_demo/cat_48/mdrestart  
  inflating: hierarchical_learning/hl_cat_geoms_demo/cat_48/wbo  
  inflating: hierarchical_learning/hl_cat_geoms_demo/cat_48/xtb.trj  
 extracting: hierarchical_learning/hl_cat_geoms_demo/cat_48/xtbmdok  
  in

mv: cannot move './hierarchical_learning/' to './data/hierarchical_learning': Directory not empty


# read data file

For demonstration, we just operate on 5 entries data here

In [14]:
df = pd.read_csv('./data/data_demo.csv')   ### 
df

Unnamed: 0,Reactant SMILES,Product SMILES,Solvent SMILES,Additive SMILES,Metal,Ligand SMILES,Catalyst SMILES(RDKit),Pressure/atm,Temperature/C,S/C,ddG,ee,Scaffold type,Multi-scaffold type,Olefin Type
0,CCOC(=O)/C=C(\C)c1ccccc1,CCOC(=O)C[C@@H](C)c1ccccc1,ClCCl,,Ir,Cc1ccccc1P(c1ccccc1C)N1[C@H]2CC[C@H](C2)[C@@H]...,Cc1ccccc1P(c1ccccc1C)8->N1[C@H]2CC[C@H](C2)[C@...,49,25,200,2.720953,0.98,"P,N type","['P,N type']",tri-sub
1,C/C(=C\c1ccccc1)C(=O)Cc1ccccc1,C[C@@H](Cc1ccccc1)C(=O)Cc1ccccc1,ClCCl,,Ir,Cc1ccccc1P(c1ccccc1C)N1[C@H]2CC[C@H](C2)[C@@H]...,Cc1ccccc1P(c1ccccc1C)8->N1[C@H]2CC[C@H](C2)[C@...,49,25,200,2.477863,0.97,"P,N type","['P,N type']",tri-sub
2,CC(=O)NC1=C(C)CCCC1,CC(=O)N[C@H]1CCCC[C@H]1C,CCOC(C)=O,,Rh,CC(C)[C@@H](Oc1cccc2c1P(C(C)(C)C)[C@H]([C@@H]1...,CC(C)[C@@H](Oc1cccc2c1P(C(C)(C)C)8->[C@H]([C@@...,34,25,100,1.881852,0.92,double P type,['double P type'],tetra-sub
3,CC(=O)NC(C)=C(C)C,CC(=O)N[C@@H](C)C(C)C,CO,,Rh,CC(C)[C@@H](Oc1cccc2c1P(C(C)(C)C)[C@H]([C@@H]1...,CC(C)[C@@H](Oc1cccc2c1P(C(C)(C)C)8->[C@H]([C@@...,34,25,100,0.715489,0.54,double P type,['double P type'],tetra-sub
4,COC(=O)/C(=C\c1ccccc1)NC(C)=O,COC(=O)[C@@H](Cc1ccccc1)NC(C)=O,ClCCl,,Rh,Cc1ccccc1OP(Oc1ccccc1C)N(c1ccccc1C)N(c1ccccc1C...,Cc1ccccc1OP(Oc1ccccc1C)8->N(c1ccccc1C)N(c1cccc...,5,25,100,0.083035,0.07,binol type,"['binol type', 'double P type']",tri-sub


# generate 2D/3D descriptor map

*generate2Ddesc*,*generate3Ddesc* modules in *gendesc* can be used to generate 2D descriptors including **MolecularFingerprint (MF)**, **200 molecular descriptors built-in RDKit**, and **molecular descriptors built-in Mordred**, and 3D descriptors like **ACSF**, **MBTR**, **SOAP** and **LMBTR**. In addition, the paramters of these modules can be modified.

In [15]:
gen2d = generate2Ddesc(df)
rdkit_desc_map = gen2d.calc_rdkit_desc()
mf_desc_map = gen2d.calc_mf_desc()

In [16]:
gen3d = generate3Ddesc('./data/geoms_demo/')
acsf_desc_map = gen3d.calc_acsf_desc()
soap_desc_map = gen3d.calc_soap_desc()
lmbtr_desc_map = gen3d.calc_lmbtr_desc()
mbtr_desc_map  = gen3d.calc_mbtr_desc()

descriptor map demonstration

In [17]:
mbtr_desc_map

{'COC(=O)/C(=C\\c1ccccc1)NC(C)=O': array([0.00000000e+00, 7.27410018e-01, 3.12217744e-04, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        8.82276241e-03, 6.62920773e-01, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 2.40184181e-05,
        5.59546128e-02, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 7.40722214e-07, 1.67934418e-01,
        2.51550768e-02, 9.20745209e-02, 1.50286511e-01, 1.47988051e-01,
        1.35205656e-01, 1.17776804e-01, 5.92163280e-02, 1.14193503e-02,
        7.29998574e-04, 1.47099236e-05, 4.43520509e-02, 1.88884899e-01,
        3.65480870e-01, 4.1069

# generate reaction descritor

In this tutorial, we just generate MBTR+MF reaction descriptor for demonstration

In [18]:
re_smi = df['Reactant SMILES'].to_numpy()
pr_smi = df['Product SMILES'].to_numpy()
sol_smi = df['Solvent SMILES'].to_numpy()
cat_smi = df['Catalyst SMILES(RDKit)'].to_numpy()
press = df['Pressure/atm'].to_numpy().reshape(-1,1)
temp = df['Temperature/C'].to_numpy().reshape(-1,1)
s_c = df['S/C'].to_numpy().reshape(-1,1)
tag = df['ddG'].to_numpy().reshape(-1,1)

re_desc_1,re_desc_2 = np.array([mbtr_desc_map[tmp_smi] for tmp_smi in re_smi]),\
                      np.array([mf_desc_map[tmp_smi] for tmp_smi in re_smi])
pr_desc_1,pr_desc_2 = np.array([mbtr_desc_map[tmp_smi] for tmp_smi in pr_smi]),\
                      np.array([mf_desc_map[tmp_smi] for tmp_smi in pr_smi])
sol_desc_1,sol_desc_2 = np.array([mbtr_desc_map[tmp_smi] for tmp_smi in sol_smi]),\
                      np.array([mf_desc_map[tmp_smi] for tmp_smi in sol_smi])
cat_desc_1,cat_desc_2 = np.array([mbtr_desc_map[tmp_smi] for tmp_smi in cat_smi]),\
                      np.array([mf_desc_map[tmp_smi] for tmp_smi in cat_smi])
react_desc = np.concatenate([re_desc_1,re_desc_2,pr_desc_1,pr_desc_2,
                             sol_desc_1,sol_desc_2,cat_desc_1,cat_desc_2,press,temp,s_c],axis=1)
usidx = getusidx(react_desc)
react_desc = react_desc[:,usidx]
react_desc = (react_desc-react_desc.min(axis=0))/(react_desc.max(axis=0)-react_desc.min(axis=0))

In [19]:
react_desc.shape

(5, 3330)

In addition, **MBTR** and **MF** can be calculated with original API in **dscribe** and **rdkit**. Here, we use *target substrate set* and *test set* in the paper for demonstration.
For calculating **MBTR** and **MF**, we need define some paramters firstly.

In [20]:
k1={
        "geometry": {"function": "atomic_number"},
        "grid": {"min": 0, "max": 8, "n": 50, "sigma": 0.1},
    }
k2={
        "geometry": {"function": "inverse_distance"},
        "grid": {"min": 0, "max": 4, "n": 50, "sigma": 0.1},
        "weighting": {"function": "exponential", "scale": 0.5, "cutoff": 1e-3},
    }
k3={
        "geometry": {"function": "cosine"},
        "grid": {"min": -1, "max": 4, "n": 50, "sigma": 0.1},
        "weighting": {"function": "exponential", "scale": 0.5, "cutoff": 1e-3},
    }
mbtr = MBTR(
    species=["H","B","C","N","O","F","P","S","Fe","Rh"],
    k1=k1,
    k2=k2,
    k3=k3,
    periodic=False,
    normalization="l2_each",
)

In [21]:
df_2 = pd.read_csv('./data/hierarchical_learning/hl_target_substrate_test_set_demo.csv')
cat_trj_files = glob.glob('./data/hierarchical_learning/hl_cat_geoms_demo/*/xtb.trj')
cat_name_smi_df = pd.read_csv('./data/hierarchical_learning/hl_cat_deom_name_smi_map.csv')
cat_fn_smi_map = {cat_name_smi_df['file'].to_list()[i]:cat_name_smi_df['SMILES'].to_list()[i]
    for i in range(len(cat_name_smi_df['SMILES'].to_list()))}
cat_smi_ = cat_name_smi_df['SMILES'].to_list()
cat_smiles = df_2['Catalyst SMILES(RDKit)'].to_list()

Calculate MBTR descriptors based on 3D structures of molecules

In [22]:
file_fn = []
all_mbtr = []
for tmp_file in cat_trj_files:
    tmp_fn = tmp_file.split('/')[-2]
    file_fn.append(tmp_fn)
    with open(tmp_file,'r') as fr:
        lines = fr.readlines()
    atom_num = eval(lines[0].strip())
    mol_num = int(len(lines)/(atom_num+2))
    tmp_desc = []
    for i in range(mol_num):
        coord_string = lines[(2+atom_num)*i+2:(2+atom_num)*(i+1)]
        syms = [tmp_item.split()[0] for tmp_item in coord_string]
        coord = np.array([list(map(eval,coord_string[i].strip().split()[1:])) 
                          for i in range(len(coord_string))])
        tmp_atom = Atoms(syms,coord)
        tmp_desc.append(mbtr.create(tmp_atom).reshape(-1,))
    tmp_desc = np.mean(tmp_desc,axis=0)
    all_mbtr.append(tmp_desc)
all_mbtr = np.array(all_mbtr)
all_mbtr = all_mbtr[:,np.where(all_mbtr.max(axis=0) - all_mbtr.min(axis=0)!=0)[0]]
all_mbtr = (all_mbtr-all_mbtr.min(axis=0))/(all_mbtr.max(axis=0)-all_mbtr.min(axis=0))
file_mbtr_desc_map = {tmp_fn:tmp_desc for tmp_fn,tmp_desc in zip(file_fn,all_mbtr)}

Calculate MF descriptors based on 2D topological structures of molecules

In [23]:
cat_mols = [Chem.MolFromSmiles(tmp_smi) for tmp_smi in cat_smi_]
cat_fp = np.array([getmorganfp(tmp_mol,6,2048) for tmp_mol in cat_mols])
cat_fp = cat_fp[:,np.where(cat_fp.max(axis=0) - cat_fp.min(axis=0)!=0)[0]]
cat_fp = (cat_fp-cat_fp.min(axis=0))/(cat_fp.max(axis=0)-cat_fp.min(axis=0))

Save these descriptors, we will use these descriptors in the following tutorials

In [24]:
cat_smi_mf_map = {tmp_smi:tmp_desc for tmp_smi,tmp_desc in zip(cat_smi_,cat_fp)}
cat_smi_mbtr_map = {cat_fn_smi_map[tmp_key]:file_mbtr_desc_map[tmp_key] for tmp_key in file_mbtr_desc_map}
cat_smi_mf_df = pd.DataFrame.from_dict(cat_smi_mf_map).T
cat_smi_mbtr_df = pd.DataFrame.from_dict(cat_smi_mbtr_map).T
cat_smi_mf_df.to_csv('./data/hierarchical_learning/cat_mf_desc.csv')
cat_smi_mbtr_df.to_csv('./data/hierarchical_learning/cat_mbtr_desc.csv')

In [25]:
cat_smi_mf_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1591,1592,1593,1594,1595,1596,1597,1598,1599,1600
C=Cc1ccc(Np8->2oc3ccc4ccccc4c3c3c(ccc4ccccc43)o2)cc1.[Rh+]8,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CC(C)(C)[C@H](Op9->1oc2ccc3ccccc3c2c2c(ccc3ccccc32)o1)P(c1ccccc1)8->c1ccccc1.[Rh+]89,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
CC(C)N(C(C)C)P8->1OC(c2ccccc2)(c2ccccc2)[C@@H]2OC3(CCCC3)O[C@H]2C(c2ccccc2)(c2ccccc2)O1.[Rh+]8,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CC(C)N(C(C)C)p18->oc2c(P(c3ccccc3)c3ccccc3)cc3ccccc3c2c2c(o1)c(P(c1ccccc1)c1ccccc1)cc1ccccc12.[Rh+]8,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CC(C)N(CCCN(C(C)C)p8->1oc2ccc3ccccc3c2c2c(ccc3ccccc32)o1)p9->1oc2ccc3ccccc3c2c2c(ccc3ccccc32)o1.[Rh+]89,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
C[C@@H](C1=C[C@H]([Fe]C2C=CC=C2)C=C1P(c1ccccc1)8->c1ccccc1)n1nnc(-c2ccccc2F)c1P(c1ccccc1)9->c1ccccc1.[Rh+]89,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
C[C@H](C1=C(P(c2ccccc2)8->c2ccccc2)C=C[C@@H]1[Fe]C1C=CC=C1)N(C)P(c1ccccc1)9->c1ccccc1.[Rh+]89,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
C[C@H](C1=C(P(c2ccccc2)8->c2ccccc2)C=C[C@@H]1[Fe]C1C=CC=C1)N(CC(C)(C)C)P(c1ccccc1)9->c1ccccc1.[Rh+]89,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Cc1cc(C)cc(P(c2cc(C)cc(C)c2)8->N(C)[C@H](C)C2=C(P(c3ccccc3)9->c3ccccc3)C=C[C@@H]2[Fe]C2C=CC=C2)c1.[Rh+]89,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [26]:
cat_smi_mbtr_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,7790,7791,7792,7793,7794,7795,7796,7797,7798,7799
C[C@@H](C1=C[C@H]([Fe]C2C=CC=C2)C=C1P(c1ccccc1)8->c1ccccc1)n1nnc(-c2ccccc2F)c1P(c1ccccc1)9->c1ccccc1.[Rh+]89,0.231742,0.231742,0.231741,0.231741,0.231742,0.231742,0.231741,0.0,0.0,0.0,...,1.00000,0.405337,0.054264,0.007460,0.000395,0.000010,0.000000,0.0,0.0,0.0
CN(C)P8->1Oc2cccc3c2C2(CC3)CCc3cccc(c32)O1.[Rh+]8,0.474662,0.474661,0.474661,0.474661,0.474661,0.474661,0.474661,0.0,0.0,0.0,...,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
Cc1cc(C)cc([B-](c2cc(C)cc(C)c2)(c2cc(C)cc(C)c2)c2c(F)c(F)c(CN(C)p8->3oc4ccc5ccccc5c4c4c(ccc5ccccc54)o3)c(F)c2F)c1.[Rh+]8,0.267239,0.267238,0.267238,0.267239,0.267238,0.267239,0.267239,1.0,1.0,1.0,...,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
CC(C)N(CCCN(C(C)C)p8->1oc2ccc3ccccc3c2c2c(ccc3ccccc32)o1)p9->1oc2ccc3ccccc3c2c2c(ccc3ccccc32)o1.[Rh+]89,0.324049,0.324049,0.324049,0.324049,0.324049,0.324049,0.324049,0.0,0.0,0.0,...,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
C[C@@H](c1ccccc1)N(CCCN([C@@H](C)c1ccccc1)p8->1oc2ccc3ccccc3c2c2c(ccc3ccccc32)o1)p9->1oc2ccc3ccccc3c2c2c(ccc3ccccc32)o1.[Rh+]89,0.225604,0.225603,0.225604,0.225603,0.225603,0.225604,0.225604,0.0,0.0,0.0,...,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CC(C)P8->1Oc2cccc3c2C2(CC3)CCc3cccc(c32)O1.[Rh+]8,0.474085,0.474084,0.474084,0.474084,0.474084,0.474084,0.474084,0.0,0.0,0.0,...,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
Fc1c(F)c(F)c(CN2CCN(p8->3oc4ccc5ccccc5c4c4c(ccc5ccccc54)o3)CC2)c(F)c1F.[Rh+]8,0.083332,0.083332,0.083332,0.083332,0.083332,0.083332,0.083332,0.0,0.0,0.0,...,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
Cc1cc(C)cc(P(c2cc(C)cc(C)c2)8->N(C)[C@H](C)C2=C(P(c3ccccc3)9->c3ccccc3)C=C[C@@H]2[Fe]C2C=CC=C2)c1.[Rh+]89,0.476977,0.476977,0.476977,0.476977,0.476977,0.476977,0.476978,0.0,0.0,0.0,...,0.96167,0.482781,0.086496,0.016704,0.001270,0.000046,0.000001,0.0,0.0,0.0
c1ccc2c(c1)ccc1op(N3CCSCC3)8->oc3ccc4ccccc4c3c12.[Rh+]8,0.248689,0.248689,0.248689,0.248689,0.248689,0.248689,0.248689,0.0,0.0,0.0,...,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
