1. This notebook converts molecular conformers into SPMS descriptors.
2. The SPMS descriptors of the CPAs, imines and thiols are saved to **"./SPMS_Desc/cat.npy"**, **"./SPMS_Desc/imine.npy"** and **"./SPMS_Desc/thiol.npy"**, respectively.
3. All cells must be executed sequentially, from top to bottom.
4. This Jupyter notebook should be run on **Linux** (it calls shell commands such as `unzip` and `ls`).

## Load original files

In [None]:
import glob,os
from spms.desc import SPMS
import pandas as pd
import numpy as np

In [None]:
if not os.path.exists('./CPA') and not os.path.exists('./imine') and not os.path.exists('./thiol'):
    !unzip ./Conformers.zip

In [None]:
# IPython shell escape: list the contents of ./Reaction_Result/.
! ls ./Reaction_Result/

In [None]:
# Read the reaction table and pull the three SMILES columns and the 'Output'
# column out as plain Python lists (one entry per reaction row).
result_df = pd.read_csv('./Reaction_Result/Denmark_Reaction_Data.csv')
cat_smiles, imine_smiles, thiol_smiles, ddG = (
    result_df[column].to_list()
    for column in ('Catalyst', 'Imine', 'Thiol', 'Output')
)

# De-duplicated SMILES of each reaction component (order is not significant).
cat_smiles_set = [*set(cat_smiles)]
imine_smiles_set = [*set(imine_smiles)]
thiol_smiles_set = [*set(thiol_smiles)]

In [None]:
# Collect the conformer SDF paths of each reaction component in one pass.
cpa_sdf_files, imine_sdf_files, thiol_sdf_files = map(
    glob.glob,
    ('./CPA/*.sdf', './imine/*.sdf', './thiol/*.sdf'),
)

## Calculate SPMS

### CPA

In [None]:
# Order the CPA conformer files by the integer in the second '_'-separated
# field of the file name. int() replaces eval(): it gives the same ordering for
# plain integers without executing file-name text as Python code.
cpa_sdf_files.sort(key=lambda path: int(path.split('/')[-1].split('_')[1]))

# First pass: build an SPMS object for every conformer without a preset radius
# and record the sphere radius it reports after geometry standardisation.
all_radius = []
for tmp_sdf_file in cpa_sdf_files:
    spms_calc = SPMS(tmp_sdf_file, key_atom_num=[3], desc_n=40, desc_m=40)
    # Method name is spelled this way (sic) in the call the notebook relies on.
    spms_calc._Standarlize_Geomertry()
    all_radius.append(spms_calc.sphere_radius)
all_radius = np.array(all_radius)

# The largest per-conformer radius becomes the shared radius for all CPAs.
default_radius = all_radius.max()

In [None]:
# Display the shared CPA sphere radius (maximum over all CPA conformers).
default_radius

In [None]:
# Display the sorted list of CPA conformer SDF paths.
cpa_sdf_files

In [None]:
### Running this cell may take from a few seconds to a few minutes
# Second pass over the CPA conformers: compute the SPMS descriptor of each one
# using the shared sphere radius, then stack the results into a single array.
cpa_descriptors = []
for sdf_path in cpa_sdf_files:
    calc = SPMS(
        sdf_path,
        key_atom_num=[3],
        desc_n=40,
        desc_m=40,
        sphere_radius=default_radius,
    )
    calc.GetSphereDescriptors()
    cpa_descriptors.append(calc.sphere_descriptors)
all_desc = np.array(cpa_descriptors)

In [None]:
# Group the CPA conformer descriptors by catalyst. The first line of each SDF
# file is used as the SMILES key and must match an entry of cat_smiles_set.
cat_desc_dict = {smiles: [] for smiles in cat_smiles_set}
for tmp_sdf_file, tmp_desc in zip(cpa_sdf_files, all_desc):
    with open(tmp_sdf_file, 'r') as fr:
        # Only the first line is needed, so do not read the whole file.
        tmp_smiles = fr.readline().strip()
    cat_desc_dict[tmp_smiles].append(tmp_desc)

# Average the descriptors over all conformers of each catalyst.
new_cat_desc_dict = {
    tmp_key: np.average(np.array(cat_desc_dict[tmp_key]), axis=0)
    for tmp_key in cat_smiles_set
}

### imine

In [None]:
# Order the imine conformer files by the integer in the second '_'-separated
# field of the file name. int() replaces eval(): it gives the same ordering for
# plain integers without executing file-name text as Python code.
imine_sdf_files.sort(key=lambda path: int(path.split('/')[-1].split('_')[1]))

# First pass: build an SPMS object for every conformer without a preset radius
# and record the sphere radius it reports after geometry standardisation.
# The key atoms are [9, 10] so that the radius is measured with the same
# key_atom_num the imine descriptor pass uses; the previous [10] measured the
# radius under a different setting than the one the descriptors are built with.
# NOTE(review): confirm [9, 10] (not [10]) is the intended imine key-atom set.
all_radius = []
for tmp_sdf_file in imine_sdf_files:
    spms_calc = SPMS(tmp_sdf_file, key_atom_num=[9, 10], desc_n=40, desc_m=40)
    # Method name is spelled this way (sic) in the call the notebook relies on.
    spms_calc._Standarlize_Geomertry()
    all_radius.append(spms_calc.sphere_radius)
all_radius = np.array(all_radius)

# The largest per-conformer radius becomes the shared radius for all imines.
default_radius = all_radius.max()

In [None]:
# Display the sorted list of imine conformer SDF paths.
imine_sdf_files

In [None]:
# Second pass over the imine conformers: compute the SPMS descriptor of each
# one using the shared sphere radius, then stack the results into one array.
imine_descriptors = []
for sdf_path in imine_sdf_files:
    calc = SPMS(
        sdf_path,
        key_atom_num=[9, 10],
        desc_n=40,
        desc_m=40,
        sphere_radius=default_radius,
    )
    calc.GetSphereDescriptors()
    imine_descriptors.append(calc.sphere_descriptors)
all_desc = np.array(imine_descriptors)


In [None]:
# Group the imine conformer descriptors by imine. The first line of each SDF
# file is used as the SMILES key and must match an entry of imine_smiles_set.
imine_desc_dict = {smiles: [] for smiles in imine_smiles_set}
for tmp_sdf_file, tmp_desc in zip(imine_sdf_files, all_desc):
    with open(tmp_sdf_file, 'r') as fr:
        # Only the first line is needed, so do not read the whole file.
        tmp_smiles = fr.readline().strip()
    imine_desc_dict[tmp_smiles].append(tmp_desc)

# Average the descriptors over all conformers of each imine.
new_imine_desc_dict = {
    tmp_key: np.average(np.array(imine_desc_dict[tmp_key]), axis=0)
    for tmp_key in imine_smiles_set
}

### thiol

In [None]:
# Key-atom list for each thiol; entry k is used for files 20*k .. 20*k+19 of
# the sorted file list (see the i // 20 lookup below).
# NOTE(review): this presumes exactly 20 conformer files per thiol and at most
# five thiols - confirm against the contents of ./thiol.
thiol_key = [[1], [1], [1], [1], [3]]

# Order the thiol conformer files by the integer in the second '_'-separated
# field of the file name. int() replaces eval(): it gives the same ordering for
# plain integers without executing file-name text as Python code.
thiol_sdf_files.sort(key=lambda path: int(path.split('/')[-1].split('_')[1]))

# First pass: build an SPMS object for every conformer without a preset radius
# and record the sphere radius it reports after geometry standardisation.
all_radius = []
for i, tmp_sdf_file in enumerate(thiol_sdf_files):
    tmp_key_atom = thiol_key[i // 20]
    spms_calc = SPMS(tmp_sdf_file, key_atom_num=tmp_key_atom, desc_n=40, desc_m=40)
    # Method name is spelled this way (sic) in the call the notebook relies on.
    spms_calc._Standarlize_Geomertry()
    all_radius.append(spms_calc.sphere_radius)
all_radius = np.array(all_radius)

# The largest per-conformer radius becomes the shared radius for all thiols.
default_radius = all_radius.max()

In [None]:
# Display the sorted list of thiol conformer SDF paths.
thiol_sdf_files

In [None]:
# Display the shared thiol sphere radius (maximum over all thiol conformers).
default_radius

In [None]:
# Second pass over the thiol conformers: compute the SPMS descriptor of each
# one using the shared sphere radius, then stack the results into one array.
# Each run of 20 consecutive files shares one entry of thiol_key.
thiol_descriptors = []
for file_idx, sdf_path in enumerate(thiol_sdf_files):
    calc = SPMS(
        sdf_path,
        key_atom_num=thiol_key[file_idx // 20],
        desc_n=40,
        desc_m=40,
        sphere_radius=default_radius,
    )
    calc.GetSphereDescriptors()
    thiol_descriptors.append(calc.sphere_descriptors)
all_desc = np.array(thiol_descriptors)

In [None]:
# Group the thiol conformer descriptors by thiol. The first line of each SDF
# file is used as the SMILES key and must match an entry of thiol_smiles_set.
thiol_desc_dict = {smiles: [] for smiles in thiol_smiles_set}
for tmp_sdf_file, tmp_desc in zip(thiol_sdf_files, all_desc):
    with open(tmp_sdf_file, 'r') as fr:
        # Only the first line is needed, so do not read the whole file.
        tmp_smiles = fr.readline().strip()
    thiol_desc_dict[tmp_smiles].append(tmp_desc)

# Average the descriptors over all conformers of each thiol.
new_thiol_desc_dict = {
    tmp_key: np.average(np.array(thiol_desc_dict[tmp_key]), axis=0)
    for tmp_key in thiol_smiles_set
}

## Generate reaction SPMS

In [None]:
# Build one descriptor array per reaction component, with one row per reaction
# in the order of the reaction table, by looking up each reaction's SMILES in
# the conformer-averaged descriptor dictionaries.
react_cat_desc = np.array([new_cat_desc_dict[item] for item in cat_smiles])
react_imine_desc = np.array([new_imine_desc_dict[item] for item in imine_smiles])
react_thiol_desc = np.array([new_thiol_desc_dict[item] for item in thiol_smiles])

# Create the output folder if needed; exist_ok avoids the separate
# check-then-create step.
os.makedirs('./SPMS_Desc', exist_ok=True)
np.save('./SPMS_Desc/cat.npy', react_cat_desc)
np.save('./SPMS_Desc/imine.npy', react_imine_desc)
np.save('./SPMS_Desc/thiol.npy', react_thiol_desc)