## ASSUMPTION
This example assumes that a service is already up and running for us to use.

Copyright IBM Corp. 2022, 2023
@author James Strudwick IBM Research

## Dependencies

In [1]:
#For descriptors
import pandas as pd
import numpy as np
from rdkit.Chem import AllChem, Descriptors, MolFromSmiles
from mordred import Calculator, descriptors, error

In [2]:
#For PCA
# from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
import pandas as pd
from sklearn.preprocessing import StandardScaler
import numpy as np
from matplotlib import pyplot as plt

In [3]:
#For loading data
import pandas as pd
import numpy as np
# from io import StringIO

## Data recording

In [4]:
# Reagent SMILES, taken from cheminfo.org and PubChem.
# Original author's note on the groups recorded here:
# propane reactant, pentane product, side product, solvent, HFIP, acid catalyst.

# Substrates. Raw strings are required because the SMILES contain backslashes.
# 2-CH3 'CC1=CC=CC=C1/C=C2CC\2' was deleted due to no Hammett constant.
MCP_SMILES = [
    r'C1(/C=C2CC\2)=CC=CC=C1',
    r'BrC(C=C1)=CC=C1/C=C2CC\2',
    r'ClC(C=C1)=CC=C1/C=C2CC\2',
    r'FC(C=C1)=CC=C1/C=C2CC\2',
    r'O=[N+](C(C=C1)=CC=C1/C=C2CC\2)[O-]',
    r'COC(C=C1)=CC=C1/C=C2CC\2',
    r'CN(C(C=C1)=CC=C1/C=C2CC\2)C',
    r'CC(C)C(C=C1)=CC=C1/C=C2CC\2',
    r'C1(/C=C2CC\2)=CC=C(C3=CC=CC=C3)C=C1',
    r'CC1=CC(/C=C2CC\2)=CC=C1',
    r'BrC1=CC(/C=C2CC\2)=CC=C1',
    r'CC(C)C1=CC(/C=C2CC\2)=CC=C1',
    r'FC(C(C=C1)=CC=C1/C=C2CC\2)(F)F',
    r'CC(C=C1)=CC=C1/C=C2CC\2',
]

# Single-molecule entries.
Dimer_SMILES = 'c4ccc(C2=C(C1CC1)CCC2c3ccccc3)cc4'
byproduct1_SMILES = 'c4ccc(C3=C(c1ccccc1)C(C2CC2)CC3)cc4'
HFIP_SMILES = 'OC(C(F)(F)F)C(F)(F)F'
RATIO = 1 / 2

# Acid catalysts (one SMILES per entry, order preserved).
Catalyst_SMILES = [
    'CC1=CC=C(S(O)(=O)=O)C=C1',
    'O=S(C1=CC=CC=C1)(O)=O',
    'CS(=O)(O)=O',
    '[H]Cl',
    'F[B-](F)(F)F.[H+]',
    'CC1(C)[C@]2([H])CC[C@@]1(CS(O)(=O)=O)C(C2)=O',
    'OC1=C(F)C(F)=C(F)C(F)=C1F',
    'O=S(O)(C(F)(F)F)=O',
    '[H]Br',
    'F[P-](F)(F)(F)(F)F.[H+]',
    'O=C(O)C(F)(F)F',
    'O=P(O)(O)O',
    'O=Cl(=O)(O)=O',
    '[H]I',
    'O=P(OC1=CC=CC=C1)(OC2=CC=CC=C2)O',
    'O=S(NS(=O)(C(F)(F)F)=O)(C(F)(F)F)=O',
    'O=S(C1=CC=CC=C1C2=CN=NC(C3=NC=C(C4=CC=CC=C4S(=O)([O-])=O)C=C3)=N2)([O-])=O.[Na+].[Na+]',
    'CCCC[N+](CCCC)(CCCC)CCCC.[O-]S(=O)(O)=O',
    'O=P(OC1=CC=C2C(C=CC=C2)=C31)(O)OC4=C3C(C=CC=C5)=C5C=C4',
]

# Solvents.
solvent_SMILES = [
    'ClC(Cl)Cl',
    'ClCCCl',
    'ClCCl',
    'CCC(C)(O)C',
    'CC1=CC=CC=C1',
    'CC1=CC(C)=CC(C)=C1',
    'FC(F)(F)C1=CC=CC=C1',
    'FC1=C(F)C(F)=C(F)C(F)=C1F',
    'FC(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)F',
    'C[N+]([O-])=O',
    '[H]O[H]',
    'C1CCOC1',
    'O1CCOCC1',
]

# Cosolvents.
cosolvent_SMILES = [
    'FC(F)(F)C(O)C(F)(F)F',
    r'F/C(F)=C(F)\F',
    'FC(F)(F)C(C(F)(F)F)(O)C(F)(F)F',
    'CC(C)(O)C',
    'CC(O)C',
]

# Additives.
additives_SMILES = [
    'O=S([O-])([O-])=O.[Na+].[Na+]',
    'CC(C1=CC(C)=CC(C(C)(C)C)=C1O)(C)C',
    '[H]O[H]',
    '[O-][Si](=O)[O-].[O-][Si](=O)[O-].[Na+].[Al+3]',
    'O=C1CC[C@@]2([H])[C@]3([H])CC=C4C[C@@H](OC(C)=O)CC[C@]4(C)[C@@]3([H])CC[C@]12C',
    'CO',
    'O1CCOCC1',
]
# Parse every substrate SMILES with RDKit's MolFromSmiles, keeping the
# same order as MCP_SMILES.
rdkit_mols = list(map(MolFromSmiles, MCP_SMILES))

In [5]:
# Substrate name -> SMILES lookup.
# Every value must be a raw string: the SMILES contain a backslash followed by
# a digit, which a normal string literal would interpret as an octal escape
# (e.g. '\2' becomes the control character chr(2)) and silently corrupt the
# structure.
MCP_SMILES_dic = {
    'parent Ph-MCP': r'C1(/C=C2CC\2)=CC=CC=C1',
    '4-Br-Ph-MCP': r'BrC(C=C1)=CC=C1/C=C2CC\2',
    '4-Cl-Ph-MCP': r'ClC(C=C1)=CC=C1/C=C2CC\2',
    '4-F-Ph-MCP': r'FC(C=C1)=CC=C1/C=C2CC\2',
    '4-nitro-Ph-MCP': r'O=[N+](C(C=C1)=CC=C1/C=C2CC\2)[O-]',
    '4-MeO-Ph-MCP': r'COC(C=C1)=CC=C1/C=C2CC\2',
    '4-Me2N-Ph-MCP': r'CN(C(C=C1)=CC=C1/C=C2CC\2)C',
    '4-iPr-Ph-MCP': r'CC(C)C(C=C1)=CC=C1/C=C2CC\2',
    '4-Ph-Ph-MCP': r'C1(/C=C2CC\2)=CC=C(C3=CC=CC=C3)C=C1',
    '3-CH3-Ph-MCP': r'CC1=CC(/C=C2CC\2)=CC=C1',
    '3-Br-Ph-MCP': r'BrC1=CC(/C=C2CC\2)=CC=C1',
    # Intentionally left out of the mapping:
    # '2-CH3-Ph-MCP': r'CC1=CC=CC=C1/C=C2CC\2',
    '3-iPr-Ph-MCP': r'CC(C)C1=CC(/C=C2CC\2)=CC=C1',
    '4-CF3-Ph-MCP': r'FC(C(C=C1)=CC=C1/C=C2CC\2)(F)F',
    # Fixed: this entry was missing the r prefix, so '\2' was stored as chr(2).
    '4-CH3-Ph-MCP': r'CC(C=C1)=CC=C1/C=C2CC\2',
}

In [6]:
# pd.DataFrame(MCP_SMILES_dic.items()).to_csv('derivatives name.csv')

In [7]:
# Solvent name -> SMILES lookup (insertion order is preserved).
solvent_SMILES_dic = {
    'chloroform': 'ClC(Cl)Cl',
    '1,2-dichloroethane': 'ClCCCl',
    'dichloromethane': 'ClCCl',
    'tert-amyl alcohol': 'CCC(C)(O)C',
    'toluene': 'CC1=CC=CC=C1',
    'mesitylene': 'CC1=CC(C)=CC(C)=C1',
    'α,α,α-trifluorotoluene': 'FC(F)(F)C1=CC=CC=C1',
    'hexafluorobenzene': 'FC1=C(F)C(F)=C(F)C(F)=C1F',
    'perfluorohexane': 'FC(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)F',
    'nitromethane': 'C[N+]([O-])=O',
    'water': '[H]O[H]',
    'THF': 'C1CCOC1',
    '1,4-dioxane': 'O1CCOCC1',
}

In [8]:
# Catalyst name -> SMILES lookup (insertion order is preserved).
Catalyst_SMILES_dic = {
    'ptsa': 'CC1=CC=C(S(O)(=O)=O)C=C1',
    'benzenesulfonic acid': 'O=S(C1=CC=CC=C1)(O)=O',
    'MsOH': 'CS(=O)(O)=O',
    'HCl': '[H]Cl',
    'HBF4': 'F[B-](F)(F)F.[H+]',
    'camphorsulfonic acid': 'CC1(C)[C@]2([H])CC[C@@]1(CS(O)(=O)=O)C(C2)=O',
    'pentafluorophenol': 'OC1=C(F)C(F)=C(F)C(F)=C1F',
    'TfOH': 'O=S(O)(C(F)(F)F)=O',
    'HBr': '[H]Br',
    'HPF6': 'F[P-](F)(F)(F)(F)F.[H+]',
    'TFA': 'O=C(O)C(F)(F)F',
    'H3PO4': 'O=P(O)(O)O',
    'HClO4': 'O=Cl(=O)(O)=O',
    'HI': '[H]I',
    'diphenyl hydrogen phosphate': 'O=P(OC1=CC=CC=C1)(OC2=CC=CC=C2)O',
    'HNTf2': 'O=S(NS(=O)(C(F)(F)F)=O)(C(F)(F)F)=O',
    # NOTE(review): the SMILES below contains two [Na+] counter-ions and no
    # pyridinium cation, so it may not match this name -- verify against the
    # intended reagent before relying on this entry.
    'Pyridinium p-toluene sulfonate': 'O=S(C1=CC=CC=C1C2=CN=NC(C3=NC=C(C4=CC=CC=C4S(=O)([O-])=O)C=C3)=N2)([O-])=O.[Na+].[Na+]',
    'tetrabutylammonium hydrogensulfate': 'CCCC[N+](CCCC)(CCCC)CCCC.[O-]S(=O)(O)=O',
    'chiral BINOL phosphoric acid': 'O=P(OC1=CC=C2C(C=CC=C2)=C31)(O)OC4=C3C(C=CC=C5)=C5C=C4',
}

In [9]:
# Cosolvent name -> SMILES lookup (insertion order is preserved).
cosolvent_SMILES_dic = {
    'HFIP': 'FC(F)(F)C(O)C(F)(F)F',
    # Raw string: '\F' is not a valid escape sequence, so a plain literal
    # triggers an invalid-escape warning on recent Python versions. The stored
    # value (backslash followed by F) is unchanged.
    # NOTE(review): this SMILES is a fluoro-alkene with no oxygen atom; confirm
    # it is the compound intended by the 'TFE' label.
    'TFE': r'F/C(F)=C(F)\F',
    'nonafluoro-tert-butyl alcohol': 'FC(F)(F)C(C(F)(F)F)(O)C(F)(F)F',
    'tert-butanol': 'CC(C)(O)C',
    'iso-propanol': 'CC(O)C',
}

In [10]:
# Additive name -> SMILES lookup (key order is unchanged).
# Fixed: the 'DHA' and 'BHT' values were swapped. The phenol bearing two
# tert-butyl groups and one methyl group is BHT; the steroid acetate belongs
# to the 'DHA' label.
# NOTE(review): 'DHA' is assumed to denote the steroid additive -- confirm the
# abbreviation against the experimental records.
additives_SMILES_dic = {
    'sodium sulfate': 'O=S([O-])([O-])=O.[Na+].[Na+]',
    'molecular sieves': '[O-][Si](=O)[O-].[O-][Si](=O)[O-].[Na+].[Al+3]',
    'DHA': 'O=C1CC[C@@]2([H])[C@]3([H])CC=C4C[C@@H](OC(C)=O)CC[C@]4(C)[C@@]3([H])CC[C@]12C',
    'water': '[H]O[H]',
    'BHT': 'CC(C1=CC(C)=CC(C(C)(C)C)=C1O)(C)C',
    'methanol': 'CO',
    'dioxane': 'O1CCOCC1',
}

In [11]:
# exp_columns[0]

In [13]:
import itertools

In [14]:
# Descriptor tables: each CSV is read, its 'Name' label column is dropped and
# the remaining columns are converted to a NumPy array (one row per compound).
# Presumably each row holds two PCA components -- confirm against the CSVs.
MCPs = pd.read_csv('df_MCPs_pca_mordred_sterimol.csv').drop(columns='Name').to_numpy()
catalysts = pd.read_csv('df_catalyst_pca_mordred_sterimol.csv').drop(columns='Name').to_numpy()
cosolvents = pd.read_csv('df_cosolvent_pca_mordred_sterimol.csv').drop(columns='Name').to_numpy()
solvents = pd.read_csv('df_solvent_pca_mordred_sterimol.csv').drop(columns='Name').to_numpy()

# Scalar condition grids. Alternative grids used previously:
#   ratio = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
#   ratio = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
#   temperature = [10, 21, 50]
#   time = [0.5, 1, 2, 4, 8, 24]
ratio = [0.5]
temperature = [21]
additive_dim1 = [0]
additive_dim2 = [0]
cat_loading = [0.5, 1, 2, 10, 25, 50, 110]
time = [2]

# Generate all possible combinations (Cartesian product, materialised as a
# list of 10-tuples in the axis order given below).
combinations = list(
    itertools.product(
        MCPs,
        catalysts,
        solvents,
        cosolvents,
        additive_dim1,
        additive_dim2,
        ratio,
        temperature,
        cat_loading,
        time,
    )
)

In [15]:
import csv

# Define the CSV file name
csv_file = 'domain_derivatives_no_const_and_energy_2hrs_rtm_0additives_0.5ratio_first_half.csv'

# Only the first half of `combinations` is exported (integer division, so an
# odd total leaves the middle element to the second half).
half = len(combinations) // 2

with open(csv_file, 'w', newline='') as file:
    writer = csv.writer(file)
    # Header: one column per value written in each row below.
    writer.writerow(['MCP dim1',
                     'MCP dim2',
                     'Catalyst dim1',
                     'Catalyst dim2',
                     'Solvent dim1',
                     'Solvent dim2',
                     'Cosolvent dim1',
                     'Cosolvent dim2',
                     'additive dim1',
                     'additive dim2',
                     'cosolvent vs solvent ratio',
                     'temp in degree celsius',
                     'cat. loading mol%',
                     'time in hours'])
    for combination in combinations[:half]:
        # Loop-local names are deliberately different from the notebook-level
        # grid lists (additive_dim1, additive_dim2, ratio, temperature,
        # cat_loading, time). Unpacking into those names would rebind the
        # lists to scalars and leave hidden state behind after this cell runs.
        (MCP, Catalyst, Solvent, Cosolvent,
         additive_dim1_value, additive_dim2_value,
         ratio_value, temperature_value,
         cat_loading_value, time_value) = combination
        # Each descriptor row must contain exactly two values; the strict
        # two-name unpacking raises ValueError otherwise, keeping rows aligned
        # with the header.
        MCP_dim1, MCP_dim2 = MCP
        Catalyst_dim1, Catalyst_dim2 = Catalyst
        solvent_dim1, solvent_dim2 = Solvent
        Cosolvent_dim1, Cosolvent_dim2 = Cosolvent
        writer.writerow([MCP_dim1,
                         MCP_dim2,
                         Catalyst_dim1,
                         Catalyst_dim2,
                         solvent_dim1,
                         solvent_dim2,
                         Cosolvent_dim1,
                         Cosolvent_dim2,
                         additive_dim1_value,
                         additive_dim2_value,
                         ratio_value,
                         temperature_value,
                         cat_loading_value,
                         time_value])
