In [1]:
from rdkit.Chem import AllChem
from rdkit.Chem import rdChemReactions
from rdkit import Chem
from rdkit.Chem import Draw, AllChem
from rdkit.Chem.Descriptors import NumRadicalElectrons
from IPython.display import Image, display
from enum_rxns import *

In [2]:
import pandas as pd
rxn_sma = '[O:1]-[O:2]-[C:3]-[C:4]-[H:5]>>[H:5]-[O:1]-[O:2]-[C:3]-[C:4]'
reactants = [['[O]OC(OC)C'],['[O]OCCOC'], #methoxy, no methyl
             ['[O]OC(C)(OC)C'],['[O]OCC(C)OC'], #methoxy, one methyl, at same carbon
             ['[O]OC(CC)OC'],['[O]OC(C)COC'], #methoxy, one methyl, at different carbons
             ['[O]OC(OC)(C)CC'],['[O]OC(OC)C(C)C'], 
             ['[O]OC(C)C(C)OC'],['[O]OC(C)(C)COC'], #methoxy, two methyls
             ['[O]OC(C)(OC)C(C)C'],['[O]OC(C)(C)C(C)OC']] #methoxy, three methyls

Images = []
rxn_smiles_all = []
rxn_types_all = []
for i, reactant in enumerate(reactants):
    possible_products = run_rxn(rxn_sma, reactant)
    #print(possible_products)

    for j, prod in enumerate(possible_products):
        rxn_smiles = '.'.join(reactant)+'>>'+'.'.join(prod)
        rxn_type = get_rxn_type(rxn_smiles)
        #print(rxn_smiles, rxn_type)
        rxn_smiles_all.append(rxn_smiles)
        rxn_types_all.append(rxn_type)
        #png = visualize_rxn(rxn_smiles, 'test'+str(i)+str(j)+'.png')
        #Images.append(Image(filename = 'test'+str(i)+str(j)+'.png'))
        #IPython.display.Image(png)
        
df = pd.DataFrame(list(map(list, zip(*[rxn_smiles_all, rxn_types_all]))), columns = ['rxn_smiles', 'rxn_type'])
df['R'] = [ x.split('>>')[0] for x in df.rxn_smiles]
df['P'] = [ x.split('>>')[1] for x in df.rxn_smiles]
df['rxn_type'] += '_5mem'

In [3]:
df

Unnamed: 0,rxn_smiles,rxn_type,R,P
0,[O]OC(OC)C>>[CH2]C(OC)OO,2a-1_5mem,[O]OC(OC)C,[CH2]C(OC)OO
1,[O]OCCOC>>CO[CH]COO,1-2a_5mem,[O]OCCOC,CO[CH]COO
2,[O]OC(C)(OC)C>>[CH2]C(C)(OC)OO,3a-1_5mem,[O]OC(C)(OC)C,[CH2]C(C)(OC)OO
3,[O]OCC(C)OC>>CO[C](C)COO,1-3a_5mem,[O]OCC(C)OC,CO[C](C)COO
4,[O]OC(CC)OC>>C[CH]C(OC)OO,2a-2_5mem,[O]OC(CC)OC,C[CH]C(OC)OO
5,[O]OC(C)COC>>CO[CH]C(C)OO,2-2a_5mem,[O]OC(C)COC,CO[CH]C(C)OO
6,[O]OC(C)COC>>[CH2]C(COC)OO,2-1_5mem,[O]OC(C)COC,[CH2]C(COC)OO
7,[O]OC(OC)(C)CC>>C[CH]C(C)(OC)OO,3a-2_5mem,[O]OC(OC)(C)CC,C[CH]C(C)(OC)OO
8,[O]OC(OC)(C)CC>>[CH2]C(CC)(OC)OO,3a-1_5mem,[O]OC(OC)(C)CC,[CH2]C(CC)(OC)OO
9,[O]OC(OC)C(C)C>>COC(OO)[C](C)C,2a-3_5mem,[O]OC(OC)C(C)C,COC(OO)[C](C)C


In [4]:
from collections import Counter
rxn_type_counter = Counter(rxn_types_all)
reactant_counter = Counter([x.split('>>')[0] for x in rxn_smiles_all])
print(rxn_type_counter)
print(reactant_counter)
print(len(Counter(rxn_types_all).keys()))
print(len(rxn_smiles_all))
# Sometimes >1 reactions are generated per each rxn_type from run_rxn 
#due to some unexpected hydrogen transfers from methyl substituents
isDel = []
for _, row in df.iterrows():
    if rxn_type_counter[row['rxn_type']] > 1 and reactant_counter[row['rxn_smiles'].split('>>')[0]] > 1:
        isDel.append(True)
    else:
        isDel.append(False)
print(isDel.count(True))
df['isDel'] = isDel

Counter({'3a-1': 3, '2-1': 2, '3-1': 2, '2a-1': 1, '1-2a': 1, '1-3a': 1, '2a-2': 1, '2-2a': 1, '3a-2': 1, '2a-3': 1, '2-3a': 1, '3-2a': 1, '3a-3': 1, '3-3a': 1})
Counter({'[O]OC(C)COC': 2, '[O]OC(OC)(C)CC': 2, '[O]OC(C)C(C)OC': 2, '[O]OC(C)(C)COC': 2, '[O]OC(C)(OC)C(C)C': 2, '[O]OC(C)(C)C(C)OC': 2, '[O]OC(OC)C': 1, '[O]OCCOC': 1, '[O]OC(C)(OC)C': 1, '[O]OCC(C)OC': 1, '[O]OC(CC)OC': 1, '[O]OC(OC)C(C)C': 1})
14
18
0


In [5]:
#display(*Images)

In [6]:
df[~df.isDel]

Unnamed: 0,rxn_smiles,rxn_type,R,P,isDel
0,[O]OC(OC)C>>[CH2]C(OC)OO,2a-1_5mem,[O]OC(OC)C,[CH2]C(OC)OO,False
1,[O]OCCOC>>CO[CH]COO,1-2a_5mem,[O]OCCOC,CO[CH]COO,False
2,[O]OC(C)(OC)C>>[CH2]C(C)(OC)OO,3a-1_5mem,[O]OC(C)(OC)C,[CH2]C(C)(OC)OO,False
3,[O]OCC(C)OC>>CO[C](C)COO,1-3a_5mem,[O]OCC(C)OC,CO[C](C)COO,False
4,[O]OC(CC)OC>>C[CH]C(OC)OO,2a-2_5mem,[O]OC(CC)OC,C[CH]C(OC)OO,False
5,[O]OC(C)COC>>CO[CH]C(C)OO,2-2a_5mem,[O]OC(C)COC,CO[CH]C(C)OO,False
6,[O]OC(C)COC>>[CH2]C(COC)OO,2-1_5mem,[O]OC(C)COC,[CH2]C(COC)OO,False
7,[O]OC(OC)(C)CC>>C[CH]C(C)(OC)OO,3a-2_5mem,[O]OC(OC)(C)CC,C[CH]C(C)(OC)OO,False
8,[O]OC(OC)(C)CC>>[CH2]C(CC)(OC)OO,3a-1_5mem,[O]OC(OC)(C)CC,[CH2]C(CC)(OC)OO,False
9,[O]OC(OC)C(C)C>>COC(OO)[C](C)C,2a-3_5mem,[O]OC(OC)C(C)C,COC(OO)[C](C)C,False


In [7]:
len(df[~df.isDel].rxn_type.unique())

14

In [8]:
df[~df.isDel].to_csv('ro2_qooh_5mem.csv', index = False)

In [9]:
reactants = [['[O]OC(OC)C'],['[O]OCCOC'], #methoxy, no methyl
             ['[O]OC(C)(OC)C'],['[O]OCC(C)OC'], #methoxy, one methyl, at same carbon
             ['[O]OC(CC)OC'],['[O]OC(C)COC'], #methoxy, one methyl, at different carbons
             ['[O]OC(OC)(C)CC'],['[O]OC(OC)C(C)C'], 
             ['[O]OC(C)C(C)OC'],['[O]OC(C)(C)COC'], #methoxy, two methyls
             ['[O]OC(C)(OC)C(C)C'],['[O]OC(C)(C)C(C)OC']]
reactants = [x[0] for x in reactants]
'.'.join(reactants)

'[O]OC(OC)C.[O]OCCOC.[O]OC(C)(OC)C.[O]OCC(C)OC.[O]OC(CC)OC.[O]OC(C)COC.[O]OC(OC)(C)CC.[O]OC(OC)C(C)C.[O]OC(C)C(C)OC.[O]OC(C)(C)COC.[O]OC(C)(OC)C(C)C.[O]OC(C)(C)C(C)OC'