In [1]:
from rdkit.Chem import AllChem
from rdkit.Chem import rdChemReactions
from rdkit import Chem
from rdkit.Chem import Draw, AllChem
from rdkit.Chem.Descriptors import NumRadicalElectrons
from IPython.display import Image, display
from enum_rxns import *

In [2]:
import pandas as pd
# Reaction SMARTS for the H-shift enumerated in this notebook: the H on atom 4
# ends up on the terminal peroxy oxygen (atom 1); atoms 3 and 4 are bridged by
# one arbitrary atom (map number 6).
rxn_sma = '[O:1]-[O:2]-[C:3]-[*:6]-[C:4]-[H:5]>>[H:5]-[O:1]-[O:2]-[C:3]-[*:6]-[C:4]'

# Peroxy-radical SMILES, grouped by where the ether oxygen sits. Each entry is
# a '.'-separated list of molecules.
_reactant_groups = (
    # ether oxygen between R carbon and Q carbon
    'COCO[O].CCOCO[O].COC(C)O[O].CC(C)OCO[O].COC(C)(C)O[O].CC(O[O])OCC.CC(C)OC(C)O[O].CCOC(C)(C)O[O].CC(C)OC(C)(C)O[O]',
    # ether oxygen not between R carbon and Q carbon, methoxy at 'Q' carbon
    'COCCCO[O].COC(C)CCO[O].COCCC(C)O[O].COC(C)CC(C)O[O].COCCC(C)(C)O[O].COC(C)CC(C)(O[O])C',
    # ether oxygen not between R carbon and Q carbon, methoxy at 'R' carbon
    'CCC(OC)O[O].COC(O[O])CCC.CC[C@@](OC)(C)O[O].COC(C)(O[O])CCC.COC(O[O])CC(C)C.COC(C)(O[O])CC(C)C',
)

# Every reactant is wrapped in a one-element list; that list is what gets
# passed to run_rxn and '.'-joined downstream.
reactants = [[smiles] for group in _reactant_groups for smiles in group.split('.')]
print(len(reactants))

# Enumerate every product set that the template generates for each reactant and
# record the reaction SMILES together with its type label.
# `Images` stays empty unless rendering is switched back on (it was disabled in
# the original: visualize_rxn(...) to a PNG, then Image(filename=...) appended).
Images = []
rxn_smiles_all = []
rxn_types_all = []
for reactant_mols in reactants:
    product_sets = run_rxn(rxn_sma, reactant_mols)
    lhs = '.'.join(reactant_mols)
    for product_mols in product_sets:
        reaction = lhs + '>>' + '.'.join(product_mols)
        rxn_smiles_all.append(reaction)
        # The '_6mem' suffix is appended to every type label produced here.
        rxn_types_all.append(get_rxn_type(reaction) + '_6mem')
        
# One row per enumerated reaction: full reaction SMILES plus its type label.
df = pd.DataFrame(
    list(zip(rxn_smiles_all, rxn_types_all)),
    columns=['rxn_smiles', 'rxn_type'],
)
# Split each reaction SMILES at '>>' once; the left side goes to 'R' and the
# right side to 'P'.
_rxn_sides = [smiles.split('>>') for smiles in df['rxn_smiles']]
df['R'] = [sides[0] for sides in _rxn_sides]
df['P'] = [sides[1] for sides in _rxn_sides]

21


In [3]:
df

Unnamed: 0,rxn_smiles,rxn_type,R,P
0,COCO[O]>>[CH2]OCOO,1a-1a_6mem,COCO[O],[CH2]OCOO
1,CCOCO[O]>>C[CH]OCOO,1a-2a_6mem,CCOCO[O],C[CH]OCOO
2,COC(C)O[O]>>[CH2]OC(C)OO,2a-1a_6mem,COC(C)O[O],[CH2]OC(C)OO
3,CC(C)OCO[O]>>C[C](C)OCOO,1a-3a_6mem,CC(C)OCO[O],C[C](C)OCOO
4,COC(C)(C)O[O]>>[CH2]OC(C)(C)OO,3a-1a_6mem,COC(C)(C)O[O],[CH2]OC(C)(C)OO
5,CC(O[O])OCC>>C[CH]OC(C)OO,2a-2a_6mem,CC(O[O])OCC,C[CH]OC(C)OO
6,CC(C)OC(C)O[O]>>C[C](C)OC(C)OO,2a-3a_6mem,CC(C)OC(C)O[O],C[C](C)OC(C)OO
7,CCOC(C)(C)O[O]>>C[CH]OC(C)(C)OO,3a-2a_6mem,CCOC(C)(C)O[O],C[CH]OC(C)(C)OO
8,CC(C)OC(C)(C)O[O]>>C[C](C)OC(C)(C)OO,3a-3a_6mem,CC(C)OC(C)(C)O[O],C[C](C)OC(C)(C)OO
9,COCCCO[O]>>CO[CH]CCOO,1-2a_6mem,COCCCO[O],CO[CH]CCOO


In [4]:
from collections import Counter
# Tally how often each reaction type and each reactant SMILES occurs.
rxn_type_counter = Counter(rxn_types_all)
reactant_counter = Counter(smiles.split('>>')[0] for smiles in rxn_smiles_all)
print(rxn_type_counter)
print(reactant_counter)
print(len(rxn_type_counter))   # number of distinct reaction types
print(len(rxn_smiles_all))     # number of enumerated reactions

# Original author's note: run_rxn sometimes generates more than one reaction
# per rxn_type because of unexpected hydrogen transfers from methyl
# substituents. A row is flagged for deletion when BOTH its type label and its
# reactant appear more than once.
isDel = [
    rxn_type_counter[type_label] > 1
    and reactant_counter[reaction.split('>>')[0]] > 1
    for type_label, reaction in zip(df['rxn_type'], df['rxn_smiles'])
]
print(sum(isDel))  # how many rows are flagged
df['isDel'] = isDel

Counter({'2a-1a_6mem': 4, '3a-1a_6mem': 4, '1a-1a_6mem': 1, '1a-2a_6mem': 1, '1a-3a_6mem': 1, '2a-2a_6mem': 1, '2a-3a_6mem': 1, '3a-2a_6mem': 1, '3a-3a_6mem': 1, '1-2a_6mem': 1, '1-3a_6mem': 1, '2-2a_6mem': 1, '2-3a_6mem': 1, '3-2a_6mem': 1, '3-3a_6mem': 1, '2a-1_6mem': 1, '2a-2_6mem': 1, '3a-1_6mem': 1, '3a-2_6mem': 1, '2a-3_6mem': 1, '3a-3_6mem': 1})
Counter({'CCC(OC)O[O]': 2, 'COC(O[O])CCC': 2, 'CC[C@@](OC)(C)O[O]': 2, 'COC(C)(O[O])CCC': 2, 'COC(O[O])CC(C)C': 2, 'COC(C)(O[O])CC(C)C': 2, 'COCO[O]': 1, 'CCOCO[O]': 1, 'COC(C)O[O]': 1, 'CC(C)OCO[O]': 1, 'COC(C)(C)O[O]': 1, 'CC(O[O])OCC': 1, 'CC(C)OC(C)O[O]': 1, 'CCOC(C)(C)O[O]': 1, 'CC(C)OC(C)(C)O[O]': 1, 'COCCCO[O]': 1, 'COC(C)CCO[O]': 1, 'COCCC(C)O[O]': 1, 'COC(C)CC(C)O[O]': 1, 'COCCC(C)(C)O[O]': 1, 'COC(C)CC(C)(O[O])C': 1})
21
27
6


In [5]:
#display(*Images)

In [6]:
# Show only the rows that were not flagged for deletion.
df.loc[~df['isDel']]

Unnamed: 0,rxn_smiles,rxn_type,R,P,isDel
0,COCO[O]>>[CH2]OCOO,1a-1a_6mem,COCO[O],[CH2]OCOO,False
1,CCOCO[O]>>C[CH]OCOO,1a-2a_6mem,CCOCO[O],C[CH]OCOO,False
2,COC(C)O[O]>>[CH2]OC(C)OO,2a-1a_6mem,COC(C)O[O],[CH2]OC(C)OO,False
3,CC(C)OCO[O]>>C[C](C)OCOO,1a-3a_6mem,CC(C)OCO[O],C[C](C)OCOO,False
4,COC(C)(C)O[O]>>[CH2]OC(C)(C)OO,3a-1a_6mem,COC(C)(C)O[O],[CH2]OC(C)(C)OO,False
5,CC(O[O])OCC>>C[CH]OC(C)OO,2a-2a_6mem,CC(O[O])OCC,C[CH]OC(C)OO,False
6,CC(C)OC(C)O[O]>>C[C](C)OC(C)OO,2a-3a_6mem,CC(C)OC(C)O[O],C[C](C)OC(C)OO,False
7,CCOC(C)(C)O[O]>>C[CH]OC(C)(C)OO,3a-2a_6mem,CCOC(C)(C)O[O],C[CH]OC(C)(C)OO,False
8,CC(C)OC(C)(C)O[O]>>C[C](C)OC(C)(C)OO,3a-3a_6mem,CC(C)OC(C)(C)O[O],C[C](C)OC(C)(C)OO,False
9,COCCCO[O]>>CO[CH]CCOO,1-2a_6mem,COCCCO[O],CO[CH]CCOO,False


In [7]:
# Number of distinct reaction types among the rows that are kept.
df.loc[~df['isDel'], 'rxn_type'].nunique(dropna=False)

21

In [8]:
# Write the kept rows (all columns, no index) to CSV.
df.loc[~df['isDel']].to_csv('ro2_qooh_6mem.csv', index=False)