# This is the notebook for QM9 curation in the manuscript
The QM9 dataset was downloaded from http://quantum-machine.org/datasets/. The files uncharacterized.txt, gdb17_qm9_energy.smi, and atomref.txt used in this notebook were extracted from that download.

In [1]:
import rdkit
from rdkit import Chem
from rdkit.Chem import AllChem
from tqdm.notebook import tqdm
from collections import defaultdict
from pysmiles import read_smiles
from openbabel import pybel
import networkx as nx
import re
print(rdkit.__version__)

2022.03.2


In [2]:
# Canonicalize SMILES string
def canon_smi(smi):
    """Return the RDKit canonical SMILES for ``smi``.

    Parameters
    ----------
    smi : str
        SMILES string to canonicalize.

    Returns
    -------
    str
        The result of ``Chem.MolToSmiles`` on the parsed, sanitized molecule.

    Raises
    ------
    ValueError
        If ``Chem.MolFromSmiles`` cannot parse ``smi`` (it returns ``None``).
    """
    mol = Chem.MolFromSmiles(smi)
    if mol is None:
        # RDKit reports a parse/sanitization failure by returning None rather
        # than raising; fail here with a readable message instead of handing
        # None to SanitizeMol.
        raise ValueError(f'RDKit could not parse SMILES: {smi!r}')
    Chem.SanitizeMol(mol)

    return Chem.MolToSmiles(mol)

## 1. Remove “uncharacterized molecules” defined in the original QM9 paper.
## 2. Remove molecules that failed RDKit SMILES canonicalization from SMILES provided by the original QM9 paper after DFT optimization.

In [3]:
# Collect the uncharacterized molecules listed in uncharacterized.txt.
# A row is kept when it has more than two whitespace-separated fields and its
# first field is all digits; the second field of each kept row is stored.
with open('./uncharacterized.txt','r') as uc:
    uc_rows = (raw.rstrip().split() for raw in uc)
    uncha = [cols[1] for cols in uc_rows if len(cols) > 2 and cols[0].isdigit()]
len(uncha)

3054

In [4]:
# Build mol dict from QM9:
#   key   = canonical SMILES of the GDB SMILES column (field 1)
#   value = list of {'parsed_smi': canonical SMILES of field 2, 'energy': float(field 3)}
# Records whose GDB SMILES is in `uncha` are skipped; records that raise while
# being processed (e.g. SMILES that RDKit cannot canonicalize) are printed and dropped.
qm9_dict = defaultdict(list)
# Set copy of the uncharacterized SMILES so each membership test is O(1)
# instead of a scan over the whole list for every record.
uncha_lookup = set(uncha)
with open('./gdb17_qm9_energy.smi','r') as qm9:
    for line in tqdm(qm9, total=133885):
        line = line.rstrip().split()
        try:
            gdb_smi = line[1]
            parsed_smi = line[2]
            # Canonicalized before the uncharacterized check on purpose: this keeps
            # the set of reported failures the same as before.
            c_smi = canon_smi(parsed_smi)
            if gdb_smi in uncha_lookup:
                continue
            energy = float(line[3])
            qm9_dict[canon_smi(gdb_smi)].append({'parsed_smi':c_smi,'energy':energy})
        except Exception:
            # Catch Exception (not a bare except) so KeyboardInterrupt/SystemExit
            # still stop the loop; the offending record is reported and skipped.
            print(line)

  0%|          | 0/133885 [00:00<?, ?it/s]

[10:33:37] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:33:37] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:33:37] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:33:37] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:33:37] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:33:37] Explicit valence for atom # 3 N, 4, is greater than permitted


['000271', '[NH3+]CCC([O-])=O', '[NH3]CCC(=O)[O]', '-323.582949']
['000282', 'NC(=[NH2+])C([O-])=O', 'NC(=[NH2])C(=O)[O]', '-338.530072']
['001117', 'NC(=[NH2+])CC([O-])=O', 'NC(=[NH2])[CH2].C(=O)=O', '-377.830859']
['001642', 'C[NH+]=C(N)C([O-])=O', 'CNC(=[NH2])C(=O)[O]', '-377.816466']
['001648', 'CNC(=[NH2+])C([O-])=O', 'CNC(=[NH2])C(=O)[O]', '-377.816463']
['001670', 'CC(C[NH3+])C([O-])=O', 'C[C@@H](C[NH3])C(=O)[O]', '-362.87621']


[10:33:38] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6
[10:33:38] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6
[10:33:38] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6
[10:33:38] Can't kekulize mol.  Unkekulized atoms: 1 3 4 5 6
[10:33:38] Can't kekulize mol.  Unkekulized atoms: 1 3 4 5 6
[10:33:38] Can't kekulize mol.  Unkekulized atoms: 1 3 4 5 6
[10:33:38] Can't kekulize mol.  Unkekulized atoms: 1 3 4 5 6
[10:33:38] Explicit valence for atom # 1 N, 4, is greater than permitted
[10:33:38] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:33:38] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:33:38] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:33:38] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5
[10:33:38] Explicit valence for atom # 5 N, 4, is greater than permitted
[10:33:38] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:33:38] Explicit valence for atom # 4 N, 4, is greater than permitted
[

['003828', 'N=C1C=CNN=N1', '[NH]c1cc[nH]nn1', '-335.586914']
['003832', 'N=C1C=NNN=C1', '[NH]c1cn[nH]nc1', '-335.590859']
['003833', 'N=C1C=NNN=N1', '[NH]c1cn[nH]nn1', '-351.615495']
['003842', 'N=C1NN=CC=N1', '[NH]c1[nH]nccn1', '-335.61837']
['003843', 'N=C1NN=CN=C1', '[NH]c1[nH]ncnc1', '-335.613384']
['003844', 'N=C1NN=NC=C1', '[NH]c1[nH]nncc1', '-335.591582']
['003845', 'N=C1NN=NC=N1', '[NH]c1[nH]nncn1', '-351.646368']
['004015', 'C[NH2+]CC(CO)=N[O-]', 'C[NH2]C[C](CO)[N][O]', '-418.138616']
['004018', 'NC(=[NH2+])C(=N[O-])C#N', '[NH2]=C(N)C(=N[O])C#N', '-410.814514']
['004019', 'NC(=[NH2+])C(C=O)=N[O-]', '[NH2]=C(N)C(=N[O])C=O', '-431.877352']
['004028', 'NC(=[NH2+])C(CO)=N[O-]', '[NH2]=C(N)C(=N[O])CO', '-433.071644']
['004419', 'OC1=CN=CC(=N)N1', 'Oc1cncc([NH])[nH]1', '-394.814053']
['004427', 'OC1=NC=NC(=O)O1', 'OC(=O)[N]C=[N]=C=O', '-450.668952']
['005804', 'CC(C)([NH3+])CC([O-])=O', 'CC(C)([NH3])CC(=O)[O]', '-402.176006']
['006033', 'CC(C)(C[NH3+])C([O-])=O', 'CC(C)(C[NH3])C(=O)

[10:33:39] Explicit valence for atom # 4 N, 4, is greater than permitted
[10:33:39] Explicit valence for atom # 4 N, 4, is greater than permitted
[10:33:39] Explicit valence for atom # 1 N, 4, is greater than permitted
[10:33:39] Explicit valence for atom # 1 N, 4, is greater than permitted
[10:33:39] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:33:39] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:33:39] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:33:39] Explicit valence for atom # 4 N, 4, is greater than permitted
[10:33:39] Explicit valence for atom # 4 N, 4, is greater than permitted
[10:33:39] Explicit valence for atom # 1 N, 4, is greater than permitted
[10:33:39] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:33:39] Explicit valence for atom # 7 N, 4, is greater than permitted
[10:33:39] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:33:39] Explicit valence for atom # 0 N, 4, is g

['007245', '[O-]C(=O)C(=N)NC=[NH2+]', 'O=C(O)C(=[N]=CN)[NH]', '-431.95542']
['007326', 'CCNC(=[NH2+])C([O-])=O', 'CCNC(=[NH2])C(=O)[O]', '-417.108035']
['007335', 'C[NH2+]CC(=O)C([O-])=O', 'C[NH2]CC(=O)C(=O)[O]', '-436.91929']
['007375', 'C[NH2+]CC(C)C([O-])=O', 'C[NH2]C[C@@H](C)C(=O)[O]', '-402.163264']
['007410', 'CC(C(N)=[NH2+])C([O-])=O', 'C[CH]C(=[NH2])N.C(=O)=O', '-417.121047']
['007417', 'NC(=[NH2+])C(O)C([O-])=O', '[NH2]=C(N)[CH]O.C(=O)=O', '-453.04302']
['007419', 'NC(C(N)=[NH2+])C([O-])=O', 'N[CH]C(=[NH2])N.C(=O)=O', '-433.166605']
['007429', 'CN(C)C(=[NH2+])C([O-])=O', 'CN(C)C(=[NH2])C(=O)[O]', '-417.091698']
['007602', 'CC1(CC1[NH3+])C([O-])=O', 'C[C@]1(C[C@H]1[NH3])C(=O)[O]', '-400.944569']
['007654', 'C[NH+]1CC(C1)C([O-])=O', 'C[NH]1CC(C1)C(=O)[O]', '-400.962216']
['007657', '[NH3+]C1CC(C1)C([O-])=O', '[NH3][C@H]1C[C@@H](C1)C(=O)[O]', '-400.937791']
['008398', '[O-]C(=O)C12CC(C1)[NH2+]2', 'O=C([O])[C@@]12C[C@@H](C1)[NH2]2', '-399.729422']
['009986', '[NH2+]=CN[C-](C#N)C#N

[10:33:40] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:33:40] Explicit valence for atom # 6 N, 4, is greater than permitted
[10:33:40] Explicit valence for atom # 6 N, 4, is greater than permitted
[10:33:40] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:33:40] Explicit valence for atom # 3 N, 4, is greater than permitted


['013805', '[NH3+]CC1CC1C([O-])=O', '[NH3][CH][C@H]1C[C@H]1C(=O)O', '-400.93789']
['013833', '[O-]C(=O)C1=CC[NH2+]C1', 'O=C([O])C1=CC[NH2]C1', '-399.769262']
['014121', '[O-]C(=O)C12CC1[NH2+]C2', '[O]C(=O)[C@]12C[C@H]1[NH2]C2', '-399.730154']
['014318', 'O=C1[CH-][NH+]2CCC2=C1', 'O=C1[CH][N@H]2CC[C]2[CH]1', '-362.595893']
['015512', 'O=C[C-]1[NH2+]CCC1=O', 'O=C[C]1[NH2]CCC1=O', '-399.786072']


[10:33:41] Explicit valence for atom # 7 N, 4, is greater than permitted
[10:33:41] Explicit valence for atom # 6 N, 4, is greater than permitted
[10:33:41] Explicit valence for atom # 4 N, 4, is greater than permitted


['017808', '[O-]C(=O)C1C2CC1[NH2+]2', 'O=C([O])[C@H]1[C@H]2C[C@@H]1[NH2]2', '-399.708212']
['017809', '[O-]C(=O)C1C2C[NH+]1C2', '[O]C(=O)[C@H]1[C@H]2C[N@@H]1C2', '-399.712852']
['018308', 'O=C1C=C[NH+]2CC[C-]12', 'O=C1C=C[NH]2CC[C]12', '-362.598243']


[10:33:42] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6
[10:33:42] Can't kekulize mol.  Unkekulized atoms: 1 2 3 6 7
[10:33:42] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 6
[10:33:42] Can't kekulize mol.  Unkekulized atoms: 1 2 4 5 6
[10:33:42] Can't kekulize mol.  Unkekulized atoms: 1 2 3 6 7
[10:33:42] Can't kekulize mol.  Unkekulized atoms: 1 2 3 6 7
[10:33:42] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 6
[10:33:42] Can't kekulize mol.  Unkekulized atoms: 1 2 4 6 7
[10:33:42] Can't kekulize mol.  Unkekulized atoms: 1 2 4 6 7
[10:33:42] Can't kekulize mol.  Unkekulized atoms: 1 2 4 5 6
[10:33:42] Can't kekulize mol.  Unkekulized atoms: 1 2 4 5 6
[10:33:42] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7
[10:33:42] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7
[10:33:42] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7
[10:33:42] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6
[10:33:42] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6
[10:33:42] Can't kekuliz

['020630', 'CC1=CC(=N)N=NN1', 'Cc1cc([NH])nn[nH]1', '-374.885658']
['020633', 'CC1=CC(=N)NN=N1', 'Cc1cc([NH])[nH]nn1', '-374.890167']
['020662', 'CC1=CN=NNC1=N', 'Cc1cnn[nH]c1[NH]', '-374.890327']
['020671', 'CC1=CNN=NC1=N', 'Cc1c[nH]nnc1[NH]', '-374.884952']
['020684', 'CC1=NC(=N)NN=C1', 'Cc1nc([NH])[nH]nc1', '-374.911111']
['020685', 'CC1=NC(=N)NN=N1', 'Cc1nc([NH])[nH]nn1', '-390.94078']
['020703', 'CC1=NC=NNC1=N', 'Cc1ncn[nH]c1[NH]', '-374.915104']
['020717', 'CC1=NNC(=N)C=N1', 'Cc1n[nH]c([NH])cn1', '-374.913693']
['020718', 'CC1=NNC(=N)N=C1', 'Cc1n[nH]c([NH])nc1', '-374.915497']
['020745', 'CC1=NNN=CC1=N', 'Cc1n[nH]ncc1[NH]', '-374.891074']
['020746', 'CC1=NNN=NC1=N', 'Cc1n[nH]nnc1[NH]', '-390.917213']
['021010', 'CN1C=CC(=N)N=N1', 'Cn1ccc([NH])nn1', '-374.874846']
['021060', 'CN1N=CC(=N)C=N1', 'Cn1ncc([NH])cn1', '-374.879249']
['021061', 'CN1N=CC(=N)N=N1', 'Cn1ncc([NH])nn1', '-390.905004']
['021083', 'CN1N=CC=NC1=N', 'Cn1nccnc1[NH]', '-374.907536']
['021089', 'CN1N=CN=CC1=N', 'Cn1

[10:33:42] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:33:42] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:33:42] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:33:42] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:33:42] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:33:42] Explicit valence for atom # 1 N, 4, is greater than permitted
[10:33:42] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:33:42] Explicit valence for atom # 1 N, 4, is greater than permitted
[10:33:42] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:33:42] Explicit valence for atom # 8 N, 4, is greater than permitted
[10:33:42] Explicit valence for atom # 7 N, 4, is greater than permitted
[10:33:42] Explicit valence for atom # 7 N, 4, is greater than permitted
[10:33:42] Explicit valence for atom # 1 N, 4, is greater than permitted
[10:33:42] Explicit valence for atom # 2 N, 4, is g

['022438', 'C[NH+]=C(N)C(C=O)=N[O-]', 'CNC(=[NH2])C(=N[O])C=O', '-471.16096']
['022439', 'CNC(=[NH2+])C(=N[O-])C#N', 'CNC(=[NH2])C(=N[O])C#N', '-450.098345']
['022441', 'CNC(=[NH2+])C(C=O)=N[O-]', 'CNC(=[NH2])C(=N[O])C=O', '-471.160959']
['022443', 'CNC(=[NH2+])C(CO)=N[O-]', 'CNC(=[NH2])C(=N[O])CO', '-472.353931']
['022444', 'C[NH+]=C(N)C(CO)=N[O-]', 'CNC(=[NH2])C(=N[O])CO', '-472.353932']
['022460', 'C[NH2+]C(C)C(CO)=N[O-]', 'C[NH2][C@@H](C)[C](CO)[N][O]', '-457.431408']
['022482', 'NC=[NH+]CC(CO)=N[O-]', '[NH2]=CNC[C](CO)[N][O]', '-472.294853']
['022495', 'C[NH2+]CCC(=N[O-])C#N', 'C[NH2]CCC(=N[O])C#N', '-435.139327']
['022504', 'CC[NH2+]CC(CO)=N[O-]', 'CC[NH2]C[C](CO)[N][O]', '-457.430588']
['022686', 'OCC(CC1C[NH2+]1)=N[O-]', 'OCC(=N[O])C[C@H]1C[NH2]1', '-456.202959']
['022974', '[O-]N=C(C=O)C1C[NH2+]C1', '[O]N=C(C=O)C1C[NH2]C1', '-455.009993']
['022980', 'OCC(=N[O-])C1C[NH2+]C1', 'OC[C]([N][O])C1C[NH2]C1', '-456.202281']
['023150', 'C[NH2+]CC(=N[O-])C1CO1', 'C[NH2]CC(=N[O])[C@H]1CO

[10:33:43] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:33:43] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:33:43] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:33:43] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:33:43] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:33:43] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:33:43] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:33:43] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:33:43] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:33:43] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:33:43] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:33:43] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:33:43] Can't kekulize mol.  Unkekulized atoms: 1 2 5 7 8
[10:33:43] Explicit valence for atom # 4 N, 4, is greater than 

['025758', 'NC(=[NH2+])C1=C([O-])OC=C1', '[NH2]=C(N)[C]1C(=O)OC=C1', '-454.012997']
['025759', 'NC(=[NH2+])C1=C([O-])OC=N1', '[NH2]=C(N)[C]1C(=O)OC=N1', '-470.081061']
['025815', 'NC(=[NH2+])C1=CC=C([O-])N1', 'NC(=[NH2])[C]1C=CC(=O)N1', '-434.12012']
['025816', 'NC(=[NH2+])C1=CC=C([O-])O1', 'NC(=[NH2])[C]1C=CC(=O)O1', '-454.004009']
['025818', 'NC(=[NH2+])C1=CN=C(O)[N-]1', 'NC(=[NH2])c1cnc(O)n1', '-450.178621']
['025821', 'NC(=[NH2+])C1=NC=C([O-])N1', 'NC(=[NH2])[C]1N=CC(=O)N1', '-450.1801']
['025859', 'NC(=[NH2+])C1=C[N-]C(=N)O1', 'NC(=[NH2])c1cnc([NH])o1', '-450.156709']
['025860', 'NC(=[NH2+])C1=CNC(=O)[N-]1', 'NC(=[NH2])c1c[nH]c(=O)n1', '-450.172331']
['025861', 'NC(=[NH2+])C1=COC(=N)[N-]1', '[NH2]=C(N)C(=N[C][NH])C=O', '-450.155649']
['025862', 'NC(=[NH2+])C1=COC(=O)[N-]1', 'NC(=[NH2])[C](C=O)N=C=O', '-470.066525']
['025886', 'NC(=[NH2+])C1=COC([O-])=C1', 'NC(=[NH2])[C](C=O)[CH][C][O]', '-453.976034']
['025915', 'NC(=[NH2+])N1C=CC([O-])=C1', 'NC(=[NH2])N1C=CC(=O)[CH]1', '-434.0604

[10:33:44] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6
[10:33:44] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6
[10:33:45] Explicit valence for atom # 8 N, 4, is greater than permitted
[10:33:45] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:33:45] Explicit valence for atom # 7 N, 4, is greater than permitted


['032041', 'COC1=CN=CC(=N)N1', 'COc1cncc([NH])[nH]1', '-434.086651']
['032059', 'OCC1=CN=CC(=N)N1', 'OCc1cncc([NH])[nH]1', '-434.090666']
['032414', 'CC1=C([O-])OC=C1C[NH3+]', 'C[C]1C(=O)OC=C1C[NH3]', '-439.048303']
['032465', '[NH3+]CC1=COC([O-])=C1O', '[NH3]CC1=C(C(=O)O[CH]1)O', '-474.978895']
['033400', 'N#C[C-](C#N)C1C[NH2+]C1', 'N#C[C](C#N)C1C[NH2]C1', '-396.87678']


[10:33:47] Explicit valence for atom # 5 N, 4, is greater than permitted
[10:33:48] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:33:48] Explicit valence for atom # 8 N, 4, is greater than permitted


['042424', 'O=C([N-]C1C[NH2+]C1)C#N', 'O=C([N]C1C[NH2]C1)C#N', '-434.016751']
['043213', 'O=C1C2[NH2+]CC2[C-]1C#N', 'O=C1[C@H]2[NH2]C[C@H]2[C]1C#N', '-416.754086']
['043475', 'O=C1[CH-]C(=O)C2CC1[NH2+]2', 'O=C1[CH]C(=O)[C@@H]2C[C@H]1[NH2]2', '-437.865832']


[10:33:48] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:33:48] Explicit valence for atom # 8 N, 4, is greater than permitted
[10:33:48] Explicit valence for atom # 6 N, 4, is greater than permitted
[10:33:48] Explicit valence for atom # 7 N, 4, is greater than permitted


['045541', 'O=C1[CH-][NH+]2CCCC2=C1', 'O=C1[CH][N@H]2CCC[C]2[CH]1', '-401.919634']
['045545', 'O=C1O[C-]2CC[NH2+]C2=C1', '[O][C][CH][C]1C(=O)CC[NH2]1', '-437.858166']
['045546', 'O=C1O[C-]2C[NH2+]CC2=C1', 'O=C1OC2=C(C[NH2]C2)[CH]1', '-437.848179']
['046611', 'O=C1[CH-]NC2=C1C[NH2+]C2', 'O=C1[CH]NC2=C1C[NH2]C2', '-417.939084']


[10:33:49] Explicit valence for atom # 5 N, 4, is greater than permitted
[10:33:49] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:33:49] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:33:49] Explicit valence for atom # 5 N, 4, is greater than permitted
[10:33:50] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:33:50] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:33:50] Explicit valence for atom # 7 N, 4, is greater than permitted
[10:33:50] Explicit valence for atom # 8 N, 4, is greater than permitted
[10:33:50] Explicit valence for atom # 4 N, 4, is greater than permitted
[10:33:50] Explicit valence for atom # 8 N, 4, is greater than permitted
[10:33:50] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:33:50] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:33:50] Explicit valence for atom # 4 N, 4, is greater than permitted
[10:33:50] Explicit valence for atom # 4 N, 4, is g

['049723', 'O=C[C-]1C2C[NH2+]C2C1=O', 'O=C[C]1[C@H]2C[NH2][C@H]2C1=O', '-437.823164']
['050309', 'O=C[C-]1[NH2+]C2CC2C1=O', 'O=C[C]1[NH2][C@H]2C[C@H]2C1=O', '-437.851752']
['050620', 'O=C[C-]1[NH2+]CC=CC1=O', 'O=C[C]1[NH2]CC=CC1=O', '-437.867237']
['050736', 'O=C[C-]1CC[NH2+]CC1=O', 'O=C[C]1CC[NH2]CC1=O', '-439.055296']
['051246', '[NH2+]=CNC1=CC(=O)N[CH-]1', '[NH2]=CNC1=CNC(=O)[CH]1', '-434.057513']
['051247', '[NH2+]=CNC1=CC(=O)O[CH-]1', '[NH2]=CNC1=COC(=O)[CH]1', '-453.928733']
['052579', 'CC#C[C-](C#N)C(N)=[NH2+]', 'CC#C[C](C#N)C(=[NH2])N', '-396.943238']
['053821', '[O-]C(=O)C(=O)C1CC[NH2+]1', 'O=C([O])C(=O)[C@H]1CC[NH2]1', '-474.997087']
['053845', '[O-]C(=O)C(=[NH2+])NC1CC1', '[O]C(=O)C(=[NH2])NC1CC1', '-455.172789']
['053892', '[O-]C(=O)C(=O)CNC=[NH2+]', 'O=C([O])C(=O)CNC=[NH2]', '-491.080227']
['053896', 'NC(=[NH2+])C(C#C)C([O-])=O', 'NC(=[NH2])[CH]C#C.C(=O)=O', '-453.981069']
['053897', 'NC(=[NH2+])[C-](C#N)C(N)=O', 'NC(=[NH2])[C](C#N)C(=O)N', '-450.209037']
['053939', 'CC(C)

[10:33:50] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:33:50] Explicit valence for atom # 5 N, 4, is greater than permitted
[10:33:50] Explicit valence for atom # 6 N, 4, is greater than permitted
[10:33:50] Explicit valence for atom # 8 N, 4, is greater than permitted
[10:33:50] Explicit valence for atom # 7 N, 4, is greater than permitted
[10:33:50] Explicit valence for atom # 5 N, 4, is greater than permitted
[10:33:50] Explicit valence for atom # 5 N, 4, is greater than permitted
[10:33:50] Explicit valence for atom # 6 N, 4, is greater than permitted
[10:33:50] Explicit valence for atom # 4 N, 4, is greater than permitted
[10:33:50] Explicit valence for atom # 6 N, 4, is greater than permitted
[10:33:50] Explicit valence for atom # 4 N, 4, is greater than permitted
[10:33:50] Explicit valence for atom # 4 N, 4, is greater than permitted
[10:33:50] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:33:50] Explicit valence for atom # 2 N, 4, is g

['054296', 'NC(=[NH2+])C1(CN1)C([O-])=O', '[NH2]=C(N)[C]1CN1.C(=O)=O', '-471.210889']
['054384', 'CC12CC(C1)([NH2+]2)C([O-])=O', 'C[C@@]12C[C@@](C1)([NH2]2)C(=O)[O]', '-439.028975']
['054400', '[O-]C(=O)C12CC([NH2+]1)C=C2', 'O=C([O])[C@@]12C[C@@H]([NH2]1)C=C2', '-437.838903']
['054409', '[O-]C(=O)C12CC(C1)C[NH2+]2', '[O]C(=O)[C@@]12C[C@@H](C1)C[NH2]2', '-439.065409']
['054410', '[O-]C(=O)C12CC(C1)[NH2+]C2', 'O=C([O])[C@@]12C[C@@H](C1)[NH2]C2', '-439.038329']
['054412', '[O-]C(=O)C12C[NH+](C1)CC2', 'O=C([O])[C@@]12C[N@@H](C1)CC2', '-439.03848']
['054422', 'CC12CC1(C[NH2+]2)C([O-])=O', 'C[C@]12C[C@]1(C[NH2]2)C(=O)[O]', '-439.031735']
['054538', '[O-]C(=O)C12CC1[NH2+]CC2', '[O]C(=O)[C@]12C[C@H]1[NH2]CC2', '-439.047551']
['054569', '[O-]C(=O)C12[NH2+]C3C1CC23', 'O=C([O])[C@@]12[NH2][C@H]3[C@@H]1C[C@@H]23', '-437.793304']
['054582', '[O-]C(=O)C12CC3[NH2+]C1C23', 'O=C([O])[C@@]12C[C@H]3[NH2][C@@H]1[C@@H]23', '-437.79076']
['054611', 'CC1=C(C[NH2+]C1)C([O-])=O', 'CC1=C(C[NH2]C1)C(=O)[O]', '-4

[10:33:51] Explicit valence for atom # 7 N, 4, is greater than permitted
[10:33:51] Explicit valence for atom # 4 N, 4, is greater than permitted
[10:33:51] Explicit valence for atom # 8 N, 4, is greater than permitted
[10:33:51] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:33:51] Explicit valence for atom # 8 N, 4, is greater than permitted
[10:33:51] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:33:51] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:33:51] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:33:51] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:33:51] Explicit valence for atom # 4 N, 4, is greater than permitted


['055944', '[O-]C(=O)CC1CC2[NH2+]C12', '[O]C(=O)C[C@H]1C[C@H]2[NH2][C@@H]12', '-438.998187']
['056055', '[O-]C(=O)C[NH+]=CN1CC1', 'O=C=O.C=[NH][CH]N1CC1', '-455.111004']
['056072', '[O-]C(=O)CN=CNC=[NH2+]', 'O=C=O.[CH2]N=CNC=[NH2]', '-471.200679']
['056238', 'NC(=[NH2+])CNCC([O-])=O', 'NC(=[NH2])C=[NH]CC(=O)O', '-472.417424']
['056343', '[O-]C(=O)CCCNC=[NH2+]', '[O]C(=O)CCCNC=[NH2]', '-456.305265']
['057172', 'NC(=[NH2+])[C-](O)C(=O)C#N', 'NC(=[NH2])[C](O)C(=O)C#N', '-470.041313']
['057181', 'NC(=[NH2+])[C-](O)C(=O)C=O', 'NC(=[NH2])[C](O)C(=O)C=O', '-491.121141']
['057736', 'CC(N)=[NH+][C-](C#N)C#N', 'CC(=[NH2])[NH]=C(C#N)C#N', '-412.993012']
['057737', 'NC(N)=[NH+][C-](C#C)C#N', 'NC(=[NH2])[NH]=C(C#C)C#N', '-412.942185']
['057945', 'CN(C)C(=[NH2+])CC([O-])=O', 'CN(C)C(=[NH2])[CH2].C(=O)=O', '-456.391606']


[10:33:51] Explicit valence for atom # 6 N, 4, is greater than permitted
[10:33:51] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:33:51] Explicit valence for atom # 1 N, 4, is greater than permitted
[10:33:52] Explicit valence for atom # 1 N, 4, is greater than permitted
[10:33:52] Explicit valence for atom # 5 N, 4, is greater than permitted
[10:33:52] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:33:52] Explicit valence for atom # 1 N, 4, is greater than permitted
[10:33:52] Explicit valence for atom # 4 N, 4, is greater than permitted
[10:33:52] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:33:52] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:33:52] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:33:52] Explicit valence for atom # 7 N, 4, is greater than permitted
[10:33:52] Explicit valence for atom # 8 N, 4, is greater than permitted


['058281', 'CC(CC([O-])=O)[NH+](C)C', 'C[C@@H](CC(=O)[O])[NH](C)C', '-441.446778']
['058614', 'CC([NH3+])C1(CO1)C([O-])=O', 'C[C@@H]([NH3])[C@]1(CO1)C(=O)[O]', '-476.169378']
['058982', 'C[NH+](C)C1=CC(=O)O[CH-]1', 'C[NH](C)C1=COC(=O)[CH]1', '-439.040257']
['059827', 'C[NH+](C)CC#CC([O-])=O', 'C[NH](C)C[C][C]C(=O)[O]', '-438.981629']
['059849', 'CC(C)NC(=[NH2+])C([O-])=O', 'CC(C)NC(=[NH2])C(=O)[O]', '-456.399811']
['059966', 'CC(C[NH+](C)C)C([O-])=O', 'C[C@@H](C[NH](C)C)C(=O)[O]', '-441.448746']
['059977', 'C[NH+](C)CC(O)C([O-])=O', 'C[NH](C)C[C@@H](O)C(=O)[O]', '-477.412763']
['060660', 'OC(C1C[NH2+]C1)C([O-])=O', 'O[C@@H](C1C[NH2]C1)C(=O)[O]', '-476.176938']
['060718', 'CN(C=[NH2+])C(=N)C([O-])=O', 'CN(C=[NH2])C(=N)C(=O)[O]', '-471.210465']
['060780', 'CN(C=[NH2+])[C-](C#N)C#N', 'CN(C=[NH2])[C](C#N)C#N', '-412.978993']
['061435', 'NC(=[NH2+])CC(O)C([O-])=O', 'NC(=[NH2])C[C@@H](O)C(=O)[O]', '-492.30444']
['061440', 'NC(CC([O-])=O)C(N)=[NH2+]', 'N[C@@H](CC(=O)[O])C(=[NH2])N', '-472.410

[10:33:55] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:33:55] Explicit valence for atom # 0 N, 4, is greater than permitted


['074137', '[NH3+]C1=CC(=O)NC(=O)[CH-]1', '[NH3]C1=CC(=O)NC(=O)[CH]1', '-453.968011']
['074176', '[NH3+]C1=CC(=O)O[C-]1C=O', '[NH3]C1=C(OC(=O)[CH]1)C=O', '-473.805098']


[10:33:56] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:33:56] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:33:56] Explicit valence for atom # 8 N, 4, is greater than permitted
[10:33:56] Explicit valence for atom # 5 N, 4, is greater than permitted
[10:33:56] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:33:56] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:33:56] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:33:56] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:33:56] Explicit valence for atom # 0 N, 4, is greater than permitted


['076135', 'CC1([NH3+])C(N)C1C([O-])=O', 'C[C@@]1([NH3])[C@@H](N)[C@H]1C(=O)[O]', '-456.286637']
['076136', 'CC1([NH3+])C(O)C1C([O-])=O', 'C[C@@]1([NH3])[C@@H](O)[C@H]1C(=O)[O]', '-476.177957']
['076143', 'CC1C(C([O-])=O)C1(C)[NH3+]', 'C[C@H]1[C@@H](C(=O)[O])[C@]1(C)[NH3]', '-440.237448']
['076372', 'CC1(C(N)C1[NH3+])C([O-])=O', 'C[C@]1([C@@H](N)[C@H]1[NH3])C(=O)[O]', '-456.297347']
['076373', 'CC1(C([NH3+])C1O)C([O-])=O', 'C[C@]1([C@@H]([NH3])[C@H]1O)C(=O)[O]', '-476.169941']
['076380', 'CC1C([NH3+])C1(C)C([O-])=O', 'C[C@H]1[C@@H]([NH3])[C@]1(C)C(=O)[O]', '-440.234295']
['076394', 'NC1C([NH3+])C1(N)C([O-])=O', 'N[C@H]1[C@@H]([NH3])[C@]1(N)C(=O)[O]', '-472.362553']
['076395', 'NC1C([NH3+])C1(O)C([O-])=O', 'N[C@H]1[C@@H]([NH3])[C@]1(O)C(=O)[O]', '-492.248184']
['076397', '[NH3+]C1C(O)C1(O)C([O-])=O', '[NH3][C@H]1[C@@H](O)[C@]1(O)C(=O)[O]', '-512.122082']


[10:33:57] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:33:57] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:33:57] Explicit valence for atom # 5 N, 4, is greater than permitted
[10:33:57] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:33:57] Explicit valence for atom # 1 N, 4, is greater than permitted
[10:33:57] Explicit valence for atom # 4 N, 4, is greater than permitted
[10:33:57] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:33:57] Explicit valence for atom # 1 N, 4, is greater than permitted
[10:33:57] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:33:57] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:33:57] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:33:57] Explicit valence for atom # 3 N, 4, is greater than permitted


['080595', '[NH3+]C1C2CC(C12)C([O-])=O', '[NH3][C@H]1[C@H]2C[C@@H]([C@@H]12)C(=O)[O]', '-438.997483']
['080597', '[NH3+]C1C2OC(C12)C([O-])=O', '[NH3][C@H]1[C@H]2O[C@@H]([C@@H]12)C(=O)[O]', '-474.919168']
['081049', 'CC1C2CC1([NH2+]2)C([O-])=O', 'C[C@H]1[C@H]2C[C@@]1([NH2]2)C(=O)[O]', '-439.020617']
['081054', 'CC1[NH+]2CC1(C2)C([O-])=O', 'C[C@H]1[N@H]2C[C@@]1(C2)C(=O)[O]', '-438.995664']
['081057', 'C[NH+]1C2CC1(C2)C([O-])=O', 'C[NH]1[C@H]2C[C@@]1(C2)C(=O)[O]', '-439.017318']
['081567', 'CC1C2C[NH+]1C2C([O-])=O', 'C[C@H]1[C@H]2C[N@@H]1[C@H]2C(=O)[O]', '-439.005365']
['081568', 'CC1C2[NH2+]C1C2C([O-])=O', 'C[C@H]1[C@H]2[NH2][C@@H]1[C@H]2C(=O)[O]', '-438.999252']
['081573', 'C[NH+]1C2CC1C2C([O-])=O', 'C[NH]1[C@H]2C[C@@H]1[C@H]2C(=O)[O]', '-438.99721']
['081578', '[NH3+]C1C2CC1C2C([O-])=O', '[NH3][C@H]1[C@H]2C[C@@H]1[C@H]2C(=O)[O]', '-438.980256']
['081579', '[NH3+]C1C2NC1C2C([O-])=O', '[NH3][C@H]1[C@H]2N[C@@H]1[C@H]2C(=O)[O]', '-455.03127']
['081580', '[NH3+]C1C2OC1C2C([O-])=O', '[NH3][C

[10:33:58] Explicit valence for atom # 1 N, 4, is greater than permitted
[10:33:58] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:33:58] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:33:58] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:33:58] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:33:58] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:33:58] Explicit valence for atom # 1 N, 4, is greater than permitted


['082082', 'C[NH+]1C2CC2C1C([O-])=O', 'C[NH]1[C@H]2C[C@H]2[C@H]1C(=O)[O]', '-439.03109']
['083401', 'CC1C2[NH2+]CC12C([O-])=O', 'C[C@H]1[C@H]2[NH2]C[C@@]12C(=O)[O]', '-439.021902']
['083411', '[NH3+]C1C2CCC12C([O-])=O', '[NH3][C@H]1[C@H]2CC[C@@]12C(=O)[O]', '-439.010501']
['083414', 'NC1C2[NH2+]CC12C([O-])=O', 'N[C@H]1[C@H]2[NH2]C[C@@]12C(=O)[O]', '-455.075486']
['083415', '[NH3+]C1C2OCC12C([O-])=O', '[NH3][C@H]1[C@H]2OC[C@@]12C(=O)[O]', '-474.932105']
['083417', 'OC1C2[NH2+]CC12C([O-])=O', 'O[C@H]1[C@H]2[NH2]C[C@@]12C(=O)[O]', '-474.950112']
['084310', 'C[NH+]1C=CC(=O)[C-]1C=O', 'C[NH]1C=CC(=O)[C]1C=O', '-437.863619']


[10:33:58] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:33:59] Explicit valence for atom # 4 N, 4, is greater than permitted
[10:33:59] Explicit valence for atom # 1 N, 4, is greater than permitted
[10:33:59] Explicit valence for atom # 4 N, 4, is greater than permitted
[10:33:59] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:33:59] Explicit valence for atom # 6 N, 4, is greater than permitted
[10:33:59] Explicit valence for atom # 0 N, 4, is greater than permitted


['085488', '[NH3+]C1CC(=O)[N-]C1C#N', '[NH3][C@H]1CC(=O)[N][C@H]1C#N', '-434.040485']
['085952', 'CC1(CC([NH3+])C1)C([O-])=O', 'C[C@]1(C[C@@H]([NH3])C1)C(=O)[O]', '-440.234059']
['085962', 'C[NH+]1CC(C)(C1)C([O-])=O', 'C[NH]1CC(C)(C1)C(=O)[O]', '-440.257846']
['086563', 'CC1C(C[NH+]1C)C([O-])=O', 'C[C@H]1[C@@H](C[NH]1C)C(=O)[O]', '-440.258389']
['086588', '[NH3+]C1CC(C1O)C([O-])=O', '[NH3][C@H]1C[C@@H]([C@H]1O)C(=O)[O]', '-476.166393']
['086636', 'CC1CC(=O)[C-]([NH2+]1)C=O', 'C[C@H]1CC(=O)[C]([NH2]1)C=O', '-439.080688']
['087035', '[NH3+]C1CC1(C#C)C([O-])=O', '[NH3][C@H]1C[C@]1(C#C)C(=O)[O]', '-437.799858']


[10:34:00] Explicit valence for atom # 1 N, 4, is greater than permitted
[10:34:00] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:34:00] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:34:00] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:34:00] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:34:00] Explicit valence for atom # 4 N, 4, is greater than permitted
[10:34:00] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:34:00] Explicit valence for atom # 1 N, 4, is greater than permitted
[10:34:00] Explicit valence for atom # 2 N, 4, is greater than permitted


['089622', 'C[NH+]1CC2(CC12)C([O-])=O', 'C[NH]1C[C@]2(C[C@@H]12)C(=O)[O]', '-439.018043']
['089626', '[NH3+]C1CC2(CC12)C([O-])=O', '[NH3][C@H]1C[C@]2(C[C@@H]12)C(=O)[O]', '-439.005515']
['089628', '[NH3+]C1CC2(OC12)C([O-])=O', '[NH3][C@H]1C[C@]2(O[C@@H]12)C(=O)[O]', '-474.932115']
['090693', '[NH3+]C1CC2C1C2C([O-])=O', '[NH3][C@H]1C[C@H]2[C@@H]1[C@H]2C(=O)[O]', '-439.003489']
['090694', '[NH3+]C1CN2C1C2C([O-])=O', '[NH3][C@H]1CN2[C@@H]1[C@H]2C(=O)[O]', '-455.038863']
['090696', 'CC1C[C-]2[NH+]1C=CC2=O', 'C[C@H]1C[C]2[NH]1C=CC2=O', '-401.894018']
['091153', 'OC1C[NH+]2C=CC(=O)[C-]12', 'O[C@H]1C[NH]2C=CC(=O)[C]12', '-437.821036']
['091258', 'C[NH+]1C[C-]2OC(=O)C=C12', 'C[NH]1CC(=O)[C]1[CH][C][O]', '-437.841902']
['091519', 'CC1[NH2+]C2CC12C([O-])=O', 'C[C@H]1[NH2][C@H]2C[C@@]12C(=O)[O]', '-439.027794']


[10:34:00] Explicit valence for atom # 1 N, 4, is greater than permitted
[10:34:01] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:34:01] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:34:01] Explicit valence for atom # 1 N, 4, is greater than permitted
[10:34:01] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:34:01] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:34:01] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:34:01] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:34:01] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:34:01] Explicit valence for atom # 1 N, 4, is greater than permitted
[10:34:01] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:34:01] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:34:01] Explicit valence for atom # 2 N, 4, is greater than permitted


['092760', 'C[NH+]1CC=C(C1)C([O-])=O', 'C[NH]1CC=C(C1)C(=O)[O]', '-439.056455']
['093347', 'CC1[NH2+]CC(=O)[C-]1C=O', 'C[C@H]1[NH2]CC(=O)[C]1C=O', '-439.070556']
['093567', 'CC1[NH2+]CC(=C1)C([O-])=O', 'C[C@H]1[NH2]CC(=C1)C(=O)[O]', '-439.064442']
['093572', 'C[NH+]1CCC(=C1)C([O-])=O', 'C[NH]1CCC(=C1)C(=O)[O]', '-439.056616']
['093941', '[NH3+]C1CCC(C1)C([O-])=O', '[NH3][C@H]1CC[C@@H](C1)C(=O)[O]', '-440.254716']
['093942', '[NH3+]C1COC(C1)C([O-])=O', '[NH3][C@H]1CO[C@@H](C1)C(=O)[O]', '-476.174324']
['093986', 'CC1C[NH2+][C-](C=O)C1=O', 'C[C@H]1C[NH2][C](C=O)C1=O', '-439.076838']
['093988', 'CC1[NH2+]C[C-](C=O)C1=O', 'C[C@H]1[NH2]C[C](C=O)C1=O', '-439.073359']
['093997', 'OC1C[NH2+][C-](C=O)C1=O', 'O[C@H]1C[NH2][C](C=O)C1=O', '-475.005703']
['094182', 'C[NH+]1CCC1(C)C([O-])=O', 'C[NH]1CC[C@]1(C)C(=O)[O]', '-440.27075']
['094604', '[NH3+]C1CCC1CC([O-])=O', '[NH3][C@H]1CC[C@H]1CC(=O)[O]', '-440.220527']
['094606', '[NH3+]C1COC1CC([O-])=O', '[NH3][CH]COC=C.C(=O)=O', '-476.167809']
['0954

[10:34:01] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:34:01] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:34:01] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:34:01] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:34:01] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:34:01] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:34:01] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:34:01] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:34:01] Explicit valence for atom # 7 N, 4, is greater than permitted
[10:34:01] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:34:01] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:34:01] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:34:01] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:34:02] Explicit valence for atom # 3 N, 4, is g

['096613', 'C[NH+]=C(N)C#CC([O-])=O', 'CNC(=[NH2])C#CC(=O)[O]', '-453.917607']
['096631', 'C[NH+]=C(N)[C-](C#C)C#N', 'CNC(=[NH2])[C](C#C)C#N', '-396.92568']
['096632', 'C[NH+]=C(N)[C-](C#N)C#N', 'CNC(=[NH2])[C](C#N)C#N', '-413.033514']
['096637', 'C[NH+]=C(N)C(C)C([O-])=O', 'CNC(=[NH2])[CH]C.C(=O)=O', '-456.398986']
['096638', 'C[NH+]=C(N)C(N)C([O-])=O', 'CNC(=[NH2])[CH]N.C(=O)=O', '-472.441046']
['096640', 'C[NH+]=C(N)C(O)C([O-])=O', 'CNC(=[NH2])[C@@H](O)C(=O)[O]', '-492.329519']
['096679', 'C[NH+]=C(N)OCC([O-])=O', 'CNC(=[NH2])OCC(=O)[O]', '-492.271777']
['097260', 'NC=[NH+]C(C#C)C([O-])=O', '[NH2]=C[NH]=CC#C.C(=O)=O', '-453.921194']
['097261', 'NC=N[C-](C#N)C(N)=[NH2+]', 'NC=N[C](C#N)C(=[NH2])N', '-430.282892']
['097325', 'NC=[NH+]CC(=O)C([O-])=O', '[NH2]=CNCC(=O)C(=O)[O]', '-491.080226']
['097358', 'NC=[NH+]C[C-](C#N)C#N', '[NH2]=CNC[C](C#N)C#N', '-412.976907']
['097458', 'NC=[NH+]CCCC([O-])=O', '[NH2]=CNCCCC(=O)[O]', '-456.305265']
['097476', '[NH3+]CC#CC#CC([O-])=O', '[NH3]CC#CC#

[10:34:02] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:34:02] Explicit valence for atom # 1 N, 4, is greater than permitted
[10:34:02] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:34:03] Explicit valence for atom # 4 N, 4, is greater than permitted
[10:34:03] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:34:03] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:34:03] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:34:03] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:34:03] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:34:03] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:34:03] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:34:03] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:34:03] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:34:03] Explicit valence for atom # 0 N, 4, is g

['101807', '[NH3+]CC(O)CCC([O-])=O', '[NH3]C[C@@H](O)CCC(=O)[O]', '-477.353919']
['102015', 'C[NH2+][C-](C=O)C(=O)C=O', 'C[NH2][C](C=O)C(=O)C=O', '-475.005966']
['102131', 'NC=[NH+]C(CO)C([O-])=O', '[NH2]=C[NH]=CCO.C(=O)=O', '-492.272024']
['102628', 'COC(C[NH3+])CC([O-])=O', 'CO[C@@H]([CH][NH3])CC(=O)O', '-477.366379']
['102634', '[NH3+]CC(CO)CC([O-])=O', '[NH3][CH][C@@H](CO)CC(=O)O', '-477.364061']
['102794', '[NH3+]CC(NC=O)C([O-])=O', '[NH3]C[C@@H](NC=O)C(=O)[O]', '-492.309996']
['102796', '[NH3+]CC(OC=O)C([O-])=O', '[NH3]C[C@@H](OC=O)C(=O)[O]', '-512.146376']
['102797', 'OCC(NC=[NH2+])C([O-])=O', 'OCC=[NH]C=[NH2].C(=O)=O', '-492.272025']
['103798', 'CC1(C[NH3+])CC1C([O-])=O', 'C[C@@]1(C[NH3])C[C@H]1C(=O)[O]', '-440.22023']
['103799', 'CC1(C[NH3+])NC1C([O-])=O', 'C[C@@]1(C[NH3])N[C@H]1C(=O)[O]', '-456.253273']
['103813', 'CCC1([NH3+])CC1C([O-])=O', 'CC[C@@]1([NH3])C[C@H]1C(=O)[O]', '-440.235519']
['103821', '[NH3+]CC1(O)CC1C([O-])=O', '[NH3]C[C@@]1(O)C[C@H]1C(=O)[O]', '-476.167199']

[10:34:04] Explicit valence for atom # 1 N, 4, is greater than permitted


['108410', 'C[NH2+]C12COC1[C-]2C=O', 'C[NH2][C@]12CO[C@H]1[C]2C=O', '-438.941668']


[10:34:05] Explicit valence for atom # 1 N, 4, is greater than permitted
[10:34:05] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:34:05] Explicit valence for atom # 1 N, 4, is greater than permitted
[10:34:05] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:34:05] Explicit valence for atom # 1 N, 4, is greater than permitted
[10:34:05] Explicit valence for atom # 5 N, 4, is greater than permitted
[10:34:05] Explicit valence for atom # 5 N, 4, is greater than permitted
[10:34:05] Explicit valence for atom # 1 N, 4, is greater than permitted
[10:34:05] Explicit valence for atom # 1 N, 4, is greater than permitted
[10:34:05] Explicit valence for atom # 1 N, 4, is greater than permitted
[10:34:05] Explicit valence for atom # 0 N, 4, is greater than permitted


['112497', 'C[NH2+]C1C[C-](C#N)C1=O', 'C[NH2][C@H]1C[C](C#N)C1=O', '-417.968277']
['112946', 'CC[NH+]1CC(C1)C([O-])=O', 'CC[NH]1CC(C1)C(=O)[O]', '-440.254682']
['112947', 'C[NH2+]C1CC(C1)C([O-])=O', 'C[NH2][C@H]1C[C@@H](C1)C(=O)[O]', '-440.225672']
['112955', '[NH3+]CC1CC(C1)C([O-])=O', '[NH3]C[C@H]1C[C@@H](C1)C(=O)[O]', '-440.209902']
['112990', 'C[NH2+]C1C[C-](C=O)C1=O', 'C[NH2][C@H]1C[C](C=O)C1=O', '-439.037302']
['113157', 'CC1(CC1C[NH3+])C([O-])=O', 'C[C@]1(C[C@H]1C[NH3])C(=O)[O]', '-440.221677']
['113161', 'CC1(OC1C[NH3+])C([O-])=O', 'C[C@]1(O[C@H]1C[NH3])C(=O)[O]', '-476.158494']
['113174', 'C[NH2+]C1CC1(C)C([O-])=O', 'C[NH2][C@H]1C[C@]1(C)C(=O)[O]', '-440.232667']
['113175', 'C[NH2+]C1CC1(N)C([O-])=O', 'C[NH2][C@H]1C[C@]1(N)C(=O)[O]', '-456.30583']
['113176', 'C[NH2+]C1CC1(O)C([O-])=O', 'C[NH2][C@H]1C[C@]1(O)C(=O)[O]', '-476.18794']
['113184', '[NH3+]CC1CC1(O)C([O-])=O', '[NH3]C[C@H]1C[C@]1(O)C(=O)[O]', '-476.168097']


[10:34:06] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:34:06] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:34:06] Explicit valence for atom # 4 N, 4, is greater than permitted
[10:34:06] Explicit valence for atom # 3 N, 4, is greater than permitted
[10:34:06] Explicit valence for atom # 4 N, 4, is greater than permitted
[10:34:06] Explicit valence for atom # 5 N, 4, is greater than permitted
[10:34:06] Explicit valence for atom # 1 N, 4, is greater than permitted
[10:34:06] Explicit valence for atom # 5 N, 4, is greater than permitted
[10:34:06] Explicit valence for atom # 1 N, 4, is greater than permitted
[10:34:07] Explicit valence for atom # 5 N, 4, is greater than permitted
[10:34:07] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:34:07] Explicit valence for atom # 1 N, 4, is greater than permitted
[10:34:07] Explicit valence for atom # 5 N, 4, is greater than permitted
[10:34:07] Explicit valence for atom # 1 N, 4, is g

['115698', '[NH3+]CC1CCC1C([O-])=O', '[NH3]C[C@H]1CC[C@H]1C(=O)[O]', '-440.227148']
['115699', '[NH3+]CC1COC1C([O-])=O', '[NH3]C[C@H]1CO[C@H]1C(=O)[O]', '-476.140628']
['116537', 'CC[NH+]=C(N)CC([O-])=O', 'CCNC(=[NH2])[CH2].C(=O)=O', '-456.400799']
['116639', 'CNC=[NH+][C-](C#N)C#N', 'CN[CH][NH]=C(C#N)C#N', '-412.977086']
['117295', 'CCNC(=[NH2+])CC([O-])=O', 'CCNC(=[NH2])[CH2].C(=O)=O', '-456.400799']
['117630', 'CC(C)(CC[NH3+])C([O-])=O', 'CC(C)(CC[NH3])C(=O)[O]', '-441.432055']
['117643', 'C[NH2+]CC(C)(C)C([O-])=O', 'C[NH2]CC(C)(C)C(=O)[O]', '-441.455378']
['118441', 'CCOC(C[NH3+])C([O-])=O', 'CCO[C@@H](C[NH3])C(=O)[O]', '-477.383156']
['118448', 'C[NH2+]CC(OC)C([O-])=O', 'C[NH2]C[C@@H](OC)C(=O)[O]', '-477.375159']
['121589', 'CCCNC(=[NH2+])C([O-])=O', 'CCCNC(=[NH2])C(=O)[O]', '-456.39681']
['121596', 'CC[NH2+]CC(=O)C([O-])=O', 'CC[NH2]CC(=O)C(=O)[O]', '-476.214565']
['121600', 'C[NH2+]CCC(=O)C([O-])=O', 'C[NH2]CCC(=O)C(=O)[O]', '-476.176524']
['121613', 'OCCNC(=[NH2+])C([O-])=O', '

[10:34:07] Explicit valence for atom # 1 N, 4, is greater than permitted
[10:34:07] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:34:07] Explicit valence for atom # 0 N, 4, is greater than permitted
[10:34:08] Can't kekulize mol.  Unkekulized atoms: 4 5 6 7 8
[10:34:08] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:34:08] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:34:08] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:34:08] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:34:08] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:34:08] Can't kekulize mol.  Unkekulized atoms: 1 2 4 6 7
[10:34:08] Can't kekulize mol.  Unkekulized atoms: 1 2 4 7 8
[10:34:08] Can't kekulize mol.  Unkekulized atoms: 1 2 4 6 7
[10:34:08] Can't kekulize mol.  Unkekulized atoms: 1 2 4 6 7
[10:34:08] Can't kekulize mol.  Unkekulized atoms: 1 2 4 7 8
[10:34:08] Can't kekulize mol.  Unkekulized atoms:

['122767', 'C[NH2+]CC[N-]C(=O)C#N', 'C[NH2]CC[N]C(=O)C#N', '-435.208865']
['123126', '[NH3+]CCCCCC([O-])=O', '[NH3]CCCCCC(=O)[O]', '-441.395258']
['123129', '[NH3+]CCOCCC([O-])=O', '[NH3]CCOCCC(=O)[O]', '-477.349951']
['124018', 'C1NC=[NH+]C2=C1N=N[N-]2', 'C1N[CH]Nc2c1nnn2', '-429.023818']
['124205', 'CC(=[NH2+])NC1=CN=N[N-]1', 'CC(=[NH2])Nc1cnnn1', '-430.218489']
['124710', 'CC(N)=[NH+]C1=CC=N[N-]1', 'CC(=[NH2])Nc1ccnn1', '-414.201678']
['124712', 'CC(N)=[NH+]C1=CN=N[N-]1', 'CC(=[NH2])Nc1cnnn1', '-430.242363']
['124714', 'CC(N)=[NH+]C1=NC=N[N-]1', 'CC(=[NH2])Nc1ncnn1', '-430.274117']
['124737', 'CC([NH3+])C1=NOC([O-])=N1', 'C[C@@H]([NH3])c1noc(=O)n1', '-471.177944']
['124923', 'CC1=C(C)C(=N)N=NN1', 'Cc1c(C)c([NH])nn[nH]1', '-414.180266']
['124926', 'CC1=C(C)C(=N)NN=N1', 'Cc1c(C)c([NH])[nH]nn1', '-414.185309']
['125003', 'CC1=C(N)C(=N)C=NN1', 'Cc1c(N)c([NH])cn[nH]1', '-414.207183']
['125005', 'CC1=C(N)C(=N)N=NN1', 'Cc1c(N)c([NH])nn[nH]1', '-430.235221']
['125008', 'CC1=C(N)C(=N)NN=N1',

[10:34:09] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 7
[10:34:09] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 7
[10:34:09] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 7
[10:34:09] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 8
[10:34:09] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 8
[10:34:09] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7
[10:34:09] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7
[10:34:09] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7
[10:34:09] Can't kekulize mol.  Unkekulized atoms: 2 3 4
[10:34:09] Can't kekulize mol.  Unkekulized atoms: 2 3 4
[10:34:09] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7
[10:34:09] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7
[10:34:09] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7
[10:34:09] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7
[10:34:09] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7
[10:34:09] Can't kekulize mol.  Unkekulized atoms: 2 3 4
[10:34:09] Can't kekulize mol.  Unke

['127659', 'CN1N=CN=C(C)C1=N', 'Cn1ncnc(C)c1[NH]', '-414.202784']
['127661', 'CN1N=CN=C(N)C1=N', 'Cn1ncnc(N)c1[NH]', '-430.266048']
['127663', 'CN1N=CN=C(O)C1=N', 'Cn1ncnc(O)c1[NH]', '-450.146004']
['127686', 'CN1N=NC(=N)C(C)=N1', 'Cn1nnc([NH])c(C)n1', '-430.206734']
['127687', 'CN1N=NC(=N)C(N)=N1', 'Cn1nnc([NH])c(N)n1', '-446.271424']
['127688', 'CN1N=NC(=N)C=C1C', 'Cn1nnc([NH])cc1C', '-414.17072']
['127689', 'CN1N=NC(=N)C=C1N', 'Cn1nnc([NH])cc1N', '-430.221971']
['127690', 'CN1N=NC(=N)C=C1O', 'Cn1nnc([NH])cc1O', '-450.101747']
['127695', 'CN1N=NC(=N)NC1=O', 'Cn1nnc([NH])[nH]c1=O', '-466.197913']
['127696', 'CN1N=NC(=N)OC1=O', 'Cn1nnc([NH])oc1=O', '-486.06067']
['127723', 'CN1N=NC(C)=CC1=N', 'Cn1nnc(C)cc1[NH]', '-414.181969']
['127725', 'CN1N=NC(C)=NC1=N', 'Cn1nnc(C)nc1[NH]', '-430.238988']
['127741', 'CN1N=NC(N)=CC1=N', 'Cn1nnc(N)cc1[NH]', '-430.239106']
['127743', 'CN1N=NC(N)=NC1=N', 'Cn1nnc(N)nc1[NH]', '-446.305892']
['127752', 'CN1N=NC(O)=NC1=N', 'Cn1nnc(O)nc1[NH]', '-466.178804']

[10:34:09] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6
[10:34:09] Can't kekulize mol.  Unkekulized atoms: 1 2 3 7 8
[10:34:09] Can't kekulize mol.  Unkekulized atoms: 1 2 3 7 8
[10:34:09] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5
[10:34:09] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5
[10:34:09] Can't kekulize mol.  Unkekulized atoms: 1 2 3 7 8
[10:34:09] Can't kekulize mol.  Unkekulized atoms: 1 2 3 7 8
[10:34:09] Can't kekulize mol.  Unkekulized atoms: 1 2 3 7 8
[10:34:09] Can't kekulize mol.  Unkekulized atoms: 1 2 3 7 8
[10:34:09] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6
[10:34:09] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6
[10:34:09] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6
[10:34:09] Can't kekulize mol.  Unkekulized atoms: 1 2 3
[10:34:09] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5
[10:34:09] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5
[10:34:09] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6
[10:34:09] Can't kekulize mo

['129023', 'N=C1C=C(NN=N1)C#C', '[NH]c1cc([nH]nn1)C#C', '-411.727865']
['129027', 'N=C1C=C2CCN2N=N1', '[NH]c1cc2CCn2nn1', '-412.941444']
['129036', 'N=C1C=CN(C=O)N=N1', '[NH]c1ccn(C=O)nn1', '-448.908481']
['129037', 'N=C1C=CN=NN1C=O', '[NH]c1ccnnn1C=O', '-448.909509']
['129056', 'N=C1C=NC=NN1C=O', '[NH]c1cncnn1C=O', '-448.934689']
['129060', 'N=C1C=NN(C=O)N=C1', '[NH]c1cnn(C=O)nc1', '-448.916513']
['129061', 'N=C1C=NN(C=O)N=N1', '[NH]c1cnn(C=O)nn1', '-464.937633']
['129063', 'N=C1C=NN2CCC2=N1', '[NH]c1cnn2CCc2n1', '-412.973447']
['129069', 'N=C1C=NNC(=O)N=C1', '[NH]c1cn[nH]c(=O)nc1', '-448.92212']
['129071', 'N=C1C=NNC=C1C#C', '[NH]c1cn[nH]cc1C#C', '-395.702435']
['129074', 'N=C1C=NNC=CC1=O', '[NH]c1cn[nH]ccc1=O', '-432.859358']
['129076', 'N=C1C=NNN=C1C#C', '[NH]c1cn[nH]nc1C#C', '-411.729079']
['129100', 'N=C1C=NON=NC1=O', '[NH]c1cnoc1=O.N#N', '-484.878482']
['129123', 'N=C1N=CC=NN1C=O', '[NH]c1nccnn1C=O', '-448.938032']
['129127', 'N=C1N=CN=NN1C=O', '[NH]c1ncnnn1C=O', '-464.961718']


['132096', 'CC1=C(F)NC(=N)C=N1', 'Cc1c(F)[nH]c([NH])cn1', '-458.128493']
['132116', 'NC1=C(F)NC(=N)C=N1', 'Nc1c(F)[nH]c([NH])cn1', '-474.173475']
['132138', 'FC1=C(F)N=CC(=N)N1', 'Fc1c(F)ncc([NH])[nH]1', '-518.071091']
['132343', 'OC1=NC(F)=NC(=O)O1', 'OC(=O)[N]C(=[N]=C=O)F', '-549.926388']
['132344', 'OC1=NC(=O)OC(F)=N1', 'OC(=[N]=C=O)[N]C(=O)F', '-549.927723']
['132345', 'FC1=NC(F)=NC(=O)O1', 'FC(=O)[N]C(=[N]=C=O)F', '-573.94274']
['132438', 'CC1=NC=C(F)NC1=N', 'Cc1ncc(F)[nH]c1[NH]', '-458.127722']
['132956', 'CN1C(=N)C=NC=C1F', 'Cn1c([NH])cncc1F', '-458.118799']
['133007', 'CN=C1NC(F)=CN=C1', 'C[N]c1[nH]c(F)cnc1', '-458.11307']
['133136', 'CC1=C(F)C(=N)N=NN1', 'Cc1c(F)c([NH])nn[nH]1', '-474.124801']
['133139', 'CC1=C(F)C(=N)NN=N1', 'Cc1c(F)c([NH])[nH]nn1', '-474.131326']
['133159', 'CC1=C(F)N=NNC1=N', 'Cc1c(F)nn[nH]c1[NH]', '-474.139031']
['133164', 'CC1=C(F)NN=NC1=N', 'Cc1c(F)[nH]nnc1[NH]', '-474.129359']
['133189', 'CC1=NC(F)=NNC1=N', 'Cc1nc(F)n[nH]c1[NH]', '-474.164708']
['133196

[10:34:10] Can't kekulize mol.  Unkekulized atoms: 1 2 5 7 8
[10:34:10] Can't kekulize mol.  Unkekulized atoms: 1 2 5 7 8
[10:34:10] Can't kekulize mol.  Unkekulized atoms: 1 2 4 5 6
[10:34:10] Explicit valence for atom # 5 N, 4, is greater than permitted
[10:34:10] Explicit valence for atom # 2 N, 4, is greater than permitted
[10:34:10] Explicit valence for atom # 5 N, 4, is greater than permitted
[10:34:10] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 7
[10:34:10] Can't kekulize mol.  Unkekulized atoms: 2 4 5 6 7
[10:34:10] Can't kekulize mol.  Unkekulized atoms: 2 4 6 7 8
[10:34:10] Can't kekulize mol.  Unkekulized atoms: 1 2 4 6 7
[10:34:10] Can't kekulize mol.  Unkekulized atoms: 1 2 4 7 8
[10:34:10] Can't kekulize mol.  Unkekulized atoms: 1 2 4 5 7
[10:34:10] Can't kekulize mol.  Unkekulized atoms: 1 2 5 6 7
[10:34:10] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 7
[10:34:10] Can't kekulize mol.  Unkekulized atoms: 1 2 4 6 7
[10:34:10] Can't kekulize mol.  Unkekulized atoms

In [5]:
# Only keep the conformer with the lowest energy
for smi in tqdm(qm9_dict):
    candidates = qm9_dict[smi]
    # Each value starts out as a list of {'parsed_smi', 'energy'} records and is
    # replaced below by the single best record (a dict). Skipping values that
    # are no longer lists makes the cell safe to re-run: without this guard a
    # second run would call min() on the dict's string keys and raise TypeError.
    if not isinstance(candidates, list):
        continue
    mol_info = min(candidates, key=lambda k: k['energy'])
    qm9_dict[smi] = mol_info
len(qm9_dict)

  0%|          | 0/130132 [00:00<?, ?it/s]

130132

## 3. Remove molecules that failed graph isomorphism check between SMILES from GDB17 and SMILES provided by the original QM9 paper after DFT optimization.

In [6]:
# Find molecules whose connectivity differs after B3LYP optimization in the original QM9 paper
# The characters \, / and @ are removed from the parsed SMILES before it is read as a graph
stereo_marks = re.compile(r'\\|\/|\@')
connect = []
for gdb_smi, mol_info in tqdm(qm9_dict.items()):
    gdb_graph = read_smiles(gdb_smi)
    parsed_graph = read_smiles(stereo_marks.sub('', mol_info['parsed_smi']))
    # Isomorphic graphs mean unchanged connectivity; only mismatches are recorded
    if nx.is_isomorphic(gdb_graph, parsed_graph):
        continue
    connect.append(gdb_smi)
connect

  0%|          | 0/130132 [00:00<?, ?it/s]

['CC1C2C3C4=CCC13C42', 'CN1C2C3C4=CCC31C42']

In [7]:
# Drop the molecules listed in connect from qm9_dict (missing keys are ignored)
for changed_smi in connect:
    if changed_smi in qm9_dict:
        del qm9_dict[changed_smi]
len(qm9_dict)

130130

## 4. Remove molecules having atomization energy per bond smaller than 100 kcal/mol using atomic and molecular energies provided by the original QM9 paper.

In [8]:
# Build an atom energy dictionary keyed by atomic number
atom_dict = {'H':1,'C':6,'N':7,'O':8,'F':9}
atomref = {}
with open('atomref.txt','r') as f:
    # Skip the first five lines of the file
    for skipped in range(5):
        next(f)
    # Read the next five rows: field 0 is the element symbol, field 2 the energy kept
    for row in range(5):
        fields = f.readline().rstrip().split()
        atomref[atom_dict[fields[0]]] = float(fields[2])
atomref

{1: -0.500273, 6: -37.846772, 7: -54.583861, 8: -75.064579, 9: -99.71873}

In [9]:
# Get atomization energy and atomization energy per bond in kcal/mol
def getAtomizationEnergy(smiles, energy, atomref):
    """Return the atomization energy and the atomization energy per bond.

    Parameters
    ----------
    smiles : SMILES string of the molecule. Hydrogens are added explicitly
        before atoms and bonds are counted.
    energy : molecular energy, in the same unit as the ``atomref`` values.
    atomref : mapping from atomic number to the reference energy of that atom.

    Returns
    -------
    (ae, ae_per_bond) : the summed atomic reference energies minus ``energy``,
        scaled by 627.5 (Hartree to kcal/mol), and that value divided by the
        number of bonds in the hydrogen-complete molecule.
    """
    mol_h = Chem.AddHs(Chem.MolFromSmiles(smiles))
    atoms_total = sum([atomref[atom.GetAtomicNum()] for atom in mol_h.GetAtoms()])
    ae = (atoms_total - energy) * 627.5 # Hartree to kcal/mol
    # Bonds to the added hydrogens are part of the count
    ae_per_bond = ae / len(mol_h.GetBonds())

    return ae, ae_per_bond

In [10]:
# Store AE and AE per bond on every qm9_dict record
for gdb_smi, mol_info in tqdm(qm9_dict.items()):
    ae, ae_per_bond = getAtomizationEnergy(
        mol_info['parsed_smi'], mol_info['energy'], atomref
    )
    mol_info.update({'AE': ae, 'AE_per_bond': ae_per_bond})

  0%|          | 0/130130 [00:00<?, ?it/s]

In [11]:
# Collect (gdb_smi, AE per bond) pairs for molecules with AE per bond < 100 kcal/mol
low_ae_mol = [
    (gdb_smi, mol_info['AE_per_bond'])
    for gdb_smi, mol_info in qm9_dict.items()
    if mol_info['AE_per_bond'] < 100
]
len(low_ae_mol)

104964

In [12]:
# Drop the low AE per bond molecules from qm9_dict (missing keys are ignored)
for low_smi, low_ae in low_ae_mol:
    if low_smi in qm9_dict:
        del qm9_dict[low_smi]
len(qm9_dict)

25166

## 5. Retain molecules with at least one ring.

In [13]:
# Find molecules in qm9_dict that don't have ring structure
no_ring = list()
for gdb_smi in tqdm(qm9_dict):
    mol = Chem.MolFromSmiles(gdb_smi)
    # Query the molecule's ring information instead of comparing
    # Chem.GetSSSR(mol) with 0: GetSSSR returns a ring count in older RDKit
    # releases but the rings themselves in newer ones, where `== 0` is never
    # true and every acyclic molecule would silently be kept.
    if mol.GetRingInfo().NumRings() == 0:
        no_ring.append(gdb_smi)
no_ring

  0%|          | 0/25166 [00:00<?, ?it/s]

['O',
 'C#C',
 'C#N',
 'C=O',
 'C#CC',
 'CC#N',
 'CC=O',
 'NC=O',
 'CC(C)=O',
 'CC(N)=O',
 'NC(N)=O',
 'C#CC#C',
 'C#CC#N',
 'N#CC#N',
 'C#CC=O',
 'N#CC=O',
 'O=CC=O',
 'CC#CC',
 'C#CCC',
 'CCC#N',
 'N#CCN',
 'C#CCO',
 'N#CCO',
 'CCC=O',
 'CNC=O',
 'COC=O',
 'O=CCO',
 'C#CC(C)=O',
 'CC(=O)C#N',
 'C#CC(N)=O',
 'CC(=O)C=O',
 'NC(=O)C=O',
 'C#CC(C)C',
 'CC(C)C#N',
 'CC(N)C#N',
 'C#CC(C)O',
 'CC(O)C#N',
 'CC(O)C=O',
 'CC(=O)CO',
 'CCC(C)=O',
 'CCC(N)=O',
 'COC(C)=O',
 'COC(N)=O',
 'NC(=O)CO',
 'C#CCC#C',
 'C#CCC#N',
 'N#CCC#N',
 'C#CCC=O',
 'N#CCC=O',
 'N=CNC=O',
 'N=COC=O',
 'O=CNC=O',
 'C#CC#CC',
 'CC#CC#N',
 'CC#CC=O',
 'CC#CCO',
 'CC#CCC',
 'C#CCCC',
 'CCCC#N',
 'CNCC#N',
 'C#CCOC',
 'COCC#N',
 'C#CCCO',
 'N#CCCO',
 'CCOC=O',
 'COCC=O',
 'O=CCCO',
 'C#CC(C)=NO',
 'FC(F)(F)F',
 'C#CC(C)(C)C',
 'CC(C)(C)C#N',
 'CC(C)(N)C#N',
 'C#CC(C)(C)O',
 'CC(C)(O)C#N',
 'CC#CC(C)=O',
 'CC#CC(N)=O',
 'CC#CC(C)C',
 'CC#CC(C)O',
 'C#CCC(C)=O',
 'CC(=O)CC#N',
 'C#CCC(N)=O',
 'N#CCC(N)=O',
 'CC(=N)NC=O',


In [14]:
# Drop the ring-free molecules from qm9_dict (missing keys are ignored)
for acyclic_smi in no_ring:
    if acyclic_smi in qm9_dict:
        del qm9_dict[acyclic_smi]
len(qm9_dict)

17546

## 6. Remove molecules that failed the RDKit embedding method using the srETKDGv3 algorithm.

In [15]:
# Collect molecules for which RDKit embedding fails
embed_fail = []
for gdb_smi in tqdm(qm9_dict):
    mol = Chem.AddHs(Chem.MolFromSmiles(gdb_smi))

    # srETKDGv3 parameters with a fixed random seed
    params = Chem.rdDistGeom.srETKDGv3()
    params.randomSeed = 0xf00d
    params.clearConfs = True

    conf_id = AllChem.EmbedMolecule(mol, params=params)
    # A return value of -1 is treated as an embedding failure
    if conf_id != -1:
        continue
    print(gdb_smi)
    embed_fail.append(gdb_smi)

  0%|          | 0/17546 [00:00<?, ?it/s]

N#CC12CCC1C(=O)O2
C#CC12COC1C(=O)O2
N#CC12COC1C(=O)O2
N#CC1CC2C(=O)OC12
C#CC1OC2C(=O)OC12
N#CC1OC2C(=O)OC12
N#CC1CC2OC(=O)C12
N#CC12CCC1OC2=O
C#CC12OCC1OC2=O
N#CC12OCC1OC2=O


In [16]:
# Drop the molecules that failed embedding from qm9_dict (missing keys are ignored)
for failed_smi in embed_fail:
    if failed_smi in qm9_dict:
        del qm9_dict[failed_smi]
len(qm9_dict)

17536

## 7. Remove molecules that showed broken bonds or rearrangement reactions after the reorganization energy calculations described in the manuscript.

In [17]:
# This function was run after DFT optimizations were done
# origin_smi: original smiles string, geom_after_opt: geometry after DFT optimization in XYZ format
def graph_check(origin_smi, geom_after_opt):
    """Flag a molecule whose optimized geometry no longer matches its SMILES graph.

    Parameters
    ----------
    origin_smi : SMILES string of the molecule before optimization.
    geom_after_opt : geometry after optimization, as XYZ-format text.

    Returns
    -------
    ``origin_smi`` when the molecule is flagged, i.e. when the geometry cannot
    be converted or parsed, converts to more than one fragment, or yields a
    graph that is not isomorphic to the original one; ``False`` otherwise.
    """
    g1 = read_smiles(origin_smi, explicit_hydrogen=True)
    g1_noH = read_smiles(origin_smi)
    try:
        # Open Babel writes "SMILES<tab>title". Keep only the SMILES field so
        # that a non-empty XYZ comment line can neither trigger the '.' test
        # below nor make the SMILES parser fail on an otherwise intact molecule.
        fields = pybel.readstring('xyz',geom_after_opt).write('smi').split()
        smi = fields[0] if fields else ''
        # A '.' separates disconnected fragments in SMILES
        if '.' in smi:
            return origin_smi
        g2 = read_smiles(smi, explicit_hydrogen=True)
        g2_noH = read_smiles(smi)
    except Exception:
        # Conversion or parsing failures flag the molecule; unlike a bare
        # `except:`, KeyboardInterrupt and SystemExit still propagate.
        return origin_smi

    # First test: graph with explicit hydrogens, topology only.
    # Second test: heavy-atom graph, with matching hydrogen counts per node.
    # NOTE(review): neither test compares the nodes' element attribute, so two
    # graphs with the same shape but different atom placement would pass --
    # confirm this is intended.
    if (not nx.is_isomorphic(g1,g2)) or (not nx.is_isomorphic(g1_noH,g2_noH, node_match=lambda n1,n2:n1['hcount']==n2['hcount'])):
        return origin_smi
    else:
        return False
    