In [1]:
# Bag of Bonds (BoB) example using ChemML
# https://hachmannlab.github.io/chemml/index.html
# 
# Molecule uses the RDKit and OpenBabel APIs to construct molecules
from chemml.chem import Molecule
# ChemML's implementation of BoB
from chemml.chem import BagofBonds

# Build the molecule from its SMILES string with ChemML's Molecule class
methanol = 'CO'
mol = Molecule(methanol, input_type='smiles')

# Make the hydrogens explicit, then produce XYZ coordinates with a
# force-field geometry optimization (MMFF94s variant, at most 300 iterations)
mol.hydrogens('add')
mol.to_xyz(optimizer='MMFF', mmffVariant='MMFF94s', maxIters=300)

# Print the Molecule summary for methanol
print(mol)

# Print the SMILES held by the molecule (should match the input above)
print(mol.smiles)

# Print the atom symbols; the added hydrogens should now be listed
print(mol.xyz.atomic_symbols)

# Set BoB parameters
bob = BagofBonds(const=1.0)

# Featurize the molecule; the result is a pandas DataFrame
features = bob.represent(mol)

# Print the BoB vector
print(features)

# Draw the molecule. Jupyter renders only the value of a cell's final
# expression, so this call has to come last: as a bare statement in the
# middle of the cell its result was silently discarded and no picture appeared.
# NOTE(review): assumes visualize() returns the figure object when no filename
# is given -- confirm against the installed ChemML version.
mol.visualize()

<chemml.chem.Molecule(
        rdkit_molecule : <rdkit.Chem.rdchem.Mol object at 0x7fcdc7438b80>,
        pybel_molecule : None,
        creator        : ('SMILES', 'CO'),
        smiles         : 'CO',
        smarts         : None,
        inchi          : None,
        xyz            : <XYZ(geometry: (6, 3), atomic_numbers: (6, 1), atomic_symbols: (6, 1))>)>
CO
[['C']
 ['O']
 ['H']
 ['H']
 ['H']
 ['H']]
featurizing molecules in batches of 1 ...
Merging batch features ...    [DONE]
          0          1         2         3         4         5          6   \
0  36.858105  33.904673  5.491844  5.490692  5.490691  3.094722  73.516695   

         7         8         9   ...   11   12   13   14        15        16  \
0  8.227491  3.901689  3.888509  ...  0.5  0.5  0.5  0.5  0.560849  0.560849   

         17        18        19        20  
0  0.558327  0.436414  0.436414  0.351751  

[1 rows x 21 columns]
