In [40]:
import pandas as pd
import numpy as np
import os
import re
import time

from rdkit import Chem
from openbabel import pybel

In [41]:
def ac_to_series(smiles: str, filename: str) -> pd.DataFrame:
    """Collect the atom types listed in an antechamber ``.ac`` file.

    Every line containing ``ATOM`` contributes its last whitespace-delimited
    field; the fields are joined with ``|`` in file order.

    Args:
        smiles: Value stored in the ``smiles`` column of the result.
        filename: Path of the ``.ac`` file to read.

    Returns:
        A one-row DataFrame with the columns ``smiles`` and ``types``.
        ``types`` is an empty string when the file has no ``ATOM`` lines.
    """
    with open(filename) as file:
        # Take the last field instead of slicing the last three characters:
        # the slice returns an empty string when the line has trailing
        # whitespace and truncates any final field longer than two characters.
        atom_types = [line.split()[-1] for line in file if 'ATOM' in line]

    return pd.DataFrame.from_dict({'smiles': [smiles], 'types': ['|'.join(atom_types)]})

In [42]:
def sdf_to_list(filename: str) -> list:
    """Read an SDF file and return its text split at every ``$$$$`` delimiter.

    The pieces are returned verbatim and nothing is stripped: each piece after
    the first begins with whatever followed the delimiter in the file, and the
    text after the final delimiter is included as the last element.
    """
    with open(filename, "rt") as handle:
        contents = handle.read()

    record_delimiter = r'$$$$'
    return contents.split(record_delimiter)

In [43]:
# Read the SDF text as a list of pieces split on the '$$$$' record delimiter.
mols = sdf_to_list('./data/edrug3d.sdf')
# Start from an empty table whose columns match the frames built by ac_to_series.
df = pd.DataFrame(columns=['smiles', 'types'])

In [44]:
for mol in mols:
    # Strip leading new lines
    mol = mol.lstrip()

    # Split on new lines to correct for mistakes made by splitting the sdf molecules
    split = mol.split('\n')
    curr_split_len = len(split)

    if curr_split_len > 5:
        # Inserts a new line if line 4 isn't in the correct place
        if re.compile(r'\s*\d+\s*\d+\s*\d+\s*\d+\s*').match(split[3]) is None:
            mol = '\n' + mol

        # Writes molecule to a file so antechamber can read it
        with open('mol.sdf', 'w') as file:
            file.write(mol)

        # Create an openbabel molecule
        py_mol = pybel.readstring('sdf', mol)

        # Run antechamber and divert output to a file (temporary)
        !antechamber -i mol.sdf -fi mdl -o mol.ac -fo ac -at gaff2 -pf y > .log

        # Sleep so antechamber can finish (requires experimentation)
        time.sleep(1)

        ac_df = ac_to_series(py_mol.write('smi'), 'mol.ac')

        df = pd.concat([df, ac_df], ignore_index=True)
    else:
        print('end')

try:
    os.remove('mol.ac')
    os.remove('mol.sdf')
except IOError:
    print('Something went wrong.')

KeyboardInterrupt: 

In [None]:
# Show the assembled smiles/types table as this cell's output.
df

In [None]:
# Save the table as CSV; index=False leaves the row index out of the file.
df.to_csv('./data/edrug3d.csv', index=False)