In [19]:
from rdkit import Chem
from rdkit.Chem import rdinchi
import csv 
from rdkit.Chem import Descriptors
from rdkit.Chem import rdMolDescriptors

In [3]:
# Read the DrugBank SD file and keep only the truthy entries the supplier
# yields (falsy entries, e.g. None, are dropped).
drugbank_input = Chem.SDMolSupplier('../data/drugbank.sdf')
drugbank = list(filter(None, drugbank_input))

In [9]:
# Write one SMILES string per DrugBank molecule, one row each.
# NOTE: later cells reopen 'drugbank.csv' in 'w' mode and replace this output.
# newline='' is required by the csv module for files handed to csv.writer;
# without it, platforms that translate '\n' emit '\r\r\n' row terminators.
with open('drugbank.csv', 'w', newline='') as csvfile:
    drugbank_writer = csv.writer(csvfile, delimiter=',',
                                 quotechar='"', quoting=csv.QUOTE_MINIMAL)
    for drug in drugbank:
        drugbank_writer.writerow([Chem.MolToSmiles(drug)])

In [16]:
# Write an (InChIKey, InChI) row for every DrugBank molecule.
# newline='' is required by the csv module for files handed to csv.writer.
with open('drugbank.csv', 'w', newline='') as csvfile:
    drugbank_writer = csv.writer(csvfile, delimiter=',',
                                 quotechar='"', quoting=csv.QUOTE_MINIMAL)
    for drug in drugbank:
        try:
            # Element 0 of the value returned by rdinchi.MolToInchi is used as
            # the InChI string.
            inchi = rdinchi.MolToInchi(drug)[0]
            inchikey = rdinchi.InchiToInchiKey(inchi)
        except ValueError:
            # Best effort: keep the row; csv.writer renders None as an empty field.
            inchi = None
            inchikey = None
        # Write the InChI computed above. Calling MolToInchi(drug, inchi) here
        # would pass the InChI as the second (options) argument, run outside
        # the try block, and put the whole returned tuple into the cell.
        drugbank_writer.writerow([inchikey, inchi])

In [21]:
# Write a descriptor table for every DrugBank molecule: InChIKey, SMILES,
# molecular weight, logP and the Lipinski H-bond acceptor / donor counts.
# newline='' is required by the csv module for files handed to csv.writer;
# without it, platforms that translate '\n' emit '\r\r\n' row terminators.
with open('drugbank.csv', 'w', newline='') as csvfile:
    drugbank_writer = csv.writer(csvfile, delimiter=',',
                                 quotechar='"', quoting=csv.QUOTE_MINIMAL)
    drugbank_writer.writerow(['inchikey', 'smiles', 'molwt', 'logp', 'HBA', 'HBD'])
    for drug in drugbank:
        try:
            inchi = rdinchi.MolToInchi(drug)[0]
            inchikey = rdinchi.InchiToInchiKey(inchi)
        except ValueError:
            # Best effort: keep the row; csv.writer renders None as an empty field.
            inchi = None
            inchikey = None
        # Column order must match the header row written above.
        drugbank_writer.writerow([
            inchikey,
            Chem.MolToSmiles(drug),
            Descriptors.MolWt(drug),
            Descriptors.MolLogP(drug),
            rdMolDescriptors.CalcNumLipinskiHBA(drug),
            rdMolDescriptors.CalcNumLipinskiHBD(drug),
        ])

In [26]:
# Export every DrugBank molecule that has at least one aromatic ring and
# collect summary statistics:
#   counter         - number of molecules with >= 1 aromatic ring
#   ring_number_all - total aromatic rings over those molecules
#   aromatic_drugs  - the molecules themselves (reused by later cells)
counter = 0
aromatic_drugs = []
ring_number_all = 0
# newline='' is required by the csv module for files handed to csv.writer.
with open('drugbank_rings.csv', 'w', newline='') as csvfile:
    drugbank_writer = csv.writer(csvfile, delimiter=',',
                                 quotechar='"', quoting=csv.QUOTE_MINIMAL)
    drugbank_writer.writerow(['inchikey', 'smiles', 'number of aromatic rings'])
    for drug in drugbank:
        ring_number = rdMolDescriptors.CalcNumAromaticRings(drug)
        if ring_number <= 0:
            continue
        counter += 1
        ring_number_all += ring_number
        try:
            inchi = rdinchi.MolToInchi(drug)[0]
            inchikey = rdinchi.InchiToInchiKey(inchi)
        except ValueError:
            # Best effort: keep the row; csv.writer renders None as an empty field.
            inchi = None
            inchikey = None
        aromatic_drugs.append(drug)
        drugbank_writer.writerow([inchikey, Chem.MolToSmiles(drug), ring_number])

print(counter)
print(ring_number_all)
# Mean aromatic rings per aromatic molecule; guard against an input with no
# aromatic molecules, which would otherwise raise ZeroDivisionError.
print(ring_number_all / counter if counter else float('nan'))

5166
10821
2.09465737514518


In [25]:
# Substructure query: a six-membered ring of aromatic carbons (benzene ring).
benzene_ring_smarts = 'c1ccccc1'
pattern = Chem.MolFromSmarts(benzene_ring_smarts)

In [28]:
# Keep only the aromatic drugs in which the `pattern` query finds no match.
non_matching_drugs = list(
    filter(lambda mol: not mol.HasSubstructMatch(pattern), aromatic_drugs)
)

In [29]:
# Number of aromatic drugs that do not match `pattern`.
len(non_matching_drugs)

880

In [35]:
# Substructure query: an aromatic six-carbon ring carrying a C(=O)O group
# (benzoic-acid-type fragment). The trailing O is any aliphatic oxygen, so
# esters match as well as free acids.
benzoic_smarts = 'c1ccccc1C(=O)O'
benzoovka = Chem.MolFromSmarts(benzoic_smarts)

In [38]:
# Aromatic drugs that contain the `benzoovka` substructure.
matching_drugs = [m for m in aromatic_drugs if m.HasSubstructMatch(benzoovka)]
# All occurrences of the query in the second hit, as tuples of atom indices.
# (A bare trailing '.' here is a SyntaxError, so the attribute access must be
# completed; GetSubstructMatches is the call the drawing cell uses as well.)
matches = matching_drugs[1].GetSubstructMatches(benzoovka)

In [39]:
# Number of aromatic drugs that contain the `benzoovka` substructure.
len(matching_drugs)

267

In [41]:
from rdkit.Chem.Draw import rdMolDraw2D
from IPython.display import SVG

In [None]:
# Render the second benzoic-acid-type hit as a 333x333 SVG with every atom
# that takes part in a `benzoovka` match highlighted.
# The SVG drawer class lives in the rdMolDraw2D module imported above.
drawer = rdMolDraw2D.MolDraw2DSVG(333, 333)
mol = matching_drugs[1]
# GetSubstructMatches returns one tuple of atom indices per match, while
# highlightAtoms expects a flat sequence of indices, so flatten and de-duplicate.
highlight_atoms = sorted(
    {atom_idx for match in mol.GetSubstructMatches(benzoovka) for atom_idx in match}
)
drawer.DrawMolecule(mol, highlightAtoms=highlight_atoms)
# The SVG text is only complete after FinishDrawing().
drawer.FinishDrawing()
SVG(drawer.GetDrawingText())