In [13]:
import joblib
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from ipywidgets import widgets, IntProgress
from IPython.display import display, Markdown


from rdkit import Chem
from rdkit.Chem import Draw, Crippen, AllChem, Descriptors3D
from rdkit.Chem import rdMolDescriptors as descriptor

In [8]:
# Fitted estimator whose .predict() is called by predict() further down.
# NOTE: joblib.load unpickles the file, so only load artifacts from a trusted source.
model = joblib.load('final_model.joblib')
# Presumably the feature importances saved alongside the model (per the file name) - confirm.
features = joblib.load('final_model_importances.joblib')
# Read the comparison table and discard the 'Unnamed: 0' column in one chained
# expression (that column is typically a row index written out by an earlier to_csv).
exp_vs_calc = pd.read_csv('expvcalc.csv').drop(columns='Unnamed: 0')

In [60]:
def predict(SMILES):
    """Display the model's predicted HLC and a 2D depiction for one molecule.

    The SMILES string is parsed with RDKit, explicit hydrogens are added, and
    fifteen descriptors are computed on that hydrogen-added molecule: eleven
    topological ones, then four shape descriptors taken from an embedded,
    MMFF-optimised 3D conformer. The descriptors are passed to the
    notebook-level ``model`` as a single-row array.

    Parameters
    ----------
    SMILES : str
        SMILES string of the molecule to score.

    Returns
    -------
    None
        Results (or an error heading) are rendered with ``display``; nothing
        is returned. The signature is kept to a single parameter because
        ``widgets.interact`` builds its controls from it.
    """
    # Fixed seed for the stochastic conformer embedding, so the 3D descriptors
    # (and therefore the prediction) are reproducible for a given SMILES.
    embed_seed = 42

    try:
        m = Chem.MolFromSmiles(SMILES)
    except Exception:
        # Non-string input is rejected by RDKit with an exception; treat it
        # the same way as an unparsable string.
        m = None

    # MolFromSmiles returns None for unparsable input and an empty molecule
    # for an empty string; neither can be scored.
    if m is None or m.GetNumAtoms() == 0:
        display(Markdown('# Invalid SMILES string'))
        return

    try:
        # Descriptors are computed on the hydrogen-added molecule.
        mol = Chem.AddHs(m)
        wt = descriptor.CalcExactMolWt(mol)
        logp = Crippen.MolLogP(mol)
        ali_homo_rings = descriptor.CalcNumAliphaticCarbocycles(mol)
        ali_hetero_rings = descriptor.CalcNumAliphaticHeterocycles(mol)
        amide_bonds = descriptor.CalcNumAmideBonds(mol)
        aro_homo_rings = descriptor.CalcNumAromaticCarbocycles(mol)
        aro_hetero_rings = descriptor.CalcNumAromaticHeterocycles(mol)
        hba = descriptor.CalcNumHBA(mol)
        hbd = descriptor.CalcNumHBD(mol)
        tpsa = descriptor.CalcTPSA(mol)
        fraction_sp3 = descriptor.CalcFractionCSP3(mol)

        # EmbedMolecule returns the new conformer id, or -1 when no 3D
        # geometry could be generated; report that case explicitly instead
        # of letting the optimiser fail on a missing conformer.
        conformer_id = AllChem.EmbedMolecule(mol, randomSeed=embed_seed)
        if conformer_id < 0:
            display(Markdown('# Could not generate a 3D conformer for this molecule'))
            return
        AllChem.MMFFOptimizeMolecule(mol)

        asph = Descriptors3D.Asphericity(mol)
        ecce = Descriptors3D.Eccentricity(mol)
        isf = Descriptors3D.InertialShapeFactor(mol)
        npr1 = Descriptors3D.NPR1(mol)

        # Single-row matrix for model.predict. The column order is kept
        # exactly as before (presumably the order the model was fitted
        # with - confirm). Named feature_vector so it does not shadow the
        # notebook-level `features` object.
        feature_vector = np.array([
            wt, logp, ali_homo_rings, ali_hetero_rings,
            amide_bonds, aro_homo_rings, aro_hetero_rings,
            hba, hbd, tpsa, fraction_sp3, asph, ecce, isf, npr1,
        ]).reshape(1, -1)

        prediction = model.predict(feature_vector)[0]
        # Draw the already-parsed, hydrogen-free molecule rather than
        # parsing the SMILES a second time.
        image = Draw.MolToImage(m, size=(350, 350))
    except Exception as err:
        # Surface the real cause instead of mislabelling every failure as
        # an invalid SMILES; KeyboardInterrupt/SystemExit are not swallowed.
        display(Markdown('# Prediction failed: %s' % err))
        return

    display(Markdown("## Predicted HLC:   %f" % prediction))
    display(image)

In [62]:
# Bind predict() to a text box; the default SMILES below is a methylbenzene
# carrying three nitro groups. interact() returns the wrapped function, so the
# trailing semicolon keeps the cell from echoing its repr under the widget.
widgets.interact(predict, SMILES='CC1=C(C=C(C=C1[N+](=O)[O-])[N+](=O)[O-])[N+](=O)[O-]');

interactive(children=(Text(value='CC1=C(C=C(C=C1[N+](=O)[O-])[N+](=O)[O-])[N+](=O)[O-]', description='SMILES')…

<function __main__.predict(SMILES)>

In [44]:
import pubchempy as pcp

In [48]:
# Query PubChem by SMILES and show the first returned compound as a plain dict.
pubchem_hits = pcp.get_compounds('CN(CC1)CCN1C(C2=CC=CC=C2)C3=CC=CC=C3', namespace='smiles')
pubchem_hits[0].to_dict()

{'atom_stereo_count': 0,
 'atoms': [{'aid': 1, 'number': 7, 'element': 'N', 'y': 0, 'x': 4.5981},
  {'aid': 2, 'number': 7, 'element': 'N', 'y': 2, 'x': 4.5981},
  {'aid': 3, 'number': 6, 'element': 'C', 'y': 0.5, 'x': 3.732},
  {'aid': 4, 'number': 6, 'element': 'C', 'y': 0.5, 'x': 5.4641},
  {'aid': 5, 'number': 6, 'element': 'C', 'y': -1, 'x': 4.5981},
  {'aid': 6, 'number': 6, 'element': 'C', 'y': 1.5, 'x': 3.732},
  {'aid': 7, 'number': 6, 'element': 'C', 'y': 1.5, 'x': 5.4641},
  {'aid': 8, 'number': 6, 'element': 'C', 'y': -1.5, 'x': 3.732},
  {'aid': 9, 'number': 6, 'element': 'C', 'y': -1.5, 'x': 5.4641},
  {'aid': 10, 'number': 6, 'element': 'C', 'y': 3, 'x': 4.5981},
  {'aid': 11, 'number': 6, 'element': 'C', 'y': -1, 'x': 2.866},
  {'aid': 12, 'number': 6, 'element': 'C', 'y': -1, 'x': 6.3301},
  {'aid': 13, 'number': 6, 'element': 'C', 'y': -2.5, 'x': 3.732},
  {'aid': 14, 'number': 6, 'element': 'C', 'y': -2.5, 'x': 5.4641},
  {'aid': 15, 'number': 6, 'element': 'C', 'y':

In [56]:
# Query PubChem's synonym records by SMILES and show the first name listed
# under the 'Synonym' key of the first record.
synonym_records = pcp.get_synonyms('CN(CC1)CCN1C(C2=CC=CC=C2)C3=CC=CC=C3', namespace='smiles')
synonym_records[0]['Synonym'][0]

'CYCLIZINE'