In [1]:
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd

from rdkit import Chem
from rdkit.Chem import Descriptors

from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score

import pickle

In [2]:
# Load the pickled model from disk. The context manager closes the file handle
# as soon as unpickling finishes instead of leaving it open until garbage
# collection.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# model files that come from a trusted source.
with open('model_desc_total.pkl', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

In [3]:
def AromaticAtoms(m):
    """Count the aromatic atoms in a molecule.

    Parameters
    ----------
    m : molecule object
        Must provide ``GetNumAtoms()`` and ``GetAtomWithIdx(i)``; each atom
        returned must provide ``GetIsAromatic()`` (e.g. an RDKit ``Mol``).

    Returns
    -------
    int
        Number of atoms whose ``GetIsAromatic()`` flag is set; 0 when the
        molecule has no atoms.
    """
    # Count the flagged atoms directly rather than building an intermediate
    # list of flags, comparing each against True, and summing a list of 1s.
    return sum(
        1
        for idx in range(m.GetNumAtoms())
        if m.GetAtomWithIdx(idx).GetIsAromatic()
    )

In [4]:
def predictSingle(smiles, model):
    """Return the model's prediction for one molecule given as SMILES.

    Four descriptors are computed for the molecule -- MolLogP, MolWt,
    NumRotatableBonds and AromaticProportion (aromatic atom count divided by
    heavy atom count) -- placed in a one-row DataFrame with exactly those
    column names, and passed to ``model.predict``.

    Parameters
    ----------
    smiles : str
        SMILES string describing the molecule.
    model : object
        Object exposing ``predict(X)``. Presumably it was fitted on these four
        columns in this order -- confirm against the training notebook.

    Returns
    -------
    The first element of ``model.predict(...)`` for the one-row input.

    Raises
    ------
    ValueError
        If RDKit cannot parse ``smiles`` (``Chem.MolFromSmiles`` returns
        ``None``).
    ZeroDivisionError
        If the parsed molecule has no heavy atoms.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # Fail early with a clear message instead of an opaque error raised
        # from inside the first descriptor call.
        raise ValueError("Could not parse SMILES string: {!r}".format(smiles))

    aromatic_proportion = AromaticAtoms(mol) / Descriptors.HeavyAtomCount(mol)

    feature_row = [
        Descriptors.MolLogP(mol),
        Descriptors.MolWt(mol),
        Descriptors.NumRotatableBonds(mol),
        aromatic_proportion,
    ]
    # Build the single-row frame directly; dtype=float keeps every column as
    # float, matching the frame the previous column-then-transpose
    # construction produced.
    single_df = pd.DataFrame(
        [feature_row],
        columns=['MolLogP', 'MolWt', 'NumRotatableBonds', 'AromaticProportion'],
        dtype=float,
    )

    return model.predict(single_df)[0]

In [5]:
# Example: run the single-molecule prediction on one SMILES string using the
# model loaded from disk earlier in the notebook. As the last expression in
# the cell, the returned prediction is shown as the cell output.
smiles_new = 'FC(F)(Cl)C(F)(Cl)Cl'
predictSingle(smiles_new, loaded_model)

-3.167894079679184