In [5]:
from deepmol.loaders.loaders import CSVLoader
import pandas as pd
from deepmol.imbalanced_learn import RandomUnderSampler
from deepmol.splitters.splitters import SingletaskStratifiedSplitter
from copy import deepcopy
from deepmol.models import SklearnModel
from deepmol.imbalanced_learn import RandomUnderSampler

In [6]:
# Build the CSV that DeepMol will load: keep only the rows that carry an
# SR-p53 label, zero-fill every remaining missing value and drop 'mol_id'.
ds = (
    pd.read_csv('data/tox21.csv')
    .dropna(subset=['SR-p53'])
    .fillna(0)
    .drop('mol_id', axis=1)
)
# The DataFrame index is written out as the 'id' column used by the loader.
ds.to_csv(
    "data/deepmol_dataset.csv",
    sep=',',
    index=True,
    index_label="id",
    encoding='utf-8',
)

# The other assay columns are handed to the loader as input features;
# SR-p53 is the single label column.
assay_columns = [
    'NR-AR',
    'NR-AR-LBD',
    'NR-AhR',
    'NR-Aromatase',
    'NR-ER',
    'NR-ER-LBD',
    'NR-PPAR-gamma',
    'SR-ARE',
    'SR-ATAD5',
    'SR-HSE',
    'SR-MMP',
]
loader = CSVLoader(
    dataset_path='data/deepmol_dataset.csv',
    smiles_field='smiles',
    id_field='id',
    labels_fields=['SR-p53'],
    features_fields=assay_columns,
    shard_size=6774,
    mode='auto',
)
dataset = loader.create_dataset()

# Undersample a deep copy so that `dataset` itself is left untouched.
sampler = RandomUnderSampler(sampling_strategy=0.75, random_state=123, replacement=True)
d3 = sampler.sample(deepcopy(dataset))

# Stratified 70 % / 15 % / 15 % train / validation / test split.
splitter = SingletaskStratifiedSplitter()
train_dataset, valid_dataset, test_dataset = splitter.train_valid_test_split(
    dataset=d3,
    frac_train=0.7,
    frac_valid=0.15,
    frac_test=0.15,
)



2024-05-18 15:32:37,070 — INFO — Assuming classification since there are less than 10 unique y values. If otherwise, explicitly set the mode to 'regression'!


In [7]:
import random
from rdkit.Chem import rdMolDescriptors
from deepmol.compound_featurization import MorganFingerprint

# Load the previously saved model from 'rf_model' (presumably a random forest,
# judging by the name — TODO confirm). getFeatureImportance() uses it to
# produce predictions for the test set.
model = SklearnModel.load('rf_model')

def getFeatureImportance(mol_number=None):
    """Pick a test-set molecule, print its prediction and draw its Morgan bits.

    Despite the name, no per-feature importance is computed here: every bit
    that is set in the molecule's Morgan fingerprint is passed to
    ``MorganFingerprint.draw_bits``.

    Parameters
    ----------
    mol_number : int, optional
        Index of the molecule inside ``test_dataset``. When omitted, an index
        is drawn uniformly at random, which is the original behaviour.

    Returns
    -------
    tuple
        ``(mol, img)`` where ``mol`` is the selected entry of
        ``test_dataset.mols`` and ``img`` is the object returned by
        ``MorganFingerprint.draw_bits`` for that molecule.

    Raises
    ------
    ValueError
        If ``test_dataset`` is empty.
    IndexError
        If an explicit ``mol_number`` lies outside ``[0, len(test_dataset))``.
    """
    n_mols = len(test_dataset)
    if n_mols == 0:
        raise ValueError("test_dataset is empty; there is no molecule to draw")
    if mol_number is None:
        # Same distribution as random.randint(0, n_mols - 1).
        mol_number = random.randrange(n_mols)
    elif not 0 <= mol_number < n_mols:
        raise IndexError(
            f"mol_number {mol_number} is out of range for a test set of {n_mols} molecules"
        )

    # NOTE(review): the whole test set is scored on every call just to read
    # one value; consider caching the predictions if this becomes slow.
    prediction = model.predict(test_dataset)[mol_number]
    actual_value = test_dataset.y[mol_number]
    print('Prediction: ', prediction)
    print('Actual Value: ', actual_value)

    # Entries of `mols` are handed to RDKit as molecule objects below.
    mol = test_dataset.mols[mol_number]

    morgan_fp = MorganFingerprint()
    bit_info = {}
    # The fingerprint itself is not needed; the call is made only because it
    # fills `bit_info` with the bits that are set for this molecule.
    rdMolDescriptors.GetMorganFingerprintAsBitVect(
        mol,
        morgan_fp.radius,
        nBits=morgan_fp.size,
        useChirality=morgan_fp.chiral,
        useBondTypes=morgan_fp.bonds,
        useFeatures=morgan_fp.features,
        bitInfo=bit_info,
    )
    active_bits = list(bit_info)
    # Reuse the same default-configured featurizer for drawing.
    img = morgan_fp.draw_bits(mol, active_bits)
    return mol, img

In [8]:
from flask import Flask
from flask_cors import CORS
from rdkit import Chem 
import re
# Flask application object; the route defined below is registered on it.
app = Flask(__name__)
# Apply flask-cors to the app with the library's default settings
# (presumably so a browser front end on another origin can call it — TODO confirm).
CORS(app)

@app.route('/')
def getfeatures():
    """Draw the fingerprint bits of a random test molecule and save the image.

    Calls ``getFeatureImportance()``, converts the returned molecule to a
    SMILES string, derives a file name from it and writes the image to
    ``feature_importance/<file>.png``.

    Returns
    -------
    dict
        ``{"name": <SMILES string>, "file": <sanitised file stem>}``.
    """
    import os  # local import: only needed to prepare the output path

    mol, img = getFeatureImportance()
    origin_smiles = Chem.MolToSmiles(mol)

    # Replace the characters \ / + = [ ] - # with '_' to obtain a file stem.
    # NOTE: distinct SMILES can collapse to the same stem (e.g. 'C=C' and
    # 'C#C' both become 'C_C'), in which case the earlier image is overwritten.
    file_stem = re.sub(r'[\\\/\+\=\[\]\-\#]', '_', origin_smiles)

    # Make sure the target directory exists; saving would otherwise fail on a
    # fresh checkout where it has not been created yet.
    output_dir = "feature_importance"
    os.makedirs(output_dir, exist_ok=True)
    img.save(os.path.join(output_dir, file_stem + ".png"))

    return {"name": origin_smiles, "file": file_stem}

# Start Flask's built-in server with its default settings when this code runs
# as the main module. NOTE: app.run() blocks until it is interrupted
# (CTRL+C), so inside a notebook this cell keeps running while requests are
# being served.
if __name__ == '__main__':
    app.run()

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [18/May/2024 15:32:56] "GET / HTTP/1.1" 200 -


Prediction:  1.0
Actual Value:  1.0
