In [14]:
### --- importing dependencies --- ###

# For the code to work correctly, the libraries below must be installed at exactly these versions:

#Name: joblib         Version: 1.4.2
#Name: rdkit          Version: 2023.9.5
#Name: scikit-learn   Version: 1.4.1.post1
#Name: tensorflow      Version: 2.16.1

#%pip show joblib rdkit scikit-learn
#%pip install joblib==1.4.2
#%pip install rdkit==2023.9.5
#%pip install scikit-learn==1.4.1.post1
#%pip install tensorflow==2.16.1


import joblib
import numpy as np

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.Descriptors import *
from rdkit.Chem.rdMolDescriptors import *
from rdkit.Chem.Lipinski import *
from rdkit.Chem.EState import *
from rdkit.Chem.GraphDescriptors import *
from rdkit.Chem.Graphs import *
from math import sqrt, ceil

In [3]:
### important functions
def flatten_list(lst):
    """Return a new flat list with the items of ``lst`` in their original order.

    Only elements that are ``list`` instances are expanded (recursively, to any
    depth); every other element, including tuples and other sequences, is kept
    as a single item.
    """
    flattened = []
    for element in lst:
        if not isinstance(element, list):
            # Leaf value: keep it untouched.
            flattened.append(element)
            continue
        # Nested list: splice in its flattened contents.
        flattened += flatten_list(element)
    return flattened

def properties_array(sSmiles):
    """Build the numeric descriptor vector for one molecule.

    The vector is the concatenation, in this fixed order, of:

    1. a 512-bit Morgan fingerprint (radius 2),
    2. the MACCS keys fingerprint,
    3. 39 RDKit descriptors (some multiplied by 1000 or 10),
    4. 7 count / connectivity descriptors,
    5. the BCUT2D descriptors.

    Every group is cast to ``int16``, so fractional parts are dropped.
    NOTE(review): the 1000x / 10x factors are presumably there to keep a few
    decimals through that cast; the order and scaling must stay in sync with
    whatever the downstream scaler/models were fitted on -- confirm before
    changing either.

    Parameters
    ----------
    sSmiles : str
        SMILES string of the molecule.

    Returns
    -------
    tuple
        ``(pa, True)`` with ``pa`` a 1-D ``int16`` NumPy array on success, or
        ``(None, False)`` if anything fails. Failures are printed, not raised.
    """
    try:
        m = Chem.MolFromSmiles(sSmiles)
        if m is None:
            # MolFromSmiles signals an unparsable SMILES by returning None.
            # Fail here with a readable message instead of letting the first
            # descriptor call choke on None with an opaque binding error.
            raise ValueError(f"invalid SMILES string: {sSmiles!r}")

        p1 = AllChem.GetMorganFingerprintAsBitVect(m, 2, 512)
        p2 = Chem.rdMolDescriptors.GetMACCSKeysFingerprint(m)

        # One descriptor per line; the ORDER defines the feature layout.
        p3 = [
            1000*FpDensityMorgan1(m),
            1000*FpDensityMorgan2(m),
            1000*FpDensityMorgan3(m),
            ExactMolWt(m),
            HeavyAtomMolWt(m),
            1000*MaxAbsPartialCharge(m),
            1000*MaxPartialCharge(m),
            1000*MinAbsPartialCharge(m),
            1000*MinPartialCharge(m),
            NumRadicalElectrons(m),
            NumValenceElectrons(m),
            1000*CalcFractionCSP3(m),
            10*CalcKappa1(m),
            10*CalcKappa2(m),
            10*CalcKappa3(m),
            CalcLabuteASA(m),
            CalcNumAliphaticCarbocycles(m),
            CalcNumAliphaticHeterocycles(m),
            CalcNumAliphaticRings(m),
            CalcNumAmideBonds(m),
            CalcNumAromaticCarbocycles(m),
            CalcNumAromaticHeterocycles(m),
            CalcNumAromaticRings(m),
            CalcNumAtomStereoCenters(m),
            CalcNumBridgeheadAtoms(m),
            CalcNumHBA(m),
            CalcNumHBD(m),
            CalcNumHeteroatoms(m),
            CalcNumHeterocycles(m),
            CalcNumLipinskiHBA(m),
            CalcNumLipinskiHBD(m),
            CalcNumRings(m),
            CalcNumRotatableBonds(m),
            CalcNumSaturatedCarbocycles(m),
            CalcNumSaturatedHeterocycles(m),
            CalcNumSaturatedRings(m),
            CalcNumSpiroAtoms(m),
            CalcNumUnspecifiedAtomStereoCenters(m),
            CalcTPSA(m),
        ]
        pa3 = np.array(p3, dtype=np.int16)

        p4 = [
            HeavyAtomCount(m),
            NHOHCount(m),
            NOCount(m),
            NumHAcceptors(m),
            NumHDonors(m),
            Chi0(m),
            Chi1(m),
        ]

        # Reached through Chem, like the MACCS call above, so the lookup does
        # not depend on a bare module name leaking in via a star-import.
        p5 = [Chem.rdMolDescriptors.BCUT2D(m)]

        pa1 = np.array(list(p1), dtype=np.int16)
        pa2 = np.array(list(p2), dtype=np.int16)
        pa4 = np.array(p4, dtype=np.int16)
        pa5 = np.array(flatten_list(p5), dtype=np.int16)

        # Single concatenation; it already returns a fresh array.
        return np.concatenate([pa1, pa2, pa3, pa4, pa5]), True
    except Exception as e:
        # Best-effort by design: report the problem and let the caller decide.
        print(f"Ocorreu um erro: {e}")
        return None, False

In [1]:
### Important: render the SMILES string below to a PNG image (estrutura2.png) with Open Babel

!obabel -:"c1(cc(Br)c(cc1Br)C(CCCCBr)(Br)Br)C(CCCCBr)(Br)Br" -O estrutura2.png

1 molecule converted


In [12]:
### Prediction
import numpy as np
from tensorflow.keras.models import load_model


# SMILES strings of the molecules to run the predictions for.
smiles_list = [
    "c1(c(Br)cc(c(c1)Br)CC(C(Br)(Cl)Cl)(F)F)CC(C(Br)(Cl)Cl)(F)F",
    "c1(cc(Br)c(cc1Br)C(CCCC(Cl)Cl)(Br)Br)C(CCCC(Cl)Cl)(Br)Br",
]

# Directory holding the serialized scaler and models.
path = "./Models"

# SECURITY NOTE: joblib.load unpickles its input and can therefore execute
# arbitrary code -- only load files that come from a trusted source.
# Pickled scikit-learn estimators are also tied to the library version they
# were saved with (see the version pins in the import cell).
scaler = joblib.load(f'{path}/scaler_model.pkl')
EE = joblib.load(f'{path}/RF_EE.pkl')
OS = joblib.load(f'{path}/RF_OS.pkl')
HO = joblib.load(f'{path}/RF_HO.pkl')
LU = joblib.load(f'{path}/RF_LU.pkl')
HOMO = load_model(f'{path}/HOMO-RNA.keras')

for sSmiles in smiles_list:
    pa, lC = properties_array(sSmiles)

    # properties_array returns (None, False) when the descriptors could not be
    # computed; feeding None to the scaler would abort the whole loop, so the
    # molecule is reported and skipped instead.
    if not lC:
        print(f'SMILES: {sSmiles}, skipped: descriptor vector could not be computed')
        continue

    # Single-sample batch: one row, one column per descriptor.
    pa_2d = pa.reshape(1, -1)
    normalized_data = scaler.transform(pa_2d)

    # The joblib-loaded estimators are fed the scaled features.
    EE_pred = EE.predict(normalized_data)
    OS_pred = OS.predict(normalized_data)
    HO_pred = np.round(HO.predict(normalized_data))
    LU_pred = np.round(LU.predict(normalized_data))

    # NOTE(review): the Keras model is fed the UNSCALED descriptors, unlike
    # the estimators above -- confirm this matches how it was trained.
    HOMO_pred = np.round(HOMO.predict(pa_2d))

    # LUMO is not predicted separately; it is reported as HOMO + 1.
    homo_index = round(HOMO_pred[0][0])
    lumo_index = homo_index + 1

    # x = HOMO - MO_i and y = MO_f - LUMO, i.e. the offsets of the predicted
    # MO_i / MO_f values from the predicted HOMO / LUMO.
    results = (
        f'SMILES: {sSmiles}, HOMO: {homo_index}, LUMO: {lumo_index}, '
        f'EE: {round(EE_pred[0],3)}, OS: {round(OS_pred[0],3)}, '
        f'MO_i: {round(HO_pred[0],3)}, MO_f: {round(LU_pred[0])}, '
        f'x: {round(HOMO_pred[0][0]- round(HO_pred[0]))}, '
        f'y: {round(LU_pred[0]-lumo_index)}'
    )

    print(results)


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
SMILES: c1(c(Br)cc(c(c1)Br)CC(C(Br)(Cl)Cl)(F)F)CC(C(Br)(Cl)Cl)(F)F, HOMO: 161, LUMO: 162, EE: 5.041, OS: 0.025, MO_i: 161.0, MO_f: 165, x: 0, y: 3
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
SMILES: c1(cc(Br)c(cc1Br)C(CCCC(Cl)Cl)(Br)Br)C(CCCC(Cl)Cl)(Br)Br, HOMO: 195, LUMO: 196, EE: 4.563, OS: 0.122, MO_i: 192.0, MO_f: 196, x: 3, y: 0


  saveable.load_own_variables(weights_store.get(inner_path))
