In [1]:
### --- importing dependencies --- ###
import joblib
import numpy as np

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.Descriptors import *
from rdkit.Chem.rdMolDescriptors import *
from rdkit.Chem.Lipinski import *
from rdkit.Chem.EState import *
from rdkit.Chem.GraphDescriptors import *
from rdkit.Chem.Graphs import *
from math import sqrt, ceil

In [2]:
### important functions
def flatten_list(lst):
    """Return a new flat list with every nested ``list`` in ``lst`` expanded.

    Only objects that are instances of ``list`` are descended into; any other
    element (tuples, strings, numbers, ...) is copied to the result as-is.
    Element order is preserved (depth-first, left to right).
    """
    def _walk(sequence):
        # Yield leaves one by one, recursing into nested lists only.
        for element in sequence:
            if isinstance(element, list):
                yield from _walk(element)
            else:
                yield element

    return list(_walk(lst))

def properties_array(sSmiles, verbose=True):
    """Build the integer feature vector for one molecule given as a SMILES string.

    The vector is the concatenation, in this exact order, of:

    1. a 512-bit Morgan fingerprint (radius 2),
    2. the MACCS keys fingerprint,
    3. 39 scalar descriptors (some multiplied by 10 or 1000 before the cast),
    4. 7 count / connectivity descriptors,
    5. the BCUT2D descriptor values.

    Every part is cast to ``np.int16``, so fractional parts are discarded by
    the cast; the 10x / 1000x factors keep some of that precision.  The order,
    the scaling factors and the dtype must not be changed independently of the
    saved scaler/models that consume this vector.

    Parameters
    ----------
    sSmiles : str
        SMILES representation of the molecule.
    verbose : bool, optional
        When true (the default, matching the historical behaviour) the length
        of the resulting vector is printed.

    Returns
    -------
    tuple
        ``(features, True)`` with ``features`` a 1-D ``np.int16`` array on
        success, or ``(None, False)`` when the SMILES cannot be parsed or any
        descriptor calculation raises.
    """
    try:
        m = Chem.MolFromSmiles(sSmiles)
        # MolFromSmiles reports an unparsable SMILES by returning None rather
        # than raising; fail explicitly instead of relying on a later call to
        # blow up on the None molecule.
        if m is None:
            return None, False

        p1 = AllChem.GetMorganFingerprintAsBitVect(m, 2, 512)
        p2 = Chem.rdMolDescriptors.GetMACCSKeysFingerprint(m)

        p3 = [
            1000*FpDensityMorgan1(m),
            1000*FpDensityMorgan2(m),
            1000*FpDensityMorgan3(m),
            ExactMolWt(m),
            HeavyAtomMolWt(m),
            1000*MaxAbsPartialCharge(m),
            1000*MaxPartialCharge(m),
            1000*MinAbsPartialCharge(m),
            1000*MinPartialCharge(m),
            NumRadicalElectrons(m),
            NumValenceElectrons(m),
            1000*CalcFractionCSP3(m),
            10*CalcKappa1(m),
            10*CalcKappa2(m),
            10*CalcKappa3(m),
            CalcLabuteASA(m),
            CalcNumAliphaticCarbocycles(m),
            CalcNumAliphaticHeterocycles(m),
            CalcNumAliphaticRings(m),
            CalcNumAmideBonds(m),
            CalcNumAromaticCarbocycles(m),
            CalcNumAromaticHeterocycles(m),
            CalcNumAromaticRings(m),
            CalcNumAtomStereoCenters(m),
            CalcNumBridgeheadAtoms(m),
            CalcNumHBA(m),
            CalcNumHBD(m),
            CalcNumHeteroatoms(m),
            CalcNumHeterocycles(m),
            CalcNumLipinskiHBA(m),
            CalcNumLipinskiHBD(m),
            CalcNumRings(m),
            CalcNumRotatableBonds(m),
            CalcNumSaturatedCarbocycles(m),
            CalcNumSaturatedHeterocycles(m),
            CalcNumSaturatedRings(m),
            CalcNumSpiroAtoms(m),
            CalcNumUnspecifiedAtomStereoCenters(m),
            CalcTPSA(m),
        ]

        p4 = [HeavyAtomCount(m), NHOHCount(m), NOCount(m), NumHAcceptors(m), NumHDonors(m), Chi0(m), Chi1(m)]

        # Accessed through Chem (like the MACCS call above) so this line does
        # not depend on a name leaked by one of the wildcard imports.
        p5 = [Chem.rdMolDescriptors.BCUT2D(m)]

        pa = np.concatenate([
            np.array(list(p1), dtype=np.int16),
            np.array(list(p2), dtype=np.int16),
            np.array(p3, dtype=np.int16),
            np.array(p4, dtype=np.int16),
            np.array(flatten_list(p5), dtype=np.int16),
        ])

        if verbose:
            print(len(pa))

        return pa, True
    except Exception:
        # Keep the (None, False) failure contract for any calculation error,
        # but let KeyboardInterrupt / SystemExit propagate (a bare ``except``
        # would swallow those as well).
        return None, False

In [3]:
# Example molecule (SMILES) used for a single prediction.
sSmiles = "c1(c(cc(c(c1)Br)CC)Br)CC"
pa, lC = properties_array(sSmiles)

# properties_array reports failure through its flag and returns None as the
# feature vector; stop with a clear message instead of handing None to the scaler.
if not lC:
    raise ValueError(f"Could not compute descriptors for SMILES: {sSmiles!r}")

#Normalize data
# NOTE: joblib.load unpickles arbitrary Python objects -- only load model
# files that come from a trusted source.
scaler = joblib.load('./Models/scaler_model.pkl')
normalized_data = scaler.transform([pa])

# load saved models
EE = joblib.load('./Models/RF_EE.pkl')
OS = joblib.load('./Models/RF_OS.pkl')
HO = joblib.load('./Models/RF_HO.pkl')
LU = joblib.load('./Models/RF_LU.pkl')


# Apply the loaded model to the new data (HO and LU predictions are rounded)
Results = f'EE: {EE.predict(normalized_data)}, OS: {OS.predict(normalized_data)}, HO: {np.round(HO.predict(normalized_data))}, LU: {np.round(LU.predict(normalized_data))}. '
print(Results)


733
EE: [6.28385531], OS: [0.42518999], HO: [72.], LU: [74.]. 


In [6]:
smiles_list = ["c1(c(cc(c(c1)Br)CC)Br)CC", "CCO", "C"]  # List of SMILES

# Directory holding the fitted scaler and the four saved models.
# NOTE: joblib.load unpickles arbitrary Python objects -- only load model
# files that come from a trusted source.
path = "./Models"
scaler = joblib.load(f'{path}/scaler_model.pkl')
EE = joblib.load(f'{path}/RF_EE.pkl')
OS = joblib.load(f'{path}/RF_OS.pkl')
HO = joblib.load(f'{path}/RF_HO.pkl')
LU = joblib.load(f'{path}/RF_LU.pkl')

for sSmiles in smiles_list:
    pa, lC = properties_array(sSmiles)

    # A failed descriptor calculation yields (None, False); skip that entry
    # so one bad SMILES does not abort the rest of the batch.
    if not lC:
        print(f' Skipped (descriptor calculation failed) <- SMILES: {sSmiles} ')
        continue

    # The scaler expects a 2-D input: wrap the single feature vector in a list.
    normalized_data = scaler.transform([pa])

    EE_pred = EE.predict(normalized_data)
    OS_pred = OS.predict(normalized_data)
    HO_pred = np.round(HO.predict(normalized_data))
    LU_pred = np.round(LU.predict(normalized_data))

    results = f' EE: {round(EE_pred[0],3)}, OS: {round(OS_pred[0],3)}, HO: {round(HO_pred[0],3)}, LU: {round(LU_pred[0])} <- SMILES: {sSmiles} '
    print(results)


733
 EE: 6.284, OS: 0.425, HO: 72.0, LU: 74 <- SMILES: c1(c(cc(c(c1)Br)CC)Br)CC 
733
 EE: 7.121, OS: 1.313, HO: 39.0, LU: 40 <- SMILES: CCO 
733
 EE: 6.93, OS: 1.33, HO: 39.0, LU: 40 <- SMILES: C 
