# Importing modules

In [1]:
# Importing modules
import numpy as np
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.ML.Descriptors import MoleculeDescriptors
from mordred import Calculator, descriptors
from rdkit.Chem import Descriptors, Lipinski
from mordred import Calculator, descriptors
import seaborn as sns
import sklearn.metrics as metrics
import matplotlib.pyplot as plt
import pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import LeaveOneOut
from sklearn.tree import DecisionTreeClassifier as CART
import xgboost as xgb
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import auc
from sklearn.metrics import RocCurveDisplay
from sklearn.model_selection import StratifiedKFold
#import sdr
# Display every column when a DataFrame is shown (no column truncation).
pd.set_option('display.max_columns', None)
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

# Function for preprocessing of SMILES

In [2]:
# Define a function to preprocess the smiles
def preprocess_smiles(smiles_list):
    """Keep only the longest '.'-separated fragment of each SMILES entry.

    Each entry is converted with ``str()`` and split on '.'; the fragment
    with the most characters is kept (the first one wins on a tie).

    Parameters
    ----------
    smiles_list : iterable
        SMILES entries; every element is passed through ``str()`` first.

    Returns
    -------
    list of str
        One fragment per input entry, in input order.
    """
    return [max(str(entry).split('.'), key=len) for entry in smiles_list]


# Define a function to canonicalize valid SMILES
def canonical_smiles(smiles_list):
    """Return the RDKit canonical SMILES of every parsable entry.

    Parameters
    ----------
    smiles_list : iterable of str
        SMILES strings to canonicalize.

    Returns
    -------
    list of str
        Canonical SMILES in input order. Entries that RDKit cannot parse are
        skipped, so the result may be shorter than ``smiles_list``; do not
        rely on positional alignment with the input when invalid SMILES may
        be present.
    """
    canon_smiles = []
    for smi in smiles_list:
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            # MolFromSmiles signals a parse failure by returning None;
            # handing that to MolToSmiles would raise, so skip the entry.
            continue
        canon_smiles.append(Chem.MolToSmiles(mol))
    return canon_smiles

# Function for feature generation using RDKit

In [3]:
# Define a function to calculate all the molecular descriptors and ECFP6 fingerprint
def calculate_allfeatures(smiles_list, radius=3, n_bits=1024):
    """Compute the RDKit descriptors and a Morgan bit fingerprint per SMILES.

    Parameters
    ----------
    smiles_list : iterable of str
        SMILES strings, one molecule each.
    radius : int, optional
        Morgan fingerprint radius. The default of 3 is the value the
        original code used for its "ECFP6" fingerprint.
    n_bits : int, optional
        Length of the fingerprint bit vector (default 1024).

    Returns
    -------
    pandas.DataFrame
        One row per input SMILES, in input order: one column per entry of
        ``Descriptors.descList`` followed by ``ECFP6_1`` ... ``ECFP6_<n_bits>``.
        The ``ECFP6_`` prefix is kept for every ``radius`` so that column
        names stay stable. For empty input the frame has zero rows and only
        the fingerprint columns.

    Raises
    ------
    ValueError
        If a SMILES string cannot be parsed by RDKit.
    """
    descriptors_list = []
    ecfp6_list = []
    for position, smiles in enumerate(smiles_list):
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            # Fail early with the offending entry instead of an opaque error
            # from the first descriptor function that receives None.
            raise ValueError(
                "Cannot compute features: invalid SMILES {!r} at position {}".format(smiles, position)
            )
        # Calculate the descriptors
        descriptors_list.append(
            {desc_name: desc_func(mol) for desc_name, desc_func in Descriptors.descList}
        )
        # Calculate the Morgan (ECFP) bit fingerprint
        fingerprint = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=n_bits)
        ecfp6_list.append(np.array(fingerprint))

    descriptors_df = pd.DataFrame(descriptors_list)
    # Column names come from n_bits rather than the first fingerprint, so an
    # empty input no longer fails on ecfp6_list[0].
    ecfp6_columns = ['ECFP6_{}'.format(i + 1) for i in range(n_bits)]
    ecfp6_df = pd.DataFrame(ecfp6_list, columns=ecfp6_columns)
    # Combine the descriptors and fingerprint bits into a single DataFrame
    return pd.concat([descriptors_df, ecfp6_df], axis=1)

# Function for preprocessing of features

In [4]:
##* Replace missing values with 'empty' in string (non-numeric) columns and with 0 in numeric columns.
def _remove_abnormal(all_data):

    ## Filling for missing values
    categorical_columns_list = []
    for each in all_data.columns:
        if all_data[each].dtypes != 'int64':
            if all_data[each].dtypes != 'float64':
                categorical_columns_list.append(each)
    for item in categorical_columns_list:
        all_data[item].fillna("empty",inplace=True)
    all_data.fillna(0, inplace=True)
    
    return all_data


##* Functions that select and prepare the X (feature) and y (target) variables from the input data
def _pre_feature(all_data, processingFeature):
    ## Pre-processing - 2. Vectorization for Input
    # Get the column names
    # Define the columns to exclude
    exclude_columns = ['SMILES']
    column_list=[col for col in all_data.columns if col not in exclude_columns]
    target = 'bioclass'
    features = [col for col in all_data.columns if col != target and col not in exclude_columns]
    ## For saving the converted key list
    converted_key_list = []
    ## Input Columns have Numeric data only. There is nothing to be vectorized.

    ## Auto correction
    column_list_old = column_list.copy()
    features_old = features.copy()
    mod_column_list = []
    mod_features = []
    if converted_key_list is not None:
        for keys, values in converted_key_list:
            for item in column_list:
                if item == keys:
                    column_list.remove(keys)
                    for iter_item in values:
                        mod_column_list.append(iter_item) # replace
            for item in features:
                if item == keys:
                    features.remove(keys)
                    for iter_item in values:
                        mod_features.append(iter_item) # replace

    # replace previous list
    if len(mod_column_list) != 0:
        column_list = column_list + mod_column_list
    if len(mod_features) != 0:
        features = features + mod_features
    processingFeature.fillna(0, inplace=True)
    
    ## Listing features and scaling datasets
    from sklearn.preprocessing import StandardScaler
    scaler = StandardScaler()
    scaled = scaler.fit(all_data[features])
    results = scaled.transform(processingFeature[features])

    return pd.DataFrame(results, columns=features)

def _pre_target(all_data, processingTarget):
    ## Pre-processing - 2. Vectorization for Output
    target = 'bioclass'
    ## Output Column has Numeric data. There is nothing to be vectorized.
    label_names = [0, 1]
    processedTarget = processingTarget
    return processedTarget, label_names


def _post_target(all_data, predicted): 
    ## Post-processing for Output
    target = 'bioclass'
    tmp = []
    for index, value in enumerate(predicted):
        if value == 1 :    
            tmp.append('Yes')
        if value == 0 :
            tmp.append('No')
    return tmp

# Importing dataset

In [62]:
# The external dataset is labelled and has two columns:
#   "SMILES"   - the SMILES string of each compound
#   "bioclass" - the label: 0 for inactive, 1 for active
df = pd.read_csv('dataset.csv')

# Preprocessing of SMILES and feature generation

In [65]:
# Reduce every SMILES entry to its longest fragment, then rebuild the table
# with the cleaned SMILES appended as the last column.
cleaned_smiles = pd.Series(preprocess_smiles(df.SMILES), name='SMILES')
no_smiles = df.drop(columns='SMILES')
cleaned_smiles_df = pd.concat([no_smiles, cleaned_smiles], axis=1)

# Compute descriptors and fingerprint bits for the cleaned SMILES.
features_df = calculate_allfeatures(cleaned_smiles_df.SMILES)

# Save the feature table on its own ...
features_df.to_csv('property.csv', index=False)

# ... and once more together with the bioclass and SMILES columns.
all_property_df = pd.concat([cleaned_smiles_df, features_df], axis=1)
all_property_df.to_csv('all_property.csv', index=False)

# Preprocessing of features

In [66]:
##* Data Loading
# Retry with ISO-8859-1 only when the file cannot be decoded with the default
# encoding. A bare `except:` would also swallow KeyboardInterrupt and hide
# unrelated failures (missing file, parse errors) behind a pointless retry.
try:
    all_data = pd.read_csv("./all_property.csv")
except UnicodeDecodeError:
    all_data = pd.read_csv("./all_property.csv", encoding="ISO-8859-1")

# Work on a copy: _pre_feature fills missing values of its second argument
# in place, and the loaded table should stay untouched.
x = all_data.copy()

scaled_all_data = _pre_feature(all_data, x)

# Names of the scaled feature columns (everything except SMILES and bioclass).
features = scaled_all_data.columns.tolist()

# Selecting features to be used for predictions

In [None]:
#column_list=['bioclass','MaxEStateIndex', 'MinEStateIndex', 'MaxAbsEStateIndex', 'MinAbsEStateIndex', 'qed', 'MolWt', 'HeavyAtomMolWt', 'ExactMolWt', 'NumValenceElectrons', 'NumRadicalElectrons', 'MaxPartialCharge', 'MinPartialCharge', 'MaxAbsPartialCharge', 'MinAbsPartialCharge', 'FpDensityMorgan1', 'FpDensityMorgan2', 'FpDensityMorgan3', 'BCUT2D_MWHI', 'BCUT2D_MWLOW', 'BCUT2D_CHGHI', 'BCUT2D_CHGLO', 'BCUT2D_LOGPHI', 'BCUT2D_LOGPLOW', 'BCUT2D_MRHI', 'BCUT2D_MRLOW', 'BalabanJ', 'BertzCT', 'Chi0', 'Chi0n', 'Chi0v', 'Chi1', 'Chi1n', 'Chi1v', 'Chi2n', 'Chi2v', 'Chi3n', 'Chi3v', 'Chi4n', 'Chi4v', 'HallKierAlpha', 'Ipc', 'Kappa1', 'Kappa2', 'Kappa3', 'LabuteASA', 'PEOE_VSA1', 'PEOE_VSA10', 'PEOE_VSA11', 'PEOE_VSA12', 'PEOE_VSA13', 'PEOE_VSA14', 'PEOE_VSA2', 'PEOE_VSA3', 'PEOE_VSA4', 'PEOE_VSA5', 'PEOE_VSA6', 'PEOE_VSA7', 'PEOE_VSA8', 'PEOE_VSA9', 'SMR_VSA1', 'SMR_VSA10', 'SMR_VSA2', 'SMR_VSA3', 'SMR_VSA4', 'SMR_VSA5', 'SMR_VSA6', 'SMR_VSA7', 'SMR_VSA8', 'SMR_VSA9', 'SlogP_VSA1', 'SlogP_VSA10', 'SlogP_VSA11', 'SlogP_VSA12', 'SlogP_VSA2', 'SlogP_VSA3', 'SlogP_VSA4', 'SlogP_VSA5', 'SlogP_VSA6', 'SlogP_VSA7', 'SlogP_VSA8', 'SlogP_VSA9', 'TPSA', 'EState_VSA1', 'EState_VSA10', 'EState_VSA11', 'EState_VSA2', 'EState_VSA3', 'EState_VSA4', 'EState_VSA5', 'EState_VSA6', 'EState_VSA7', 'EState_VSA8', 'EState_VSA9', 'VSA_EState1', 'VSA_EState10', 'VSA_EState2', 'VSA_EState3', 'VSA_EState4', 'VSA_EState5', 'VSA_EState6', 'VSA_EState7', 'VSA_EState8', 'VSA_EState9', 'FractionCSP3', 'HeavyAtomCount', 'NHOHCount', 'NOCount', 'NumAliphaticCarbocycles', 'NumAliphaticHeterocycles', 'NumAliphaticRings', 'NumAromaticCarbocycles', 'NumAromaticHeterocycles', 'NumAromaticRings', 'NumHAcceptors', 'NumHDonors', 'NumHeteroatoms', 'NumRotatableBonds', 'NumSaturatedCarbocycles', 'NumSaturatedHeterocycles', 'NumSaturatedRings', 'RingCount', 'MolLogP', 'MolMR', 'fr_Al_COO', 'fr_Al_OH', 'fr_Al_OH_noTert', 'fr_ArN', 'fr_Ar_COO', 'fr_Ar_N', 'fr_Ar_NH', 'fr_Ar_OH', 'fr_COO', 'fr_COO2', 'fr_C_O', 
'fr_C_O_noCOO', 'fr_C_S', 'fr_HOCCN', 'fr_Imine', 'fr_NH0', 'fr_NH1', 'fr_NH2', 'fr_N_O', 'fr_Ndealkylation1', 'fr_Ndealkylation2', 'fr_Nhpyrrole', 'fr_SH', 'fr_aldehyde', 'fr_alkyl_carbamate', 'fr_alkyl_halide', 'fr_allylic_oxid', 'fr_amide', 'fr_amidine', 'fr_aniline', 'fr_aryl_methyl', 'fr_azide', 'fr_azo', 'fr_barbitur', 'fr_benzene', 'fr_benzodiazepine', 'fr_bicyclic', 'fr_diazo', 'fr_dihydropyridine', 'fr_epoxide', 'fr_ester', 'fr_ether', 'fr_furan', 'fr_guanido', 'fr_halogen', 'fr_hdrzine', 'fr_hdrzone', 'fr_imidazole', 'fr_imide', 'fr_isocyan', 'fr_isothiocyan', 'fr_ketone', 'fr_ketone_Topliss', 'fr_lactam', 'fr_lactone', 'fr_methoxy', 'fr_morpholine', 'fr_nitrile', 'fr_nitro', 'fr_nitro_arom', 'fr_nitro_arom_nonortho', 'fr_nitroso', 'fr_oxazole', 'fr_oxime', 'fr_para_hydroxylation', 'fr_phenol', 'fr_phenol_noOrthoHbond', 'fr_phos_acid', 'fr_phos_ester', 'fr_piperdine', 'fr_piperzine', 'fr_priamide', 'fr_prisulfonamd', 'fr_pyridine', 'fr_quatN', 'fr_sulfide', 'fr_sulfonamd', 'fr_sulfone', 'fr_term_acetylene', 'fr_tetrazole', 'fr_thiazole', 'fr_thiocyan', 'fr_thiophene', 'fr_unbrch_alkane', 'fr_urea', 'ECFP6_1', 'ECFP6_2', 'ECFP6_3', 'ECFP6_4', 'ECFP6_5', 'ECFP6_6', 'ECFP6_7', 'ECFP6_8', 'ECFP6_9', 'ECFP6_10', 'ECFP6_11', 'ECFP6_12', 'ECFP6_13', 'ECFP6_14', 'ECFP6_15', 'ECFP6_16', 'ECFP6_17', 'ECFP6_18', 'ECFP6_19', 'ECFP6_20', 'ECFP6_21', 'ECFP6_22', 'ECFP6_23', 'ECFP6_24', 'ECFP6_25', 'ECFP6_26', 'ECFP6_27', 'ECFP6_28', 'ECFP6_29', 'ECFP6_30', 'ECFP6_31', 'ECFP6_32', 'ECFP6_33', 'ECFP6_34', 'ECFP6_35', 'ECFP6_36', 'ECFP6_37', 'ECFP6_38', 'ECFP6_39', 'ECFP6_40', 'ECFP6_41', 'ECFP6_42', 'ECFP6_43', 'ECFP6_44', 'ECFP6_45', 'ECFP6_46', 'ECFP6_47', 'ECFP6_48', 'ECFP6_49', 'ECFP6_50', 'ECFP6_51', 'ECFP6_52', 'ECFP6_53', 'ECFP6_54', 'ECFP6_55', 'ECFP6_56', 'ECFP6_57', 'ECFP6_58', 'ECFP6_59', 'ECFP6_60', 'ECFP6_61', 'ECFP6_62', 'ECFP6_63', 'ECFP6_64', 'ECFP6_65', 'ECFP6_66', 'ECFP6_67', 'ECFP6_68', 'ECFP6_69', 'ECFP6_70', 'ECFP6_71', 'ECFP6_72', 'ECFP6_73', 
'ECFP6_74', 'ECFP6_75', 'ECFP6_76', 'ECFP6_77', 'ECFP6_78', 'ECFP6_79', 'ECFP6_80', 'ECFP6_81', 'ECFP6_82', 'ECFP6_83', 'ECFP6_84', 'ECFP6_85', 'ECFP6_86', 'ECFP6_87', 'ECFP6_88', 'ECFP6_89', 'ECFP6_90', 'ECFP6_91', 'ECFP6_92', 'ECFP6_93', 'ECFP6_94', 'ECFP6_95', 'ECFP6_96', 'ECFP6_97', 'ECFP6_98', 'ECFP6_99', 'ECFP6_100', 'ECFP6_101', 'ECFP6_102', 'ECFP6_103', 'ECFP6_104', 'ECFP6_105', 'ECFP6_106', 'ECFP6_107', 'ECFP6_108', 'ECFP6_109', 'ECFP6_110', 'ECFP6_111', 'ECFP6_112', 'ECFP6_113', 'ECFP6_114', 'ECFP6_115', 'ECFP6_116', 'ECFP6_117', 'ECFP6_118', 'ECFP6_119', 'ECFP6_120', 'ECFP6_121', 'ECFP6_122', 'ECFP6_123', 'ECFP6_124', 'ECFP6_125', 'ECFP6_126', 'ECFP6_127', 'ECFP6_128', 'ECFP6_129', 'ECFP6_130', 'ECFP6_131', 'ECFP6_132', 'ECFP6_133', 'ECFP6_134', 'ECFP6_135', 'ECFP6_136', 'ECFP6_137', 'ECFP6_138', 'ECFP6_139', 'ECFP6_140', 'ECFP6_141', 'ECFP6_142', 'ECFP6_143', 'ECFP6_144', 'ECFP6_145', 'ECFP6_146', 'ECFP6_147', 'ECFP6_148', 'ECFP6_149', 'ECFP6_150', 'ECFP6_151', 'ECFP6_152', 'ECFP6_153', 'ECFP6_154', 'ECFP6_155', 'ECFP6_156', 'ECFP6_157', 'ECFP6_158', 'ECFP6_159', 'ECFP6_160', 'ECFP6_161', 'ECFP6_162', 'ECFP6_163', 'ECFP6_164', 'ECFP6_165', 'ECFP6_166', 'ECFP6_167', 'ECFP6_168', 'ECFP6_169', 'ECFP6_170', 'ECFP6_171', 'ECFP6_172', 'ECFP6_173', 'ECFP6_174', 'ECFP6_175', 'ECFP6_176', 'ECFP6_177', 'ECFP6_178', 'ECFP6_179', 'ECFP6_180', 'ECFP6_181', 'ECFP6_182', 'ECFP6_183', 'ECFP6_184', 'ECFP6_185', 'ECFP6_186', 'ECFP6_187', 'ECFP6_188', 'ECFP6_189', 'ECFP6_190', 'ECFP6_191', 'ECFP6_192', 'ECFP6_193', 'ECFP6_194', 'ECFP6_195', 'ECFP6_196', 'ECFP6_197', 'ECFP6_198', 'ECFP6_199', 'ECFP6_200', 'ECFP6_201', 'ECFP6_202', 'ECFP6_203', 'ECFP6_204', 'ECFP6_205', 'ECFP6_206', 'ECFP6_207', 'ECFP6_208', 'ECFP6_209', 'ECFP6_210', 'ECFP6_211', 'ECFP6_212', 'ECFP6_213', 'ECFP6_214', 'ECFP6_215', 'ECFP6_216', 'ECFP6_217', 'ECFP6_218', 'ECFP6_219', 'ECFP6_220', 'ECFP6_221', 'ECFP6_222', 'ECFP6_223', 'ECFP6_224', 'ECFP6_225', 'ECFP6_226', 'ECFP6_227', 'ECFP6_228', 
'ECFP6_229', 'ECFP6_230', 'ECFP6_231', 'ECFP6_232', 'ECFP6_233', 'ECFP6_234', 'ECFP6_235', 'ECFP6_236', 'ECFP6_237', 'ECFP6_238', 'ECFP6_239', 'ECFP6_240', 'ECFP6_241', 'ECFP6_242', 'ECFP6_243', 'ECFP6_244', 'ECFP6_245', 'ECFP6_246', 'ECFP6_247', 'ECFP6_248', 'ECFP6_249', 'ECFP6_250', 'ECFP6_251', 'ECFP6_252', 'ECFP6_253', 'ECFP6_254', 'ECFP6_255', 'ECFP6_256', 'ECFP6_257', 'ECFP6_258', 'ECFP6_259', 'ECFP6_260', 'ECFP6_261', 'ECFP6_262', 'ECFP6_263', 'ECFP6_264', 'ECFP6_265', 'ECFP6_266', 'ECFP6_267', 'ECFP6_268', 'ECFP6_269', 'ECFP6_270', 'ECFP6_271', 'ECFP6_272', 'ECFP6_273', 'ECFP6_274', 'ECFP6_275', 'ECFP6_276', 'ECFP6_277', 'ECFP6_278', 'ECFP6_279', 'ECFP6_280', 'ECFP6_281', 'ECFP6_282', 'ECFP6_283', 'ECFP6_284', 'ECFP6_285', 'ECFP6_286', 'ECFP6_287', 'ECFP6_288', 'ECFP6_289', 'ECFP6_290', 'ECFP6_291', 'ECFP6_292', 'ECFP6_293', 'ECFP6_294', 'ECFP6_295', 'ECFP6_296', 'ECFP6_297', 'ECFP6_298', 'ECFP6_299', 'ECFP6_300', 'ECFP6_301', 'ECFP6_302', 'ECFP6_303', 'ECFP6_304', 'ECFP6_305', 'ECFP6_306', 'ECFP6_307', 'ECFP6_308', 'ECFP6_309', 'ECFP6_310', 'ECFP6_311', 'ECFP6_312', 'ECFP6_313', 'ECFP6_314', 'ECFP6_315', 'ECFP6_316', 'ECFP6_317', 'ECFP6_318', 'ECFP6_319', 'ECFP6_320', 'ECFP6_321', 'ECFP6_322', 'ECFP6_323', 'ECFP6_324', 'ECFP6_325', 'ECFP6_326', 'ECFP6_327', 'ECFP6_328', 'ECFP6_329', 'ECFP6_330', 'ECFP6_331', 'ECFP6_332', 'ECFP6_333', 'ECFP6_334', 'ECFP6_335', 'ECFP6_336', 'ECFP6_337', 'ECFP6_338', 'ECFP6_339', 'ECFP6_340', 'ECFP6_341', 'ECFP6_342', 'ECFP6_343', 'ECFP6_344', 'ECFP6_345', 'ECFP6_346', 'ECFP6_347', 'ECFP6_348', 'ECFP6_349', 'ECFP6_350', 'ECFP6_351', 'ECFP6_352', 'ECFP6_353', 'ECFP6_354', 'ECFP6_355', 'ECFP6_356', 'ECFP6_357', 'ECFP6_358', 'ECFP6_359', 'ECFP6_360', 'ECFP6_361', 'ECFP6_362', 'ECFP6_363', 'ECFP6_364', 'ECFP6_365', 'ECFP6_366', 'ECFP6_367', 'ECFP6_368', 'ECFP6_369', 'ECFP6_370', 'ECFP6_371', 'ECFP6_372', 'ECFP6_373', 'ECFP6_374', 'ECFP6_375', 'ECFP6_376', 'ECFP6_377', 'ECFP6_378', 'ECFP6_379', 'ECFP6_380', 'ECFP6_381', 
'ECFP6_382', 'ECFP6_383', 'ECFP6_384', 'ECFP6_385', 'ECFP6_386', 'ECFP6_387', 'ECFP6_388', 'ECFP6_389', 'ECFP6_390', 'ECFP6_391', 'ECFP6_392', 'ECFP6_393', 'ECFP6_394', 'ECFP6_395', 'ECFP6_396', 'ECFP6_397', 'ECFP6_398', 'ECFP6_399', 'ECFP6_400', 'ECFP6_401', 'ECFP6_402', 'ECFP6_403', 'ECFP6_404', 'ECFP6_405', 'ECFP6_406', 'ECFP6_407', 'ECFP6_408', 'ECFP6_409', 'ECFP6_410', 'ECFP6_411', 'ECFP6_412', 'ECFP6_413', 'ECFP6_414', 'ECFP6_415', 'ECFP6_416', 'ECFP6_417', 'ECFP6_418', 'ECFP6_419', 'ECFP6_420', 'ECFP6_421', 'ECFP6_422', 'ECFP6_423', 'ECFP6_424', 'ECFP6_425', 'ECFP6_426', 'ECFP6_427', 'ECFP6_428', 'ECFP6_429', 'ECFP6_430', 'ECFP6_431', 'ECFP6_432', 'ECFP6_433', 'ECFP6_434', 'ECFP6_435', 'ECFP6_436', 'ECFP6_437', 'ECFP6_438', 'ECFP6_439', 'ECFP6_440', 'ECFP6_441', 'ECFP6_442', 'ECFP6_443', 'ECFP6_444', 'ECFP6_445', 'ECFP6_446', 'ECFP6_447', 'ECFP6_448', 'ECFP6_449', 'ECFP6_450', 'ECFP6_451', 'ECFP6_452', 'ECFP6_453', 'ECFP6_454', 'ECFP6_455', 'ECFP6_456', 'ECFP6_457', 'ECFP6_458', 'ECFP6_459', 'ECFP6_460', 'ECFP6_461', 'ECFP6_462', 'ECFP6_463', 'ECFP6_464', 'ECFP6_465', 'ECFP6_466', 'ECFP6_467', 'ECFP6_468', 'ECFP6_469', 'ECFP6_470', 'ECFP6_471', 'ECFP6_472', 'ECFP6_473', 'ECFP6_474', 'ECFP6_475', 'ECFP6_476', 'ECFP6_477', 'ECFP6_478', 'ECFP6_479', 'ECFP6_480', 'ECFP6_481', 'ECFP6_482', 'ECFP6_483', 'ECFP6_484', 'ECFP6_485', 'ECFP6_486', 'ECFP6_487', 'ECFP6_488', 'ECFP6_489', 'ECFP6_490', 'ECFP6_491', 'ECFP6_492', 'ECFP6_493', 'ECFP6_494', 'ECFP6_495', 'ECFP6_496', 'ECFP6_497', 'ECFP6_498', 'ECFP6_499', 'ECFP6_500', 'ECFP6_501', 'ECFP6_502', 'ECFP6_503', 'ECFP6_504', 'ECFP6_505', 'ECFP6_506', 'ECFP6_507', 'ECFP6_508', 'ECFP6_509', 'ECFP6_510', 'ECFP6_511', 'ECFP6_512', 'ECFP6_513', 'ECFP6_514', 'ECFP6_515', 'ECFP6_516', 'ECFP6_517', 'ECFP6_518', 'ECFP6_519', 'ECFP6_520', 'ECFP6_521', 'ECFP6_522', 'ECFP6_523', 'ECFP6_524', 'ECFP6_525', 'ECFP6_526', 'ECFP6_527', 'ECFP6_528', 'ECFP6_529', 'ECFP6_530', 'ECFP6_531', 'ECFP6_532', 'ECFP6_533', 'ECFP6_534', 
'ECFP6_535', 'ECFP6_536', 'ECFP6_537', 'ECFP6_538', 'ECFP6_539', 'ECFP6_540', 'ECFP6_541', 'ECFP6_542', 'ECFP6_543', 'ECFP6_544', 'ECFP6_545', 'ECFP6_546', 'ECFP6_547', 'ECFP6_548', 'ECFP6_549', 'ECFP6_550', 'ECFP6_551', 'ECFP6_552', 'ECFP6_553', 'ECFP6_554', 'ECFP6_555', 'ECFP6_556', 'ECFP6_557', 'ECFP6_558', 'ECFP6_559', 'ECFP6_560', 'ECFP6_561', 'ECFP6_562', 'ECFP6_563', 'ECFP6_564', 'ECFP6_565', 'ECFP6_566', 'ECFP6_567', 'ECFP6_568', 'ECFP6_569', 'ECFP6_570', 'ECFP6_571', 'ECFP6_572', 'ECFP6_573', 'ECFP6_574', 'ECFP6_575', 'ECFP6_576', 'ECFP6_577', 'ECFP6_578', 'ECFP6_579', 'ECFP6_580', 'ECFP6_581', 'ECFP6_582', 'ECFP6_583', 'ECFP6_584', 'ECFP6_585', 'ECFP6_586', 'ECFP6_587', 'ECFP6_588', 'ECFP6_589', 'ECFP6_590', 'ECFP6_591', 'ECFP6_592', 'ECFP6_593', 'ECFP6_594', 'ECFP6_595', 'ECFP6_596', 'ECFP6_597', 'ECFP6_598', 'ECFP6_599', 'ECFP6_600', 'ECFP6_601', 'ECFP6_602', 'ECFP6_603', 'ECFP6_604', 'ECFP6_605', 'ECFP6_606', 'ECFP6_607', 'ECFP6_608', 'ECFP6_609', 'ECFP6_610', 'ECFP6_611', 'ECFP6_612', 'ECFP6_613', 'ECFP6_614', 'ECFP6_615', 'ECFP6_616', 'ECFP6_617', 'ECFP6_618', 'ECFP6_619', 'ECFP6_620', 'ECFP6_621', 'ECFP6_622', 'ECFP6_623', 'ECFP6_624', 'ECFP6_625', 'ECFP6_626', 'ECFP6_627', 'ECFP6_628', 'ECFP6_629', 'ECFP6_630', 'ECFP6_631', 'ECFP6_632', 'ECFP6_633', 'ECFP6_634', 'ECFP6_635', 'ECFP6_636', 'ECFP6_637', 'ECFP6_638', 'ECFP6_639', 'ECFP6_640', 'ECFP6_641', 'ECFP6_642', 'ECFP6_643', 'ECFP6_644', 'ECFP6_645', 'ECFP6_646', 'ECFP6_647', 'ECFP6_648', 'ECFP6_649', 'ECFP6_650', 'ECFP6_651', 'ECFP6_652', 'ECFP6_653', 'ECFP6_654', 'ECFP6_655', 'ECFP6_656', 'ECFP6_657', 'ECFP6_658', 'ECFP6_659', 'ECFP6_660', 'ECFP6_661', 'ECFP6_662', 'ECFP6_663', 'ECFP6_664', 'ECFP6_665', 'ECFP6_666', 'ECFP6_667', 'ECFP6_668', 'ECFP6_669', 'ECFP6_670', 'ECFP6_671', 'ECFP6_672', 'ECFP6_673', 'ECFP6_674', 'ECFP6_675', 'ECFP6_676', 'ECFP6_677', 'ECFP6_678', 'ECFP6_679', 'ECFP6_680', 'ECFP6_681', 'ECFP6_682', 'ECFP6_683', 'ECFP6_684', 'ECFP6_685', 'ECFP6_686', 'ECFP6_687', 
'ECFP6_688', 'ECFP6_689', 'ECFP6_690', 'ECFP6_691', 'ECFP6_692', 'ECFP6_693', 'ECFP6_694', 'ECFP6_695', 'ECFP6_696', 'ECFP6_697', 'ECFP6_698', 'ECFP6_699', 'ECFP6_700', 'ECFP6_701', 'ECFP6_702', 'ECFP6_703', 'ECFP6_704', 'ECFP6_705', 'ECFP6_706', 'ECFP6_707', 'ECFP6_708', 'ECFP6_709', 'ECFP6_710', 'ECFP6_711', 'ECFP6_712', 'ECFP6_713', 'ECFP6_714', 'ECFP6_715', 'ECFP6_716', 'ECFP6_717', 'ECFP6_718', 'ECFP6_719', 'ECFP6_720', 'ECFP6_721', 'ECFP6_722', 'ECFP6_723', 'ECFP6_724', 'ECFP6_725', 'ECFP6_726', 'ECFP6_727', 'ECFP6_728', 'ECFP6_729', 'ECFP6_730', 'ECFP6_731', 'ECFP6_732', 'ECFP6_733', 'ECFP6_734', 'ECFP6_735', 'ECFP6_736', 'ECFP6_737', 'ECFP6_738', 'ECFP6_739', 'ECFP6_740', 'ECFP6_741', 'ECFP6_742', 'ECFP6_743', 'ECFP6_744', 'ECFP6_745', 'ECFP6_746', 'ECFP6_747', 'ECFP6_748', 'ECFP6_749', 'ECFP6_750', 'ECFP6_751', 'ECFP6_752', 'ECFP6_753', 'ECFP6_754', 'ECFP6_755', 'ECFP6_756', 'ECFP6_757', 'ECFP6_758', 'ECFP6_759', 'ECFP6_760', 'ECFP6_761', 'ECFP6_762', 'ECFP6_763', 'ECFP6_764', 'ECFP6_765', 'ECFP6_766', 'ECFP6_767', 'ECFP6_768', 'ECFP6_769', 'ECFP6_770', 'ECFP6_771', 'ECFP6_772', 'ECFP6_773', 'ECFP6_774', 'ECFP6_775', 'ECFP6_776', 'ECFP6_777', 'ECFP6_778', 'ECFP6_779', 'ECFP6_780', 'ECFP6_781', 'ECFP6_782', 'ECFP6_783', 'ECFP6_784', 'ECFP6_785', 'ECFP6_786', 'ECFP6_787', 'ECFP6_788', 'ECFP6_789', 'ECFP6_790', 'ECFP6_791', 'ECFP6_792', 'ECFP6_793', 'ECFP6_794', 'ECFP6_795', 'ECFP6_796', 'ECFP6_797', 'ECFP6_798', 'ECFP6_799', 'ECFP6_800', 'ECFP6_801', 'ECFP6_802', 'ECFP6_803', 'ECFP6_804', 'ECFP6_805', 'ECFP6_806', 'ECFP6_807', 'ECFP6_808', 'ECFP6_809', 'ECFP6_810', 'ECFP6_811', 'ECFP6_812', 'ECFP6_813', 'ECFP6_814', 'ECFP6_815', 'ECFP6_816', 'ECFP6_817', 'ECFP6_818', 'ECFP6_819', 'ECFP6_820', 'ECFP6_821', 'ECFP6_822', 'ECFP6_823', 'ECFP6_824', 'ECFP6_825', 'ECFP6_826', 'ECFP6_827', 'ECFP6_828', 'ECFP6_829', 'ECFP6_830', 'ECFP6_831', 'ECFP6_832', 'ECFP6_833', 'ECFP6_834', 'ECFP6_835', 'ECFP6_836', 'ECFP6_837', 'ECFP6_838', 'ECFP6_839', 'ECFP6_840', 
'ECFP6_841', 'ECFP6_842', 'ECFP6_843', 'ECFP6_844', 'ECFP6_845', 'ECFP6_846', 'ECFP6_847', 'ECFP6_848', 'ECFP6_849', 'ECFP6_850', 'ECFP6_851', 'ECFP6_852', 'ECFP6_853', 'ECFP6_854', 'ECFP6_855', 'ECFP6_856', 'ECFP6_857', 'ECFP6_858', 'ECFP6_859', 'ECFP6_860', 'ECFP6_861', 'ECFP6_862', 'ECFP6_863', 'ECFP6_864', 'ECFP6_865', 'ECFP6_866', 'ECFP6_867', 'ECFP6_868', 'ECFP6_869', 'ECFP6_870', 'ECFP6_871', 'ECFP6_872', 'ECFP6_873', 'ECFP6_874', 'ECFP6_875', 'ECFP6_876', 'ECFP6_877', 'ECFP6_878', 'ECFP6_879', 'ECFP6_880', 'ECFP6_881', 'ECFP6_882', 'ECFP6_883', 'ECFP6_884', 'ECFP6_885', 'ECFP6_886', 'ECFP6_887', 'ECFP6_888', 'ECFP6_889', 'ECFP6_890', 'ECFP6_891', 'ECFP6_892', 'ECFP6_893', 'ECFP6_894', 'ECFP6_895', 'ECFP6_896', 'ECFP6_897', 'ECFP6_898', 'ECFP6_899', 'ECFP6_900', 'ECFP6_901', 'ECFP6_902', 'ECFP6_903', 'ECFP6_904', 'ECFP6_905', 'ECFP6_906', 'ECFP6_907', 'ECFP6_908', 'ECFP6_909', 'ECFP6_910', 'ECFP6_911', 'ECFP6_912', 'ECFP6_913', 'ECFP6_914', 'ECFP6_915', 'ECFP6_916', 'ECFP6_917', 'ECFP6_918', 'ECFP6_919', 'ECFP6_920', 'ECFP6_921', 'ECFP6_922', 'ECFP6_923', 'ECFP6_924', 'ECFP6_925', 'ECFP6_926', 'ECFP6_927', 'ECFP6_928', 'ECFP6_929', 'ECFP6_930', 'ECFP6_931', 'ECFP6_932', 'ECFP6_933', 'ECFP6_934', 'ECFP6_935', 'ECFP6_936', 'ECFP6_937', 'ECFP6_938', 'ECFP6_939', 'ECFP6_940', 'ECFP6_941', 'ECFP6_942', 'ECFP6_943', 'ECFP6_944', 'ECFP6_945', 'ECFP6_946', 'ECFP6_947', 'ECFP6_948', 'ECFP6_949', 'ECFP6_950', 'ECFP6_951', 'ECFP6_952', 'ECFP6_953', 'ECFP6_954', 'ECFP6_955', 'ECFP6_956', 'ECFP6_957', 'ECFP6_958', 'ECFP6_959', 'ECFP6_960', 'ECFP6_961', 'ECFP6_962', 'ECFP6_963', 'ECFP6_964', 'ECFP6_965', 'ECFP6_966', 'ECFP6_967', 'ECFP6_968', 'ECFP6_969', 'ECFP6_970', 'ECFP6_971', 'ECFP6_972', 'ECFP6_973', 'ECFP6_974', 'ECFP6_975', 'ECFP6_976', 'ECFP6_977', 'ECFP6_978', 'ECFP6_979', 'ECFP6_980', 'ECFP6_981', 'ECFP6_982', 'ECFP6_983', 'ECFP6_984', 'ECFP6_985', 'ECFP6_986', 'ECFP6_987', 'ECFP6_988', 'ECFP6_989', 'ECFP6_990', 'ECFP6_991', 'ECFP6_992', 'ECFP6_993', 
'ECFP6_994', 'ECFP6_995', 'ECFP6_996', 'ECFP6_997', 'ECFP6_998', 'ECFP6_999', 'ECFP6_1000', 'ECFP6_1001', 'ECFP6_1002', 'ECFP6_1003', 'ECFP6_1004', 'ECFP6_1005', 'ECFP6_1006', 'ECFP6_1007', 'ECFP6_1008', 'ECFP6_1009', 'ECFP6_1010', 'ECFP6_1011', 'ECFP6_1012', 'ECFP6_1013', 'ECFP6_1014', 'ECFP6_1015', 'ECFP6_1016', 'ECFP6_1017', 'ECFP6_1018', 'ECFP6_1019', 'ECFP6_1020', 'ECFP6_1021', 'ECFP6_1022', 'ECFP6_1023', 'ECFP6_1024']
#features=['MaxEStateIndex', 'MinEStateIndex', 'MaxAbsEStateIndex', 'MinAbsEStateIndex', 'qed', 'MolWt', 'HeavyAtomMolWt', 'ExactMolWt', 'NumValenceElectrons', 'NumRadicalElectrons', 'MaxPartialCharge', 'MinPartialCharge', 'MaxAbsPartialCharge', 'MinAbsPartialCharge', 'FpDensityMorgan1', 'FpDensityMorgan2', 'FpDensityMorgan3', 'BCUT2D_MWHI', 'BCUT2D_MWLOW', 'BCUT2D_CHGHI', 'BCUT2D_CHGLO', 'BCUT2D_LOGPHI', 'BCUT2D_LOGPLOW', 'BCUT2D_MRHI', 'BCUT2D_MRLOW', 'BalabanJ', 'BertzCT', 'Chi0', 'Chi0n', 'Chi0v', 'Chi1', 'Chi1n', 'Chi1v', 'Chi2n', 'Chi2v', 'Chi3n', 'Chi3v', 'Chi4n', 'Chi4v', 'HallKierAlpha', 'Ipc', 'Kappa1', 'Kappa2', 'Kappa3', 'LabuteASA', 'PEOE_VSA1', 'PEOE_VSA10', 'PEOE_VSA11', 'PEOE_VSA12', 'PEOE_VSA13', 'PEOE_VSA14', 'PEOE_VSA2', 'PEOE_VSA3', 'PEOE_VSA4', 'PEOE_VSA5', 'PEOE_VSA6', 'PEOE_VSA7', 'PEOE_VSA8', 'PEOE_VSA9', 'SMR_VSA1', 'SMR_VSA10', 'SMR_VSA2', 'SMR_VSA3', 'SMR_VSA4', 'SMR_VSA5', 'SMR_VSA6', 'SMR_VSA7', 'SMR_VSA8', 'SMR_VSA9', 'SlogP_VSA1', 'SlogP_VSA10', 'SlogP_VSA11', 'SlogP_VSA12', 'SlogP_VSA2', 'SlogP_VSA3', 'SlogP_VSA4', 'SlogP_VSA5', 'SlogP_VSA6', 'SlogP_VSA7', 'SlogP_VSA8', 'SlogP_VSA9', 'TPSA', 'EState_VSA1', 'EState_VSA10', 'EState_VSA11', 'EState_VSA2', 'EState_VSA3', 'EState_VSA4', 'EState_VSA5', 'EState_VSA6', 'EState_VSA7', 'EState_VSA8', 'EState_VSA9', 'VSA_EState1', 'VSA_EState10', 'VSA_EState2', 'VSA_EState3', 'VSA_EState4', 'VSA_EState5', 'VSA_EState6', 'VSA_EState7', 'VSA_EState8', 'VSA_EState9', 'FractionCSP3', 'HeavyAtomCount', 'NHOHCount', 'NOCount', 'NumAliphaticCarbocycles', 'NumAliphaticHeterocycles', 'NumAliphaticRings', 'NumAromaticCarbocycles', 'NumAromaticHeterocycles', 'NumAromaticRings', 'NumHAcceptors', 'NumHDonors', 'NumHeteroatoms', 'NumRotatableBonds', 'NumSaturatedCarbocycles', 'NumSaturatedHeterocycles', 'NumSaturatedRings', 'RingCount', 'MolLogP', 'MolMR', 'fr_Al_COO', 'fr_Al_OH', 'fr_Al_OH_noTert', 'fr_ArN', 'fr_Ar_COO', 'fr_Ar_N', 'fr_Ar_NH', 'fr_Ar_OH', 'fr_COO', 'fr_COO2', 'fr_C_O', 'fr_C_O_noCOO', 
'fr_C_S', 'fr_HOCCN', 'fr_Imine', 'fr_NH0', 'fr_NH1', 'fr_NH2', 'fr_N_O', 'fr_Ndealkylation1', 'fr_Ndealkylation2', 'fr_Nhpyrrole', 'fr_SH', 'fr_aldehyde', 'fr_alkyl_carbamate', 'fr_alkyl_halide', 'fr_allylic_oxid', 'fr_amide', 'fr_amidine', 'fr_aniline', 'fr_aryl_methyl', 'fr_azide', 'fr_azo', 'fr_barbitur', 'fr_benzene', 'fr_benzodiazepine', 'fr_bicyclic', 'fr_diazo', 'fr_dihydropyridine', 'fr_epoxide', 'fr_ester', 'fr_ether', 'fr_furan', 'fr_guanido', 'fr_halogen', 'fr_hdrzine', 'fr_hdrzone', 'fr_imidazole', 'fr_imide', 'fr_isocyan', 'fr_isothiocyan', 'fr_ketone', 'fr_ketone_Topliss', 'fr_lactam', 'fr_lactone', 'fr_methoxy', 'fr_morpholine', 'fr_nitrile', 'fr_nitro', 'fr_nitro_arom', 'fr_nitro_arom_nonortho', 'fr_nitroso', 'fr_oxazole', 'fr_oxime', 'fr_para_hydroxylation', 'fr_phenol', 'fr_phenol_noOrthoHbond', 'fr_phos_acid', 'fr_phos_ester', 'fr_piperdine', 'fr_piperzine', 'fr_priamide', 'fr_prisulfonamd', 'fr_pyridine', 'fr_quatN', 'fr_sulfide', 'fr_sulfonamd', 'fr_sulfone', 'fr_term_acetylene', 'fr_tetrazole', 'fr_thiazole', 'fr_thiocyan', 'fr_thiophene', 'fr_unbrch_alkane', 'fr_urea', 'ECFP6_1', 'ECFP6_2', 'ECFP6_3', 'ECFP6_4', 'ECFP6_5', 'ECFP6_6', 'ECFP6_7', 'ECFP6_8', 'ECFP6_9', 'ECFP6_10', 'ECFP6_11', 'ECFP6_12', 'ECFP6_13', 'ECFP6_14', 'ECFP6_15', 'ECFP6_16', 'ECFP6_17', 'ECFP6_18', 'ECFP6_19', 'ECFP6_20', 'ECFP6_21', 'ECFP6_22', 'ECFP6_23', 'ECFP6_24', 'ECFP6_25', 'ECFP6_26', 'ECFP6_27', 'ECFP6_28', 'ECFP6_29', 'ECFP6_30', 'ECFP6_31', 'ECFP6_32', 'ECFP6_33', 'ECFP6_34', 'ECFP6_35', 'ECFP6_36', 'ECFP6_37', 'ECFP6_38', 'ECFP6_39', 'ECFP6_40', 'ECFP6_41', 'ECFP6_42', 'ECFP6_43', 'ECFP6_44', 'ECFP6_45', 'ECFP6_46', 'ECFP6_47', 'ECFP6_48', 'ECFP6_49', 'ECFP6_50', 'ECFP6_51', 'ECFP6_52', 'ECFP6_53', 'ECFP6_54', 'ECFP6_55', 'ECFP6_56', 'ECFP6_57', 'ECFP6_58', 'ECFP6_59', 'ECFP6_60', 'ECFP6_61', 'ECFP6_62', 'ECFP6_63', 'ECFP6_64', 'ECFP6_65', 'ECFP6_66', 'ECFP6_67', 'ECFP6_68', 'ECFP6_69', 'ECFP6_70', 'ECFP6_71', 'ECFP6_72', 'ECFP6_73', 'ECFP6_74', 
'ECFP6_75', 'ECFP6_76', 'ECFP6_77', 'ECFP6_78', 'ECFP6_79', 'ECFP6_80', 'ECFP6_81', 'ECFP6_82', 'ECFP6_83', 'ECFP6_84', 'ECFP6_85', 'ECFP6_86', 'ECFP6_87', 'ECFP6_88', 'ECFP6_89', 'ECFP6_90', 'ECFP6_91', 'ECFP6_92', 'ECFP6_93', 'ECFP6_94', 'ECFP6_95', 'ECFP6_96', 'ECFP6_97', 'ECFP6_98', 'ECFP6_99', 'ECFP6_100', 'ECFP6_101', 'ECFP6_102', 'ECFP6_103', 'ECFP6_104', 'ECFP6_105', 'ECFP6_106', 'ECFP6_107', 'ECFP6_108', 'ECFP6_109', 'ECFP6_110', 'ECFP6_111', 'ECFP6_112', 'ECFP6_113', 'ECFP6_114', 'ECFP6_115', 'ECFP6_116', 'ECFP6_117', 'ECFP6_118', 'ECFP6_119', 'ECFP6_120', 'ECFP6_121', 'ECFP6_122', 'ECFP6_123', 'ECFP6_124', 'ECFP6_125', 'ECFP6_126', 'ECFP6_127', 'ECFP6_128', 'ECFP6_129', 'ECFP6_130', 'ECFP6_131', 'ECFP6_132', 'ECFP6_133', 'ECFP6_134', 'ECFP6_135', 'ECFP6_136', 'ECFP6_137', 'ECFP6_138', 'ECFP6_139', 'ECFP6_140', 'ECFP6_141', 'ECFP6_142', 'ECFP6_143', 'ECFP6_144', 'ECFP6_145', 'ECFP6_146', 'ECFP6_147', 'ECFP6_148', 'ECFP6_149', 'ECFP6_150', 'ECFP6_151', 'ECFP6_152', 'ECFP6_153', 'ECFP6_154', 'ECFP6_155', 'ECFP6_156', 'ECFP6_157', 'ECFP6_158', 'ECFP6_159', 'ECFP6_160', 'ECFP6_161', 'ECFP6_162', 'ECFP6_163', 'ECFP6_164', 'ECFP6_165', 'ECFP6_166', 'ECFP6_167', 'ECFP6_168', 'ECFP6_169', 'ECFP6_170', 'ECFP6_171', 'ECFP6_172', 'ECFP6_173', 'ECFP6_174', 'ECFP6_175', 'ECFP6_176', 'ECFP6_177', 'ECFP6_178', 'ECFP6_179', 'ECFP6_180', 'ECFP6_181', 'ECFP6_182', 'ECFP6_183', 'ECFP6_184', 'ECFP6_185', 'ECFP6_186', 'ECFP6_187', 'ECFP6_188', 'ECFP6_189', 'ECFP6_190', 'ECFP6_191', 'ECFP6_192', 'ECFP6_193', 'ECFP6_194', 'ECFP6_195', 'ECFP6_196', 'ECFP6_197', 'ECFP6_198', 'ECFP6_199', 'ECFP6_200', 'ECFP6_201', 'ECFP6_202', 'ECFP6_203', 'ECFP6_204', 'ECFP6_205', 'ECFP6_206', 'ECFP6_207', 'ECFP6_208', 'ECFP6_209', 'ECFP6_210', 'ECFP6_211', 'ECFP6_212', 'ECFP6_213', 'ECFP6_214', 'ECFP6_215', 'ECFP6_216', 'ECFP6_217', 'ECFP6_218', 'ECFP6_219', 'ECFP6_220', 'ECFP6_221', 'ECFP6_222', 'ECFP6_223', 'ECFP6_224', 'ECFP6_225', 'ECFP6_226', 'ECFP6_227', 'ECFP6_228', 'ECFP6_229', 
'ECFP6_230', 'ECFP6_231', 'ECFP6_232', 'ECFP6_233', 'ECFP6_234', 'ECFP6_235', 'ECFP6_236', 'ECFP6_237', 'ECFP6_238', 'ECFP6_239', 'ECFP6_240', 'ECFP6_241', 'ECFP6_242', 'ECFP6_243', 'ECFP6_244', 'ECFP6_245', 'ECFP6_246', 'ECFP6_247', 'ECFP6_248', 'ECFP6_249', 'ECFP6_250', 'ECFP6_251', 'ECFP6_252', 'ECFP6_253', 'ECFP6_254', 'ECFP6_255', 'ECFP6_256', 'ECFP6_257', 'ECFP6_258', 'ECFP6_259', 'ECFP6_260', 'ECFP6_261', 'ECFP6_262', 'ECFP6_263', 'ECFP6_264', 'ECFP6_265', 'ECFP6_266', 'ECFP6_267', 'ECFP6_268', 'ECFP6_269', 'ECFP6_270', 'ECFP6_271', 'ECFP6_272', 'ECFP6_273', 'ECFP6_274', 'ECFP6_275', 'ECFP6_276', 'ECFP6_277', 'ECFP6_278', 'ECFP6_279', 'ECFP6_280', 'ECFP6_281', 'ECFP6_282', 'ECFP6_283', 'ECFP6_284', 'ECFP6_285', 'ECFP6_286', 'ECFP6_287', 'ECFP6_288', 'ECFP6_289', 'ECFP6_290', 'ECFP6_291', 'ECFP6_292', 'ECFP6_293', 'ECFP6_294', 'ECFP6_295', 'ECFP6_296', 'ECFP6_297', 'ECFP6_298', 'ECFP6_299', 'ECFP6_300', 'ECFP6_301', 'ECFP6_302', 'ECFP6_303', 'ECFP6_304', 'ECFP6_305', 'ECFP6_306', 'ECFP6_307', 'ECFP6_308', 'ECFP6_309', 'ECFP6_310', 'ECFP6_311', 'ECFP6_312', 'ECFP6_313', 'ECFP6_314', 'ECFP6_315', 'ECFP6_316', 'ECFP6_317', 'ECFP6_318', 'ECFP6_319', 'ECFP6_320', 'ECFP6_321', 'ECFP6_322', 'ECFP6_323', 'ECFP6_324', 'ECFP6_325', 'ECFP6_326', 'ECFP6_327', 'ECFP6_328', 'ECFP6_329', 'ECFP6_330', 'ECFP6_331', 'ECFP6_332', 'ECFP6_333', 'ECFP6_334', 'ECFP6_335', 'ECFP6_336', 'ECFP6_337', 'ECFP6_338', 'ECFP6_339', 'ECFP6_340', 'ECFP6_341', 'ECFP6_342', 'ECFP6_343', 'ECFP6_344', 'ECFP6_345', 'ECFP6_346', 'ECFP6_347', 'ECFP6_348', 'ECFP6_349', 'ECFP6_350', 'ECFP6_351', 'ECFP6_352', 'ECFP6_353', 'ECFP6_354', 'ECFP6_355', 'ECFP6_356', 'ECFP6_357', 'ECFP6_358', 'ECFP6_359', 'ECFP6_360', 'ECFP6_361', 'ECFP6_362', 'ECFP6_363', 'ECFP6_364', 'ECFP6_365', 'ECFP6_366', 'ECFP6_367', 'ECFP6_368', 'ECFP6_369', 'ECFP6_370', 'ECFP6_371', 'ECFP6_372', 'ECFP6_373', 'ECFP6_374', 'ECFP6_375', 'ECFP6_376', 'ECFP6_377', 'ECFP6_378', 'ECFP6_379', 'ECFP6_380', 'ECFP6_381', 'ECFP6_382', 
'ECFP6_383', 'ECFP6_384', 'ECFP6_385', 'ECFP6_386', 'ECFP6_387', 'ECFP6_388', 'ECFP6_389', 'ECFP6_390', 'ECFP6_391', 'ECFP6_392', 'ECFP6_393', 'ECFP6_394', 'ECFP6_395', 'ECFP6_396', 'ECFP6_397', 'ECFP6_398', 'ECFP6_399', 'ECFP6_400', 'ECFP6_401', 'ECFP6_402', 'ECFP6_403', 'ECFP6_404', 'ECFP6_405', 'ECFP6_406', 'ECFP6_407', 'ECFP6_408', 'ECFP6_409', 'ECFP6_410', 'ECFP6_411', 'ECFP6_412', 'ECFP6_413', 'ECFP6_414', 'ECFP6_415', 'ECFP6_416', 'ECFP6_417', 'ECFP6_418', 'ECFP6_419', 'ECFP6_420', 'ECFP6_421', 'ECFP6_422', 'ECFP6_423', 'ECFP6_424', 'ECFP6_425', 'ECFP6_426', 'ECFP6_427', 'ECFP6_428', 'ECFP6_429', 'ECFP6_430', 'ECFP6_431', 'ECFP6_432', 'ECFP6_433', 'ECFP6_434', 'ECFP6_435', 'ECFP6_436', 'ECFP6_437', 'ECFP6_438', 'ECFP6_439', 'ECFP6_440', 'ECFP6_441', 'ECFP6_442', 'ECFP6_443', 'ECFP6_444', 'ECFP6_445', 'ECFP6_446', 'ECFP6_447', 'ECFP6_448', 'ECFP6_449', 'ECFP6_450', 'ECFP6_451', 'ECFP6_452', 'ECFP6_453', 'ECFP6_454', 'ECFP6_455', 'ECFP6_456', 'ECFP6_457', 'ECFP6_458', 'ECFP6_459', 'ECFP6_460', 'ECFP6_461', 'ECFP6_462', 'ECFP6_463', 'ECFP6_464', 'ECFP6_465', 'ECFP6_466', 'ECFP6_467', 'ECFP6_468', 'ECFP6_469', 'ECFP6_470', 'ECFP6_471', 'ECFP6_472', 'ECFP6_473', 'ECFP6_474', 'ECFP6_475', 'ECFP6_476', 'ECFP6_477', 'ECFP6_478', 'ECFP6_479', 'ECFP6_480', 'ECFP6_481', 'ECFP6_482', 'ECFP6_483', 'ECFP6_484', 'ECFP6_485', 'ECFP6_486', 'ECFP6_487', 'ECFP6_488', 'ECFP6_489', 'ECFP6_490', 'ECFP6_491', 'ECFP6_492', 'ECFP6_493', 'ECFP6_494', 'ECFP6_495', 'ECFP6_496', 'ECFP6_497', 'ECFP6_498', 'ECFP6_499', 'ECFP6_500', 'ECFP6_501', 'ECFP6_502', 'ECFP6_503', 'ECFP6_504', 'ECFP6_505', 'ECFP6_506', 'ECFP6_507', 'ECFP6_508', 'ECFP6_509', 'ECFP6_510', 'ECFP6_511', 'ECFP6_512', 'ECFP6_513', 'ECFP6_514', 'ECFP6_515', 'ECFP6_516', 'ECFP6_517', 'ECFP6_518', 'ECFP6_519', 'ECFP6_520', 'ECFP6_521', 'ECFP6_522', 'ECFP6_523', 'ECFP6_524', 'ECFP6_525', 'ECFP6_526', 'ECFP6_527', 'ECFP6_528', 'ECFP6_529', 'ECFP6_530', 'ECFP6_531', 'ECFP6_532', 'ECFP6_533', 'ECFP6_534', 'ECFP6_535', 
'ECFP6_536', 'ECFP6_537', 'ECFP6_538', 'ECFP6_539', 'ECFP6_540', 'ECFP6_541', 'ECFP6_542', 'ECFP6_543', 'ECFP6_544', 'ECFP6_545', 'ECFP6_546', 'ECFP6_547', 'ECFP6_548', 'ECFP6_549', 'ECFP6_550', 'ECFP6_551', 'ECFP6_552', 'ECFP6_553', 'ECFP6_554', 'ECFP6_555', 'ECFP6_556', 'ECFP6_557', 'ECFP6_558', 'ECFP6_559', 'ECFP6_560', 'ECFP6_561', 'ECFP6_562', 'ECFP6_563', 'ECFP6_564', 'ECFP6_565', 'ECFP6_566', 'ECFP6_567', 'ECFP6_568', 'ECFP6_569', 'ECFP6_570', 'ECFP6_571', 'ECFP6_572', 'ECFP6_573', 'ECFP6_574', 'ECFP6_575', 'ECFP6_576', 'ECFP6_577', 'ECFP6_578', 'ECFP6_579', 'ECFP6_580', 'ECFP6_581', 'ECFP6_582', 'ECFP6_583', 'ECFP6_584', 'ECFP6_585', 'ECFP6_586', 'ECFP6_587', 'ECFP6_588', 'ECFP6_589', 'ECFP6_590', 'ECFP6_591', 'ECFP6_592', 'ECFP6_593', 'ECFP6_594', 'ECFP6_595', 'ECFP6_596', 'ECFP6_597', 'ECFP6_598', 'ECFP6_599', 'ECFP6_600', 'ECFP6_601', 'ECFP6_602', 'ECFP6_603', 'ECFP6_604', 'ECFP6_605', 'ECFP6_606', 'ECFP6_607', 'ECFP6_608', 'ECFP6_609', 'ECFP6_610', 'ECFP6_611', 'ECFP6_612', 'ECFP6_613', 'ECFP6_614', 'ECFP6_615', 'ECFP6_616', 'ECFP6_617', 'ECFP6_618', 'ECFP6_619', 'ECFP6_620', 'ECFP6_621', 'ECFP6_622', 'ECFP6_623', 'ECFP6_624', 'ECFP6_625', 'ECFP6_626', 'ECFP6_627', 'ECFP6_628', 'ECFP6_629', 'ECFP6_630', 'ECFP6_631', 'ECFP6_632', 'ECFP6_633', 'ECFP6_634', 'ECFP6_635', 'ECFP6_636', 'ECFP6_637', 'ECFP6_638', 'ECFP6_639', 'ECFP6_640', 'ECFP6_641', 'ECFP6_642', 'ECFP6_643', 'ECFP6_644', 'ECFP6_645', 'ECFP6_646', 'ECFP6_647', 'ECFP6_648', 'ECFP6_649', 'ECFP6_650', 'ECFP6_651', 'ECFP6_652', 'ECFP6_653', 'ECFP6_654', 'ECFP6_655', 'ECFP6_656', 'ECFP6_657', 'ECFP6_658', 'ECFP6_659', 'ECFP6_660', 'ECFP6_661', 'ECFP6_662', 'ECFP6_663', 'ECFP6_664', 'ECFP6_665', 'ECFP6_666', 'ECFP6_667', 'ECFP6_668', 'ECFP6_669', 'ECFP6_670', 'ECFP6_671', 'ECFP6_672', 'ECFP6_673', 'ECFP6_674', 'ECFP6_675', 'ECFP6_676', 'ECFP6_677', 'ECFP6_678', 'ECFP6_679', 'ECFP6_680', 'ECFP6_681', 'ECFP6_682', 'ECFP6_683', 'ECFP6_684', 'ECFP6_685', 'ECFP6_686', 'ECFP6_687', 'ECFP6_688', 
'ECFP6_689', 'ECFP6_690', 'ECFP6_691', 'ECFP6_692', 'ECFP6_693', 'ECFP6_694', 'ECFP6_695', 'ECFP6_696', 'ECFP6_697', 'ECFP6_698', 'ECFP6_699', 'ECFP6_700', 'ECFP6_701', 'ECFP6_702', 'ECFP6_703', 'ECFP6_704', 'ECFP6_705', 'ECFP6_706', 'ECFP6_707', 'ECFP6_708', 'ECFP6_709', 'ECFP6_710', 'ECFP6_711', 'ECFP6_712', 'ECFP6_713', 'ECFP6_714', 'ECFP6_715', 'ECFP6_716', 'ECFP6_717', 'ECFP6_718', 'ECFP6_719', 'ECFP6_720', 'ECFP6_721', 'ECFP6_722', 'ECFP6_723', 'ECFP6_724', 'ECFP6_725', 'ECFP6_726', 'ECFP6_727', 'ECFP6_728', 'ECFP6_729', 'ECFP6_730', 'ECFP6_731', 'ECFP6_732', 'ECFP6_733', 'ECFP6_734', 'ECFP6_735', 'ECFP6_736', 'ECFP6_737', 'ECFP6_738', 'ECFP6_739', 'ECFP6_740', 'ECFP6_741', 'ECFP6_742', 'ECFP6_743', 'ECFP6_744', 'ECFP6_745', 'ECFP6_746', 'ECFP6_747', 'ECFP6_748', 'ECFP6_749', 'ECFP6_750', 'ECFP6_751', 'ECFP6_752', 'ECFP6_753', 'ECFP6_754', 'ECFP6_755', 'ECFP6_756', 'ECFP6_757', 'ECFP6_758', 'ECFP6_759', 'ECFP6_760', 'ECFP6_761', 'ECFP6_762', 'ECFP6_763', 'ECFP6_764', 'ECFP6_765', 'ECFP6_766', 'ECFP6_767', 'ECFP6_768', 'ECFP6_769', 'ECFP6_770', 'ECFP6_771', 'ECFP6_772', 'ECFP6_773', 'ECFP6_774', 'ECFP6_775', 'ECFP6_776', 'ECFP6_777', 'ECFP6_778', 'ECFP6_779', 'ECFP6_780', 'ECFP6_781', 'ECFP6_782', 'ECFP6_783', 'ECFP6_784', 'ECFP6_785', 'ECFP6_786', 'ECFP6_787', 'ECFP6_788', 'ECFP6_789', 'ECFP6_790', 'ECFP6_791', 'ECFP6_792', 'ECFP6_793', 'ECFP6_794', 'ECFP6_795', 'ECFP6_796', 'ECFP6_797', 'ECFP6_798', 'ECFP6_799', 'ECFP6_800', 'ECFP6_801', 'ECFP6_802', 'ECFP6_803', 'ECFP6_804', 'ECFP6_805', 'ECFP6_806', 'ECFP6_807', 'ECFP6_808', 'ECFP6_809', 'ECFP6_810', 'ECFP6_811', 'ECFP6_812', 'ECFP6_813', 'ECFP6_814', 'ECFP6_815', 'ECFP6_816', 'ECFP6_817', 'ECFP6_818', 'ECFP6_819', 'ECFP6_820', 'ECFP6_821', 'ECFP6_822', 'ECFP6_823', 'ECFP6_824', 'ECFP6_825', 'ECFP6_826', 'ECFP6_827', 'ECFP6_828', 'ECFP6_829', 'ECFP6_830', 'ECFP6_831', 'ECFP6_832', 'ECFP6_833', 'ECFP6_834', 'ECFP6_835', 'ECFP6_836', 'ECFP6_837', 'ECFP6_838', 'ECFP6_839', 'ECFP6_840', 'ECFP6_841', 
'ECFP6_842', 'ECFP6_843', 'ECFP6_844', 'ECFP6_845', 'ECFP6_846', 'ECFP6_847', 'ECFP6_848', 'ECFP6_849', 'ECFP6_850', 'ECFP6_851', 'ECFP6_852', 'ECFP6_853', 'ECFP6_854', 'ECFP6_855', 'ECFP6_856', 'ECFP6_857', 'ECFP6_858', 'ECFP6_859', 'ECFP6_860', 'ECFP6_861', 'ECFP6_862', 'ECFP6_863', 'ECFP6_864', 'ECFP6_865', 'ECFP6_866', 'ECFP6_867', 'ECFP6_868', 'ECFP6_869', 'ECFP6_870', 'ECFP6_871', 'ECFP6_872', 'ECFP6_873', 'ECFP6_874', 'ECFP6_875', 'ECFP6_876', 'ECFP6_877', 'ECFP6_878', 'ECFP6_879', 'ECFP6_880', 'ECFP6_881', 'ECFP6_882', 'ECFP6_883', 'ECFP6_884', 'ECFP6_885', 'ECFP6_886', 'ECFP6_887', 'ECFP6_888', 'ECFP6_889', 'ECFP6_890', 'ECFP6_891', 'ECFP6_892', 'ECFP6_893', 'ECFP6_894', 'ECFP6_895', 'ECFP6_896', 'ECFP6_897', 'ECFP6_898', 'ECFP6_899', 'ECFP6_900', 'ECFP6_901', 'ECFP6_902', 'ECFP6_903', 'ECFP6_904', 'ECFP6_905', 'ECFP6_906', 'ECFP6_907', 'ECFP6_908', 'ECFP6_909', 'ECFP6_910', 'ECFP6_911', 'ECFP6_912', 'ECFP6_913', 'ECFP6_914', 'ECFP6_915', 'ECFP6_916', 'ECFP6_917', 'ECFP6_918', 'ECFP6_919', 'ECFP6_920', 'ECFP6_921', 'ECFP6_922', 'ECFP6_923', 'ECFP6_924', 'ECFP6_925', 'ECFP6_926', 'ECFP6_927', 'ECFP6_928', 'ECFP6_929', 'ECFP6_930', 'ECFP6_931', 'ECFP6_932', 'ECFP6_933', 'ECFP6_934', 'ECFP6_935', 'ECFP6_936', 'ECFP6_937', 'ECFP6_938', 'ECFP6_939', 'ECFP6_940', 'ECFP6_941', 'ECFP6_942', 'ECFP6_943', 'ECFP6_944', 'ECFP6_945', 'ECFP6_946', 'ECFP6_947', 'ECFP6_948', 'ECFP6_949', 'ECFP6_950', 'ECFP6_951', 'ECFP6_952', 'ECFP6_953', 'ECFP6_954', 'ECFP6_955', 'ECFP6_956', 'ECFP6_957', 'ECFP6_958', 'ECFP6_959', 'ECFP6_960', 'ECFP6_961', 'ECFP6_962', 'ECFP6_963', 'ECFP6_964', 'ECFP6_965', 'ECFP6_966', 'ECFP6_967', 'ECFP6_968', 'ECFP6_969', 'ECFP6_970', 'ECFP6_971', 'ECFP6_972', 'ECFP6_973', 'ECFP6_974', 'ECFP6_975', 'ECFP6_976', 'ECFP6_977', 'ECFP6_978', 'ECFP6_979', 'ECFP6_980', 'ECFP6_981', 'ECFP6_982', 'ECFP6_983', 'ECFP6_984', 'ECFP6_985', 'ECFP6_986', 'ECFP6_987', 'ECFP6_988', 'ECFP6_989', 'ECFP6_990', 'ECFP6_991', 'ECFP6_992', 'ECFP6_993', 'ECFP6_994', 
'ECFP6_995', 'ECFP6_996', 'ECFP6_997', 'ECFP6_998', 'ECFP6_999', 'ECFP6_1000', 'ECFP6_1001', 'ECFP6_1002', 'ECFP6_1003', 'ECFP6_1004', 'ECFP6_1005', 'ECFP6_1006', 'ECFP6_1007', 'ECFP6_1008', 'ECFP6_1009', 'ECFP6_1010', 'ECFP6_1011', 'ECFP6_1012', 'ECFP6_1013', 'ECFP6_1014', 'ECFP6_1015', 'ECFP6_1016', 'ECFP6_1017', 'ECFP6_1018', 'ECFP6_1019', 'ECFP6_1020', 'ECFP6_1021', 'ECFP6_1022', 'ECFP6_1023', 'ECFP6_1024']

In [69]:
# Load the list of feature names to be used for making predictions.
# These features were used during model training.
import ast  # local import: only needed to parse the feature list below

file_path = 'features.txt'

# Parse the file content as a Python literal (expected: a list of column names).
# ast.literal_eval accepts only literals (lists, strings, numbers, ...), so,
# unlike eval(), it cannot execute arbitrary code that ends up in the file.
with open(file_path, 'r') as file:
    features = ast.literal_eval(file.read())

# Display the contents of the 'features' variable
#features

# Predictions using models

In [70]:
# Load the trained model used for prediction.
# Replace 'model.pkl' with your desired property prediction model from the repository;
# for example, to predict blood brain barrier permeability, replace model.pkl with BBB.pkl.
# NOTE: unpickling can execute arbitrary code -- only load model files from a trusted source.
with open('model.pkl', 'rb') as model_file:  # context manager closes the file handle after loading
    pickled_model = pickle.load(model_file)

# Predict with the loaded model, using only the columns listed in `features`.
# `scaled_all_data`, `predicted_values` and `cleaned_smiles` are expected to come
# from earlier cells (not visible in this section) -- confirm they exist before running.
predicted = pickled_model.predict(scaled_all_data[features])

# Store the predictions in a 'prediction' column.
predicted_values['prediction'] = pd.DataFrame(predicted)

# Put the SMILES and their predictions side by side (column-wise concatenation).
smiles_prediction_df=pd.concat([cleaned_smiles, predicted_values],axis=1)

# Write the combined table to disk without the index column.
smiles_prediction_df.to_csv('predicted_values.csv', index=False)