In [1]:
# Standard-library imports.
import logging
import pathlib
import shutil
import sys
# Attach the default handler to the root logger so that log records emitted
# by the featurizers below are printed in the notebook output.
logging.basicConfig()
# Modelling utilities from scikit-learn.
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split

# Make the parent directory importable; the `sbap` imports below rely on it.
# NOTE(review): presumably '..' is the repository root that contains `sbap` -- confirm.
sys.path.append('..')
from sbap.sdf import ChemblSdfReader
from sbap.featurizers.prolif_smina import SminaDockingPersistenceHandler, SminaConfig, DockedProlifFingerprintFeaturizer, SminaDockingScoreFeaturizer

In [5]:
# --- Input locations (all relative to the notebook's directory) -------------
# Ligand set exported from ChEMBL.
# NOTE(review): `sdf_file` is not used by any cell visible in this notebook -- confirm it is still needed.
sdf_file = pathlib.Path(
    "../molecules/CYP2C9_IC50_CHEMBL_data.sdf"
)
# Receptor structure passed to both featurizers.
protein_pdb_file = pathlib.Path(
    "../molecules/4la0_human_serum_albumin_cleaned_without_ligand.pdb"
)
# Directory passed to the featurizers as the location of the docked ligands.
docked_ligands_target_directory = pathlib.Path(
    "../4la0_human_serum_albumin_cleaned_without_ligand.pdb_2023_05_27_14_20_47"
)

# --- Featurizers, both constructed with INFO-level logging -------------------
fingerprint_featurizer = DockedProlifFingerprintFeaturizer.create(logging_level=logging.INFO)
docking_score_featurizer = SminaDockingScoreFeaturizer(logging.INFO)

In [6]:
# Fit the interaction-fingerprint featurizer on the receptor and the docked
# ligands directory. The recorded progress bar for this cell counts 195 items.
fingerprint_featurizer.fit(protein_pdb_file, docked_ligands_target_directory)

  0%|          | 0/195 [00:00<?, ?it/s]

In [7]:
# Build the feature matrix and the target values from the same receptor and
# docked-ligands directory that were used for fitting.
X_fp, y = fingerprint_featurizer.transform(protein_pdb_file, docked_ligands_target_directory)
# Display the matrix dimensions; the recorded run shows (195, 51).
X_fp.shape

  0%|          | 0/195 [00:00<?, ?it/s]

INFO:ProlifInteractionFingerprintGenerator:Receptor interactions found: Index(['GLU17.A Cationic', 'ASN18.A CustomVdWContact',
       'LYS20.A CustomVdWContact', 'ASP108.A CustomVdWContact',
       'ARG114.A CustomVdWContact', 'LEU115.A Hydrophobic', 'LEU115.A HBDonor',
       'LEU115.A CustomVdWContact', 'VAL116.A CustomVdWContact',
       'ARG117.A HBDonor', 'ARG117.A CustomVdWContact',
       'PRO118.A CustomVdWContact', 'MET123.A Hydrophobic',
       'MET123.A CustomVdWContact', 'ALA126.A HBDonor',
       'ALA126.A CustomVdWContact', 'ASN130.A CustomVdWContact',
       'PHE134.A Hydrophobic', 'PHE134.A HBDonor', 'PHE134.A CationPi',
       'PHE134.A CustomVdWContact', 'LYS137.A Anionic',
       'LYS137.A CustomVdWContact', 'TYR138.A Hydrophobic', 'TYR138.A HBDonor',
       'TYR138.A CustomVdWContact', 'GLU141.A Cationic',
       'GLU141.A CustomVdWContact', 'ILE142.A CustomVdWContact',
       'ARG145.A CustomVdWContact', 'HIS146.A CustomVdWContact',
       'PRO147.A CustomVdWContac

(195, 51)

In [8]:
# Quick look at the feature matrix (the array repr is elided in the output).
# NOTE(review): the first column holds negative, score-like values in the
# recorded output -- presumably a docking score; confirm against the featurizer.
X_fp

array([[-8.6,  0. ,  0. , ...,  0. ,  0. ,  0. ],
       [-6.8,  0. ,  0. , ...,  0. ,  0. ,  0. ],
       [-8.9,  0. ,  0. , ...,  0. ,  0. ,  0. ],
       ...,
       [-7.6,  0. ,  0. , ...,  0. ,  0. ,  0. ],
       [-9.9,  0. ,  0. , ...,  0. ,  0. ,  0. ],
       [-6.6,  0. ,  0. , ...,  0. ,  0. ,  0. ]])

In [5]:
# Split features and targets into train/test partitions with a fixed seed.
# train_test_split returns (X_train, X_test, y_train, y_test) for two inputs.
# The previous unpacking repeated the names X_fp_train / X_fp_test, so the
# label splits overwrote the feature splits, and the names consumed by the
# modelling cell (X_train, X_test, y_train, y_test) were never defined.
X_train, X_test, y_train, y_test = train_test_split(X_fp, y, random_state=1)

In [None]:
# Run the docking-score featurizer on the same receptor and docked-ligands
# directory. The return value is not assigned, so it is only displayed.
# NOTE(review): the recorded progress bar counts 5144 items (stopped at 67%),
# versus 195 items for the fingerprint featurizer -- confirm this is expected.
docking_score_featurizer.featurize(protein_pdb_file, docked_ligands_target_directory)

 67%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                                  | 3461/5144 [21:19<10:16,  2.73it/s]

In [None]:
# Baseline model: a decision tree with a fixed seed for reproducibility.
regr = DecisionTreeRegressor(random_state=1)
# Targets are cast to float before fitting and before scoring.
regr.fit(X_train, y_train.astype('float'))
# Held-out score (R^2 for scikit-learn regressors); shown as the cell output.
regr.score(X_test, y_test.astype('float'))