In [1]:
# Standard-library imports.
import logging
import pathlib
import shutil
import sys
# Configure the root logger with logging's defaults (no arguments are passed).
logging.basicConfig()
# scikit-learn: the regressor and the train/test splitting helper used below.
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split

# Extend the module search path with the parent directory *before* importing
# `sbap`; presumably the package lives one level above this notebook
# (TODO confirm), so this line must stay ahead of the imports that follow.
sys.path.append('..')
from sbap.sdf import ChemblSdfReader
from sbap.featurizers.prolif_smina import SminaDockingPersistenceHandler, SminaConfig, DockedProlifFingerprintFeaturizer, SminaDockingScoreFeaturizer

In [2]:
# --- Input locations (all relative to the notebook's working directory) ---
# SDF file with the ligand data.
sdf_file = pathlib.Path("../molecules/CYP2C9_IC50_CHEMBL_data.sdf")
# Receptor structure passed to both featurizers.
protein_pdb_file = pathlib.Path("../molecules/4nz2_CYP2C9_cleaned.pdb")
# Directory of docked ligands passed to both featurizers.
docked_ligands_target_directory = pathlib.Path("../molecules/CYP2C9_IC50_CHEMBL_docked_202305051811")

# --- Smina settings: search-box centre, box size, and exhaustiveness ---
# NOTE(review): `config` is not referenced by any other cell visible here;
# confirm whether it should be handed to one of the featurizers.
config = SminaConfig(
    center_x=-62.461,
    center_y=-44.369,
    center_z=-21.255,
    size_x=30,
    size_y=30,
    size_z=30,
    exhaustiveness=8,
)

# --- Featurizers, both created with INFO-level logging ---
fingerprint_featurizer = DockedProlifFingerprintFeaturizer.create(logging_level=logging.INFO)
docking_score_featurizer = SminaDockingScoreFeaturizer(logging.INFO)

In [3]:
# Fit the fingerprint featurizer on the receptor structure and the directory
# of docked ligands.
fingerprint_featurizer.fit(
    protein_pdb_file,
    docked_ligands_target_directory,
)

  0%|          | 0/5144 [00:00<?, ?it/s]

In [4]:
# Transform the same receptor / docked-ligand inputs; the call yields a pair
# that is unpacked into the feature matrix `X_fp` and the targets `y`.
X_fp, y = fingerprint_featurizer.transform(
    protein_pdb_file,
    docked_ligands_target_directory,
)
# Show the feature-matrix dimensions as the cell output.
X_fp.shape

  0%|          | 0/5144 [00:00<?, ?it/s]

INFO:ProlifInteractionFingerprintGenerator:Receptor interactions found: Index(['PHE69.A Hydrophobic', 'PHE69.A CustomVdWContact', 'GLY98.A HBDonor',
       'GLY98.A CustomVdWContact', 'ILE99.A CustomVdWContact',
       'PHE100.A Hydrophobic', 'PHE100.A HBDonor', 'PHE100.A CationPi',
       'PHE100.A CustomVdWContact', 'LEU102.A Hydrophobic',
       ...
       'ALA477.A HBDonor', 'ALA477.A CustomVdWContact', 'SER478.A HBDonor',
       'SER478.A CustomVdWContact', 'VAL479.A Hydrophobic',
       'VAL479.A CustomVdWContact', 'PRO480.A CustomVdWContact',
       'HEM501.A Hydrophobic', 'HEM501.A HBDonor',
       'HEM501.A CustomVdWContact'],
      dtype='object', length=136)


(5144, 136)

In [5]:
# Split the fingerprint features and the targets into train/test partitions.
# For two input arrays, train_test_split returns four values in the order
# (X_train, X_test, y_train, y_test). The previous unpacking repeated the
# names X_fp_train / X_fp_test, so the feature splits were overwritten by the
# target splits and y_train / y_test were never defined.
X_fp_train, X_fp_test, y_train, y_test = train_test_split(X_fp, y, random_state=1)
# The model-fitting cell further down refers to the feature splits as
# X_train / X_test, so bind those names to the fingerprint splits as well.
X_train, X_test = X_fp_train, X_fp_test

In [None]:
# Run the docking-score featurizer on the receptor and the docked ligands.
# Long-running: the recorded progress bar shows roughly two thirds of the
# 5144 items processed after about 21 minutes.
# NOTE(review): the return value is not assigned to a name, so it is only
# displayed as the cell output -- confirm whether it should be kept for
# the modelling step.
docking_score_featurizer.featurize(
    protein_pdb_file,
    docked_ligands_target_directory,
)

 67%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                                  | 3461/5144 [21:19<10:16,  2.73it/s]

In [None]:
# Train a decision-tree regressor (fixed seed for reproducibility) and report
# its score on the held-out data. Expects X_train, X_test, y_train and y_test
# to have been defined by an earlier cell; targets are cast to float first.
regr = DecisionTreeRegressor(random_state=1)
regr.fit(X_train, y_train.astype('float'))  # fit() returns the estimator itself
# Last expression of the cell: the score on the test split is shown as output.
regr.score(X_test, y_test.astype('float'))