**Import required libraries and scripts**

In [1]:
#Import required libraries and scripts
from scripts.library_preparation import *
from scripts.utilities import *
from scripts.docking_functions import *
from scripts.clustering_functions import *
from scripts.rescoring_functions import *
from scripts.consensus_methods import *
from scripts.performance_calculation import *
from scripts.dogsitescorer import *
from scripts.get_pocket import *
from scripts.postprocessing import *

[00:58:28] Initializing Normalizer
  if LooseVersion(module.__version__) < minver:
  other = LooseVersion(other)
  if LooseVersion(sklearn.__version__) < LooseVersion("0.23.0"):
  from .autonotebook import tqdm as notebook_tqdm
  MIN_CHEMFILES_VERSION = LooseVersion("0.9")
[TRJ.py:171 - <module>()] netCDF4 is not available. Writing AMBER ncdf files will be slow.
  class NCDFPicklable(scipy.io.netcdf.netcdf_file):
  pd_version = LooseVersion(pd.__version__)


In [4]:
# ---------------------------------------------------------------------------
# Ensemble docking driver.
#
# For every (receptor, reference ligand) pair this cell: defines a pocket,
# prepares the ligand library (only if no prepared library exists yet), docks,
# concatenates and loads the poses, clusters them (only if no clustered file
# exists yet), rescores the clustered poses and applies the consensus method.
# The per-receptor results are then combined by ensemble_consensus().
#
# Path, time and PandasTools are expected to be in scope through the star
# imports in the import cell at the top of the notebook.
# ---------------------------------------------------------------------------

# --- Configuration ---------------------------------------------------------
# NOTE(review): machine-specific absolute paths; edit these two lines when
# running somewhere else.
dockm8_dir = Path('/home/tony/DockM8')
data_dir = dockm8_dir / 'testing_ensemble_docking'

software = dockm8_dir / 'software'
receptors = [data_dir / 'protein1.pdb', data_dir / 'protein2.pdb']
# ref_files[i] is the reference ligand paired with receptors[i].
ref_files = [data_dir / 'ref1.sdf', data_dir / 'ref2.sdf']
docking_library = data_dir / 'library.sdf'
docking_programs = ['PLANTS']
clustering_metric = 'bestpose_PLANTS'
rescoring_functions = ['GNINA_Affinity','CNN-Score','CNN-Affinity', 'AD4', 'CHEMPLP', 'RFScoreVS']
id_column = 'ID'
n_poses = 10
exhaustiveness = 8
protonation = 'GypsumDL'
ncpus = 5
pocket = 'reference'
consensus_method = 'method6'

# --- Input validation ------------------------------------------------------
# Fail before any work is done, instead of hitting an IndexError while pairing
# the inputs or a NameError on pocket_definition inside the loop.
supported_pocket_modes = ('reference', 'RoG', 'dogsitescorer')
if pocket not in supported_pocket_modes:
    raise ValueError(f'Unsupported pocket mode {pocket!r}; expected one of {supported_pocket_modes}')
if len(receptors) != len(ref_files):
    raise ValueError(f'Got {len(receptors)} receptors but {len(ref_files)} reference files; one reference file is required per receptor')

print('DockM8 is running in ensemble mode...')

# Pair every receptor with its reference ligand (same position in both lists).
receptor_dict = dict(zip(receptors, ref_files))

# --- Per-receptor pipeline -------------------------------------------------
for receptor, ref_file in receptor_dict.items():

    # Each receptor gets its own working directory next to the PDB file,
    # named after the file stem (e.g. protein1.pdb -> protein1/).
    w_dir = receptor.parent / receptor.stem
    print('The working directory has been set to:', w_dir)
    w_dir.mkdir(exist_ok=True)

    # Exactly one pocket-definition strategy runs per receptor; `pocket` was
    # validated above, so one of these branches always assigns the result.
    if pocket == 'reference':
        # NOTE(review): 8 is presumably the pocket radius around the reference
        # ligand (the logged box size is 16.0) - confirm against get_pocket().
        pocket_definition = get_pocket(ref_file, receptor, 8)
    elif pocket == 'RoG':
        pocket_definition = get_pocket_RoG(ref_file, receptor)
    elif pocket == 'dogsitescorer':
        pocket_definition = binding_site_coordinates_dogsitescorer(receptor, w_dir, method='volume')
    print(pocket_definition)

    # Library preparation is skipped when a prepared library is already on disk.
    if not (w_dir / 'final_library.sdf').is_file():
        prepare_library(docking_library, w_dir, id_column, protonation, software, ncpus)

    # NOTE(review): the original passed a literal 5 as the last argument, which
    # equals ncpus; presumably it is the worker count - confirm against the
    # signature of docking().
    docking(w_dir, receptor, pocket_definition, software, docking_programs, exhaustiveness, n_poses, ncpus)
    concat_all_poses(w_dir, docking_programs)

    print('Loading all poses SDF file...')
    tic = time.perf_counter()
    all_poses = PandasTools.LoadSDF(str(w_dir / 'allposes.sdf'), idName='Pose ID', molColName='Molecule', includeFingerprints=False, strictParsing=True)
    print(f'Loaded {len(all_poses)} poses.')
    toc = time.perf_counter()
    print(f'Finished loading all poses SDF in {toc-tic:0.4f}!...')

    # Clustering is skipped when the clustered file already exists.
    clustered_sdf = w_dir / 'clustering' / f'{clustering_metric}_clustered.sdf'
    if not clustered_sdf.is_file():
        cluster_pebble(clustering_metric, 'KMedoids', w_dir, receptor, all_poses, ncpus)

    rescore_all(w_dir, receptor, pocket_definition, software, clustered_sdf, rescoring_functions, ncpus)

    apply_consensus_methods(w_dir, clustering_metric, consensus_method, rescoring_functions, 'min_max')

# --- Ensemble consensus ----------------------------------------------------
# NOTE(review): the meaning of 30 is not visible in this notebook - confirm
# against ensemble_consensus().
ensemble_results = ensemble_consensus(receptors, clustering_metric, consensus_method, 30)

print(ensemble_results)


DockM8 is running in ensemble mode...
The working directory has been set to: /home/tony/DockM8/testing_ensemble_docking/protein1

[2023-Oct-25 01:01:32]: Extracting pocket from /home/tony/DockM8/testing_ensemble_docking/protein1.pdb using /home/tony/DockM8/testing_ensemble_docking/ref1.sdf as reference ligand


  if pd_version < LooseVersion("0.17.0"):
  elif pd_version < LooseVersion("0.23.0"):



[2023-Oct-25 01:01:48]: Finished extracting pocket from /home/tony/DockM8/testing_ensemble_docking/protein1.pdb using /home/tony/DockM8/testing_ensemble_docking/ref1.sdf as reference ligand
{'center': [16.7, -2.69, 17.61], 'size': [16.0, 16.0, 16.0]}
Splitting SDF file final_library.sdf ...


Splitting files: 100%|██████████| 10/10 [00:00<00:00, 1124.99it/s]


Split docking library into 10 files each containing 1 compounds

[2023-Oct-25 01:01:48]: All poses succesfully combined!
Loading all poses SDF file...
Loaded 100 poses.
Finished loading all poses SDF in 0.0578!...

[2023-Oct-25 01:01:48]: Skipping functions: GNINA_Affinity, CNN-Score, CNN-Affinity, AD4, CHEMPLP, RFScoreVS

[2023-Oct-25 01:01:48]: Combining all scores for /home/tony/DockM8/testing_ensemble_docking/protein1/rescoring_bestpose_PLANTS_clustered


Combining scores: 100%|██████████| 5/5 [00:00<00:00, 544.97files/s]



[2023-Oct-25 01:01:48]: Rescoring complete in 0.0236!
The working directory has been set to: /home/tony/DockM8/testing_ensemble_docking/protein2

[2023-Oct-25 01:01:48]: Extracting pocket from /home/tony/DockM8/testing_ensemble_docking/protein2.pdb using /home/tony/DockM8/testing_ensemble_docking/ref2.sdf as reference ligand


  if pd_version < LooseVersion("0.17.0"):
  elif pd_version < LooseVersion("0.23.0"):



[2023-Oct-25 01:02:21]: Finished extracting pocket from /home/tony/DockM8/testing_ensemble_docking/protein2.pdb using /home/tony/DockM8/testing_ensemble_docking/ref2.sdf as reference ligand
{'center': [-12.4, -13.78, -15.95], 'size': [16.0, 16.0, 16.0]}
Splitting SDF file final_library.sdf ...


Splitting files: 100%|██████████| 10/10 [00:00<00:00, 1089.85it/s]


Split docking library into 10 files each containing 1 compounds

[2023-Oct-25 01:02:21]: All poses succesfully combined!
Loading all poses SDF file...
Loaded 100 poses.
Finished loading all poses SDF in 0.0570!...

[2023-Oct-25 01:02:21]: Skipping functions: GNINA_Affinity, CNN-Score, CNN-Affinity, AD4, CHEMPLP, RFScoreVS

[2023-Oct-25 01:02:21]: Combining all scores for /home/tony/DockM8/testing_ensemble_docking/protein2/rescoring_bestpose_PLANTS_clustered


Combining scores: 100%|██████████| 5/5 [00:00<00:00, 463.43files/s]



[2023-Oct-25 01:02:21]: Rescoring complete in 0.0252!
[]
