**Import required libraries and scripts, and set the run configuration**

In [1]:
#Import required libraries and scripts
from scripts.library_preparation import *
from scripts.utilities import *
from scripts.docking_functions import *
from scripts.clustering_functions import *
from scripts.rescoring_functions import *
from scripts.ranking_functions import *
from scripts.performance_calculation import *
from scripts.dogsitescorer import *
from scripts.get_pocket import *

# ---------------------------------------------------------------------------
# Run configuration: every value a user is expected to tune lives in this cell.
# ---------------------------------------------------------------------------

# Folder handed to the library-preparation, docking and rescoring helpers.
software = '/home/mario/DockM8/software'
# Receptor structure (PDB); its parent folder becomes the working directory.
protein_file = '/home/mario/holiday/pnph/receptor_protoss_prepared.pdb'
# Reference ligand (SDF), used for pocket extraction, docking and rescoring.
ref_file = '/home/mario/holiday/pnph/crystal_ligand_protoss.sdf'
# Pocket detection mode; the pocket cell handles 'reference' and 'dogsitescorer'.
pocket = 'reference'
# Protonation option forwarded to prepare_library.
protonation = 'pkasolver'
# Compound library (SDF) that is prepared, docked and later used for the EF calculation.
docking_library = '/home/mario/holiday/pnph/merged_actives_decoys.sdf'
# Docking programs selected for this run.
docking_programs = ['GNINA', 'SMINA', 'PLANTS']
# One clustering pass is run per metric listed here.
clustering_metrics = ['RMSD']
clustering_method = 'KMedoids'
# Scoring functions forwarded to rescore_all.
rescoring = ['gnina', 'AD4', 'chemplp', 'rfscorevs', 'LinF9', 'SCORCH']
# Name of the identifier column/property in the docking library.
id_column = 'ID'
n_poses = 10
exhaustiveness = 8
# 1 selects the *_splitted / *_futures helper variants in later cells.
parallel = 1
# Use half of the available CPUs, but never fewer than one: os.cpu_count()
# may return None when the count cannot be determined, and halving a single
# CPU would otherwise yield zero workers.
ncpus = max(1, (os.cpu_count() or 1) // 2)

# Create a temporary folder for all further calculations
w_dir = os.path.dirname(protein_file)
print('The working directory has been set to:', w_dir)
create_temp_folder(w_dir+'/temp')

[16:46:50] Initializing Normalizer


The working directory has been set to: /home/mario/holiday/pnph
The folder: /home/mario/holiday/pnph/temp already exists


In [None]:
# Pocket extraction is skipped when a '<receptor>_pocket.pdb' file already
# sits next to the receptor.
pocket_pdb_path = protein_file.replace('.pdb', '_pocket.pdb')
if not os.path.isfile(pocket_pdb_path):
    if pocket == 'dogsitescorer':
        # Pocket coordinates from DoGSiteScorer, selected with method='volume'.
        pocket_definition = binding_site_coordinates_dogsitescorer(protein_file, w_dir, method='volume')
    elif pocket == 'reference':
        # Pocket derived from the reference ligand; the third argument (8) is
        # presumably a distance cutoff — confirm against GetPocket.
        pocket_definition = GetPocket(ref_file, protein_file, 8)

In [None]:
# Skip library preparation when temp/final_library.sdf is already present.
final_library_path = w_dir+'/temp/final_library.sdf'
if not os.path.isfile(final_library_path):
    prepare_library(docking_library, id_column, software, protonation, ncpus)

In [None]:
# Folder checked for each supported docking program before it is run.
# This map has its own name on purpose: rebinding `docking_programs` here would
# overwrite the user's selection from the configuration cell and make the
# `program in docking_programs` filter below always true.
docking_output_dirs = {
    'GNINA': w_dir+'/temp/gnina/',
    'SMINA': w_dir+'/temp/smina/',
    'PLANTS': w_dir+'/temp/plants/',
}

# `docking_splitted` is used when parallel == 1, `docking` for any other value.
run_docking = docking_splitted if parallel == 1 else docking

for program, output_dir in docking_output_dirs.items():
    # Run only the programs that were requested and whose output folder does
    # not exist yet, so re-running the cell does not repeat finished work.
    if program in docking_programs and not os.path.isdir(output_dir):
        run_docking(w_dir, protein_file, ref_file, software, [program], exhaustiveness, n_poses, ncpus)


In [None]:
# Read temp/allposes.sdf into a DataFrame and report how long the load took.
all_poses_sdf = w_dir+'/temp/allposes.sdf'
print('Loading all poses SDF file...')
tic = time.perf_counter()
all_poses = PandasTools.LoadSDF(
    all_poses_sdf,
    idName='Pose ID',
    molColName='Molecule',
    includeFingerprints=False,
    strictParsing=True,
)
toc = time.perf_counter()
print(f'Finished loading all poses SDF in {toc-tic:0.4f}!...')


In [None]:
# Pick the clustering entry point once: `cluster_futures` when parallel == 1,
# `cluster` for any other value.
clustering_runner = cluster_futures if parallel == 1 else cluster

for metric in clustering_metrics:
    clustered_sdf = w_dir+f'/temp/clustering/{metric}_clustered.sdf'
    # Only cluster metrics whose clustered SDF is not on disk yet.
    if not os.path.isfile(clustered_sdf):
        clustering_runner(metric, clustering_method, w_dir, protein_file, all_poses, ncpus)

In [None]:
# Rescore the clustered poses of every metric with the configured scoring functions.
for metric in clustering_metrics:
    clustered_sdf = w_dir+f'/temp/clustering/{metric}_clustered.sdf'
    rescore_all(w_dir, protein_file, ref_file, software, clustered_sdf, rescoring, parallel, ncpus)


In [None]:
# Consensus step for the configured clustering metrics, followed by the EF
# calculation against the original docking library.
# NOTE(review): both helpers come from the star-imported scripts.* modules;
# presumably "EF" means enrichment factor and results are written under
# w_dir — confirm against their definitions.
apply_consensus_methods(w_dir, clustering_metrics)
calculate_EFs(w_dir, docking_library)

In [2]:
# Consensus step evaluated over score combinations, followed by the matching
# EF calculation. The recorded output of this cell reports the temp/ranking and
# temp/consensus folders and logs "Calculating consensus methods for every
# possible score combination..." with 255-step progress bars.
# NOTE(review): this is a long-running cell — the recorded passes grow from
# seconds to hours, and the last one stops at 1/255, so the saved run looks
# interrupted. Confirm results are complete before relying on them.
apply_consensus_methods_combinations(w_dir, clustering_metrics)
calculate_EFs_combinations(w_dir, docking_library)

The folder: /home/mario/holiday/pnph/temp/ranking already exists
The folder: /home/mario/holiday/pnph/temp/consensus already exists

[2023-Mar-22 16:46:54]: Calculating consensus methods for every possible score combination...


  3%|▎         | 8/255 [00:00<00:06, 40.90it/s]
 11%|█         | 28/255 [00:00<00:07, 29.04it/s]
 22%|██▏       | 56/255 [00:04<00:16, 12.33it/s]
 27%|██▋       | 70/255 [00:09<00:26,  7.02it/s]
 22%|██▏       | 56/255 [00:11<00:40,  4.94it/s]
 11%|█         | 28/255 [00:06<00:55,  4.10it/s]
  3%|▎         | 8/255 [00:02<01:07,  3.68it/s]
  0%|          | 1/255 [00:00<01:24,  3.00it/s]


[2023-Mar-22 16:47:30]: Calculating consensus methods for every possible score combination...



  3%|▎         | 8/255 [00:03<01:34,  2.62it/s]
 11%|█         | 28/255 [00:11<01:32,  2.47it/s]
 22%|██▏       | 56/255 [00:25<01:30,  2.20it/s]
 27%|██▋       | 70/255 [00:37<01:38,  1.87it/s]
 22%|██▏       | 56/255 [00:34<02:02,  1.62it/s]
 11%|█         | 28/255 [00:18<02:31,  1.50it/s]
  3%|▎         | 8/255 [00:05<02:51,  1.44it/s]
  0%|          | 1/255 [00:00<03:12,  1.32it/s]


[2023-Mar-22 16:49:47]: Calculating consensus methods for every possible score combination...



  3%|▎         | 8/255 [00:07<03:55,  1.05it/s]
 11%|█         | 28/255 [00:26<03:33,  1.06it/s]
 22%|██▏       | 56/255 [00:56<03:20,  1.01s/it]
 27%|██▋       | 70/255 [01:17<03:25,  1.11s/it]
 22%|██▏       | 56/255 [01:07<04:00,  1.21s/it]
 11%|█         | 28/255 [00:35<04:50,  1.28s/it]
  3%|▎         | 8/255 [00:10<05:25,  1.32s/it]
  0%|          | 1/255 [00:01<05:51,  1.39s/it]


[2023-Mar-22 16:54:31]: Calculating consensus methods for every possible score combination...



  3%|▎         | 8/255 [00:14<07:25,  1.80s/it]
 11%|█         | 28/255 [00:46<06:13,  1.64s/it]
 22%|██▏       | 56/255 [01:37<05:47,  1.75s/it]
 27%|██▋       | 70/255 [02:11<05:46,  1.87s/it]
 22%|██▏       | 56/255 [01:53<06:41,  2.02s/it]
 11%|█         | 28/255 [00:58<07:56,  2.10s/it]
  3%|▎         | 8/255 [00:17<08:52,  2.16s/it]
  0%|          | 1/255 [00:02<09:22,  2.22s/it]


[2023-Mar-22 17:02:33]: Calculating consensus methods for every possible score combination...



  3%|▎         | 8/255 [00:26<13:49,  3.36s/it]
 11%|█         | 28/255 [01:14<10:07,  2.67s/it]
 22%|██▏       | 56/255 [02:38<09:22,  2.82s/it]
 27%|██▋       | 70/255 [03:31<09:18,  3.02s/it]
 22%|██▏       | 56/255 [02:57<10:28,  3.16s/it]
 11%|█         | 28/255 [01:29<12:05,  3.20s/it]
  3%|▎         | 8/255 [00:25<13:11,  3.21s/it]
  0%|          | 1/255 [00:03<13:58,  3.30s/it]



[2023-Mar-22 17:15:20]: Calculating consensus methods for every possible score combination...


  3%|▎         | 8/255 [00:43<22:14,  5.40s/it]
 11%|█         | 28/255 [01:46<14:22,  3.80s/it]
 22%|██▏       | 56/255 [03:38<12:56,  3.90s/it]
 27%|██▋       | 70/255 [04:44<12:31,  4.06s/it]
 22%|██▏       | 56/255 [03:45<13:19,  4.02s/it]
 11%|█         | 28/255 [01:51<15:07,  4.00s/it]
  3%|▎         | 8/255 [00:32<16:39,  4.05s/it]
  0%|          | 1/255 [00:04<17:18,  4.09s/it]



[2023-Mar-22 17:32:28]: Calculating consensus methods for every possible score combination...


  3%|▎         | 8/255 [04:02<2:04:55, 30.35s/it]
 11%|█         | 28/255 [11:24<1:32:27, 24.44s/it]
 22%|██▏       | 56/255 [40:17<2:23:10, 43.17s/it]
 27%|██▋       | 70/255 [1:13:51<3:15:12, 63.31s/it]
 22%|██▏       | 56/255 [1:20:26<4:45:50, 86.18s/it] 
 11%|█         | 28/255 [46:32<6:17:22, 99.75s/it] 
  3%|▎         | 8/255 [13:53<7:09:05, 104.23s/it]
  0%|          | 1/255 [01:25<6:01:08, 85.31s/it]



[2023-Mar-22 22:04:25]: Calculating consensus methods for every possible score combination...


  0%|          | 1/255 [02:29<10:34:49, 149.96s/it]

: 

: 