**Import required libraries and scripts**

In [None]:
#Import required libraries and scripts
from scripts.library_preparation import *
from scripts.utilities import *
from scripts.docking_functions import *
from scripts.clustering_functions import *
from scripts.rescoring_functions import *
from scripts.consensus_methods import *
from scripts.performance_calculation import *
from scripts.dogsitescorer import *
from scripts.get_pocket import *

# --- Run configuration -------------------------------------------------------
# NOTE(review): every path below is absolute and machine-specific; adjust them
# before running this notebook on another machine.
software = '/home/mario/DockM8/software'          # passed to prepare_library / docking / rescore_all
protein_file = '/home/mario/Desktop/pde5a/receptor_protoss_prepared_pocket.pdb'
ref_file = '/home/mario/Desktop/pde5a/crystal_ligand_protoss.sdf'
docking_library = '/home/mario/Desktop/pde5a/merged_actives_decoys.sdf'
# Programs handed to docking(); the 'bestpose_<PROGRAM>' entries in
# clustering_metrics below mirror these names.
docking_programs = ['GNINA', 'SMINA', 'PLANTS']
clustering_metrics = ['RMSD', 'spyRMSD', 'espsim', '3DScore', 'bestpose', 'bestpose_GNINA', 'bestpose_SMINA', 'bestpose_PLANTS']
rescoring_functions = ['gnina', 'AD4', 'chemplp', 'rfscorevs', 'LinF9', 'RTMScore', 'SCORCH', 'vinardo']
id_column = 'ID'            # passed to prepare_library
n_poses = 10                # passed to docking
exhaustiveness = 8          # passed to docking
protonation = 'pkasolver'   # passed to prepare_library
# NOTE(review): `parallel` is not referenced by any cell visible in this notebook.
parallel = 1
# Leave two cores free for the rest of the system, but never go below one
# worker: os.cpu_count() may return None, and on machines with <= 2 cores a
# plain `os.cpu_count()-2` would yield 0 or a negative count.
ncpus = max(1, (os.cpu_count() or 1) - 2)
# Selects the branch used when defining the pocket: 'reference' or 'dogsitescorer'.
pocket = 'reference'
#Create a temporary folder for all further calculations
# Resolve the protein path to an absolute one first: for a bare file name
# os.path.dirname() returns '', and w_dir+'/temp' would then point at '/temp'
# in the filesystem root instead of next to the protein file.
w_dir = os.path.dirname(os.path.abspath(protein_file))
print('The working directory has been set to:', w_dir)
create_temp_folder(w_dir+'/temp')

In [None]:
# Define the binding pocket unless a '<protein>_pocket.pdb' file already exists
# next to the protein file.
# NOTE(review): when that file already exists, pocket_definition is not assigned
# by this cell — confirm that no later step relies on it.
if not os.path.isfile(protein_file.replace('.pdb', '_pocket.pdb')):
    if pocket == 'reference':
        # NOTE(review): 8 is presumably the pocket radius around the reference
        # ligand — confirm against GetPocket.
        pocket_definition = GetPocket(ref_file, protein_file, 8)
    elif pocket == 'dogsitescorer':
        pocket_definition = binding_site_coordinates_dogsitescorer(protein_file, w_dir, method='volume')
    else:
        # Previously an unrecognised value fell through silently and no pocket was defined.
        raise ValueError(f"Unknown pocket mode {pocket!r}; expected 'reference' or 'dogsitescorer'")

In [None]:
# Prepare the docking library only when no 'final_library.sdf' is present in
# the temp folder yet, so re-running the notebook skips this step.
if not os.path.isfile(w_dir+'/temp/final_library.sdf'):
    prepare_library(
        docking_library,
        id_column,
        software,
        protonation,
        ncpus,
    )

In [2]:
# Dock the library with every program listed in `docking_programs`.
# The list comes from the configuration cell; it is deliberately not redefined
# here so that editing the configuration is not silently overridden.
docking(w_dir, protein_file, ref_file, software, docking_programs, exhaustiveness, n_poses, ncpus)



[2023-Apr-24 08:08:38]: All poses succesfully combined!


In [3]:
# Read the combined poses SDF from the temp folder into `all_poses` and report
# how long the load took (perf_counter difference, i.e. seconds).
print('Loading all poses SDF file...')
load_start = time.perf_counter()
all_poses = PandasTools.LoadSDF(
    w_dir+'/temp/allposes.sdf',
    idName='Pose ID',
    molColName='Molecule',
    includeFingerprints=False,
    strictParsing=True,
)
load_end = time.perf_counter()
print(f'Finished loading all poses SDF in {load_end-load_start:0.4f}!...')


Loading all poses SDF file...
Finished loading all poses SDF in 152.8020!...


In [4]:
# Cluster the loaded poses once per metric; a metric is skipped when its
# '<metric>_clustered.sdf' file is already present in temp/clustering.
for metric in clustering_metrics:
    if os.path.isfile(w_dir+f'/temp/clustering/{metric}_clustered.sdf'):
        continue
    cluster_pebble(metric, 'KMedoids', w_dir, protein_file, all_poses, ncpus)

The folder: /home/mario/Desktop/pde5a/temp/clustering/ was created

[2023-Apr-24 08:11:18]: *Calculating RMSD metrics and clustering*

[2023-Apr-24 08:11:18]: Submitting parallel jobs...


Submitting parallel jobs...: 100%|██████████| 27572/27572 [19:44<00:00, 23.28IDs/s] 



[2023-Apr-24 08:31:02]: Finished submitting jobs in 1184.1659, now running jobs...


Running clustering jobs...: 100%|██████████| 27572/27572 [23:20<00:00, 19.69jobs/s]  


The folder: /home/mario/Desktop/pde5a/temp/clustering/ already exists

[2023-Apr-24 08:54:36]: *Calculating spyRMSD metrics and clustering*

[2023-Apr-24 08:54:36]: Submitting parallel jobs...


Submitting parallel jobs...: 100%|██████████| 27572/27572 [14:46<00:00, 31.11IDs/s]



[2023-Apr-24 09:09:22]: Finished submitting jobs in 886.3201, now running jobs...


Running clustering jobs...: 100%|██████████| 27572/27572 [18:16<00:00, 25.14jobs/s]  


The folder: /home/mario/Desktop/pde5a/temp/clustering/ already exists

[2023-Apr-24 09:27:55]: *Calculating espsim metrics and clustering*

[2023-Apr-24 09:27:55]: Submitting parallel jobs...


Submitting parallel jobs...: 100%|██████████| 27572/27572 [12:52<00:00, 35.67IDs/s]



[2023-Apr-24 09:40:48]: Finished submitting jobs in 772.9123, now running jobs...


Running clustering jobs...: 100%|██████████| 27572/27572 [03:42<00:00, 124.15jobs/s] 


The folder: /home/mario/Desktop/pde5a/temp/clustering/ already exists

[2023-Apr-24 09:44:48]: *Calculating 3DScore metrics and clustering*

[2023-Apr-24 09:44:48]: Submitting parallel jobs...


Submitting parallel jobs...: 100%|██████████| 27572/27572 [11:57<00:00, 38.43IDs/s]



[2023-Apr-24 09:56:46]: Finished submitting jobs in 717.5091, now running jobs...


Running clustering jobs...: 100%|██████████| 27572/27572 [20:00<00:00, 22.97jobs/s]  


The folder: /home/mario/Desktop/pde5a/temp/clustering/ already exists

[2023-Apr-24 10:16:56]: *Calculating bestpose metrics and clustering*
The folder: /home/mario/Desktop/pde5a/temp/clustering/ already exists

[2023-Apr-24 10:17:03]: *Calculating bestpose_GNINA metrics and clustering*
The folder: /home/mario/Desktop/pde5a/temp/clustering/ already exists

[2023-Apr-24 10:17:04]: *Calculating bestpose_SMINA metrics and clustering*
The folder: /home/mario/Desktop/pde5a/temp/clustering/ already exists

[2023-Apr-24 10:17:07]: *Calculating bestpose_PLANTS metrics and clustering*


In [5]:
# Rescore the clustered poses of every metric with all configured rescoring functions.
for metric in clustering_metrics:
    rescore_all(
        w_dir,
        protein_file,
        ref_file,
        software,
        w_dir+f'/temp/clustering/{metric}_clustered.sdf',
        rescoring_functions,
        1,  # NOTE(review): possibly meant to be the `parallel` setting — confirm against rescore_all
    )


The folder: /home/mario/Desktop/pde5a/temp/rescoring_RMSD_clustered was created

[2023-Apr-24 10:17:12]: Rescoring with GNINA


KeyboardInterrupt: 

In [None]:
# Evaluate each clustering metric individually, using the working directory and
# the docking library as inputs.
# NOTE(review): "EF" presumably stands for enrichment factor — confirm in
# scripts.performance_calculation.
calculate_EF_single_functions(w_dir, docking_library, clustering_metrics)

In [None]:
# Run the consensus step over the configured clustering metrics, using the
# working directory and the docking library as inputs.
# NOTE(review): presumably evaluates combinations of rescoring functions with
# each consensus method — confirm in the project scripts.
apply_consensus_methods_combinations(w_dir, docking_library, clustering_metrics)