**Import required libraries and scripts**

In [1]:
#Import required libraries and scripts
from scripts.library_preparation import *
from scripts.utilities import *
from scripts.docking_functions import *
from scripts.clustering_functions import *
from scripts.rescoring_functions import *
from scripts.ranking_functions import *
from scripts.performance_calculation import *
# Explicit imports stay after the wildcard imports so these bindings win.
import numpy as np
import os
# `time` is used by the pose-loading cell; import it explicitly instead of
# relying on a wildcard import leaking it into the namespace.
import time

# Root of the WoConDock checkout. The default is the original hard-coded location;
# set the WOCONDOCK_ROOT environment variable to run this notebook from another
# machine or directory without editing this cell.
wocondock_root = os.environ.get('WOCONDOCK_ROOT') or '/home/mario/WoConDock'
data_dir = os.path.join(wocondock_root, 'wocondock_performance_ace')

# Input locations, all derived from the root above (defaults match the original paths).
software = os.path.join(wocondock_root, 'software')
protein_file = os.path.join(data_dir, 'receptor_protoss_prepared.pdb')
ref_file = os.path.join(data_dir, 'crystal_ligand_protoss.sdf')
docking_library = os.path.join(data_dir, 'merged_actives_decoys.sdf')

# Workflow settings consumed by the cells below:
#   docking_programs, exhaustiveness, n_poses -> docking / docking_splitted
#   clustering_metrics                       -> clustering and rescoring loops
#   rescoring_functions                      -> rescore_all
#   id_column                                -> prepare_library
docking_programs = ['GNINA', 'SMINA', 'PLANTS']
clustering_metrics = ['RMSD', 'spyRMSD', 'espsim', 'USRCAT', '3DScore', 'bestpose', 'bestpose_GNINA', 'bestpose_SMINA', 'bestpose_PLANTS']
rescoring_functions = ['gnina', 'AD4', 'chemplp', 'rfscorevs']
id_column = 'ID'
n_poses = 10
exhaustiveness = 4

#Initialise variables and create a temporary folder
# The working directory is the folder that contains the receptor file.
w_dir = os.path.dirname(protein_file)
print('The working directory has been set to:', w_dir)
create_temp_folder(os.path.join(w_dir, 'temp'))

[14:07:36] Initializing Normalizer
  return torch._C._cuda_getDeviceCount() > 0


The working directory has been set to: /home/mario/WoConDock/wocondock_performance_ace
The folder: /home/mario/WoConDock/wocondock_performance_ace/temp already exists


In [None]:
# Derive the pocket definition from the receptor via the DoGSiteScorer helper,
# selecting with method='volume'.
# NOTE(review): `pocket_definition` is not passed to any later call in the cells
# visible here (docking receives `ref_file`) — confirm it is still needed.
pocket_definition = binding_site_coordinates_dogsitescorer(
    protein_file,
    w_dir,
    method='volume',
)

In [None]:
# Prepare the docking library. The final argument selects the 'pkasolver' mode —
# presumably the protonation tool; confirm against prepare_library.
# NOTE(review): the returned frame is not referenced by later cells visible here.
cleaned_pkasolver_df = prepare_library(
    docking_library,
    id_column,
    software,
    'pkasolver',
)

In [None]:
# Dock with every program listed in `docking_programs`, passing the configured
# exhaustiveness and number of poses; keep the result as `all_poses`.
all_poses = docking(
    protein_file,
    ref_file,
    software,
    docking_programs,
    exhaustiveness,
    n_poses,
)

In [None]:
# Same docking inputs as the cell above, plus the working directory.
# NOTE(review): presumably the split/batched variant of `docking` — confirm.
# The return value (if any) is discarded here.
docking_splitted(
    w_dir,
    protein_file,
    ref_file,
    software,
    docking_programs,
    exhaustiveness,
    n_poses,
)

In [2]:
# Load every pose from <w_dir>/temp/allposes.sdf into a DataFrame and report
# how long the load took. This rebinds `all_poses`, replacing whatever the
# docking cell assigned to it.
# NOTE(review): `PandasTools` is not imported explicitly in this notebook;
# presumably it is RDKit's PandasTools re-exported by one of the wildcard
# `scripts.*` imports — confirm.
all_poses_sdf = os.path.join(w_dir, 'temp', 'allposes.sdf')
print('Loading all poses SDF file...')
tic = time.perf_counter()
all_poses = PandasTools.LoadSDF(
    all_poses_sdf,
    idName='Pose ID',
    molColName='Molecule',
    includeFingerprints=False,
    strictParsing=True,
)
toc = time.perf_counter()
# perf_counter() returns fractional seconds, so the printed figure is in seconds.
print(f'Finished loading all poses SDF in {toc-tic:0.4f}!...')

Loading all poses SDF file...
Finished loading all poses SDF in 89.3222!...


In [3]:
# Run clustering once for every entry in `clustering_metrics`, always with the
# same algorithm name, over the poses loaded above.
clustering_algorithm = 'KMedoids'
for metric in clustering_metrics:
    cluster_futures(f'{metric}', clustering_algorithm, w_dir, protein_file, all_poses)

The folder: /home/mario/WoConDock/wocondock_performance_ace/temp/clustering/ already exists
*Calculating RMSD metrics and clustering*
Submitting parallel jobs...


100%|██████████| 17108/17108 [06:57<00:00, 40.99it/s]


Finished submitting jobs in 417.3255, now running jobs...


100%|██████████| 17108/17108 [41:10<00:00,  6.92it/s]   


The folder: /home/mario/WoConDock/wocondock_performance_ace/temp/clustering/ already exists
*Calculating spyRMSD metrics and clustering*
Submitting parallel jobs...


100%|██████████| 17108/17108 [06:48<00:00, 41.86it/s]


Finished submitting jobs in 408.7338, now running jobs...


 27%|██▋       | 4605/17108 [05:36<28:12,  7.39it/s]  

**Rescoring**

The file containing all the cluster centers is then rescored using all available scoring functions (GNINA, Vina, AutoDock4, PLP, CHEMPLP, RF-Score-VS). The rescored output is returned as a dataframe.

In [None]:
# Rescore the clustered SDF of each metric with the configured rescoring functions.
for metric in clustering_metrics:
    # One clustered-poses file per metric under <w_dir>/temp/clustering/.
    clustered_sdf = w_dir+f'/temp/clustering/{metric}_clustered.sdf'
    # NOTE(review): the meaning of the trailing 1 is not visible here — confirm
    # against rescore_all's signature.
    rescore_all(w_dir, protein_file, ref_file, software, clustered_sdf, rescoring_functions, 1)


**Final ranking methods**

This code calculates the final ranking of compounds using various methods.
*Method 1* : Calculates ECR value for each cluster center, then outputs the top ranked center.
*Method 2* : Calculates ECR value for each cluster center, then outputs the average ECR value for each compound.
*Method 3* : Calculates the average rank of each compound, then outputs the corresponding ECR value for each compound.
*Method 6* : Calculates Z-score for each cluster center, then outputs the top ranked center.
*Method 7* : Calculates Z-score for each cluster center, then outputs the average Z-score for each compound.

All methods are then combined into a single dataframe for comparison purposes.

In [None]:
# Apply the consensus ranking methods to the three per-program best-pose metrics.
# NOTE(review): only these three are passed, not the full `clustering_metrics`
# list — confirm this is intentional.
bestpose_metrics = ['bestpose_GNINA', 'bestpose_SMINA', 'bestpose_PLANTS']
apply_consensus_methods(w_dir, bestpose_metrics)

In [None]:
# Compute EFs (presumably enrichment factors — confirm) from the working
# directory and the docking library file.
calculate_EFs(
    w_dir,
    docking_library,
)