**Import required libraries and scripts**

In [1]:
#Import required libraries and scripts
from scripts.library_preparation import *
from scripts.utilities import *
from scripts.docking_functions import *
from scripts.clustering_functions import *
from scripts.rescoring_functions import *
from scripts.ranking_functions import *
from scripts.performance_calculation import *
from scripts.dogsitescorer import *
from scripts.get_pocket import *
import numpy as np
import os

# --- Configuration ----------------------------------------------------------
# Single root for every location used below; edit this one value to run the
# notebook on another machine instead of touching each path separately.
project_root = '/home/tony/CADD22'
data_dir = os.path.join(project_root, 'wocondock_main')

# Directory handed to the library-preparation, docking and rescoring helpers.
software = os.path.join(project_root, 'software')
# Input files: receptor structure, reference ligand and the library to dock.
protein_file = os.path.join(data_dir, '2o1x_A_apo_protoss.pdb')
ref_file = os.path.join(data_dir, '2o1x_A_lig_protoss.sdf')
docking_library = os.path.join(data_dir, 'Selection_of_FCHGroup_LeadLike.sdf')

# Programs passed to the docking helpers.
docking_programs = ['GNINA', 'SMINA', 'PLANTS']
# Each metric is clustered and then rescored in its own loop iteration below.
clustering_metrics = ['RMSD', 'spyRMSD', 'espsim', '3DScore', 'bestpose', 'bestpose_GNINA', 'bestpose_SMINA', 'bestpose_PLANTS']
# Scoring functions passed to rescore_all.
rescoring_functions = ['gnina', 'AD4', 'chemplp', 'rfscorevs', 'LinF9', 'AAScore']
# Name of the identifier column passed to prepare_library.
id_column = 'ID'
# Docking settings forwarded to docking / docking_splitted.
n_poses = 10
exhaustiveness = 4
# Use half of the available CPUs, but never fewer than one worker:
# os.cpu_count() may return None, and halving a single CPU would give 0.
ncpus = max(1, (os.cpu_count() or 1) // 2)

# Initialise variables and create a temporary folder.
# The working directory is the folder that contains the protein file.
w_dir = os.path.dirname(protein_file)
print('The working directory has been set to:', w_dir)
create_temp_folder(w_dir+'/temp')

[11:51:16] Initializing Normalizer


The working directory has been set to: /home/tony/CADD22/wocondock_main
The folder: /home/tony/CADD22/wocondock_main/temp already exists


In [None]:
# Derive a binding-site definition for the protein with the DoGSiteScorer helper,
# using method='volume'.
# NOTE(review): presumably this selects the pocket by volume — confirm against
# binding_site_coordinates_dogsitescorer. The result is not consumed by any
# later cell visible in this notebook.
pocket_definition = binding_site_coordinates_dogsitescorer(protein_file, w_dir, method='volume')

In [None]:
# Build a pocket from the reference ligand and the protein file.
# NOTE(review): the literal 8 is presumably a distance cutoff around the
# reference ligand (units unconfirmed) — confirm against GetPocket and consider
# naming it in the configuration cell.
pocket = GetPocket(ref_file, protein_file, 8)

In [None]:
# Prepare the docking library, identifying compounds by `id_column`.
# NOTE(review): the final argument 'pkasolver' presumably selects the
# protonation tool used during preparation — confirm against prepare_library.
cleaned_pkasolver_df = prepare_library(docking_library, id_column, software, 'pkasolver')

In [None]:
# Dock with every program in `docking_programs`, using the configured
# exhaustiveness and number of poses, and keep the result in `all_poses`.
# Note: `all_poses` is re-assigned further down by the cell that loads
# temp/allposes.sdf from disk.
all_poses = docking(protein_file, ref_file, software, docking_programs, exhaustiveness, n_poses)

In [None]:
# Variant of the docking step that additionally receives the working directory.
# NOTE(review): presumably docks the library in split chunks — confirm against
# docking_splitted. Its return value, if any, is not captured here.
docking_splitted(w_dir, protein_file, ref_file, software, docking_programs, exhaustiveness, n_poses)

In [None]:
# Collect the poses produced by the split docking run, reading the split files
# from temp/split_final_library under the working directory.
fetch_poses_splitted(w_dir, n_poses, split_files_folder=w_dir+'/temp/split_final_library')

In [None]:
# Read every docked pose back from the combined SDF file and report how long
# the load took. The resulting frame uses 'Pose ID' as the identifier column
# and 'Molecule' as the molecule column.
all_poses_sdf = w_dir+'/temp/allposes.sdf'

print('Loading all poses SDF file...')
tic = time.perf_counter()
all_poses = PandasTools.LoadSDF(
    all_poses_sdf,
    idName='Pose ID',
    molColName='Molecule',
    includeFingerprints=False,
    strictParsing=True,
)
toc = time.perf_counter()
print(f'Finished loading all poses SDF in {toc-tic:0.4f}!...')

In [None]:
# Cluster the loaded poses once per configured metric with the 'KMedoids'
# method, using `ncpus` workers.
# The metric names are already strings, so they are passed through directly
# rather than being re-wrapped in an f-string.
for metric in clustering_metrics:
    cluster_futures(metric, 'KMedoids', w_dir, protein_file, all_poses, ncpus)

**Rescoring**

For each clustering metric, the file containing the cluster centers is rescored with the scoring functions selected in `rescoring_functions` (available functions include GNINA, Vina, AutoDock4, PLP, CHEMPLP and RF-Score-VS). The rescored output is returned as a dataframe.

In [2]:
# Rescore the clustered poses of every metric with all configured scoring
# functions. Each metric reads its own <metric>_clustered.sdf from
# temp/clustering under the working directory.
# NOTE(review): the positional literal 1 is not explained here — confirm its
# meaning against rescore_all.
for metric in clustering_metrics:
    clustered_sdf = w_dir+f'/temp/clustering/{metric}_clustered.sdf'
    rescore_all(
        w_dir,
        protein_file,
        ref_file,
        software,
        clustered_sdf,
        rescoring_functions,
        1,
        ncpus,
    )


The folder: /home/tony/CADD22/wocondock_main/temp/rescoring_RMSD_clustered was created
The folder: /home/tony/CADD22/wocondock_main/temp/rescoring_RMSD_clustered/gnina_rescoring/ was created
Splitting RMSD_clustered.sdf...
Splitting SDF file RMSD_clustered.sdf ...
The folder: /home/tony/CADD22/wocondock_main/temp/rescoring_RMSD_clustered/gnina_rescoring/split_RMSD_clustered was created


Splitting files: 100%|██████████| 7/7 [00:00<00:00, 318.66it/s]


Split docking library into 7 files each containing 4 compounds
Rescoring with GNINA


Submitting GNINA rescoring jobs: 100%|██████████| 7/7 [00:00<00:00, 145.49file/s]
Rescoring with GNINA: 100%|██████████| 7/7 [00:04<00:00,  1.74file/s]


Rescoring with GNINA complete in 4.1709!
Rescoring with AD4
The folder: /home/tony/CADD22/wocondock_main/temp/rescoring_RMSD_clustered/AD4_rescoring/ was created
Rescoring with AD4 complete in 0.8337!
Rescoring with CHEMPLP
The folder: /home/tony/CADD22/wocondock_main/temp/rescoring_RMSD_clustered/chemplp_rescoring/ was created


28 molecules converted


Rescoring with CHEMPLP complete in 1.7547!
Rescoring with RFScoreVS
The folder: /home/tony/CADD22/wocondock_main/temp/rescoring_RMSD_clustered/rfscorevs_rescoring was created
Rescoring with RF-Score-VS complete in 6.4185!
The folder: /home/tony/CADD22/wocondock_main/temp/rescoring_RMSD_clustered/LinF9_rescoring/ was created
Splitting RMSD_clustered.sdf...
Splitting SDF file RMSD_clustered.sdf ...
The folder: /home/tony/CADD22/wocondock_main/temp/rescoring_RMSD_clustered/LinF9_rescoring/split_RMSD_clustered was created


Splitting files: 100%|██████████| 7/7 [00:00<00:00, 534.00it/s]


Split docking library into 7 files each containing 4 compounds
Rescoring with LinF9


Submitting LinF9 rescoring jobs: 100%|██████████| 7/7 [00:00<00:00, 154.89file/s]
Rescoring with LinF9: 100%|██████████| 7/7 [00:02<00:00,  2.94file/s]


Rescoring with LinF9 complete in 2.5018!
The folder: /home/tony/CADD22/wocondock_main/temp/rescoring_RMSD_clustered/AAScore_rescoring/ was created
Splitting RMSD_clustered.sdf...
Splitting SDF file RMSD_clustered.sdf ...
The folder: /home/tony/CADD22/wocondock_main/temp/rescoring_RMSD_clustered/AAScore_rescoring/split_RMSD_clustered was created


Splitting files: 100%|██████████| 7/7 [00:00<00:00, 510.94it/s]


Split docking library into 7 files each containing 4 compounds
Rescoring with AAScore


Submitting AAScore rescoring jobs: 100%|██████████| 7/7 [00:00<00:00, 152.43file/s]
Rescoring with AAScore: 100%|██████████| 7/7 [00:27<00:00,  3.98s/file]


Rescoring with AAScore complete in 27.9507!
Combining all score for /home/tony/CADD22/wocondock_main/temp/rescoring_RMSD_clustered


Combining scores: 100%|██████████| 5/5 [00:00<00:00, 383.46files/s]


Rescoring complete in 43.6668!
The folder: /home/tony/CADD22/wocondock_main/temp/rescoring_spyRMSD_clustered was created
The folder: /home/tony/CADD22/wocondock_main/temp/rescoring_spyRMSD_clustered/gnina_rescoring/ was created
Splitting spyRMSD_clustered.sdf...
Splitting SDF file spyRMSD_clustered.sdf ...
The folder: /home/tony/CADD22/wocondock_main/temp/rescoring_spyRMSD_clustered/gnina_rescoring/split_spyRMSD_clustered was created


Splitting files: 100%|██████████| 8/8 [00:00<00:00, 386.22it/s]


Split docking library into 8 files each containing 4 compounds
Rescoring with GNINA


Submitting GNINA rescoring jobs: 100%|██████████| 8/8 [00:00<00:00, 154.06file/s]
Rescoring with GNINA: 100%|██████████| 8/8 [00:03<00:00,  2.05file/s]


Rescoring with GNINA complete in 4.0593!
Rescoring with AD4
The folder: /home/tony/CADD22/wocondock_main/temp/rescoring_spyRMSD_clustered/AD4_rescoring/ was created
Rescoring with AD4 complete in 0.7924!
Rescoring with CHEMPLP
The folder: /home/tony/CADD22/wocondock_main/temp/rescoring_spyRMSD_clustered/chemplp_rescoring/ was created


29 molecules converted


Rescoring with CHEMPLP complete in 1.6750!
Rescoring with RFScoreVS
The folder: /home/tony/CADD22/wocondock_main/temp/rescoring_spyRMSD_clustered/rfscorevs_rescoring was created
Rescoring with RF-Score-VS complete in 6.2313!
The folder: /home/tony/CADD22/wocondock_main/temp/rescoring_spyRMSD_clustered/LinF9_rescoring/ was created
Splitting spyRMSD_clustered.sdf...
Splitting SDF file spyRMSD_clustered.sdf ...
The folder: /home/tony/CADD22/wocondock_main/temp/rescoring_spyRMSD_clustered/LinF9_rescoring/split_spyRMSD_clustered was created


Splitting files: 100%|██████████| 8/8 [00:00<00:00, 503.35it/s]


Split docking library into 8 files each containing 4 compounds
Rescoring with LinF9


Submitting LinF9 rescoring jobs: 100%|██████████| 8/8 [00:00<00:00, 172.50file/s]
Rescoring with LinF9: 100%|██████████| 8/8 [00:02<00:00,  3.02file/s]


Rescoring with LinF9 complete in 2.7707!
The folder: /home/tony/CADD22/wocondock_main/temp/rescoring_spyRMSD_clustered/AAScore_rescoring/ was created
Splitting spyRMSD_clustered.sdf...
Splitting SDF file spyRMSD_clustered.sdf ...
The folder: /home/tony/CADD22/wocondock_main/temp/rescoring_spyRMSD_clustered/AAScore_rescoring/split_spyRMSD_clustered was created


Splitting files: 100%|██████████| 8/8 [00:00<00:00, 563.18it/s]


Split docking library into 8 files each containing 4 compounds
Rescoring with AAScore


Submitting AAScore rescoring jobs: 100%|██████████| 8/8 [00:00<00:00, 116.17file/s]
Rescoring with AAScore:  62%|██████▎   | 5/8 [00:19<00:08,  2.89s/file]Traceback (most recent call last):
Rescoring with AAScore:  62%|██████▎   | 5/8 [00:19<00:11,  3.95s/file]Process ForkProcess-21:
Traceback (most recent call last):
  File "/home/tony/.conda/envs/wocondock/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/home/tony/.conda/envs/wocondock/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/tony/.conda/envs/wocondock/lib/python3.8/concurrent/futures/process.py", line 233, in _process_worker
    call_item = call_queue.get(block=True)
  File "/home/tony/.conda/envs/wocondock/lib/python3.8/multiprocessing/queues.py", line 97, in get
    res = self._recv_bytes()
  File "/home/tony/.conda/envs/wocondock/lib/python3.8/multiprocessing/connection.py", line 216, in recv_bytes
    buf = se

KeyboardInterrupt: 

**Final ranking methods**

This code calculates the final ranking of compounds using various methods.
*Method 1* : Calculates ECR value for each cluster center, then outputs the top ranked center.
*Method 2* : Calculates ECR value for each cluster center, then outputs the average ECR value for each compound.
*Method 3* : Calculates the average rank of each compound, then outputs the corresponding ECR value for each compound.
*Method 6* : Calculates Z-score for each cluster center, then outputs the top ranked center.
*Method 7* : Calculates Z-score for each cluster center, then outputs the average Z-score for each compound.

All methods are then combined into a single dataframe for comparison purposes.

In [None]:
# Apply the consensus ranking methods to the three per-program best-pose
# selections only (a subset of `clustering_metrics`).
apply_consensus_methods(w_dir, ['bestpose_GNINA', 'bestpose_SMINA', 'bestpose_PLANTS'])

In [None]:
# Evaluate performance for the results in the working directory against the
# docking library.
# NOTE(review): "EFs" presumably stands for enrichment factors — confirm against
# calculate_EFs. Its return value, if any, is not captured here.
calculate_EFs(w_dir, docking_library)

In [None]:
# Preview the first ten rows of the tab-separated AAScore test output.
# The file is read with header=None, so the column names are supplied here.
# The path is resolved against `w_dir`, like every other path in this notebook,
# instead of repeating the absolute working directory.
test = pd.read_csv(
    os.path.join(w_dir, 'AAScore_test.csv'),
    delimiter='\t',
    header=None,
    names=['Pose ID', 'AAScore'],
)
display(test.head(10))