**Import required libraries and scripts**

In [None]:
#Import required libraries and scripts
from scripts.library_preparation import *
from scripts.utilities import *
from scripts.docking_functions import *
from scripts.clustering_functions import *
from scripts.rescoring_functions import *
from scripts.ranking_functions import *
from scripts.performance_calculation import *
from scripts.dogsitescorer import *
from scripts.get_pocket import *
import numpy as np
import os

# --- Run configuration ------------------------------------------------------
# NOTE(review): the four paths below are absolute and machine-specific;
# adjust them before running this notebook on another machine.
# `software` is passed to the library-preparation, docking and rescoring calls
# further down (presumably the folder holding the external tools - confirm).
software = '/home/mario/DockM8/software'
protein_file = '/home/mario/holiday/dyr/receptor_protoss_prepared.pdb'
ref_file = '/home/mario/holiday/dyr/crystal_ligand_protoss.sdf'
docking_library = '/home/mario/holiday/dyr/merged_actives_decoys.sdf'
# Docking programs handed to the docking step.
docking_programs = ['GNINA', 'SMINA', 'PLANTS']
# Every metric listed here is clustered, rescored and then fed to the consensus step.
clustering_metrics = ['RMSD', 'spyRMSD', 'espsim', '3DScore', 'bestpose', 'bestpose_GNINA', 'bestpose_SMINA', 'bestpose_PLANTS']
# Scoring functions handed to the rescoring step.
rescoring_functions = ['gnina', 'AD4', 'chemplp', 'rfscorevs', 'LinF9']
# Passed to prepare_library together with the docking library.
id_column = 'ID'
# Both values are forwarded to the docking call.
n_poses = 10
exhaustiveness = 4
# Use half of the available cores, but never fewer than one: os.cpu_count()
# may return None, and halving a single core would otherwise give 0 workers.
ncpus = max(1, (os.cpu_count() or 1) // 2)
#Initialise variables and create a temporary folder
# The working directory is the folder that contains the protein file.
# The path is made absolute first: for a bare filename os.path.dirname()
# returns '', which would turn w_dir+'/temp' into '/temp' at the filesystem root.
w_dir = os.path.dirname(os.path.abspath(protein_file))
print('The working directory has been set to:', w_dir)
# Later cells read and write files below w_dir+'/temp'.
create_temp_folder(w_dir+'/temp')

In [None]:
# Binding-site lookup through the DoGSiteScorer helper, using method='volume'.
# NOTE(review): `pocket_definition` is not referenced by any later cell in this
# notebook - confirm whether it should be passed to the docking step.
pocket_definition = binding_site_coordinates_dogsitescorer(protein_file, w_dir, method='volume')

In [None]:
# Pocket derived from the reference ligand and the protein file.
# NOTE(review): the literal 8 is presumably a cut-off radius around the ligand
# (units not visible here) - confirm against GetPocket. `pocket` is not
# referenced by any later cell in this notebook.
pocket = GetPocket(ref_file, protein_file, 8)

In [9]:
# Prepare the docking library with the 'pkasolver' option; the captured output
# below lists the structure proposed for each molecule at pH 7.4.
# NOTE(review): this is the only cell carrying an execution count, so its output
# may be stale; re-run the notebook top to bottom before relying on it.
cleaned_pkasolver_df = prepare_library(docking_library, id_column, software, 'pkasolver', ncpus)

Proposed mol at pH 7.4: Cc1cc(O)cc(Oc2cc(C)c(C[C@@H](O)C(C)(C)O)c(O)c2)c1
Proposed mol at pH 7.4: CC(C)C[NH+](CC(C)C)P(=O)(c1cccc(N)c1)c1cccc(N)c1
Proposed mol at pH 7.4: C[C@H]([NH2+][C@H](C)c1ccc(F)cc1F)C(=O)Nc1ccc2[nH]c(=O)[nH]c2c1
Proposed mol at pH 7.4: CC(C)[C@H](NC(=O)c1cccc(F)c1NN)c1cccs1
Proposed mol at pH 7.4: Cc1ccc([C@H](C)NC(=O)Nc2ccc(OCC(N)=O)cc2C)s1
Proposed mol at pH 7.4: Cc1cc(OCC(N)=O)ccc1NC(=O)N[C@@H]1C[C@H]2CC[C@]1(C)C2(C)C
Proposed mol at pH 7.4: CC[C@H](C)[C@H](NC(N)=O)C(=O)NCCCc1ccc(Cl)c(Cl)c1
Proposed mol at pH 7.4: Cc1ccc(NC(=S)N/N=C/c2cc(O)c(O)c(Cl)c2)cc1
Proposed mol at pH 7.4: CCc1[nH]c(C(=O)Nc2cc(Cl)c(N)c(Cl)c2)c(C)c1C(=O)OC
Proposed mol at pH 7.4: Cc1ccccc1[C@H](CC(=O)Nc1cccc2c1CCCC2)NC(N)=O
Proposed mol at pH 7.4: C/C(=N\[N-]C(=O)CNc1ccc(C)cc1C)c1ccc(O)cc1O
Proposed mol at pH 7.4: CCN(CC)c1ccc(C[C@H]2Oc3ccc(N)cc3NC2=O)cc1
Proposed mol at pH 7.4: CCN(CC)c1ccc(C[C@@H]2Oc3ccc(N)cc3NC2=O)cc1
Proposed mol at pH 7.4: CCC[C@@H](NC(N)=O)C(=O)NC(C)(C)c1ccc(Br)cc1


In [None]:
# Run the docking step with every program in `docking_programs`.
# NOTE(review): the call receives `ref_file`, not the `pocket` /
# `pocket_definition` values computed above, and `ncpus` is not passed -
# confirm that both are intended.
docking_splitted(w_dir, protein_file, ref_file, software, docking_programs, exhaustiveness, n_poses)

In [None]:
# Load the combined poses SDF from the temp folder into `all_poses` and time the load.
# NOTE(review): `time` and `PandasTools` are not imported explicitly in this
# notebook; presumably they arrive through the star imports - confirm.
print('Loading all poses SDF file...')
allposes_sdf = w_dir+'/temp/allposes.sdf'
tic = time.perf_counter()
all_poses = PandasTools.LoadSDF(
    allposes_sdf,
    idName='Pose ID',
    molColName='Molecule',
    includeFingerprints=False,
    strictParsing=True,
)
toc = time.perf_counter()
# The reported duration comes from perf_counter(), i.e. it is in seconds.
print(f'Finished loading all poses SDF in {toc-tic:0.4f}!...')

In [None]:
# Cluster the loaded poses once per entry of `clustering_metrics`, always with 'KMedoids'.
for clustering_metric in clustering_metrics:
    cluster_futures(clustering_metric, 'KMedoids', w_dir, protein_file, all_poses, ncpus)

**Rescoring**

The file containing all the cluster centers is then rescored using all scoring functions available (GNINA, Vina, AutoDock4, PLP, CHEMPLP, RF-Score-VS). The rescored output is returned as a dataframe.

In [None]:
# Rescore the clustered-poses file of every metric with all `rescoring_functions`.
for selection_metric in clustering_metrics:
    # One '<metric>_clustered.sdf' file per metric inside the temp clustering folder.
    clustered_sdf = w_dir+f'/temp/clustering/{selection_metric}_clustered.sdf'
    # NOTE(review): the meaning of the positional 1 is not visible here - confirm against rescore_all.
    rescore_all(w_dir, protein_file, ref_file, software, clustered_sdf, rescoring_functions, 1, ncpus)

**Final ranking methods**

This code calculates the final ranking of compounds using various methods.
*Method 1* : Calculates ECR value for each cluster center, then outputs the top ranked center.
*Method 2* : Calculates ECR value for each cluster center, then outputs the average ECR value for each compound.
*Method 3* : Calculates the average rank of each compound, then outputs the corresponding ECR value for each compound.
*Method 6* : Calculates Z-score for each cluster center, then outputs the top ranked center.
*Method 7* : Calculates Z-score for each cluster center, then outputs the average Z-score for each compound.

All methods are then combined into a single dataframe for comparison purposes.

In [None]:
# Apply the consensus ranking methods described above for every clustering metric.
# NOTE(review): presumably reads the rescoring results from the working
# directory and writes the combined ranking there - confirm against the helper.
apply_consensus_methods(w_dir, clustering_metrics)

In [None]:
# Performance evaluation against the actives/decoys docking library.
# NOTE(review): "EFs" presumably stands for enrichment factors - confirm against the helper.
calculate_EFs(w_dir, docking_library)