**Import required libraries and scripts**

In [None]:
#Import required libraries and scripts
from scripts.library_preparation import *
from scripts.utilities import *
from scripts.docking_functions import *
from scripts.clustering_functions import *
from scripts.rescoring_functions import *
from scripts.ranking_functions import *
from scripts.performance_calculation import *
from scripts.dogsitescorer import *
from scripts.get_pocket import *

# --- Input files --------------------------------------------------------------
# Folder passed as `software` to the preparation, docking and rescoring steps.
software = '/home/mario/DockM8/software'
# Receptor structure; its parent folder becomes the working directory below.
protein_file = '/home/mario/holiday/pnph/receptor_protoss_prepared.pdb'
# Reference ligand, used for the 'reference' pocket mode and passed to docking/rescoring.
ref_file = '/home/mario/holiday/pnph/crystal_ligand_protoss.sdf'
# Compound library handed to prepare_library and to the EF calculations.
docking_library = '/home/mario/holiday/pnph/merged_actives_decoys.sdf'

# --- Workflow options ---------------------------------------------------------
# Pocket definition mode: 'reference' or 'dogsitescorer' (the two values the
# pocket cell handles).
pocket = 'reference'
# Protonation option forwarded to prepare_library.
protonation = 'pkasolver'
# Docking programs to run; the docking cell knows 'GNINA', 'SMINA' and 'PLANTS'.
docking_programs = ['GNINA', 'SMINA', 'PLANTS']
# Metrics and method forwarded to the clustering step.
clustering_metrics = ['RMSD']
clustering_method = 'KMedoids'
# Scoring functions forwarded to rescore_all.
rescoring = ['gnina', 'AD4', 'chemplp', 'rfscorevs', 'LinF9', 'SCORCH']
# Identifier column name forwarded to prepare_library.
id_column = 'ID'
# Docking settings forwarded unchanged to the docking functions.
n_poses = 10
exhaustiveness = 8
# 1 selects docking_splitted / cluster_futures; any other value selects
# docking / cluster.
parallel = 1
# Use half of the available cores, but never fewer than one. os.cpu_count() may
# return None when the count cannot be determined, and halving a single core
# would otherwise yield 0 workers.
ncpus = max(1, (os.cpu_count() or 1) // 2)

#Create a temporary folder for all further calculations
w_dir = os.path.dirname(protein_file)
print('The working directory has been set to:', w_dir)
create_temp_folder(w_dir+'/temp')

In [None]:
# Define the binding pocket only when no '<receptor>_pocket.pdb' file exists
# next to the receptor yet.
# NOTE(review): when that file already exists, or `pocket` holds any other
# value, `pocket_definition` is left unassigned by this cell.
pocket_pdb = protein_file.replace('.pdb', '_pocket.pdb')
if not os.path.isfile(pocket_pdb):
    if pocket == 'dogsitescorer':
        pocket_definition = binding_site_coordinates_dogsitescorer(protein_file, w_dir, method='volume')
    elif pocket == 'reference':
        # The literal 8 is presumably a radius around the reference ligand -- TODO confirm.
        pocket_definition = GetPocket(ref_file, protein_file, 8)

In [None]:
# Run library preparation only when temp/final_library.sdf is not present yet.
# NOTE(review): presumably prepare_library is what writes that file -- confirm.
prepared_library_sdf = w_dir+'/temp/final_library.sdf'
if not os.path.isfile(prepared_library_sdf):
    prepare_library(docking_library, id_column, software, protonation, ncpus)

In [None]:
# Output folder checked for each supported docking program. An existing folder
# is treated as "already docked" and the program is skipped.
# Kept under its own name so the user's `docking_programs` selection is not
# overwritten (rebinding it to this dict made the selection check always true).
docking_output_dirs = {
    'GNINA': w_dir+'/temp/gnina/',
    'SMINA': w_dir+'/temp/smina/',
    'PLANTS': w_dir+'/temp/plants/',
}
# parallel == 1 uses docking_splitted; any other value uses docking.
run_docking = docking_splitted if parallel == 1 else docking
for program, file_path in docking_output_dirs.items():
    # Dock only with programs the user selected and that have no output folder yet.
    if program in docking_programs and not os.path.isdir(file_path):
        run_docking(w_dir, protein_file, ref_file, software, [program], exhaustiveness, n_poses, ncpus)


In [None]:
# Load the pose file from the temp folder into `all_poses` and report how long
# the load took.
print('Loading all poses SDF file...')
tic = time.perf_counter()
all_poses = PandasTools.LoadSDF(
    w_dir+'/temp/allposes.sdf',
    idName='Pose ID',
    molColName='Molecule',
    includeFingerprints=False,
    strictParsing=True,
)
toc = time.perf_counter()
print(f'Finished loading all poses SDF in {toc-tic:0.4f}!...')


In [None]:
# Choose the clustering entry point once: cluster_futures when parallel == 1,
# cluster otherwise. Both are called with the same arguments.
cluster_fn = cluster_futures if parallel == 1 else cluster
for metric in clustering_metrics:
    clustered_sdf = w_dir+f'/temp/clustering/{metric}_clustered.sdf'
    # A clustered file for this metric already exists, so skip it.
    if os.path.isfile(clustered_sdf):
        continue
    cluster_fn(metric, clustering_method, w_dir, protein_file, all_poses, ncpus)

In [None]:
# Rescore the clustered poses of every clustering metric with the selected
# scoring functions.
for metric in clustering_metrics:
    clustered_sdf = w_dir+f'/temp/clustering/{metric}_clustered.sdf'
    rescore_all(w_dir, protein_file, ref_file, software, clustered_sdf, rescoring, parallel, ncpus)


In [None]:
# Apply the consensus methods for the configured clustering metrics, working
# from the working directory.
# NOTE(review): presumably this consumes the rescoring output under w_dir/temp -- confirm.
apply_consensus_methods(w_dir, clustering_metrics)
# NOTE(review): "EFs" presumably means enrichment factors evaluated against the
# compounds in docking_library -- confirm against the helper's implementation.
calculate_EFs(w_dir, docking_library)

In [None]:
# Same two-step pattern as the plain consensus cell, using the "_combinations"
# variants of both helpers.
# NOTE(review): presumably these evaluate combinations of scoring functions --
# confirm against the helpers' implementation.
apply_consensus_methods_combinations(w_dir, clustering_metrics)
calculate_EFs_combinations(w_dir, docking_library)