**Import required libraries and scripts**

In [None]:
#Import required libraries and scripts
from scripts.library_preparation import *
from scripts.utilities import *
from scripts.docking_functions import *
from scripts.clustering_functions import *
from scripts.rescoring_functions import *
from scripts.consensus_methods import *
from scripts.performance_calculation import *
from scripts.dogsitescorer import *
from scripts.get_pocket import *
from scripts.postprocessing import *
from scripts.protein_preparation import *
from software.DeepCoy.generate_decoys import generate_decoys

In [None]:
# ---------------------------------------------------------------------------
# Run configuration. All input paths are resolved relative to the directory
# the notebook is launched from.
# ---------------------------------------------------------------------------
CWD = os.getcwd()
software = Path(CWD) / 'software'
receptor = Path(CWD) / 'testing_decoys_docking' / 'protein.pdb'
ref_file = Path(CWD) / 'testing_decoys_docking' / 'ref.sdf'
docking_library = Path(CWD) / 'testing_decoys_docking' / 'library.sdf'

# Docking engines to run, the pose-selection (clustering) metrics to evaluate
# and the scoring functions used for rescoring.
docking_programs = ['PLANTS', 'QVINA2', 'QVINAW']
clustering_metrics = ['bestpose', 'bestpose_PLANTS', 'bestpose_QVINAW', 'bestpose_QVINA2']
rescoring_functions = ['GNINA_Affinity', 'CNN-Score', 'CNN-Affinity', 'Vinardo', 'AD4', 'KORPL', 'ConvexPLR', 'LinF9', 'RTMScore', "RFScoreVS"]

id_column = 'ID'
n_poses = 10
exhaustiveness = 8
conformers = 'GypsumDL'
protonation = 'GypsumDL'

# Use half of the available cores. os.cpu_count() may return None when the
# count cannot be determined, so fall back to a single core in that case.
ncpus = int(math.ceil((os.cpu_count() or 1) / 2))

# Pocket detection mode; the values handled further down in this notebook are
# 'reference', 'RoG' and 'dogsitescorer'.
pocket = 'reference'
prepare_protein = True
gen_decoys = True

# Create (or truncate) the log file so each run starts from an empty log.
with open('log.txt', 'w'):
    pass


In [None]:
# Either prepare the receptor with prepare_protein_protoss() or use the input
# structure as-is.
if prepare_protein:
    prepared_receptor = prepare_protein_protoss(receptor)
else:
    prepared_receptor = receptor

# Create a working folder for all further calculations: it sits next to the
# receptor file and is named after it (file name without extension).
w_dir = prepared_receptor.parent / prepared_receptor.stem
print('The working directory has been set to:', w_dir)
w_dir.mkdir(exist_ok=True)

# Resolve the binding pocket with exactly one strategy. The modes are mutually
# exclusive, so they form a single if/elif chain, and an unrecognised mode
# fails here instead of surfacing later as an undefined `pocket_definition`.
if pocket == 'reference':
    # NOTE(review): the literal 8 is presumably a distance/radius around the
    # reference ligand - confirm against get_pocket().
    pocket_definition = get_pocket(ref_file, prepared_receptor, 8)
elif pocket == 'RoG':
    pocket_definition = get_pocket_RoG(ref_file, prepared_receptor)
elif pocket == 'dogsitescorer':
    pocket_definition = binding_site_coordinates_dogsitescorer(prepared_receptor, w_dir, method='volume')
else:
    raise ValueError(
        f"Unknown pocket mode {pocket!r}; expected 'reference', 'RoG' or 'dogsitescorer'."
    )
print(pocket_definition)


In [None]:
# Build the decoy-augmented test set, reusing a previously generated one when
# it is already on disk, then point the working directory at the DeepCoy folder.
# NOTE(review): the cache is looked up under <w_dir>/DeepCoy while the working
# directory is then moved to <w_dir.parent>/DeepCoy - confirm which location
# generate_decoys() actually writes to.
# NOTE(review): `test_set` is only assigned when gen_decoys is enabled, yet
# later cells use it unconditionally.
if gen_decoys:
    cached_test_set = w_dir / 'DeepCoy' / 'test_set.sdf'
    if cached_test_set.is_file():
        test_set = cached_test_set
    else:
        test_set = generate_decoys(docking_library, 10, 'DUDE', software)
    w_dir = w_dir.parent / 'DeepCoy'
    print(w_dir)

In [None]:
# Prepare the test-set library unless a prepared library already exists in the
# working directory.
prepared_library = w_dir / 'final_library.sdf'
if not prepared_library.is_file():
    prepare_library(test_set, w_dir, id_column, conformers, protonation, software, ncpus)


In [None]:
# Dock the prepared test set with every configured docking program, then
# gather the poses from all programs via concat_all_poses().
docking(
    w_dir,
    prepared_receptor,
    pocket_definition,
    software,
    docking_programs,
    exhaustiveness,
    n_poses,
    ncpus,
    'multiprocessing',
)
concat_all_poses(
    w_dir,
    docking_programs,
    prepared_receptor,
    ncpus,
    bust_poses=False,
)


In [None]:
# Load every docked pose into a DataFrame and report how long loading took.
print('Loading all poses SDF file...')
tic = time.perf_counter()
all_poses = PandasTools.LoadSDF(
    str(w_dir / 'allposes.sdf'),
    idName='Pose ID',
    molColName='Molecule',
    includeFingerprints=False,
    strictParsing=True,
)
print(f'Loaded {len(all_poses)} poses.')
toc = time.perf_counter()
print(f'Finished loading all poses SDF in {toc-tic:0.4f}!...')

# Run pose selection for each metric, skipping metrics whose clustered SDF is
# already present from an earlier run.
for metric in clustering_metrics:
    clustered_sdf = w_dir / f'clustering/{metric}_clustered.sdf'
    if os.path.isfile(clustered_sdf):
        continue
    cluster_pebble(metric, 'KMedoids', w_dir, prepared_receptor, pocket_definition, software, all_poses, ncpus)


In [None]:
# Rescore the selected poses of every clustering metric with all configured
# scoring functions.
for metric in clustering_metrics:
    clustered_sdf = w_dir / 'clustering' / f'{metric}_clustered.sdf'
    rescore_poses(
        w_dir,
        prepared_receptor,
        pocket_definition,
        software,
        clustered_sdf,
        rescoring_functions,
        ncpus,
    )


In [None]:
# Evaluate the test-set results.
# NOTE(review): the list is presumably the set of top-percentage cutoffs for
# the enrichment metrics (the result is ranked by its 'EF1' column below) -
# confirm against calculate_performance().
performance = calculate_performance(w_dir, test_set, [10, 5, 2, 1, 0.5])

# Determine optimal conditions: the row with the highest 'EF1' value.
optimal_conditions = performance.sort_values(by='EF1', ascending=False).iloc[0].to_dict()
print(optimal_conditions)

# A program-specific metric such as 'bestpose_PLANTS' means only that program
# needs to be run from here on; plain 'bestpose' keeps the full program list.
# The name is wrapped in a list literal on purpose: list('PLANTS') would split
# it into single characters.
best_clustering = optimal_conditions['clustering']
if '_' in best_clustering:
    docking_programs = [best_clustering.split('_')[1]]

# NOTE(review): this assumes the 'scoring' entry joins function names with '_';
# a name that itself contains '_' (e.g. 'GNINA_Affinity') would be split in
# two - verify against calculate_performance().
rescoring_functions = optimal_conditions['scoring'].split('_')


In [None]:
# Point the working directory back at the receptor-named folder next to the
# receptor file, creating it if it does not exist yet.
w_dir = prepared_receptor.parent.joinpath(prepared_receptor.stem)
print('The working directory has been set to:', w_dir)
w_dir.mkdir(exist_ok=True)

In [None]:
# Prepare the original docking library unless a prepared library already
# exists in the working directory.
prepared_library = w_dir / 'final_library.sdf'
if not prepared_library.is_file():
    prepare_library(docking_library, w_dir, id_column, conformers, protonation, software, ncpus)


In [None]:
# Dock the prepared library with the docking program(s) currently configured.
# The worker count comes from `ncpus`, the same setting used by the earlier
# docking call and by concat_all_poses() below, rather than a hard-coded 5.
docking(w_dir, prepared_receptor, pocket_definition, software, docking_programs, exhaustiveness, n_poses, ncpus, 'multiprocessing')

# Gather the poses produced by the docking programs via concat_all_poses().
concat_all_poses(w_dir, docking_programs, prepared_receptor, ncpus, bust_poses=False)

In [None]:
# Load every docked pose into a DataFrame and report how long loading took.
print('Loading all poses SDF file...')
tic = time.perf_counter()
all_poses = PandasTools.LoadSDF(
    str(w_dir / 'allposes.sdf'),
    idName='Pose ID',
    molColName='Molecule',
    includeFingerprints=False,
    strictParsing=True,
)
print(f'Loaded {len(all_poses)} poses.')
toc = time.perf_counter()
print(f'Finished loading all poses SDF in {toc-tic:0.4f}!...')

# Only the optimal clustering metric is run here, and only when its clustered
# SDF is not already present in the working directory.
optimal_clustered_sdf = w_dir / f'clustering/{optimal_conditions["clustering"]}_clustered.sdf'
if not os.path.isfile(optimal_clustered_sdf):
    cluster_pebble(optimal_conditions['clustering'], 'KMedoids', w_dir, prepared_receptor, pocket_definition, software, all_poses, ncpus)


In [None]:
# Rescore only the poses selected by the optimal clustering metric. That is the
# only metric clustered in this working directory, so the '<metric>_clustered.sdf'
# files of the other metrics do not exist here.
optimal_clustered_sdf = w_dir / 'clustering' / f'{optimal_conditions["clustering"]}_clustered.sdf'
rescore_poses(w_dir, prepared_receptor, pocket_definition, software, optimal_clustered_sdf, rescoring_functions, ncpus)


In [None]:
# Apply the consensus method selected as optimal to the rescored poses of the
# optimal clustering metric.
apply_consensus_methods(
    w_dir,
    optimal_conditions['clustering'],
    optimal_conditions['consensus'],
    rescoring_functions,
    'min_max',
)
