**Import required libraries and scripts**

In [18]:
#Import required libraries and scripts
from scripts.library_preparation import *
from scripts.utilities import *
from scripts.docking_functions import *
from scripts.clustering_functions import *
from scripts.rescoring_functions import *
from scripts.consensus_methods import *
from scripts.performance_calculation import *
from scripts.dogsitescorer import *
from scripts.get_pocket import *
from scripts.postprocessing import *
from scripts.protein_preparation import *
from software.DeepCoy.generate_decoys import generate_decoys

2023-12-22 02:43:41.994195: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-22 02:43:41.994332: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-22 02:43:42.104515: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-22 02:43:42.330523: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [10]:
# ---------------------------------------------------------------------------
# Run configuration: input files, program selection and docking parameters.
# Every name below is read by the cells that follow.
# ---------------------------------------------------------------------------
CWD = os.getcwd()

# Input locations, all resolved relative to the current working directory.
software = Path(CWD, 'software')
receptor = Path(CWD, 'testing_single_docking', 'protein.pdb')
ref_file = Path(CWD, 'testing_single_docking', 'ref.sdf')
actives = Path(CWD, 'testing_single_docking', 'library.sdf')
docking_library = Path(CWD, 'testing_single_docking', 'library.sdf')

# Programs / metrics / scoring functions to run.
docking_programs = DOCKING_PROGRAMS
clustering_metrics = [
    *CLUSTERING_METRICS.keys(),
    'bestpose',
    'bestpose_GNINA',
    'bestpose_SMINA',
    'bestpose_PLANTS',
    'bestpose_QVINAW',
    'bestpose_QVINA2',
]
rescoring_functions = [*RESCORING_FUNCTIONS.keys()]
# RTMScore is excluded from this run.
rescoring_functions.remove('RTMScore')

# Library preparation and docking parameters.
id_column = 'ID'
n_poses = 10
exhaustiveness = 8
conformers = 'GypsumDL'
protonation = 'GypsumDL'
ncpus = 5
pocket = 'reference'
prepare_protein = True
gen_decoys = True

# Create log.txt, or empty it if it already exists.
with open('log.txt', 'w'):
    pass

# Pick the receptor structure used downstream: the result of
# prepare_protein_protoss when preparation is requested, otherwise the
# input file as given.
prepared_receptor = prepare_protein_protoss(receptor) if prepare_protein == True else receptor

# The working directory sits next to the receptor file and is named after
# the receptor file's stem; all further calculations write into it.
w_dir = prepared_receptor.parent.joinpath(prepared_receptor.stem)
print('The working directory has been set to:', w_dir)
w_dir.mkdir(exist_ok=True)

# Define the binding pocket according to the `pocket` mode.
# The modes are mutually exclusive, so they form a single if/elif chain, and an
# unrecognised mode fails immediately instead of leaving `pocket_definition`
# undefined (or stale from a previous notebook run) until docking starts.
if pocket == 'reference':
    # The literal 8 is forwarded unchanged to get_pocket
    # (presumably a radius around the reference ligand - TODO confirm).
    pocket_definition = get_pocket(ref_file, prepared_receptor, 8)
elif pocket == 'RoG':
    pocket_definition = get_pocket_RoG(ref_file, prepared_receptor)
elif pocket == 'dogsitescorer':
    pocket_definition = binding_site_coordinates_dogsitescorer(prepared_receptor, w_dir, method='volume')
else:
    raise ValueError(
        f"Unknown pocket mode {pocket!r}; expected 'reference', 'RoG' or 'dogsitescorer'."
    )
print(pocket_definition)
    
# Select the library used for the benchmarking phase and prepare it.
if gen_decoys:
    decoy_dir = w_dir / 'DeepCoy'
    test_set = decoy_dir / 'test_set.sdf'
    if not test_set.is_file():
        # No cached decoy set yet: generate one from the actives.
        test_set = generate_decoys(actives, 10, 'DUDE', software)
    # Benchmarking results are kept in the DeepCoy sub-folder.
    w_dir = decoy_dir
else:
    # Without decoy generation `test_set` used to be undefined, so the
    # prepare_library call below raised NameError. Fall back to the docking
    # library (NOTE(review): confirm this is the intended benchmark set).
    test_set = docking_library

# Skip library preparation when a prepared library already exists in w_dir.
if not (w_dir / 'final_library.sdf').is_file():
    prepare_library(test_set, w_dir, id_column, conformers, protonation, software, ncpus)

# Dock the prepared library in w_dir with every selected docking program.
docking(
    w_dir,
    prepared_receptor,
    pocket_definition,
    software,
    docking_programs,
    exhaustiveness,
    n_poses,
    5,
    'multiprocessing',
)

# Combine the poses of all docking programs (the next step reads
# w_dir / 'allposes.sdf').
concat_all_poses(
    w_dir,
    docking_programs,
    prepared_receptor,
    ncpus,
)

# Read the combined pose file into `all_poses` and report the elapsed time.
print('Loading all poses SDF file...')
tic = time.perf_counter()
allposes_sdf = str(w_dir / 'allposes.sdf')
all_poses = PandasTools.LoadSDF(
    allposes_sdf,
    idName='Pose ID',
    molColName='Molecule',
    includeFingerprints=False,
    strictParsing=True,
)
print(f'Loaded {len(all_poses)} poses.')
toc = time.perf_counter()
print(f'Finished loading all poses SDF in {toc-tic:0.4f}!...')

# Pass 1: cluster the poses for every metric whose clustered file is missing.
for metric in clustering_metrics:
    clustered_sdf = w_dir / 'clustering' / f'{metric}_clustered.sdf'
    if os.path.isfile(clustered_sdf):
        # Already clustered for this metric.
        continue
    cluster_pebble(
        metric,
        'KMedoids',
        w_dir,
        prepared_receptor,
        pocket_definition,
        software,
        all_poses,
        ncpus,
    )

# Pass 2: rescore the clustered poses of every metric.
for metric in clustering_metrics:
    clustered_sdf = w_dir / 'clustering' / f'{metric}_clustered.sdf'
    rescore_poses(
        w_dir,
        prepared_receptor,
        pocket_definition,
        software,
        clustered_sdf,
        rescoring_functions,
        ncpus,
    )
        
# Evaluate every clustering / scoring / consensus combination on the benchmark.
# The result is used directly; it was previously overwritten by a CSV read from
# a hard-coded, machine-specific path ('/home/tony/...'), which made the run
# non-portable and ignored the values just computed.
performance = calculate_performance(w_dir, actives, [10, 5, 2, 1, 0.5])

# Determine optimal conditions: the row with the highest 'EF1' value.
optimal_conditions = performance.sort_values(by='EF1', ascending=False).iloc[0].to_dict()
print(optimal_conditions)

# A clustering name of the form '<prefix>_<PROGRAM>' (e.g. 'bestpose_GNINA')
# restricts docking to that single program. Wrap the name in a list:
# list('GNINA') would split it into single characters. A plain 'bestpose'
# (no underscore) keeps the configured docking programs.
optimal_clustering = optimal_conditions['clustering']
if '_' in optimal_clustering:
    docking_programs = [optimal_clustering.split('_')[1]]

# The 'scoring' entry is an underscore-separated list of scoring functions.
rescoring_functions = optimal_conditions['scoring'].split('_')

# ---------------------------------------------------------------------------
# Final run: dock the docking library using the optimal conditions.
# ---------------------------------------------------------------------------
# Return to the receptor-level working directory (w_dir may have been switched
# to the DeepCoy sub-folder earlier).
w_dir = prepared_receptor.parent / prepared_receptor.stem

# Prepare the library to be screened. This uses `docking_library`, as the
# step-wise library-preparation cell does; the original passed the benchmark
# `test_set` here, which is also undefined when decoy generation is disabled.
if not (w_dir / 'final_library.sdf').is_file():
    prepare_library(docking_library, w_dir, id_column, conformers, protonation, software, ncpus)

docking(w_dir, prepared_receptor, pocket_definition, software, docking_programs, exhaustiveness, n_poses, 5, 'multiprocessing')

concat_all_poses(w_dir, docking_programs, prepared_receptor, ncpus)

print('Loading all poses SDF file...')
tic = time.perf_counter()
all_poses = PandasTools.LoadSDF(str(w_dir / 'allposes.sdf'), idName='Pose ID', molColName='Molecule', includeFingerprints=False, strictParsing=True)
print(f'Loaded {len(all_poses)} poses.')
toc = time.perf_counter()
print(f'Finished loading all poses SDF in {toc-tic:0.4f}!...')

# Only the optimal clustering is needed in this phase.
optimal_clustering = optimal_conditions['clustering']
optimal_clustered_sdf = w_dir / 'clustering' / f'{optimal_clustering}_clustered.sdf'

if not optimal_clustered_sdf.is_file():
    cluster_pebble(optimal_clustering, 'KMedoids', w_dir, prepared_receptor, pocket_definition, software, all_poses, ncpus)

# Rescore only the file produced above. The original looped over every
# clustering metric, but the other metrics are never clustered in this
# directory during this phase, so their input files need not exist.
rescore_poses(w_dir, prepared_receptor, pocket_definition, software, optimal_clustered_sdf, rescoring_functions, ncpus)

apply_consensus_methods(w_dir, optimal_clustering, optimal_conditions['consensus'], rescoring_functions, 'min_max')


In [None]:
# Use the protoss-prepared structure when preparation is requested,
# otherwise continue with the receptor file as given.
prepared_receptor = prepare_protein_protoss(receptor) if prepare_protein == True else receptor

# Working directory: a folder beside the receptor file, named after its stem.
w_dir = prepared_receptor.parent.joinpath(prepared_receptor.stem)
print('The working directory has been set to:', w_dir)
w_dir.mkdir(exist_ok=True)

# Define the binding pocket according to the `pocket` mode.
# A single if/elif chain covers the mutually exclusive modes; an unknown mode
# raises immediately rather than leaving `pocket_definition` undefined (or
# stale from an earlier cell execution) for the docking cell.
if pocket == 'reference':
    # The literal 8 is forwarded unchanged to get_pocket
    # (presumably a radius around the reference ligand - TODO confirm).
    pocket_definition = get_pocket(ref_file, prepared_receptor, 8)
elif pocket == 'RoG':
    pocket_definition = get_pocket_RoG(ref_file, prepared_receptor)
elif pocket == 'dogsitescorer':
    pocket_definition = binding_site_coordinates_dogsitescorer(prepared_receptor, w_dir, method='volume')
else:
    raise ValueError(
        f"Unknown pocket mode {pocket!r}; expected 'reference', 'RoG' or 'dogsitescorer'."
    )
print(pocket_definition)

In [None]:
# Prepare the docking library unless a prepared copy is already in w_dir.
final_library = w_dir / 'final_library.sdf'
if not final_library.is_file():
    prepare_library(
        docking_library,
        w_dir,
        id_column,
        conformers,
        protonation,
        software,
        ncpus,
    )

In [None]:
# Dock the prepared library in w_dir with every selected docking program.
docking(
    w_dir,
    prepared_receptor,
    pocket_definition,
    software,
    docking_programs,
    exhaustiveness,
    n_poses,
    5,
    'multiprocessing',
)

In [None]:
# Combine the poses of all docking programs (the next cell reads
# w_dir / 'allposes.sdf').
concat_all_poses(
    w_dir,
    docking_programs,
    prepared_receptor,
    ncpus,
)

In [None]:
# Read the combined pose file into `all_poses` and report the elapsed time.
print('Loading all poses SDF file...')
tic = time.perf_counter()
allposes_sdf = str(w_dir / 'allposes.sdf')
all_poses = PandasTools.LoadSDF(
    allposes_sdf,
    idName='Pose ID',
    molColName='Molecule',
    includeFingerprints=False,
    strictParsing=True,
)
print(f'Loaded {len(all_poses)} poses.')
toc = time.perf_counter()
print(f'Finished loading all poses SDF in {toc-tic:0.4f}!...')


In [None]:
# Cluster the poses for every metric whose clustered file is not yet on disk.
for metric in clustering_metrics:
    clustered_sdf = w_dir / 'clustering' / f'{metric}_clustered.sdf'
    if os.path.isfile(clustered_sdf):
        # Already clustered for this metric.
        continue
    cluster_pebble(
        metric,
        'KMedoids',
        w_dir,
        prepared_receptor,
        pocket_definition,
        software,
        all_poses,
        ncpus,
    )

In [None]:
# Rescore the clustered poses of every clustering metric.
for metric in clustering_metrics:
    clustered_sdf = w_dir / 'clustering' / f'{metric}_clustered.sdf'
    rescore_poses(
        w_dir,
        prepared_receptor,
        pocket_definition,
        software,
        clustered_sdf,
        rescoring_functions,
        ncpus,
    )

In [None]:
# Apply the 'ECR_best' consensus method to the 'bestpose' clustering output,
# passing 'min_max' as the final argument.
apply_consensus_methods(
    w_dir,
    'bestpose',
    'ECR_best',
    rescoring_functions,
    'min_max',
)