In [1]:
from scripts.clustering_functions import *
from scripts.consensus_methods import *
from scripts.docking_functions import *
from scripts.dogsitescorer import *
from scripts.get_pocket import *
from scripts.library_preparation import *
from scripts.performance_calculation import *
from scripts.postprocessing import *
from scripts.protein_preparation import *
from scripts.rescoring_functions import *
from scripts.utilities import *



In [2]:
def parse_pocket_coordinates(pocket_arg):
    """Parse a pocket specification string into a coordinates dictionary.

    The expected format is ``'center:1,2,3*size:1,2,3'``: entries are
    separated by ``*``, each entry is ``key:values`` and the values are
    comma-separated numbers.

    Args:
        pocket_arg: Pocket specification string.

    Returns:
        A dict mapping each key to a list of floats, e.g.
        ``{'center': [1.0, 2.0, 3.0], 'size': [1.0, 2.0, 3.0]}``.
        ``None`` is returned (after printing an error message) when the
        argument cannot be parsed or does not contain both a 'center' and a
        'size' entry with exactly three values each.
    """
    try:
        pocket_coordinates = {}
        for item in pocket_arg.split('*'):
            key, value = item.split(':')
            # Strip the key so that 'center : 1,2,3' is stored under 'center'.
            pocket_coordinates[key.strip()] = [float(number) for number in value.split(',')]
        # Reject a partially specified pocket instead of returning it as if
        # it were complete.
        for required_key in ('center', 'size'):
            if required_key not in pocket_coordinates:
                raise ValueError(f"missing '{required_key}' entry")
            if len(pocket_coordinates[required_key]) != 3:
                raise ValueError(f"'{required_key}' must have exactly 3 values")
    except Exception as e:
        print(f"Error parsing pocket coordinates: {e}. Make sure the pocket coordinates are in the format 'center:1,2,3*size:1,2,3'")
        pocket_coordinates = None
    return pocket_coordinates

In [3]:
def dockm8(software, receptor, pocket, ref, docking_library, idcolumn,
           prepare_proteins, conformers, protonation, docking_programs,
           bust_poses, pose_selection, nposes, exhaustiveness, ncpus,
           clustering_method, rescoring, consensus):
    """Run the DockM8 pipeline for a single receptor.

    Steps, in order: optional protein preparation, pocket determination,
    library preparation (skipped when ``final_library.sdf`` already exists in
    the working directory), docking, pose concatenation, pose selection
    (skipped per method when its ``clustering/<method>_clustered.sdf`` already
    exists), rescoring and consensus scoring.

    All results are written below the working directory
    ``<receptor parent>/<receptor stem>``.

    Args:
        software: Forwarded to the library preparation, docking, pose
            selection and rescoring steps.
        receptor: Path to the receptor file; also defines the working
            directory.
        pocket: ``'Reference'``, ``'RoG'``, ``'Dogsitescorer'`` or a
            coordinate string in the format ``'center:1,2,3*size:1,2,3'``.
        ref: Reference file used by the ``'Reference'`` and ``'RoG'`` pocket
            modes; may be ``None`` for the other modes.
        docking_library: Library forwarded to ``prepare_library``.
        idcolumn: Forwarded to ``prepare_library``.
        prepare_proteins: When equal to ``True`` the receptor is passed
            through ``prepare_protein_protoss`` first.
        conformers: Forwarded to ``prepare_library``.
        protonation: Forwarded to ``prepare_library``.
        docking_programs: Forwarded to ``docking`` and ``concat_all_poses``.
        bust_poses: Forwarded to ``concat_all_poses``.
        pose_selection: Iterable of pose selection method names.
        nposes: Forwarded to ``docking``.
        exhaustiveness: Forwarded to ``docking``.
        ncpus: Forwarded to every parallelised step.
        clustering_method: Forwarded to ``select_poses``.
        rescoring: Forwarded to ``rescore_poses`` and
            ``apply_consensus_methods``.
        consensus: Forwarded to ``apply_consensus_methods``.

    Raises:
        ValueError: If a reference-based pocket mode is requested without
            ``ref``, or if the pocket coordinate string cannot be parsed.
    """
    # Fail before any expensive work: both reference-based modes call
    # Path(ref), which would otherwise raise an unhelpful TypeError for None.
    if pocket in ('Reference', 'RoG') and ref is None:
        raise ValueError(
            f"pocket='{pocket}' requires a reference file, but ref is None")

    # Set working directory based on the receptor file
    w_dir = Path(receptor).parent / Path(receptor).stem
    print('The working directory has been set to:', w_dir)
    (w_dir).mkdir(exist_ok=True)

    # Prepare the protein for docking (e.g., adding hydrogens)
    # NOTE: '== True' is kept on purpose so that non-bool values (e.g. the
    # string 'False') keep behaving exactly as before.
    if prepare_proteins == True:
        prepared_receptor = Path(prepare_protein_protoss(receptor))
    else:
        prepared_receptor = Path(receptor)

    # Determine the docking pocket
    if pocket == 'Reference':
        pocket_definition = get_pocket(Path(ref), prepared_receptor, 10)
    elif pocket == 'RoG':
        pocket_definition = get_pocket_RoG(Path(ref), prepared_receptor)
    elif pocket == 'Dogsitescorer':
        pocket_definition = binding_site_coordinates_dogsitescorer(
            prepared_receptor, w_dir, method='volume')
    else:
        pocket_definition = parse_pocket_coordinates(pocket)
        # parse_pocket_coordinates signals failure by returning None; stop
        # here instead of preparing the library and docking without a pocket.
        if pocket_definition is None:
            raise ValueError(
                f"Could not parse pocket definition {pocket!r}; expected "
                "'Reference', 'RoG', 'Dogsitescorer' or a string in the "
                "format 'center:1,2,3*size:1,2,3'")

    print("The pocket coordinates are:", pocket_definition)

    # Prepare the docking library if not already prepared
    if not os.path.isfile(w_dir / 'final_library.sdf'):
        prepare_library(docking_library, w_dir, idcolumn, conformers,
                        protonation, software, ncpus)

    # Perform the docking operation
    docking(w_dir, prepared_receptor, pocket_definition, software,
            docking_programs, exhaustiveness, nposes, ncpus,
            'concurrent_process')

    # Concatenate all poses into a single file
    concat_all_poses(w_dir, docking_programs, prepared_receptor, ncpus,
                     bust_poses)

    # Load all poses from SDF file and perform clustering
    print('Loading all poses SDF file...')
    tic = time.perf_counter()
    all_poses = PandasTools.LoadSDF(str(w_dir / 'allposes.sdf'),
                                    idName='Pose ID',
                                    molColName='Molecule',
                                    includeFingerprints=False,
                                    strictParsing=True)
    # Only the identifier and the molecule are handed to pose selection.
    all_poses = all_poses[['Pose ID', 'Molecule']]
    toc = time.perf_counter()
    print(f'Finished loading all poses SDF in {toc-tic:0.4f}!')
    for method in pose_selection:
        # Skip methods whose selection output already exists on disk.
        if not os.path.isfile(w_dir / f'clustering/{method}_clustered.sdf'):
            select_poses(method, clustering_method, w_dir, prepared_receptor,
                         pocket_definition, software, all_poses, ncpus)

    # Rescore poses for each selection method
    for method in pose_selection:
        rescore_poses(w_dir, prepared_receptor, pocket_definition, software,
                      w_dir / 'clustering' / f'{method}_clustered.sdf',
                      rescoring, ncpus)

    # Apply consensus methods to the poses
    for method in pose_selection:
        apply_consensus_methods(w_dir,
                                method,
                                consensus,
                                rescoring,
                                standardization_type='min_max')

In [None]:
# Single-mode DockM8 run on the INACTIVE MCHR1 receptor model.
print('DockM8 is running in single mode...')
print('DockM8 is generating decoys...')

# Library that is docked below and then passed to the performance calculation.
output_library = Path(
    '/home/tony/CACHE5/SBVS/DockM8_benchmarking_analysis/20240430_MCHR1_splitted_RJ.sdf'
)

# Dock, select, rescore and build consensus rankings for the library.
dockm8(
    software=Path('/home/tony/DockM8_v1/software'),
    receptor=Path(
        '/home/tony/CACHE5/SBVS/DockM8_benchmarking_analysis/MCHR1_INACTIVE_TRIMMED_rank1_Af-multi_min_reordered.pdb'
    ),
    pocket='center:-10.4,-4.6,5.3*size:25,20,25',
    ref=None,
    docking_library=output_library,
    idcolumn='ID',
    prepare_proteins=True,
    conformers='GypsumDL',
    protonation='GypsumDL',
    docking_programs=['GNINA', 'PLANTS'],
    bust_poses=False,
    pose_selection=[
        'bestpose',
        'bestpose_GNINA',
        'bestpose_PLANTS',
        'KORP-PL',
        'ConvexPLR',
        'CNN-Score',
        'RTMScore',
    ],
    nposes=10,
    exhaustiveness=8,
    ncpus=30,
    clustering_method=None,
    rescoring=[
        'CNN-Score',
        'LinF9',
        'RTMScore',
        'ConvexPLR',
        'KORP-PL',
        'CHEMPLP',
        'CNN-Affinity',
    ],
    consensus=list(CONSENSUS_METHODS.keys()),
)

# Evaluate the run from its working directory.
performance = calculate_performance(
    Path('/home/tony/CACHE5/SBVS/DockM8_benchmarking_analysis/MCHR1_INACTIVE_TRIMMED_rank1_Af-multi_min_reordered/'),
    output_library,
    [10, 5, 2, 1, 0.5],
)

# The row with the highest 'EF_1%' value is taken as the optimal conditions.
optimal_conditions = (
    performance
    .sort_values(by='EF_1%', ascending=False)
    .iloc[0]
    .to_dict()
)

# Persist the optimal conditions as the string form of the dictionary.
with open('optimal_conditions.txt', 'w') as file:
    file.write(str(optimal_conditions))

In [None]:
# Rescore the complete pose file of the INACTIVE receptor run.
# Arguments are positional, in the same order dockm8() passes them:
# working directory, receptor, pocket, software, poses SDF, functions, CPUs.
rescore_w_dir = Path(
    '/home/tony/CACHE5/SBVS/DockM8_benchmarking_analysis/MCHR1_INACTIVE_TRIMMED_rank1_Af-multi_min_reordered'
)
rescore_receptor = Path(
    '/home/tony/CACHE5/SBVS/DockM8_benchmarking_analysis/MCHR1_INACTIVE_TRIMMED_rank1_Af-multi_min_reordered_protoss.pdb'
)
rescore_pocket = {'center': [-10.4, -4.6, 5.3], 'size': [25, 20, 25]}
rescore_software = Path('/home/tony/DockM8_v1/software')
rescore_sdf = Path(
    '/home/tony/CACHE5/SBVS/DockM8_benchmarking_analysis/MCHR1_INACTIVE_TRIMMED_rank1_Af-multi_min_reordered/allposes.sdf'
)
rescore_functions = [
    'CNN-Score',
    'LinF9',
    'RTMScore',
    'ConvexPLR',
    'KORP-PL',
    'CHEMPLP',
    'CNN-Affinity',
]
rescore_ncpus = 30

rescore_poses(
    rescore_w_dir,
    rescore_receptor,
    rescore_pocket,
    rescore_software,
    rescore_sdf,
    rescore_functions,
    rescore_ncpus,
)


In [None]:
# Show the performance table followed by the selected optimal conditions.
display(performance, optimal_conditions)

In [4]:
# Single-mode DockM8 run on the ACTIVE MCHR1 receptor model.
print('DockM8 is running in single mode...')
print('DockM8 is generating decoys...')

# Library that is docked below and then passed to the performance calculation.
output_library = Path('/home/tony/CACHE5/SBVS/DockM8_benchmarking_analysis/20240430_MCHR1_splitted_RJ.sdf')

# Keep the receptor in one variable so that the results directory evaluated
# below can never drift away from the receptor that was actually docked.
active_receptor = Path('/home/tony/CACHE5/SBVS/DockM8_benchmarking_analysis/MCHR1_ACTIVE_TRIMMED_rank1_Af-multi_min_reordered.pdb')

# Run DockM8 on the library
dockm8(software = Path('/home/tony/DockM8_v1/software'),
       receptor = active_receptor,
       pocket = 'center:-10.4,-4.6,5.3*size:25,20,25',
       ref = None,
       docking_library = output_library,
       idcolumn = 'ID',
       prepare_proteins = True,
       conformers = 'GypsumDL',
       protonation = 'GypsumDL',
       docking_programs = ['GNINA', 'PLANTS'],
       bust_poses = False,
       pose_selection = ['bestpose', 'bestpose_GNINA', 'bestpose_PLANTS', 'KORP-PL', 'ConvexPLR', 'CNN-Score', 'RTMScore'],
       nposes = 10,
       exhaustiveness = 8,
       ncpus = 30,
       clustering_method = None,
       rescoring = ['CNN-Score', 'LinF9', 'RTMScore', 'ConvexPLR', 'KORP-PL', 'CHEMPLP', 'CNN-Affinity', ],
       consensus = list(CONSENSUS_METHODS.keys()))

# dockm8() writes its results to <receptor parent>/<receptor stem>. The
# previous version of this cell evaluated the INACTIVE results directory here
# (copy-paste from the INACTIVE cell), so the ACTIVE run was never scored.
performance = calculate_performance(active_receptor.parent / active_receptor.stem,
                                    output_library,
                                    [10, 5, 2, 1, 0.5])

# Determine optimal conditions: the row with the highest 'EF_1%' value
optimal_conditions = performance.sort_values(by='EF_1%', ascending=False).iloc[0].to_dict()

# Save optimal conditions to a file
# NOTE(review): the INACTIVE cell writes to the same relative file name, so
# whichever cell runs last overwrites the other's result — confirm intended.
with open('optimal_conditions.txt', 'w') as file:
    file.write(str(optimal_conditions))

DockM8 is running in single mode...
DockM8 is generating decoys...
The working directory has been set to: /home/tony/CACHE5/SBVS/DockM8_benchmarking_analysis/MCHR1_ACTIVE_TRIMMED_rank1_Af-multi_min_reordered
[2024-May-28 12:52:15]: Preparing protein with ProtoSS ...

Job 239ad4e4-af29-4790-beec-d21ce047cdb9 completed with success
The pocket coordinates are: {'center': [-10.4, -4.6, 5.3], 'size': [25.0, 20.0, 25.0]}
[2024-May-28 12:52:27]: All poses succesfully checked and combined!

Loading all poses SDF file...
Finished loading all poses SDF in 3.4842!
[2024-May-28 12:52:31]: *Calculating CNN-Score metrics and clustering*



Running <function gnina_rescoring_splitted at 0x7fbdb2e24ee0>: 100%|██████████| 60/60 [01:03<00:00,  1.05s/it]


[2024-May-28 12:53:36]: Rescoring with CNN-Score complete in 65.3360!



KeyError: '[nan] not in index'