**Import required libraries and scripts**

In [None]:
#Import required libraries and scripts
from scripts.library_preparation import *
from scripts.utilities import *
from scripts.docking_functions2 import *
from scripts.clustering_functions import *
from scripts.rescoring_functions import *
from scripts.consensus_methods import *
from scripts.performance_calculation import *
from scripts.dogsitescorer import *
from scripts.get_pocket import *

# --- Run configuration: everything a reader might need to tune lives here ---
# Folder with the external software; passed to library preparation, docking and rescoring.
software = '/home/mario/DockM8/software'
# Input files for this run: receptor structure, reference (crystal) ligand and compound library.
protein_file = '/media/mario/T7/FINISHED/FINISHED/xiap/receptor_protoss_prepared.pdb'
ref_file = '/media/mario/T7/FINISHED/FINISHED/xiap/crystal_ligand_protoss.sdf'
docking_library = '/media/mario/T7/FINISHED/FINISHED/xiap/merged_actives_decoys.sdf'
# Programs / metrics / scoring functions the later cells iterate over.
docking_programs = ['GNINA', 'SMINA', 'PLANTS']
clustering_metrics = ['RMSD', 'spyRMSD', 'espsim', '3DScore', 'bestpose', 'bestpose_GNINA', 'bestpose_SMINA', 'bestpose_PLANTS']
rescoring_functions = ['gnina', 'AD4', 'chemplp', 'rfscorevs', 'LinF9', 'SCORCH', 'RTMScore', 'vinardo']
# Passed to prepare_library together with the protonation mode below.
id_column = 'ID'
# Passed to docking().
n_poses = 10
exhaustiveness = 8
protonation = 'pkasolver'
# NOTE(review): `parallel` is not read by any cell visible in this notebook — confirm it is still needed.
parallel = 1
# Use half of the available cores, but never fewer than one: os.cpu_count() may return
# None (unknown) and halving a single core would otherwise yield zero workers.
ncpus = max(1, (os.cpu_count() or 1) // 2)
# Pocket definition mode; the pocket cell handles 'reference' and 'dogsitescorer'.
pocket = 'reference'
#Create a temporary folder for all further calculations
w_dir = os.path.dirname(protein_file)
print('The working directory has been set to:', w_dir)
create_temp_folder(w_dir+'/temp')

In [None]:
# Define the binding pocket unless a '<receptor>_pocket.pdb' file already exists next to the receptor.
if not os.path.isfile(protein_file.replace('.pdb', '_pocket.pdb')):
    if pocket == 'reference':
        # The literal 8 is handed to GetPocket unchanged
        # (presumably a cutoff around the reference ligand — TODO confirm against GetPocket).
        pocket_definition = GetPocket(ref_file, protein_file, 8)
    elif pocket == 'dogsitescorer':
        pocket_definition = binding_site_coordinates_dogsitescorer(protein_file, w_dir, method='volume')
    else:
        # Fail loudly instead of silently leaving `pocket_definition` undefined.
        raise ValueError(f"Invalid pocket mode: {pocket!r} (expected 'reference' or 'dogsitescorer')")

In [None]:
# Prepare the compound library only when no prepared library file is present yet
# (presumably final_library.sdf is what prepare_library writes — TODO confirm).
if not os.path.isfile(w_dir+'/temp/final_library.sdf'):
    prepare_library(
        docking_library,
        id_column,
        software,
        protonation,
        ncpus,
    )

In [None]:
# Run docking with every program listed in `docking_programs`, using the settings from the configuration cell.
docking(
    w_dir,
    protein_file,
    ref_file,
    software,
    docking_programs,
    exhaustiveness,
    n_poses,
    ncpus,
)

In [3]:
# Read all docked poses into a DataFrame and report the wall-clock time the load took.
print('Loading all poses SDF file...')
tic = time.perf_counter()
all_poses = PandasTools.LoadSDF(
    w_dir+'/temp/allposes.sdf',
    idName='Pose ID',
    molColName='Molecule',
    includeFingerprints=False,
    strictParsing=True,
)
toc = time.perf_counter()
print(f'Finished loading all poses SDF in {toc-tic:0.4f}!...')


Loading all poses SDF file...
Finished loading all poses SDF in 27.7929!...


In [4]:
# Cluster the loaded poses once per metric; a metric whose clustered SDF already exists is skipped.
for metric in clustering_metrics:
    if os.path.isfile(w_dir+f'/temp/clustering/{metric}_clustered.sdf'):
        continue
    cluster_pebble(
        metric,
        'KMedoids',
        w_dir,
        protein_file,
        all_poses,
        ncpus,
    )

The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/clustering/ was created

[2023-Apr-20 10:26:48]: *Calculating RMSD metrics and clustering*

[2023-Apr-20 10:26:48]: Submitting parallel jobs...


Submitting parallel jobs...: 100%|██████████| 5235/5235 [00:46<00:00, 111.50IDs/s]



[2023-Apr-20 10:27:35]: Finished submitting jobs in 46.9552, now running jobs...


Running clustering jobs...: 100%|██████████| 5235/5235 [35:43<00:00,  2.44jobs/s]  


The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/clustering/ already exists

[2023-Apr-20 11:03:23]: *Calculating spyRMSD metrics and clustering*

[2023-Apr-20 11:03:23]: Submitting parallel jobs...


Submitting parallel jobs...: 100%|██████████| 5235/5235 [00:42<00:00, 124.28IDs/s]



[2023-Apr-20 11:04:05]: Finished submitting jobs in 42.1243, now running jobs...


Running clustering jobs...: 100%|██████████| 5235/5235 [07:26<00:00, 11.72jobs/s]  


The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/clustering/ already exists

[2023-Apr-20 11:11:35]: *Calculating espsim metrics and clustering*

[2023-Apr-20 11:11:35]: Submitting parallel jobs...


Submitting parallel jobs...: 100%|██████████| 5235/5235 [00:37<00:00, 138.43IDs/s]



[2023-Apr-20 11:12:13]: Finished submitting jobs in 37.8173, now running jobs...


Running clustering jobs...: 100%|██████████| 5235/5235 [00:37<00:00, 138.71jobs/s]


The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/clustering/ already exists

[2023-Apr-20 11:12:53]: *Calculating 3DScore metrics and clustering*

[2023-Apr-20 11:12:53]: Submitting parallel jobs...


Submitting parallel jobs...: 100%|██████████| 5235/5235 [00:42<00:00, 123.48IDs/s]



[2023-Apr-20 11:13:35]: Finished submitting jobs in 42.3971, now running jobs...


Running clustering jobs...: 100%|██████████| 5235/5235 [07:16<00:00, 11.99jobs/s]  


The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/clustering/ already exists

[2023-Apr-20 11:20:53]: *Calculating bestpose metrics and clustering*
The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/clustering/ already exists

[2023-Apr-20 11:20:56]: *Calculating bestpose_GNINA metrics and clustering*
The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/clustering/ already exists

[2023-Apr-20 11:20:56]: *Calculating bestpose_SMINA metrics and clustering*
The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/clustering/ already exists

[2023-Apr-20 11:20:57]: *Calculating bestpose_PLANTS metrics and clustering*


In [5]:
# Rescore the clustered poses of every metric with all configured rescoring functions.
for metric in clustering_metrics:
    rescore_all(
        w_dir,
        protein_file,
        ref_file,
        software,
        w_dir+f'/temp/clustering/{metric}_clustered.sdf',
        rescoring_functions,
        ncpus,
    )


The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_RMSD_clustered was created
Splitting SDF file RMSD_clustered.sdf ...


Splitting files: 100%|██████████| 25/25 [00:02<00:00, 11.95it/s]


Split docking library into 25 files each containing 667 compounds

[2023-Apr-20 11:21:06]: Rescoring with GNINA


Submitting GNINA rescoring jobs: 100%|██████████| 25/25 [00:00<00:00, 37.70file/s]
Rescoring with GNINA: 100%|██████████| 25/25 [02:41<00:00,  6.47s/file]



[2023-Apr-20 11:23:50]: Rescoring with GNINA complete in 170.5453!

[2023-Apr-20 11:23:50]: Rescoring with AD4

[2023-Apr-20 11:24:11]: Rescoring with AD4 complete in 20.1933!

[2023-Apr-20 11:24:11]: Rescoring with CHEMPLP


16016 molecules converted



[2023-Apr-20 11:25:21]: Rescoring with CHEMPLP complete in 70.0915!

[2023-Apr-20 11:25:21]: Rescoring with RFScoreVS

[2023-Apr-20 11:26:25]: Rescoring with RF-Score-VS complete in 64.2602!
Splitting SDF file RMSD_clustered.sdf ...


Splitting files: 100%|██████████| 25/25 [00:02<00:00,  9.45it/s]


Split docking library into 25 files each containing 667 compounds


Submitting LinF9 rescoring jobs: 100%|██████████| 25/25 [00:00<00:00, 55.42file/s]
Rescoring with LinF9: 100%|██████████| 25/25 [00:02<00:00,  9.15file/s]



[2023-Apr-20 11:26:36]: Rescoring with LinF9 complete in 10.8683!
The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_RMSD_clustered/SCORCH_rescoring/ was created

[2023-Apr-20 11:26:36]: Converting protein file to .pdbqt ...

[2023-Apr-20 11:26:37]: Converting SDF file RMSD_clustered.sdf to .pdbqt files...
Converted 16016 molecules.

[2023-Apr-20 11:27:03]: Rescoring with SCORCH

[2023-Apr-20 11:31:30]: Rescoring with SCORCH complete in 293.8008!
The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_RMSD_clustered/RTMScore_rescoring/ was created

[2023-Apr-20 11:31:30]: Rescoring with RTMScore

[2023-Apr-20 11:42:42]: Rescoring with RTMScore complete in 672.4732!


'/media/mario/T7/FINISHED/FINISHED/xiap/temp/clustering/RMSD_clustered.sdf'


[2023-Apr-20 11:42:42]: Rescoring with Vinardo

[2023-Apr-20 11:42:58]: Rescoring with Vinardo complete in 15.4676!

[2023-Apr-20 11:42:58]: Combining all score for /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_RMSD_clustered


Combining scores: 100%|██████████| 7/7 [00:00<00:00, 334.75files/s]



[2023-Apr-20 11:42:58]: Rescoring complete in 1317.9433!
The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_spyRMSD_clustered was created
Splitting SDF file spyRMSD_clustered.sdf ...


Splitting files: 100%|██████████| 25/25 [00:02<00:00,  9.89it/s]


Split docking library into 25 files each containing 686 compounds

[2023-Apr-20 11:43:05]: Rescoring with GNINA


Submitting GNINA rescoring jobs: 100%|██████████| 25/25 [00:00<00:00, 49.84file/s]
Rescoring with GNINA: 100%|██████████| 25/25 [02:45<00:00,  6.61s/file]



[2023-Apr-20 11:45:53]: Rescoring with GNINA complete in 175.3413!

[2023-Apr-20 11:45:53]: Rescoring with AD4

[2023-Apr-20 11:46:13]: Rescoring with AD4 complete in 20.0462!

[2023-Apr-20 11:46:13]: Rescoring with CHEMPLP


16468 molecules converted



[2023-Apr-20 11:47:27]: Rescoring with CHEMPLP complete in 74.1301!

[2023-Apr-20 11:47:27]: Rescoring with RFScoreVS

[2023-Apr-20 11:48:34]: Rescoring with RF-Score-VS complete in 66.9960!
Splitting SDF file spyRMSD_clustered.sdf ...


Splitting files: 100%|██████████| 25/25 [00:02<00:00,  8.83it/s]


Split docking library into 25 files each containing 686 compounds


Submitting LinF9 rescoring jobs: 100%|██████████| 25/25 [00:00<00:00, 37.50file/s]
Rescoring with LinF9: 100%|██████████| 25/25 [00:03<00:00,  7.13file/s]



[2023-Apr-20 11:48:47]: Rescoring with LinF9 complete in 12.4047!
The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_spyRMSD_clustered/SCORCH_rescoring/ was created

[2023-Apr-20 11:48:47]: Converting protein file to .pdbqt ...

[2023-Apr-20 11:48:48]: Converting SDF file spyRMSD_clustered.sdf to .pdbqt files...
Converted 16468 molecules.

[2023-Apr-20 11:49:14]: Rescoring with SCORCH

[2023-Apr-20 11:53:49]: Rescoring with SCORCH complete in 302.5287!
The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_spyRMSD_clustered/RTMScore_rescoring/ was created

[2023-Apr-20 11:53:49]: Rescoring with RTMScore

[2023-Apr-20 12:05:41]: Rescoring with RTMScore complete in 711.6198!


'/media/mario/T7/FINISHED/FINISHED/xiap/temp/clustering/spyRMSD_clustered.sdf'


[2023-Apr-20 12:05:41]: Rescoring with Vinardo

[2023-Apr-20 12:05:56]: Rescoring with Vinardo complete in 15.1581!

[2023-Apr-20 12:05:56]: Combining all score for /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_spyRMSD_clustered


Combining scores: 100%|██████████| 7/7 [00:00<00:00, 298.11files/s]



[2023-Apr-20 12:05:56]: Rescoring complete in 1378.4751!
The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_espsim_clustered was created
Splitting SDF file espsim_clustered.sdf ...


Splitting files: 100%|██████████| 25/25 [00:02<00:00, 12.21it/s]


Split docking library into 25 files each containing 532 compounds

[2023-Apr-20 12:06:01]: Rescoring with GNINA


Submitting GNINA rescoring jobs: 100%|██████████| 25/25 [00:00<00:00, 37.55file/s]
Rescoring with GNINA: 100%|██████████| 25/25 [02:15<00:00,  5.44s/file]



[2023-Apr-20 12:08:19]: Rescoring with GNINA complete in 142.7030!

[2023-Apr-20 12:08:19]: Rescoring with AD4

[2023-Apr-20 12:08:36]: Rescoring with AD4 complete in 16.5518!

[2023-Apr-20 12:08:36]: Rescoring with CHEMPLP


12774 molecules converted



[2023-Apr-20 12:09:31]: Rescoring with CHEMPLP complete in 55.7013!

[2023-Apr-20 12:09:31]: Rescoring with RFScoreVS

[2023-Apr-20 12:10:24]: Rescoring with RF-Score-VS complete in 53.1391!
Splitting SDF file espsim_clustered.sdf ...


Splitting files: 100%|██████████| 25/25 [00:02<00:00, 12.21it/s]


Split docking library into 25 files each containing 532 compounds


Submitting LinF9 rescoring jobs: 100%|██████████| 25/25 [00:00<00:00, 30.79file/s]
Rescoring with LinF9: 100%|██████████| 25/25 [00:02<00:00,  8.49file/s]



[2023-Apr-20 12:10:34]: Rescoring with LinF9 complete in 9.8311!
The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_espsim_clustered/SCORCH_rescoring/ was created

[2023-Apr-20 12:10:34]: Converting protein file to .pdbqt ...

[2023-Apr-20 12:10:36]: Converting SDF file espsim_clustered.sdf to .pdbqt files...
Converted 12774 molecules.

[2023-Apr-20 12:10:55]: Rescoring with SCORCH

[2023-Apr-20 12:14:32]: Rescoring with SCORCH complete in 237.7248!
The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_espsim_clustered/RTMScore_rescoring/ was created

[2023-Apr-20 12:14:32]: Rescoring with RTMScore

[2023-Apr-20 12:23:07]: Rescoring with RTMScore complete in 515.3025!


'/media/mario/T7/FINISHED/FINISHED/xiap/temp/clustering/espsim_clustered.sdf'


[2023-Apr-20 12:23:07]: Rescoring with Vinardo

[2023-Apr-20 12:23:19]: Rescoring with Vinardo complete in 11.8012!

[2023-Apr-20 12:23:19]: Combining all score for /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_espsim_clustered


Combining scores: 100%|██████████| 7/7 [00:00<00:00, 413.06files/s]



[2023-Apr-20 12:23:19]: Rescoring complete in 1042.9834!
The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_3DScore_clustered was created
Splitting SDF file 3DScore_clustered.sdf ...


Splitting files: 100%|██████████| 25/25 [00:01<00:00, 22.16it/s]


Split docking library into 25 files each containing 218 compounds

[2023-Apr-20 12:23:22]: Rescoring with GNINA


Submitting GNINA rescoring jobs: 100%|██████████| 25/25 [00:00<00:00, 37.64file/s]
Rescoring with GNINA: 100%|██████████| 25/25 [00:59<00:00,  2.39s/file]



[2023-Apr-20 12:24:23]: Rescoring with GNINA complete in 64.1263!

[2023-Apr-20 12:24:23]: Rescoring with AD4

[2023-Apr-20 12:24:32]: Rescoring with AD4 complete in 8.4994!

[2023-Apr-20 12:24:32]: Rescoring with CHEMPLP


5235 molecules converted



[2023-Apr-20 12:24:54]: Rescoring with CHEMPLP complete in 22.2929!

[2023-Apr-20 12:24:54]: Rescoring with RFScoreVS

[2023-Apr-20 12:25:19]: Rescoring with RF-Score-VS complete in 24.5709!
Splitting SDF file 3DScore_clustered.sdf ...


Splitting files: 100%|██████████| 25/25 [00:00<00:00, 34.37it/s]


Split docking library into 25 files each containing 218 compounds


Submitting LinF9 rescoring jobs: 100%|██████████| 25/25 [00:00<00:00, 44.77file/s]
Rescoring with LinF9: 100%|██████████| 25/25 [00:01<00:00, 14.87file/s]



[2023-Apr-20 12:25:24]: Rescoring with LinF9 complete in 4.8862!
The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_3DScore_clustered/SCORCH_rescoring/ was created

[2023-Apr-20 12:25:24]: Converting protein file to .pdbqt ...

[2023-Apr-20 12:25:26]: Converting SDF file 3DScore_clustered.sdf to .pdbqt files...
Converted 5235 molecules.

[2023-Apr-20 12:25:34]: Rescoring with SCORCH

[2023-Apr-20 12:27:05]: Rescoring with SCORCH complete in 101.5900!
The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_3DScore_clustered/RTMScore_rescoring/ was created

[2023-Apr-20 12:27:05]: Rescoring with RTMScore

[2023-Apr-20 12:30:36]: Rescoring with RTMScore complete in 210.2830!


'/media/mario/T7/FINISHED/FINISHED/xiap/temp/clustering/3DScore_clustered.sdf'


[2023-Apr-20 12:30:36]: Rescoring with Vinardo

[2023-Apr-20 12:30:41]: Rescoring with Vinardo complete in 5.1240!

[2023-Apr-20 12:30:41]: Combining all score for /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_3DScore_clustered


Combining scores: 100%|██████████| 7/7 [00:00<00:00, 412.37files/s]


[2023-Apr-20 12:30:41]: Rescoring complete in 441.4854!
The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_bestpose_clustered was created
Splitting SDF file bestpose_clustered.sdf ...



Splitting files: 100%|██████████| 24/24 [00:02<00:00, 10.22it/s]


Split docking library into 24 files each containing 471 compounds

[2023-Apr-20 12:30:46]: Rescoring with GNINA


Submitting GNINA rescoring jobs: 100%|██████████| 24/24 [00:00<00:00, 45.16file/s]
Rescoring with GNINA: 100%|██████████| 24/24 [02:06<00:00,  5.25s/file]



[2023-Apr-20 12:32:54]: Rescoring with GNINA complete in 133.4590!

[2023-Apr-20 12:32:54]: Rescoring with AD4

[2023-Apr-20 12:33:08]: Rescoring with AD4 complete in 14.1497!

[2023-Apr-20 12:33:08]: Rescoring with CHEMPLP


11300 molecules converted



[2023-Apr-20 12:33:59]: Rescoring with CHEMPLP complete in 50.2142!

[2023-Apr-20 12:33:59]: Rescoring with RFScoreVS

[2023-Apr-20 12:34:46]: Rescoring with RF-Score-VS complete in 47.4329!
Splitting SDF file bestpose_clustered.sdf ...


Splitting files: 100%|██████████| 24/24 [00:02<00:00,  9.45it/s]


Split docking library into 24 files each containing 471 compounds


Submitting LinF9 rescoring jobs: 100%|██████████| 24/24 [00:00<00:00, 28.86file/s]
Rescoring with LinF9: 100%|██████████| 24/24 [00:02<00:00,  9.82file/s]



[2023-Apr-20 12:34:55]: Rescoring with LinF9 complete in 9.2999!
The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_bestpose_clustered/SCORCH_rescoring/ was created

[2023-Apr-20 12:34:55]: Converting protein file to .pdbqt ...

[2023-Apr-20 12:34:57]: Converting SDF file bestpose_clustered.sdf to .pdbqt files...
Converted 11300 molecules.

[2023-Apr-20 12:35:16]: Rescoring with SCORCH

[2023-Apr-20 12:38:34]: Rescoring with SCORCH complete in 219.0223!
The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_bestpose_clustered/RTMScore_rescoring/ was created

[2023-Apr-20 12:38:34]: Rescoring with RTMScore

[2023-Apr-20 12:45:51]: Rescoring with RTMScore complete in 436.3511!


'/media/mario/T7/FINISHED/FINISHED/xiap/temp/clustering/bestpose_clustered.sdf'


[2023-Apr-20 12:45:51]: Rescoring with Vinardo

[2023-Apr-20 12:46:01]: Rescoring with Vinardo complete in 10.5012!

[2023-Apr-20 12:46:01]: Combining all score for /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_bestpose_clustered


Combining scores: 100%|██████████| 7/7 [00:00<00:00, 406.49files/s]


[2023-Apr-20 12:46:01]: Rescoring complete in 920.6019!
The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_bestpose_GNINA_clustered was created
Splitting SDF file bestpose_GNINA_clustered.sdf ...



Splitting files: 100%|██████████| 25/25 [00:00<00:00, 141.07it/s]


Split docking library into 25 files each containing 54 compounds

[2023-Apr-20 12:46:02]: Rescoring with GNINA


Submitting GNINA rescoring jobs: 100%|██████████| 25/25 [00:00<00:00, 46.62file/s]
Rescoring with GNINA: 100%|██████████| 25/25 [00:18<00:00,  1.34file/s]



[2023-Apr-20 12:46:21]: Rescoring with GNINA complete in 19.9227!

[2023-Apr-20 12:46:21]: Rescoring with AD4

[2023-Apr-20 12:46:24]: Rescoring with AD4 complete in 2.7246!

[2023-Apr-20 12:46:24]: Rescoring with CHEMPLP


1300 molecules converted



[2023-Apr-20 12:46:30]: Rescoring with CHEMPLP complete in 6.0708!

[2023-Apr-20 12:46:30]: Rescoring with RFScoreVS

[2023-Apr-20 12:46:38]: Rescoring with RF-Score-VS complete in 7.9534!
Splitting SDF file bestpose_GNINA_clustered.sdf ...


Splitting files: 100%|██████████| 25/25 [00:00<00:00, 101.76it/s]


Split docking library into 25 files each containing 54 compounds


Submitting LinF9 rescoring jobs: 100%|██████████| 25/25 [00:00<00:00, 30.70file/s]
Rescoring with LinF9: 100%|██████████| 25/25 [00:01<00:00, 17.68file/s]



[2023-Apr-20 12:46:41]: Rescoring with LinF9 complete in 3.1227!
The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_bestpose_GNINA_clustered/SCORCH_rescoring/ was created

[2023-Apr-20 12:46:41]: Converting protein file to .pdbqt ...

[2023-Apr-20 12:46:43]: Converting SDF file bestpose_GNINA_clustered.sdf to .pdbqt files...
Converted 1300 molecules.

[2023-Apr-20 12:46:44]: Rescoring with SCORCH

[2023-Apr-20 12:47:11]: Rescoring with SCORCH complete in 29.5174!
The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_bestpose_GNINA_clustered/RTMScore_rescoring/ was created

[2023-Apr-20 12:47:11]: Rescoring with RTMScore

[2023-Apr-20 12:47:59]: Rescoring with RTMScore complete in 48.6986!


'/media/mario/T7/FINISHED/FINISHED/xiap/temp/clustering/bestpose_GNINA_clustered.sdf'


[2023-Apr-20 12:47:59]: Rescoring with Vinardo

[2023-Apr-20 12:48:01]: Rescoring with Vinardo complete in 1.7312!

[2023-Apr-20 12:48:01]: Combining all score for /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_bestpose_GNINA_clustered


Combining scores: 100%|██████████| 7/7 [00:00<00:00, 1168.47files/s]


[2023-Apr-20 12:48:01]: Rescoring complete in 119.7792!
The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_bestpose_SMINA_clustered was created
Splitting SDF file bestpose_SMINA_clustered.sdf ...



Splitting files: 100%|██████████| 25/25 [00:00<00:00, 34.31it/s]


Split docking library into 25 files each containing 208 compounds

[2023-Apr-20 12:48:03]: Rescoring with GNINA


Submitting GNINA rescoring jobs: 100%|██████████| 25/25 [00:00<00:00, 41.65file/s]
Rescoring with GNINA: 100%|██████████| 25/25 [01:00<00:00,  2.40s/file]



[2023-Apr-20 12:49:05]: Rescoring with GNINA complete in 63.4725!

[2023-Apr-20 12:49:05]: Rescoring with AD4

[2023-Apr-20 12:49:12]: Rescoring with AD4 complete in 7.8117!

[2023-Apr-20 12:49:12]: Rescoring with CHEMPLP


4997 molecules converted



[2023-Apr-20 12:49:34]: Rescoring with CHEMPLP complete in 21.8051!

[2023-Apr-20 12:49:34]: Rescoring with RFScoreVS

[2023-Apr-20 12:49:57]: Rescoring with RF-Score-VS complete in 22.5357!
Splitting SDF file bestpose_SMINA_clustered.sdf ...


Splitting files: 100%|██████████| 25/25 [00:01<00:00, 21.62it/s]


Split docking library into 25 files each containing 208 compounds


Submitting LinF9 rescoring jobs: 100%|██████████| 25/25 [00:00<00:00, 30.44file/s]
Rescoring with LinF9: 100%|██████████| 25/25 [00:02<00:00, 12.07file/s]



[2023-Apr-20 12:50:03]: Rescoring with LinF9 complete in 6.0565!
The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_bestpose_SMINA_clustered/SCORCH_rescoring/ was created

[2023-Apr-20 12:50:03]: Converting protein file to .pdbqt ...

[2023-Apr-20 12:50:04]: Converting SDF file bestpose_SMINA_clustered.sdf to .pdbqt files...
Converted 4997 molecules.

[2023-Apr-20 12:50:12]: Rescoring with SCORCH

[2023-Apr-20 12:51:46]: Rescoring with SCORCH complete in 102.7305!
The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_bestpose_SMINA_clustered/RTMScore_rescoring/ was created

[2023-Apr-20 12:51:46]: Rescoring with RTMScore

[2023-Apr-20 12:54:42]: Rescoring with RTMScore complete in 176.2635!


'/media/mario/T7/FINISHED/FINISHED/xiap/temp/clustering/bestpose_SMINA_clustered.sdf'


[2023-Apr-20 12:54:42]: Rescoring with Vinardo

[2023-Apr-20 12:54:47]: Rescoring with Vinardo complete in 5.4089!

[2023-Apr-20 12:54:47]: Combining all score for /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_bestpose_SMINA_clustered


Combining scores: 100%|██████████| 7/7 [00:00<00:00, 699.98files/s]


[2023-Apr-20 12:54:47]: Rescoring complete in 406.2034!
The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_bestpose_PLANTS_clustered was created
Splitting SDF file bestpose_PLANTS_clustered.sdf ...



Splitting files: 100%|██████████| 25/25 [00:01<00:00, 22.54it/s]


Split docking library into 25 files each containing 208 compounds

[2023-Apr-20 12:54:50]: Rescoring with GNINA


Submitting GNINA rescoring jobs: 100%|██████████| 25/25 [00:00<00:00, 28.30file/s]
Rescoring with GNINA: 100%|██████████| 25/25 [00:57<00:00,  2.32s/file]



[2023-Apr-20 12:55:50]: Rescoring with GNINA complete in 62.2881!

[2023-Apr-20 12:55:50]: Rescoring with AD4

[2023-Apr-20 12:55:57]: Rescoring with AD4 complete in 7.6702!

[2023-Apr-20 12:55:57]: Rescoring with CHEMPLP


5003 molecules converted



[2023-Apr-20 12:56:19]: Rescoring with CHEMPLP complete in 21.9733!

[2023-Apr-20 12:56:19]: Rescoring with RFScoreVS

[2023-Apr-20 12:56:41]: Rescoring with RF-Score-VS complete in 22.0977!
Splitting SDF file bestpose_PLANTS_clustered.sdf ...


Splitting files: 100%|██████████| 25/25 [00:00<00:00, 35.99it/s]


Split docking library into 25 files each containing 208 compounds


Submitting LinF9 rescoring jobs: 100%|██████████| 25/25 [00:00<00:00, 43.17file/s]
Rescoring with LinF9: 100%|██████████| 25/25 [00:01<00:00, 14.77file/s]



[2023-Apr-20 12:56:46]: Rescoring with LinF9 complete in 4.9737!
The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_bestpose_PLANTS_clustered/SCORCH_rescoring/ was created

[2023-Apr-20 12:56:46]: Converting protein file to .pdbqt ...

[2023-Apr-20 12:56:48]: Converting SDF file bestpose_PLANTS_clustered.sdf to .pdbqt files...
Converted 5003 molecules.

[2023-Apr-20 12:56:55]: Rescoring with SCORCH

[2023-Apr-20 12:58:26]: Rescoring with SCORCH complete in 99.2261!
The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_bestpose_PLANTS_clustered/RTMScore_rescoring/ was created

[2023-Apr-20 12:58:26]: Rescoring with RTMScore

[2023-Apr-20 13:01:13]: Rescoring with RTMScore complete in 167.4117!


'/media/mario/T7/FINISHED/FINISHED/xiap/temp/clustering/bestpose_PLANTS_clustered.sdf'


[2023-Apr-20 13:01:13]: Rescoring with Vinardo

[2023-Apr-20 13:01:19]: Rescoring with Vinardo complete in 5.6375!

[2023-Apr-20 13:01:19]: Combining all score for /media/mario/T7/FINISHED/FINISHED/xiap/temp/rescoring_bestpose_PLANTS_clustered


Combining scores: 100%|██████████| 7/7 [00:00<00:00, 713.89files/s]


[2023-Apr-20 13:01:19]: Rescoring complete in 391.3670!





In [6]:
# Per-function evaluation first, then the consensus methods over the score combinations;
# both calls take the same working directory, library and metric list.
calculate_EF_single_functions(
    w_dir,
    docking_library,
    clustering_metrics,
)
apply_consensus_methods_combinations(
    w_dir,
    docking_library,
    clustering_metrics,
)

The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/ranking was created
The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/consensus was created
The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/ranking already exists
The folder: /media/mario/T7/FINISHED/FINISHED/xiap/temp/consensus already exists

[2023-Apr-20 15:13:34]: Calculating consensus methods for every possible score combination...


100%|██████████| 8/8 [03:55<00:00, 29.47s/it]


In [None]:
def apply_consensus_methods(w_dir, clustering_metric, method, rescoring_functions):
    """Apply one consensus method to the rescored poses of one clustering metric.

    Reads ``<w_dir>/temp/rescoring_<clustering_metric>_clustered/allposes_rescored.csv``,
    writes the standardised and ranked tables to ``<w_dir>/temp/ranking/`` and the
    consensus result to
    ``<w_dir>/temp/consensus/<clustering_metric>_<method>_results.csv``.

    Parameters
    ----------
    w_dir : str
        Working directory that contains the ``temp`` folder.
    clustering_metric : str
        Name of the clustering metric whose rescoring results are processed.
    method : str
        One of ``'method1'`` .. ``'method7'``. Methods 1-4 operate on the ranked
        table, methods 5-7 on the standardised table.
    rescoring_functions : list
        Accepted for interface compatibility but currently NOT used: the debug preview
        shows a fixed set of columns and the consensus uses every score column
        returned by ``process_dataframes``.

    Raises
    ------
    ValueError
        If ``method`` is not a known consensus method. Raised before any file is
        read or written.
    """
    # Which table each consensus method consumes. Checked up front so that a typo in
    # `method` fails immediately rather than after the ranking CSVs have been written.
    input_kind_by_method = {
        'method1': 'ranked', 'method2': 'ranked', 'method3': 'ranked', 'method4': 'ranked',
        'method5': 'standardised', 'method6': 'standardised', 'method7': 'standardised',
    }
    if method not in input_kind_by_method:
        raise ValueError(f"Invalid method: {method}")
    consensus_functions = {
        'method1': method1_ECR_best, 'method2': method2_ECR_average,
        'method3': method3_avg_ECR, 'method4': method4_RbR,
        'method5': method5_RbV, 'method6': method6_Zscore_best, 'method7': method7_Zscore_avg,
    }

    create_temp_folder(w_dir+'/temp/ranking')
    rescoring_folder = f'rescoring_{clustering_metric}_clustered'
    rescored_dataframe = pd.read_csv(w_dir + f'/temp/{rescoring_folder}/allposes_rescored.csv')
    standardised_dataframe = standardize_scores(rescored_dataframe)
    # Debug preview only: this selection is printed and not used by the consensus below.
    preview_columns = ['Pose ID', 'GNINA', 'CNN-Score', 'RTMScore']
    print(standardised_dataframe[preview_columns])

    standardised_dataframes, ranked_dataframes = process_dataframes(w_dir, {clustering_metric: rescoring_folder})
    frames_by_kind = {'standardised': standardised_dataframes, 'ranked': ranked_dataframes}
    for frames in frames_by_kind.values():
        for df_name, df in frames.items():
            # The compound ID is the part of the pose ID before the first underscore.
            df['ID'] = df['Pose ID'].str.split('_').str[0]
            df.to_csv(w_dir + f'/temp/ranking/{df_name}.csv', index=False)

    create_temp_folder(w_dir+'/temp/consensus')
    input_kind = input_kind_by_method[method]
    input_dataframe = frames_by_kind[input_kind][f'{clustering_metric}_{input_kind}']
    # Every column except the identifiers is handed to the consensus function as a score column.
    score_columns = [col for col in input_dataframe if col not in ['Pose ID', 'ID']]
    analysed_dataframe = consensus_functions[method](input_dataframe, clustering_metric, score_columns)

    print(analysed_dataframe)
    # Not every method returns a 'Pose ID' column, hence errors='ignore'.
    analysed_dataframe = analysed_dataframe.drop(columns="Pose ID", errors='ignore')
    analysed_dataframe.to_csv(w_dir+f'/temp/consensus/{clustering_metric}_{method}_results.csv', index=False)

# Run 'method6' on the 'bestpose' poses.
apply_consensus_methods(
    w_dir,
    'bestpose',
    'method6',
    ['gnina', 'RTMScore'],
)

In [None]:
# Join the consensus results with the prepared library so each scored compound carries its SMILES.
# Paths are derived from `w_dir` so this cell reads the files produced by the cells above
# instead of a hard-coded directory from a different dataset.
results = pd.read_csv(w_dir+'/temp/consensus/bestpose_method6_results.csv')
database = PandasTools.LoadSDF(
    w_dir+'/temp/final_library.sdf',
    molColName=None,
    smilesName='SMILES',
    idName='ID',
)
# Inner join: only compounds present in both the library and the consensus results are kept.
final = pd.merge(database, results, on='ID', how='inner')
final.to_csv(w_dir+'/temp/consensus/results_for_ML.csv', index=False)

In [None]:
./gnina -r /media/mario/T7/FINISHED/FINISHED/ace/receptor_prepared_protoss.pdb -l /media/mario/T7/FINISHED/FINISHED/ace/temp/clustering/RMSD_clustered.sdf --autobox_ligand /media/mario/T7/FINISHED/FINISHED/ace/crystal_ligand_protoss.sdf -o /media/mario/T7/FINISHED/FINISHED/ace/temp/rescoring_RMSD_clustered/vinardo_rescoring/vinardo_scores.csv --score_only --scoring vinardo --cnn_scoring none --no_gpu