### Comment: 
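In this notebook we read all the pockets found in the previous example and use AutoDock Vina to score every ligand (read from `raw_ligands/` and prepared on the fly with Meeko) against every selected pocket, collecting the results in a score table (`vina_scores`).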

# Preparing the data

In [2]:
%%bash
# remove everything except this notebook
# shopt -s extglob
# rm -rf !("notebook.ipynb")
# to test any error in the terminal, use:
# !ipython3 -c "%run notebook.ipynb"

In [1]:
# copy prepared molecules into this directory
# !cp -r ../example07\ -\ find\ pockets\ in\ prepared_receptors/prepared_receptors_pockets .
# !cp -r ../example04\ -\ prepare\ ligands/prepared_ligands .
# !cp -r ../example04\ -\ prepare\ ligands/raw_ligands .

# Read pockets and prepare data

In [1]:
# IMPORTANT: tune this threshold as you prefer.
# Only pockets with 'drug_score >= drug_score_min' are kept for scoring.
drug_score_min = 0.0221

# directories used by this notebook (relative to the current working directory)
dir_receptors = 'prepared_receptors_pockets/'
dir_ligands = 'raw_ligands/'
dir_vina_scores = 'vina_scores/'

# -------------------------------------------------------------------------------

import pandas as pd

# Re-read the merged pocket table (space-separated, first column used as index),
# keep only the pockets that pass the drug-score threshold, and rank them
# from the highest to the lowest drug score.
pockets_data = (
    pd.read_csv(dir_receptors + 'merged_pockets_data.csv', sep=' ', index_col=[0])
    .loc[lambda pockets: pockets['drug_score'] >= drug_score_min]
    .sort_values(by='drug_score', ascending=False)
)

# preview the first 10 rows of the ranked table
pockets_data.head(10)

Unnamed: 0_level_0,drug_score,volume,nb_asph,inter_chain,apol_asph_proportion,mean_asph_radius,as_density,mean_asph_solv_acc,mean_loc_hyd_dens,flex,...,lig_het_tag,name_chain_1,name_chain_2,receptor,center_x,center_y,center_z,size_x,size_y,size_z
cav_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5GTY_3,0.8579,1220.3418,158,0,0.6203,4.0133,7.6232,0.5053,61.0612,0.0977,...,,A,A,5GTY,-44.542501,14.454,-35.480001,13.853001,14.700001,14.827999
3GKW_3,0.8579,1213.3354,158,0,0.6203,4.0133,7.6232,0.5053,61.0612,0.0977,...,,A,A,3GKW,-44.542501,14.454,-35.480001,13.853001,14.700001,14.827999
3P0V_3,0.8579,1206.7412,158,0,0.6203,4.0133,7.6232,0.5053,61.0612,0.0977,...,,A,A,3P0V,-44.542501,14.454,-35.480001,13.853001,14.700001,14.827999
3P0V_10,0.0221,287.7342,33,0,0.9091,4.0864,2.6539,0.4791,29.0,0.2869,...,,A,A,3P0V,-29.4075,7.2475,-45.393501,11.615,13.015,12.609001
5GTY_10,0.0221,292.1811,33,0,0.9091,4.0864,2.6539,0.4791,29.0,0.2869,...,,A,A,5GTY,-29.4075,7.2475,-45.393501,11.615,13.015,12.609001
3GKW_10,0.0221,293.3085,33,0,0.9091,4.0864,2.6539,0.4791,29.0,0.2869,...,,A,A,3GKW,-29.4075,7.2475,-45.393501,11.615,13.015,12.609001


In [2]:
# import local modules
import os, sys

# Add the parent of the current working directory to the module search path
# (presumably where the local `tools` package lives). The membership check keeps
# the cell idempotent: re-running it no longer appends a duplicate entry.
_parent_dir = os.path.realpath(os.path.join(os.getcwd(), "../"))
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)
# NOTE(review): the star import hides where directory_scraping, ligand_preparation,
# visualize_molecules and parse_molecules come from; kept because other cells may
# rely on additional names exported by `tools`.
from tools import *
# while updating modules
# import importlib
# importlib.reload(directory_scraping)
# importlib.reload(ligand_preparation)
# importlib.reload(visualize_molecules)

# list files in directories
receptor_paths, receptor_names = directory_scraping.list_files_in_dir(directory=dir_receptors, search_pattern='*.pdb*')
ligand_paths, ligand_names = directory_scraping.list_files_in_dir(directory=dir_ligands, search_pattern='*')

# Fail early with an explicit message instead of an opaque IndexError on the
# `[0]` look-ups below when one of the input directories is empty or missing.
if len(receptor_paths) == 0:
    raise FileNotFoundError("no receptor files matching '*.pdb*' found in " + repr(dir_receptors))
if len(ligand_paths) == 0:
    raise FileNotFoundError("no ligand files found in " + repr(dir_ligands))

# show the first (path, name) pair of each kind as a quick sanity check
print(receptor_paths[0], receptor_names[0], sep='\n')
print(ligand_paths[0], ligand_names[0], sep='\n')

# show the complete lists of input files
display(receptor_paths, ligand_paths)

prepared_receptors_pockets/3GKW.pdbqt
3GKW
raw_ligands/1iep_ligand.sdf
1iep_ligand


['prepared_receptors_pockets/3GKW.pdbqt',
 'prepared_receptors_pockets/3P0V.pdbqt',
 'prepared_receptors_pockets/5GTY.pdbqt']

['raw_ligands/1iep_ligand.sdf', 'raw_ligands/1iep_ligand_G_STI.sdf']

# Perform scoring and minimization on selected pockets

### Comment
from: https://autodock-vina.readthedocs.io/en/latest/docking_python.html
There is a small subtlety here: the behavior of the compute_vina_maps() function changes depending on whether the ligand was loaded before or after computing the Vina maps. If no ligand was initialized, compute_vina_maps() will compute the affinity map for each atom type defined in the Vina forcefield (22 in total). This is very useful when we want to dock ligands in batch (a.k.a. virtual screening) but we don’t necessarily know beforehand what atom types will be necessary for those ligands. Alternatively to set_ligand_from_file(), you could also load a molecule using a molecule string in PDBQT format using the set_ligand_from_string() function.

In [15]:
# while updating modules
# import importlib
# importlib.reload(directory_scraping)
# importlib.reload(ligand_preparation)
# importlib.reload(visualize_molecules)

from vina import Vina
import time

from meeko import MoleculePreparation
from meeko import PDBQTMolecule


def _elapsed(since):
    """Format the wall-clock time elapsed since `since` (a time.time() value) as HH:MM:SS."""
    return time.strftime("%H:%M:%S", time.gmtime(time.time() - since))


# set a time flag for each docking
global_init_time = time.time()

# Score rows are collected in a plain list and turned into a DataFrame once, after
# the loops: DataFrame.append() was removed in pandas 2.0 and re-copies the whole
# frame on every call.
score_columns = ['receptor_name', 'pocket_id', 'ligand_name', 'vina_score_before_min', 'vina_score_after_min']
score_records = []

# loop over receptors
for i, (receptor_filepath, receptor_name) in enumerate(zip(receptor_paths, receptor_names)):
    # logging
    print("\n\n"+"-"*50, "RECEPTOR {} out of {}".format(i+1, len(receptor_paths)), receptor_filepath, sep='\n')
    receptor_init_time = time.time()

    # pockets that belong to the current receptor, best drug score first
    receptor_pockets = pockets_data[pockets_data['receptor'] == receptor_name].sort_values(by='drug_score', ascending=False)

    # iterate over the pockets available for the current receptor
    for pocket_id, pocket_row in receptor_pockets.iterrows():
        print('Pocket number: ', pocket_id)
        print('drug_score: ', pocket_row['drug_score'])
        pocket_box_center = [pocket_row['center_x'], pocket_row['center_y'], pocket_row['center_z']]
        print("pocket_box_center:", pocket_box_center)
        # TODO: change this later for efficient box size. it should be sth like max(pocket_size, ligand_size)
        # (the pocket's own size_x/size_y/size_z columns are deliberately not used yet)
        pocket_box_size = [30,30,30]
        print("pocket_box_size:", pocket_box_size)

        # Use a fresh Vina object for every pocket so that no ligand is loaded when
        # the maps are computed. Per the Vina documentation, compute_vina_maps()
        # only builds the maps for the atom types of an already-loaded ligand;
        # reusing one object would therefore restrict the maps of every pocket
        # after the first to the atom types of the previously scored ligand.
        # default values: class vina.vina.Vina(sf_name='vina', cpu=0, seed=0, no_refine=False, verbosity=1)
        v = Vina(sf_name='vina', seed=1)
        # set receptor molecule for the docking
        v.set_receptor(receptor_filepath)
        v.compute_vina_maps(center=pocket_box_center, box_size=pocket_box_size)

        # iterate over ligands
        for j, (ligand_path, ligand_name) in enumerate(zip(ligand_paths, ligand_names)):
            # logging
            print("\n\t"+"-"*50, "\tLIGAND {} out of {}".format(j+1, len(ligand_paths)),"\t" + ligand_path + "\n", sep='\n')
            ligand_init_time = time.time()

            # load ligand and move it to the center of the box
            mol = ligand_preparation.prepare_mols_from_sdf(ligand_path)[0]
            # prepare meeko object
            mkprep = MoleculePreparation()
            mkprep.prepare(mol)
            # print the ligand center before and after the translation
            ligand_preparation.print_ligand_center(mkprep)
            ligand_preparation.translate_ligand(mkprep, pocket_box_center)
            ligand_preparation.print_ligand_center(mkprep)

            # Generate the PDBQT text once; it is written to an auxiliary file
            # (usable with AutoDock-GPU or Vina from the command line) and passed
            # directly to Vina within Python.
            lig_pdbqt_string = mkprep.write_pdbqt_string()
            with open("./aux_pdbqt.pdbqt", "w") as text_file:
                text_file.write(lig_pdbqt_string)
            v.set_ligand_from_string(lig_pdbqt_string)

            # draw the receptor, the translated ligand and the search box
            visualize_molecules.plot_prot_and_ligand(
                prot_path=receptor_filepath,
                lig_str=parse_molecules.MolFromPDBQTBlock(block=lig_pdbqt_string,sanitize=False,removeHs=False),
                include_box=True, center=pocket_box_center, size=pocket_box_size,
            )

            # Score the current pose (the first element is used as the total score)
            energy = v.score()
            vina_score_before_min = energy[0]
            print('Score before minimization: %.3f (kcal/mol)' % vina_score_before_min)

            # Minimized locally the current pose
            energy_minimized = v.optimize()
            vina_score_after_min = energy_minimized[0]
            print('Score after minimization : %.3f (kcal/mol)' % vina_score_after_min)
            v.write_pose(pocket_id + '_' + ligand_name + '_minimized.pdbqt', overwrite=True)

            # save vina score for the final table
            score_records.append({
                'receptor_name': receptor_name,
                'pocket_id': pocket_id,
                'ligand_name': ligand_name,
                'vina_score_before_min': vina_score_before_min,
                'vina_score_after_min': vina_score_after_min,
            })

            # Dock the ligand
            # v.dock(exhaustiveness=docking_exhaustiveness, n_poses=docking_n_poses)
            # result_filename = 'vinaout_r_' + receptor_filename + '__l_' + ligand_filename + '.pdbqt'
            # v.write_poses(results_dir + '/' + result_filename, n_poses=docking_n_poses, overwrite=True)

            # logging
            print("\ntime for ligand {}, global time {}".format(_elapsed(ligand_init_time), _elapsed(global_init_time)))
            # -----------------------------
    # logging
    print("\ntime for receptor {}, global time {}".format(_elapsed(receptor_init_time), _elapsed(global_init_time)))
print("\n", "-"*50, "\n")

# build the score table in one go; the explicit column list keeps the column
# order (and the columns themselves) stable even when nothing was scored
vina_scores = pd.DataFrame(score_records, columns=score_columns)
vina_scores



--------------------------------------------------
RECEPTOR 1 out of 3
prepared_receptors_pockets/3GKW.pdbqt
Pocket number:  3GKW_3
drug_score:  0.8579
pocket_box_center: [-44.54250144958496, 14.45400047302246, -35.48000144958496]
pocket_box_size: [30, 30, 30]
Computing Vina grid ... done.

	--------------------------------------------------
	LIGAND 1 out of 2
	raw_ligands/1iep_ligand.sdf

ligand center:   -0.946    0.133    0.097
ligand center:  -45.489   14.587  -35.383


Score before minimization: 75.277 (kcal/mol)
Score after minimization : -3.433 (kcal/mol)
Performing local search ... done.

time for ligand 00:00:00, global time 00:00:03

	--------------------------------------------------
	LIGAND 2 out of 2
	raw_ligands/1iep_ligand_G_STI.sdf

ligand center:    1.034    0.042    0.212
ligand center:  -43.509   14.496  -35.268


Score before minimization: 124.925 (kcal/mol)
Score after minimization : 124.925 (kcal/mol)
Performing local search ... done.

time for ligand 00:00:00, global time 00:00:04
Pocket number:  3GKW_10
drug_score:  0.0221
pocket_box_center: [-29.40750026702881, 7.247499942779541, -45.39350128173828]
pocket_box_size: [30, 30, 30]

	--------------------------------------------------
	LIGAND 1 out of 2
	raw_ligands/1iep_ligand.sdf

Computing Vina grid ... done.
ligand center:    0.995    0.059   -0.147
ligand center:  -28.412    7.306  -45.540


Score before minimization: 233.122 (kcal/mol)
Score after minimization : 233.122 (kcal/mol)
Performing local search ... done.

time for ligand 00:00:00, global time 00:00:06

	--------------------------------------------------
	LIGAND 2 out of 2
	raw_ligands/1iep_ligand_G_STI.sdf

ligand center:    0.925    0.100   -0.248
ligand center:  -28.483    7.348  -45.642


Score before minimization: 306.953 (kcal/mol)
Score after minimization : 3.422 (kcal/mol)
Performing local search ... done.

time for ligand 00:00:00, global time 00:00:07

time for receptor 00:00:07, global time 00:00:07


--------------------------------------------------
RECEPTOR 2 out of 3
prepared_receptors_pockets/3P0V.pdbqt
Pocket number:  3P0V_3
drug_score:  0.8579
pocket_box_center: [-44.54250144958496, 14.45400047302246, -35.48000144958496]
pocket_box_size: [30, 30, 30]

	--------------------------------------------------Computing Vina grid ... done.

	LIGAND 1 out of 2
	raw_ligands/1iep_ligand.sdf

ligand center:    0.989   -0.172    0.022
ligand center:  -43.553   14.282  -35.458


Score before minimization: 193.269 (kcal/mol)
Score after minimization : -2.000 (kcal/mol)
Performing local search ... done.

time for ligand 00:00:00, global time 00:00:11

	--------------------------------------------------
	LIGAND 2 out of 2
	raw_ligands/1iep_ligand_G_STI.sdf

ligand center:   -1.080    0.075   -0.073
ligand center:  -45.623   14.529  -35.553


Score before minimization: 80.877 (kcal/mol)
Score after minimization : -1.421 (kcal/mol)

time for ligand 00:00:00, global time 00:00:12
Pocket number:  3P0V_10
drug_score:  0.0221
pocket_box_center: [-29.40750026702881, 7.247499942779541, -45.39350128173828]
pocket_box_size: [30, 30, 30]
Performing local search ... done.
Computing Vina grid ... done.

	--------------------------------------------------
	LIGAND 1 out of 2
	raw_ligands/1iep_ligand.sdf

ligand center:    0.835   -0.114    0.217
ligand center:  -28.573    7.133  -45.176


Score before minimization: 221.759 (kcal/mol)
Score after minimization : -5.015 (kcal/mol)
Performing local search ... 
time for ligand 00:00:00, global time 00:00:14

	--------------------------------------------------
	LIGAND 2 out of 2
	raw_ligands/1iep_ligand_G_STI.sdf

done.
ligand center:    1.076    0.304    0.048
ligand center:  -28.331    7.552  -45.346


Score before minimization: 274.940 (kcal/mol)
Score after minimization : -5.909 (kcal/mol)
Performing local search ... 
time for ligand 00:00:00, global time 00:00:15

time for receptor 00:00:07, global time 00:00:15


--------------------------------------------------
RECEPTOR 3 out of 3
prepared_receptors_pockets/5GTY.pdbqt
done.
Pocket number:  5GTY_3
drug_score:  0.8579
pocket_box_center: [-44.54250144958496, 14.45400047302246, -35.48000144958496]
pocket_box_size: [30, 30, 30]

	--------------------------------------------------Computing Vina grid ... done.

	LIGAND 1 out of 2
	raw_ligands/1iep_ligand.sdf

ligand center:    1.007    0.078   -0.095
ligand center:  -43.535   14.532  -35.575


Score before minimization: 197.644 (kcal/mol)
Score after minimization : -1.059 (kcal/mol)
Performing local search ... 
time for ligand 00:00:00, global time 00:00:19

	--------------------------------------------------
	LIGAND 2 out of 2
	raw_ligands/1iep_ligand_G_STI.sdf

done.
ligand center:   -1.042   -0.078   -0.219
ligand center:  -45.584   14.376  -35.699


Score before minimization: 158.210 (kcal/mol)
Score after minimization : -2.282 (kcal/mol)

time for ligand 00:00:00, global time 00:00:20
Pocket number:  5GTY_10
drug_score:  0.0221
pocket_box_center: [-29.40750026702881, 7.247499942779541, -45.39350128173828]
pocket_box_size: [30, 30, 30]
Performing local search ... 
	--------------------------------------------------
	LIGAND 1 out of 2
	raw_ligands/1iep_ligand.sdf

done.
Computing Vina grid ... done.
ligand center:    0.891   -0.026    0.103
ligand center:  -28.516    7.222  -45.290


Score before minimization: 294.096 (kcal/mol)
Score after minimization : -2.566 (kcal/mol)
Performing local search ... done.

time for ligand 00:00:00, global time 00:00:22

	--------------------------------------------------
	LIGAND 2 out of 2
	raw_ligands/1iep_ligand_G_STI.sdf

ligand center:    1.064    0.133   -0.325
ligand center:  -28.344    7.380  -45.718


Score before minimization: 280.501 (kcal/mol)
Score after minimization : 83.756 (kcal/mol)
Performing local search ... done.

time for ligand 00:00:00, global time 00:00:23

time for receptor 00:00:07, global time 00:00:23

 -------------------------------------------------- 



Unnamed: 0,receptor_name,pocket_id,ligand_name,vina_score_before_min,vina_score_after_min
0,3GKW,3GKW_3,1iep_ligand,75.277,-3.433
1,3GKW,3GKW_3,1iep_ligand_G_STI,124.925,124.925
2,3GKW,3GKW_10,1iep_ligand,233.122,233.122
3,3GKW,3GKW_10,1iep_ligand_G_STI,306.953,3.422
4,3P0V,3P0V_3,1iep_ligand,193.269,-2.0
5,3P0V,3P0V_3,1iep_ligand_G_STI,80.877,-1.421
6,3P0V,3P0V_10,1iep_ligand,221.759,-5.015
7,3P0V,3P0V_10,1iep_ligand_G_STI,274.94,-5.909
8,5GTY,5GTY_3,1iep_ligand,197.644,-1.059
9,5GTY,5GTY_3,1iep_ligand_G_STI,158.21,-2.282


In [12]:
# !ipython3 -c "%run notebook.ipynb"

[22;0t]0;IPython: example_notebooks/example08 - score all ligand per protein's pocketsprepared_receptors_pockets/3GKW.pdbqt
3GKW
raw_ligands/1iep_ligand.sdf
1iep_ligand
['prepared_receptors_pockets/3GKW.pdbqt',
 'prepared_receptors_pockets/3P0V.pdbqt',
 'prepared_receptors_pockets/5GTY.pdbqt']
['raw_ligands/1iep_ligand.sdf', 'raw_ligands/1iep_ligand_G_STI.sdf']


--------------------------------------------------
RECEPTOR 1 out of 3
prepared_receptors_pockets/3GKW.pdbqt
Pocket number:  3GKW_3
drug_score:  0.8579
pocket_box_center: [-44.54250144958496, 14.45400047302246, -35.48000144958496]
pocket_box_size: [30, 30, 30]
Computing Vina grid ... done.

	--------------------------------------------------
	LIGAND 1 out of 2
	raw_ligands/1iep_ligand.sdf

ligand center:    0.835   -0.234   -0.190
ligand center:  -43.708   14.220  -35.670
Score before minimization: 278.792 (kcal/mol)

time for ligand 00:00:00, global time 00:00:04

	--------------------------------------------------
	LIGAND