### Comment: 
In this notebook we'll make use of Fpocket to find the pockets in the receptors. Another great tool to compute the pockets is https://proteins.plus/ 

In [44]:
%%bash
# remove everything except this notebook
shopt -s extglob
rm -rf !("notebook.ipynb") 
# to test any error in the terminal, use:
# ipython3 -c "%run notebook.ipynb"

# finding the pockets

In [45]:
# copy prepared receptors into this directory
!cp -r ../example03\ -\ prepare\ receptors/prepared_receptors .

In [7]:
!cp -r prepared_receptors_pockets/ prepared_receptors/

In [3]:
# list files in directory
import glob, os
receptor_paths = sorted(glob.glob('prepared_receptors/*.pdb*'))
receptor_names = list(map(lambda x: os.path.splitext(os.path.basename(x))[0], receptor_paths))
receptor_paths

['prepared_receptors_pockets/3GKW.pdbqt',
 'prepared_receptors_pockets/3P0V.pdbqt',
 'prepared_receptors_pockets/5GTY.pdbqt']

In [47]:
for i,receptor_path in enumerate(receptor_paths):
    print("\n", "-"*50, "\n\n", receptor_path, "\n")
    receptor_name = receptor_names[i]; #print(receptor_name)
    # -------------------------------------------------------------------------------
    # step01: compute pockets for each receptor
    !fpocket -f {receptor_path} -d > 'prepared_receptors/fp_'{receptor_name}'.csv'
    print("fpocket finished running")
print("\n", "-"*50, "\n")
%ls prepared_receptors/


 -------------------------------------------------- 

 prepared_receptors/3GKW.pdbqt 

fpocket finished running

 -------------------------------------------------- 

 prepared_receptors/3P0V.pdbqt 

fpocket finished running

 -------------------------------------------------- 

 prepared_receptors/5GTY.pdbqt 

fpocket finished running

 -------------------------------------------------- 

[0m[01;34m3GKW_out[0m/   [01;34m3P0V_out[0m/   [01;34m5GTY_out[0m/   fp_3GKW.csv  fp_5GTY.csv
3GKW.pdbqt  3P0V.pdbqt  5GTY.pdbqt  fp_3P0V.csv


# Pockets visualization

In [1]:
# IMPORTANT: CHANGE THIS VALUE AS YOU PREFER
# pockets with a 'drug_score >= drug_score_min' will appear in red, others will appear in blue
drug_score_min = 0.0221
# when creating the box around the pocket, extend it by a few angstroms on each dimension
# we can do this extension later, right before performing the docking. that way we can also take into consideration the size of the ligand.
box_extra_angstroms = 0

In [8]:
import py3Dmol
from rdkit import Chem
# import random
from pymol import cmd
import sys
sys.path.insert(1, '../tools/')
from utils import getbox
import pandas as pd

list_pockets_data = []
for i,receptor_path in enumerate(receptor_paths):
    print("\n", "-"*50, "\n\n", receptor_path, "\n")
    receptor_name = receptor_names[i]; #print(receptor_name)
    # receptor_path = receptor_paths[0]
    # receptor_name = receptor_names[0]
    print("receptor_path", receptor_path, "receptor_name", receptor_name)

    view = py3Dmol.view()
    view.removeAllModels()
    view.setViewStyle({'style':'outline','color':'black','width':0.1})

    view.addModel(open(receptor_path,'r').read(),'pdb')
    Prot=view.getModel()
    Prot.setStyle({'cartoon':{'arrows':True, 'tubes':True, 'style':'oval', 'color':'white'}})
    #view.addSurface(py3Dmol.VDW,{'opacity':0.6,'color':'white'})

    # view.addModel(open('1XOZ_lig_H.mol2','r').read(),'mol2')
    # ref_m = view.getModel()
    # ref_m.setStyle({},{'stick':{'colorscheme':'greenCarbon','radius':0.1}})
    
    # view.addLine({"start": {"x":0,"y":0,"z":0},"end":{"x":8,"y":8, "z":8},"color":'red'})
    # view.addBox({"center":{"x":0,"y":0,"z":0},"dimensions": {"w":3,"h":4,"d":2},"color":'red','opacity':0.3});
    

    pocket_paths = sorted(glob.glob('prepared_receptors/' + receptor_name + '_out/*.pqr'))
    # print("pocket_paths", pocket_paths)
    pockets_data = pd.read_csv('prepared_receptors/fp_' + receptor_name + '.csv',sep=' ',index_col=[0])
    # display(pockets_data)
    
    for cav_id, pocket_path in enumerate(pocket_paths, start=1):    
        drug_score = pockets_data["drug_score"].loc[cav_id]
        # print("cav_id", cav_id, "pocket_path", pocket_path, "drug_score", drug_score)
        # color = ["#"+''.join([random.choice('0123456789ABCDEF') for j in range(6)])]
        color = ['green'] if drug_score >= drug_score_min else ['blue']
        # print(color)
        view.addModel(open(pocket_path, 'r').read(), 'pqr')
        x = view.getModel()
        x.setStyle({},{'sphere':{'color':color[0],'opacity':1}})
        # compute the box center and size out from the pocket
        pocket_num = cav_id
        # print("pocket_num", pocket_num)
        cmd.load(filename=pocket_path,format='pqr',object=pocket_num)
        center,size=getbox(selection=pocket_num,extending=box_extra_angstroms,software='vina')
        pockets_data.loc[pocket_num,'receptor'] = receptor_name
        pockets_data.loc[pocket_num,'center_x']=center['center_x']
        pockets_data.loc[pocket_num,'center_y']=center['center_y']
        pockets_data.loc[pocket_num,'center_z']=center['center_z']
        pockets_data.loc[pocket_num,'size_x']=size['size_x']
        pockets_data.loc[pocket_num,'size_y']=size['size_y']
        pockets_data.loc[pocket_num,'size_z']=size['size_z']
        # draw the box if drug_score >= drug_score_min
        if drug_score >= drug_score_min:
            view.addBox({"center":{"x":center['center_x'],"y":center['center_y'],"z":center['center_z']},"dimensions": {"w":size['size_x'],"h":size['size_y'],"d":size['size_z']},"color":'red','opacity':0.3});
    # update pockets data list
    # edit index 'cav_id' and add 'drug_id' to the beginning of each number
    pockets_data.index = receptor_name + '_' + f'{pockets_data.index:03}' #pockets_data.index.astype(str)
    list_pockets_data.append(pockets_data)
    display(pockets_data)
    # show 3d view
    view.zoomTo()
    view.show()


 -------------------------------------------------- 

 prepared_receptors_pockets/3GKW.pdbqt 

receptor_path prepared_receptors_pockets/3GKW.pdbqt receptor_name 3GKW


Unnamed: 0_level_0,drug_score,volume,nb_asph,inter_chain,apol_asph_proportion,mean_asph_radius,as_density,mean_asph_solv_acc,mean_loc_hyd_dens,flex,...,lig_het_tag,name_chain_1,name_chain_2,receptor,center_x,center_y,center_z,size_x,size_y,size_z
cav_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3GKW_1,0.0004,89.239,19,0,0.2105,3.54,1.9197,0.359,3.0,0.2204,...,,A,A,3GKW,-28.344,15.920499,-73.912498,12.866001,17.579,12.824997
3GKW_2,0.0028,151.2805,22,0,0.6364,3.6132,2.2296,0.3886,13.0,0.7028,...,,A,A,3GKW,-21.296499,18.983,-54.9575,15.139,13.195999,15.569
3GKW_3,0.8579,1213.3354,158,0,0.6203,4.0133,7.6232,0.5053,61.0612,0.0977,...,,A,A,3GKW,-44.542501,14.454,-35.480001,13.853001,14.700001,14.827999
3GKW_4,0.0005,276.2936,18,0,0.4444,3.8362,3.4734,0.5644,7.0,0.3844,...,,A,A,3GKW,-29.3975,-7.0045,-52.434999,13.029001,12.967,15.046001
3GKW_5,0.0021,162.2864,17,0,0.8824,3.6571,1.8439,0.53,14.0,0.1454,...,,A,A,3GKW,-25.7855,14.7195,-48.357,14.249001,17.803,13.41
3GKW_6,0.002,181.0574,21,0,1.0,4.0059,1.5075,0.5722,20.0,0.4904,...,,A,A,3GKW,-59.136999,13.0845,-45.618,12.436001,13.375,16.744003
3GKW_7,0.0017,348.4018,25,0,0.8,4.0572,3.464,0.6452,18.8,0.1686,...,,A,A,3GKW,-43.0275,15.024,-70.893002,15.567001,15.144001,14.896004
3GKW_8,0.0006,304.436,19,0,0.3684,3.9768,3.8428,0.6006,6.0,0.1503,...,,A,A,3GKW,-54.455999,11.628,-61.371998,13.034,12.432,17.849998
3GKW_9,0.0008,466.8372,24,0,0.1667,3.9928,4.4683,0.7313,3.0,0.1876,...,,A,A,3GKW,-22.75,21.766,-80.5555,13.742001,15.946001,11.695
3GKW_10,0.0221,293.3085,33,0,0.9091,4.0864,2.6539,0.4791,29.0,0.2869,...,,A,A,3GKW,-29.4075,7.2475,-45.393501,11.615,13.015,12.609001



 -------------------------------------------------- 

 prepared_receptors_pockets/3P0V.pdbqt 

receptor_path prepared_receptors_pockets/3P0V.pdbqt receptor_name 3P0V


Unnamed: 0_level_0,drug_score,volume,nb_asph,inter_chain,apol_asph_proportion,mean_asph_radius,as_density,mean_asph_solv_acc,mean_loc_hyd_dens,flex,...,lig_het_tag,name_chain_1,name_chain_2,receptor,center_x,center_y,center_z,size_x,size_y,size_z
cav_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3P0V_1,0.0004,85.4089,19,0,0.2105,3.54,1.9197,0.359,3.0,0.2204,...,,A,A,3P0V,-28.344,15.920499,-73.912498,12.866001,17.579,12.824997
3P0V_2,0.0028,146.4124,22,0,0.6364,3.6132,2.2296,0.3886,13.0,0.7028,...,,A,A,3P0V,-21.296499,18.983,-54.9575,15.139,13.195999,15.569
3P0V_3,0.8579,1206.7412,158,0,0.6203,4.0133,7.6232,0.5053,61.0612,0.0977,...,,A,A,3P0V,-44.542501,14.454,-35.480001,13.853001,14.700001,14.827999
3P0V_4,0.0005,279.251,18,0,0.4444,3.8362,3.4734,0.5644,7.0,0.3844,...,,A,A,3P0V,-29.3975,-7.0045,-52.434999,13.029001,12.967,15.046001
3P0V_5,0.0021,160.1788,17,0,0.8824,3.6571,1.8439,0.53,14.0,0.1454,...,,A,A,3P0V,-25.7855,14.7195,-48.357,14.249001,17.803,13.41
3P0V_6,0.002,184.5439,21,0,1.0,4.0059,1.5075,0.5722,20.0,0.4904,...,,A,A,3P0V,-59.136999,13.0845,-45.618,12.436001,13.375,16.744003
3P0V_7,0.0017,351.1332,25,0,0.8,4.0572,3.464,0.6452,18.8,0.1686,...,,A,A,3P0V,-43.0275,15.024,-70.893002,15.567001,15.144001,14.896004
3P0V_8,0.0006,292.2429,19,0,0.3684,3.9768,3.8428,0.6006,6.0,0.1503,...,,A,A,3P0V,-54.455999,11.628,-61.371998,13.034,12.432,17.849998
3P0V_9,0.0008,461.8363,24,0,0.1667,3.9928,4.4683,0.7313,3.0,0.1876,...,,A,A,3P0V,-22.75,21.766,-80.5555,13.742001,15.946001,11.695
3P0V_10,0.0221,287.7342,33,0,0.9091,4.0864,2.6539,0.4791,29.0,0.2869,...,,A,A,3P0V,-29.4075,7.2475,-45.393501,11.615,13.015,12.609001



 -------------------------------------------------- 

 prepared_receptors_pockets/5GTY.pdbqt 

receptor_path prepared_receptors_pockets/5GTY.pdbqt receptor_name 5GTY


Unnamed: 0_level_0,drug_score,volume,nb_asph,inter_chain,apol_asph_proportion,mean_asph_radius,as_density,mean_asph_solv_acc,mean_loc_hyd_dens,flex,...,lig_het_tag,name_chain_1,name_chain_2,receptor,center_x,center_y,center_z,size_x,size_y,size_z
cav_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5GTY_1,0.0004,88.6799,19,0,0.2105,3.54,1.9197,0.359,3.0,0.2204,...,,A,A,5GTY,-28.344,15.920499,-73.912498,12.866001,17.579,12.824997
5GTY_2,0.0028,145.8179,22,0,0.6364,3.6132,2.2296,0.3886,13.0,0.7028,...,,A,A,5GTY,-21.296499,18.983,-54.9575,15.139,13.195999,15.569
5GTY_3,0.8579,1220.3418,158,0,0.6203,4.0133,7.6232,0.5053,61.0612,0.0977,...,,A,A,5GTY,-44.542501,14.454,-35.480001,13.853001,14.700001,14.827999
5GTY_4,0.0005,275.2239,18,0,0.4444,3.8362,3.4734,0.5644,7.0,0.3844,...,,A,A,5GTY,-29.3975,-7.0045,-52.434999,13.029001,12.967,15.046001
5GTY_5,0.0021,162.0876,17,0,0.8824,3.6571,1.8439,0.53,14.0,0.1454,...,,A,A,5GTY,-25.7855,14.7195,-48.357,14.249001,17.803,13.41
5GTY_6,0.002,179.3142,21,0,1.0,4.0059,1.5075,0.5722,20.0,0.4904,...,,A,A,5GTY,-59.136999,13.0845,-45.618,12.436001,13.375,16.744003
5GTY_7,0.0017,342.8061,25,0,0.8,4.0572,3.464,0.6452,18.8,0.1686,...,,A,A,5GTY,-43.0275,15.024,-70.893002,15.567001,15.144001,14.896004
5GTY_8,0.0006,292.9463,19,0,0.3684,3.9768,3.8428,0.6006,6.0,0.1503,...,,A,A,5GTY,-54.455999,11.628,-61.371998,13.034,12.432,17.849998
5GTY_9,0.0008,471.2569,24,0,0.1667,3.9928,4.4683,0.7313,3.0,0.1876,...,,A,A,5GTY,-22.75,21.766,-80.5555,13.742001,15.946001,11.695
5GTY_10,0.0221,292.1811,33,0,0.9091,4.0864,2.6539,0.4791,29.0,0.2869,...,,A,A,5GTY,-29.4075,7.2475,-45.393501,11.615,13.015,12.609001


# Merge the pockets data

In [121]:
import pandas as pd  

# merge pockets data and sort it by the drugability score
merged_pockets_data = pd.concat(list_pockets_data).sort_values(by=['drug_score'], ascending=False)

# display only the top 10 pockets
merged_pockets_data[0:10]

Unnamed: 0_level_0,drug_score,volume,nb_asph,inter_chain,apol_asph_proportion,mean_asph_radius,as_density,mean_asph_solv_acc,mean_loc_hyd_dens,flex,...,lig_het_tag,name_chain_1,name_chain_2,receptor,center_x,center_y,center_z,size_x,size_y,size_z
cav_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5GTY_3,0.8579,1220.3418,158,0,0.6203,4.0133,7.6232,0.5053,61.0612,0.0977,...,,A,A,5GTY,-44.542501,14.454,-35.480001,13.853001,14.700001,14.827999
3GKW_3,0.8579,1213.3354,158,0,0.6203,4.0133,7.6232,0.5053,61.0612,0.0977,...,,A,A,3GKW,-44.542501,14.454,-35.480001,13.853001,14.700001,14.827999
3P0V_3,0.8579,1206.7412,158,0,0.6203,4.0133,7.6232,0.5053,61.0612,0.0977,...,,A,A,3P0V,-44.542501,14.454,-35.480001,13.853001,14.700001,14.827999
3P0V_10,0.0221,287.7342,33,0,0.9091,4.0864,2.6539,0.4791,29.0,0.2869,...,,A,A,3P0V,-29.4075,7.2475,-45.393501,11.615,13.015,12.609001
5GTY_10,0.0221,292.1811,33,0,0.9091,4.0864,2.6539,0.4791,29.0,0.2869,...,,A,A,5GTY,-29.4075,7.2475,-45.393501,11.615,13.015,12.609001
3GKW_10,0.0221,293.3085,33,0,0.9091,4.0864,2.6539,0.4791,29.0,0.2869,...,,A,A,3GKW,-29.4075,7.2475,-45.393501,11.615,13.015,12.609001
5GTY_2,0.0028,145.8179,22,0,0.6364,3.6132,2.2296,0.3886,13.0,0.7028,...,,A,A,5GTY,-21.296499,18.983,-54.9575,15.139,13.195999,15.569
3P0V_2,0.0028,146.4124,22,0,0.6364,3.6132,2.2296,0.3886,13.0,0.7028,...,,A,A,3P0V,-21.296499,18.983,-54.9575,15.139,13.195999,15.569
3GKW_2,0.0028,151.2805,22,0,0.6364,3.6132,2.2296,0.3886,13.0,0.7028,...,,A,A,3GKW,-21.296499,18.983,-54.9575,15.139,13.195999,15.569
3P0V_16,0.0023,266.2267,21,0,0.6667,3.8994,3.0427,0.5724,13.0,0.3316,...,,A,A,3P0V,-49.394501,-6.074,-43.686501,14.544998,13.17,17.376999


In [123]:
# save merged data to csv file
merged_pockets_data.to_csv('prepared_receptors/merged_pockets_data.csv', sep=' ')

# re-read file to make sure everything is fine
pd.read_csv('prepared_receptors/merged_pockets_data.csv',sep=' ',index_col=[0])

Unnamed: 0_level_0,drug_score,volume,nb_asph,inter_chain,apol_asph_proportion,mean_asph_radius,as_density,mean_asph_solv_acc,mean_loc_hyd_dens,flex,...,lig_het_tag,name_chain_1,name_chain_2,receptor,center_x,center_y,center_z,size_x,size_y,size_z
cav_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5GTY_3,0.8579,1220.3418,158,0,0.6203,4.0133,7.6232,0.5053,61.0612,0.0977,...,,A,A,5GTY,-44.542501,14.454,-35.480001,13.853001,14.700001,14.827999
3GKW_3,0.8579,1213.3354,158,0,0.6203,4.0133,7.6232,0.5053,61.0612,0.0977,...,,A,A,3GKW,-44.542501,14.454,-35.480001,13.853001,14.700001,14.827999
3P0V_3,0.8579,1206.7412,158,0,0.6203,4.0133,7.6232,0.5053,61.0612,0.0977,...,,A,A,3P0V,-44.542501,14.454,-35.480001,13.853001,14.700001,14.827999
3P0V_10,0.0221,287.7342,33,0,0.9091,4.0864,2.6539,0.4791,29.0,0.2869,...,,A,A,3P0V,-29.4075,7.2475,-45.393501,11.615,13.015,12.609001
5GTY_10,0.0221,292.1811,33,0,0.9091,4.0864,2.6539,0.4791,29.0,0.2869,...,,A,A,5GTY,-29.4075,7.2475,-45.393501,11.615,13.015,12.609001
3GKW_10,0.0221,293.3085,33,0,0.9091,4.0864,2.6539,0.4791,29.0,0.2869,...,,A,A,3GKW,-29.4075,7.2475,-45.393501,11.615,13.015,12.609001
5GTY_2,0.0028,145.8179,22,0,0.6364,3.6132,2.2296,0.3886,13.0,0.7028,...,,A,A,5GTY,-21.296499,18.983,-54.9575,15.139,13.195999,15.569
3P0V_2,0.0028,146.4124,22,0,0.6364,3.6132,2.2296,0.3886,13.0,0.7028,...,,A,A,3P0V,-21.296499,18.983,-54.9575,15.139,13.195999,15.569
3GKW_2,0.0028,151.2805,22,0,0.6364,3.6132,2.2296,0.3886,13.0,0.7028,...,,A,A,3GKW,-21.296499,18.983,-54.9575,15.139,13.195999,15.569
3P0V_16,0.0023,266.2267,21,0,0.6667,3.8994,3.0427,0.5724,13.0,0.3316,...,,A,A,3P0V,-49.394501,-6.074,-43.686501,14.544998,13.17,17.376999


In [1]:
!mv ./prepared_receptors/ ./prepared_receptors_pockets