In [1]:
import os
import mdtraj
import numpy as np
import scipy
from scipy.spatial.distance import cdist

import alphaspace2 as al
from alphaspace2.Community import genCommunityPocket
import alphaspace2.Features as features


We apply the ensemble pocket approach to identify the pocket communities associated with
fragments and their corresponding lead structures.
In the first part, we look at the communities and core pockets of a fragment-bound structure of HSP90.
In the second part, we look at different metrics that show how lead structures have better complementarity to
the binding site than their fragment counterparts.

In [3]:
# Load the 2wi2 and 2wi7 HSP90 protein structures and annotate each one with
# the Vina atom types read from its matching pdbqt file.
this_dir = os.path.abspath('')

# Fragment structure (2wi2)
frag_pdb_path = this_dir+"/DataSet/Fragment_to_Lead/protein_2wi2.pdb"
frag_pdbqt_path = this_dir+"/DataSet/Fragment_to_Lead/protein_2wi2.pdbqt"
hsp90_frag_prot = mdtraj.load(frag_pdb_path)
al.annotateVinaAtomTypes(pdbqt=frag_pdbqt_path, receptor=hsp90_frag_prot)

# Lead structure (2wi7)
lead_pdb_path = this_dir+"/DataSet/Fragment_to_Lead/protein_2wi7.pdb"
lead_pdbqt_path = this_dir+"/DataSet/Fragment_to_Lead/protein_2wi7.pdbqt"
hsp90_lead_prot = mdtraj.load(lead_pdb_path)
al.annotateVinaAtomTypes(pdbqt=lead_pdbqt_path, receptor=hsp90_lead_prot)

Redundant Atom types in pdbqt file found, trimming last {-1} entries
Redundant Atom types in pdbqt file found, trimming last {-1} entries
Redundant Atom types in pdbqt file found, trimming last {-1} entries
Redundant Atom types in pdbqt file found, trimming last {-1} entries
Redundant Atom types in pdbqt file found, trimming last {-1} entries
Redundant Atom types in pdbqt file found, trimming last {-1} entries


In [4]:
# Run pocket detection on each structure as its own snapshot.
hsp90_frag_ss = al.Snapshot()
hsp90_frag_ss.run(hsp90_frag_prot)

hsp90_lead_ss = al.Snapshot()
hsp90_lead_ss.run(hsp90_lead_prot)

# Combine the two snapshots (fragment first, lead second) and cluster their
# pockets into d-pockets using a 4.7 clustering distance.
frag_lead = al.Trajectory(snapshots=[hsp90_frag_ss, hsp90_lead_ss])
frag_lead.gen_dpockets(clust_distance=4.7)

In [5]:
# Per-snapshot lookup tables, each keyed by d-pocket index. The position of a
# label in this tuple is the position of its snapshot inside every d-pocket
# (the fragment snapshot was passed to the Trajectory first, the lead second).
snapshot_labels = ('Frag', 'Lead')

temp_space_dict = {label: {} for label in snapshot_labels}
temp_coords_dict = {label: {} for label in snapshot_labels}
temp_contact_dict = {label: {} for label in snapshot_labels}
temp_score_dict = {label: {} for label in snapshot_labels}
temp_centroids_dict = {label: {} for label in snapshot_labels}

for dpx, dp in enumerate(frag_lead.dpockets):
    pockets = list(dp.pockets)
    for snapshot_idx, label in enumerate(snapshot_labels):
        pock = pockets[snapshot_idx]
        # A zero score is treated as "no pocket here in this snapshot": skip it.
        if pock.score != 0:
            # pock.betas is re-read for every pass rather than cached, in case
            # it is a one-shot iterator.
            temp_coords_dict[label][dpx] = [list(beta.xyz) for beta in pock.betas]
            temp_score_dict[label][dpx] = np.array([min(beta.scores) for beta in pock.betas])
            temp_space_dict[label][dpx] = np.array([beta.space for beta in pock.betas])
            temp_contact_dict[label][dpx] = list(pock.lining_atoms_idx)
            temp_centroids_dict[label][dpx] = pock.centroid

In [6]:
# Protein coordinates of the fragment structure, scaled x10 (mdtraj stores
# nanometers; presumably this converts to angstroms - confirm against
# genCommunityPocket's expectations).
prot_frag_coords = hsp90_frag_prot.xyz[0]*10

# Group the fragment-structure pockets into communities.
frag_surface_communities = genCommunityPocket(
    prot_frag_coords,
    temp_centroids_dict['Frag'],
    temp_space_dict['Frag'],
    temp_contact_dict['Frag'],
    temp_score_dict['Frag'],
    corecut=100,
    auxcut=30,
    tight_option=True,
    tight_cutoff_core=12.5,
    tight_cutoff_aux=6.5,
)

In [10]:
### Load the fragment (2wi2) and lead (2wi7) ligand structures ###
ligand_frag_path = this_dir+'/DataSet/Fragment_to_Lead/ligand_frag_2wi2.pdb'
ligand_lead_path = this_dir+'/DataSet/Fragment_to_Lead/ligand_lead_2wi7.pdb'

ligand_frag = mdtraj.load(ligand_frag_path)
ligand_lead = mdtraj.load(ligand_lead_path)


Here we get the list of communities that overlap with the fragment ligand.
In this case, there is only one overlapping community, which is the top-ranking community by space (index 0).


In [11]:
# A community counts as a binding community when at least one beta atom of
# its core or auxiliary pockets lies within 1.8 of any fragment-ligand atom
# (ligand coordinates scaled x10, like the protein coordinates).
binding_communities = []
for cx, community in frag_surface_communities.items():
    member_pockets = community['core_pockets'] + community['aux_pockets']
    # Flatten the beta-atom coordinates of every member pocket into one list.
    temp_binding_community_coords = [
        beta_xyz
        for member in member_pockets
        for beta_xyz in temp_coords_dict['Frag'][member]
    ]
    dist = cdist(temp_binding_community_coords, ligand_frag.xyz[0]*10)
    bool_arr = np.any(dist < 1.8, axis=1)
    if np.any(bool_arr):
        binding_communities.append(cx)

In [12]:
## Report, for each pocket of the fragment binding-site communities, its total
## score and space plus the percentage of each that is covered by beta atoms
## lying within 1.8 of a fragment-ligand atom.
frag_ligand_coords = ligand_frag.xyz[0]*10
for cx in binding_communities:
    community = frag_surface_communities[cx]
    for pock in community['core_pockets'] + community['aux_pockets']:
        pocket_coord = np.array(temp_coords_dict['Frag'][pock])
        pocket_space = np.array(temp_space_dict['Frag'][pock])
        pocket_score = np.array(temp_score_dict['Frag'][pock])

        # Mask of beta atoms in contact with the ligand.
        dist = cdist(pocket_coord, frag_ligand_coords)
        bool_arr = np.any(dist < 1.8, axis=1)

        total_score = np.sum(pocket_score)
        total_space = np.sum(pocket_space)
        optimized = 100*np.sum(pocket_score[bool_arr])/total_score
        space_occupied = 100*np.sum(pocket_space[bool_arr])/total_space

        print(pock,
              'Score : '+str(total_score),
              ' %Optimized : ' + str(optimized),
              ' Space : ' + str(total_space),
              ' %Space Occupied : ' + str(space_occupied))

6 Score : -2.32597  %Optimized : -0.0  Space : 205.922  %Space Occupied : 0.0
141 Score : -3.65936  %Optimized : 62.3867551245  Space : 203.603  %Space Occupied : 70.8409207583
142 Score : -4.23478  %Optimized : 29.3207900479  Space : 145.998  %Space Occupied : 34.7143601581
143 Score : -5.97796  %Optimized : 68.8680888007  Space : 446.136  %Space Occupied : 50.3513481142
144 Score : -1.19871  %Optimized : -0.0  Space : 36.7621  %Space Occupied : 0.0
146 Score : -2.38271  %Optimized : -0.0  Space : 99.9473  %Space Occupied : 0.0


We see that pockets 143, 142, and 141 are the three top-ranking pockets by score and are also the most optimized/occupied.
Pockets 6 and 146 (as well as the smaller pocket 144) are not occupied by the fragment; pockets 6 and 146 would be good starting points for ligand optimization.


In this section, we look at pocket/ligand metrics to evaluate the complementarity between the pocket and the ligands
in the lead structure.


In [13]:
# Protein coordinates of the lead structure, scaled x10 in the same way as
# the fragment structure's coordinates.
prot_lead_coords = hsp90_lead_prot.xyz[0]*10

# Group the lead-structure pockets into communities with the same settings
# that were used for the fragment structure.
lead_surface_communities = genCommunityPocket(
    prot_lead_coords,
    temp_centroids_dict['Lead'],
    temp_space_dict['Lead'],
    temp_contact_dict['Lead'],
    temp_score_dict['Lead'],
    corecut=100,
    auxcut=30,
    tight_option=True,
    tight_cutoff_core=12.5,
    tight_cutoff_aux=6.5,
)

In [14]:
## Find the lead-structure communities that contact the lead ligand: a
## community qualifies when any beta atom of its core or auxiliary pockets
## lies within 1.8 of a lead-ligand atom.
## Like in the fragment structure, the highest ranking community is also the
## only detected binding site for the lead molecule, i.e. community index 0.
binding_communities = []
for cx, community in lead_surface_communities.items():
    member_pockets = community['core_pockets'] + community['aux_pockets']
    # Flatten the beta-atom coordinates of every member pocket into one list.
    temp_binding_community_coords = [
        beta_xyz
        for member in member_pockets
        for beta_xyz in temp_coords_dict['Lead'][member]
    ]
    dist = cdist(temp_binding_community_coords, ligand_lead.xyz[0]*10)
    bool_arr = np.any(dist < 1.8, axis=1)
    if np.any(bool_arr):
        binding_communities.append(cx)


In [16]:
# Display the current list of binding-community indices (as last assigned
# by the lead-ligand search, when the cells are run in order).
binding_communities

[0]

In [18]:
# Unique pocket indices (core and auxiliary) across all binding communities
# of the lead structure.
pock_set = {
    member
    for cx in binding_communities
    for member in (lead_surface_communities[cx]['core_pockets']
                   + lead_surface_communities[cx]['aux_pockets'])
}

In [19]:
# Concatenate the per-pocket beta-atom coordinates, spaces and scores of the
# lead binding community into flat arrays. pock_set is not modified between
# the three passes, so the arrays stay row-aligned with one another.
lead_binding_community_coords = np.array(
    [beta_xyz for pock in pock_set for beta_xyz in temp_coords_dict['Lead'][pock]]
)
lead_binding_community_space = np.array(
    [beta_space for pock in pock_set for beta_space in temp_space_dict['Lead'][pock]]
)
lead_binding_community_score = np.array(
    [beta_score for pock in pock_set for beta_score in temp_score_dict['Lead'][pock]]
)

In [20]:
# Mask of lead-binding-community beta atoms lying within 1.6 of any
# lead-ligand atom.
# The distance matrix is recomputed here on purpose: the `dist` left over from
# the community-search loop belongs to whichever community that loop visited
# last, so its rows need not line up with the lead_binding_community_* arrays.
# NOTE(review): the lead ligand is assumed to be the intended reference for
# this cell - confirm. The metrics cell below recomputes both `dist` and
# `bool_arr` for each ligand in any case.
dist = cdist(lead_binding_community_coords, ligand_lead.xyz[0]*10)
bool_arr = np.any(dist <= 1.6, axis=1)

Here we measure the complementarity of the lead vs. the fragment ligand using %Optimized, %Space occupied, and %Volume overlap.

In [28]:

# Compare how well each ligand complements the lead binding-site community.
# For each ligand, beta atoms within 1.6 of any ligand atom count as occupied;
# the table reports the occupied share of the community score (%Optimized),
# of the community space (%Space_Occupied), and the ligand/community overlap
# volume as a share of the community grid volume (%Overlap_Volume).
print("       {} {} {}".format('%Optimized', '%Space_Occupied', '%Overlap_Volume'))

ligand_coord_sets = [
    ('Frag', ligand_frag.xyz[0]*10),
    ('Lead', ligand_lead.xyz[0]*10),
]
community_total_score = np.sum(lead_binding_community_score)
community_total_space = np.sum(lead_binding_community_space)

for lx, lig_coords in ligand_coord_sets:
    dist = cdist(lead_binding_community_coords, lig_coords)
    bool_arr = np.any(dist <= 1.6, axis=1)

    occupied_score = np.sum(lead_binding_community_score[bool_arr])
    occupied_space = np.sum(lead_binding_community_space[bool_arr])
    optimized = round(100*occupied_score/community_total_score, 2)
    space_occupied = round(100*occupied_space/community_total_space, 2)

    overlap_volume = features._get_overlap_volume(lead_binding_community_coords, lig_coords)
    total_volume = features._get_grid_volume(lead_binding_community_coords)
    overlap = round(100*overlap_volume/total_volume, 2)

    print("{}     {}     {}          {}".format(lx, optimized, space_occupied, overlap))
    

       %Optimized %Space_Occupied %Overlap_Volume
Frag     11.96     16.19          10.55
Lead     35.52     49.19          28.08


The lead structure improves each of these metrics in the lead binding-site community
by more than 2x compared to the fragment structure. Maximizing these metrics with more elaborate lead structures
can be used to guide the design of more potent inhibitors.