In [1]:
#Predict stereochemistry and reactivity from docked poses
import os
import sys
import pandas as pd
import numpy as np
import math

#Working directory that the relative paths in this notebook are resolved against.
#Set the PRED_DIR environment variable to run on another machine; the original
#location stays the default.
os.chdir(os.environ.get('PRED_DIR', '/home/azamh/demo/seq_struct_func/pred'))

In [2]:
#Arguments
#Protein and ligand identifiers; both are interpolated into the input paths
#below, and `ligand` is also the key used to look up ring atoms in lig_atom_dict
protein = 'tropb'
ligand = '2'
#PDB of the protein model that contains the FAD cofactor
pdb_with_fad_path = f'../cofactor/pdb_with_fad/{protein}_fad.pdb'
#Directory holding one PDB file per docked ligand pose
dockdir = f'../dock/poses/{protein}_{ligand}_prot'
#Cluster table: first spreadsheet row is the header, first column is the index
cluster_df = pd.read_excel(f'../dock/cluster/{protein}_{ligand}_prot.xlsx', header = 0, index_col = 0)

In [3]:
#List of atoms defining ring of ligand
#Keyed by ligand identifier. pred_pose_stereochem uses the first three names of
#a list to define the ring plane and treats the last name as the anion atom
#whose distance to the CZ atom of the selected residue is measured.
lig_atom_dict = {
    '2': ['C9','C7','C6', 'C13', 'C12', 'C10', 'O4'],
    '3': ['C9','C7','C6', 'C13', 'C12', 'C10', 'O5'],
    '4':  ['C1', 'C5', 'C6', 'C4', 'C3', 'C2', 'O3'],
    '5': ['C2', 'C4', 'C6', 'C10', 'C3', 'C1', 'O2'], 
}

In [4]:
#Functions for manipulating PDB with pandas dataframes
#Read in pdb as a dataframe
def pdb_as_df(pdb_file):
    """Parse the ATOM/HETATM records of a PDB file into a DataFrame.

    Every record is cut at fixed character positions and each piece is
    stripped of surrounding whitespace. atom_num and resi_num are cast to int,
    the coordinates, occupancy and B-factor to float; atom_name, resi_name,
    chain and atom_segid stay strings.

    Parameters
    ----------
    pdb_file : str or path-like
        Path of the PDB file to read.

    Returns
    -------
    pandas.DataFrame
        One row per ATOM/HETATM line, indexed 0..n-1 in file order. A file
        without such records yields an empty frame that still carries all
        eleven columns.

    Raises
    ------
    ValueError
        If a numeric field of a record is blank or not a number.
    """
    #(column name, slice start, slice end) for every field taken from a record
    fields = [
        ("atom_num", 6, 11),
        ("atom_name", 12, 16),
        ("resi_name", 17, 21),
        ("chain", 21, 22),
        ("resi_num", 22, 26),
        ("atom_x", 30, 38),
        ("atom_y", 38, 46),
        ("atom_z", 46, 54),
        ("atom_occ", 54, 60),
        ("atom_b", 60, 66),
        ("atom_segid", 72, 76),
    ]
    numeric_dtypes = {
        "atom_num": 'int',
        'resi_num': 'int',
        'atom_x': 'float',
        'atom_y': 'float',
        'atom_z': 'float',
        'atom_occ': 'float',
        'atom_b': 'float',
    }

    #Context manager so the file handle is closed even if reading fails
    with open(pdb_file, 'r') as handle:
        pdb_txt = handle.read().splitlines()

    records = [
        {name: line[start:end].strip() for name, start, end in fields}
        for line in pdb_txt
        if line.startswith(('ATOM', 'HETATM'))
    ]

    #Naming the columns explicitly keeps the schema (and keeps the astype call
    #valid) when the file holds no atom records at all
    pdb_df = pd.DataFrame(records, columns=[name for name, _, _ in fields])
    return pdb_df.astype(numeric_dtypes)

#Select atoms based on a property
def get_atoms(df, property, value):
    """Return the rows of ``df`` whose ``property`` column matches ``value``.

    A list ``value`` selects rows whose entry is any member of the list; any
    other ``value`` is compared for equality. The result is re-indexed from 0.
    """
    column = df[property]
    mask = column.isin(value) if isinstance(value, list) else column == value
    return df.loc[mask].reset_index(drop = True)

#Get coordinates of an atom
def get_atom_coor(df, atom_name):
    """Return the (x, y, z) coordinates of the first row named ``atom_name``.

    Raises IndexError when no row of ``df`` carries that atom name.
    """
    first_match = get_atoms(df, 'atom_name', atom_name).iloc[0]
    return tuple(first_match[['atom_x', 'atom_y', 'atom_z']].values)

#distance between two points
def coor_distance(x1, y1, z1, x2, y2, z2):
    """Euclidean distance between the points (x1, y1, z1) and (x2, y2, z2)."""
    offset = np.array([x1, y1, z1]) - np.array([x2, y2, z2])
    return np.sqrt(np.sum(offset**2, axis = 0))

def coor_distance_tuple(coor1, coor2):
    """Euclidean distance between two points given as (x, y, z) triples."""
    (ax, ay, az), (bx, by, bz) = coor1, coor2
    offset = np.array([ax, ay, az]) - np.array([bx, by, bz])
    return np.sqrt(np.sum(offset**2, axis = 0))

#Average coordinates of dataframe
def average_coor(df):
    """Return the mean atom_x, atom_y and atom_z of ``df`` as an (x, y, z) tuple."""
    centroid = df[['atom_x', 'atom_y', 'atom_z']].mean()
    return tuple(centroid.values)

#Magnitude of a vector
def magnitude(x, y, z):
    """Length of the vector (x, y, z)."""
    squared_length = sum(component**2 for component in (x, y, z))
    return math.sqrt(squared_length)

#Plane defined by 3 atoms
def plane(coor1:tuple, coor2:tuple, coor3:tuple):
    """Coefficients (a, b, c, d) of the plane a*x + b*y + c*z + d = 0 through three points.

    (a, b, c) is the cross product of the edge vectors coor1->coor2 and
    coor1->coor3, so it is not normalised and its direction depends on the
    order in which the points are given.
    """
    px, py, pz = coor1
    qx, qy, qz = coor2
    rx, ry, rz = coor3

    #Edge vectors that share coor1 as their origin
    ux, uy, uz = qx - px, qy - py, qz - pz
    vx, vy, vz = rx - px, ry - py, rz - pz

    #Cross product u x v gives the plane normal
    normal_x = uy * vz - vy * uz
    normal_y = vx * uz - ux * vz
    normal_z = ux * vy - uy * vx

    #Offset chosen so that coor1 satisfies the plane equation
    offset = (- normal_x * px - normal_y * py - normal_z * pz)
    return normal_x, normal_y, normal_z, offset

#Dot product
def dot_product(x1, y1, z1, x2, y2, z2):
    """Dot product of the vectors (x1, y1, z1) and (x2, y2, z2)."""
    x_term = x1*x2
    y_term = y1*y2
    z_term = z1*z2
    return x_term + y_term + z_term

#Angle between two vectors
def angle_between_vector(x1, y1, z1, x2, y2, z2):
    """Angle in degrees (0 to 180) between the vectors (x1, y1, z1) and (x2, y2, z2).

    Zero-length vectors are not supported: the cosine is obtained by dividing
    by the product of the two magnitudes.
    """
    dot = dot_product(x1, y1, z1, x2, y2, z2)
    mag1 = magnitude(x1, y1, z1)
    mag2 = magnitude(x2, y2, z2)
    #Rounding can push the cosine of (anti)parallel vectors marginally outside
    #[-1, 1] (e.g. (1, 1, 1) against itself gives 1.0000000000000002), where
    #arccos returns nan; clamp it back into the valid domain first
    cosine = np.clip(dot/(mag1*mag2), -1.0, 1.0)
    angle =  np.arccos(cosine) * 180/np.pi
    return angle


In [5]:
#Predict stereochemistry of a ligand pose:
#Enter R206 resi with protein numbering (get numbering from aligned residue) if finding distance to anion
def pred_pose_stereochem(protein_pdb_path, ligand_pdb_path, ligand, lig_atom_dict, R206_resi = None):
    """Classify a docked ligand pose as 'R' or 'S' from its orientation towards FAD.

    The ring plane is built from the first three atoms listed for the ligand.
    The pose is labelled from the angle between that plane's normal and the
    vector pointing from the ring centroid to FAD atom 'C13'.

    Parameters
    ----------
    protein_pdb_path : path of the protein PDB that contains residue 'FAD'.
    ligand_pdb_path : path of the PDB file of the ligand pose.
    ligand : key into ``lig_atom_dict``.
    lig_atom_dict : maps a ligand key to its list of ring atom names; the last
        entry is used as the anion atom.
    R206_resi : residue number in the protein PDB whose CZ atom the anion
        distance is measured to. No anion distance is computed when falsy.

    Returns
    -------
    (stereo, angle, dist_to_fad, anion_dist)
        stereo is 'R' when the angle is >= 90 degrees, otherwise 'S'.
        angle is in degrees, rounded to 2 decimals.
        dist_to_fad is the distance from the ring centroid to the FAD atom.
        anion_dist is the CZ-to-anion distance, or None when it was not
        requested or the selected residue has no CZ atom.
    """
    #Read pdb of protein for fad atoms
    model_df = pdb_as_df(protein_pdb_path)
    fad_df = get_atoms(model_df, 'resi_name', 'FAD')

    #Read pdb of ligand, keeping only the ring atoms
    plane_atoms = lig_atom_dict[ligand]
    lig_df = get_atoms(pdb_as_df(ligand_pdb_path), 'atom_name', plane_atoms)

    #Get atom that binds to hydroperoxyl group in activated FAD
    fad_atom = 'C13'
    fad_atom_x, fad_atom_y, fad_atom_z = get_atom_coor(fad_df, fad_atom)

    #Get distance to the FAD atom from ligand average coords
    lig_xave, lig_yave, lig_zave = average_coor(lig_df)
    dist_to_fad = coor_distance(fad_atom_x, fad_atom_y, fad_atom_z, lig_xave, lig_yave, lig_zave)

    #normal vector of ligand ring plane (the plane offset is not needed here)
    lig_coor_1 = get_atom_coor(lig_df, plane_atoms[0])
    lig_coor_2 = get_atom_coor(lig_df, plane_atoms[1])
    lig_coor_3 = get_atom_coor(lig_df, plane_atoms[2])
    a_lig, b_lig, c_lig, _ = plane(lig_coor_1, lig_coor_2, lig_coor_3)

    #rescale so plane normal vector is unit vector
    length = magnitude(a_lig, b_lig, c_lig)
    a_lig, b_lig, c_lig = a_lig/length, b_lig/length, c_lig/length

    #find angle between normal vector and distance vector
    #distance vector points from the ring centroid to the FAD atom
    dist_x, dist_y, dist_z = fad_atom_x - lig_xave, fad_atom_y - lig_yave, fad_atom_z - lig_zave
    angle_between_vect = round(angle_between_vector(dist_x, dist_y, dist_z, a_lig, b_lig, c_lig),2)

    #Define stereochemistry as R if angle >= 90 else, S
    stereo = 'R' if angle_between_vect >= 90 else 'S'

    #Calculate anion distance to R206 in TropB
    anion_dist = None
    if R206_resi:
        #Get arginine atom
        prot_resi_df = get_atoms(model_df, 'resi_num', R206_resi)
        #get_atom_coor raises on a missing atom instead of returning a falsy
        #value, so check for the CZ atom explicitly before reading it
        if not get_atoms(prot_resi_df, 'atom_name', 'CZ').empty:
            CZ_coor = get_atom_coor(prot_resi_df, 'CZ')
            lig_anion = plane_atoms[-1]
            lig_anion_coor = get_atom_coor(lig_df, lig_anion)
            anion_dist = coor_distance_tuple(CZ_coor, lig_anion_coor)

    return stereo, angle_between_vect, dist_to_fad, anion_dist

In [6]:
#use the canonical ensemble (Boltzmann-weighted) partition function to estimate probabilities of S and R states
def partition_function(cluster_df, temperature=300):
    """Boltzmann-weighted fractions of 'R' and 'S' clusters.

    Every row of ``cluster_df`` contributes ``size * exp(-min_ener / (k*T))``;
    the weights of rows labelled 'R' and 'S' are summed separately and divided
    by the total weight. Rows with any other label count towards the total
    only.

    Parameters
    ----------
    cluster_df : DataFrame with columns 'stereo', 'min_ener' (kcal/mol) and
        'size'.
    temperature : temperature in K.

    Returns
    -------
    (R_frac, S_frac). Both are 0 when there are no rows or the total weight
    is zero.
    """
    stereos = list(cluster_df['stereo'])
    energies = [float(energy) for energy in cluster_df['min_ener']]
    sizes = [int(size) for size in cluster_df['size']]

    R_frac = 0
    S_frac = 0
    if not energies:
        return R_frac, S_frac

    #k = 1.380649 * math.pow(10, -23) #J/K
    k = 1.987204259 * math.pow(10, -3)  # kcal/mol * K
    T = temperature  # K
    B = 1 / (T * k)  # mol/kcal

    #Shift all energies by the lowest one before exponentiating. The common
    #factor cancels in the ratios, every exponent becomes <= 0, and exp can no
    #longer overflow. Zeroing the weight on overflow (the previous handling)
    #discarded exactly the most favourable states.
    reference = min(energies)

    #list of boltzman factors for degenerate states
    factors = [
        math.exp(-B * (energy - reference)) * size
        for energy, size in zip(energies, sizes)
    ]
    Z = sum(factors)

    if Z == 0:
        return R_frac, S_frac

    for factor, stereo in zip(factors, stereos):
        if stereo == 'R':
            R_frac += factor / Z
        elif stereo == 'S':
            S_frac += factor / Z

    return R_frac, S_frac

In [7]:
#Calculate stereochemistry of cluster representative poses
R206_resi = 206
result_columns = ['stereo', 'angle', 'dist_to_fad', 'anion_dist']

#One (stereo, angle, dist_to_fad, anion_dist) tuple per cluster, in row order.
#The representative pose of a cluster is the file named after its min_index.
pose_results = []
for min_index in cluster_df['min_index']:
    rep_ligand_pdb_path = os.path.join(dockdir, f'{protein}_{ligand}_{min_index}.pdb')
    pose_results.append(
        pred_pose_stereochem(pdb_with_fad_path, rep_ligand_pdb_path, ligand, lig_atom_dict, R206_resi)
    )

#Assign whole columns at once so each column gets a dtype inferred from its
#values, instead of writing numbers cell by cell into ''-initialised columns
pose_df = pd.DataFrame(pose_results, columns=result_columns)
for column in result_columns:
    cluster_df[column] = pose_df[column].to_numpy()

#Make sure the output folder exists before writing the spreadsheet
os.makedirs('stereo', exist_ok=True)
cluster_df.to_excel(f'stereo/{protein}_{ligand}_stereo.xlsx')
cluster_df


Unnamed: 0,cluster,size,min_ener,min_index,average energy,std energy,stereo,angle,dist_to_fad,anion_dist
0,"['105', '135', '147', '162', '179', '199', '22...",8,-50.03,224,-28.72,15.96,R,131.45,4.412384,3.315454
1,"['108', '115', '185', '213', '233', '242', '25...",17,-46.84,185,-20.94,17.58,R,113.79,5.454536,3.451893
2,['145'],1,-42.09,145,-42.09,0.00,R,115.82,5.454525,4.72862
3,"['122', '128', '136', '204', '211', '261', '28...",11,-40.18,211,15.15,30.23,R,110.16,5.373566,4.580234
4,"['194', '241', '438', '458', '471', '63']",6,-38.91,438,-33.32,3.23,R,118.84,5.102235,4.227627
...,...,...,...,...,...,...,...,...,...,...
103,['245'],1,1866.75,245,1866.75,0.00,R,116.61,7.197409,5.214141
104,['287'],1,3351.10,287,3351.10,0.00,S,61.76,2.704134,7.536139
105,"['156', '215']",2,3791.10,156,3799.87,8.77,S,30.87,2.076857,8.937882
106,['146'],1,3817.38,146,3817.38,0.00,R,157.18,1.887237,8.958015


In [8]:
#Calculate overall R frac with ensemble of clusters
#partition_function reads the 'stereo', 'min_ener' and 'size' columns of
#cluster_df and returns the R and S fractions; only the R fraction is displayed
R_frac, S_frac = partition_function(cluster_df)
R_frac

0.9999999999998027

In [None]:
#Visualize top pose