In [1]:
import numpy as np
import pickle
import pandas as pd
from Bio.PDB.Polypeptide import PPBuilder
import Bio.PDB
from Bio.PDB.PDBIO import PDBIO
import glob

In [2]:
def vtoa(v):
    """Convert an iterable of numeric values into a 1-D NumPy float array."""
    components = list(map(float, v))
    return np.array(components)

def unit(v):
    """Return ``v`` rescaled to unit Euclidean length."""
    length = np.dot(v, v) ** 0.5
    return v / length

def norm(v):
    """Return the Euclidean length of ``v``."""
    squared_length = np.dot(v, v)
    return squared_length ** 0.5


###############################
#
#  Set up a function to calculate burial and side chain contact
#
###############################

# One-letter codes of the 20 standard amino acids, keyed by PDB residue name.
_THREE_TO_ONE = {
    'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D', 'CYS': 'C',
    'GLN': 'Q', 'GLU': 'E', 'GLY': 'G', 'HIS': 'H', 'ILE': 'I',
    'LEU': 'L', 'LYS': 'K', 'MET': 'M', 'PHE': 'F', 'PRO': 'P',
    'SER': 'S', 'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V',
}

# Residue name -> (score column it feeds, representative side-chain atom).
_CONTACT_PROBES = {
    'PHE': ('arom_contact', 'CE2'),
    'TYR': ('arom_contact', 'CE2'),
    'TRP': ('arom_contact', 'CE2'),
    'GLU': ('negative_contact', 'OE1'),
    'ASP': ('negative_contact', 'OD1'),
    'LYS': ('positive_contact', 'NZ'),
    'ARG': ('positive_contact', 'NE'),
}

# Order of the score columns in the returned table.
_SCORE_COLUMNS = ('burial', 'sc_contact', 'arom_contact',
                  'negative_contact', 'positive_contact')


def _atom_xyz(atom):
    """Return the coordinates of a Bio.PDB atom as a float NumPy array."""
    return vtoa(atom.get_vector())


def _side_chain_anchor(residue):
    """Return the atom that stands in for the side chain: CB, or an alpha hydrogen for glycine."""
    if residue.get_resname() != 'GLY':
        return residue['CB']
    # Glycine has no CB; accept either naming of the alpha hydrogen.
    for atom_name in ('2HA', 'HA2'):
        try:
            return residue[atom_name]
        except KeyError:
            continue
    raise KeyError(
        "glycine %r has neither a '2HA' nor an 'HA2' atom; "
        "the structure must contain hydrogens" % (residue.get_id(),)
    )


def _cone_score(offset, axis, d_param):
    """Return the soft cone-count contribution of a single atom.

    ``offset`` is the vector from the cone apex to the atom, ``axis`` the unit
    vector along the cone and ``d_param`` the distance at which the logistic
    distance weight equals 0.5.
    """
    dist_term = 1.0 / (1.0 + np.exp(norm(offset) - d_param))
    # 0.5 + cos(angle to the axis), clipped at zero and then squared.  The
    # largest possible value is 1.5 ** 2 = 2.25, hence the normalisation below.
    angle_term = 0.5 + np.dot(unit(offset), axis)
    if angle_term < 0:
        angle_term = 0.0
    return dist_term * angle_term ** 2.0 / 2.25


def pdb2sasa_burial(file_loc, d_param=9, d_param_sc=9):
    """Compute per-residue burial and side-chain contact scores for a PDB file.

    Each score is a smooth count of neighbouring atoms inside a cone whose apex
    is the residue's CB atom (an alpha hydrogen for glycine) and whose axis is
    the residue's CA->CB direction.  Despite the function name, no solvent
    accessible surface area is computed here.

    Parameters
    ----------
    file_loc : str
        Path of the PDB file.  Only the first model is analysed.
    d_param : float, optional
        Distance (9 by default) at which the logistic distance weight of the
        CA-based ``burial`` score drops to 0.5.
    d_param_sc : float, optional
        Same cutoff for the four side-chain based scores.

    Returns
    -------
    pandas.DataFrame
        One row per residue with a blank hetero flag, with the columns ``pos``
        (1-based running index), ``wt_aa`` (one-letter code, ``'X'`` for
        unrecognised residue names), ``burial``, ``sc_contact``,
        ``arom_contact``, ``negative_contact`` and ``positive_contact``.

    Raises
    ------
    KeyError
        If a residue lacks an atom the calculation needs (CA, CB, a glycine
        alpha hydrogen, or the representative atom of an aromatic, acidic or
        basic side chain).
    """
    model = Bio.PDB.PDBParser(QUIET=True).get_structure(file_loc, file_loc)[0]
    # Keep only residues whose hetero flag (first field of the id) is blank.
    residues = [res for res in model.get_residues() if res.id[0] == " "]

    # Sequence letters are taken from the scored residues themselves, so chain
    # breaks or several chains cannot misalign the labels and the coordinates.
    resnames = [res.get_resname() for res in residues]
    aa_list = [_THREE_TO_ONE.get(name, 'X') for name in resnames]

    # Look up every coordinate once instead of inside the O(n^2) pair loop.
    ca_xyz = [_atom_xyz(res['CA']) for res in residues]
    anchor_xyz = [_atom_xyz(_side_chain_anchor(res)) for res in residues]
    probes = []
    for res, name in zip(residues, resnames):
        if name in _CONTACT_PROBES:
            column, atom_name = _CONTACT_PROBES[name]
            probes.append((column, _atom_xyz(res[atom_name])))
        else:
            probes.append(None)

    scores = {column: [] for column in _SCORE_COLUMNS}
    n_res = len(residues)
    for i in range(n_res):
        apex = anchor_xyz[i]
        # Cone axis: unit vector from CA to the side-chain anchor.
        axis = unit(apex - ca_xyz[i])
        totals = dict.fromkeys(_SCORE_COLUMNS, 0.0)

        for j in range(n_res):
            if j == i:
                continue
            # burial: neighbouring CA atoms inside the cone.
            totals['burial'] += _cone_score(ca_xyz[j] - apex, axis, d_param)
            # sc_contact: neighbouring CB atoms (alpha hydrogen for glycine).
            totals['sc_contact'] += _cone_score(anchor_xyz[j] - apex, axis, d_param_sc)
            # arom/negative/positive contact: one representative atom per
            # aromatic, acidic or basic neighbour.
            if probes[j] is not None:
                column, probe_xyz = probes[j]
                totals[column] += _cone_score(probe_xyz - apex, axis, d_param_sc)

        for column in _SCORE_COLUMNS:
            scores[column].append(totals[column])

    table = {'pos': list(range(1, n_res + 1)), 'wt_aa': aa_list}
    table.update(scores)
    return pd.DataFrame(table)

In [3]:
# Example run on a single PDB file; the returned per-residue table is shown as the cell output.
pdb2sasa_burial('../AlphaFold_model_PDBs/1A0N.pdb')

Unnamed: 0,pos,wt_aa,burial,sc_contact,arom_contact,negative_contact,positive_contact
0,1,V,0.448836,0.678624,0.000901,0.000819,0.0584304
1,2,T,0.761383,1.103654,0.559828,0.000512,6.730169e-07
2,3,L,0.527029,0.941851,0.005345,0.003328,0.6852897
3,4,F,3.951286,4.485673,0.869375,0.014133,0.001124542
4,5,V,0.606352,0.742116,0.003172,0.00283,0.4442756
5,6,A,4.749868,4.686474,0.793864,1.106512,0.01692999
6,7,L,1.508452,1.238574,0.674841,0.001762,0.0002050215
7,8,Y,0.718695,0.81758,0.872033,0.002317,0.03569214
8,9,D,0.077457,0.237653,0.0,0.095093,0.01626959
9,10,Y,4.759198,4.737425,1.539835,0.249923,0.06187744
