In [1]:
import pandas as pd
import numpy as np 
import glob
from Bio.PDB import *
from biopandas.pdb import PandasPdb

In [2]:
# Accession string interpolated into the glob pattern that locates the structure file.
uniprot = "Q9UPQ9"

In [3]:
# Parser object; not used in the visible cells, kept in case later cells rely on it.
parser = PDBParser()

# Directory searched for the structure file of the selected accession.
pdb_path = "/rcfs/projects/proteometer/alphafold_swissprot_pdb"

# Build the pattern from pdb_path so the directory is defined in exactly one place.
# Sorting makes pdb_file[0] deterministic if more than one file matches.
pdb_file = sorted(glob.glob(f"{pdb_path}/*{uniprot}*.pdb"))
if not pdb_file:
    # Fail with an explicit message instead of a bare IndexError on pdb_file[0].
    raise FileNotFoundError(f"No PDB file matching '*{uniprot}*.pdb' found in {pdb_path}")

# Load the first match; the parsed tables are then available through ppdb.df.
ppdb = PandasPdb()
ppdb.read_pdb(pdb_file[0])

print(pdb_file)


['/rcfs/projects/proteometer/alphafold_swissprot_pdb/AF-Q9UPQ9-F1-model_v4.pdb']


In [4]:
# Preview the first rows of the ATOM table of the loaded structure.
ppdb.df['ATOM'].head()

Unnamed: 0,record_name,atom_number,blank_1,atom_name,alt_loc,residue_name,blank_2,chain_id,residue_number,insertion,...,x_coord,y_coord,z_coord,occupancy,b_factor,blank_4,segment_id,element_symbol,charge,line_idx
0,ATOM,1,,N,,MET,,A,1,,...,38.776,-10.346,-16.776,1.0,60.47,,,N,,187
1,ATOM,2,,CA,,MET,,A,1,,...,37.705,-9.353,-16.508,1.0,60.47,,,C,,188
2,ATOM,3,,C,,MET,,A,1,,...,36.65,-9.913,-15.568,1.0,60.47,,,C,,189
3,ATOM,4,,CB,,MET,,A,1,,...,37.089,-8.716,-17.765,1.0,60.47,,,C,,190
4,ATOM,5,,O,,MET,,A,1,,...,36.623,-9.434,-14.449,1.0,60.47,,,O,,191


In [5]:
# Worked example: distance between the alpha carbons (CA) of residues 2 and 100.
atom_table = ppdb.df['ATOM']
coord_cols = ['x_coord', 'y_coord', 'z_coord']

atom_1 = atom_table.query('residue_number == 2 and atom_name == "CA"')
atom_2 = atom_table.query('residue_number == 100 and atom_name == "CA"')

# Coordinates of the first matching row of each selection.
x_p, y_p, z_p = atom_1[coord_cols].values[0]
x_q, y_q, z_q = atom_2[coord_cols].values[0]

# Euclidean distance built from the per-axis differences.
dx, dy, dz = x_p - x_q, y_p - y_q, z_p - z_q
distance = np.sqrt(dx**2 + dy**2 + dz**2)

distance

np.float64(93.37404292949941)

In [6]:
def _ca_coordinates(structure, residue_num):
    """Return the (x, y, z) coordinates of the first CA row of ``residue_num`` as a float array."""
    is_ca = (structure['residue_number'] == residue_num) & (structure['atom_name'] == 'CA')
    coords = structure.loc[is_ca, ['x_coord', 'y_coord', 'z_coord']].to_numpy(dtype=float)
    if len(coords) == 0:
        # Same exception type the bare `.values[0]` lookup produced, but naming the residue.
        raise IndexError(f"no CA atom found for residue {residue_num}")
    return coords[0]


def get_pairwise_distance(structure, residue_num_1, residue_num_2):
    """Compute the Euclidean distance between the alpha carbons (CA) of two residues.

    Parameters
    ----------
    structure : pandas.DataFrame
        Atom table with the columns ``residue_number``, ``atom_name``,
        ``x_coord``, ``y_coord`` and ``z_coord``.
    residue_num_1, residue_num_2 : int or float
        Values compared against the ``residue_number`` column.

    Returns
    -------
    numpy.float64
        Distance between the two CA positions, in the units of the coordinate columns.

    Raises
    ------
    IndexError
        If either residue has no row with ``atom_name == "CA"``.

    Notes
    -----
    When several rows match a residue, the first one in table order is used.
    """
    delta = _ca_coordinates(structure, residue_num_1) - _ca_coordinates(structure, residue_num_2)
    return np.sqrt(np.sum(delta ** 2))


In [7]:
# ATOM table of the loaded structure, passed as the `structure` argument in the calls below.
input_struct = ppdb.df['ATOM']

# Example call: CA-CA distance between residues 1 and 10.
output = get_pairwise_distance(input_struct,1,10)
print(output)
# Last expression of the cell, so its value (twice the distance) is displayed as the cell result.
output * 2

14.348114196646192


np.float64(28.696228393292383)

In [8]:
def find_mean_distances(structure, target_residue, residue_list):
    """Average the CA-CA distance from one residue to each residue in a list.

    Parameters
    ----------
    structure : pandas.DataFrame
        Atom table forwarded unchanged to ``get_pairwise_distance``.
    target_residue : int or float
        Residue number every distance is measured from.
    residue_list : iterable
        Residue numbers to measure to; each entry is converted with ``float()``,
        so numeric strings such as ``'97'`` are accepted.

    Returns
    -------
    numpy.float64
        Mean of the non-NaN distances, or NaN when there is none
        (empty ``residue_list`` or only NaN distances).

    Notes
    -----
    Any exception raised by ``get_pairwise_distance`` propagates to the caller.
    """
    distances = np.array(
        [
            get_pairwise_distance(structure, target_residue, float(residue))
            for residue in residue_list
        ],
        dtype=float,
    )
    # NaN never compares equal to anything (certainly not to the string 'NaN'),
    # so missing values have to be detected with np.isnan. Filter once, after the loop.
    valid = distances[~np.isnan(distances)]
    if valid.size == 0:
        # np.mean of an empty selection would also give NaN, but with a RuntimeWarning.
        return np.float64(np.nan)
    return np.mean(valid)

In [9]:
# Residue numbers given as strings; find_mean_distances converts each one with float().
pocket_residues = ['97', '99', '100', '101', '102', '159', '163', '189', '190']
# Mean CA distance from residue 2 to the residues above (rebinds `output` from the earlier cell).
output = find_mean_distances(input_struct,2,pocket_residues)