In [1]:
import math
import timeit

from Bio import PDB

In [4]:
# Fetch PDB entry 1TUP in legacy PDB format into the current directory and
# parse it into a structure object.
repository = PDB.PDBList()
parser = PDB.PDBParser()
# retrieve_pdb_file returns the path of the local file it wrote (or found),
# so parse that path instead of hard-coding the name the downloader picks.
pdb_path = repository.retrieve_pdb_file('1TUP', file_format='pdb', pdir='.')  # XXX
p53_1tup = parser.get_structure('P 53', pdb_path)

Downloading PDB structure '1TUP'...




In [5]:
# Gather every atom whose element is zinc, then show each one together
# with its coordinates.
zns = [atom for atom in p53_1tup.get_atoms() if atom.element == 'ZN']
for zn in zns:
    print(zn, zn.coord)

<Atom ZN> [58.108 23.242 57.424]
<Atom ZN> [60.108 17.981 75.931]
<Atom ZN> [33.653  0.403 74.115]


In [6]:
# TODO: suggest viewing the structure in PyMOL

In [7]:
# TODO: try this in Numba?
def get_closest_atoms(pdb_struct, ref_atom, distance):
    """Find the atoms of a structure lying closer than ``distance`` to an atom.

    Args:
        pdb_struct: object whose ``get_atoms()`` yields the atoms to scan.
        ref_atom: atom used as the centre of the search; its ``coord`` must
            unpack into three values (x, y, z).
        distance: search radius; only atoms strictly closer than this are kept.

    Returns:
        dict mapping each matching atom to its Euclidean distance from
        ``ref_atom``, in the order the atoms were yielded. Atoms that compare
        equal to ``ref_atom`` are never included.
    """
    ref_x, ref_y, ref_z = ref_atom.coord
    neighbours = {}
    for candidate in pdb_struct.get_atoms():
        # Skip the reference atom itself; everything else gets measured.
        if not candidate == ref_atom:
            cx, cy, cz = candidate.coord
            separation = math.sqrt(
                (cx - ref_x) ** 2 + (cy - ref_y) ** 2 + (cz - ref_z) ** 2
            )
            if separation < distance:
                neighbours[candidate] = separation
    return neighbours

In [8]:
# For every zinc atom, print its coordinates and then one line per atom found
# within a radius of 4: element, distance to the zinc, and coordinates.
for zn in zns:
    # An empty first argument with a newline separator gives the blank line
    # followed by the coordinates.
    print('', zn.coord, sep='\n')
    neighbours = get_closest_atoms(p53_1tup, zn, 4)
    for neighbour, dist in neighbours.items():
        print(neighbour.element, dist, neighbour.coord)


[58.108 23.242 57.424]
C 3.4080117696286854 [57.77  21.214 60.142]
S 2.3262243799594877 [57.065 21.452 58.482]
C 3.4566537492335123 [58.886 20.867 55.036]
C 3.064120559761192 [58.047 22.038 54.607]
N 1.9918273537290707 [57.755 23.073 55.471]
C 2.9243719601324525 [56.993 23.943 54.813]
C 3.857729198122736 [61.148 25.061 55.897]
C 3.62725094648044 [61.61  24.087 57.001]
S 2.2789209624943494 [60.317 23.318 57.979]
C 3.087214470667822 [57.205 25.099 59.719]
S 2.2253158446520818 [56.914 25.054 57.917]

[60.108 17.981 75.931]
C 3.41769274437124 [57.593 15.783 75.207]
S 2.3254721582053093 [58.586 17.082 74.42 ]
C 3.4672070967122894 [62.272 17.174 73.345]
C 3.1139134725185587 [62.061 18.615 73.59 ]
N 2.0564599972249455 [61.366 19.056 74.71 ]
C 2.985233217423681 [61.332 20.382 74.647]
C 3.805126390272999 [62.573 18.263 78.816]
C 3.1803200512467478 [61.521 17.136 78.652]
S 2.2070404885225816 [61.287 16.447 76.993]
C 3.2038921042012745 [57.624 18.417 77.907]
S 2.242320906916762 [58.978 19.402 77

In [9]:
# For a doubling series of radii, report how many atoms fall within that
# radius of each zinc (one count per zinc, in the order of `zns`).
for distance in [1, 2, 4, 8, 16, 32, 64, 128]:
    my_atoms = [len(get_closest_atoms(p53_1tup, zn, distance)) for zn in zns]
    print(distance, my_atoms)

1 [0, 0, 0]
2 [1, 0, 0]
4 [11, 11, 12]
8 [109, 113, 106]
16 [523, 721, 487]
32 [2381, 3493, 2053]
64 [5800, 5827, 5501]
128 [5827, 5827, 5827]


In [10]:
# Number of repetitions per timing run; later timing cells reuse this value.
nexecs = 10
# Total wall time (seconds) for `nexecs` calls around the first zinc.
total_seconds = timeit.timeit(
    'get_closest_atoms(p53_1tup, zns[0], 4.0)',
    setup='from __main__ import get_closest_atoms, p53_1tup, zns',
    number=nexecs,
)
# Average per call, converted from seconds to milliseconds.
print(total_seconds / nexecs * 1000)

86.03363559998343


In [11]:
def get_closest_alternative(pdb_struct, ref_atom, distance):
    """Variant of the neighbour search that rejects atoms with a box test first.

    Before computing the Euclidean distance, an atom is discarded when any
    single coordinate differs from the reference by more than ``distance``.
    Atoms passing that test still have to be strictly closer than ``distance``.

    Args:
        pdb_struct: object whose ``get_atoms()`` yields the atoms to scan.
        ref_atom: atom used as the centre of the search; its ``coord`` must
            unpack into three values (x, y, z).
        distance: search radius.

    Returns:
        dict mapping each matching atom to its Euclidean distance from
        ``ref_atom``, in the order the atoms were yielded. Atoms that compare
        equal to ``ref_atom`` are never included.
    """
    ref_x, ref_y, ref_z = ref_atom.coord
    within = {}
    for other in pdb_struct.get_atoms():
        if other == ref_atom:
            continue
        ox, oy, oz = other.coord
        dx, dy, dz = ox - ref_x, oy - ref_y, oz - ref_z
        # Per-axis rejection: outside the bounding cube means outside the sphere,
        # so the square root can be skipped.
        if abs(dx) > distance or abs(dy) > distance or abs(dz) > distance:
            continue
        separation = math.sqrt(dx ** 2 + dy ** 2 + dz ** 2)
        if separation < distance:
            within[other] = separation
    return within

In [12]:
# Same measurement as for get_closest_atoms, now for the pre-filtering variant:
# total seconds for `nexecs` calls, reported as milliseconds per call.
alternative_seconds = timeit.timeit(
    'get_closest_alternative(p53_1tup, zns[0], 4.0)',
    setup='from __main__ import get_closest_alternative, p53_1tup, zns',
    number=nexecs,
)
print(alternative_seconds / nexecs * 1000)

31.851707400005576


In [13]:
# Time both implementations over the same set of radii and print the average
# milliseconds per call. One loop drives both functions so the two
# measurements cannot drift apart through copy-paste edits.
for label, func_name in (('Standard', 'get_closest_atoms'),
                         ('Optimized', 'get_closest_alternative')):
    print(label)
    for distance in [1, 4, 16, 64, 128]:
        # `distance` is a notebook-level global on purpose: the timeit setup
        # statement imports it from __main__ on every run.
        stmt = '{}(p53_1tup, zns[0], distance)'.format(func_name)
        setup = 'from __main__ import {}, p53_1tup, zns, distance'.format(func_name)
        print(timeit.timeit(stmt, setup, number=nexecs) / nexecs * 1000)

Standard
85.08649739999328
86.50681579999855
86.79630599999655
96.95437099999253
96.21982420001132
Optimized
30.253444099980698
32.69531210000878
52.965772600009586
142.53310030001103
141.26269519999823


In [12]:
#for interesting distances