In [None]:
# Ch09-3 Geometric Operations

In [None]:
# Installations (if not already completed)
# ! pip install biopython

In [None]:
# 1. Import Libraries
import numpy as np
from Bio import PDB 
import pandas as pd

In [None]:
# 2. Retrieve data
# Download the 1TUP entry into the current directory (the download is
# skipped if the file is already there) and parse it into a structure.
repository = PDB.PDBList()
parser = PDB.PDBParser()
# Use the path reported by retrieve_pdb_file instead of hard-coding
# 'pdb1tup.ent', so parsing keeps working if the on-disk name differs.
pdb_path = repository.retrieve_pdb_file('1TUP', pdir='.', file_format='pdb')
p53_1tup = parser.get_structure('P 53', pdb_path)
# Note - it is OK to get warnings about "Structure Exists" here

In [None]:
# 3. Recall residue types
# Gather the distinct values found in the first field of every residue id
# (the same field later cells compare against 'W' to detect water).
my_residues = {res.id[0] for res in p53_1tup.get_residues()}
print(my_residues)

In [None]:
# 4. Compute masses for chains, zincs, and waters
# Function to compute mass
def get_mass(atoms, accept_fun=lambda x: True):
    """Add up ``.mass`` over the atoms for which ``accept_fun`` is truthy.

    Args:
        atoms: iterable of objects exposing a ``mass`` attribute.
        accept_fun: predicate called with each atom; by default every atom
            is accepted.

    Returns:
        The summed mass, or 0 when no atom is accepted.
    """
    total = 0
    for candidate in atoms:
        if accept_fun(candidate):
            total += candidate.mass
    return total
# Extract chain names
chain_names = [chain.id for chain in p53_1tup.get_chains()]
# One row per chain; columns are: all atoms, zinc atoms, water atoms.
# np.zeros (rather than np.ndarray) guarantees that no cell is ever left
# holding uninitialized memory.
my_mass = np.zeros((len(chain_names), 3))
# Iterate over chains to compute the mass of each category.
# chain.get_atoms() is called afresh for every column because get_mass
# consumes the iterable it is given.
for i, chain in enumerate(p53_1tup.get_chains()):
    my_mass[i, 0] = get_mass(chain.get_atoms())
    my_mass[i, 1] = get_mass(chain.get_atoms(),
        accept_fun=lambda atom: atom.element == 'ZN')
    my_mass[i, 2] = get_mass(chain.get_atoms(),
        accept_fun=lambda atom: atom.parent.id[0] == 'W')
print("Mass array:", my_mass)

In [None]:
# Alternate for the above with better formatting
import numpy as np

# Function to compute mass
def get_mass(atoms, accept_fun=lambda x: True):
    """Return the combined mass of the atoms accepted by ``accept_fun``.

    ``atoms`` is any iterable of objects with a ``mass`` attribute;
    ``accept_fun`` is called once per atom and the atom is counted only
    when the call returns a truthy value. The default accepts everything.
    """
    accepted = (atom for atom in atoms if accept_fun(atom))
    return sum(atom.mass for atom in accepted)

# Filter functions for different atom types
def is_not_water(atom):
    """Tell whether ``atom`` belongs to a residue not named 'HOH'.

    The residue is obtained with ``atom.get_parent()`` and its name with
    ``get_resname()``.
    """
    residue = atom.get_parent()
    return residue.get_resname() != 'HOH'

def is_zinc(atom):
    """Tell whether ``atom.element`` is exactly the string 'ZN'."""
    element = atom.element
    return element == 'ZN'

def is_water(atom):
    """Tell whether ``atom`` belongs to a residue named 'HOH'.

    The residue is obtained with ``atom.get_parent()`` and its name with
    ``get_resname()``.
    """
    residue = atom.get_parent()
    return residue.get_resname() == 'HOH'

# Chain identifiers, in the order the structure yields them
chain_names = [chain.id for chain in p53_1tup.get_chains()]

# One row per chain, one column per category (No water, Zincs, Water)
mass_filters = (is_not_water, is_zinc, is_water)
my_mass = np.zeros((len(chain_names), len(mass_filters)))

# A fresh get_atoms() iterator is requested for every category because
# get_mass consumes the iterable it is given.
for row, chain in enumerate(p53_1tup.get_chains()):
    for col, keep in enumerate(mass_filters):
        my_mass[row, col] = get_mass(chain.get_atoms(), keep)

# Fixed-width text table: header, one line per chain, then column totals
rule = "-" * 45
print("Mass Distribution by Chain (Daltons)")
print("=" * 45)
print(f"{'Chain':<6} {'No water':<12} {'Zincs':<12} {'Water':<12}")
print(rule)

for chain_id, (no_water, zincs, water) in zip(chain_names, my_mass):
    print(f"{chain_id:<6} {no_water:<12.2f} {zincs:<12.2f} {water:<12.2f}")

print(rule)

# Column-wise sums across all chains
totals = my_mass.sum(axis=0)
print(f"{'Total':<6} {totals[0]:<12.2f} {totals[1]:<12.2f} {totals[2]:<12.2f}")

print(f"\nRaw mass array:\n{my_mass}")

In [None]:
# 5. Compute geometric center and center of mass
def get_center(atoms,
    weight_fun=lambda atom: 1 if atom.parent.id[0] != 'W' else 0):
    """Return the weighted mean (x, y, z) position of ``atoms``.

    Args:
        atoms: iterable of objects exposing a three-component ``coord``.
        weight_fun: callable returning the weight of one atom. The default
            gives weight 1 to atoms whose ``parent.id[0]`` is not 'W' and
            weight 0 to the rest, i.e. an unweighted centre that ignores
            'W' residues. Pass a function returning ``atom.mass`` to get a
            centre of mass instead.

    Returns:
        A tuple ``(x, y, z)`` of double-precision floats.

    Raises:
        ZeroDivisionError: if the weights add up to zero, which includes
            an empty ``atoms`` iterable.
    """
    xsum = ysum = zsum = 0.0
    acum = 0.0
    for atom in atoms:
        # Convert to Python floats so the running sums are accumulated in
        # double precision whatever dtype ``coord`` uses (NOTE(review):
        # Bio.PDB appears to store coordinates as float32 - confirm).
        x, y, z = (float(component) for component in atom.coord)
        weight = weight_fun(atom)
        acum += weight
        xsum += weight * x
        ysum += weight * y
        zsum += weight * z
    if acum == 0:
        # Fail the same way for "no atoms" and "every weight is zero"
        # rather than risk returning NaN coordinates.
        raise ZeroDivisionError("get_center: total weight of atoms is zero")
    return xsum / acum, ysum / acum, zsum / acum


# These calls belong at cell level: when indented under the function they
# sat after its ``return`` and were never executed.
# Unweighted centre first, then the mass-weighted centre.
print(get_center(p53_1tup.get_atoms()))
print(get_center(p53_1tup.get_atoms(),
    weight_fun=lambda atom: atom.mass if atom.parent.id[0] != 'W' else 0))

In [None]:
# 6. Compute center of mass and geometric center of each chain
# Per chain: columns 0-2 hold the centre from get_center's default
# weighting, columns 3-5 the centre weighted by atom mass ('W' residues
# get weight 0). Every cell is assigned in the loop below.
center_columns = ['X', 'Y', 'Z', 'X (Mass)', 'Y (Mass)', 'Z (Mass)']
my_center = np.ndarray((len(chain_names), 6))
for row, chain in enumerate(p53_1tup.get_chains()):
    my_center[row, :3] = get_center(chain.get_atoms())
    my_center[row, 3:] = get_center(
        chain.get_atoms(),
        weight_fun=lambda atom: atom.mass if atom.parent.id[0] != 'W' else 0)
weights = pd.DataFrame(my_center, index=chain_names, columns=center_columns)
print(weights)

In [None]:
## End of Notebook ##