# Protein Structure Analysis using Biopython

In this Jupyter Notebook, we will demonstrate how to the 3D structure of a protein specified by its PDB ID using Biopython. Specifically, we will load and analyze the structure of a protein with a given PDB (Protein Data Bank) ID. We will focus on accessing and displaying properties of the atoms within the protein structure.


In [1]:
# Import necessary libraries
from Bio import PDB
import numpy as np

In [2]:
# Define the PDB ID of the structure you want to analyze
pdb_id = "1xyz"  # Replace with your PDB ID

In [3]:
# Download the PDB file from the RCSB PDB (if not already downloaded)
pdbl = PDB.PDBList()
pdbl.retrieve_pdb_file(pdb_id, pdir='.')

Structure exists: '.\1xyz.cif' 




'.\\1xyz.cif'

In [4]:
# Load the PDB file
parser = PDB.PDBParser(QUIET=True)
structure = parser.get_structure(pdb_id, f'{pdb_id}.pdb')

In [5]:
# Function to analyze the protein structure
def analyze_protein_structure(structure):
    # Iterate through the atoms in the structure
    for model in structure:
        for chain in model:
            for residue in chain:
                for atom in residue:
                    # Access atom properties
                    atom_name = atom.get_name()
                    atom_coords = atom.get_coord()
                    occupancy = atom.get_occupancy()
                    bfactor = atom.get_bfactor()

                    print(f"Atom: {atom_name}, Coordinates: {atom_coords}, Occupancy: {occupancy}, B-factor: {bfactor}")


In [6]:
# Call the analysis function
analyze_protein_structure(structure)

Atom: N, Coordinates: [41.511 25.152 36.876], Occupancy: 1.0, B-factor: 22.29
Atom: CA, Coordinates: [40.907 25.555 35.563], Occupancy: 1.0, B-factor: 21.77
Atom: C, Coordinates: [39.684 24.707 35.106], Occupancy: 1.0, B-factor: 19.45
Atom: O, Coordinates: [39.191 24.916 34.001], Occupancy: 1.0, B-factor: 18.37
Atom: CB, Coordinates: [41.97  25.57  34.422], Occupancy: 1.0, B-factor: 25.18
Atom: CG, Coordinates: [43.166 26.528 34.694], Occupancy: 1.0, B-factor: 28.53
Atom: OD1, Coordinates: [43.247 27.183 35.761], Occupancy: 1.0, B-factor: 31.23
Atom: ND2, Coordinates: [44.122 26.565 33.756], Occupancy: 1.0, B-factor: 28.63
Atom: N, Coordinates: [39.186 23.778 35.939], Occupancy: 1.0, B-factor: 15.7
Atom: CA, Coordinates: [38.027 22.952 35.538], Occupancy: 1.0, B-factor: 14.27
Atom: C, Coordinates: [36.73  23.719 35.746], Occupancy: 1.0, B-factor: 12.59
Atom: O, Coordinates: [36.707 24.699 36.48 ], Occupancy: 1.0, B-factor: 12.14
Atom: CB, Coordinates: [37.986 21.654 36.326], Occupancy:

In [7]:
# Function to calculate the center of mass for a chain
def calculate_center_of_mass(chain):
    atoms = []
    for residue in chain:
        for atom in residue:
            coords = atom.get_coord()
            atoms.append(coords)
    if len(atoms) > 0:
        center_of_mass = np.mean(atoms, axis=0)
        return center_of_mass
    return None

In [8]:
# Function to calculate the distance between two specified atoms
def calculate_distance(structure, atom1, atom2):
    atom1_coords = None
    atom2_coords = None
    for model in structure:
        for chain in model:
            for residue in chain:
                for atom in residue:
                    if atom.get_id() == atom1:
                        atom1_coords = atom.get_coord()
                    elif atom.get_id() == atom2:
                        atom2_coords = atom.get_coord()
    if atom1_coords is not None and atom2_coords is not None:
        distance = np.linalg.norm(atom1_coords - atom2_coords)
        return distance
    return None

In [9]:
# Analyze the structure
for model in structure:
    for chain in model:
        # Calculate and print the center of mass for each chain
        center_of_mass = calculate_center_of_mass(chain)
        if center_of_mass is not None:
            print(f"Chain {chain.get_id()} Center of Mass: {center_of_mass}")

Chain A Center of Mass: [16.638699 16.246683 37.621258]
Chain B Center of Mass: [ 7.4387755 33.858883  71.27614  ]


In [10]:
# Specify the atoms for distance calculation (replace with your desired atoms)
atom1_id = ('A', 123, 'CA')
atom2_id = ('A', 123, 'CB')

In [11]:
# Calculate and print the distance between specified atoms
distance = calculate_distance(structure, atom1_id, atom2_id)
if distance is not None:
    print(f"Distance between Atom {atom1_id} and Atom {atom2_id}: {distance:.2f} Angstroms")

In [20]:
from Bio.SVDSuperimposer import SVDSuperimposer

In [21]:
# Load the reference structure (1xyz.pdb)
parser = PDB.PDBParser(QUIET=True)
reference_structure = parser.get_structure("1xyz", "1xyz.pdb")

In [22]:
# Load the structure you want to compare (the same structure, in this case)
structure_to_compare = parser.get_structure("1xyz", "1xyz.pdb")

In [23]:
# Extract atom coordinates for both structures and convert to NumPy arrays
reference_atoms = np.array([atom.get_coord() for atom in reference_structure.get_atoms()])
compare_atoms = np.array([atom.get_coord() for atom in structure_to_compare.get_atoms()])

In [24]:
# Calculate the RMSD
sup = SVDSuperimposer()
sup.set(reference_atoms, compare_atoms)
sup.run()

In [25]:
rmsd = sup.get_rms()

In [26]:
print(f"RMSD between 1xyz (reference) and 1xyz (to compare): {rmsd:.2f} Angstroms")

RMSD between 1xyz (reference) and 1xyz (to compare): 0.00 Angstroms
