In [1]:
import os
import sys
import glob
sys.path.append("src")

from align import align_pdbs

In [2]:
# Input / output locations (paths are relative to the notebook's working directory)

# Structure that all inputs are aligned against.
reference = "./data/V1/V1.cif"

# Glob pattern selecting the structure files to align (mmCIF files here).
pdb_pattern = "./data/V*/V*.cif"

# Destination for the aligned structures.
output_dir = "./data/aligned"

In [3]:
# Alignment settings, modify as needed
#
# The alignment is performed using a selected subset of residues, called `core_residues`.
# These residues represent structurally stable regions that are conserved across all
# conformers of the molecule. By aligning only on this "core", we avoid distortion from
# flexible loops or termini and ensure that the superposition reflects the true global
# frame of the structure.
#
# What is in `core_residues`?
# - This list defines the residue indices used for alignment.
# - The ranges below were chosen because these regions are known to form the rigid
#   structural scaffold of the molecule. They exclude flexible tails, loops, or
#   variable segments.
#

# Residues used as the alignment core.
# Example values for gstRNaseP RNA; replace the spans for another molecule.
# Each pair is a half-open (start, stop) span with range() semantics, so the
# stop value itself is excluded, e.g. (13, 16) selects residues 13, 14 and 15.
core_residues = [
    residue
    for first, stop in (
        (13, 16),
        (20, 30),
        (38, 85),
        (97, 111),
        (249, 262),
        (273, 313),
        (326, 341),
        (386, 402),
    )
    for residue in range(first, stop)
]

In [4]:
# Collect the input structures; sorting makes the processing order deterministic.
pdb_files = sorted(glob.glob(pdb_pattern))

# Fail fast when the pattern matches nothing. glob returns an empty list in that
# case, and the alignment call would then have no inputs to process, which is
# easy to mistake for a successful run.
if not pdb_files:
    raise FileNotFoundError(
        f"No structure files match pattern {pdb_pattern!r}; "
        "check `pdb_pattern` and the current working directory."
    )

# Run the alignment of every matched file against `reference`, restricted to
# `core_residues`. `align_pdbs` is imported from the local `align` module; its
# internals are not visible in this notebook.
align_pdbs(
    pdb_files=pdb_files,
    ref_pdb=reference,
    output_dir=output_dir,
    core_residues=core_residues,
)

[INFO] Aligned and saved: V1.pdb
[INFO] Aligned and saved: V2.pdb
[INFO] Aligned and saved: V3.pdb
[INFO] Aligned and saved: V4.pdb
[INFO] Aligned and saved: V5.pdb
