## Align Structures
Compare the crystal structures of different enzymes

Relevant websites:
- ESM Forge: https://forge.evolutionaryscale.ai/
- AlphaFold Server: https://alphafoldserver.com/

In [29]:
import esm
import py3Dmol
import biotite.structure.io.pdbx as pdbx
from biotite.structure.io.pdb import PDBFile
from biotite.structure import superimpose, rmsd
from io import StringIO
import warnings


# Define structure paths
# structurePathA is passed as file1 (the reference) and structurePathB as file2
# (the structure that gets superimposed) when align_and_visualize_cif is called.
# NOTE(review): structurePathA is an absolute, machine-specific path.
structurePathA = '/Users/ca34522/Documents/Pymol/TEV (1LVM).pdb'
structurePathB = 'AF3/fold_tev_proteinmpnn/fold_tev_proteinmpnn_model_0.cif'

# Optional: highlight rebuilt loops
# Residue selections to recolor on the aligned structure; the commented-out
# example shows the expected form (residue ranges written as strings).
highlightAAs = [] # ['129-147', '188-198']

In [30]:
# Suppress irrelevant warnings: ignore any warning whose message starts with
# text matching this regular expression.
# NOTE(review): presumably raised by the structure parser when an optional
# attribute is missing from the input file -- confirm the exact source.
warnings.filterwarnings('ignore', message='Attribute .* not found within')

def align_and_visualize_cif(file1, file2, chain1='A', chain2='A', highlight=None):
    """
    Superimpose one structure onto another and visualize the overlay.

    The CA atoms of the selected chains are paired by position (the longer
    selection is truncated to the length of the shorter one). The
    superposition of ``file2`` onto ``file1`` is computed from those pairs,
    the resulting transformation is applied to every atom of ``file2``, and
    the CA RMSD is printed.

    Parameters:
    -----------
    file1 : str
        Reference CIF or PDB file
    file2 : str
        CIF or PDB file to align
    chain1, chain2 : str
        Chain IDs to use for alignment
    highlight : list of str, optional
        Residue selection (e.g. ['129-147', '188-198']) handed unchanged to
        the viewer's 'resi' selector; matching residues of the aligned
        structure are drawn in red. None or an empty list disables
        highlighting.

    Returns:
    --------
    py3Dmol.view
        Viewer holding the reference (lime) and the aligned structure
        (fuchsia). Call ``.show()`` on it to render.

    Raises:
    -------
    ValueError
        If a file name does not end in '.pdb' or '.cif', or if a selected
        chain contains no CA atoms.
    """

    def load_structure(path):
        """Read the first model of a .pdb or .cif file."""
        lowered = path.lower()
        if lowered.endswith('.pdb'):
            return PDBFile.read(path).get_structure(model=1)
        if lowered.endswith('.cif'):
            return pdbx.get_structure(pdbx.CIFFile.read(path), model=1)
        raise ValueError(f"Unknown format: {path}")

    def select_ca(structure, chain_id, path):
        """Return the atoms named 'CA' in the given chain, or raise if there are none."""
        # NOTE(review): the selection is by atom name only, so any non-protein
        # atom named 'CA' in the chain (e.g. a calcium ion) would be included.
        mask = (structure.chain_id == chain_id) & (structure.atom_name == 'CA')
        ca_atoms = structure[mask]
        if len(ca_atoms) == 0:
            # Fail here with a clear message instead of inside superimpose()
            raise ValueError(f"No CA atoms found in chain '{chain_id}' of {path}")
        return ca_atoms

    def to_pdb_string(structure):
        """Serialize a structure to PDB-formatted text for the viewer."""
        buffer = StringIO()
        pdb_file = PDBFile()
        pdb_file.set_structure(structure)
        pdb_file.write(buffer)
        return buffer.getvalue()

    # Load PDB and CIF files
    print(f'Loading Structures:\n  {file1}\n  {file2}\n\n')
    structure1 = load_structure(file1)
    structure2 = load_structure(file2)

    # Get CA atoms for alignment
    ca1 = select_ca(structure1, chain1, file1)
    ca2 = select_ca(structure2, chain2, file2)

    # Match lengths. Atoms are paired purely by position, so this assumes both
    # chains start at the same residue and have no internal gaps.
    n_atoms = min(len(ca1), len(ca2))
    ca1 = ca1[:n_atoms]
    ca2 = ca2[:n_atoms]

    # Superimpose CA atoms to get transformation
    ca2_aligned, transformation = superimpose(ca1, ca2)

    # Apply transformation to the full structure (all atoms, not only CA)
    structure2_aligned = structure2.copy()
    structure2_aligned.coord = transformation.apply(structure2.coord)

    # Print alignment info
    print(f'RMSD: {rmsd(ca1, ca2_aligned):.3f} Å ({n_atoms} CA atoms)')

    # Visualize: model 0 is the reference, model 1 the aligned structure
    view = py3Dmol.view(width=1000, height=1000)
    view.addModel(to_pdb_string(structure1), 'pdb')
    view.addModel(to_pdb_string(structure2_aligned), 'pdb')
    view.setStyle({'model': 0}, {'cartoon': {'color': 'lime', 'opacity': 1.0}})
    view.setStyle({'model': 1}, {'cartoon': {'color': 'fuchsia'}})
    if highlight:
        # Skipped for None and for an empty list: nothing to recolor
        view.setStyle({'model': 1, 'resi': highlight}, {'cartoon': {'color': 'red'}})
    view.zoomTo()

    return view


# Superimpose structure B onto structure A (the reference) and build the overlay
view = align_and_visualize_cif(structurePathA, structurePathB, highlight=highlightAAs)

# Emit a blank line after the printed RMSD, then render the viewer
print()
view.show()

Loading Structures:
  /Users/ca34522/Documents/Pymol/TEV (1LVM).pdb
  AF3/fold_tev_proteinmpnn/fold_tev_proteinmpnn_model_0.cif


RMSD: 0.694 Å (221 CA atoms)

