# Structural Alignment and Evolutionary Analysis of Fusexin Proteins

Welcome! In this notebook, you will explore the structure and evolution of fusexin proteins using hands-on bioinformatics tools and methods. The protein structures are provided in the `example_fusexins` directory and are based on [this Nature Communications article](https://www.nature.com/articles/s41467-022-31564-1).

## Objectives
- **Visualize protein structures:** Explore and inspect fusexin protein 3D structures.
- **Structural alignment:** Apply multiple methods to align and compare protein structures.
- **Sequence analysis:** Perform sequence alignments and build phylogenetic trees.
- **Advanced analysis:** Use the foldtree method for deeper structural insights.

## Workflow Overview
1. Visualize fusexin protein structures
2. Align and compare structures
3. Perform sequence alignment and phylogenetic analysis
4. Apply foldtree for advanced structural analysis

---

Let's get started!

In [None]:
from Bio.PDB import Superimposer, PDBParser

def rigid_body_align(structure_path1, structure_path2, chain_id1='A', chain_id2='A'):
	"""
	Align two protein structures using rigid body superposition on C-alpha atoms.

	The first structure is the fixed reference; the second is the moving
	structure. C-alpha atoms are paired purely by their order in each chain
	(first with first, second with second, ...), NOT by a sequence
	alignment. If the chains differ in length, the longer one is truncated
	to the length of the shorter one, so the result is only meaningful when
	the two chains correspond residue-for-residue from the start.

	Only the first model (index 0) of each file is used.

	Args:
		structure_path1 (str): Path to the first (reference) PDB file.
		structure_path2 (str): Path to the second (moving) PDB file.
		chain_id1 (str): Chain ID to use from the first structure.
		chain_id2 (str): Chain ID to use from the second structure.

	Returns:
		rmsd (float): Root mean square deviation after alignment.
		super_imposer (Superimposer): Biopython Superimposer object with alignment info.

	Raises:
		ValueError: If either selected chain contains no C-alpha atoms.
	"""
	def calpha_atoms(structure, chain_id):
		# Calcium ions are stored with atom name 'CA' as well (in a residue
		# that is itself named 'CA'); they must not be mistaken for
		# alpha-carbons, so atoms belonging to such residues are skipped.
		return [
			atom
			for atom in structure[0][chain_id].get_atoms()
			if atom.get_id() == 'CA'
			and atom.get_parent().get_resname().strip() != 'CA'
		]

	parser = PDBParser(QUIET=True)
	structure1 = parser.get_structure('struct1', structure_path1)
	structure2 = parser.get_structure('struct2', structure_path2)

	atoms1 = calpha_atoms(structure1, chain_id1)
	atoms2 = calpha_atoms(structure2, chain_id2)

	# Fail early with a clear message instead of letting the superposition
	# break on empty coordinate arrays.
	if not atoms1 or not atoms2:
		raise ValueError(
			"No C-alpha atoms found in chain "
			f"{chain_id1!r} of {structure_path1!r} or chain "
			f"{chain_id2!r} of {structure_path2!r}"
		)

	# Superimposer needs equally long atom lists: keep the common prefix.
	min_len = min(len(atoms1), len(atoms2))
	atoms1 = atoms1[:min_len]
	atoms2 = atoms2[:min_len]

	sup = Superimposer()
	# First list is the fixed set, second list is the moving set.
	sup.set_atoms(atoms1, atoms2)
	# Transform every atom of the second structure (not only the C-alphas).
	# The transformed structure is local to this function and is not returned.
	sup.apply(structure2.get_atoms())

	return sup.rms, sup

In [None]:
import subprocess
import tempfile

def run_iqtree_with_lg_invariant(fasta_path, iqtree_path='iqtree2', threads=2):
	"""
	Launch IQ-TREE on an aligned FASTA file with the LG+I substitution model.

	The executable is started without a shell and without ``check=True``,
	so a failing run does not raise; inspect ``returncode`` and ``stderr``
	on the returned object instead.

	Args:
		fasta_path (str): Path to the input FASTA MSA file.
		iqtree_path (str): Path to the IQ-TREE executable (default: 'iqtree2').
		threads (int): Number of CPU threads to use (default: 2).

	Returns:
		subprocess.CompletedProcess: Finished process, with stdout and
		stderr captured as text.
	"""
	arguments = [iqtree_path]
	arguments += ['-s', fasta_path]       # input alignment
	arguments += ['-m', 'LG+I']           # LG matrix plus invariant sites
	arguments += ['-nt', str(threads)]    # thread count
	return subprocess.run(arguments, capture_output=True, text=True)

def run_mafft_on_sequences(sequences, mafft_path='mafft'):
	"""
	Run MAFFT (``--auto`` mode) on a list of sequences.

	The sequences are joined with newlines, written to a temporary file,
	and that file is passed to MAFFT. The temporary file is removed again
	before the function returns, whether or not MAFFT succeeded.

	MAFFT's exit status is not checked: if the run fails, whatever was
	written to stdout (possibly an empty string) is returned.

	Args:
		sequences (list of str): List of sequences in FASTA format (with headers).
		mafft_path (str): Path to the MAFFT executable (default: 'mafft').

	Returns:
		str: Aligned sequences in FASTA format.
	"""
	import os  # local import: only needed to clean up the temporary file

	# delete=False so the file survives being closed and can be opened by
	# the MAFFT process; it is removed explicitly in the finally block.
	fasta_file = tempfile.NamedTemporaryFile(mode='w+', delete=False)
	try:
		with fasta_file:
			fasta_file.write('\n'.join(sequences))
		# The file is closed (and therefore fully flushed) at this point.
		cmd = [mafft_path, '--auto', fasta_file.name]
		result = subprocess.run(cmd, capture_output=True, text=True)
	finally:
		os.unlink(fasta_file.name)
	return result.stdout

In [None]:
import nglview as nv
from Bio.PDB import PDBParser

def visualize_structure_with_coloring(pdb_path, color_ranges, chain_id='A', alpha=0.7):
	"""
	Visualize a protein chain as a coloured ribbon inside a grey surface.

	The chain is drawn three ways on top of each other:
	a grey surface with opacity ``alpha``, one cartoon per entry of
	``color_ranges`` in the requested colour, and a light-grey cartoon for
	every residue of the chain that is not covered by any range.

	Args:
		pdb_path (str): Path to the PDB file.
		color_ranges (iterable of tuples): (start_res, end_res, color) for
			coloring; residue numbers are inclusive.
		chain_id (str): Chain ID to visualize (default: 'A').
		alpha (float): Opacity of the grey surface (default: 0.7).

	Returns:
		The nglview widget created by ``nv.show_file``, with the
		representations above added.
	"""
	# Materialise once: the ranges are walked twice below, which would
	# silently yield nothing the second time for a generator.
	color_ranges = list(color_ranges)

	# In the NGL selection language a chain is written ':A'; a bare 'A'
	# is interpreted as a residue name and would not select the chain.
	chain_selection = f":{chain_id}"

	view = nv.show_file(pdb_path)
	view.clear_representations()

	# Grey surface (volume) around the whole chain.
	view.add_surface(opacity=alpha, color='grey', selection=chain_selection)

	# Coloured cartoon (ribbon) segments.
	for start, end, color in color_ranges:
		view.add_cartoon(
			selection=f"{chain_selection} and {start}-{end}",
			color=color,
		)

	# Remainder of the chain in a neutral colour. The coloured ranges are
	# excluded so the light-grey ribbon is not drawn over them.
	if color_ranges:
		covered = ' or '.join(f"{start}-{end}" for start, end, _ in color_ranges)
		remainder = f"{chain_selection} and not ( {covered} )"
	else:
		remainder = chain_selection
	view.add_cartoon(selection=remainder, color='lightgrey')
	return view