# 🧬 Evolutionary Tree Analyzer

A modular workflow to:
1. Align biological sequences using MUSCLE  
2. Build phylogenetic trees using a parsimony-style and a likelihood-style method  
3. Visualize and optionally export the tree

---

## 1️⃣ Align Sequences

This step aligns input sequences using MUSCLE and saves the result to `output/aligned_sequences.fasta`.

MUSCLE citation:
Edgar, R.C. (2004) Nucleic Acids Res 32(5):1792–1797. http://www.drive5.com/muscle

In [None]:
# Setting up the environment
import sys
import os

# Add the src directory to the import path. The membership check keeps the
# cell idempotent: re-running it does not pile up duplicate sys.path entries.
src_dir = os.path.abspath("../src")
if src_dir not in sys.path:
    sys.path.append(src_dir)

## ✨ Change the file path below to point to YOUR sequences (unless you really do want to see the trees for the bundled example file, `example_large.fa`)

In [None]:
# Resolve the input and output locations relative to the notebook's directory
input_fasta = os.path.abspath("../data/example_large.fa")
output_fasta = os.path.abspath("../output/aligned_sequences.fasta")

# Sanity check: report whether the input file can be found (expected: True)
input_found = os.path.exists(input_fasta)
print("Input exists?", input_found)

# Imported here rather than at the top: the module lives in src/, which must
# already be on sys.path for this import to resolve
from align_sequences import align_sequences

# Align the input sequences; the result goes to output_fasta
align_sequences(input_fasta, output_fasta)

## 2️⃣ Build Tree (Parsimony-style)

This uses Biopython to build a tree from the aligned sequences, using identity-based distances and UPGMA.


In [None]:
import importlib.util

# Where build_tree.py lives on disk
build_tree_path = os.path.abspath("../src/build_tree.py")

# Import the module straight from its file path instead of going through sys.path
spec = importlib.util.spec_from_file_location("build_tree", build_tree_path)
build_tree = importlib.util.module_from_spec(spec)
loader = spec.loader
loader.exec_module(build_tree)

# Expose both tree builders as notebook-level names
build_parsimony_tree = build_tree.build_parsimony_tree
build_likelihood_tree = build_tree.build_likelihood_tree

# Alignment to read and .newick file to write for the parsimony tree
aligned_fasta = os.path.abspath("../output/aligned_sequences.fasta")
tree_output = os.path.abspath("../output/parsimony_tree.newick")

# Build the parsimony tree
build_parsimony_tree(aligned_fasta, tree_output)


## 🔨 Build Tree (Likelihood-style)
This builds a tree from the aligned sequences using UPGMA and identity-based distances, then saves it in Newick format.


In [None]:
def build_likelihood_tree(aligned_fasta, output_newick="output/ml_tree.newick"):
    """
    Build a UPGMA tree from an alignment and save it in Newick format.

    Despite the name, no likelihood model is fitted here: pairwise distances
    are computed with Biopython's "identity" model and clustered with UPGMA.

    Args:
        aligned_fasta: Path (or handle) of the aligned sequences in FASTA format.
        output_newick: Destination path (or handle) for the Newick tree.

    Returns:
        None. The tree is written to ``output_newick`` and a confirmation
        message is printed.
    """
    # Imported locally so the function works regardless of which notebook
    # cells have already run; without these the names below are undefined.
    import os

    from Bio import AlignIO, Phylo
    from Bio.Phylo.TreeConstruction import (
        DistanceCalculator,
        DistanceTreeConstructor,
    )

    alignment = AlignIO.read(aligned_fasta, "fasta")
    distance_matrix = DistanceCalculator("identity").get_distance(alignment)
    tree = DistanceTreeConstructor().upgma(distance_matrix)

    # Create the destination folder when a filesystem path was given, so the
    # write does not fail on a fresh checkout without an output directory.
    if isinstance(output_newick, (str, os.PathLike)):
        output_dir = os.path.dirname(os.fspath(output_newick))
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

    Phylo.write(tree, output_newick, "newick")
    print(f"Likelihood-like tree saved to: {output_newick}")

In [None]:
# Where build_tree.py lives on disk
build_tree_path = os.path.abspath("../src/build_tree.py")

# Execute the file again as a fresh module object named "build_tree"
spec = importlib.util.spec_from_file_location("build_tree", build_tree_path)
build_tree = importlib.util.module_from_spec(spec)
loader = spec.loader
loader.exec_module(build_tree)

# Rebind the notebook-level names to the functions of the freshly loaded module
build_parsimony_tree = build_tree.build_parsimony_tree
build_likelihood_tree = build_tree.build_likelihood_tree

# Alignment to read and .newick file to write for the likelihood-style tree
aligned_fasta = os.path.abspath("../output/aligned_sequences.fasta")
tree_output = os.path.abspath("../output/ml_tree.newick")

# Build the likelihood-style tree
build_likelihood_tree(aligned_fasta, tree_output)


## 3️⃣ Visualize Trees

This step renders each tree inline and saves a `.png` version of it in `output/tree_images/`.


In [None]:
# Trees to render: a display label, the source Newick file, and the PNG to write
trees_to_visualize = [
    dict(
        label="Maximum Likelihood Tree",
        tree_path="../output/ml_tree.newick",
        png_path="../output/tree_images/ml_tree.png",
    ),
    dict(
        label="Parsimony Tree",
        tree_path="../output/parsimony_tree.newick",
        png_path="../output/tree_images/parsimony_tree.png",
    ),
]

from Bio import Phylo
import matplotlib.pyplot as plt

def visualize_tree(newick_file, save_path=None):
    """
    Draw a Newick tree on a 10x5 inch figure and optionally save it as an image.

    Args:
        newick_file: Path to the tree file in Newick format.
        save_path: Optional image path. When given, the figure is written
            there at 300 dpi and then closed; when omitted, the figure is
            left open so the notebook can display it.

    Returns:
        None.
    """
    import os  # local import keeps this cell independent of earlier cells

    tree = Phylo.read(newick_file, "newick")

    # Hand our own axes to Phylo.draw. Without `axes`, draw() opens a new
    # default-sized figure, so a separately created 10x5 figure would stay
    # empty and the requested size would never reach the plot.
    fig, ax = plt.subplots(figsize=(10, 5))
    Phylo.draw(tree, axes=ax, do_show=False)

    if save_path:
        # Create the target folder first so savefig does not fail when
        # the image directory does not exist yet.
        if isinstance(save_path, (str, os.PathLike)):
            image_dir = os.path.dirname(os.fspath(save_path))
            if image_dir:
                os.makedirs(image_dir, exist_ok=True)
        fig.savefig(save_path, dpi=300)
        # Close this exact figure so saved plots do not accumulate in memory.
        plt.close(fig)

# For every configured tree: announce it, draw it inline, then write the PNG copy
for entry in trees_to_visualize:
    print(f"🧬 {entry['label']}")
    newick_path = entry["tree_path"]
    image_path = entry["png_path"]
    visualize_tree(newick_path)                        # first pass: shown in the notebook
    visualize_tree(newick_path, save_path=image_path)  # second pass: saved to disk
    print(f"✅ Saved to: {image_path}\n")



## ✅ Done!

- The trees were built and displayed successfully.
- A copy of each tree was saved as a `.png` in `output/tree_images/` for use in handouts, slides, or interactive activities.

🌻 Created with the **Pipeline Bio** toolkit.