In [1]:
# detect_rooted_biopython.py
# pip install biopython
from Bio import Phylo
from pathlib import Path
import sys
import re

# Patterns for the explicit rooting tags "[&R]" and "[&U]". Matching is
# case-insensitive and allows whitespace just inside the square brackets.
TAG_ROOTED = re.compile(r"\[\s*&R\s*\]", flags=re.I)
TAG_UNROOTED = re.compile(r"\[\s*&U\s*\]", flags=re.I)

def detect_from_text(nwk_text: str):
    """Look for an explicit rooting tag in raw Newick text.

    The rooted pattern is tried before the unrooted one, so text that
    contains both tags is reported as rooted.

    Args:
        nwk_text: Raw text to scan for a ``[&R]`` or ``[&U]`` tag.

    Returns:
        A verdict string naming the tag that matched, or ``None`` when
        neither pattern matches.
    """
    tagged_verdicts = (
        (TAG_ROOTED, "rooted (via [&R] tag)"),
        (TAG_UNROOTED, "unrooted (via [&U] tag)"),
    )
    for pattern, verdict in tagged_verdicts:
        if pattern.search(nwk_text):
            return verdict
    return None  # no explicit tag found

def detect_file(path: Path) -> str:
    """Classify the tree stored in *path* as rooted or unrooted.

    The file is always parsed with Biopython, even when an explicit tag
    is present, so any error raised while reading or parsing propagates
    to the caller.

    Decision order:
      1. An explicit ``[&R]``/``[&U]`` tag found in the raw text wins.
      2. A parsed tree whose ``rooted`` attribute is ``True`` is rooted.
      3. Otherwise the number of children at the root decides:
         three or more -> unrooted, exactly two -> rooted,
         anything else -> unknown.

    Args:
        path: Location of the Newick file to inspect.

    Returns:
        A human-readable verdict string.
    """
    raw_text = path.read_text()
    explicit_verdict = detect_from_text(raw_text)

    # Parse before consulting the tag verdict so malformed files still
    # raise here.
    parsed = Phylo.read(path, "newick")

    if explicit_verdict:
        return explicit_verdict

    n_root_children = len(getattr(parsed.root, "clades", []))
    rooted_flag = getattr(parsed, "rooted", None)

    # Respect an explicit True from the parser; any other value falls
    # through to the shape-based heuristic below.
    if rooted_flag is True:
        return "rooted"

    # Three or more children at the root is reported as unrooted whether
    # or not the parser set ``rooted`` to False.
    if n_root_children >= 3:
        return "unrooted (trifurcating root)"
    if n_root_children == 2:
        return "rooted (binary root)"
    return "unknown/edge-case"

def main(paths):
    """Print one verdict line per path in *paths*.

    Each entry is converted to a ``Path`` and passed to ``detect_file``.
    Any ``Exception`` raised for an entry is reported on that entry's
    line and processing continues with the next one.

    Args:
        paths: Iterable of path-like values to inspect.
    """
    for target in map(Path, paths):
        label = target.name
        try:
            print(f"{label}: {detect_file(target)}")
        except Exception as err:  # report the failure and keep going
            print(f"{label}: ERROR - {err}")

# Script entry point. main() requires its `paths` argument, so a bare
# main() call raises TypeError; pass the command-line arguments (minus the
# program name) instead. The guard keeps an import of this module from
# triggering any file processing.
if __name__ == "__main__":
    main(sys.argv[1:])


-f: ERROR - [Errno 2] No such file or directory: '-f'
kernel-d7c8b523-e66c-439e-adc9-904709af710e.json: unrooted (trifurcating root)
