In [1]:
#!/usr/bin/env python3

import os
import re

# Directory scanned for input tree files; only names ending in '.nwk' are used.
SOURCE_DIR = 'misc_trees'
# Directory that receives each labeled tree under its original filename.
DEST_DIR = 'misc_trees_labeled'

# Create the output directory up front; exist_ok=True makes re-running the
# script safe when the directory is already there.
os.makedirs(DEST_DIR, exist_ok=True)

# Name prefixes whose branches are tagged {Foreground}; every other named
# branch is tagged {Background}.
foreground_prefixes = ['mm', 'ml', 'md', 'mb', 'dr', 'rf']

# Regex: capture (1) node/tip name, (2) colon+branch_length.
# The name class excludes only "(", ")", "," and ":", so group 1 may carry
# whitespace that surrounds the name in the file. A node with no name at all
# (e.g. "):0.1") does not match and is therefore left unlabeled.
pattern = re.compile(r'([^\(\),:]+)(:[0-9\.eE\-\+]+)')


def replacer(match):
    """Return the matched ``name:length`` text with a branch label inserted.

    Intended as the replacement callback for ``pattern.sub``.

    Args:
        match: A match produced by ``pattern``; group 1 is the node/tip name
            and group 2 is the colon plus branch length.

    Returns:
        The original name, followed by ``{Foreground}`` when the name starts
        with one of ``foreground_prefixes`` and ``{Background}`` otherwise,
        followed by the unchanged branch length.
    """
    taxon_name = match.group(1)
    branch_length = match.group(2)
    # Group 1 can begin with blanks (a space after a comma, or indentation
    # left over once line breaks are removed). Test the prefix on the
    # left-trimmed name so such taxa are not misclassified as background;
    # the emitted text itself keeps the original spacing.
    is_foreground = taxon_name.lstrip().startswith(tuple(foreground_prefixes))
    label = 'Foreground' if is_foreground else 'Background'
    return f"{taxon_name}{{{label}}}{branch_length}"

# Label every '.nwk' file found directly inside SOURCE_DIR and write the
# result to DEST_DIR under the same filename.
for filename in os.listdir(SOURCE_DIR):
    if not filename.endswith('.nwk'):
        continue

    source_file = os.path.join(SOURCE_DIR, filename)
    dest_file = os.path.join(DEST_DIR, filename)

    with open(source_file, 'r') as infile:
        data = infile.read()

    # Force the tree onto one line: delete every LF and CR character.
    # Any other whitespace (spaces, tabs) is left untouched.
    data = data.translate(str.maketrans('', '', '\n\r'))

    # Insert the {Foreground}/{Background} tag in front of each branch length.
    data_labeled = pattern.sub(replacer, data)

    # write() appends nothing itself, so the output has no trailing newline.
    with open(dest_file, 'w') as outfile:
        outfile.write(data_labeled)

    print(f"Processed {filename} -> {dest_file}")


Processed NP_054706.1_nucleotide.fasta_aligned_codon_alignment.fasta_pruned.nwk -> misc_trees_labeled\NP_054706.1_nucleotide.fasta_aligned_codon_alignment.fasta_pruned.nwk
Processed O14793.1_nucleotide.fasta_aligned_codon_alignment.fasta_pruned.nwk -> misc_trees_labeled\O14793.1_nucleotide.fasta_aligned_codon_alignment.fasta_pruned.nwk
Processed O15217.1_nucleotide.fasta_aligned_codon_alignment.fasta_pruned.nwk -> misc_trees_labeled\O15217.1_nucleotide.fasta_aligned_codon_alignment.fasta_pruned.nwk
Processed O43684.1_nucleotide.fasta_aligned_codon_alignment.fasta_pruned.nwk -> misc_trees_labeled\O43684.1_nucleotide.fasta_aligned_codon_alignment.fasta_pruned.nwk
Processed O75355.2_nucleotide.fasta_aligned_codon_alignment.fasta_pruned.nwk -> misc_trees_labeled\O75355.2_nucleotide.fasta_aligned_codon_alignment.fasta_pruned.nwk
Processed O75907.2_nucleotide.fasta_aligned_codon_alignment.fasta_pruned.nwk -> misc_trees_labeled\O75907.2_nucleotide.fasta_aligned_codon_alignment.fasta_pruned.nwk