# Structural Phylogenetics

In [1]:
import shutil
from pathlib import Path

import pandas as pd


def get_paths(protcode="BLDB"):
    """Collect the PDB files of one protein family together with their labels.

    Two locations under ``../../data`` are scanned:

    * structures listed in ``{protcode}_ref.csv`` (columns ``Entry ID`` and
      ``Auth Asym ID``), looked up as
      ``{protcode}/processed/{entry}_{asym}.pdb``; rows whose file does not
      exist are skipped silently;
    * every ``*.pdb`` file in ``{protcode}/generated/BQS``, in sorted order.

    Args:
        protcode: Family code, used as directory name and as label suffix.

    Returns:
        ``(paths, names)``: two parallel lists. ``paths`` holds ``Path``
        objects and each name is ``"{file stem}_{protcode}"``.
    """
    paths, names = [], []

    ref_path = Path(f"../../data/{protcode}_ref.csv")
    # Read identifiers verbatim: with default type inference a chain ID of
    # "NA" becomes NaN and numeric-looking IDs may be reformatted (e.g. "1"
    # -> 1.0 when the column also has blanks), so the file lookup would miss.
    ref_meta = pd.read_csv(ref_path, dtype=str, keep_default_na=False)
    for entry, asym in zip(ref_meta["Entry ID"], ref_meta["Auth Asym ID"]):
        p = Path(f"../../data/{protcode}/processed/{entry}_{asym}.pdb")
        if p.exists():
            paths.append(p)
            names.append(f"{p.stem}_{protcode}")

    # glob() yields files in filesystem order; sort so the result (and hence
    # the order of the inputs handed to the tree builders) is reproducible.
    for p in sorted(Path(f"../../data/{protcode}/generated/BQS").glob("*.pdb")):
        paths.append(p)
        names.append(f"{p.stem}_{protcode}")

    return paths, names

### Structural Phylogenetic Tree (Q-score)

In [None]:
from src.phylogenetics import qs_phylogenetic_tree

# Collect structure files and leaf labels for each family in a fixed order.
(
    (bldb_paths, bldb_names),
    (cytc_paths, cytc_names),
    (gfp_paths, gfp_names),
    (ras_paths, ras_names),
) = [get_paths(code) for code in ("BLDB", "CYTC", "GFP", "RAS")]

# Pool all families so a single tree is built over every structure.
all_paths = [*bldb_paths, *cytc_paths, *gfp_paths, *ras_paths]
all_names = [*bldb_names, *cytc_names, *gfp_names, *ras_names]

phylo_tree = qs_phylogenetic_tree(pdb_list=all_paths, names=all_names)

In [None]:
# Show the Q-score tree as Newick text and keep a copy on disk.
newick_text = phylo_tree["newick"]
print(newick_text)

WD = "wd"
out_dir = Path(WD)
out_dir.mkdir(parents=True, exist_ok=True)
with (out_dir / 'qs-tree.all.newick').open('w', encoding='utf-8') as handle:
    handle.write(newick_text)

Use a tree visualization tool (e.g., iTOL, https://itol.embl.de/) to view the Newick-format tree.

### Structural Phylogenetic Tree (3Di)

Prepare zipped PDB files for calculation.

In [None]:
# Staging folder that receives the renamed PDB copies before they are zipped.
WD = Path("./wd") / "3Di"
WD.mkdir(exist_ok=True, parents=True)


def prepare(protcode, wd=WD):
    """Copy every structure of one family into ``wd``.

    Each file returned by ``get_paths(protcode)`` is copied to
    ``wd / "{name}.pdb"``, where ``name`` is the matching label.

    Args:
        protcode: Family code passed on to ``get_paths``.
        wd: Destination directory; defaults to the value ``WD`` had when this
            function was defined.
    """
    source_files, labels = get_paths(protcode)
    for source_file, label in zip(source_files, labels):
        shutil.copy(source_file, wd / f"{label}.pdb")


# Stage the structures of every family in WD.
for family in ("BLDB", "CYTC", "GFP", "RAS"):
    prepare(family)

# Zip the contents of WD into "pdbs.zip" in the parent directory of WD.
shutil.make_archive(WD.parent / "pdbs", "zip", WD)

Upload the zipped file to FoldTree.
See more at https://github.com/DessimozLab/fold_tree.

### Phylogenetic Tree Summarization

Make sure you have installed DendroPy.

https://jeetsukumaran.github.io/DendroPy/

In [None]:
from src.phylogenetics import normalize

# Work in "wd": that is where the Q-score tree is written earlier in this
# notebook and where the visualization cell later reads sum.norm.newick from.
# The previous value ("PHYLO") matched neither, so a fresh top-to-bottom run
# could not find its inputs or outputs.
WD = Path("wd")
assert WD.exists(), f"working directory {WD} does not exist; run the Q-score cell first"

# qs-tree.all.newick comes from the Q-score cell; 3di.foldtree.newick is the
# FoldTree result and has to be saved into WD by hand.
for tree_stem in ("qs-tree.all", "3di.foldtree"):
    raw_tree = WD / f"{tree_stem}.newick"
    assert raw_tree.exists(), f"missing input tree: {raw_tree}"
    normalize(input_path=raw_tree, output_path=WD / f"{tree_stem}.norm.newick")

More about SumTrees.

https://jeetsukumaran.github.io/DendroPy/programs/sumtrees.html

In [None]:
# Run SumTrees inside WD on the two normalized trees; the summary tree is written there as sum.norm.newick (Newick format).
!cd {WD} && sumtrees --output=sum.norm.newick --output-tree-format newick --set-edges mean-length --suppress-annotations qs-tree.all.norm.newick 3di.foldtree.norm.newick

In [2]:
from ete4.smartview import TreeLayout
from ete4 import Tree
from pathlib import Path
import src.colorscheme as color

WD = Path("wd")

# Load the summary tree. The context manager closes the file handle as soon as
# the tree has been built (previously the handle was left open).
# NOTE(review): assumes Tree() reads the handle inside its constructor - confirm.
with open(str(WD / "sum.norm.newick")) as newick_file:
    tree = Tree(newick_file)

# Clear the support value on every node (presumably so it is not rendered).
for node in tree.traverse():
    node.support = None


def ns(node):
    """Apply the drawing style for one tree node (layout callback).

    Every node gets 1.5-wide horizontal and vertical lines, with the
    horizontal line drawn in ``color.CS_BG_DARK``. A leaf whose name has
    ``"SM"`` or ``"FM"`` as its first underscore-separated token is
    highlighted instead: size 3, a horizontal line of width 2, and both the
    node and that line coloured ``color.CS_FG_A`` (SM) or ``color.CS_FG_B``
    (FM). All other leaves keep the default style.
    """
    node.sm_style["hz_line_width"] = 1.5
    node.sm_style["vt_line_width"] = 1.5
    node.sm_style['hz_line_color'] = color.CS_BG_DARK

    if not node.is_leaf:
        return

    # Map the leading name token to the colour attribute used for highlighting.
    accent_attr = {"SM": "CS_FG_A", "FM": "CS_FG_B"}.get(node.name.split("_")[0])
    if accent_attr is None:
        return

    accent = getattr(color, accent_attr)
    for key, value in (
        ('size', 3),
        ('fgcolor', accent),
        ("hz_line_width", 2),
        ('hz_line_color', accent),
    ):
        node.sm_style[key] = value


# Single active layout that styles nodes through the ns() callback.
sum_tree_layout = TreeLayout(
    name="SUM_TREE",
    ns=ns,
    active=True,
    aligned_faces=True,
)

# Open the interactive viewer and keep its server running.
tree.explore(keep_server=True, layouts=[sum_tree_layout])

Added tree tree-1 with id 0.


In [None]:
from matplotlib import pyplot as plt
from matplotlib.lines import Line2D

# Legends
# Stand-alone legend figure using the same colours as the tree styling.
fig, ax = plt.subplots(figsize=(2, 2), dpi=200)

# Settings shared by the two dot-only entries (marker without a line).
dot_style = dict(marker="o", markeredgewidth=0, markersize=10, linestyle="")
legend_handles = [
    Line2D([0], [0], label="SM", markerfacecolor=color.CS_FG_A, **dot_style),
    Line2D([0], [0], label="FM", markerfacecolor=color.CS_FG_B, **dot_style),
    Line2D([0], [0], label="exp.", color=color.CS_BG_DARK, linestyle="-"),
]

ax.legend(handles=legend_handles)
# Hide grid and ticks so only the legend box remains visible.
ax.grid(False)
ax.set_xticks([])
ax.set_yticks([])
plt.show()