In [None]:
import os
import subprocess

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
from ete3 import Tree

import visualize_tree

In [None]:
# Input locations and run parameters shared by the cells below.
# AnnData (.h5ad) file that is loaded with scanpy to build the backbone tree.
hbc_mito_path = "../input/hbc_mito.h5ad"
# Comma-separated annotation table read by add_annotation; its first two
# columns are used as the node-name -> cell-type lookup.
cell_map_type_path = "../../Data/Embryo 1/annotation_main_new3.csv"
# Sub-cluster folders 0 .. no_of_sub_clusters - 1 are visited when the
# per-cluster trees are grafted onto the backbone.
no_of_sub_clusters = 14

In [None]:
def avg_mutation_profile(adata, obs_col="leiden"):
    """
    Compute the mean mutation profile of every cluster.

    Parameters
    ----------
    adata : AnnData-like
        Object exposing ``obs`` (per-cell table), ``var_names`` and
        boolean-mask indexing that yields a subset with an ``X`` matrix.
    obs_col : str, default "leiden"
        Column of ``adata.obs`` holding the cluster label of each cell.

    Returns
    -------
    pandas.DataFrame
        One row per distinct label (sorted), one column per entry of
        ``adata.var_names``; each row is the column-wise mean of ``X`` over
        the cells carrying that label.
    """
    labels = adata.obs[obs_col]
    clusters = sorted(labels.unique().tolist())
    avg_mut = pd.DataFrame(
        np.zeros((len(clusters), len(adata.var_names))),
        index=clusters,
        columns=adata.var_names,
    )
    for cluster in clusters:
        # ravel() flattens the result in case X.mean(axis=0) comes back as a
        # 2-D (1, n_vars) matrix instead of a 1-D array.
        avg_mut.loc[cluster, :] = np.asarray(
            adata[labels == cluster].X.mean(axis=0)
        ).ravel()
    return avg_mut


def generate_lintimat_data(avg_mut_01, out_dir="../input/backbone_tree", n_genes=2000):
    """
    Write the LinTIMaT input files for a binary cluster x mutation table.

    Parameters
    ----------
    avg_mut_01 : pandas.DataFrame
        Rows are clusters, columns are mutations. Values are cast to int and
        are expected to be 0 (absent) or 1 (present); any other value is
        reported with ``print(value, "error")`` and written through unchanged.
    out_dir : str, default "../input/backbone_tree"
        Directory receiving all output files (created if missing).
    n_genes : int, default 2000
        Number of all-zero placeholder expression columns written to the data
        matrix, and number of rows in ``top2000.txt`` (the file name is kept
        regardless of the value).

    Files written to ``out_dir``
    ----------------------------
    mutation_mapping.txt
        ``mutation name <TAB> integer code`` (code = column position).
    cell_mutation_mapping.txt
        ``cluster <TAB> HMID``, where the HMID joins one token per column with
        "-": "NONE" for 0, the mutation's integer code for 1.
    top2000.txt
        The integers ``0 .. n_genes - 1``, one per line.
    Data_matrix_Comb_final2_for_lintimat.txt
        Index "Cells", columns ClusterIdent (category code of the cluster
        name), HMID, then the ``n_genes`` zero columns.
    lintimat_cell_type_map.txt
        ``category code <TAB> cluster name``.
    lintimat_txt_label_HMID.txt
        The first two columns (ClusterIdent, HMID) of the data matrix.
    """
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    mut = avg_mut_01.astype(int).astype(str)

    site_names = mut.columns.to_list()
    le_name_mapping = list(zip(site_names, range(len(site_names))))
    # With repeated column names the dict keeps the last position, which is
    # the code every copy of that column is encoded with.
    le_name_mapping_dict = dict(le_name_mapping)
    pd.DataFrame(le_name_mapping).to_csv(
        os.path.join(out_dir, "mutation_mapping.txt"),
        sep="\t",
        header=False,
        index=False,
    )

    # Encode the whole table at once instead of cell-by-cell chained
    # assignment, which is not guaranteed to write back into the frame.
    states = mut.to_numpy(dtype=object)
    site_codes = np.array(
        [str(le_name_mapping_dict[name]) for name in site_names], dtype=object
    )
    absent = states == "0"
    present = states == "1"
    for unexpected in states[~(absent | present)]:
        print(unexpected, "error")
    encoded = np.where(
        absent, "NONE", np.where(present, site_codes[np.newaxis, :], states)
    )

    mutations = pd.DataFrame(
        {"HMID": ["-".join(row) for row in encoded]}, index=mut.index
    )
    mutations.to_csv(
        os.path.join(out_dir, "cell_mutation_mapping.txt"),
        sep="\t",
        header=False,
        index=True,
    )

    pd.DataFrame(range(n_genes)).to_csv(
        os.path.join(out_dir, "top2000.txt"), sep="\t", index=False, header=False
    )

    # rename() returns a new Index, so the caller's frame keeps its own name.
    lintimat_format = pd.DataFrame(
        np.zeros((mutations.shape[0], n_genes)),
        index=mut.index.rename("Cells"),
    )
    cluster_ident = pd.Series(mut.index, index=mut.index).astype("category")
    lintimat_format.insert(0, "ClusterIdent", cluster_ident.cat.codes.to_numpy())
    lintimat_format.insert(1, "HMID", mutations["HMID"].to_numpy())
    clusters_to_numbers = pd.DataFrame(list(enumerate(cluster_ident.cat.categories)))

    lintimat_format.to_csv(
        os.path.join(out_dir, "Data_matrix_Comb_final2_for_lintimat.txt"), sep="\t"
    )
    clusters_to_numbers.to_csv(
        os.path.join(out_dir, "lintimat_cell_type_map.txt"),
        sep="\t",
        index=False,
        header=False,
    )
    lintimat_format.iloc[:, :2].to_csv(
        os.path.join(out_dir, "lintimat_txt_label_HMID.txt"), sep="\t"
    )


def get_mutation_all(mutation, mutation_map_names):
    """
    Expand a "-"-separated mutation string into a list of one-element sets.

    An empty string yields one ``{"NONE"}`` set per entry of
    ``mutation_map_names``. Otherwise every token becomes a set: ``{"NONE"}``
    for the literal token "NONE", else the name found at position
    ``int(token)`` of ``mutation_map_names``.
    """
    if mutation == "":
        return [{"NONE"} for _ in range(len(mutation_map_names))]
    return [
        {"NONE"} if token == "NONE" else {mutation_map_names[int(token)]}
        for token in mutation.split("-")
    ]


def add_attributes(tree, cell_map_mutation):
    """
    Recursively attach id / mutation features to ``tree`` and its descendants.

    Every visited node receives, via ``add_features``:

    * ``id`` - "i" followed by a running counter kept on the function object
      (``add_attributes.thisid``), so numbering continues across calls;
    * ``mutation`` - the node's entry in ``cell_map_mutation`` or "" when the
      node name is not a key;
    * ``mutation_all`` - the expanded per-site sets (all ``{"NONE"}`` when the
      node name is not a key);
    * ``mutation_diff`` - always "NONE".

    Reads the module-level list ``mutation_map_names``, which must exist
    before the first call.
    """
    if tree is None:
        return

    add_attributes.thisid = getattr(add_attributes, "thisid", 0) + 1

    site_count = len(mutation_map_names)
    mutation = ""
    mutation_all = [{"NONE"} for _ in range(site_count)]

    node_name = tree.name
    if node_name in cell_map_mutation:
        tree.name = node_name
        mutation = cell_map_mutation[node_name]
        mutation_all = get_mutation_all(mutation, mutation_map_names)
    if node_name == "normal":
        # NOTE(review): 0 * n is always the integer 0 - confirm whether a
        # per-site value was intended here.
        mutation = 0 * site_count

    tree.add_features(
        id="i" + str(add_attributes.thisid),
        mutation=mutation,
        mutation_all=mutation_all,  # important
        mutation_diff="NONE",
    )
    for child in tree.children:
        add_attributes(child, cell_map_mutation)


def order_tree(tree):
    """
    Sort the children of every node in place, smallest subtree first.

    Children are ordered by (number of leaf names, number of descendants),
    ascending. ``None`` is accepted and ignored.
    """
    if tree is None:
        return

    def subtree_size(node):
        return [len(node.get_leaf_names()), len(node.get_descendants())]

    pending = [tree]
    while pending:
        current = pending.pop()
        current.children = sorted(current.children, key=subtree_size)
        pending.extend(current.children)


def make_normal_as_root(tree):
    """
    Set the first node named "normal" as the outgroup of ``tree``.

    Fails with an IndexError if the name search returns an empty sequence.
    """
    matches = tree.search_nodes(name="normal")
    first_normal = matches[0]
    tree.set_outgroup(first_normal)

In [None]:
def top_mutations_gt9(adata):
    """
    List, cluster by cluster, the mutations whose count within the cluster
    exceeds 9.

    For each distinct value of ``adata.obs["leiden"]`` (in order of first
    appearance) the cluster's ``X`` is cast to int, summed per column, and the
    names of the columns with a sum greater than 9 are appended to the result.
    A mutation that passes in several clusters therefore appears once per
    such cluster.
    """
    selected = []
    for cluster in adata.obs["leiden"].unique():
        subset = adata[adata.obs["leiden"] == cluster]
        counts = pd.DataFrame(
            subset.X,
            index=subset.obs_names,
            columns=subset.var_names,
            dtype=int,
        )
        passing = counts.loc[:, counts.sum().gt(9)]
        selected += passing.columns.tolist()
    return selected

# create backbone tree

In [None]:
adata = sc.read_h5ad(hbc_mito_path)

# Mutations that pass the per-cluster count filter.
# NOTE(review): the list holds one entry per (cluster, mutation) hit, so a
# mutation can occur more than once and its column is then selected more than
# once below - confirm this is intended.
top_mutations = top_mutations_gt9(adata)

# Cluster-average frequencies restricted to the selected mutations.
avg_mut = avg_mutation_profile(adata, "leiden").loc[:, top_mutations]

In [None]:
# Persist the cluster x mutation average-frequency table as tab-separated text.
avg_mut.to_csv("../input/backbone_tree/average_frequency_cluster_mutations.txt", sep="\t")

In [None]:
# All cluster-average frequencies as one flat array, shared by both plots.
flat_freqs = avg_mut.values.flatten()

# Overview: full range of average frequencies.
n, bins, patches = plt.hist(flat_freqs, bins=200)
plt.xlabel("cluster-average mutation frequency")
plt.ylabel("number of (cluster, mutation) entries")
plt.title("Distribution of cluster-average mutation frequencies")
plt.show()

# Zoom on the low-frequency end, used to pick the binarisation threshold.
fig = plt.figure(figsize=(50, 10))
n, bins, patches = plt.hist(flat_freqs, bins=200)
plt.ylim(0, 40)
plt.xlim(0, 0.5)
# One tick per bin edge below 0.5 so candidate cut-offs can be read directly.
plt.xticks(bins[bins < 0.5], rotation=90, fontsize=30)
plt.xlabel("cluster-average mutation frequency", fontsize=30)
plt.ylabel("number of (cluster, mutation) entries", fontsize=30)
plt.title("Cluster-average mutation frequencies (zoom: 0 to 0.5)", fontsize=30)
plt.show()

In [None]:
threshold = 0.05  # select the threshold using which average mutations are converted into binary (based on the above plot)

# Strictly above the threshold -> 1, everything else -> 0. A value exactly
# equal to the threshold maps to 0, which is what it ended up as before once
# generate_lintimat_data cast the table to int; a single comparison keeps the
# table strictly binary and does not depend on boolean-frame assignment.
avg_mut_01 = (avg_mut > threshold).astype(int)
generate_lintimat_data(avg_mut_01)

In [None]:
# Run LinTIMaT on the generated input and log its stdout, bracketed by two
# `date` stamps, to terminal_output.txt. Arguments are passed as a list (no
# shell) and a non-zero exit status raises CalledProcessError, so a failed run
# cannot be mistaken for a successful one by the cells below.
lintimat_log_path = "../output/backbone_tree/terminal_output.txt"
lintimat_cmd = [
    "java", "-jar", "./LinTIMaT.jar",
    "-i", "../input/backbone_tree/Data_matrix_Comb_final2_for_lintimat.txt",
    "-gf", "../input/backbone_tree/top2000.txt",
    "-gc", "2000",
    "-ob", "../output/backbone_tree/bin_tree.newick",
    "-on", "../output/backbone_tree/nonbinary_tree.txt",
    "-mi", "200000",
    "-ci", "0",
    "-s", "9126",
]

# Mode "w" truncates the log, like the original `date > file`.
with open(lintimat_log_path, "w") as lintimat_log:
    subprocess.run(["date"], stdout=lintimat_log, check=True)
    try:
        subprocess.run(lintimat_cmd, stdout=lintimat_log, check=True)
    finally:
        # Always record the end time, even when LinTIMaT fails.
        subprocess.run(["date"], stdout=lintimat_log, check=False)

In [None]:
# Load the binary tree produced by LinTIMaT and re-root it on "normal".
tree = Tree("../output/backbone_tree/bin_tree.newick", format=1)
visualize_tree.add_parent_to_leafs(tree)
make_normal_as_root(tree)

# cluster name -> HMID string, as written by generate_lintimat_data.
cell_mutation_table = pd.read_csv(
    "../input/backbone_tree/cell_mutation_mapping.txt",
    sep="\t",
    header=None,
    dtype=str,
)
cell_map_mutation = dict(zip(cell_mutation_table[0], cell_mutation_table[1]))

# Give the "normal" node an all-NONE profile with one token per site. The
# previous DataFrame.append call discarded its result, so this entry was never
# added (and DataFrame.append no longer exists in pandas >= 2.0).
n_sites = len(cell_mutation_table.iloc[0, 1].split("-"))
cell_map_mutation["normal"] = "-".join(["NONE"] * n_sites)

# Position -> mutation name; read as a global by add_attributes.
mutation_map_names = pd.read_csv(
    "../input/backbone_tree/mutation_mapping.txt", header=None, sep="\t"
)[0].to_list()

# Restart node numbering so re-running this cell yields the same ids.
add_attributes.thisid = 0
add_attributes(tree, cell_map_mutation)
visualize_tree.build_mutations_set(tree)
visualize_tree.finalize_mutation(tree)
visualize_tree.remove_non_mutation_branches(tree)
visualize_tree.add_mutation_diff(tree)
order_tree(tree)
tree.write(outfile="../output/backbone_tree/backbone_tree.txt", format=1)


In [None]:
# Display the finished backbone tree for a visual check.
tree.show()
# Release the name `tree`; the cells below reload the backbone from disk as
# `backbone_tree`.
del tree

# complete tree

In [None]:
# Backbone tree file written by the backbone-tree section above.
backbone_tree_path = '../output/backbone_tree/backbone_tree.txt'
# Tab-separated table read by add_annotation to build the cell-type -> colour lookup.
cell_type_map_color_path = "../input/celltype_map_color.txt"

In [None]:
# Reload the backbone tree from disk; format=1 matches the format it was written with.
backbone_tree = Tree(backbone_tree_path,format=1)

In [None]:
def compleate_leaf_trees(backbone_tree, no_of_sub_clusters):
    """
    Replace each cluster node of ``backbone_tree`` with that cluster's cells.

    For every ``cluster`` in ``range(no_of_sub_clusters)`` the backbone node is
    looked up with ``backbone_tree & cluster`` and removed from its parent
    after the replacement nodes have been added to that parent:

    * if ``../sub_clusters/<cluster>/output/non_binary_ete3.txt`` exists, the
      tree in it is loaded, its "normal" node is removed from the root, a
      root left with a single child is replaced by that child, and the
      resulting root's children are the replacement nodes;
    * otherwise a message is printed and one new leaf per entry in the first
      column of ``../sub_clusters/<cluster>/input/cell_mutation_mapping.txt``
      is used.

    ``backbone_tree`` is modified in place; nothing is returned.
    """
    sub_cluster_folders = "../sub_clusters/"

    def graft(placeholder, replacements):
        # Hang every replacement under the placeholder's parent, then drop
        # the placeholder itself.
        for replacement in replacements:
            placeholder.up.add_child(replacement)
        placeholder.up.remove_child(placeholder)

    for cluster in range(no_of_sub_clusters):
        subtree_path = f'{sub_cluster_folders}{cluster}/output/non_binary_ete3.txt'

        if not os.path.exists(subtree_path):
            print(f'no tree for cluster {cluster}, adding all cells as leafs')
            placeholder = backbone_tree&cluster
            cells = pd.read_csv(f'{sub_cluster_folders}{cluster}/input/cell_mutation_mapping.txt',sep='\t',header=None)[0]
            graft(placeholder, (Tree(name=cell) for cell in cells))
            continue

        subtree = Tree(subtree_path, format=1)
        subtree.remove_child(subtree&'normal')
        if len(subtree.children) == 1:
            subtree = subtree.children[0]
        graft(backbone_tree&cluster, subtree.children)

def add_annotation(tree, cell_map_type_path, cell_type_map_color_path):
    """
    Attach ``cell_type`` and ``nodecolor`` features to every node of ``tree``.

    Parameters
    ----------
    tree
        Object whose ``traverse()`` yields nodes with a ``name`` attribute and
        an ``add_features(**kwargs)`` method.
    cell_map_type_path : str
        Comma-separated file with a header row; column 1 holds node names and
        column 2 the cell type.
    cell_type_map_color_path : str
        Tab-separated file with a header row whose first column is used as the
        index; of the remaining columns, the first holds the cell type and the
        second the colour code without the leading "#".

    Nodes whose name is not in the annotation get ``cell_type=None``; a cell
    type without a colour entry gets ``nodecolor='#FFFFFF'``.
    """
    # Columns are addressed by position with .iloc; integer keys on rows with
    # string labels rely on a deprecated positional fallback.
    type_table = pd.read_csv(cell_map_type_path, sep=",")
    cell_map_type = dict(zip(type_table.iloc[:, 0], type_table.iloc[:, 1]))

    color_table = pd.read_csv(
        cell_type_map_color_path, sep='\t', header=0, index_col=0)
    cell_type_map_color = {
        cell_type: '#' + color
        for cell_type, color in zip(color_table.iloc[:, 0], color_table.iloc[:, 1])
    }

    for node in tree.traverse():
        cell_type = cell_map_type.get(node.name, None)
        nodecolor = cell_type_map_color.get(cell_type, '#FFFFFF')
        node.add_features(cell_type=cell_type, nodecolor=nodecolor)

In [None]:
# Replace each cluster node of the backbone with that cluster's cells/subtree
# (modifies backbone_tree in place).
compleate_leaf_trees(backbone_tree, no_of_sub_clusters)
# Tag every node with `cell_type` and `nodecolor` features taken from the
# annotation and colour tables.
add_annotation(backbone_tree, cell_map_type_path, cell_type_map_color_path)

In [None]:
# Cell type -> "#RRGGBB" lookup handed to the styling helper. The table's
# first file column is the index; of the remaining columns the first is the
# cell type and the second the colour code without "#". Columns are addressed
# by position with .iloc rather than integer keys on string-labelled rows.
color_table = pd.read_csv(
    cell_type_map_color_path, sep="\t", header=0, index_col=0
)
cell_type_map_color = {
    cell_type: "#" + color
    for cell_type, color in zip(color_table.iloc[:, 0], color_table.iloc[:, 1])
}

# Working directory with a trailing slash; the styling and cleanup helpers
# are given paths built from it.
cwd = os.getcwd() + "/"

order_tree(backbone_tree)

ts = visualize_tree.style(
    backbone_tree,
    cell_type_map_color,
    cwd + "../output/",
    is_circular=True,
    is_same_level_leaf=False,
)

In [None]:
# Display the completed tree using the style built in the previous cell.
backbone_tree.show(tree_style = ts)

In [None]:
# Render the completed tree to PDF: 10000 px wide at 500 dpi, same style as shown above.
backbone_tree.render(
    "../output/compleate_tree_ete3.pdf", w=10000, units="px", dpi=500, tree_style=ts
)
# NOTE(review): presumably removes temporary pie-chart files that styling
# created under ../output/ - confirm in visualize_tree.
visualize_tree.del_pie_local(cwd + "../output/")