In [1]:
import numpy as np
import os
import os.path
from functools import reduce

import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import shortest_path
from scipy.stats import pearsonr
from pandas import DataFrame, concat

from anytree import AnyNode
from anytree import RenderTree
from anytree import PostOrderIter
from anytree import PreOrderIter

from pprint import pprint

import gsd.gene_sets
import gsd.immune_cells
from gsd.distance import calc_pairwise_distances

In [3]:
# Immune-only evaluation data: the annotated gene sets and the tree stored next to them.
# `root` is later passed to render_tikz_tree, which reads `.name` and `.children` from it.
gene_sets = gsd.gene_sets.load_gene_sets("../evaluation_data/immune_cells/immune_only/gene_sets.json")
root = gsd.gene_sets.load_tree("../evaluation_data/immune_cells/immune_only/tree.json")
# Same two artefacts from the "all" directory (presumably the superset that also
# contains non-immune cells -- TODO confirm against the data preparation step).
all_gene_sets = gsd.gene_sets.load_gene_sets("../evaluation_data/immune_cells/all/gene_sets.json")
all_root = gsd.gene_sets.load_tree("../evaluation_data/immune_cells/all/tree.json")
# Tree built by the project helper from the raw immune-cell data directory;
# it is traversed with PostOrderIter further down in this cell.
immune_cell_tree = gsd.immune_cells.extract_immune_cell_tree("../raw_data/immune_cells")

def render_tikz_tree(root):
    r"""Render the tree below ``root`` as a ``\Tree`` bracket expression.

    The result is the literal prefix ``\Tree`` immediately followed by the
    rendering of ``root`` produced by ``_render_tikz_node`` at indent 0.

    Args:
        root: Root node of the tree; it must expose ``name`` and ``children``
            the way ``_render_tikz_node`` expects.

    Returns:
        The complete tree expression as a single (multi-line) string.
    """
    # The backslash is escaped explicitly: "\T" is not a valid escape sequence
    # and triggers a DeprecationWarning/SyntaxWarning on current Python versions.
    return "\\Tree" + _render_tikz_node(root, 0)
    
def _render_tikz_node(node: AnyNode, indent: int = 0):
    if len(node.children) == 0:
        return " "*indent + "\\text{%s}" % node.name
    
    children = sorted(node.children, key=lambda x: len(x.children))
    rendered_children = reduce(lambda a,b: a+"\n"+b, [_render_tikz_node(child, indent + 3) for child in children])
    
    return " "*indent + "[.{%s}\n%s ]" % (node.name, rendered_children)

# Number of gene sets in the immune-only data.
print(len(gene_sets))
# Names of the gene sets in the full data whose general_info is flagged `calculated`.
print([entry.general_info.name for entry in all_gene_sets if entry.general_info.calculated])
# Node counts of the raw immune-cell tree and of the full evaluation tree.
print(len(list(PostOrderIter(immune_cell_tree))))
print(len(list(PostOrderIter(all_root))))
# Peek at the first two gene sets of the full data.
print(all_gene_sets[0])
print(all_gene_sets[1])
# Same `calculated` filter, applied to the immune-only gene sets.
print([entry.general_info.name for entry in gene_sets if entry.general_info.calculated])
# Bracket-notation rendering of the immune-only tree.
print(render_tikz_tree(root))

36
['cell', 'immune cell', 'lymphoid', 'T cell CD4+ memory ', 'myleoid', 'Monocytic lineage', 'other cell']
45
40
<AnnotatedGeneSet(general_info='<GeneralInfo(name='cell', n_entrez_gene_ids='2197')>')>
<AnnotatedGeneSet(general_info='<GeneralInfo(name='immune cell', n_entrez_gene_ids='1847')>')>
['immune cell', 'lymphoid', 'T cell CD4+ memory ', 'myleoid', 'Monocytic lineage']
\Tree[.{immune cell}
   [.{lymphoid}
      [.{NK cell}
         \text{NK cell resting}
         \text{NK cell activated} ]
      [.{T cell}
         \text{T cell CD8+}
         \text{T cell gamma delta}
         [.{T cell CD4+}
            \text{T cell regulatory (Tregs)}
            [.{T cell CD4+ (non-regulatory)}
               \text{T cell CD4+ naive}
               \text{T cell follicular helper}
               [.{T cell CD4+ memory }
                  \text{T cell CD4+ memory resting}
                  \text{T cell CD4+ memory activated} ] ] ] ]
      [.{B cell}
         \text{B cell naive}
         \text{B