In [1]:
import ast
import json
import os


In [2]:
from typing import Dict, Any, List, Optional

class TreeNode:
    """
    Represents a single node in the hierarchical tree structure.

    A node stores its descriptive attributes and an ordered list of child
    nodes. Children are looked up by label and ids follow the
    ``<parent id>_<index>`` pattern.
    """
    def __init__(
        self,
        id: str,
        label: str,
        category: str,
        primaryGroup: str,
        description: str,
        children: Optional[List['TreeNode']] = None,
    ):
        """
        Initializes a new tree node with data attributes.

        Args:
            id (str): The unique identifier for the node.
            label (str): The human-readable label or name.
            category (str): The category of the node.
            primaryGroup (str): The primary group classification.
            description (str): Free-text description of the node.
            children (Optional[List['TreeNode']]): A list of child TreeNode objects.
                When omitted, the node starts with its own empty list.
        """
        self.id = id
        self.label = label
        self.category = category
        self.primaryGroup = primaryGroup
        self.description = description
        self.children: List['TreeNode'] = children if children is not None else []

    def __repr__(self):
        """
        Provides a string representation for debugging.
        """
        return f"TreeNode(id='{self.id}', label='{self.label}', children={len(self.children)})"

    def get_child(self, label: str) -> Optional['TreeNode']:
        """
        Returns the first direct child whose label equals ``label``.

        Args:
            label (str): The label to look for among the direct children.

        Returns:
            Optional[TreeNode]: The matching child, or None when no direct
            child carries that label.
        """
        for child in self.children:
            if child.label == label:
                return child
        return None

    def add_children(self, labels: List[str], descriptions: List[str]) -> None:
        """
        Appends one new child per label, paired with its description by position.

        Each child takes this node's label as its category and this node's
        category as its primaryGroup. Ids are derived from this node's id
        and a running index so that siblings never share an id; a shared id
        would make children overwrite one another wherever they are keyed
        by id.

        Args:
            labels (List[str]): Labels of the children to create.
            descriptions (List[str]): Descriptions, in the same order as
                ``labels``. Extra descriptions are ignored.

        Raises:
            IndexError: If there are fewer descriptions than labels. The
                check runs before any child is added, so the node is left
                unchanged.
        """
        labels = list(labels)
        if len(descriptions) < len(labels):
            raise IndexError(
                f"add_children got {len(labels)} labels but only "
                f"{len(descriptions)} descriptions"
            )

        used_ids = {child.id for child in self.children}
        next_index = len(self.children)
        for label, description in zip(labels, descriptions):
            # Skip indices already taken, e.g. after siblings were removed
            # or reordered.
            while f"{self.id}_{next_index}" in used_ids:
                next_index += 1
            child_id = f"{self.id}_{next_index}"
            used_ids.add(child_id)
            self.children.append(
                TreeNode(
                    id=child_id,
                    label=label,
                    category=self.label,
                    primaryGroup=self.category,
                    description=description,
                )
            )

    def print_tree(self, level=0):
        """
        Recursively prints the structure of the tree.

        Args:
            level (int): Current depth; each level indents by two spaces.
        """
        indent = "  " * level
        print(f"{indent}- {self.label} (ID: {self.id})")
        for child in self.children:
            child.print_tree(level + 1)

In [48]:
def build_dict(tree: TreeNode) -> dict:
    """
    Recursively converts a tree back into its nested-dictionary form.

    Args:
        tree (TreeNode): The node to serialise.

    Returns:
        dict: The node's id, label, category, primaryGroup and description.
        When the node has children, a "children" entry maps each child's id
        to that child's own dictionary.
    """
    attribute_names = ("id", "label", "category", "primaryGroup", "description")
    serialised = {name: getattr(tree, name) for name in attribute_names}
    if tree.children:
        # Keyed by child id, so children sharing an id overwrite one another.
        serialised["children"] = {kid.id: build_dict(kid) for kid in tree.children}
    return serialised
    

In [4]:
def build_tree(data: Dict[str, Any]) -> TreeNode:
    """
    Recursively turns one raw node dictionary into a TreeNode.

    Missing attributes fall back to an empty string, except 'primaryGroup',
    which falls back to 'cancer-type'.

    Args:
        data (Dict[str, Any]): The raw dictionary data for a single node.

    Returns:
        TreeNode: The node built from ``data``, with its children attached.
    """
    node = TreeNode(
        id=data.get('id', ''),
        label=data.get('label', ''),
        category=data.get('category', ''),
        primaryGroup=data.get('primaryGroup', 'cancer-type'),
        description=data.get('description', ''),
    )

    child_specs = data.get('children', {})
    # Only a non-empty dict of children is descended into; anything else
    # leaves the node as a leaf.
    if not (child_specs and isinstance(child_specs, dict)):
        return node

    # The keys are ignored; each value is the raw dictionary of one child.
    node.children.extend(build_tree(spec) for spec in child_specs.values())
    return node

In [5]:
# Relative paths to the utilities folder and its dummy-data subfolder.
dummy_path = "../src/utils/dummy_data"
utils_path = "../src/utils"

In [6]:
# Sanity check that the relative paths resolve from the current working
# directory; the message names the path that failed.
assert os.path.isdir(dummy_path), f"dummy data directory not found: {dummy_path!r}"

In [7]:
with open(os.path.join(utils_path, "longer_filter_data.js")) as f:
    # Keep only what lies between the first and last line of the JS module,
    # join it into one string and drop its final character (the ";" that
    # ends the object literal).
    # ast.literal_eval is used instead of eval() so the file contents are
    # parsed as a plain literal and can never execute code.
    filters = ast.literal_eval("".join(f.read().split("\n")[1:-1])[:-1])

In [8]:
# Build a tree from each of the "0_0" and "0_1" entries of the loaded filters.
cancer_tree = build_tree(filters["0_0"])
access_tree = build_tree(filters["0_1"])

In [9]:
# The "0_2" entry becomes data_tree, the tree restructured by the cells below.
data_tree = build_tree(filters["0_2"])

In [None]:
def pop_label(node, label):
    """
    Removes the first direct child of ``node`` whose label equals ``label``.

    Args:
        node: A node exposing a ``children`` list of labelled nodes.
        label: The label of the child to remove.

    Returns:
        The same ``node`` object, whether or not a child was removed.
    """
    siblings = node.children
    position = next(
        (idx for idx, kid in enumerate(siblings) if kid.label == label),
        None,
    )
    if position is not None:
        siblings.pop(position)
    return node

In [None]:
# Direct child of data_tree labelled "Model Organism" (None if there is no such child).
model = data_tree.get_child("Model Organism")

In [None]:
# Grab the "Patient study" branch and the two sub-branches that are moved
# under "Techniques" further down.
patient = data_tree.get_child("Patient study")
multi = patient.get_child("Multi-omic Data")
image = patient.get_child("Imaging Data")

In [None]:
techniques = data_tree.get_child("Techniques")
# The unpacking requires "Techniques" to have exactly five children and
# raises ValueError otherwise.
# NOTE(review): the two-letter names are presumably abbreviations of the
# child labels — confirm against the data file.
co,cr,sp, me, na = techniques.children 

In [None]:
# Replace the children of "Techniques" with the five original ones plus the
# image and multi nodes taken from "Patient study".
# NOTE(review): the order looks alphabetical by label — confirm.
techniques.children = [co, cr,image, me, multi, na, sp]

In [None]:
# Remove the two moved branches from both "Patient study" and
# "Model Organism". pop_label returns the node unchanged when the label is
# not found. The loop variables `node` and `label` stay bound as notebook
# globals afterwards.
for node in [patient, model]:
    for label in ["Multi-omic Data", "Imaging Data"]:
        node = pop_label(node, label)

In [None]:
# The moved nodes now use the label of "Techniques" as both their category
# and their primaryGroup.
for node in [multi, image]:
    node.category = techniques.label
    node.primaryGroup = techniques.label

In [None]:
# Renumber every id in the tree, starting from the root's current id.
# NOTE: change_id is defined in a cell further down this notebook; that
# cell has to be run before this one.
data_tree = change_id(data_tree, data_tree.id)

In [None]:
# Print the labels and ids of the restructured tree for inspection.
data_tree.print_tree()

In [None]:
# Display the id of the "Multi-omic Data" node.
multi.id

In [None]:
# Fresh node that reuses the id, label, category and primaryGroup of
# `multi`, with a new description and no children.
omics_node = TreeNode(id=multi.id, 
                      label=multi.label, 
                      category=multi.category, 
                      primaryGroup=multi.primaryGroup, 
                      description="Omic techniques such as genomics / proteomics" )

In [None]:
# First level under the omics node: three children, each paired with the
# description at the same position.
layers = ["Biological molecules (eg DNA)", "Source", "Spatial resolution"]
descriptions = ["genomics/proteomics etc", "eg. control/tumour", "single cell/spatial etc"]
omics_node.add_children(layers, descriptions)

In [None]:
# Display the children that were just added.
omics_node.children

In [None]:
# Children of "Source"; labels and descriptions are paired by position.
sources = ["Control", "Liquid Biopsy", "Other", "Tumour"]
descriptions = ["healthy tissue", "circulating tumour cells (CTCs), exosomes etc.", "e.g. environmental", "primary and secondary tumours"]

omics_node.get_child("Source").add_children(sources,descriptions)

# The labels are sorted alphabetically, so the descriptions below have to be
# listed in that sorted order (Epigenomics, Genomics, Metabolomics,
# Metagenomics, Proteomics, Transcriptomics), not in the order typed here.
bios = sorted(["Proteomics", "Transcriptomics", "Epigenomics", "Metabolomics", "Metagenomics", "Genomics"])

descriptions = ["chemical modifications to DNA/histones",
                "exomes/genomes",                 
                "small molecules (sugars, amino acids) produced by metabolism",
                "genetic material from multiple organisms (usually microbes) living together",
                "functional molecules that carry out cellular processes",
                "RNA transcripts of DNA",
                ]

omics_node.get_child("Biological molecules (eg DNA)").add_children(bios,descriptions)

# Children of "Spatial resolution". This rebinds `layers`, which earlier
# held the three first-level labels.
layers = ["bulk", "single cell", "spatial"]
descriptions = ["tumour average - may include non-tumour cells", "specific cells", "includes positional information"]

omics_node.get_child('Spatial resolution').add_children(layers, descriptions)


In [None]:
# Renumber the ids of the new subtree, starting from the omics node's own id.
omics_node = change_id(omics_node, omics_node.id)

In [None]:
# Print the new omics subtree for inspection.
omics_node.print_tree()

In [None]:
# Swap the new omics subtree in by position. Index -3 is where `multi` sits
# in the seven-element list [co, cr, image, me, multi, na, sp] assigned to
# techniques.children earlier.
# NOTE(review): this breaks silently if that list changes in length or order.
data_tree.get_child('Techniques').children[-3] = omics_node

In [None]:
# Renumber the whole tree again so the inserted subtree gets ids that follow
# its new position.
data_tree = change_id(data_tree, data_tree.id)

In [None]:
# Print the tree after the omics subtree has been inserted.
data_tree.print_tree()

In [None]:
# Walk down to Techniques -> Multi-omic Data -> Biological molecules -> Genomics.
omics = data_tree.get_child("Techniques").get_child("Multi-omic Data")

bio = omics.get_child('Biological molecules (eg DNA)')
geo = bio.get_child("Genomics")


# Four children for "Genomics"; labels and descriptions are paired by position.
labels = ["SNVs", "Indels", "CNVs", "Fusion Genes"]
descriptions = ["single base changes", "small insertions or deletions",
                "large scale amplifications/deletions","chromosomal translocations"]

geo.add_children(labels, descriptions)

In [None]:
# Print the "Genomics" subtree for inspection.
geo.print_tree()

In [78]:
# Serialise the three trees back to nested dictionaries under the same
# top-level keys they were loaded from.
new_filters = {"0_0" : build_dict(cancer_tree),
               "0_1" : build_dict(access_tree),
               "0_2" : build_dict(data_tree)}


In [79]:
# Write the rebuilt filters as plain JSON to test.json in the working directory.
with open("test.json", "w") as f:
    json.dump(new_filters, f)

In [80]:
# Overwrite the same JS file the filters were loaded from. The output is a
# "const theFilters =" line, the JSON on one line followed by ";", and a
# final export line.
with open(os.path.join(utils_path, "longer_filter_data.js"), "w") as f:
    f.write("const theFilters =\n")
    json.dump(new_filters, f)
    f.write(";\nexport const filterData = theFilters;")

In [None]:
# Dump `filters`, the dictionary as it was loaded from the JS file (not
# new_filters), to filters.json.
with open("filters.json", "w") as f:
    json.dump(filters, f)

In [None]:
def change_id(node, identity):
    """
    Assigns ``identity`` to ``node`` and renumbers all of its descendants.

    Each child receives the id ``<parent id>_<index>``, where the index is
    the child's position in its parent's ``children`` list.

    Args:
        node: The root of the subtree to renumber; modified in place.
        identity: The id to give ``node``.

    Returns:
        The same ``node`` object. A node that has children gets its
        ``children`` attribute rebound to a new list holding the same child
        objects in the same order.
    """
    node.id = identity
    if node.children:
        node.children = [
            change_id(kid, f"{identity}_{position}")
            for position, kid in enumerate(node.children)
        ]
    return node

In [None]:
# Renumber every id in data_tree, starting from the root's current id.
data_tree = change_id(data_tree, data_tree.id)

In [73]:
# Select the "Multi-omic Data" node under "Techniques"; the cells below
# update the descriptions of its children.
node = data_tree.get_child("Techniques").get_child('Multi-omic Data')

In [75]:
# Display the children of the selected node.
node.children

[TreeNode(id='0_2_4_4_0', label='Biological molecules (eg DNA)', children=6),
 TreeNode(id='0_2_4_4_1', label='Source', children=4),
 TreeNode(id='0_2_4_4_2', label='Spatial resolution', children=3)]

In [76]:
# Three descriptions, one per child of the "Multi-omic Data" node.
# NOTE(review): `descriptions` is assigned in several cells; which list is
# in effect depends on which cell ran last.
descriptions = ["genomics/proteomics etc", "eg. control/tumour", "single cell/spatial etc"]

In [69]:
# Six descriptions in the alphabetical order of the omics labels
# (Epigenomics, Genomics, Metabolomics, Metagenomics, Proteomics,
# Transcriptomics). This rebinds the shared name `descriptions`.
descriptions = ["chemical modifications to DNA/histones",
                "exomes/genomes",                 
                "small molecules (sugars, amino acids) produced by metabolism",
                "genetic material from multiple organisms (usually microbes) living together",
                "functional molecules that carry out cellular processes",
                "RNA transcripts of DNA",
                ]

In [20]:
# Descriptions for the four "Genomics" children (SNVs, Indels, CNVs,
# Fusion Genes), in that order.
genomics_descriptions = ["single base changes", "small insertions or deletions",
                "large scale amplifications/deletions","chromosomal translocations"]


In [77]:
# Overwrite each child's description by position. `descriptions` needs at
# least as many entries as `node.children`, otherwise the indexing raises
# IndexError.
# NOTE(review): make sure `descriptions` currently holds the list meant for
# this node's children before running this cell.
for i,child in enumerate(node.children):
    child.description = descriptions[i]