In [1]:
import os
import json


from typing import Dict, Any, List, Optional

class TreeNode:
    """
    Represents a single node in the hierarchical tree structure.

    A node stores its filter metadata (id, label, category, primaryGroup,
    description) together with an ordered list of child nodes.
    """
    def __init__(
        self,
        id: str,
        label: str,
        category: str,
        primaryGroup: str,
        description: str,
        children: Optional[List['TreeNode']] = None,
    ):
        """
        Initializes a new tree node with data attributes.

        Args:
            id (str): The unique identifier for the node.
            label (str): The human-readable label or name.
            category (str): The category of the node.
            primaryGroup (str): The primary group classification.
            description (str): Free-text description of the node.
            children (Optional[List['TreeNode']]): A list of child TreeNode objects.
                When omitted, the node starts with its own empty list.
        """
        self.id = id
        self.label = label
        self.category = category
        self.primaryGroup = primaryGroup
        self.description = description
        self.children: List['TreeNode'] = children if children is not None else []

    def __repr__(self):
        """
        Provides a string representation for debugging.
        """
        return f"TreeNode(id='{self.id}', label='{self.label}', children={len(self.children)})"

    def get_child(self, label: str) -> Optional['TreeNode']:
        """
        Returns the first direct child whose label equals ``label``.

        Args:
            label (str): The label to look for among the direct children.

        Returns:
            Optional[TreeNode]: The matching child, or None when there is none.
        """
        for child in self.children:
            if child.label == label:
                return child
        return None

    def add_child(self, label: str, description: str) -> None:
        """
        Appends a new leaf node to this node's children.

        The child's id is this node's id followed by "_" and the child's
        position, its category is this node's label, and its primaryGroup is
        this node's category.

        Args:
            label (str): Label of the new child.
            description (str): Description of the new child.
        """
        # NOTE(review): primaryGroup is taken from self.category, not
        # self.primaryGroup - confirm this is intended.
        identity = f"{self.id}_{len(self.children)}"
        child = TreeNode(
            id=identity,
            label=label,
            description=description,
            category=self.label,
            primaryGroup=self.category,
        )
        self.children.append(child)

    def add_children(self, labels, descriptions) -> None:
        """
        Appends one child per label, pairing labels with descriptions by position.

        Args:
            labels: Labels of the children to add, in order.
            descriptions: Descriptions in the same order as ``labels``.
                Extra descriptions beyond the number of labels are ignored.

        Raises:
            IndexError: If there are fewer descriptions than labels. The check
                runs before anything is added, so the node is left unchanged.
        """
        labels = list(labels)
        if len(descriptions) < len(labels):
            raise IndexError(
                f"expected at least {len(labels)} descriptions, got {len(descriptions)}"
            )
        for label, description in zip(labels, descriptions):
            self.add_child(label, description)

    def print_tree(self, level=0):
        """
        Recursively prints the structure of the tree.

        Args:
            level (int): Depth of this node; each level adds two spaces of indent.
        """
        indent = "  " * level
        print(f"{indent}- {self.label} (ID: {self.id})")
        for child in self.children:
            child.print_tree(level + 1)

def build_dict(tree: TreeNode) -> dict:
    """
    Recursively converts a tree node into a plain nested dictionary.

    The node's id, label, category, primaryGroup and description are copied
    across. When the node has children, a "children" entry maps each child's
    id to that child's own dictionary; nodes without children get no such key.
    """
    field_names = ("id", "label", "category", "primaryGroup", "description")
    result = {name: getattr(tree, name) for name in field_names}
    if tree.children:
        result["children"] = {kid.id: build_dict(kid) for kid in tree.children}
    return result
    

def build_tree(data: Dict[str, Any]) -> TreeNode:
    """
    Recursively builds the TreeNode structure from the raw dictionary data.

    Missing attributes fall back to an empty string, except primaryGroup,
    which falls back to 'cancer-type'. Children are only read when the
    'children' entry is a non-empty dictionary; its values are converted in
    iteration order.

    Args:
        data (Dict[str, Any]): The raw dictionary data for a single node.

    Returns:
        TreeNode: The resulting object-oriented tree node.
    """
    child_specs = data.get('children', {})
    if child_specs and isinstance(child_specs, dict):
        # Keys are ignored here; each value is a child's own dictionary.
        subtrees = [build_tree(spec) for spec in child_specs.values()]
    else:
        subtrees = None

    return TreeNode(
        id=data.get('id', ''),
        label=data.get('label', ''),
        category=data.get('category', ''),
        primaryGroup=data.get('primaryGroup', 'cancer-type'),
        description=data.get('description', ''),
        children=subtrees,
    )


def add_a_filter(filter_path, place, description, output_path):
    """
    Adds a new leaf filter to the module-level ``filters`` and writes them out as JS.

    Args:
        filter_path: Currently unused; the filters are taken from the
            module-level ``filters`` dictionary. Kept so existing calls work.
        place (str): Slash-separated location. The first segment selects the
            root tree (it must contain "cancer", "access" or "data",
            case-insensitively; the first match in that order wins), the middle
            segments are child labels to descend through, and the last segment
            is the label of the new filter.
        description (str): Description of the new filter.
        output_path (str): Path of the JavaScript file to write.

    Raises:
        ValueError: If the first segment of ``place`` names no known root tree.

    If an intermediate label cannot be found, nothing is added, but the
    current filters are still written to ``output_path``.
    """
    place_list = place.split("/")
    lower = place_list[0].lower()
    root_keys = {"cancer": "0_0", "access": "0_1", "data": "0_2"}
    key = next((k for name, k in root_keys.items() if name in lower), None)
    if key is None:
        raise ValueError(
            f"cannot tell which filter tree {place_list[0]!r} refers to; "
            "expected it to contain 'cancer', 'access' or 'data'"
        )

    # Only the selected tree is rebuilt; the other roots stay as they are.
    parent = build_tree(filters[key])
    node = parent
    for child_name in place_list[1:-1]:
        node = node.get_child(child_name)
        if node is None:
            # Path does not exist: skip the insertion instead of failing.
            break

    if node is not None:
        node.add_child(place_list[-1], description)
        filters[key] = build_dict(parent)

    with open(output_path, "w") as f:
        f.write("const theFilters =\n")
        json.dump(filters, f)
        f.write(";\nexport const filterData = theFilters;")

# Locations of the front-end utility files, relative to the current working directory.
dummy_path = "../src/utils/dummy_data"
utils_path = "../src/utils"

# Stop early if the dummy-data directory cannot be found from here.
assert(os.path.isdir(dummy_path))

# The file is expected to look like what add_a_filter writes: a
# "const theFilters =" line, the data followed by ";", then an export line.
# The first and last lines are dropped, the rest is joined, and the final
# character (presumably the ";") is stripped before evaluating.
# NOTE(review): eval() executes whatever the file contains and does not
# understand JSON literals such as true/false/null - consider json.loads.
# NOTE(review): a trailing newline in the file would leave the export line
# inside the slice - confirm the file never ends with one.
with open(os.path.join(utils_path, "longer_filter_data.js")) as f:
    filters = eval("".join(f.read().split("\n")[1:-1])[:-1])

# Object form of the three root entries of the filters.
cancer_tree = build_tree(filters["0_0"])
access_tree = build_tree(filters["0_1"])
data_tree = build_tree(filters["0_2"])

def get_labels(tree):
    """
    Lists (id, label) pairs for a node and all its descendants.

    Nodes are visited parent first, then each child's subtree in order.
    """
    def walk(node):
        yield node.id, node.label
        if node.children:
            for kid in node.children:
                yield from walk(kid)

    return list(walk(tree))

def get_upload_entry(tree):
    """
    Lists (id, attribute-dict) pairs for a node and all its descendants.

    Each dict holds the node's id, label, category, primaryGroup and
    description, without its children. Nodes are visited parent first, then
    each child's subtree in order.
    """
    field_names = ("id", "label", "category", "primaryGroup", "description")

    def walk(node):
        yield node.id, {name: getattr(node, name) for name in field_names}
        if node.children:
            for kid in node.children:
                yield from walk(kid)

    return list(walk(tree))

def get_full_labels(tree):
    """
    Lists (id, path) pairs for a node and all its descendants.

    The path is the node's label prefixed by the labels of its ancestors,
    joined with "/", starting from ``tree`` itself. Nodes are visited parent
    first, then each child's subtree in order.
    """
    collected = []
    pending = [("", tree)]
    while pending:
        prefix, node = pending.pop()
        path = "/".join([prefix, node.label])
        # The join always starts with "/", so drop that first character.
        collected.append((node.id, path[1:]))
        if node.children:
            # Push in reverse so the first child is handled next.
            pending.extend((path, kid) for kid in reversed(node.children))
    return collected
        

In [7]:
# Third top-level branch of the cancer tree (presumably the CRUK terms - confirm against the data).
cruk = cancer_tree.children[2]

In [38]:
# Map each direct child's label to its attributes, without the "children" list.
cruk_dicts = {}
for child in cruk.children: 
    cp = child.__dict__.copy()  # shallow copy so the pop below leaves the node intact
    cp.pop("children")
    cruk_dicts[child.label] = cp

In [39]:
# Fourth top-level branch of the cancer tree (presumably the TCGA cancer types - confirm against the data).
tcga = cancer_tree.children[3]
# Map each direct child's label to its attributes, without the "children" list.
tcga_dicts = {}
for child in tcga.children: 
    cp = child.__dict__.copy()  # shallow copy so the pop below leaves the node intact
    cp.pop("children")
    tcga_dicts[child.label] = cp

In [40]:
tcga_dicts

{'ACC (Adrenocortical carcinoma)': {'id': '0_0_4_0',
  'label': 'ACC (Adrenocortical carcinoma)',
  'category': 'cancerTypes',
  'primaryGroup': 'cancer-type',
  'description': ''},
 'BLCA (Bladder Urothelial Carcinoma)': {'id': '0_0_4_1',
  'label': 'BLCA (Bladder Urothelial Carcinoma)',
  'category': 'cancerTypes',
  'primaryGroup': 'cancer-type',
  'description': ''},
 'BRCA (Breast invasive carcinoma)': {'id': '0_0_4_2',
  'label': 'BRCA (Breast invasive carcinoma)',
  'category': 'cancerTypes',
  'primaryGroup': 'cancer-type',
  'description': ''},
 'CESC (Cervical squamous cell carcinoma and endocervical adenocarcinoma)': {'id': '0_0_4_3',
  'label': 'CESC (Cervical squamous cell carcinoma and endocervical adenocarcinoma)',
  'category': 'cancerTypes',
  'primaryGroup': 'cancer-type',
  'description': ''},
 'CHOL (Cholangiocarcinoma)': {'id': '0_0_4_4',
  'label': 'CHOL (Cholangiocarcinoma)',
  'category': 'cancerTypes',
  'primaryGroup': 'cancer-type',
  'description': ''},
 'CN

In [33]:
def pathn(n):
    """Returns the path of dummy dataset number ``n``, zero-padded to two digits."""
    number = str(n).zfill(2)
    return f"../src/utils/dummy_data/dataset_{number}.json"

In [34]:
i = 0

In [49]:
# Advance the counter and load that dummy dataset; each re-run of this cell
# moves on to the next file.
i+=1
with open(pathn(i)) as f:
    dummy = json.load(f)

# Show the dataset's title and the filters attached to it.
print(dummy["summary"]["title"])
print(dummy["datasetFilters"])

Prostate Cancer Active Surveillance MRI Cohort
[{'id': '0_0_0_11_1', 'label': 'C61 Prostate gland', 'category': 'C60-C63 Male genital organs', 'primaryGroup': 'cancer-type', 'description': ''}, {'id': '0_0_1_3_0', 'label': '8090/3 Basal cell carcinoma, NOS', 'category': '809-811 Basal cell neoplasms', 'primaryGroup': 'cancer-type', 'description': ''}, {'id': '0_2_3_2', 'label': 'Imaging Data', 'category': 'Patient study', 'primaryGroup': 'data-type', 'description': ''}, {'id': '0_2_3_3', 'label': 'Longitudinal Follow up', 'category': 'Patient study', 'primaryGroup': 'data-type', 'description': ''}, {'id': '0_1_4', 'label': 'Open to applicants', 'category': 'accessType', 'primaryGroup': 'access-type', 'description': ''}, {'id': '0_0_2_81', 'label': 'Prostate cancer', 'category': 'crukTerms', 'primaryGroup': 'cancer-type', 'description': ''}, {'id': '0_0_4_26', 'label': 'PRAD (Prostate adenocarcinoma)', 'category': 'cancerTypes', 'primaryGroup': 'cancer-type', 'description': ''}]


In [1]:
# Build id -> label and label -> id lookup tables across all three trees and
# save both next to the other utility files.
key_label_pairs= (get_labels(cancer_tree) + get_labels(access_tree) + get_labels(data_tree))
key_label_dict = dict(key_label_pairs) 
with open(os.path.join(utils_path, "key_label_dict.json"), "w") as f:
    json.dump(key_label_dict, f)
# NOTE(review): labels used by more than one node collapse to a single entry
# here; the id of the last node visited wins.
label_key_pairs = [(l,k) for k,l in key_label_pairs]
label_key_dict = dict(label_key_pairs)
# NOTE(review): the file name has a double underscore ("label__key") - confirm
# that is what its consumers expect.
with open(os.path.join(utils_path, "label__key_dict.json"), "w") as f:
    json.dump(label_key_dict, f)

In [2]:
# id -> attribute dict (without children) for every node in all three trees.
key_entry_dict = dict(get_upload_entry(cancer_tree) + get_upload_entry(access_tree) + get_upload_entry(data_tree))

In [4]:
# Save the id -> attribute lookup next to the other utility files.
with open(os.path.join(utils_path, "key_entry_dict.json"), "w") as f:
    json.dump(key_entry_dict, f)

In [44]:
# NOTE(review): `self` is not assigned anywhere in the visible cells; it must
# have been bound to a tree node earlier in the session (presumably the data
# tree) - confirm before re-running.
self.get_child("Techniques").get_child("Multi-omic Data").get_child("Source").children

[TreeNode(id='0_2_4_4_1_0', label='Control', children=0),
 TreeNode(id='0_2_4_4_1_1', label='Liquid Biopsy', children=0),
 TreeNode(id='0_2_4_4_1_2', label='Other', children=0),
 TreeNode(id='0_2_4_4_1_3', label='Tumour', children=0)]

In [51]:

def add_a_filter(filter_path, place, description, output_path):
    """
    Adds a new leaf filter to the module-level ``filters`` and writes them out as JS.

    Args:
        filter_path: Currently unused; the filters are taken from the
            module-level ``filters`` dictionary. Kept so existing calls work.
        place (str): Slash-separated location. The first segment selects the
            root tree (it must contain "cancer", "access" or "data",
            case-insensitively; the first match in that order wins), the middle
            segments are child labels to descend through, and the last segment
            is the label of the new filter.
        description (str): Description of the new filter.
        output_path (str): Path of the JavaScript file to write.

    Raises:
        ValueError: If the first segment of ``place`` names no known root tree.

    If an intermediate label cannot be found, nothing is added, but the
    current filters are still written to ``output_path``.
    """
    place_list = place.split("/")
    lower = place_list[0].lower()
    root_keys = {"cancer": "0_0", "access": "0_1", "data": "0_2"}
    key = next((k for name, k in root_keys.items() if name in lower), None)
    if key is None:
        raise ValueError(
            f"cannot tell which filter tree {place_list[0]!r} refers to; "
            "expected it to contain 'cancer', 'access' or 'data'"
        )

    # Only the selected tree is rebuilt; the other roots stay as they are.
    parent = build_tree(filters[key])
    node = parent
    for child_name in place_list[1:-1]:
        node = node.get_child(child_name)
        if node is None:
            # Path does not exist: skip the insertion instead of failing.
            break

    if node is not None:
        node.add_child(place_list[-1], description)
        filters[key] = build_dict(parent)

    with open(output_path, "w") as f:
        f.write("const theFilters =\n")
        json.dump(filters, f)
        f.write(";\nexport const filterData = theFilters;")

In [53]:
# Dump the current filters as plain JSON (no JS wrapper lines) for inspection.
with open("check.js", "w") as f:
    json.dump(filters, f)

In [26]:
h = 1

In [28]:
# NOTE(review): scratch cell. In the visible cells `f` is only ever bound as a
# file handle by `with open(...) as f`, so this call relies on a definition
# that is not shown - confirm or delete.
import inspect
f(h)

h


In [None]:
def pop_label(node, label):
    """
    Removes the first direct child of ``node`` whose label equals ``label``.

    Returns the same node, whether or not a child was removed.
    """
    for position, kid in enumerate(node.children):
        if kid.label == label:
            del node.children[position]
            break
    return node

In [None]:
# Direct child of the data tree labelled "Model Organism" (None if there is no such child).
model = data_tree.get_child("Model Organism")

In [None]:
# Pick out the two nodes under "Patient study" that are referenced by the
# following cells. get_child yields None for a missing label, in which case
# the chained call on the next lines raises AttributeError.
patient = data_tree.get_child("Patient study")
multi = patient.get_child("Multi-omic Data")
image = patient.get_child("Imaging Data")

In [None]:
techniques = data_tree.get_child("Techniques")
# Requires "Techniques" to have exactly five children; any other count makes
# this unpacking raise ValueError.
co,cr,sp, me, na = techniques.children 

In [None]:
# Insert the imaging and multi-omic nodes among the existing techniques
# (presumably keeping the list alphabetical by label - confirm).
techniques.children = [co, cr,image, me, multi, na, sp]

In [None]:
# Remove the moved nodes from "Patient study" and "Model Organism".
# pop_label changes the node in place and returns it, so the rebinding of
# `node` has no further effect; a label that is absent is simply skipped.
for node in [patient, model]:
    for label in ["Multi-omic Data", "Imaging Data"]:
        node = pop_label(node, label)

In [None]:
# Point the moved nodes at their new parent.
# NOTE(review): primaryGroup is set to the parent's label here as well as
# category - confirm that is the intended value for primaryGroup.
for node in [multi, image]:
    node.category = techniques.label
    node.primaryGroup = techniques.label

In [None]:
# Renumber every id under the data tree to match the new child positions.
# change_id is defined in a later cell, which has to be run first.
data_tree = change_id(data_tree, data_tree.id)

In [None]:
data_tree.print_tree()

In [None]:
multi.id

In [None]:
# Fresh replacement for the multi-omic node: same id, label, category and
# primaryGroup, a new description, and no children yet.
omics_node = TreeNode(id=multi.id, 
                      label=multi.label, 
                      category=multi.category, 
                      primaryGroup=multi.primaryGroup, 
                      description="Omic techniques such as genomics / proteomics" )

In [None]:
# First level under the omics node; descriptions are paired with labels by position.
layers = ["Biological molecules (eg DNA)", "Source", "Spatial resolution"]
descriptions = ["genomics/proteomics etc", "eg. control/tumour", "single cell/spatial etc"]
omics_node.add_children(layers, descriptions)

In [None]:
omics_node.children

In [None]:
# Second level under the omics node. In each group the descriptions are
# paired with the labels by position, and `descriptions` is reused each time.
sources = ["Control", "Liquid Biopsy", "Other", "Tumour"]
descriptions = ["healthy tissue", "circulating tumour cells (CTCs), exosomes etc.", "e.g. environmental", "primary and secondary tumours"]

omics_node.get_child("Source").add_children(sources,descriptions)

# Labels are sorted before use, so the descriptions below are listed in the
# sorted (alphabetical) label order, not the order the labels are typed in.
bios = sorted(["Proteomics", "Transcriptomics", "Epigenomics", "Metabolomics", "Metagenomics", "Genomics"])

descriptions = ["chemical modifications to DNA/histones",
                "exomes/genomes",                 
                "small molecules (sugars, amino acids) produced by metabolism",
                "genetic material from multiple organisms (usually microbes) living together",
                "functional molecules that carry out cellular processes",
                "RNA transcripts of DNA",
                ]

omics_node.get_child("Biological molecules (eg DNA)").add_children(bios,descriptions)

layers = ["bulk", "single cell", "spatial"]
descriptions = ["tumour average - may include non-tumour cells", "specific cells", "includes positional information"]

omics_node.get_child('Spatial resolution').add_children(layers, descriptions)


In [None]:
# Renumber the ids below the omics node from its own id downwards.
# change_id is defined in a later cell, which has to be run first.
omics_node = change_id(omics_node, omics_node.id)

In [None]:
omics_node.print_tree()

In [None]:
# Swap the new omics node in by position: with the seven-item list assigned
# to techniques.children earlier, index -3 is the multi-omic slot.
# NOTE(review): this breaks silently if that list changes - confirm the position.
data_tree.get_child('Techniques').children[-3] = omics_node

In [None]:
# Renumber again now that the omics node has been swapped in.
# change_id is defined in a later cell, which has to be run first.
data_tree = change_id(data_tree, data_tree.id)

In [None]:
data_tree.print_tree()

In [None]:
# Walk down to the "Genomics" node and give it four children; descriptions
# are paired with labels by position.
omics = data_tree.get_child("Techniques").get_child("Multi-omic Data")

bio = omics.get_child('Biological molecules (eg DNA)')
geo = bio.get_child("Genomics")


labels = ["SNVs", "Indels", "CNVs", "Fusion Genes"]
descriptions = ["single base changes", "small insertions or deletions",
                "large scale amplifications/deletions","chromosomal translocations"]

geo.add_children(labels, descriptions)

In [None]:
geo.print_tree()

In [84]:
# Convert the three (possibly edited) trees back to nested dictionaries under
# the same root keys they were loaded from.
new_filters = {"0_0" : build_dict(cancer_tree),
               "0_1" : build_dict(access_tree),
               "0_2" : build_dict(data_tree)}


In [85]:
# Dump the rebuilt filters as plain JSON for inspection.
with open("test.json", "w") as f:
    json.dump(new_filters, f)

In [80]:
# Overwrite the source file with the rebuilt filters, wrapped in the same
# first and last lines that the loading cell strips off when reading it.
with open(os.path.join(utils_path, "longer_filter_data.js"), "w") as f:
    f.write("const theFilters =\n")
    json.dump(new_filters, f)
    f.write(";\nexport const filterData = theFilters;")

In [None]:
# NOTE(review): this dumps `filters`, not `new_filters`; none of the visible
# tree-editing cells write their changes back into `filters` - confirm which
# one is meant.
with open("filters.json", "w") as f:
    json.dump(filters, f)

In [None]:
def change_id(node, identity):
    """
    Sets ``node.id`` to ``identity`` and renumbers every descendant to match.

    Each child's id becomes its parent's id followed by "_" and the child's
    position. The node is changed in place and returned.
    """
    node.id = identity
    if node.children:
        # Iterate over a copy; the node then receives a new list of the same children.
        node.children = [
            change_id(kid, f"{identity}_{position}")
            for position, kid in enumerate(list(node.children))
        ]
    return node

In [None]:
# Renumber every id under the data tree so ids follow the current child positions.
data_tree = change_id(data_tree, data_tree.id)

In [81]:
# Node whose children get their descriptions filled in by a later cell.
node = data_tree.get_child("Techniques").get_child('Multi-omic Data').get_child("Spatial resolution")

In [82]:
node.children

[TreeNode(id='0_2_4_4_2_0', label='bulk', children=0),
 TreeNode(id='0_2_4_4_2_1', label='single cell', children=0),
 TreeNode(id='0_2_4_4_2_2', label='spatial', children=0)]

In [None]:
descriptions = ["tumour average - may include non-tumour cells", "specific cells", "includes positional information"]

In [76]:
descriptions = ["genomics/proteomics etc", "eg. control/tumour", "single cell/spatial etc"]

In [69]:
descriptions = ["chemical modifications to DNA/histones",
                "exomes/genomes",                 
                "small molecules (sugars, amino acids) produced by metabolism",
                "genetic material from multiple organisms (usually microbes) living together",
                "functional molecules that carry out cellular processes",
                "RNA transcripts of DNA",
                ]

In [20]:
genomics_descriptions = ["single base changes", "small insertions or deletions",
                "large scale amplifications/deletions","chromosomal translocations"]


In [83]:
# Overwrite each child's description by position.
# NOTE(review): `descriptions` is reassigned by several cells, so the values
# used here depend on which of them ran last - confirm it is the list that
# belongs to this node's children before running.
for i,child in enumerate(node.children):
    child.description = descriptions[i]