In [280]:
from ipytree import Tree
import json
from pathlib import Path

In [281]:
# Candidate input files; judging by the names, `sm` is presumably a small
# document and `lg` a large one — only `lg` is loaded below.
sm = "../../pdfs/paper.json"
lg = "../../pdfs/loc-ark--13960-t2c831z4j-1632534282.json"
# JSON text is UTF-8 by specification; pass the encoding explicitly so the
# read does not depend on the platform's default locale encoding.
with open(lg, "r", encoding="utf-8") as f:
    data = json.load(f)

In [282]:
def clean(node):
    """Normalize a parsed-document node in place, recursing into its children.

    For every node (a dict):

    * A missing "children" key is replaced by an empty list.
    * If the node has a "label" key it becomes a section (``type`` is set to
      "section"). When one of its children has ``type == "label"``, the first
      such child's "value", "coords" and "page" are copied onto the node and
      all label-typed children are removed; otherwise the node's "value" is
      set to its own "label".
    * If the node has a "content" list, it is prepended to "children". The
      "content" key is left in place, so calling ``clean`` twice on the same
      node prepends the content a second time.

    Args:
        node: dict to normalize; mutated in place.

    Returns:
        None.

    Raises:
        KeyError: if the label-typed child lacks "value", "coords" or "page".
    """
    # Default the child list first, so a labelled node that has no "children"
    # key is handled instead of raising KeyError in the label lookup below.
    children = node.setdefault("children", [])

    if "label" in node:
        node["type"] = "section"
        label_child = next(
            (child for child in children if child.get("type") == "label"),
            None,
        )
        if label_child is None:
            # No dedicated label child: fall back to the node's own label text.
            node["value"] = node["label"]
        else:
            node["value"] = label_child["value"]
            node["coords"] = label_child["coords"]
            node["page"] = label_child["page"]
            # The label has been hoisted onto the section; drop every
            # label-typed child from the list.
            node["children"] = [
                child for child in children if child.get("type") != "label"
            ]

    if "content" in node:
        node["children"] = node["content"] + node["children"]

    for child in node["children"]:
        clean(child)
    

In [283]:

# Normalize the loaded document in place. clean() prepends each node's
# "content" to its "children" without removing "content", so re-running this
# cell on the same `data` duplicates those entries — reload `data` first.
clean(data)

In [284]:
# Registry mapping a node "type" string to its tree-node class. It starts
# empty here and is rebound once the node classes have been defined.
NODE_TYPES = {}
# Maximum number of characters of a node's value shown in its tree label.
MAX_LEN = 20

In [285]:
from ipytree import Node


class MyNode(Node):
    """Base tree node that builds its child nodes from a nested dict.

    ``data`` must carry a "children" list whose items each have a "type" key.
    Children of type "label" or "table" are skipped; every other child is
    instantiated through the module-level ``NODE_TYPES`` registry.
    """

    def __init__(self, data=None):
        """Create the node and, when ``data`` is truthy, load its children."""
        super().__init__()
        if data:
            self.load(data)

    def load(self, data):
        """Append one child node per renderable entry of ``data["children"]``.

        Raises:
            KeyError: if ``data`` has no "children", a child has no "type",
                or a child's type is not registered in ``NODE_TYPES``.
        """
        for child in data["children"]:
            # Exact membership on purpose: a substring test against
            # "labeltable" would also drop types such as "" or "tab".
            if child["type"] not in ("label", "table"):
                self.add_node(NODE_TYPES[child["type"]](child))

    def collapse(self):
        """Close this node only."""
        self.opened = False

    def expand(self):
        """Open this node only."""
        self.opened = True

    def collapse_all(self):
        """Close this node and every node below it."""
        self.collapse()
        for n in self.nodes:
            n.collapse_all()

    def collapse_to(self, level):
        """Open the top ``level`` layers of this subtree and close the rest.

        ``level == 0`` closes this node and all descendants. Otherwise this
        node is opened and each child is processed with ``level - 1``. A
        negative ``level`` never reaches 0, so the whole subtree is opened.
        """
        if level == 0:
            self.collapse_all()
        else:
            self.expand()
            for n in self.nodes:
                n.collapse_to(level - 1)


class SectionNode(MyNode):
    """Tree node for a "section" entry, labelled with its truncated value."""

    def __init__(self, data=None):
        """Build the node from ``data``; ``None`` yields an empty, unnamed node."""
        # Honour the advertised ``data=None`` default instead of failing on
        # ``None.get(...)`` below.
        data = {} if data is None else data
        super().__init__(data)
        self.value = data.get("value", "")
        # Show at most MAX_LEN characters, marking truncation with "...".
        suffix = "..." if len(self.value) > MAX_LEN else ""
        self.name = self.value[:MAX_LEN] + suffix

class TextNode(MyNode):
    """Tree node for a "text" entry, labelled with its truncated value."""

    def __init__(self, data=None):
        """Build the node from ``data``; ``None`` yields an empty, unnamed node."""
        # Honour the advertised ``data=None`` default instead of failing on
        # ``None.get(...)`` below.
        data = {} if data is None else data
        super().__init__(data)
        self.value = data.get("value", "")
        # Show at most MAX_LEN characters, marking truncation with "...".
        suffix = "..." if len(self.value) > MAX_LEN else ""
        self.name = self.value[:MAX_LEN] + suffix
        self.icon = "align-left"

class ImageNode(MyNode):
    """Tree node for an "image" entry; always labelled "img"."""

    def __init__(self, data=None):
        """Build the node from ``data``; ``None`` yields a node with an empty value."""
        # Honour the advertised ``data=None`` default instead of failing on
        # ``None.get(...)`` below.
        data = {} if data is None else data
        super().__init__(data)
        self.value = data.get("value", "")
        self.name = "img"
        self.icon = "image"

# Registry used by MyNode.load to pick the node class for each child "type".
NODE_TYPES = dict(
    section=SectionNode,
    text=TextNode,
    image=ImageNode,
)
    

In [286]:
# Build the tree widget with the cleaned document as its single root node.
tree = Tree(stripes=True)
my_node = SectionNode(data)
tree.add_node(my_node)
# Open the root and its direct children; close everything from the
# grandchildren downwards.
my_node.collapse_to(2)
# Bare last expression so the notebook displays the widget.
tree

Tree(nodes=(SectionNode(name='..\\pdfs\\loc-ark--139...', nodes=(SectionNode(name='THE CAPITOL COOK BOO...', n…

In [287]:
# Re-applies the same two-level collapse as the cell that built the tree —
# presumably to reset the view after expanding nodes by hand; TODO confirm.
my_node.collapse_to(2)