In [1]:
import psycopg2 as pg

In [2]:
# Open the PostgreSQL connection and the single cursor that later cells
# (e.g. the name lookup in printgraph) reuse.
conn = pg.connect(
    dbname="sec",
    user="secapp",
    host="r_363-postgres-1",
)
curr = conn.cursor()

In [3]:
class Node:
    """One vertex of a tree keyed by ``code``.

    Each node keeps its children in insertion order (``children``) together
    with the set of codes already present among them (``child_codes``), so a
    given code is added at most once under the same parent by ``insert``.
    """

    code: str
    children: list["Node"]
    child_codes: set[str]

    def __init__(self, code) -> None:
        """Create a childless node carrying ``code``."""
        self.code = code
        self.children = []
        self.child_codes = set()

    def get(self, code):
        """Return the first node in this subtree whose code equals ``code``.

        The search is depth-first and checks the node itself before its
        children. Returns ``None`` when nothing in the subtree matches.
        """
        if self.code == code:
            return self
        for kid in self.children:
            hit = kid.get(code)
            if hit is not None:
                return hit
        return None

    def insert(self, parent: 'Node', code: str):
        """Ensure ``parent`` has a child with ``code`` and return that child.

        ``parent`` is located by identity within this subtree. Returns
        ``None`` when ``code`` equals the parent's own code, or when
        ``parent`` is not reachable from this node.
        """
        # A node is never made a child of a parent with the same code.
        if parent.code == code:
            return None

        if self is not parent:
            # Not the target yet: delegate to each child until one of them
            # reports that it found `parent` and performed the insert.
            for kid in self.children:
                found = kid.insert(parent, code)
                if found is not None:
                    return found
            return None

        # This node is the parent: create the child only if it is new.
        if code not in self.child_codes:
            self.child_codes.add(code)
            self.children.append(Node(code))
        matching = [kid for kid in self.children if kid.code == code]
        return matching[0]

    def __repr__(self) -> str:
        """Return a short debug representation showing the node's code."""
        return f"<Node>({self.code})"

In [75]:
# Load one path per line from paths.txt. A path is a "|"-separated chain of
# codes, e.g. "C35850|C9315|C132728|C132732|C132733".
with open('paths.txt') as f:
    # Strip surrounding whitespace and drop blank lines (for instance a
    # trailing empty line): "".split("|") yields [""], which would otherwise
    # become a node with an empty code further down the notebook.
    paths = [entry for entry in map(str.strip, f) if entry]

In [76]:
# Codes that printgraph has already written in full. Shared across calls so
# each code is looked up and printed with its name only the first time.
visited = set()
# True while the most recently written line was a "dupe*" marker; used to
# collapse a run of consecutive duplicates into a single marker line.
printed_dupe = False


def printgraph(node: "Node", level=0, out=None, cursor=None):
    """Write an indented, one-line-per-node listing of the tree under ``node``.

    Args:
        node: Node to print; its ``children`` are printed recursively.
        level: Nesting depth, rendered as that many leading tab characters.
        out: Object with a ``write(str)`` method. Defaults to the
            module-level ``outfile``.
        cursor: DB-API cursor used to look up ``pref_name`` in the ``ncit``
            table. Defaults to the module-level ``curr``.

    Behaviour per node:
        * code already in ``visited``: write a single "dupe*" marker, unless
          the previous line written was already such a marker;
        * truthy code seen for the first time: write the code and its name;
        * falsy code (the root): write "- root".
    Children are visited in every case, including under a duplicate.

    A code with no row in ``ncit`` (or a NULL ``pref_name``) is printed with
    the placeholder ``<name not found>`` instead of raising ``TypeError``.
    """
    global printed_dupe
    # Fall back to the notebook globals so existing `printgraph(root)` calls
    # keep working unchanged.
    if out is None:
        out = outfile
    if cursor is None:
        cursor = curr

    if node.code in visited:
        if not printed_dupe:
            out.write("\t" * level + f" - ({node.code}) dupe* \n")
            printed_dupe = True
    elif node.code:
        printed_dupe = False
        cursor.execute("select pref_name from ncit where code = %s", (node.code,))
        row = cursor.fetchone()
        # fetchone() returns None when the code is absent from the table, and
        # the column itself may be NULL; neither should abort the whole dump.
        if row is not None and row[0] is not None:
            name = row[0]
        else:
            name = "<name not found>"
        out.write("\t" * level + " - (" + node.code + ") " + name + "\n")
        visited.add(node.code)
    else:
        printed_dupe = False
        out.write("- root" + "\n")
        visited.add(None)
    for child in node.children:
        printgraph(child, level + 1, out, cursor)

In [77]:
# Merge every path into a single tree hanging off a code-less root. Paths
# that share a prefix share nodes, because insert() returns the existing
# child when the parent already has one with that code.
root = Node(None)
for path in paths:
    codes = path.split("|")
    parent = root
    for code in codes:
        if not code:
            # Empty segment (blank path, "A||B", trailing "|"). A node with an
            # empty code would be indistinguishable from the root, so skip it.
            continue
        # `parent` is always `root` or a node returned by an earlier insert(),
        # so it can perform the insert itself. Going through
        # root.insert(parent, code) gives the same result but searches the
        # whole tree for `parent` on every call.
        current = parent.insert(parent, code)
        # insert() returns None when `code` repeats the parent's own code;
        # in that case stay on the same parent.
        if current is not None:
            parent = current

In [78]:
# Reset the traversal state so re-running this cell regenerates the complete
# listing; otherwise every code left in `visited` by a previous run would be
# reported as a duplicate.
visited.clear()
printed_dupe = False

# printgraph writes to the module-level name `outfile`. Binding it through a
# with-block guarantees the file is flushed and closed even if the traversal
# raises part-way through.
with open("paths.out.txt", "w") as outfile:
    printgraph(root)