Problem Part 1:
- Find all directories with a total size of at most 100,000 (i.e. 100000)
- Add up their sizes

In [69]:
import sys
sys.path.append("..")
import lib
from anytree import Node, RenderTree, AnyNode
import numpy as np
import re

In [102]:
def get_child(node : Node, child_name : str, verbose : bool = False):
    """Return the first direct child of ``node`` whose ``name`` is ``child_name``.

    Args:
        node: Node whose ``children`` are searched (direct children only).
        child_name: Name to look for.
        verbose: If true, print the names of all direct children first.

    Returns:
        The first child, in ``node.children`` order, with a matching name.

    Raises:
        ValueError: If no direct child carries that name.
    """
    names = [child.name for child in node.children]
    if verbose:
        print(f"List of children: {names}")
    for child in node.children:
        if child.name == child_name:
            return child
    # Same exception type a failed ``list.index`` lookup raises, but with a
    # message that says what was being looked up and what was available.
    raise ValueError(
        f"{child_name!r} is not a child of this node; available children: {names}"
    )

def get_idx_of_next_command(lines: list):
    """Return the index of the next command line after the first entry.

    The search starts at index 1 because index 0 is the command currently
    being processed.

    Args:
        lines: Terminal lines, starting with the current command.

    Returns:
        The index (relative to ``lines``) of the first later entry that
        starts with ``"$"``, or ``-1`` if there is none.
    """
    # ``startswith`` is safe on empty strings, so a blank line is simply
    # skipped instead of being mistaken for the end of the input.
    for offset, line in enumerate(lines[1:], start=1):
        if line.startswith("$"):
            return offset
    return -1

def parse_children(lines: list, verbose : bool = False):
    """Build child nodes from one ``$ ls`` command and its output lines.

    Args:
        lines: The ``$ ls`` command line followed by its output lines. The
            first entry is skipped because it is the command itself.
        verbose: If true, print the input and every processed line.

    Returns:
        A list with one node per non-blank output line: a ``Node`` named
        after the directory for ``dir <name>`` lines, otherwise an
        ``AnyNode`` with ``name`` and ``size`` attributes. The size is kept
        as the string read from the line.

    Raises:
        ValueError: If a file line does not contain both a size and a name.
    """
    if verbose:
        print(f"Parsing children... input ... \n {lines}")
    nodes = []
    for line in lines[1:]:
        if verbose:
            print(f"Process line: {line}")
        if not line.strip():
            # Blank lines carry no entry; skip them instead of failing to unpack.
            continue
        if line.startswith("dir"):
            nodes.append(Node(line[4:]))
            continue
        # Split only once so that file names containing whitespace stay intact.
        file_size, file_name = line.split(None, 1)
        nodes.append(AnyNode(name=file_name, size=file_size))
    return nodes

def parse_directory(input: list, verbose : bool = False):
    """Parse a terminal log of ``cd``/``ls`` commands into a directory tree.

    Args:
        input: Lines of the terminal log. The first line is skipped; it is
            expected to be the initial ``$ cd /``, which corresponds to the
            root node created here.
        verbose: If true, print progress information while parsing.

    Returns:
        A tuple ``(root, tree)`` where ``root`` is the root ``Node`` and
        ``tree`` is ``RenderTree(root)``.

    Raises:
        AssertionError: If a line in command position is neither ``$ cd``
            nor ``$ ls``.
        ValueError: If ``cd`` targets a directory that has not been listed.
    """
    root = Node("root")
    n_lines = len(input)
    idx_iter = 1 # start from 2nd line for easier parsing

    # points to current node
    tracker = root
    if verbose:
        print(f"Start parsing, n_lines = {n_lines}...")
    while idx_iter < n_lines:
        line = input[idx_iter]
        if verbose:
            print(f"Line: {line}")
        if line.startswith("$ cd"): # change directory
            target = line[5:]
            if target == "..": # go back
                if verbose:
                    print(f"Going back...")
                # Stay at the root instead of moving the tracker to None.
                if tracker.parent is not None:
                    tracker = tracker.parent
            else:
                if verbose:
                    print(f"New dir: {target}")
                # "cd /" jumps back to the root from anywhere in the tree.
                tracker = root if target == "/" else get_child(tracker, target)
            idx_iter += 1
        elif line.startswith("$ ls"): # create children
            offset = get_idx_of_next_command(input[idx_iter:])
            # -1 means the listing runs to the end of the input.
            end = n_lines if offset == -1 else idx_iter + offset
            tracker.children = parse_children(input[idx_iter:end], verbose)
            idx_iter = end
        else:
            # Raised explicitly so the check survives ``python -O`` and an
            # unknown line can never leave the loop spinning.
            raise AssertionError(line)
    return root, RenderTree(root)

In [120]:
def compute_directory_sum(node : Node, verbose = False, max_size = 100000):
    """Recursively compute directory sizes below ``node``.

    Args:
        node: Directory node whose children are summed.
        verbose: If true, print every child as it is processed, at all
            recursion levels.
        max_size: Largest total size a directory may have to be counted in
            the second return value. Defaults to 100000.

    Returns:
        A tuple ``(total_size, cumulate_partA)``. ``total_size`` is the sum
        of all file sizes below ``node``. ``cumulate_partA`` is the sum of
        the total sizes of all descendant directories whose total size is at
        most ``max_size``; ``node`` itself is not included.
    """
    total_size = 0
    cumulate_partA = 0
    for child in node.children:
        if verbose:
            print(f"Processing: \n{child}")
        # A child with a ``size`` attribute is a file; anything else is a directory.
        # NOTE(review): newer anytree releases appear to define a built-in
        # ``size`` property on every node -- confirm against the installed
        # version that this check still separates files from directories.
        if hasattr(child, "size"):
            size = int(child.size)
        else:
            size, nested = compute_directory_sum(child, verbose, max_size)
            size = int(size)
            cumulate_partA += nested # always keep what deeper levels collected
            if size <= max_size: # and add this directory if it fits
                cumulate_partA += size
        total_size += size
    return total_size, cumulate_partA

In [103]:
# Read test_input.txt through the local `lib` helper, preview its first five
# entries, and parse the whole listing into `root` / `tree`.
file = lib.read_file("test_input.txt")
display(file[:5])
root, tree = parse_directory(file)

['$ cd /', '$ ls', 'dir a', '14848514 b.txt', '8504156 c.dat']

In [106]:
# Print the RenderTree returned by parse_directory.
print(tree)

Node('/root')
├── Node('/root/a')
│   ├── Node('/root/a/e')
│   │   └── AnyNode(name='i', size='584')
│   ├── AnyNode(name='f', size='29116')
│   ├── AnyNode(name='g', size='2557')
│   └── AnyNode(name='h.lst', size='62596')
├── AnyNode(name='b.txt', size='14848514')
├── AnyNode(name='c.dat', size='8504156')
└── Node('/root/d')
    ├── AnyNode(name='j', size='4060174')
    ├── AnyNode(name='d.log', size='8033020')
    ├── AnyNode(name='d.ext', size='5626152')
    └── AnyNode(name='k', size='7214296')


In [121]:
# Evaluates to (total size, Part 1 sum) for the tree parsed above.
compute_directory_sum(root)

(48381165, 95437)

In [122]:
# Same steps for input.txt: read, preview, parse (rebinding `file`, `root`
# and `tree`), then compute (total size, Part 1 sum).
file = lib.read_file("input.txt")
display(file[:5])
root, tree = parse_directory(file)
compute_directory_sum(root)

['$ cd /', '$ ls', 'dir bsnqsfm', 'dir dtqvbspj', 'dir hhhtrws']

(42476859, 1517599)