In [None]:
# problem definition

# --- Day 7: No Space Left On Device ---
# You can hear birds chirping and raindrops hitting leaves as the expedition proceeds. Occasionally, you can even hear much louder sounds in the distance; how big do the animals get out here, anyway?

# The device the Elves gave you has problems with more than just its communication system. You try to run a system update:

# $ system-update --please --pretty-please-with-sugar-on-top
# Error: No space left on device
# Perhaps you can delete some files to make space for the update?

# You browse around the filesystem to assess the situation and save the resulting terminal output (your puzzle input). For example:

# $ cd /
# $ ls
# dir a
# 14848514 b.txt
# 8504156 c.dat
# dir d
# $ cd a
# $ ls
# dir e
# 29116 f
# 2557 g
# 62596 h.lst
# $ cd e
# $ ls
# 584 i
# $ cd ..
# $ cd ..
# $ cd d
# $ ls
# 4060174 j
# 8033020 d.log
# 5626152 d.ext
# 7214296 k
# The filesystem consists of a tree of files (plain data) and directories (which can contain other directories or files). The outermost directory is called /. You can navigate around the filesystem, moving into or out of directories and listing the contents of the directory you're currently in.

# Within the terminal output, lines that begin with $ are commands you executed, very much like some modern computers:

# cd means change directory. This changes which directory is the current directory, but the specific result depends on the argument:
# cd x moves in one level: it looks in the current directory for the directory named x and makes it the current directory.
# cd .. moves out one level: it finds the directory that contains the current directory, then makes that directory the current directory.
# cd / switches the current directory to the outermost directory, /.
# ls means list. It prints out all of the files and directories immediately contained by the current directory:
# 123 abc means that the current directory contains a file named abc with size 123.
# dir xyz means that the current directory contains a directory named xyz.
# Given the commands and output in the example above, you can determine that the filesystem looks visually like this:

# - / (dir)
#   - a (dir)
#     - e (dir)
#       - i (file, size=584)
#     - f (file, size=29116)
#     - g (file, size=2557)
#     - h.lst (file, size=62596)
#   - b.txt (file, size=14848514)
#   - c.dat (file, size=8504156)
#   - d (dir)
#     - j (file, size=4060174)
#     - d.log (file, size=8033020)
#     - d.ext (file, size=5626152)
#     - k (file, size=7214296)
# Here, there are four directories: / (the outermost directory), a and d (which are in /), and e (which is in a). These directories also contain files of various sizes.

# Since the disk is full, your first step should probably be to find directories that are good candidates for deletion. To do this, you need to determine the total size of each directory. The total size of a directory is the sum of the sizes of the files it contains, directly or indirectly. (Directories themselves do not count as having any intrinsic size.)

# The total sizes of the directories above can be found as follows:

# The total size of directory e is 584 because it contains a single file i of size 584 and no other directories.
# The directory a has total size 94853 because it contains files f (size 29116), g (size 2557), and h.lst (size 62596), plus file i indirectly (a contains e which contains i).
# Directory d has total size 24933642.
# As the outermost directory, / contains every file. Its total size is 48381165, the sum of the size of every file.
# To begin, find all of the directories with a total size of at most 100000, then calculate the sum of their total sizes. In the example above, these directories are a and e; the sum of their total sizes is 95437 (94853 + 584). (As in this example, this process can count files more than once!)

# Find all of the directories with a total size of at most 100000. What is the sum of the total sizes of those directories?

Copilot fills in the summary after the first line of the cell below.

In [None]:
# summary of the problem:
# - find all directories with a total size of at most 100000
# - calculate the sum of their total sizes
# - return the sum
# - the total size of a directory is the sum of the sizes of the files it contains, directly or indirectly
# - directories themselves do not count as having any intrinsic size


In [2]:
# Load the puzzle input from 07.txt as a list of terminal lines.
# Surrounding whitespace is trimmed before splitting on newlines.
with open('07.txt') as input_file:
    raw_text = input_file.read()
lines = raw_text.strip().split("\n")
# Peek at the first ten entries to confirm the file was read as expected.
print(lines[:10])

['$ cd /', '$ ls', '113975 bqpslnv', '50243 btttmt.nmb', 'dir gbjh', 'dir hlpzbht', '43500 lblt', 'dir phpmmtvc', 'dir plbjmdl', 'dir tggr']


In [3]:
from pathlib import Path, PurePosixPath
from pprint import pprint
from typing import Dict, List, Tuple

In [28]:
# Quick look at `Path.parents`: it yields every ancestor directory,
# nearest first and ending at the root.
[*Path("/a/b/c/d").parents]

[PosixPath('/a/b/c'), PosixPath('/a/b'), PosixPath('/a'), PosixPath('/')]

In [29]:
def solve(lines: List[str], max_size: int = 100000) -> Tuple[int, Tuple[Dict[str, int]]]:
    """Replay a terminal log and total up the size of every directory.

    Args:
        lines: Terminal output, one entry per element: ``$ cd <dir>`` and
            ``$ ls`` commands, where each ``ls`` is followed by ``dir <name>``
            and ``<size> <name>`` entries. The list is only read, never
            modified. Blank lines and surrounding whitespace (including
            carriage returns) are ignored.
        max_size: Only directories whose total size is at most this value
            contribute to the returned sum.

    Returns:
        ``(answer, (sizes,))``. ``answer`` is the sum of the totals of all
        directories no larger than ``max_size``. ``sizes`` maps each directory
        path (``"/"``, ``"/a"``, ``"/a/e"``, ...) to the combined size of the
        files below it, directly or indirectly. Directories that contain no
        files at any depth do not appear in ``sizes``.

    Raises:
        Exception: If a line is neither a recognised command nor part of an
            ``ls`` listing.
        ValueError: If a listing entry is not of the form ``<size> <name>``.
    """
    # PurePosixPath keeps the keys as "/a/b" on every platform and never
    # consults the real filesystem or the process working directory.
    cwd = PurePosixPath("/")
    # directory path -> total size of the files underneath it
    sizes: Dict[str, int] = {}
    # files already counted, so listing a directory twice does not double count
    seen_files = set()
    # True while the lines being read are the output of the latest `$ ls`
    in_listing = False

    for raw_line in lines:
        line = raw_line.strip()
        if not line:
            continue

        if line.startswith("$"):
            # Any command ends the previous listing.
            in_listing = False
            if line.startswith("$ cd "):
                target = line[5:]
                if target.startswith("/"):
                    cwd = PurePosixPath(target)
                elif target == "..":
                    # The parent of "/" is "/", so this cannot escape the root.
                    cwd = cwd.parent
                else:
                    cwd = cwd / target
            elif line == "$ ls":
                in_listing = True
            else:
                raise Exception(f"unprocessed line: {line}")
            continue

        if not in_listing:
            raise Exception(f"unprocessed line: {line}")

        # Sub-directories carry no size of their own; their files are counted
        # when they are listed themselves.
        if line.startswith("dir "):
            continue

        try:
            # maxsplit=1 keeps file names that contain spaces intact
            size_text, name = line.split(None, 1)
            size = int(size_text)
        except ValueError:
            raise ValueError(f"malformed ls entry: {line!r}") from None

        file_path = cwd / name
        file_key = str(file_path)
        if file_key in seen_files:
            print("!!!!!!!", file_key)
            continue
        seen_files.add(file_key)

        # Credit the file to every enclosing directory up to the root.
        for parent in file_path.parents:
            parent_key = str(parent)
            sizes[parent_key] = sizes.get(parent_key, 0) + size

    ans = sum(dir_total for dir_total in sizes.values() if dir_total <= max_size)
    return ans, (sizes,)

In [31]:
# Regression check: the worked example from the puzzle text, followed by an
# extra /bears directory that is entered and listed twice to make sure a
# repeated listing is only counted once.
example_log = """
$ cd /
$ ls
dir a
14848514 b.txt
8504156 c.dat
dir d
$ cd a
$ ls
dir e
29116 f
2557 g
62596 h.lst
$ cd e
$ ls
584 i
$ cd ..
$ cd ..
$ cd d
$ ls
4060174 j
8033020 d.log
5626152 d.ext
7214296 k
$ cd /
$ cd bears
$ ls
100 beans
200 frogs
$ cd ..
$ cd bears
$ ls
100 beans
200 frogs
""".strip().split("\n")
t1, _ = solve(example_log)
print(t1)
# 95437 is the answer stated in the puzzle; /bears contributes 100 + 200 once.
assert t1 == 95437 + 100 + 200

!!!!!!! /bears/beans
!!!!!!! /bears/frogs
95737


In [33]:
# Part 1 on the real puzzle input; `sizes` is kept around for part 2.
p1, (sizes,) = solve(lines)
print(p1)
# Guard against regressing to 1822026, an earlier result noted as too low.
assert 1822026 < p1

2061777


# Part 2

In [37]:
# Part 2 inputs.
# NOTE(review): presumably `total` is the disk capacity and `needed` the free
# space the update requires; the part 2 statement is not shown in this notebook.
total, needed = 70_000_000, 30_000_000
# Space currently occupied: the root directory's total covers every file.
used = sizes["/"]

In [46]:
# Candidate directories: deleting one must leave the used space within
# `total - needed`, i.e. at least `needed` units free. Smallest first.
kill = sorted(
    ((path, size) for path, size in sizes.items() if used - size <= total - needed),
    key=lambda entry: entry[1],
)
pprint(kill)

[('/phpmmtvc/bqpslnv/vtnptsl', 4473403),
 ('/gbjh/pzdn/nlwnv', 5172382),
 ('/phpmmtvc/dptzbgc', 6841976),
 ('/gbjh/pzdn', 8292589),
 ('/phpmmtvc/bqpslnv', 8898409),
 ('/phpmmtvc/vnswqbm', 9095427),
 ('/gbjh', 9976905),
 ('/phpmmtvc', 30197553),
 ('/', 44125990)]


In [48]:
# `kill` is sorted by ascending size, so its first entry is the smallest
# directory whose deletion frees enough space; display that directory's size.
kill[0][1]

4473403