In [1]:
# Read the puzzle input and tokenise it: one list of whitespace-separated
# tokens per line, e.g. ["$", "cd", "/"], ["dir", "a"] or ["123", "b.txt"].
with open('input.txt') as file:
    # Iterate over the file directly instead of materialising it with
    # readlines(). Blank lines (such as a trailing empty line) are dropped:
    # with split(" ") they would become [""] and break the two-token
    # unpacking of `ls` output when the tree is built.
    # split() without an argument also collapses runs of whitespace, so no
    # empty tokens are produced.
    lines = [line.split() for line in file if line.strip()]


In [2]:
# Construct classes to save our files and directories
class Directory():
    """
    A directory node in the reconstructed file-system tree.

    Attributes:
        name: directory name as passed to the constructor.
        contents: mapping from a lookup key to the child object stored under it.
        parent: the enclosing Directory, or None when no parent was given.
        size: combined size of everything in `contents`; None until
            compute_size() has been called.
    """
    name: str
    size: int
    contents: dict

    def __init__(self, name: str, parent=None):
        self.name = name
        self.contents = {}
        self.parent = parent
        # Unknown until compute_size() runs.
        self.size = None

    def __str__(self):
        return f"- {self.name} (dir)"

    def compute_size(self):
        """
        Recursively compute the size of this directory and all subdirectories.

        The result is stored in ``self.size``; nothing is returned. Every
        sub-directory is recomputed on each call, so the totals stay correct
        when entries were added after an earlier call.
        """
        total = 0

        for item in self.contents.values():
            # Directories are always refreshed instead of trusting a
            # previously stored size, which may be stale. Any other entry
            # whose size is still unset is asked to compute it.
            if isinstance(item, Directory) or item.size is None:
                item.compute_size()
            total += item.size

        self.size = total


class File:
    """A plain file entry in the tree: just a name and a size."""
    name: str
    size: int

    def __init__(self, name: str, size: int):
        self.name, self.size = name, size

    def __str__(self):
        # Rendered as a single line of the tree listing.
        return f"- {self.name} (file, size={self.size})"


In [3]:
# Rebuild the directory tree from the terminal log held in `lines`.
# current_tree is the path from the root to the current directory; entries
# below the root are the "_dir"-suffixed keys used in `contents`.
root_dir = Directory("/")
current_dir = root_dir
current_tree = ["/"]

# True while the lines being read are the output of the most recent `ls`.
reading_listing = False

# Single pass over the log: commands update the position in the tree, and the
# lines that follow an `ls` are added to the current directory.
for line in lines:
    if not line:  # tolerate blank lines
        continue

    if line[0] == "$":
        if line[1] == "cd":
            reading_listing = False
            target = line[2]
            if target == "/":
                # Jump straight back to the root from anywhere in the tree.
                current_dir = root_dir
                del current_tree[1:]
            elif target == "..":
                # Go back up a level; at the root there is nowhere to go, so stay put.
                if current_dir.parent is not None:
                    current_tree.pop()
                    current_dir = current_dir.parent
            else:
                # Directories and files are stored under suffixed keys so a
                # directory and a file may share the same name.
                key = target + "_dir"
                if key not in current_dir.contents:
                    # Entering a directory that no `ls` has listed yet:
                    # register it with its parent so it is part of the tree.
                    current_dir.contents[key] = Directory(target, current_dir)
                current_dir = current_dir.contents[key]
                current_tree.append(key)
        else:
            # Any other command is treated as `ls`; its output follows.
            reading_listing = True
    elif reading_listing:
        # A line of `ls` output: either "dir <name>" or "<size> <name>".
        dir_or_size, name = line
        if dir_or_size == "dir":
            key = name + "_dir"
            if key not in current_dir.contents:
                current_dir.contents[key] = Directory(name, current_dir)
        else:
            key = name + "_file"
            if key not in current_dir.contents:
                current_dir.contents[key] = File(name, int(dir_or_size))


In [4]:
def recursive_folder_printer(dir: Directory, indent: int = 0, to_print=False):
    """
    Render the folder tree rooted at `dir` as indented text, matching the
    formatting on the Advent of Code website.

    Every nesting level is indented two spaces further than its parent.
    When `to_print` is false the text is returned; otherwise it is printed
    and nothing is returned.
    """
    child_indent = indent + 2
    pieces = [" " * indent, str(dir), "\n"]
    for child in dir.contents.values():
        if isinstance(child, Directory):
            # Sub-directories render themselves (and their children) recursively.
            pieces.append(recursive_folder_printer(child, child_indent))
        else:
            pieces.append(" " * child_indent + str(child) + "\n")
    output = "".join(pieces)
    if to_print:
        print(output)
    else:
        return output


# Show the whole reconstructed tree, starting from the root
print(recursive_folder_printer(root_dir))


- / (dir)
  - ddgtnw (dir)
    - gftgshl (dir)
      - mtshhn (dir)
        - fsclsm (file, size=244930)
        - vnnf (file, size=197930)
      - smnslwd (dir)
        - dbtvp.mbr (file, size=205127)
        - grct (dir)
          - jjq.hjd (file, size=20977)
        - hcjtjptg (file, size=270601)
        - lsqvg.zmm (file, size=146538)
        - vnnf (file, size=310443)
        - vqms (file, size=84541)
      - znbs (dir)
        - pjrpqc.gwh (file, size=192316)
        - tnqpmbjf.prg (file, size=5233)
    - grct (dir)
      - qzlmfj.lhc (file, size=297156)
      - vnnf (file, size=104088)
    - tbqpqfgd.wvz (file, size=57336)
    - vqms (file, size=267191)
    - wtgzgmvr (dir)
      - cfvjph (dir)
        - tlms (file, size=201215)
      - jzdqctm (dir)
        - hnbjcm (dir)
          - ddgtnw.tpg (file, size=147907)
          - hgh (dir)
            - smnslwd.hnd (file, size=64701)
          - qjpfhmw.gts (file, size=107668)
          - qvnbfdq (dir)
            - dzbfsf.qsr (fil

In [5]:
# Go through and fill in all directory sizes.
# compute_size() stores each total on the Directory object itself (its `size`
# attribute) and returns nothing.
root_dir.compute_size()
# Display the total size of the root directory as the cell output.
root_dir.size

46975962

In [6]:
def recursive_dir_lister(dir:Directory):
    """
    Collect `dir` and every directory nested below it into one flat list.

    The given directory comes first; each sub-directory is followed
    immediately by its own descendants, in the iteration order of `contents`.
    """
    found = [dir]
    for child in dir.contents.values():
        if isinstance(child, Directory):
            found.extend(recursive_dir_lister(child))
    return found

# Flatten the whole tree into a list of Directory objects, root first
all_directories = recursive_dir_lister(root_dir)

# Pair each directory's name with its size for easy searching
dir_name_size = [[entry.name, entry.size] for entry in all_directories]

dir_name_size

[['/', 46975962],
 ['ddgtnw', 5672556],
 ['gftgshl', 1678636],
 ['mtshhn', 442860],
 ['smnslwd', 1038227],
 ['grct', 20977],
 ['znbs', 197549],
 ['grct', 401244],
 ['wtgzgmvr', 3268149],
 ['cfvjph', 201215],
 ['jzdqctm', 2913732],
 ['hnbjcm', 1573758],
 ['hgh', 64701],
 ['qvnbfdq', 948182],
 ['dzfpqb', 295512],
 ['rvdlmnqv', 456982],
 ['smnslwd', 456982],
 ['tlms', 263149],
 ['wlvg', 42151],
 ['vdzsn', 301547],
 ['zvh', 111901],
 ['dtmbp', 4095149],
 ['cscpfcjv', 260848],
 ['hdjs', 174135],
 ['tlms', 174135],
 ['jcrb', 316186],
 ['spmc', 1181734],
 ['hhwzwqzq', 249643],
 ['jbrftqj', 612334],
 ['spwsfpww', 2162246],
 ['ddgtnw', 786462],
 ['grct', 18055],
 ['tlms', 54154],
 ['hzb', 812363],
 ['smnslwd', 501094],
 ['smnslwd', 118657],
 ['dzbfsf', 19354558],
 ['jttqnbvn', 9663],
 ['ptbps', 289723],
 ['pwclsbw', 157147],
 ['dzbfsf', 157147],
 ['tllwm', 18659329],
 ['pcbgr', 4474328],
 ['grct', 629852],
 ['jtqg', 333756],
 ['smtbvvpn', 2934485],
 ['mqgzwzn', 297744],
 ['rhp', 297744],
 ['msp

In [7]:
# Part 1: add up the sizes of all directories that are no larger than the limit.
# sum() over a generator avoids a module-level loop variable called `dir`,
# which would shadow the built-in dir() for the rest of the notebook session.
SMALL_DIR_LIMIT = 100_000
total = sum(dir_size for _name, dir_size in dir_name_size if dir_size <= SMALL_DIR_LIMIT)

total # 836968 is too low, 1334506 is correct

1334506

# Part 2

In [8]:
# Compute how much space has to be freed
TOTAL_SPACE = 70_000_000     # capacity of the whole disk
REQUIRED_SPACE = 30_000_000  # free space that must be available

available_space = TOTAL_SPACE - root_dir.size
space_to_free = REQUIRED_SPACE - available_space

space_to_free


6975962

In [9]:
# Find the size of the smallest folder that frees at least the required space.
# Start from the largest size and keep lowering it while candidates still qualify.
sizes = [entry[1] for entry in dir_name_size]
size_to_beat = max(sizes)
for size in sizes:
    if space_to_free <= size < size_to_beat:
        size_to_beat = size

size_to_beat

7421137

In [10]:
# Find and print the directory with the size found previously.
# A plain loop replaces the throwaway list of print() results, and `_` is no
# longer assigned, so IPython's last-output variable keeps working.
for dir_name, dir_size in dir_name_size:
    if dir_size == size_to_beat:
        print(f"{dir_name},{dir_size}")

ptzptl,7421137


In [11]:
# Do both at once: find the smallest directory that frees enough space and print it.
# The minimum is computed once up front rather than once per directory, and
# `default=None` means nothing is printed when no directory is large enough.
smallest_sufficient_size = min(
    (candidate.size for candidate in all_directories if candidate.size >= space_to_free),
    default=None,
)
for candidate in all_directories:
    if candidate.size == smallest_sufficient_size:
        print(f"{candidate}, size = {candidate.size}")

- ptzptl (dir), size = 7421137
