# Advent of Code 2022: Day 7
https://adventofcode.com/2022/day/7


## Part 1
Find the sum of the total sizes of all directories whose total size is at most 100000.

### Get the data into a list of strings

In [1]:
# Read the whole puzzle input. The with-block makes sure
# the file is closed again, even if reading fails.
with open('input.txt', 'r') as myfile:
  data = myfile.read()
data_list = data.split('\n')
# A trailing newline leaves an empty string at the end of
# the list. Drop it only when it is actually there, so an
# input without a final newline keeps its last line.
if data_list[-1] == '':
  data_list = data_list[:-1]

### Transform list into a dictionary

In [2]:
# Build a dictionary describing the filesystem.
# Each key is the full path of a directory (always
# ending in '/') and points to a list of two lists:
# the first is a manifest of the directories and
# files directly inside that directory, the second
# is reserved for the total size of the directory.
data_dict = {'/': [[],[]]}
parent = '/'
# Number of characters each descent added to parent,
# so that '$ cd ..' can strip them off again.
p_length = []

# The first two lines are represented by the initial
# values above, so parsing starts at the third line.
# enumerate keeps i equal to the index of item in
# data_list, which makes data_list[i-1] the line
# directly before the current one.
for i, item in enumerate(data_list[2:], start=2):

  # The parent path is only extended once the '$ ls'
  # of the new directory is reached; the name is taken
  # from the line before it.
  # NOTE: this assumes every '$ ls' is directly
  # preceded by the '$ cd <name>' that entered the
  # directory.
  # For example the parent goes from '/' to '/hdwsmn/'.
  if item == '$ ls':
    step = data_list[i-1][5:] + '/'
    parent = parent + step
    p_length.append(len(step))

  # '$ cd ..' moves back up: cut off the characters
  # that the most recent descent added, for example
  # from '/hdwsmn/mrrqnc/' back to '/hdwsmn/'.
  elif item == '$ cd ..':
    parent = parent[:len(parent)-p_length.pop()]

  # Any other '$ cd <name>' enters a directory, so a
  # new, empty entry is created under the key
  # parent + <name> + '/'. The parent itself is
  # updated later, by the '$ ls' branch above.
  elif item.startswith('$ cd '):
    curr_key = item[5:]
    data_dict[parent+curr_key+'/'] = [[],[]]

  # 'dir <name>' is a directory inside the current
  # parent. It is stored in the manifest as
  # parent + <name>; the [4:] removes 'dir '.
  elif item.startswith('dir '):
    data_dict[parent][0].append(parent+item[4:])

  # Everything else is treated as a file line of the
  # form '<size> <name>'. It is stored untrimmed,
  # prefixed with the parent path.
  else:
    data_dict[parent][0].append(parent+item)

### Calculate the size of every directory

In [3]:
# Function which counts the size of each
# directory and stores this in the second
# list of each directory.

def count_size(dictionary):
  """Compute the total size of every directory in *dictionary*.

  Each value of *dictionary* is a list of two lists. The first
  is a manifest whose entries are either '<parent path><name>'
  for a sub-directory or '<parent path><size> <name>' for a
  file. The computed total is appended to the second list.

  Manifest entries that contain more than one space are
  neither of the two shapes above and are skipped.

  Returns the same dictionary object, modified in place.

  Raises KeyError if a listed sub-directory has no entry of
  its own (or has not been sized yet), and ValueError if the
  size part of a file entry is not an integer.
  """

  # Walk the keys in reverse insertion order. This relies on
  # every directory having been inserted after the directory
  # that contains it, so that inner directories are sized
  # before the directories that refer to them.
  for key in reversed(list(dictionary)):
    size = 0
    for item in dictionary[key][0]:

      # Splitting on a space gives one part for a directory
      # entry and two parts for a file entry.
      parts = item.split(' ')

      if len(parts) == 2:
        # The first part is '<parent path><size>'. The parent
        # path ends in '/', so the size is whatever follows
        # the last '/'. Unlike scanning for the first digit,
        # this also works when directory names contain digits.
        size += int(parts[0].rsplit('/', 1)[-1])

      elif len(parts) == 1:
        # A sub-directory: its size was computed earlier in
        # this loop. Keys end in '/', manifest entries do not.
        size += dictionary[item+'/'][1][0]

    dictionary[key][1].append(size)
  return dictionary

# Fill in the size list of every directory entry.
# count_size modifies the dictionary in place and
# returns that same dictionary.
data_dict = count_size(data_dict)

In [4]:
# Find all directories with a size less 
# than or equal to a given limit and 
# return the sum of the sizes of these directories. 
def sum_of_sizes(dictionary, limit):
    """Add up the sizes of all directories that are not larger than *limit*.

    The size of a directory is the first element of the second
    list stored under its key. Returns 0 when no directory
    qualifies.
    """
    sizes = (entry[1][0] for entry in dictionary.values())
    return sum(size for size in sizes if size <= limit)
    
# Part 1 answer: combined size of all directories
# whose size is at most 100000.
sum_of_sizes(data_dict, 100000)

1348005

## Part 2
Find the size of the smallest directory which can be removed to free up enough space

### Get the space that must be deleted

In [5]:
# Total capacity and the amount of free space that is needed.
max_space = 70000000
needed_space = 30000000

# The size stored for the root directory is the space
# currently in use; the rest of the capacity is free.
curr_space = data_dict['/'][1][0]
free_space = max_space - curr_space

# The shortfall between what is needed and what is free.
must_delete = needed_space - free_space
must_delete

8690120

### Find the smallest size

In [6]:
# Find all directories with a size greater 
# than or equal to a given limit and return 
# the size of the smallest of these directories. 
def find_smallest(dictionary, limit=0):
  """Return the smallest directory size that is at least *limit*.

  The size of a directory is the first element of the second
  list stored under its key. Raises ValueError when no
  directory is large enough.
  """
  sizes = (entry[1][0] for entry in dictionary.values())
  return min(size for size in sizes if size >= limit)

In [7]:
# Part 2 answer: size of the smallest directory that
# is at least must_delete in size.
find_smallest(data_dict, must_delete)

12785886