In this exercise, the goal is to find the first sequence of characters in a file in which all characters are different. Once that sequence is found, there is no point in keeping the rest of the file in memory.

To avoid loading the whole file, we read it character by character and only ask for the next character if we haven't found the right sequence yet.

This can be achieved by using generators.

In [19]:
import numpy as np

# Use of generator to avoid filling memory with data we don't need (after the start signal)
def reader(filename):
    """Lazily yield each character of a text file with its 1-based position.

    The file is consumed one character at a time, so a caller that stops
    iterating early never pulls the remainder of the file through the
    generator.

    Args:
        filename: Path of the text file to read.

    Yields:
        Tuples ``(char, position)`` where ``position`` is the number of
        characters read so far, including ``char``.
    """
    with open(filename) as f:
        # Count characters explicitly instead of calling f.tell(): on a text
        # file tell() returns an opaque cookie, which is not a character count
        # once multi-byte characters or "\r\n" newline translation are involved.
        # f.read(1) returns "" at EOF, which is the sentinel that stops iter().
        for position, char in enumerate(iter(lambda: f.read(1), ""), start=1):
            yield char, position

# Number of consecutive distinct characters that marks the start of the signal.
MARKER_LENGTH = 4

# Bind the generator to its own name: rebinding `reader` would shadow the
# function and break any later call to it.
char_stream = reader("./day6Input.txt")

# Sliding window holding at most the last MARKER_LENGTH characters read.
window = ""

for char, position in char_stream:
    # Append the new character and drop the oldest one once the window is full.
    window = (window + char)[-MARKER_LENGTH:]
    # A set removes duplicates, so it only reaches MARKER_LENGTH elements when
    # the window is full and all of its characters differ.
    if len(set(window)) == MARKER_LENGTH:
        print(f'Signal starting is {window} at position {position}')
        break
else:
    # The file ended before a full window of distinct characters was seen.
    print(f'No run of {MARKER_LENGTH} distinct characters found')

# Closing the generator releases the underlying file right away instead of
# waiting for garbage collection (harmless if it is already exhausted).
char_stream.close()

Signal starting is jzsh at position 1651


In [21]:
import numpy as np

# Use of generator to avoid filling memory with data we don't need (after the start signal)
def reader(filename):
    """Lazily yield each character of a text file with its 1-based position.

    The file is consumed one character at a time, so a caller that stops
    iterating early never pulls the remainder of the file through the
    generator.

    Args:
        filename: Path of the text file to read.

    Yields:
        Tuples ``(char, position)`` where ``position`` is the number of
        characters read so far, including ``char``.
    """
    with open(filename) as f:
        # Count characters explicitly instead of calling f.tell(): on a text
        # file tell() returns an opaque cookie, which is not a character count
        # once multi-byte characters or "\r\n" newline translation are involved.
        # f.read(1) returns "" at EOF, which is the sentinel that stops iter().
        for position, char in enumerate(iter(lambda: f.read(1), ""), start=1):
            yield char, position

# Number of consecutive distinct characters that marks the start of a message.
MARKER_LENGTH = 14

# Bind the generator to its own name: rebinding `reader` would shadow the
# function and break any later call to it.
char_stream = reader("./day6Input.txt")

# Sliding window holding at most the last MARKER_LENGTH characters read.
window = ""

for char, position in char_stream:
    # Append the new character and drop the oldest one once the window is full.
    window = (window + char)[-MARKER_LENGTH:]
    # A set removes duplicates, so it only reaches MARKER_LENGTH elements when
    # the window is full and all of its characters differ.
    if len(set(window)) == MARKER_LENGTH:
        print(f'Message starting is {window} at position {position}')
        break
else:
    # The file ended before a full window of distinct characters was seen.
    print(f'No run of {MARKER_LENGTH} distinct characters found')

# Closing the generator releases the underlying file right away instead of
# waiting for garbage collection (harmless if it is already exhausted).
char_stream.close()

Message starting is htrzpnflwjvcgb at position 3837
