# Some tips and tricks in Python that will come in handy with AoC

### Some basics about lists, dicts and their properties.

In [75]:
# Lists are ordered, indexed, mutable containers of elements. Dictionaries are unordered, mutable containers of key-value pairs.
my_list = [1, 2, 3, 4, 5]
my_dictionary = {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}

# To get elements, lists use the index of the element, dictionaries use the key of the element.
print(my_list[0])  # 1
print(my_dictionary['d'])  # 4

# To add elements, lists use the append() method, dictionaries use the key of the element.
my_list.append(6)
print(my_list)  # [1, 2, 3, 4, 5, 6]
my_dictionary['f'] = 6
print(my_dictionary)  # {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6}

# Tuples are ordered, indexed, immutable containers of elements. Sets are unordered, mutable containers of unique elements.
my_tuple = (1, 2, 3, 4, 5)
my_set = {1, 2, 3, 4, 5}

# Tuples are handy because they are memory efficient. See for your self:
import sys
print(sys.getsizeof(my_list))  # 104
print(sys.getsizeof(my_tuple))  # 88

# Sets are handy because they are unique. See for yourself:
some_list = [1, 2, 3, 4, 5, 5, 5, 5]
my_set = {1, 2, 3, 4, 5, 5, 5, 5}
print(some_list)  # [1, 2, 3, 4, 5, 5, 5, 5]
print(my_set)  # {1, 2, 3, 4, 5}



1
4
[1, 2, 3, 4, 5, 6]
{'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6}
104
80
[1, 2, 3, 4, 5, 5, 5, 5]
{1, 2, 3, 4, 5}


In [2]:
# Use zip() to combine elements from multiple iterables into tuples.
keys = ['a', 'b', 'c']
values = [1, 2, 3]
zipped = zip(keys, values)
print(list(zipped))  # [('a', 1), ('b', 2), ('c', 3)]

# Note that we had to put the zipped variable in a list, as zip() returns an iterator (lazy but memroy efficient).

# You often see zip() used in a for loop, if you want to iterate over multiple iterables at the same time.
for k, v in zip(keys, values):
    print(k, v)  # a 1, b 2, c 3

# Alternative is using enumerate() to get the index and value of an iterable.
for index, value in enumerate(values):
    print(keys[index], value)  # a 1, b 2, c 3
    
# Zip can also be used to unzip a list of tuples.
zipped = [('a', 1), ('b', 2), ('c', 3)]
keys, values = zip(*zipped)
print(keys)  # ('a', 'b', 'c')
print(values)  # (1, 2, 3)

# Zip can also be used to create a dictionary from two lists.
keys = ['a', 'b', 'c']
values = [1, 2, 3]
my_dictionary = dict(zip(keys, values))
print(my_dictionary)  # {'a': 1, 'b': 2, 'c': 3}


[('a', 1), ('b', 2), ('c', 3)]
a 1
b 2
c 3
a 1
b 2
c 3
('a', 'b', 'c')
(1, 2, 3)
{'a': 1, 'b': 2, 'c': 3}


### List, dictionary and generator comprehensions

In [None]:
# List and dictionary comprehensions are a handy way to create lists and dictionaries in a for loop.
# They allow you to create lists or dictionaries in a single line of code. Lists comprehensions use square brackets, 
# while dictionaries comprehensions use curly brackets.

my_list = [i for i in range(10)]
print(my_list)  # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

# For dictionary comprehensions, you need to specify the key and value with a : in between.
my_dictionary = {i: i+1 for i in range(10)}
print(my_dictionary)  # {0: 1, 1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10}

# You can also add conditions to list and dictionary comprehensions.
my_list = [i for i in range(10) if i % 2 == 0]
print(my_list)  # [0, 2, 4, 6, 8]

# The variable my_list is fully loaded in memory. If you want to save memory, you can use a generator expression.
# A generator expression is similar to a list comprehension, but it returns an iterator instead of a list.
# That means that the elements are not loaded in memory until you need them.
my_generator = (i for i in range(10) if i % 2 == 0)
print(my_generator)  # <generator object <genexpr> at 0x7f8f4c1d5f50>
my_list = list(my_generator)
print(my_list)  # [0, 2, 4, 6, 8]



### Regex in Python with the `re` module

In [5]:
# If you have a problem and you need to solve it with regex, you actually have two problems. 
# However, if you really need to use regex, you can use the re module that is built into Python.

# Let's say you want to extract the numbers from a string.
import re

my_string = 'This is a string with 1234 numbers in 56 different places.'

# 10/10 I would recomment to use r-string notation for regex. This way you don't have to escape the backslash.
# The r in r-string stands for..... raw string. Not regex, lol.

# There are a few basic ways to use re-functions. The first one is findall().
# findall() returns a list of all parts of the string that match the regex.
numbers = re.findall(r'\d+', my_string)
print(numbers)  # ['1234', '56']

# The second one is search(). search() returns a match object if the regex matches the string, otherwise it returns None.
match = re.search(r'\d+', my_string)
print(match)  # <re.Match object; span=(22, 26), match='1234'>

# You can also use sub() to replace parts of the string that match the regex.
new_string = re.sub(r'\d+', 'number', my_string)
print(new_string)  # This is a string with number numbers in number different places.

# Another important fucntion is finditer(). finditer() returns an iterator of match objects.
matches = re.finditer(r'\d+', my_string)
for m in matches:
    print(m)  # <re.Match object; span=(22, 26), match='1234'>, <re.Match object; span=(44, 46), match='56'>
# Note that you can use the span of the match object to get the start and end index of the match! 


['1234', '56']
<re.Match object; span=(22, 26), match='1234'>
<re.Match object; span=(22, 26), match='1234'>
<re.Match object; span=(38, 40), match='56'>


### Sets and their properties

In [48]:
# Sets are unordered collections of unique elements. They are useful for removing duplicates from a list. 
# They are also useful for checking if an element is in a collection (O(1) time complexity)

my_list = [1, 2, 3, 3, 4, 5, 5, 5, 6]
print("My list: ", my_list)

# Sets are unordered! Any order is possible when printing
my_set = set(my_list)
print("My set: ", my_set)


# Sets are useful for checking if an element is in a collection
if 3 in my_set:
    print("3 is in the set")

# You can create  unions of sets (all unique items from two sets) using the `|` operator
my_second_set = {1, 2, 'a', 'b'}
print("My second set: ", my_second_set)
print("Union: ", my_set | my_second_set)

# You can create intersections of sets (all items that are in both sets) using the `&` operator
print("Intersection: ", my_set & my_second_set)

# You can create differences of sets (all items that are in the first set, but not in the second) using the `-` operator
print("Difference: ", my_set - my_second_set)

# You can create symmetric differences of sets (all items that are in either set, but not in both) using the `^` operator
print("Symmetric difference: ", my_set ^ my_second_set)

# Conclusion: sets() are cool!

My list:  [1, 2, 3, 3, 4, 5, 5, 5, 6]
My set:  {1, 2, 3, 4, 5, 6}
3 is in the set
My second set:  {'b', 1, 2, 'a'}
Union:  {1, 2, 3, 4, 5, 6, 'b', 'a'}
Intersection:  {1, 2}
Difference:  {3, 4, 5, 6}
Symmetric difference:  {'b', 3, 4, 5, 6, 'a'}


### Using the `collections` module, it's your friend!

In [41]:
# Use Counter() when counting occurences of items in an iterable (list, string, etc.)
from collections import Counter

s = 'AAABBC'
c = Counter(s)
print("Counter", c)

# Use defaultdict() when you want to create a dictionary with a default value for non-existing keys
# The function that you provide must take 0 parameters and return a value. Use `lambda: <default_value>` to create such a function.
from collections import defaultdict

d = defaultdict(lambda: 0)
for c in s:
    d[c] += 1
print("Default dict", d)



Counter Counter({'A': 3, 'B': 2, 'C': 1})
Default dict defaultdict(<function <lambda> at 0x000001EC0D0004C0>, {'A': 3, 'B': 2, 'C': 1})


In [None]:
# Lists are handy for storing ordered collections of items. They are mutable, which means that you can change them after creation.
# You can also get items by index in O(1) time complexity.

# Lists can be slow though, especially when used as stacks. (Stacks are collections where you can only add/remove items at the end). 
# When you find yourself in a situation where you implement a stack, try using collections.deque (double ended queue) instead. 
# It is much faster for stacks and queues.

# Example: we want to process all numbers between 0 and N, recursively, and add them.
result = 0
my_list = [3]
while my_list:
    print("Current list: ", my_list)
    item = my_list.pop(0)                           # take first item of list and remove it.
    print(f"Processing item value: {item}")
    result += item
    print("Intermediate result: ", result)
    for i in range(item):
        my_list.append(i)                           # add item 0 upto (exclusive) N to the end of the list
    print(" ")
print("Final result: ", result)

In [None]:
# Doing the same with a double ended queue (deque)
# Remember, deque is a double ended queue. You can add/remove items at the start and end of the queue.
# So rather than using pop(0) to take the first item, we use popleft() to take the first item.

from collections import deque
result = 0
my_deque = deque([3])
while my_deque:
    print("Current list: ", my_deque)
    item = my_deque.popleft()                      # take first item of list and remove it using popleft()
    print(f"Processing item value: {item}")
    result += item
    print("Intermediate result: ", result)
    for i in range(item):
        my_deque.append(i)                           # add item 0 upto (exclusive) N to the end of the list
    print(" ")
print("Final result: ", result)

In [64]:
# Although they look pretty similar, the deque version is much faster. We can time it!

from collections import deque
def using_list(start_int):
    result = 0
    my_list = [3]
    while my_list:
        item = my_list.pop(0)                           # take first item of list and remove it.
        result += item
        for i in range(item):
            my_list.append(i)                           # add item 0 upto (exclusive) N to the end of the list
    return result

def using_deque(start_int):
    result = 0
    my_deque = deque([3])
    while my_deque:
        item = my_deque.popleft()                      # take first item of list and remove it using popleft()
        result += item
        for i in range(item):
            my_deque.append(i)                           # add item 0 upto (exclusive) N to the end of the list
    return result


In [65]:

import timeit
from functools import partial
print("Using a list a 10.000 times (small int): ", timeit.timeit(partial(using_list, 3), number=10000))
print("Using a deque a 10.000 times (small int): ", timeit.timeit(partial(using_deque, 3), number=10000))

# Results:
# Using a list a 10.000 times (small int):  0.071946800002479
# Using a deque a 10.000 times (small int):  0.055354099997202866

Using a list a 10.000 times (small int):  0.071946800002479
Using a deque a 10.000 times (small int):  0.055354099997202866


In [69]:

print("Using a list a 10.000 times (larger int): ", timeit.timeit(partial(using_list, 10), number=10000))
print("Using a deque a 10.000 times (larger int): ", timeit.timeit(partial(using_deque, 10), number=10000))

# Results:
# Using a list a 10.000 times (larger int):  0.1034739000024274
# Using a deque a 10.000 times (larger int):  0.05009819999395404

# That is already twice as fast! And the larger the numbers, the bigger the difference.

Using a list a 10.000 times (larger int):  0.1034739000024274
Using a deque a 10.000 times (larger int):  0.05009819999395404


In [72]:
print("Using a list a 10.000 times (even larger int): ", timeit.timeit(partial(using_list, 100), number=10000))
print("Using a deque a 10.000 times (even larger int): ", timeit.timeit(partial(using_deque, 100), number=10000))

# Results:
# Using a list a 10.000 times (even larger int):  0.1361823000042932
# Using a deque a 10.000 times (even larger int):  0.05713070000638254

Using a list a 10.000 times (even larger int):  0.1361823000042932
Using a deque a 10.000 times (even larger int):  0.05713070000638254


### Module `itertools` provide efficient tools as well!

In [15]:
# Itertools are, as the name suggests, tools for iterating over iterables.
# Someimes, you need a endless loop. Often, you see a while True loop for this, like so:
i = 0
while True:
    i += 1
    if i > 3:
        break
    print(i) # 1, 2, 3

# You can also use itertools.count() for this. It returns an iterator that counts up from a given number.
from itertools import count
for i in count(1): # count(1) starts counting from 1
    if i > 3:
        break
    print(i) # 1, 2, 3

# At times you need to cycle through a list, and repeat that cycle endlessly. For example, 
# you want to you need to play a 10000 cards in a game, and the stack of cards repeats itself.

# You can use itertools.cycle() for this. It returns an iterator that cycles through a list endlessly.
from itertools import cycle
cards = ['A', 'K', 'Q', 'J', '10', '9', '8', '7', '6', '5', '4', '3', '2']
for idx, card in enumerate(cycle(cards)):
    print(card) # A, K, Q, J, 10, 9, 8, 7, 6, 5, 4, 3, 2, A, K, Q, J, 10, 9, 8, 7, 6, 5, 4, 3, 2, etc.
    if idx > 5:
        break


# Batched() is a handy function for splitting a list into batches of a given size.
from itertools import batched # Note: Python 3.12 required!
my_list = [1, 2, 3, 4, 5, 6, 7, 8, 9]
for batch in batched(my_list, 4):
    print(batch) # [1, 2, 3, 4], [5, 6, 7, 8], [9]

# Groupby() will give you an iterator that returns consecutive keys and groups from the iterable.
from itertools import groupby
my_list = [1, 1, 1, 2, 2, 3, 3, 3, 3]
for key, group in groupby(my_list):
    print(key, list(group)) # 1 [1, 1, 1], 2 [2, 2], 3 [3, 3, 3, 3]

# The last function I want to mention is zip_longest(). It is similar to zip(), but it will
# fill in a default value for missing values. Normally, zip() will stop when the shortest iterable is exhausted.
from itertools import zip_longest
keys = ['a', 'b', 'c']
values = [1, 2]
for key, value in zip_longest(keys, values, fillvalue=0):
    print(key, value) # a 1, b 2, c 0

# You can find more itertools functions here: https://docs.python.org/3/library/itertools.html
# Although I want to only mention built-in packages, the https://pypi.org/project/more-itertools/ package is also very useful.

1
2
3
1
2
3
A
K
Q
J
10
9
8


ImportError: cannot import name 'batched' from 'itertools' (unknown location)

### Functools, a very nice module for higher-order functions

In [19]:
# Functions are first class citizens in Python. That means that you can pass them around like any other variable.
# The following is therefore perfectly valid:

def add(a, b):
    return a + b

my_var = add
print(my_var(1, 2)) # 3

# The object `add` is a function, but the name itself is just a variable. You can assign it to another variable. 
# Calling a function is done by adding parentheses after the variable name. This way, you can pass
# functions to other functions. This is very useful when you want to use a function as an argument. 

# The functool module contains a few functions that are useful when working with functions. When doing AoC, you
# will look for an O(1) solution, but sometimes you can't find it. In that case, you might benefit from memoization.

# Memoization is a technique where you store the results of a function call, so that you can reuse them later. In Python
# you can use the @cache decorator from the functools module for this. Given a set of input parameters, it will 
# return the cached result if it is available, otherwise it will call the function and store the result.

# See the following example:

from functools import cache

@cache
def add(a, b):
    print("Calling add")
    return a + b

for i in range(3):
    print(add(1, 2)) # Calling add, 3, 3

# The first time we call add(1, 2), the function is called. The second time, the result is returned from the cache.
# This is useful when you have a recursive function that you want to speed up. Think day 4 part 2. 

# You will get bonus points if you are going to use reduce() from the functools module. reduce() is a function that
# applies a function of two arguments cumulatively to the items of an iterable, from left to right, so as to reduce
# the iterable to a single value. An example copied from the docs is most explanatory:
from functools import reduce
my_calculation = reduce(lambda x, y: x+y, [1, 2, 3, 4, 5]) # calculates ((((1+2)+3)+4)+5)
print(my_calculation) # 15



3
Calling add
3
3
3
15


### To discuss: Pandas, Numpy...