In [1]:
# Load the shared formatting/style for this notebook.
import os

# path : store the current working directory so a later cell can
# switch back to it with os.chdir(path)
path = os.getcwd()
# switch into ../notebook_format before the import below
# (presumably so that `formats` can be found there -- confirm)
os.chdir(os.path.join('..', 'notebook_format'))
from formats import load_style
load_style()

In [2]:
# switch back to the working directory that was saved in `path`
os.chdir(path)
# magic to print the author name, date, time and the
# Python / IPython versions
%load_ext watermark
%watermark -a 'Ethen' -d -t -v

Ethen 2017-09-08 21:10:56 

CPython 3.5.2
IPython 6.1.0


# Collections

## Named Tuples

A namedtuple is a tuple whose fields have names, which makes it clearer to read than a plain tuple while still being immutable.

In [3]:
from collections import namedtuple


# build the namedtuple class; the field names can also be passed
# as a single whitespace-separated string
Color = namedtuple('Color', 'red green blue')
color = Color(red=55, green=55, blue=55)

# read an element through its field name
print(color.red)

# positional indexing still works, as on any tuple, but it gives up
# the readability that namedtuple was chosen for in the first place:
# color.red says more than color[0]
print(color[0])

55
55


A namedtuple can be used as a lightweight replacement for a dictionary, which requires more space to store. However, be aware that, unlike a dictionary, a namedtuple is immutable.

In [4]:
# a namedtuple is immutable, so assigning to a field fails:
# color.red = 75  # raises AttributeError

# if a value really has to change after creation, ._replace()
# returns a new namedtuple with the given fields swapped out
color = color._replace(red=75)
color

Color(red=75, green=55, blue=55)

## DefaultDict

Whenever you need a dictionary in which every value should start from the same default, use **defaultdict**. Looking up a missing key with `d[key]` on a **defaultdict** does not raise a `KeyError`; instead, the key is added with the value returned by the default factory. (Methods such as `.get()` do not trigger the default factory.)

In [5]:
# example 1: 'Joe' was never stored, so looking him up
# yields the default value
from collections import defaultdict

ice_cream = defaultdict(lambda: 'Vanilla')
ice_cream.update({'Sarah': 'Chunky Monkey', 'Abdul': 'Butter Pecan'})

# known key first, then the missing one
print(ice_cream['Sarah'], ice_cream['Joe'], sep='\n')

Chunky Monkey
Vanilla


In [6]:
# example 2: collecting cities under the state they belong to
from collections import defaultdict

city_list = [
    ('TX', 'Austin'), ('TX', 'Houston'), ('NY', 'Albany'),
    ('NY', 'Syracuse'), ('NY', 'Buffalo'), ('NY', 'Rochester'),
    ('TX', 'Dallas'), ('CA', 'Sacramento'), ('CA', 'Palo Alto'),
    ('GA', 'Atlanta'),
]

# a state seen for the first time starts out with an empty list,
# so append can be called without checking for the key
cities_by_state = defaultdict(list)
for state, city in city_list:
    cities_by_state[state].append(city)

# one line per state: the state code followed by its cities
for state, cities in cities_by_state.items():
    print('{} {}'.format(state, ', '.join(cities)))

CA Sacramento, Palo Alto
NY Albany, Syracuse, Buffalo, Rochester
GA Atlanta
TX Austin, Houston, Dallas


# Reference

- [Blog: Using defaultdict in Python](https://www.accelebrate.com/blog/using-defaultdict-python/)
- [Youtube: Namedtuple - When and why should you use namedtuples?](https://www.youtube.com/watch?v=GfxJYp9_nJA)