In [2]:
# Run this cell first.
# Make IPython display the value of every bare expression statement in a cell,
# not only the last one, so several results can be shown without print().
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

The `collections` module (high-performance container datatypes) provides only five container types in Python 2:
    - namedtuple()
    - deque
    - Counter
    - OrderedDict
    - defaultdict

### Chapter 3 Dict and Set

Even if we never write dictionaries explicitly in our programs, Python itself uses many of them at run time: module namespaces, class and instance attributes, and function keyword arguments are all stored in dictionaries, and the built-in functions live in `__builtins__.__dict__`.

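Dictionaries are highly optimized because they are implemented with hash tables: a key's hash value determines where its entry is stored, so a lookup takes roughly constant time on average.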

In [4]:
from collections import abc

# collections.abc holds the Abstract Base Classes (ABCs) for containers.
# Each ABC names an interface; isinstance()/issubclass() against an ABC tests
# whether a class provides that interface.
# Sequence and Mapping types build on three basic ABCs from this module:
# Container (__contains__), Iterable (__iter__) and Sized (__len__).
dir(abc)

['AsyncIterable',
 'AsyncIterator',
 'Awaitable',
 'ByteString',
 'Callable',
 'Container',
 'Coroutine',
 'Generator',
 'Hashable',
 'ItemsView',
 'Iterable',
 'Iterator',
 'KeysView',
 'Mapping',
 'MappingView',
 'MutableMapping',
 'MutableSequence',
 'MutableSet',
 'Sequence',
 'Set',
 'Sized',
 'ValuesView',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__']

In [5]:
# A dictionary is a Mapping type: a plain dict passes the isinstance check
# against the abc.Mapping abstract base class.
isinstance({}, abc.Mapping)

True

Ways to build a dictionary:

In [9]:
# Four equivalent ways to build the same dictionary
dict1 = {'a': 1, 'b': 2, 'c': 3}        # literal syntax
dict2 = dict(a=1, b=2, c=3)             # keyword arguments
letters, numbers = ['a', 'b', 'c'], [1, 2, 3]
dict3 = dict(zip(letters, numbers))     # zip two parallel lists into (key, value) pairs
pairs = [('a', 1), ('b', 2), ('c', 3)]
dict4 = dict(pairs)                     # list of (key, value) tuples
print(dict1, dict2, dict3, dict4)

{'a': 1, 'c': 3, 'b': 2} {'a': 1, 'c': 3, 'b': 2} {'a': 1, 'c': 3, 'b': 2} {'a': 1, 'c': 3, 'b': 2}


Dict Comprehensions (dictcomp)

In [18]:
# One-to-one mapping: unpack each (key, value) tuple straight into the dict
lst = [('a', 1), ('b', 2), ('c', 3)]
dic1 = {key: value for key, value in lst}

# Two `for` clauses nest like nested loops, so every key is paired with every
# value in turn and ends up holding the last one (3), not its positional partner.
lst1 = ['a', 'b', 'c']
lst2 = [1, 2, 3]
dic2 = {
    key: value
    for key in lst1
    for value in lst2
}

dic1
dic2

{'a': 1, 'b': 2, 'c': 3}

{'a': 3, 'b': 3, 'c': 3}

Other mapping types:
    - collections.defaultdict
    - collections.OrderedDict

Handling missing keys with `dict.setdefault(key[, default])`

In [42]:
# setdefault(key, default) never raises KeyError:
#   - key present -> returns the stored value and leaves the dict alone
#   - key missing -> stores key: default in the dict, then returns default
dic = {'a': 1, 'b': 2, 'c': 3}
dic.setdefault('a', None)  # 'a' exists, so this evaluates to 1
dic.setdefault('d', None)  # 'd' is missing: inserts 'd': None and returns None (nothing is displayed)
dic

# get(key, default) also falls back to the default, but it never modifies the dict
dic.get('e', None)  # returns None (nothing is displayed); 'e' is not added
dic

1

{'a': 1, 'b': 2, 'c': 3, 'd': None}

{'a': 1, 'b': 2, 'c': 3, 'd': None}

defaultdict

In [54]:
from collections import defaultdict

# Tally how often each character occurs in a string.
# The factory function `int` supplies 0 for any key seen for the first time,
# so every value in this dict is an int and no "is the key there?" check is needed.
d_int = defaultdict(int)
string = 'asdfsdfasfasdfdfsadf'
for char in string:
    d_int[char] = d_int[char] + 1
d_int

defaultdict(int, {'a': 4, 'd': 5, 'f': 6, 's': 5})

In [57]:
# Group all the values that share a key: every value in this dict is a list.
d_lst = defaultdict(list)  # a missing key starts out as an empty list
balls = [('black', 1), ('white', 3), ('black', 5), ('white', 8)]
for ball, times in balls:
    d_lst[ball].append(times)
d_lst

# A plain dict can do the same job with setdefault(key, default):
# setdefault returns the existing list, or inserts and returns a new empty one.
d = {}
for ball, times in balls:  # same loop names as above; avoids a global called `time`
    d.setdefault(ball, []).append(times)
d

defaultdict(list, {'black': [1, 5], 'white': [3, 8]})

{'black': [1, 5], 'white': [3, 8]}

OrderedDict

In [67]:
from collections import OrderedDict

d = {'banana': 3, 'apple': 4, 'pear': 1, 'orange': 2}
# Built straight from d, the OrderedDict keeps whatever order d iterates in
od = OrderedDict(d)

# Ordered by key: sort the (key, value) pairs on element 0 before inserting them
od_key = OrderedDict(
    sorted(d.items(), key=lambda pair: pair[0])
)
od_key

# Ordered by value: same idea, sorting on element 1
od_value = OrderedDict(
    sorted(d.items(), key=lambda pair: pair[1])
)
od_value

# OrderedDict.popitem() can also be used to remove and return a (key, value) pair

OrderedDict([('apple', 4), ('banana', 3), ('orange', 2), ('pear', 1)])

OrderedDict([('pear', 1), ('orange', 2), ('banana', 3), ('apple', 4)])

Counter

In [70]:
# Count the occurrences of each character in a string
from collections import Counter
ct = Counter('asdfasdfasdfasdfasfd')
ct  # tallies from the initial string
ct.update('asdfasdfasdfasfasdffssf')  # update() adds the new counts to the existing ones
ct  # tallies after the update
ct.most_common(3)  # the three highest counts as (element, count) pairs

Counter({'a': 5, 'd': 5, 'f': 5, 's': 5})

Counter({'a': 10, 'd': 9, 'f': 12, 's': 12})

[('f', 12), ('s', 12), ('a', 10)]

Set / Frozenset:
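    - A `frozenset` is immutable and hashable, so it can be used as a dictionary key (or as an element of another set); a `set` cannot.
    - A `set` is mutable: elements can be added and removed in place. A `frozenset` cannot be modified after creation, although non-mutating operations such as union and intersection still work on it.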

In [79]:
# Build a mutable set and an immutable frozenset from the same characters;
# duplicate characters in the source string collapse to a single element.
letters = 'asdfasdfasdfasf'
s = set(letters)
s
fs = frozenset(letters)
fs

{'a', 'd', 'f', 's'}

frozenset({'a', 'd', 'f', 's'})

In [80]:
# In-place mutation of a set. frozenset has no add/update/remove methods,
# so none of these mutations are available on a frozenset.
s
s.add('k')  # insert a single element
s
s.update('qwer')  # insert every element of the iterable (here: each character)
s
s -= set('qwer')  # in-place difference: drop the elements that are in the other set
s
s.remove('k')  # delete one element; raises KeyError if it is absent

{'a', 'd', 'f', 's'}

{'a', 'd', 'f', 'k', 's'}

{'a', 'd', 'e', 'f', 'k', 'q', 'r', 's', 'w'}

{'a', 'd', 'f', 'k', 's'}

In [81]:
# Proper-subset test: `<` is True when every element of the left set is in s
# and the two sets are not equal.
set('asd') < s
# Other set operators:
#   union         |
#   intersection  &
#   difference    -

True

Other stuff:
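- `collections.UserDict` is a dict-like wrapper class designed to be subclassed; it is easier to extend than the built-in `dict` when writing your own mapping classes.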

### Chapter 4: Text vs. Bytes