# Collections Module

Counter is a dictionary subclass which helps count hashable objects.
A Counter is a container that keeps track of how many times equivalent values are added. 

In [8]:
from collections import Counter

In [9]:
# Counter keeps each distinct element as a dictionary key and the number of times it occurs as the value.
lst = [1, 2, 2, 2, 3, 54, 1, 2, 4, 4, 5]
Counter(lst)

Counter({1: 2, 2: 4, 3: 1, 54: 1, 4: 2, 5: 1})

In [10]:
# A string works as well: Counter then counts every character.
# The variable is called `text` (not `str`) so the built-in `str` type is not shadowed
# for the rest of the notebook.
text = 'abecasegbadaegafa'
Counter(text)

Counter({'a': 6, 'b': 2, 'e': 3, 'c': 1, 's': 1, 'g': 2, 'd': 1, 'f': 1})

In [11]:
# Sample sentence; the following cells count how often each of its words occurs.
sentence = (
    'How many times does this word or this word '
    'show up or not show up in this sentence.'
)

In [12]:
# split the sentence on whitespace; punctuation stays attached to its word (e.g. 'sentence.')
words = sentence.split()

In [13]:
# count how many times each word occurs in the list of words
Counter(words)

Counter({'How': 1,
         'many': 1,
         'times': 1,
         'does': 1,
         'this': 3,
         'word': 2,
         'or': 2,
         'show': 2,
         'up': 2,
         'not': 1,
         'in': 1,
         'sentence.': 1})

In [14]:
# A Counter offers additional methods (e.g. most_common, used below); keep the word counts in `c` to try them:
c = Counter(words)

In [15]:
# the 2 words with the highest counts, as a list of (element, count) pairs
c.most_common(2)

[('this', 3), ('word', 2)]

## Methods of Counter

In [None]:
# Common patterns for working with a Counter, shown here on the word counter `c`.
# Only the value of the last expression is displayed when the whole cell is run.

n = 3                                        # how many of the least common elements to select
list_of_pairs = [('this', 3), ('word', 2)]   # sample list of (element, count) pairs

sum(c.values())              # the total of all counts
list(c)                      # list of unique elements
set(c)                       # convert to a set
dict(c)                      # convert to a regular dictionary
c.items()                    # view of the (element, count) pairs
Counter(dict(list_of_pairs)) # convert from a list of (element, count) pairs
c.most_common()[:-n-1:-1]    # n least common elements (most_common returns a list, so slicing can be used)
c += Counter()               # remove zero and negative counts
c.clear()                    # reset all counts (kept last so the lines above still operate on the data)

# defaultdict

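defaultdict is a dictionary subclass that supports every method of a regular dictionary. Its first argument is a default factory: a callable (for example `int`, `list`, `object` or a `lambda`) that is called without arguments to produce the value for a missing key. Looking up a key that does not exist with `d[key]` therefore does not raise a KeyError; instead, the key is added to the dictionary with the value returned by the default factory.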

In [2]:
from collections import defaultdict

In [3]:
# a regular dictionary holding one key ('k1') and its value (1)
d = {'k1': 1}

In [4]:
d['k1']     # look up the value stored under the key 'k1'

1

In [5]:
# The key 'k2' is not in the regular dictionary d, so the lookup raises a KeyError.
# The error is caught and printed so that "Restart Kernel -> Run All" does not stop at this cell.
try:
    d['k2']
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: 'k2'

In [6]:
# create a defaultdict whose default factory is `object`:
# looking up a missing key calls object() and stores the result as the value for that key

d = defaultdict(object)

In [7]:
d['one']    # 'one' was never set, so defaultdict calls the default factory (here: object) and stores the result as the VALUE for the key 'one'

<object at 0x1092540b0>

In [9]:
for item in d:
    print(item)      # iterating over a dict yields its keys: 'one' was added by the lookup above

one


In [10]:
# the default factory can be any callable that takes no arguments;
# this lambda makes every missing key default to the value 0:

d = defaultdict(lambda: 0)

In [11]:
d['one']      # no KeyError: the factory returns 0, which is stored under 'one' and returned

0

In [15]:
d['two']       # 'two' has not been assigned a value yet, so the default 0 is stored and returned

0

In [16]:
d['two'] = 2  # an explicit assignment replaces the default value stored under 'two'

In [17]:
d      # display the defaultdict: its default factory followed by the stored keys and values

defaultdict(<function __main__.<lambda>()>, {'one': 0, 'two': 2})

# OrderedDict

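The OrderedDict is a dictionary subclass that remembers the order in which its contents are added. Since Python 3.7, regular dictionaries preserve insertion order as well, which is why both loops below print the items in the same order; the practical difference demonstrated at the end of this section is that equality between two OrderedDicts also takes the order of the items into account.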

In [18]:
# A regular dictionary with five key/value pairs, listed in insertion order:

d = {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}

In [19]:
# display the dictionary: the pairs appear in the order in which they were inserted
d

{'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}

In [27]:
# print every key with its value, one pair per line, for the regular dictionary
for k,v in d.items():
    print(k,v)

a 1
b 2
c 3
d 4
e 5


In [22]:
from collections import OrderedDict

In [23]:
# rebind `d` to a new, empty OrderedDict (it no longer refers to the regular dictionary)
d = OrderedDict()

In [24]:
# insert the same five pairs, in the same order as for the regular dictionary
d['a'] = 1
d['b'] = 2
d['c'] = 3
d['d'] = 4
d['e'] = 5

In [26]:
# print every key with its value for the OrderedDict: the pairs come out in insertion order
for k,v in d.items():
    print(k,v)

a 1
b 2
c 3
d 4
e 5


In [28]:
# Two regular dictionaries with the same key/value pairs, inserted in opposite order.
d1 = {'a': 1, 'b': 2}

d2 = {'b': 2, 'a': 1}

In [29]:
print(d1==d2)  # True: regular dictionaries are equal when they hold the same key/value pairs; insertion order is ignored

True


In [30]:
# Two OrderedDicts with the same key/value pairs, inserted in opposite order.
d1 = OrderedDict([('a', 1), ('b', 2)])

d2 = OrderedDict([('b', 2), ('a', 1)])

In [31]:
print(d1==d2)   # False: two OrderedDicts are equal only if they hold the same pairs in the same order

False


# namedtuple