This [**collections**](https://docs.python.org/3/library/collections.html#module-collections) module implements specialized container datatypes providing alternatives to Python’s general purpose built-in containers, `dict`, `list`, `set`, and `tuple`.

**Links to other notebooks:**
- [basic_python.ipynb](https://colab.research.google.com/drive/1GG4qrTubAGu1p0JvJ4dt5UVOI2jHTL8l#scrollTo=fbS3rOL3HbIW)
- [data_structure.ipynb](https://colab.research.google.com/drive/1NII-_1t4H1gD-dI7CavJa-7VltibUyLp#scrollTo=zgIIuZ-97fLW)


# [defaultdict](https://docs.python.org/3/library/collections.html#collections.defaultdict)
`class collections.defaultdict(default_factory=None, /[, ...])`

dict subclass that calls a __factory function__ to supply missing values



In [14]:
from collections import defaultdict

In [15]:
hash_map = defaultdict(list)
hash_map[0].append("first value")
print(hash_map[0])

['first value']


In [16]:
hash_map = defaultdict(int)
print(hash_map[0])
hash_map[0] += 1
print(hash_map[0])

0
1


In [17]:
# self-defined factory function
def factory():
    """Build the default value for a missing key: a new list holding two new, empty sets."""
    return [set() for _ in range(2)]

hash_map = defaultdict(factory)
print(hash_map[0])

hash_map[0][0].add("1")
hash_map[0][1].add("2")
print(hash_map[0])

[set(), set()]
[{'1'}, {'2'}]


In [18]:
# Use the dict's own bound __len__ method as the factory.
hash_map = defaultdict()
hash_map.default_factory = hash_map.__len__  # default value for a new key is the current len of hash_map

a = "a"
b = "b"

print(f"hash_map[1, 2, 3]:\t {hash_map[1, 2, 3]}")  # a dict key can be a tuple
print(f"hash_map[a]:\t\t {hash_map[a]}")  
print(f"hash_map[b]:\t\t {hash_map[b]}")
# second lookup of an existing key prints the same stored value again
print(f"hash_map[b]:\t\t {hash_map[b]}")

hash_map[1, 2, 3]:	 0
hash_map[a]:		 1
hash_map[b]:		 2
hash_map[b]:		 2


In [20]:
# Example: use defaultdict as counters

words = ["i","love","leetcode","i","love","coding"]

# using defaultdict
counter = defaultdict(int)
for word in words:
    counter[word] += 1
print(counter)

defaultdict(<class 'int'>, {'i': 2, 'love': 2, 'leetcode': 1, 'coding': 1})


In [21]:
for key, val in counter.items():
    print(f"key: {key}, \t val: {val}")

key: i, 	 val: 2
key: love, 	 val: 2
key: leetcode, 	 val: 1
key: coding, 	 val: 1


# [Counter](https://docs.python.org/3/library/collections.html#collections.Counter)
`class collections.Counter([iterable-or-mapping])`

dict subclass for counting hashable objects



In [1]:
from collections import Counter

In [9]:
c = Counter(['eggs', 'ham', 'eggs'])
c

Counter({'eggs': 2, 'ham': 1})

In [10]:
# it is a dictionary
isinstance(c, dict)

True

In [11]:
# Return an iterator over elements repeating each as many times as its count.
c.elements()  
sorted(c.elements())

['eggs', 'eggs', 'ham']

In [12]:
# Return a list of the n most common elements and their counts from the most common to the least.
# Elements with equal counts are ordered in the order first encountered:
c.most_common(1)

[('eggs', 2)]

In [13]:
from collections import Counter
c = Counter()
print(f"c['123']:\t {c['123']}")  # a key not seen before has a default count of 0
c["123"] += 1
print(f"c['123']:\t {c['123']}")

c['123']:	 0
c['123']:	 1


# [OrderedDict](https://docs.python.org/3/library/collections.html#collections.OrderedDict)
`class collections.OrderedDict([items])`

`Ordered dictionaries` are just like regular `dictionaries` but have some extra capabilities relating to ordering operations.

- `popitem(last=True)`
- `move_to_end(key, last=True)`

**Internal Implementation Detail**:

The pure-Python implementation combines a regular `dict` with a doubly linked list that records the key order (ref: [cpython source code](https://github.com/python/cpython/blob/ed1671ced7c9b951dfc16a0cf32a2b4eab914cf1/Lib/collections/__init__.py#L78))


In [37]:
from collections import OrderedDict


# basic ordering property 
hash_map1 = OrderedDict()

hash_map1["name"] = "Chet"
hash_map1["gender"] = "M"
hash_map1["age"] = "18"
hash_map1

OrderedDict([('name', 'Chet'), ('gender', 'M'), ('age', '18')])

In [38]:
hash_map2 = OrderedDict()

hash_map2["gender"] = "M"
hash_map2["name"] = "Chet"
hash_map2["age"] = "18"
hash_map2

OrderedDict([('gender', 'M'), ('name', 'Chet'), ('age', '18')])

In [39]:
hash_map1 != hash_map2

True

## LRUCache

In [40]:
from collections import OrderedDict


class LRUCache:
    """
    Least-recently-used (LRU) cache backed by an ``OrderedDict``.

    Keys are kept in recency order: the least recently used key sits at the
    front of ``self.cache`` and the most recently used key at the end.
    A successful ``get`` and every ``put`` mark the key as most recently used;
    inserting a new key into a full cache evicts the key at the front.
    """

    def __init__(self, capacity: int) -> None:
        """
        Create an empty cache.

        capacity: maximum number of keys to keep. A capacity <= 0 gives a
                  cache that never stores anything.
        """
        self.capacity = capacity
        self.cache = OrderedDict()

    def get(self, key: int) -> int:
        """
        return value of the key if key exist (and mark it as most recently used),
        else return -1

        Time Complexity:  O(1)
        Space Complexity: O(1)
        """
        if key not in self.cache:
            return -1

        # A hit counts as a use: move the key to the "most recent" end.
        self.cache.move_to_end(key, last=True)
        return self.cache[key]

    def put(self, key: int, value: int) -> None:
        """
        update value of key if key exists, otherwise add the key-value pair to the cache;
        if adding the key would exceed the capacity, evict the least recently used key first.

        Time Complexity:  O(1)
        Space Complexity: O(1)
        """
        # Nothing can be stored in a zero/negative-capacity cache. Without this
        # guard, popitem() would raise KeyError on an empty cache (capacity 0)
        # or the cache would grow without bound (negative capacity).
        if self.capacity <= 0:
            return

        if key in self.cache:
            # In-place update: only the recency changes, no eviction needed.
            self.cache.move_to_end(key, last=True)
        elif len(self.cache) >= self.capacity:
            # Cache is full: drop the least recently used item (front of the dict).
            self.cache.popitem(last=False)

        self.cache[key] = value

In [54]:
lru = LRUCache(2)

lru.put(1, 1)
print(f"lru_cache: {lru.cache}")

lru_cache: OrderedDict([(1, 1)])


In [55]:
lru.put(2, 2)
print(f"lru_cache: {lru.cache}")

lru_cache: OrderedDict([(1, 1), (2, 2)])


In [56]:
val = 1
print(f"lru get {val}: {lru.get(val)}")
print(f"lru_cache: {lru.cache}")

lru get 1: 1
lru_cache: OrderedDict([(2, 2), (1, 1)])


In [57]:
lru.put(3, 3)
print(f"lru_cache: {lru.cache}")

lru_cache: OrderedDict([(1, 1), (3, 3)])


In [58]:
val = 2
print(f"lru get {val}: {lru.get(val)}")
print(f"lru_cache: {lru.cache}")

lru get 2: -1
lru_cache: OrderedDict([(1, 1), (3, 3)])


In [59]:
lru.put(1, 10)
print(f"lru_cache: {lru.cache}")

lru_cache: OrderedDict([(3, 3), (1, 10)])



# [deque](https://docs.python.org/3/library/collections.html#collections.deque)

`class collections.deque([iterable[, maxlen]])`

list-like container with fast appends and pops on either end.




Deques are a generalization of `stacks` and `queues` (the name is pronounced “deck” and is short for “double-ended queue”). Deques support **thread-safe**, memory efficient appends and pops from either side of the deque with approximately the same `O(1)` performance in either direction.

- append
- appendleft
- pop
- popleft
- etc.

In [1]:
from collections import deque

queue = deque()

queue.append(1)
queue.append(10)
queue.append(100)

print(queue)

deque([1, 10, 100])


In [2]:
while queue:
    val = queue.popleft()
    print(val)

1
10
100


In [3]:
from collections import deque

queue = deque([1, 10 ,100])

print(queue)

while queue:
    val = queue.pop()
    print(val)

deque([1, 10, 100])
100
10
1


In [19]:
# appending lists as elements of a bounded deque
from collections import deque


queue = deque([], maxlen=2)  # bounded deque: holds at most two elements

queue.append([1, 10])
queue.append([2, 20])
print(queue)

# the deque is already full, so appending a third element drops the oldest one ([1, 10])
queue.append([3, 30])
print(queue)

deque([[1, 10], [2, 20]], maxlen=2)
deque([[2, 20], [3, 30]], maxlen=2)


In [20]:
for key, value in queue:
    print(f"key: {key}, value:{value}")

key: 2, value:20
key: 3, value:30


# [namedtuple](https://docs.python.org/3/library/collections.html#collections.namedtuple)

`Named tuples` assign meaning to each position in a tuple and allow for more readable, self-documenting code. <br>
They can be used wherever regular `tuples` are used, and they add the ability to access fields by name instead of position index.

> Like `dictionaries`, they contain keys that are hashed to a particular value. But on the contrary, they support both key-based access and iteration, functionality that dictionaries lack.

ref: https://www.geeksforgeeks.org/namedtuple-in-python/

>`namedtuples` compare on values only (ordered). They are designed to be a drop-in replacement for regular tuples, with named attribute access as an added feature. The field names will not be considered when making equality comparisons. It may not be what you wanted nor expected from the namedtuple type! This differs from dict equality comparisons, which do take into account the keys and also compare order agnostic.
ref: https://stackoverflow.com/a/43921348/8280662

In [9]:
# Python code to demonstrate namedtuple()
 
from collections import namedtuple
 
# Declaring namedtuple()
Student = namedtuple('Student', ['name', 'age', 'DOB'])
 
# Adding values
s1 = Student('Nandini', '19', '2541997')
s2 = Student(name='Chet', age='25', DOB='22/07/1993')
print(s1)  # readable __repr__ with a name=value style
print(s2)

Student(name='Nandini', age='19', DOB='2541997')
Student(name='Chet', age='25', DOB='22/07/1993')


In [10]:
# Assigning attributes with a default value is also possible
TransactionDefault = namedtuple('TransactionDefault',['sender','receiver','date','amount'], 
                                defaults=['jojo', 'xiaoxu', None, None])
print(TransactionDefault())

TransactionDefault(sender='jojo', receiver='xiaoxu', date=None, amount=None)


In [11]:
# default value is assigned to the last attribute because non-default argument cannot follow default argument
TransactionDefault2 = namedtuple('TransactionDefault2',['sender','receiver','date','amount'], 
                                 defaults=[None])
print(TransactionDefault2('luffy', 'chet', '2020'))

TransactionDefault2(sender='luffy', receiver='chet', date='2020', amount=None)


In [16]:
# namedtuple from dictionary
name_counter = {"chet": 3, "zoe": 3, "xixi": 2, "haha": 1}

NameCounter = namedtuple("NameCounter", name_counter)  #  just pass the keys of a dict directly
NameCounter(**name_counter)  # create tuple instances from this dict, or any other dict with matching keys

NameCounter(chet=3, zoe=3, xixi=2, haha=1)

In [3]:
# Access using index just like plain tuple
print("The Student age using index is : ", end="")
print(s1[1])
print()

# Access using name
print("The Student name using keyname is : ", end="")
print(s1.name)
print()

# Access using getattr()
print("The Student DOB using getattr() is : ", end="")
print(getattr(s1, 'DOB'))
print()

# unpack like a regular tuple
name, age, DoB = s1
print(f"{name}, {age}, {DoB}")

# _asdict
s1._asdict()

The Student age using index is : 19

The Student name using keyname is : Nandini

The Student DOB using getattr() is : 2541997

Nandini, 19, 2541997


OrderedDict([('name', 'Nandini'), ('age', '19'), ('DOB', '2541997')])

`tuple` is an **immutable** data type, but if an attribute's value is itself **mutable** (like a list), you can still change the elements inside that list: the tuple is unaffected because it still holds a reference to the same list object.

In [4]:
# Assign the attribute

# NOTE(review): the typename "Trasction" looks like a typo for "Transaction";
# it is the name that shows up in the repr of instances - confirm before changing.
Transaction = namedtuple("Trasction", ["sender", "receiver"])

# sender is given a (mutable) list, receiver a plain string
record = Transaction(sender=["jojo", "kiki"], receiver="chet")

In [5]:
id1 = id(record)
record

Trasction(sender=['jojo', 'kiki'], receiver='chet')

In [6]:
record.sender[1] = "gaga"
id2 = id(record)
record

Trasction(sender=['jojo', 'gaga'], receiver='chet')

In [7]:
id1==id2

True

In [8]:
print(record.receiver)
record.receiver="sheng"

chet


AttributeError: ignored