# <center>Тема 2. Углубленные приемы pandas, collections, numpy</center>
## <center>Часть 1. Работа с модулем collections</center>

**collections** - это модуль Python, в котором реализованы специальные контейнеры данных, которые предоставляют альтернативу стандартным типам dict, list, set и tuple и расширяют их функционал.

Наиболее популярные типы данных:

- Counter
- defaultdict
- OrderedDict
- deque

In [1]:
import numpy as np
from collections import (Counter, defaultdict, OrderedDict, deque,
                         ChainMap, namedtuple)

# Counter

Словарь для подсчета числа элементов

### Создание

In [2]:
cnt = Counter()
cnt

Counter()

In [3]:
cnt = Counter(np.random.randint(low=0, high=10, size=100))
cnt

Counter({6: 9, 2: 6, 9: 13, 4: 9, 1: 10, 3: 9, 7: 12, 8: 14, 5: 11, 0: 7})

In [4]:
cnt = Counter({3:1, 1:3, 2:4})
cnt

Counter({3: 1, 1: 3, 2: 4})

### Counter поддерживает методы dict (кроме `fromkeys`; `update` складывает счётчики, а не заменяет значения) и имеет дополнительные:

- elements()
- most_common()
- subtract()

In [5]:
cnt.items(), cnt.values(), cnt.keys()

(dict_items([(3, 1), (1, 3), (2, 4)]),
 dict_values([1, 3, 4]),
 dict_keys([3, 1, 2]))

In [6]:
list(cnt.elements())

[3, 1, 1, 1, 2, 2, 2, 2]

In [7]:
cnt = Counter([1,2,3,4,1,2,6,7,3,8,1])
cnt.most_common(2)

[(1, 3), (2, 2)]

In [8]:
cnt = Counter({1:3,2:4})
deduct = {1:1, 2:2}
cnt.subtract(deduct)
cnt

Counter({1: 2, 2: 2})

### Отличие от dict

In [9]:
a = {1:2, 3:4, 5:6}
b = {1:1, 2:1, 3:1, 4:1, 5:1}

print(type(a), type(b))

<class 'dict'> <class 'dict'>


In [10]:
try:
    a + b # - ошибка
except TypeError as te:
    print(f'TypeError: {te}')

TypeError: unsupported operand type(s) for +: 'dict' and 'dict'


In [11]:
a.update(b) # inplace
a

{1: 1, 3: 1, 5: 1, 2: 1, 4: 1}

In [12]:
a = Counter({1:2, 3:4, 5:6})
b = Counter({1:1, 2:1, 3:1, 4:1, 5:1})

a + b # outofplace

Counter({1: 3, 3: 5, 5: 7, 2: 1, 4: 1})

In [13]:
a.update(b) # inplace
a

Counter({1: 3, 3: 5, 5: 7, 2: 1, 4: 1})

# defaultdict

Словарь со значением по умолчанию: при обращении к отсутствующему ключу вместо ошибки KeyError создаётся значение, возвращаемое фабрикой `default_factory`

In [14]:
a = {1:1, 2:2, 3:3}
try:
    a[4]
except KeyError as ke:
    print(f'KeyError: {ke}')

KeyError: 4


In [15]:
dd = defaultdict(int, a)

In [16]:
dd

defaultdict(int, {1: 1, 2: 2, 3: 3})

In [17]:
dd[4]

0

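**ВАЖНО!!!** Обращение к отсутствующему ключу (`dd[4]`) не только возвращает значение по умолчанию, но и добавляет этот ключ в словарь: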

In [18]:
dd

defaultdict(int, {1: 1, 2: 2, 3: 3, 4: 0})

### Значение по умолчанию

In [19]:
dd = defaultdict(lambda: 1, a)
dd

defaultdict(<function __main__.<lambda>()>, {1: 1, 2: 2, 3: 3})

In [20]:
dd[4] + 1

2

### Словари словарей

In [21]:
# Two-level mapping: a missing outer key yields a fresh inner defaultdict,
# and a missing inner key yields a fresh empty list.
dd = defaultdict(lambda: defaultdict(list))

In [22]:
dd['month']['jan'] = list(range(10))
dd['month']['feb'].append('short')
dd

defaultdict(<function __main__.<lambda>()>,
            {'month': defaultdict(list,
                         {'jan': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                          'feb': ['short']})})

In [23]:
# The same nested structure with plain dicts: every level has to be created
# by hand before a value can be appended.
dd = {}
dd['month'] = {}
dd['month']['feb'] = []
dd['month']['feb'].append(1)

# OrderedDict

Словарь, который поддерживает порядок ключей

In [24]:
import calendar
months = [calendar.month_abbr[i] for i in range(1, 13)]
months

['Jan',
 'Feb',
 'Mar',
 'Apr',
 'May',
 'Jun',
 'Jul',
 'Aug',
 'Sep',
 'Oct',
 'Nov',
 'Dec']

In [25]:
set(months)

{'Apr',
 'Aug',
 'Dec',
 'Feb',
 'Jan',
 'Jul',
 'Jun',
 'Mar',
 'May',
 'Nov',
 'Oct',
 'Sep'}

In [26]:
# Map each month abbreviation to its 1-based number, inserting Jan first.
forward = {month: idx + 1 for idx, month in enumerate(months)}
# Build an OrderedDict with the same items in the same insertion order.
od_forward = OrderedDict(forward)
od_forward

OrderedDict([('Jan', 1),
             ('Feb', 2),
             ('Mar', 3),
             ('Apr', 4),
             ('May', 5),
             ('Jun', 6),
             ('Jul', 7),
             ('Aug', 8),
             ('Sep', 9),
             ('Oct', 10),
             ('Nov', 11),
             ('Dec', 12)])

In [27]:
# Same month -> number mapping, but with keys inserted in reverse calendar
# order: enumerate() walks Dec..Jan, so 12 - idx produces 12, 11, ..., 1.
backward = {month: 12 - idx for idx, month in enumerate(months[::-1])}
# Build an OrderedDict with the same items in the same (reversed) order.
od_backward = OrderedDict(backward)
od_backward

OrderedDict([('Dec', 12),
             ('Nov', 11),
             ('Oct', 10),
             ('Sep', 9),
             ('Aug', 8),
             ('Jul', 7),
             ('Jun', 6),
             ('May', 5),
             ('Apr', 4),
             ('Mar', 3),
             ('Feb', 2),
             ('Jan', 1)])

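Сравнение `dict` и `OrderedDict`: при сравнении двух `dict` порядок ключей не учитывается, а при сравнении двух `OrderedDict` — учитывается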

In [28]:
forward == backward

True

In [29]:
od_forward == od_backward

False

In [30]:
od_forward.popitem()

('Dec', 12)

In [31]:
od_forward.popitem(last=False)

('Jan', 1)

In [32]:
od_forward

OrderedDict([('Feb', 2),
             ('Mar', 3),
             ('Apr', 4),
             ('May', 5),
             ('Jun', 6),
             ('Jul', 7),
             ('Aug', 8),
             ('Sep', 9),
             ('Oct', 10),
             ('Nov', 11)])

In [33]:
od_forward.move_to_end('Feb')
od_forward

OrderedDict([('Mar', 3),
             ('Apr', 4),
             ('May', 5),
             ('Jun', 6),
             ('Jul', 7),
             ('Aug', 8),
             ('Sep', 9),
             ('Oct', 10),
             ('Nov', 11),
             ('Feb', 2)])

### reversed

In [34]:
reversed(forward), reversed(od_forward)

(<dict_reversekeyiterator at 0x25702ccaf90>, <odict_iterator at 0x25702cca3b0>)

In [35]:
try:
    # Reversing dict views is supported for plain dicts only from Python 3.8;
    # older interpreters raise TypeError here.
    reversed(forward.items()), reversed(od_forward.items())
    reversed(forward.keys()), reversed(od_forward.keys())
    reversed(forward.values()), reversed(od_forward.values())
except TypeError as te:
    # Report the failure instead of silently swallowing every exception,
    # matching the other try/except demonstrations in this notebook.
    print(f'TypeError: {te}')
else:
    print('OK')

OK


### Пример (из документации): LRU-кэш

In [36]:
class LRU(OrderedDict):
    """Bounded ordered mapping that drops its stalest key on overflow.

    Every successful ``self[key]`` read and every ``self[key] = value`` write
    moves the key to the end of the ordering, so the first key in iteration
    order is the one least recently touched through those two operations.
    """

    def __init__(self, maxsize=128, /, *args, **kwds):
        """Remember the capacity, then pass the rest on to ``OrderedDict``.

        Args:
            maxsize: Largest number of entries kept (positional-only).
            *args: Positional arguments forwarded to ``OrderedDict``.
            **kwds: Keyword arguments forwarded to ``OrderedDict``.
        """
        self.maxsize = maxsize
        super().__init__(*args, **kwds)

    def __getitem__(self, key):
        """Return the value stored under *key* and mark the key as most recent.

        Raises:
            KeyError: If *key* is absent; the lookup fails before any
                reordering happens.
        """
        hit = super().__getitem__(key)
        self.move_to_end(key)
        return hit

    def __setitem__(self, key, value):
        """Store *value* under *key* as the most recent entry, trimming if full."""
        if key in self:
            # Overwriting keeps the old position, so refresh it explicitly.
            self.move_to_end(key)
        super().__setitem__(key, value)
        if len(self) <= self.maxsize:
            return
        # Over capacity: the first key in iteration order is the stalest one.
        stalest = next(iter(self))
        del self[stalest]

Python 3.6 - `dict` сохраняет порядок вставки, но это особенность внутренней реализации CPython  
Python 3.7 - сохранение порядка вставки в `dict` гарантируется спецификацией языка  
Python 3.8 - `dict` и его представления (`keys()`, `values()`, `items()`) поддерживают `reversed`

# deque

Двусторонняя очередь: вставка и удаление элементов с обоих концов выполняются за O(1) (операции `append`/`pop` потокобезопасны)

In [37]:
deq = deque([1, 1, 2, 2, 3], maxlen=10)
deq

deque([1, 1, 2, 2, 3])

In [38]:
deq.append('end')
deq.appendleft('start')
deq

deque(['start', 1, 1, 2, 2, 3, 'end'])

In [39]:
deq.pop()
deq.popleft()
deq

deque([1, 1, 2, 2, 3])

In [40]:
deq.count(1)

2

In [41]:
deq.clear()
deq

deque([])

In [42]:
# One million integers in a list, plus a deque built from the same items, as
# inputs for the front-insertion timings.
a = list(range(1_000_000))
deq = deque(a)

In [43]:
%timeit a.insert(0, 10)

1.22 ms ± 187 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [44]:
%timeit deq.insert(0, 10)

106 ns ± 8.48 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


In [45]:
%timeit deq.appendleft(10)

61.7 ns ± 4.82 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


# namedtuple

In [46]:
Student = namedtuple('Student', 'fname, lname, age')  
s1 = Student('John', 'Clarke', '13')  
print(s1)

Student(fname='John', lname='Clarke', age='13')


In [47]:
s2 = Student._make(['Adam','joe','18'])  
print(s2)  

Student(fname='Adam', lname='joe', age='18')


In [48]:
print(s1._asdict())

{'fname': 'John', 'lname': 'Clarke', 'age': '13'}


In [49]:
s3 = s1._replace(age='14')  
print(s1)  
print(s3)  

Student(fname='John', lname='Clarke', age='13')
Student(fname='John', lname='Clarke', age='14')


# ChainMap

In [50]:
d1 = { 'a' : 1, 'b' : 2 }  
d2 = { 'c' : 3, 'b' : 4 }  
chain_map = ChainMap(d1, d2)  
print(chain_map)  

ChainMap({'a': 1, 'b': 2}, {'c': 3, 'b': 4})


In [51]:
for key, value in chain_map.items():
    print(key, value)

c 3
b 2
a 1


In [52]:
d3 = {'e' : 5, 'f' : 6, 'b': 8}  
new_chain_map = chain_map.new_child(d3)  
for key, value in new_chain_map.items():
    print(key, value)

c 3
b 8
a 1
e 5
f 6
