In [2]:
import collections
import collections.abc
import os
import re
import sys
import time

In [3]:
# A plain dict satisfies the abstract Mapping interface.
# collections.abc is a submodule: it is imported explicitly in the imports cell,
# because a bare `import collections` does not guarantee the attribute exists.
my_dict = {}
isinstance(my_dict, collections.abc.Mapping)

True

### Hashing

In [4]:
# A tuple is hashable only when every item it holds is hashable.
tt = (1, 2, (30, 40))
# frozenset is hashable, so this tuple hashes fine; a list in the same slot
# would make hash() raise TypeError.
tl = (1, 2, frozenset({30, 40}))
a, b = hash(tt), hash(tl)

# Dictionary

### Dict()

In [5]:
# Five equivalent ways of building the same dict.
a = dict(one=1, two=2, three=3)                        # keyword arguments
b = {'one': 1, 'two': 2, 'three': 3}                   # literal
c = dict(zip(['one', 'two', 'three'], [1, 2, 3]))      # zipped keys and values
d = dict([('two', 2), ('one', 1), ('three', 3)])       # iterable of pairs
e = dict({'one': 1, 'three': 3, 'two': 2})             # copy of another mapping
# Equality ignores insertion order, so all five compare equal.
a == b == c == d == e

True

### Dict comprehensions

In [6]:
# (dial code, country name) pairs.
CODES = [(1, 'US'), (7, 'Russia'), (82, 'Korea'), (81, 'Japan')]

# Upper-cased country name -> dial code.
country_codes1 = {country.upper(): code for code, country in CODES}

# Same mapping, restricted to dial codes above 80.
country_codes2 = {
    country.upper(): code
    for country, code in country_codes1.items()
    if code > 80
}

### Handling missing keys with `dict.get` and `setdefault`

In [7]:
# Build an index mapping each word to the list of (line, column) positions
# where it occurs, handling missing keys with dict.get (the verbose way).
# Raw string: '\w' inside a plain literal is an invalid escape sequence.
WORD_RE = re.compile(r'\w+')
index = {}
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
with open('/home/jehyuk/Documents/Fluent_Python_appendix/data/zen.txt', encoding = 'utf-8') as fp:
    for line_no, content in enumerate(fp, 1):  # 1-based line numbers
        for match in WORD_RE.finditer(content):
            word = match.group()
            col_no = match.start() + 1  # 1-based column
            location = (line_no, col_no)
            # Fetch the existing list, or a fresh empty list when the word is new.
            occurrences = index.get(word, [])
            occurrences.append(location)
            # Store the list back: a freshly created default list is not in the dict yet.
            index[word] = occurrences
# for word in sorted(index, key = str.upper):
#     print(word, index[word])

In [8]:
# Same word index as before, but using dict.setdefault to handle missing keys.
# Raw string: '\w' inside a plain literal is an invalid escape sequence.
WORD_RE = re.compile(r'\w+')
index = {}
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
with open('/home/jehyuk/Documents/Fluent_Python_appendix/data/zen.txt', encoding = 'utf-8') as fp:
    for line_no, content in enumerate(fp, 1):  # 1-based line numbers
        for match in WORD_RE.finditer(content):
            word = match.group()
            col_no = match.start() + 1  # 1-based column
            location = (line_no, col_no)
            # setdefault returns the stored list, inserting an empty one first if needed,
            # so the append always lands in the list held by the dict.
            index.setdefault(word, []).append(location)
# for word in sorted(index, key=str.upper):
#     print(word, index[word])

### Handling missing keys with defaultdict

In [9]:
### Defaultdict: Another take on missing keys
# Same word index, with collections.defaultdict(list) creating the empty list
# automatically the first time a word is looked up.
# Raw string: '\w' inside a plain literal is an invalid escape sequence.
WORD_RE = re.compile(r'\w+')
index = collections.defaultdict(list)
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
with open('/home/jehyuk/Documents/Fluent_Python_appendix/data/zen.txt', encoding = 'utf-8') as fp:
    # Start counting at 1 so line numbers match the get/setdefault versions of this index.
    for line_no, content in enumerate(fp, 1):
        for match in WORD_RE.finditer(content):
            word = match.group()
            col_no = match.start() + 1  # 1-based column
            location = (line_no, col_no)
            index[word].append(location)
# for word in sorted(index, key = str.upper):
#     print(word, index[word])

### Handling missing keys with the `__missing__` method

In [10]:
class StrKeyDict0(dict):
    """dict subclass that retries a failed lookup with the key converted to str."""

    def __missing__(self, key):
        """Called by dict.__getitem__ when `key` is absent.

        A non-str key is converted to str and looked up again; a str key that
        is missing is genuinely absent, so KeyError is raised.
        """
        if not isinstance(key, str):
            return self[str(key)]
        raise KeyError(key)

    def get(self, key, default=None):
        """Return self[key] (going through __missing__), or `default` on KeyError."""
        try:
            value = self[key]
        except KeyError:
            value = default
        return value

    def __contains__(self, key):
        """True if `key` itself or its str form is one of the stored keys."""
        stored = self.keys()
        if key in stored:
            return True
        return str(key) in stored


d = StrKeyDict0([('0', 'zero'), ('4', 'four')])
# print(d['0'])
# print(d[4])
# print(d.data)  # a plain dict has no `data` attribute
# print(d[1])

### Variations of dict(Counter)

In [11]:
# Counter tallies how many times each hashable item occurs; here, each character.
ct = collections.Counter('12312312312312345456')
print(ct)

# update() adds to the existing tallies instead of replacing them.
ct.update('676767')
# print(ct)
# print(ct.most_common(3))

Counter({'1': 5, '3': 5, '2': 5, '5': 2, '4': 2, '6': 1})


### Subclassing UserDict

In [16]:
class StrKeyDict1(collections.UserDict):
    """UserDict subclass that stores every key as str and looks keys up by their str form."""

    def __missing__(self, key):
        """Retry a failed lookup with str(key); a missing str key raises KeyError."""
        if not isinstance(key, str):
            return self[str(key)]
        raise KeyError(key)

    def __contains__(self, key):
        """All stored keys are str, so a single check against self.data suffices."""
        as_text = str(key)
        return as_text in self.data

    def __setitem__(self, key, item):
        """Convert the key to str before storing it in the underlying dict."""
        as_text = str(key)
        self.data[as_text] = item


d = StrKeyDict1([('0', 'zero'), ('4', 'four')])
# print(d['0'])
# print(d[4])
# print(d.data)  # UserDict keeps its contents in the `data` attribute, so this works here
# print(d[1])

### Immutable Mappings

In [13]:
from types import MappingProxyType

In [14]:
d = {1: 'A'}
# MappingProxyType wraps `d` in a read-only but live view.
d_proxy = MappingProxyType(d)
# print(d_proxy)
# print(d_proxy[1])
# d_proxy[2] = 'x'  # TypeError: the proxy is read-only and rejects item assignment
# Changes must go through the underlying dict ...
d[2] = 'B'
# ... and the existing proxy reflects them immediately; wrapping `d` again is unnecessary.
# print(d_proxy)

# Set

#### Set: A collection of unique objects

In [17]:
# Building a set from a list drops the duplicate 'a'; the list itself is untouched.
a = ['a', 'b', 'c', 'a']
set_a = {*a}
print(a, set_a, sep='\n')

['a', 'b', 'c', 'a']
{'c', 'a', 'b'}


In [18]:
from dis import dis

In [20]:
# Time building the same set from a set literal versus calling set() on a list.
# NOTE(review): %time measures a single run, so microsecond-scale results are noisy;
# %timeit would give a steadier comparison.
%time {1,2,3,1,2,3,1,2,3,1,2,3,1,2,3}
%time set([1,2,3,1,2,3,1,2,3,1,2,3,1,2,3])
# Uncomment to compare the bytecode generated for the two forms.
# dis('{1,2,3,1,2,3,1,2,3,1,2,3,1,2,3}')
# dis('set([1,2,3,1,2,3,1,2,3,1,2,3,1,2,3])')

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 10.3 µs
CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 13.6 µs


{1, 2, 3}

In [21]:
### Set Comprehension
from unicodedata import name

# Collect (code point, character) pairs for code points 64..255 whose Unicode
# name contains 'SIGN'; name(ch, '') yields '' for characters without a name.
a = {
    (code_point, chr(code_point))
    for code_point in range(64, 256)
    if 'SIGN' in name(chr(code_point), '')
}
print(a)

{(172, '¬'), (167, '§'), (176, '°'), (174, '®'), (247, '÷'), (182, '¶'), (164, '¤'), (165, '¥'), (169, '©'), (163, '£'), (215, '×'), (181, 'µ'), (162, '¢'), (177, '±')}


In [22]:
a = {1, 2, 3}
b = [1, 2]
c = [3, 4]
d = [5, 6]
# union() accepts any number of iterables and returns a new set; `a` is not modified.
a.union(b, c, d)

{1, 2, 3, 4, 5, 6}

### Key search in dictionary (Time comparison)

In [133]:
# The same 100000 (key, value) pairs stored two ways: as a list of tuples
# and as a dict, for the lookup-time comparison.
b = [(n, n) for n in range(100000)]
a = dict(b)

In [134]:
import time
key = 99000

# Dict lookup: actually index the dict so the measured interval covers the lookup.
start = time.perf_counter()
value = a[key]
print('a[{}]: {}, time : {:.4f}'.format(key, value, time.perf_counter() - start))

# List lookup: scan the (key, value) pairs until the key is found.
start = time.perf_counter()
for item_key, item_value in b:
    if item_key == key:
        print('b[{}]: {}, time : {:.4f}'.format(key, item_value, time.perf_counter() - start))
        break  # keys are unique here, so stop at the first match

a[99000]: 99000, time : 0.0001
b[99000]: 99000, time : 0.0254


In [128]:
# (dial code, country) pairs, deliberately not sorted.
DIAL_CODES = [
    (86, 'China'),
    (1, 'US'),
    (82, 'Korea'),
    (81, 'Japan'),
    (7, 'Russia'),
]

In [131]:
# Build the same mapping from three differently ordered sequences of pairs.
d1 = dict(DIAL_CODES)                                        # original order
d2 = dict(sorted(DIAL_CODES))                                # sorted by dial code
d3 = dict(sorted(DIAL_CODES, key=lambda pair: pair[1]))      # sorted by country name

print("d1: {}".format(d1.keys()))
print("d2: {}".format(d2.keys()))
print(("d3: {}".format(d3.keys())))
# Dict equality ignores key order, so all three compare equal.
print(d1 == d2 == d3)

d1: dict_keys([1, 82, 7, 86, 81])
d2: dict_keys([1, 82, 86, 81, 7])
d3: dict_keys([81, 82, 1, 86, 7])
True


### Adding items to a dictionary
#### Method1: 매번 Iterating하면서 없는 키들을 추가하면서 dictionary 생성
#### Method2: Iterating하면서 없는 키들을 먼저 탐색하고 dictionary는 한방에 추가

In [154]:
# Method 1: test every candidate key individually and insert the missing ones immediately.
# Rebuild the fixtures so the measurement starts from a dict holding keys 0..99999.
a, b = dict(), list()
for i in range(100000):
    a[i] = i
    b.append((i,i))

# Candidate keys; only 99995..99999 are already present in `a`.
new_keys = [x for x in range(99995, 11000000)]
start = time.time()
for key in new_keys:
    # NOTE(review): `key not in a` is equivalent and avoids building a keys view per iteration.
    if key not in a.keys():
        a[key] = key
print("Method1 Time: {:.4f}".format(time.time() - start))

Method1 Time: 1.9957


In [155]:
# Method 2: compute the set of missing keys first, then add them all with one update().
# Rebuild the fixtures so the measurement starts from a dict holding keys 0..99999.
a, b = dict(), list()
for i in range(100000):
    a[i] = i
    b.append((i,i))

# Candidate keys; only 99995..99999 are already present in `a`.
new_keys = [x for x in range(99995, 11000000)]
add_a = dict()
start = time.time()
# Keys views are set-like: subtracting the view from the list yields the set of
# candidate keys that are not yet in `a`.
add_keys = new_keys - a.keys()
for key in add_keys:
    add_a[key] = key
# Merge all new entries in a single call.
a.update(add_a)
print("Method2 Time: {:.4f}".format(time.time() - start))

Method2 Time: 1.6884


#### --> 추가되어야 할 dictionary의 크기가 큰 경우, method2가 좋으나, 작을경우, method1이 더 빠른 경우도 존재 