# 3장 딕셔너리와 집합

## 3.1 일반적인 매핑형

In [2]:
# Import the submodule explicitly: a bare `import collections` does not
# guarantee that the `collections.abc` attribute has been loaded.
import collections.abc

my_dict = {}
# Test against the Mapping ABC instead of the concrete dict type.
isinstance(my_dict, collections.abc.Mapping)

True

In [3]:
# Every item, including the nested tuple, is hashable, so the tuple is too.
tt = (1, 2, (30, 40))
hash(tt)

-3907003130834322577

In [4]:
# The nested list is unhashable, so hashing the tuple raises TypeError.
tl = (1, 2, [30, 40])
hash(tl)

TypeError: unhashable type: 'list'

In [5]:
# A frozenset is hashable, so a tuple containing one can be hashed.
tf = (1, 2, frozenset([30, 40]))
hash(tf)

5149391500123939311

In [6]:
# Five different ways to build the same dict.
a = dict(one=1, two=2, three=3)  # keyword arguments
b = {'one' : 1, 'two' : 2, 'three' : 3}  # dict literal
c = dict(zip(['one', 'two', 'three'], [1, 2, 3]))  # pairs produced by zip
d = dict([('two', 2), ('one', 1), ('three', 3)])  # list of (key, value) pairs
e = dict({'three' : 3, 'one' : 1, 'two' : 2})  # built from another dict

# dict equality ignores insertion order, so all five compare equal.
a == b == c == d == e

True

## 3.2 지능형 딕셔너리

In [8]:
# (dial code, country name) pairs.
DIAL_CODES = [
    (86, 'China'),
    (91, 'India'),
    (1, 'United States'),
    (62, 'Indonesia'),
    (55, 'Brazil'),
    (92, 'Pakistan'),
    (880, 'Bangladesh'),
    (234, 'Nigeria'),
    (7, 'Russia'),
    (81, 'Japan'),
]
# Swap each pair so the country name becomes the key and the code the value.
country_code = {country: code for code, country in DIAL_CODES}
country_code

{'China': 86,
 'India': 91,
 'United States': 1,
 'Indonesia': 62,
 'Brazil': 55,
 'Pakistan': 92,
 'Bangladesh': 880,
 'Nigeria': 234,
 'Russia': 7,
 'Japan': 81}

In [9]:
# Invert country_code to code -> upper-cased name, keeping only codes below 66.
{code: country.upper() for country, code in country_code.items()
if code < 66}

{1: 'UNITED STATES', 62: 'INDONESIA', 55: 'BRAZIL', 7: 'RUSSIA'}

## 3.3 공통적인 매핑 메서드

3.3.1 존재하지 않는 키를 setdefault()로 처리하기

In [None]:
# Build an index mapping each word to the list of (line, column) positions
# where it occurs in the file named by sys.argv[1].
import sys
import re

WORD_RE = re.compile(r'\w+')
index = {}

with open(sys.argv[1], encoding='utf-8') as fp:
    # Line numbers are 1-based.
    for line_no, line in enumerate(fp, 1):
        for match in WORD_RE.finditer(line):
            word = match.group()
            # match.start() is 0-based; report 1-based columns.
            column_no = match.start() + 1
            location = (line_no, column_no)
            # Not pretty code, but written this way for the sake of explanation.
            # The name bound here must match the one used on the next two
            # lines; otherwise the first word raises NameError.
            occurrences = index.get(word, [])
            occurrences.append(location)
            index[word] = occurrences

# Print in alphabetical order (case-insensitive via str.upper).
for word in sorted(index, key=str.upper):
    print(word, index[word])

In [None]:
# Build an index mapping each word to the locations where it appears
# in the file named by sys.argv[1].
import sys
import re

WORD_RE = re.compile(r'\w+')
index = {}

with open(sys.argv[1], encoding='utf-8') as fp:
    # Line numbers are 1-based.
    for line_no, line in enumerate(fp, 1):
        for match in WORD_RE.finditer(line):
            word = match.group()
            # match.start() is 0-based; report 1-based columns.
            column_no = match.start() + 1
            location = (line_no, column_no)
            # setdefault returns the existing list for word, or stores and
            # returns a new empty one, so the append needs no second lookup.
            index.setdefault(word, []).append(location)

# Print in alphabetical order (case-insensitive via str.upper).
for word in sorted(index, key=str.upper):
    print(word, index[word])

In [None]:
# Long-hand form that produces the same result as
# my_dict.setdefault(key, []).append(new_value).
if key not in my_dict:
    my_dict[key] = []
my_dict[key].append(new_value)

## 3.4 융통성 있게 키를 조회하는 매핑

3.4.1 defaultdict: 존재하지 않는 키에 대한 또 다른 처리

In [None]:
# Build an index mapping each word to the locations where it appears
# in the file named by sys.argv[1].
import sys
import re
import collections

WORD_RE = re.compile(r'\w+')

# list is the default factory: looking up a missing word creates an empty list.
index = collections.defaultdict(list)
with open(sys.argv[1], encoding='utf-8') as fp:
    # Line numbers are 1-based.
    for line_no, line in enumerate(fp, 1):
        for match in WORD_RE.finditer(line):
            word = match.group()
            # match.start() is 0-based; report 1-based columns.
            column_no = match.start() + 1
            location = (line_no, column_no)
            index[word].append(location)

# Print in alphabetical order (case-insensitive via str.upper).
for word in sorted(index, key=str.upper):
    print(word, index[word])

3.4.2 `__missing__()` 메서드

In [12]:
class StrKeyDict0(dict):
    """dict that retries a failed lookup with the key converted to str."""

    def __missing__(self, key):
        """Handle a failed ``self[key]``: retry as ``str(key)`` or raise."""
        # A missing str key is genuinely absent; raising here also stops the
        # str(key) retry below from recursing forever.
        if not isinstance(key, str):
            return self[str(key)]
        raise KeyError(key)

    def get(self, key, default=None):
        """Return ``self[key]`` (with the str fallback) or ``default``."""
        # Go through self[...] so that __missing__ gets a chance to run.
        try:
            value = self[key]
        except KeyError:
            return default
        else:
            return value

    def __contains__(self, key):
        """Report whether ``key`` or ``str(key)`` is one of the stored keys."""
        # Search the keys view directly: `key in self` would call this
        # method again.
        stored = self.keys()
        if key in stored:
            return True
        return str(key) in stored


In [13]:
# Both keys are stored as strings.
d = StrKeyDict0([('2', 'two'), ('4', 'four')])
d['2']

'two'

In [14]:
d[4]

'four'

In [15]:
d[1]

KeyError: '1'

In [16]:
d.get('2')

'two'

In [17]:
d.get(4)

'four'

In [18]:
d.get(1, 'N/A')

'N/A'

In [19]:
2 in d

True

In [20]:
1 in d

False

## 3.5 그 외 매핑형

In [23]:
# Count how many times each character occurs in the string.
ct = collections.Counter('abracadabra')
ct

Counter({'a': 5, 'b': 2, 'r': 2, 'c': 1, 'd': 1})

In [24]:
# update() adds the new character counts to the existing tallies.
ct.update('aaaaazzz')
ct

Counter({'a': 10, 'b': 2, 'r': 2, 'c': 1, 'd': 1, 'z': 3})

In [25]:
ct.most_common(2)

[('a', 10), ('z', 3)]

## 3.6 UserDict 상속하기

In [26]:
import collections

class StrKeyDict(collections.UserDict):
    """Mapping that stores every key as a str and looks keys up by str(key)."""

    def __missing__(self, key):
        """Handle a failed ``self[key]``: retry as ``str(key)`` or raise."""
        # A missing str key is genuinely absent; raising here also stops the
        # str(key) retry below from recursing forever.
        if not isinstance(key, str):
            return self[str(key)]
        raise KeyError(key)

    def __contains__(self, key):
        """Report whether ``str(key)`` is a stored key."""
        # All keys are stored as str, so one check on the backing dict suffices.
        text_key = str(key)
        return text_key in self.data

    def __setitem__(self, key, item):
        """Store ``item`` under ``str(key)``."""
        text_key = str(key)
        self.data[text_key] = item

## 3.7 불변 매핑

In [27]:

from types import MappingProxyType
d = {1: 'A'}
# Wrap d in a mapping proxy; the proxy is accessed through d_proxy from here on.
d_proxy = MappingProxyType(d)
d_proxy

mappingproxy({1: 'A'})

In [28]:
d_proxy[1]

'A'

In [30]:
d_proxy[2] = 'x'

TypeError: 'mappingproxy' object does not support item assignment

In [31]:

d[2] = 'B'

In [32]:
d_proxy

mappingproxy({1: 'A', 2: 'B'})

In [33]:
d_proxy[2]

'B'

## 3.8 집합 이론

In [34]:
# Building a set from the list drops the duplicate 'spam'.
l = ['spam', 'smap', 'eggs', 'spam']
set(l)

{'eggs', 'smap', 'spam'}

In [35]:
list(set(l))

['eggs', 'smap', 'spam']

In [None]:
found = len(needles & haystack)

In [None]:
# Count the items of needles that also occur in haystack,
# using one membership test per item.
found = 0
for n in needles:
    if n in haystack:
        found += 1

In [None]:
# Convert both operands to sets so the & (intersection) operator applies.
found = len(set(needles) & set(haystack))

# Another way: intersection() accepts any iterable, so haystack is passed as is.
found = len(set(needles).intersection(haystack))

3.8.1 집합 리터럴

In [40]:
# Braces around a bare element (no key: value pair) form a set literal.
s = {1}
type(s)

set

In [41]:
s

{1}

In [42]:
s.pop()

1

In [43]:
s

set()

In [44]:
from dis import dis
# Disassemble the set literal to see the bytecode it compiles to.
dis('{1}')

  1           0 LOAD_CONST               0 (1)
              2 BUILD_SET                1
              4 RETURN_VALUE


In [45]:
dis('set([1])')

  1           0 LOAD_NAME                0 (set)
              2 LOAD_CONST               0 (1)
              4 BUILD_LIST               1
              6 CALL_FUNCTION            1
              8 RETURN_VALUE


In [46]:
frozenset(range(10))

frozenset({0, 1, 2, 3, 4, 5, 6, 7, 8, 9})

3.8.2 지능형 집합

In [47]:
from unicodedata import name
# Set of the characters at code points 32-255 whose Unicode name contains
# 'SIGN'; name(..., '') returns '' instead of raising for unnamed characters.
{chr(i) for i in range(32, 256) if 'SIGN' in name(chr(i), '')}

{'#',
 '$',
 '%',
 '+',
 '<',
 '=',
 '>',
 '¢',
 '£',
 '¤',
 '¥',
 '§',
 '©',
 '¬',
 '®',
 '°',
 '±',
 'µ',
 '¶',
 '×',
 '÷'}

## 3.9 dict와 set의 내부 구조

3.9.1 성능구조

3.9.2 딕셔너리 안의 해시 테이블

3.9.3 dict 작동 방식에 의한 영향

In [49]:
# International dial codes of the ten most populous countries.

DIAL_CODES = [
    (86, 'China'),
    (91, 'India'),
    (1, 'United States'),
    (62, 'Indonesia'),
    (55, 'Brazil'),
    (92, 'Pakistan'),
    (880, 'Bangladesh'),
    (234, 'Nigeria'),
    (7, 'Russia'),
    (81, 'Japan'),
]

# Build the same mapping three times, inserting the pairs in different orders.
d1 = dict(DIAL_CODES)  # original list order
print('d1:', d1.keys())
d2 = dict(sorted(DIAL_CODES))  # pairs sorted by dial code
print('d2:', d2.keys())
d3 = dict(sorted(DIAL_CODES, key=lambda x:x[1]))  # pairs sorted by country name
print('d3:', d3.keys())
# The dicts compare equal even though their keys were inserted in different orders.
assert d1 == d2 and d2 == d3

d1: dict_keys([86, 91, 1, 62, 55, 92, 880, 234, 7, 81])
d2: dict_keys([1, 7, 55, 62, 81, 86, 91, 92, 234, 880])
d3: dict_keys([880, 55, 86, 91, 62, 81, 234, 92, 7, 1])


3.9.4 집합의 작동 방식 - 현실적으로 미치는 영향