# <center>流畅的 Python</center>
## 第三章：字典和集合
**字典的几种初始化方式**

In [2]:
# Five ways to build the same mapping.
a = dict(one=1, two=2, three=3)                      # keyword arguments
b = {'one': 1, 'two': 2, 'three': 3}                 # dict literal
c = dict(zip(('one', 'two', 'three'), (1, 2, 3)))    # zipped keys and values
d = dict([('one', 1), ('two', 2), ('three', 3)])     # iterable of (key, value) pairs
e = dict({'two': 2, 'one': 1, 'three': 3})           # copy of another mapping, keys in a different order
# dict equality compares items only, so all five are equal.
print(a == b == c == d == e)

True


**示例3-1 字典推导的应用**

In [3]:
# (dial code, country) pairs.
DIAL_CODES = [
    (86, 'China'),
    (91, 'India'),
    (1, 'United States'),
    (62, 'Indonesia'),
    (55, 'Brazil'),
    (92, 'Pakistan'),
    (880, 'Bangladesh'),
    (234, 'Nigeria'),
    (7, 'Russia'),
    (81, 'Japan'),
]

# Swap each pair so the country name becomes the key.
country_code1 = {name: dial for dial, name in DIAL_CODES}
print(country_code1)

# Swap back, keeping only codes below 66 and upper-casing the names.
country_code2 = {
    dial: name.upper()
    for name, dial in country_code1.items()
    if dial < 66
}
print('\n')
print(country_code2)

{'China': 86, 'India': 91, 'United States': 1, 'Indonesia': 62, 'Brazil': 55, 'Pakistan': 92, 'Bangladesh': 880, 'Nigeria': 234, 'Russia': 7, 'Japan': 81}


{1: 'UNITED STATES', 62: 'INDONESIA', 55: 'BRAZIL', 7: 'RUSSIA'}


**示例3-2 从索引中获取单词出现的频率信息，并写进对应的列表中**

In [9]:
import re

WORD_RE = re.compile(r'\w+')

# Maps each word to the list of (line, column) positions where it occurs;
# both numbers are 1-based.
index = {}
with open('zen.txt', encoding='utf-8') as fp:
    for line_no, line in enumerate(fp, start=1):
        for match in WORD_RE.finditer(line):
            word = match.group()
            location = (line_no, match.start() + 1)
            # Fetch the list for this word, or a new empty list if it is unseen ...
            occurrences = index.get(word, [])
            occurrences.append(location)
            # ... and store it back, which is what registers a first-time word.
            index[word] = occurrences

# Report the words alphabetically, ignoring case.
for word, occurrences in sorted(index.items(), key=lambda item: item[0].upper()):
    print(word, occurrences)

and [(1, 21)]
are [(1, 29)]
estates [(1, 40)]
family [(1, 33)]
I [(1, 19)]
of [(1, 48)]
Prince [(1, 8), (2, 1)]
so [(1, 16)]
the [(1, 51)]
Well [(1, 2), (2, 9)]
you [(1, 25)]


**示例3-4 使用dict.setdefault，只用一行代码就能获取单词的出现位置列表并更新它**

In [10]:
import re

WORD_RE = re.compile(r'\w+')

# Word -> list of 1-based (line, column) positions.
index = {}
with open('zen.txt', encoding='utf-8') as fp:
    for line_no, line in enumerate(fp, start=1):
        for match in WORD_RE.finditer(line):
            location = (line_no, match.start() + 1)
            # setdefault returns the list already stored for the word, or stores
            # and returns a new empty list, so one statement both fetches and
            # updates the entry.
            index.setdefault(match.group(), []).append(location)

# Report the words alphabetically, ignoring case.
for word in sorted(index, key=str.upper):
    print(word, index[word])

and [(1, 21)]
are [(1, 29)]
estates [(1, 40)]
family [(1, 33)]
I [(1, 19)]
of [(1, 48)]
Prince [(1, 8), (2, 1)]
so [(1, 16)]
the [(1, 51)]
Well [(1, 2), (2, 9)]
you [(1, 25)]


**示例3-5 利用defaultdict实例**

In [11]:
import re
import collections

WORD_RE = re.compile(r'\w+')

# defaultdict(list) creates an empty list the first time a missing word is
# looked up, so no explicit "get or create" step is needed.
index = collections.defaultdict(list)
with open('zen.txt', encoding='utf-8') as fp:
    for line_no, line in enumerate(fp, start=1):
        for match in WORD_RE.finditer(line):
            word = match.group()
            location = (line_no, match.start() + 1)  # 1-based (line, column)
            index[word].append(location)

# Report the words alphabetically, ignoring case.
for word, occurrences in sorted(index.items(), key=lambda item: item[0].upper()):
    print(word, occurrences)

and [(1, 21)]
are [(1, 29)]
estates [(1, 40)]
family [(1, 33)]
I [(1, 19)]
of [(1, 48)]
Prince [(1, 8), (2, 1)]
so [(1, 16)]
the [(1, 51)]
Well [(1, 2), (2, 9)]
you [(1, 25)]


**示例3-7 StrKeyDict0在查询的时候把非字符串的键转换为字符串**

In [12]:
class StrKeyDict0(dict):
    """dict subclass whose lookups fall back to the str form of a missing key."""

    def __missing__(self, key):
        """Handle a key that dict.__getitem__ did not find.

        A non-str key is retried as str(key). A str key has nothing left to
        try, so KeyError is raised; this check also stops the retry from
        recursing forever.
        """
        if not isinstance(key, str):
            return self[str(key)]
        raise KeyError(key)

    def get(self, key, default=None):
        """Return self[key], or default when the lookup raises KeyError.

        Going through self[key] lets __missing__ apply the str fallback.
        """
        try:
            value = self[key]
        except KeyError:
            return default
        return value

    def __contains__(self, key):
        """Return True if key or str(key) is one of the stored keys."""
        # Test against self.keys() rather than `key in self`, which would call
        # this method again.
        stored = self.keys()
        return key in stored or str(key) in stored

**Tests for item retrieval using 'd[key]' notation:**

In [13]:
d = StrKeyDict0([('2', 'two'), ('4', 'four')])
print(d['2'])  # '2' is stored as a str key
print(d[4])    # 4 is missing, so the lookup is retried with '4'
# Neither 1 nor '1' is a key, so this lookup raises KeyError. The error is the
# point of the demo: catch it and show it, so that Restart & Run All does not
# stop at this cell.
try:
    print(d[1])
except KeyError as exc:
    print('KeyError:', exc)

two
four


KeyError: '1'

**Tests for item retrieval using 'd.get(key)' notation:**

In [14]:
# get() applies the same str fallback as d[key], but returns the default
# (None unless given) instead of raising when the key cannot be found.
print(d.get('2'), d.get(4), d.get(1, 'N/A'), sep='\n')

two
four
N/A


**Tests for the 'in' operator:**

In [15]:
# 2 is found through its str form '2'; 1 has no match in either form.
print(2 in d, 1 in d, sep='\n')

True
False


**字典的变种--Counter:**

In [16]:
# Count how many times each letter occurs in the word.
ct = collections.Counter('abracadabra')
print(ct)
# update() adds the new letters to the existing counts.
ct.update('aaaaazzz')
# Show the updated counter, then the two most common letters with their counts.
print(ct, ct.most_common(2), sep='\n')

Counter({'a': 5, 'b': 2, 'r': 2, 'c': 1, 'd': 1})
Counter({'a': 10, 'z': 3, 'b': 2, 'r': 2, 'c': 1, 'd': 1})
[('a', 10), ('z', 3)]


**示例3-8 使用UserDict来定义StrKeyDict**

In [17]:
from collections import UserDict

class StrKeyDict(UserDict):
    """Mapping that stores every key as a str and accepts non-str keys on lookup."""

    def __missing__(self, key):
        """Retry a failed lookup with str(key); raise KeyError for a str key.

        Raising for str keys is what stops the retry from recursing forever.
        """
        if not isinstance(key, str):
            return self[str(key)]
        raise KeyError(key)

    def __contains__(self, key):
        """Return True if str(key) is a stored key."""
        # __setitem__ below converts keys to str before storing them, so the
        # str form is the one to look for in the underlying self.data dict.
        as_text = str(key)
        return as_text in self.data

    def __setitem__(self, key, item):
        """Store item under str(key)."""
        as_text = str(key)
        self.data[as_text] = item

**示例3-9 不可变映射类型**

In [18]:
from types import MappingProxyType

d = {1:'A'}
# d_proxy is a read-only view of d, not a copy.
d_proxy = MappingProxyType(d)
print(d_proxy)
print(d_proxy[1])
# Changes made to d are visible through the proxy ...
d[2] = 'B'
print(d_proxy[2])
# ... but the proxy itself rejects item assignment with TypeError. The error is
# the point of the demo: catch it and show it, so that Restart & Run All does
# not stop at this cell.
try:
    d_proxy[3] = 'C'
except TypeError as exc:
    print('TypeError:', exc)

{1: 'A'}
A
B


TypeError: 'mappingproxy' object does not support item assignment

**示例3-13 集合推导**

In [20]:
from unicodedata import name

# Characters with code points 32..255 whose Unicode name contains 'SIGN'.
# name(char, '') returns '' for a character that has no name, so unnamed
# characters are filtered out instead of raising ValueError.
seta = {
    char
    for char in map(chr, range(32, 256))
    if 'SIGN' in name(char, '')
}
print(seta)

{'#', 'µ', '¬', '×', '=', '®', '>', '¥', '÷', '°', '¤', '$', '<', '+', '±', '¶', '¢', '%', '£', '§', '©'}


**示例3-17 将同样的数据以不同的顺序添加到三个字典里**

In [None]:
# (dial code, country) pairs.
DIAL_CODES = [(86, 'China'), (91, 'India'), (1, 'United States'), (62, 'Indonesia'), (55, 'Brazil'), 
              (92, 'Pakistan'), (880, 'Bangladesh'), (234, 'Nigeria'), (7, 'Russia'), (81, 'Japan'),
             ]
# Build three dicts from the same pairs, inserted in three different orders.
d1 = dict(DIAL_CODES)                                # original list order
print('d1: ', d1.keys())
d2 = dict(sorted(DIAL_CODES))                        # sorted by dial code
print('d2: ', d2.keys())
d3 = dict(sorted(DIAL_CODES, key=lambda x:x[1]))     # sorted by country name
# Print only the keys, as for d1 and d2, so the three lines can be compared.
print('d3: ', d3.keys())
# The dicts hold the same items, so they compare equal whatever the insertion order.
assert d1 == d2 == d3