### 第一章
#### 1-1

In [1]:
import collections

Card = collections.namedtuple('Card', ['rank', 'suit'])


class FrenchDeck(object):
    ranks = [str(n) for n in range(2, 11)] + list('JQKA')
    suits = 'spades diamonds clubs hearts'.split()

    def __init__(self):
        self._cards = [Card(rank, suit) for suit in self.suits
                       for rank in self.ranks]

    def __len__(self):
        return len(self._cards)
    
    def __getitem__(self, position):
        return self._cards[position]

In [2]:
beer_card = Card('7', 'diamonds')
print(beer_card.suit)

diamonds


In [3]:
deck = FrenchDeck()
print(len(deck))
print(deck[0],deck[-1])

52
Card(rank='2', suit='spades') Card(rank='A', suit='hearts')


In [4]:
from random import choice

print(choice(deck))

Card(rank='K', suit='diamonds')


In [5]:
print(deck[:3])
print(deck[12::13])

[Card(rank='2', suit='spades'), Card(rank='3', suit='spades'), Card(rank='4', suit='spades')]
[Card(rank='A', suit='spades'), Card(rank='A', suit='diamonds'), Card(rank='A', suit='clubs'), Card(rank='A', suit='hearts')]


In [6]:
for card in reversed(deck):
    print(card)

Card(rank='A', suit='hearts')
Card(rank='K', suit='hearts')
Card(rank='Q', suit='hearts')
Card(rank='J', suit='hearts')
Card(rank='10', suit='hearts')
Card(rank='9', suit='hearts')
Card(rank='8', suit='hearts')
Card(rank='7', suit='hearts')
Card(rank='6', suit='hearts')
Card(rank='5', suit='hearts')
Card(rank='4', suit='hearts')
Card(rank='3', suit='hearts')
Card(rank='2', suit='hearts')
Card(rank='A', suit='clubs')
Card(rank='K', suit='clubs')
Card(rank='Q', suit='clubs')
Card(rank='J', suit='clubs')
Card(rank='10', suit='clubs')
Card(rank='9', suit='clubs')
Card(rank='8', suit='clubs')
Card(rank='7', suit='clubs')
Card(rank='6', suit='clubs')
Card(rank='5', suit='clubs')
Card(rank='4', suit='clubs')
Card(rank='3', suit='clubs')
Card(rank='2', suit='clubs')
Card(rank='A', suit='diamonds')
Card(rank='K', suit='diamonds')
Card(rank='Q', suit='diamonds')
Card(rank='J', suit='diamonds')
Card(rank='10', suit='diamonds')
Card(rank='9', suit='diamonds')
Card(rank='8', suit='diamonds')
Card(r

In [7]:
print(Card('Q', 'hearts') in deck)
print(Card('7', 'beasts') in deck)

True
False


In [8]:
# 对纸牌进行排序，梅花2最小是0，黑桃A最大是51
suit_values = dict(spades=3, hearts=2, diamonds=1, clubs=0)
def spades_high(card):
    rank_value = FrenchDeck.ranks.index(card.rank)
    return rank_value * len(suit_values) + suit_values[card.suit]

In [9]:
for card in sorted(deck, key=spades_high):
    print(card)

Card(rank='2', suit='clubs')
Card(rank='2', suit='diamonds')
Card(rank='2', suit='hearts')
Card(rank='2', suit='spades')
Card(rank='3', suit='clubs')
Card(rank='3', suit='diamonds')
Card(rank='3', suit='hearts')
Card(rank='3', suit='spades')
Card(rank='4', suit='clubs')
Card(rank='4', suit='diamonds')
Card(rank='4', suit='hearts')
Card(rank='4', suit='spades')
Card(rank='5', suit='clubs')
Card(rank='5', suit='diamonds')
Card(rank='5', suit='hearts')
Card(rank='5', suit='spades')
Card(rank='6', suit='clubs')
Card(rank='6', suit='diamonds')
Card(rank='6', suit='hearts')
Card(rank='6', suit='spades')
Card(rank='7', suit='clubs')
Card(rank='7', suit='diamonds')
Card(rank='7', suit='hearts')
Card(rank='7', suit='spades')
Card(rank='8', suit='clubs')
Card(rank='8', suit='diamonds')
Card(rank='8', suit='hearts')
Card(rank='8', suit='spades')
Card(rank='9', suit='clubs')
Card(rank='9', suit='diamonds')
Card(rank='9', suit='hearts')
Card(rank='9', suit='spades')
Card(rank='10', suit='clubs')
Ca

#### 1.2.1 模拟数值类型

In [10]:
from math import hypot


class Vector:

    def __init__(self, x=0, y=0):
        self.x = x
        self.y = y

    def __repr__(self):
        return 'Vector(%r, %r)' % (self.x, self.y)

    def __abs__(self):
        return hypot(self.x, self.y)

    def __bool__(self):
        return bool(self.x or self.y)

    def __add__(self, other):
        x = self.x + other.x
        y = self.y + other.y
        return Vector(x, y)

    def __mul__(self, scalar):
        return Vector(self.x * scalar, self.y * scalar)

In [11]:
v1 = Vector(3, 4)
v2 = Vector(4, 6)
print(v1)
print(bool(v1))
print(v1 + v2)
print(v1 * 3)

Vector(3, 4)
True
Vector(7, 10)
Vector(9, 12)


In [12]:
v = memoryview(b'abcdefg')
print(v[0], v[-1])
print(bytes(v[1:4]))

97 103
b'bcd'


### 第二部分 数据结构
#### 第二章 序列构成的数组

##### 2.2 列表推导和生成器表达式

In [13]:
x = 'ABCD'
dummy = [ord(x) for x in x]
print(x, dummy)

ABCD [65, 66, 67, 68]


#####  2.2.2 列表推导同filter 和mpa的比较
filter 和 map 合起来能做的事情，列表推导也可以做，而且还不需要
借助难以理解和阅读的 lambda 表达式。

In [14]:
symbols = '$¢£¥€¤'
beyond_ascii = [ord(s) for s in symbols if ord(s) > 127]
print(beyond_ascii)
beyond_ascii = list(filter(lambda c: c > 127, map(ord, symbols)))
print(beyond_ascii)

[162, 163, 165, 8364, 164]
[162, 163, 165, 8364, 164]


##### 2.2.3 笛卡尔积
用列表推导可以生成两个或以上的可迭代类型的笛卡儿积。
笛卡儿积是一个列表，列表里的元素是由输入的可迭代类型的元素对构
成的元组，因此笛卡儿积列表的长度等于输入变量的长度的乘积

In [15]:
colors = ['black', 'white']
sizes = ['S', 'M', 'L']
tshirts = [(color, size) for color in colors for size in sizes]
print(tshirts)

[('black', 'S'), ('black', 'M'), ('black', 'L'), ('white', 'S'), ('white', 'M'), ('white', 'L')]


In [16]:
for color in colors:
    for size in sizes:
        print((color, size))

('black', 'S')
('black', 'M')
('black', 'L')
('white', 'S')
('white', 'M')
('white', 'L')


In [17]:
tshirts = [(color, size) for size in sizes
           for color in colors]
print(tshirts)

[('black', 'S'), ('white', 'S'), ('black', 'M'), ('white', 'M'), ('black', 'L'), ('white', 'L')]


##### 2.2.4 生成器表达式
生成器表达式的语法跟列表推导差不多，只不过把方括号换成圆括号而
已。

In [18]:
symbols = '$¢£¥€¤'
print(tuple(ord(symbol) for symbol in symbols))
import array
print(array.array('I', (ord(symbol) for symbol in symbols)))

(36, 162, 163, 165, 8364, 164)
array('I', [36, 162, 163, 165, 8364, 164])


In [19]:
# 生成器表达式计算笛卡尔积
colors = ['black', 'white']
sizes = ['S', 'M', 'L']
for tshirt in ('%s %s' % (c, s) for c in colors for s in sizes):
    print(tshirt)

black S
black M
black L
white S
white M
white L


##### 2.3 元组不仅仅是不可表的列表
有些 Python 入门教程把元组称为“不可变列表”，然而这并没有完全概括
元组的特点。除了用作不可变的列表，它还可以用于没有字段名的记
录。鉴于后者常常被忽略，我们先来看看元组作为记录的功用。

In [20]:
# 把元组用作记录
lax_coordinates = (33.9425, -118.408056)
city, year, pop, chg, area = ('Tokyo', 2003, 32450, 0.66, 8014)
traveler_ids = [('USA', '31195855'), ('BRA', 'CE342567'), ('ESP', 'XDA20586')]
for passport in sorted(traveler_ids):
    print('%s/%s' % passport)
for country, _ in traveler_ids:
    print(country)

BRA/CE342567
ESP/XDA20586
USA/31195855
USA
BRA
ESP


In [21]:
# 元组拆包
lax_coordinates = (33.9425, -118.408056)
latitude, longitude = lax_coordinates
print(latitude, longitude)

33.9425 -118.408056


In [22]:
# 用 * 运算符把一个可迭代对象拆开作为函数的参数
print(divmod(20, 8))
t = (20, 8)
print(divmod(*t))
quotient, remainder = divmod(*t)
print(quotient, remainder)

(2, 4)
(2, 4)
2 4


In [23]:
# os.path.split() 函数就会返回以路径和最后一个文件名组成的元组 (path, last_part)
import os 
path, filename = os.path.split('/home/luciano/.ssh/idrsa.pub')
print(path, filename)

/home/luciano/.ssh idrsa.pub


In [24]:
# 用*来处理剩下的元素
# 在平行赋值中，* 前缀只能用在一个变量名前面，但是这个变量可以出现在赋值表达式的任意位置
a, b, *rest = range(2)
print(a, b, rest)
a, *rest, c, d = range(8)
print(a, rest, c, d)

0 1 []
0 [1, 2, 3, 4, 5] 6 7


In [25]:
# 嵌套元组拆包
metro_areas = [
    ('Tokyo', 'JP', 36.933, (35.689722, 139.691667)),
    ('Delhi NCR', 'IN', 21.935, (28.613889, 77.208889)),
    ('Mexico City', 'MX', 20.142, (19.433333, -99.133333)),
    ('New York-Newark', 'US', 20.104, (40.808611, -74.020386)),
    ('Sao Paulo', 'BR', 19.649, (-23.547778, -46.635833)),
]

print('{:15} | {:^9} | {:^9}'.format('', 'lat.', 'long.'))
fmt = '{:15} | {:^9.4f} | {:9.4f}'
for name, cc, pop, (latitude, longitude) in metro_areas:
    if longitude <= 0:
        print(fmt.format(name, latitude, longitude))

                |   lat.    |   long.  
Mexico City     |  19.4333  |  -99.1333
New York-Newark |  40.8086  |  -74.0204
Sao Paulo       | -23.5478  |  -46.6358


In [26]:
# 命名元组
from collections import namedtuple

City = namedtuple('City', 'name country population coordinates')
tokyo = City('Tokyo', 'JP', 36.933, (35.689722, 139.691667))
print(tokyo)
print(tokyo.population, tokyo.coordinates, tokyo[1])

City(name='Tokyo', country='JP', population=36.933, coordinates=(35.689722, 139.691667))
36.933 (35.689722, 139.691667) JP


In [27]:
print(City._fields)
LatLong = namedtuple('LatLong', 'lat long')
delpi_data = ('Delpi NCR', 'IN', 21.935, LatLong(28.613889, 77.208889))
delpi = City._make(delpi_data)
for key, value in delpi._asdict().items():
    print(key + ':', value)

('name', 'country', 'population', 'coordinates')
name: Delpi NCR
country: IN
population: 21.935
coordinates: LatLong(lat=28.613889, long=77.208889)


##### 2.4 切片

In [28]:
print(deck[12::13])

[Card(rank='A', suit='spades'), Card(rank='A', suit='diamonds'), Card(rank='A', suit='clubs'), Card(rank='A', suit='hearts')]


In [29]:
l = list(range(10))
print(l)
l[2:5] = [20, 30]
print(l)
del l[5:7]
print(l)
l[3::2] = [11, 22]
print(l)
l[2:5] = [100]
print(l)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 1, 20, 30, 5, 6, 7, 8, 9]
[0, 1, 20, 30, 5, 8, 9]
[0, 1, 20, 11, 5, 22, 9]
[0, 1, 100, 22, 9]


##### 2.5 对序列使用+和*
Python 程序员会默认序列是支持 + 和 * 操作的。通常 + 号两侧的序列由
相同类型的数据所构成，在拼接的过程中，两个被操作的序列都不会被
修改，Python 会新建一个包含同样类型数据的序列来作为拼接的结果。

In [30]:
l = [1, 2, 3]
print(l * 5)
print(5 * 'abcd')

[1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3]
abcdabcdabcdabcdabcd


In [31]:
board = [['_'] * 3 for i in range(3)]
print(board)
board[1][2] = 'X'
print(board)

[['_', '_', '_'], ['_', '_', '_'], ['_', '_', '_']]
[['_', '_', '_'], ['_', '_', 'X'], ['_', '_', '_']]


In [32]:
weird_board = [['_'] * 3] * 3
print(weird_board)
weird_board[1][2] = 'O'
print(weird_board)

[['_', '_', '_'], ['_', '_', '_'], ['_', '_', '_']]
[['_', '_', 'O'], ['_', '_', 'O'], ['_', '_', 'O']]


##### 2.6 序列的增量赋值
1. 对不可变序列进行重复拼接操作的话，效率会很低，因为每次都有一个
新对象，而解释器需要把原来对象中的元素先复制到新的对象里，然后
再追加新的元素。
2. str 是一个例外，因为对字符串做 += 实在是太普遍了，所以 CPython 对它做了优化。为 str
初始化内存的时候，程序会为它留出额外的可扩展空间，因此进行增量操作的时候，并不会涉
及复制原有字符串到新位置这类操作。

In [33]:
l = [1, 2, 3]
print(id(l))
l *= 2
print(l)
print(id(l))
t = (1, 2, 3)
print(id(t))
t *= 2
print(id(t))

1999199105864
[1, 2, 3, 1, 2, 3]
1999199105864
1999199110200
1999198878120


In [34]:
t = (1, 2, [30, 40])
t[2] += [50, 60]
print(t)

TypeError: 'tuple' object does not support item assignment

In [None]:
print(t)

##### 2.7 list.sort方法和内置函数sorted
list.sort 方法会就地排序列表，也就是说不会把原列表复制一份。  
与 list.sort 相反的是内置函数 sorted，它会新建一个列表作为返回
值。这个方法可以接受任何形式的可迭代对象作为参数，甚至包括不可
变序列或生成器（见第 14 章）。而不管 sorted 接受的是怎样的参
数，它最后都会返回一个列表。

##### 2.8 使用bisect 来管理已排序的序列
bisect 模块包含两个主要函数，bisect 和 insort，两个函数都利用
二分查找算法来在有序序列中查找或插入元素。

In [None]:
# 在有序序列中用bisect查找某个元素的插入位置
import bisect
import sys

HAYSTACK = [1, 4, 5, 6, 8, 12, 15, 20, 21, 23, 23, 26, 29, 30]
NEEDLES = [0, 1, 2, 5, 8, 10, 22, 23, 29, 30, 31]

ROW_FMT = '{0:2d} @ {1:2d}    {2}{0:<2d}'
def demo(bisect_fn):
    for needle in reversed(NEEDLES):
        position = bisect_fn(HAYSTACK, needle)
        offset = position * '  |'
        print(ROW_FMT.format(needle, position, offset))

print('DEMO:', bisect.bisect.__name__)
print('haystack ->', ' '.join('%2d' % n for n in HAYSTACK))
demo(bisect.bisect)

In [None]:
import bisect
import random

In [None]:
SIZE = 7
random.seed(1729)

my_list = []
for i in range(SIZE):
    new_item = random.randrange(SIZE*2)
    bisect.insort(my_list, new_item)
    print('%2d ->' % new_item, my_list)

##### 2.9 当列表不是首选时


In [None]:
# 一个浮点型数组的创建、存入文件和从文件读取的过程
from array import array
from random import random

floats = array('d', (random() for i in range(10**7)))
print(floats[-1])

In [None]:
with open('floats.bin', 'wb') as fp:
    floats.tofile(fp)
floats2 = array('d')
with open('floats.bin', 'rb') as fp:
    floats2.fromfile(fp, 10**7)
print(floats2[-1])
print(floats == floats2)

In [None]:
# 通过改变数组中的一个字节来更新数组里某个元素的值
numbers = array('h', [-2, -1, 0, 1, 2])
memv = memoryview(numbers)
print(len(memv), memv[0])
memv_oct = memv.cast('B')
print(memv_oct.tolist())
memv_oct[5] = 4
print(numbers)

In [None]:
# numpy
import numpy as np

a = np.arange(12)
print(a, type(a), a.shape)
a.shape = 3, 4
print(a)
print(a[2])
print(a[2, 1])
print(a[:, 1])
print(a.T)

In [None]:
np.save('a', a)
a2 = np.load('a.npy', 'r+')
print(a2)

In [None]:
# 双端队列
from collections import deque

dq = deque(range(10), maxlen=10)
print(dq)
dq.rotate(3)
print(dq)
dq.rotate(-4)
print(dq)
dq.appendleft(-1)
print(dq)
dq.extend([11, 22, 33])
print(dq)
dq.extendleft([10, 20, 30])
print(dq)

#### 第三章 字典和集合
##### 3.1 泛映射类型

In [None]:
from collections import abc

my_dict = {}
print(isinstance(my_dict, abc.Mapping))

In [None]:
tt = (1, 2, (30, 40))
print(hash(tt))
t1 = (1, 2, [30, 40])
print(hash(t1))

In [None]:
tf = (1, 2, frozenset([30, 40]))
print(hash(tf), id(tf))

In [None]:
# dict的创建
a = dict(one=1, two=2, three=3)
b = {'one': 1, 'two': 2, 'three': 3}
c = dict(zip(['one', 'two', 'three'], [1, 2, 3]))
d = dict([('two', 2), ('one', 1), ('three', 3)])
e = dict({'three': 3, 'one': 1, 'two': 2})
print(a == b == c == d ==e)

##### 3.4 映射的弹性键查询
有时候为了方便起见，就算某个键在映射里不存在，我们也希望在通过
这个键读取值的时候能得到一个默认值。有两个途径能帮我们达到这个
目的，一个是通过 defaultdict 这个类型而不是普通的 dict，另一个
是给自己定义一个 dict 的子类，然后在子类中实现 __missing__ 方
法。下面将介绍这两种方法。

In [None]:
# 在实例化一个 defaultdict 的时候，需要给构造方法提供
# 一个可调用对象，这个可调用对象会在 __getitem__ 碰到找不到的键
# 的时候被调用，让 __getitem__ 返回某种默认值。
from collections import defaultdict

dd = defaultdict(list)
dd['a'].append(1)
print(dd)

In [None]:
# __missing__
class StrKeyDict0(dict):
    
    def __missing__(self, key):
        if isinstance(key, str):
            raise KeyError(key)
        return self[str(key)]
    
    def get(self, key, default=None):
        try:
            return self[key]
        except KeyError:
            return default
    
    def __contains__(self, key):
        return key in self.keys() or str(key) in self.keys()
    

In [None]:
d1 = StrKeyDict0([('1', 'a')])
print(d1)
print(d1.get(2, None))
print(1 in d1)

##### 3.5 字典的变种
-  collections.OrderedDict
  - 这个类型在添加键的时候会保持顺序，因此键的迭代次序总是一致
的。OrderedDict 的 popitem 方法默认删除并返回的是字典里的最后
一个元素，但是如果像 my_odict.popitem(last=False) 这样调用
它，那么它删除并返回第一个被添加进去的元素。
- collections.ChainMap
  - 该类型可以容纳数个不同的映射对象，然后在进行键查找操作的时
候，这些对象会被当作一个整体被逐个查找，直到键被找到为止。这个
功能在给有嵌套作用域的语言做解释器的时候很有用，可以用一个映射
对象来代表一个作用域的上下文。
- collections.Counter
 - 这个映射类型会给键准备一个整数计数器。每次更新一个键的时候
都会增加这个计数器。所以这个类型可以用来给可散列表对象计数，或
者是当成多重集来用——多重集合就是集合里的元素可以出现不止一
次。Counter 实现了 + 和 - 运算符用来合并记录，还有像
most_common([n]) 这类很有用的方法。most_common([n]) 会按照次
序返回映射里最常见的 n 个键和它们的计数

In [None]:
from collections import Counter

ct = Counter('abdgsdgsefasdfawe')
print(ct)
ct.update('sdfseaaaaaddddsssssss')
print(ct)
print(ct.most_common(2))

- colllections.UserDict
  - 这个类其实就是把标准 dict 用纯 Python 又实现了一遍。
跟 OrderedDict、ChainMap 和 Counter 这些开箱即用的类型不
同，UserDict 是让用户继承写子类的。  

##### 3.6 子类化UserDict

In [None]:
from collections import UserDict

class StrKeyDict(UserDict):
    
    def __missing__(self, key):
        if isinstance(key, str):
            raise KeyError(key)
        return self[str(key)]
    
    def __contains__(self, key):
        return str(key) in self.data
    
    def __setitem__(self, key, item):
        self.data[str(key)] = item

##### 3.7 不可变映射类型
从 Python 3.3 开始，types 模块中引入了一个封装类名叫
MappingProxyType。如果给这个类一个映射，它会返回一个只读的映
射视图。虽然是个只读视图，但是它是动态的。这意味着如果对原映射
做出了改动，我们通过这个视图可以观察到，但是无法通过这个视图对
原映射做出修改。

In [None]:
from types import MappingProxyType

d = {1: 'A'}
d_proxy = MappingProxyType(d)
print(d_proxy, d_proxy[1])
d[2] = 'x'
print(d_proxy, d_proxy[2])

##### 3.8 集合论
集合的本质是许多唯一对象的聚集。

In [None]:
l = ['a', 'a', 'd', 'd', 'd', 'c']
print(set(l))
s = {1}
print(s, type(s))
s.pop()
print(s)

In [None]:
# 用dis.dis来看看两个方法的字节码的不同
from dis import dis

print(dis('{1}'))
print(dis('set([1])'))

In [None]:
print(frozenset(range(10)))

In [None]:
# 集合推导
from unicodedata import name

s1 = {chr(i) for i in range(32, 256) if 'SIGN' in name(chr(i), '')}
print(s1)

#####  3.9 dict和set的背后

###### 3.9.2 字典中的散列表    
![jupyter](./dict.jpg)

In [None]:
print(1 == 1.0, hash(1), hash(1.0))
print(hash(1))
print(hash(1.0))
print(hash(1.0001))
print(hash(1.0002))

#### 第四章 文本和字节序列


In [None]:
# 编码和解码
s = 'cafe'
print(len(s))
b = s.encode('utf-8')
print(b)
print(len(b))
print(b.decode('utf-8'))

In [None]:
# 包含5个字节的bytes和bytearray对象
cafe = bytes('cafe点的’\n', encoding='utf-8')
print(cafe, cafe[0], cafe[:1])
cafe_arr = bytearray(cafe)
print(cafe_arr, cafe_arr[-1], cafe_arr[-1:])

In [None]:
# 使用数组中的原始数据初始化bytes对象
import array

numbers = array.array('h', [-2, -1, 0, 1, 2])
octets = bytes(numbers)
print(octets)

In [None]:
# 使用memoryview和struct查看一个GIF图像的首部
import struct

fmt = '<3s3sHH'
with open('dict.jpg', 'rb') as fp:
    img = memoryview(fp.read())

header = img[:10]
print(bytes(header))
print(struct.unpack(fmt, header))
del header
del img

##### 4.3 基本的编解码器

In [None]:
# 使用3个编解码器编码字符串“El Niño”，得到的字节序列差异很大
for codec in ['latin_1', 'utf_8', 'utf_16']:
    print(codec, 'El Niño'.encode(codec), sep='\t')


##### 4.4 了解编码的问题
虽然有个一般性的 UnicodeError 异常，但是报告错误时几乎都会指明
具体的异常：UnicodeEncodeError（把字符串转换成二进制序列时）
或 UnicodeDecodeError（把二进制序列转换成字符串时）。如果源码
的编码与预期不符，加载 Python 模块时还可能抛出 SyntaxError。接
下来的几节说明如何处理这些错误。

In [None]:
# 编码成字节序列：成功和错误处理
city = 'São Paulo'
print(city.encode('utf-8'))
print(city.encode('utf_16'))
print(city.encode('iso8859_1'))
print(city.encode('cp437', errors='ignore'))
print(city.encode('cp437', errors='replace'))
print(city.encode('cp437', errors='xmlcharrefreplace'))
print(city.encode('cp437'))

In [None]:
# 把字节序列解码成字符串：成功和错误处理
octets = b'Montr\xe9al'
print(octets.decode('cp1252'))
print(octets.decode('iso8859_7'))
print(octets.decode('koi8_r'))
print(octets.decode('utf-8', errors='replace'))
print(octets.decode('utf-8'))

In [None]:
import chardet
print(chardet.detect(city.encode('utf-8')))

In [None]:
u16 = 'El Niño'.encode('utf_16')
u16le = 'El Niño'.encode('utf_16le')
u16be = 'El Niño'.encode('utf_16be')
print(u16)
print(u16le)
print(u16be)
print(list(u16))
print(list(u16le))
print(list(u16be))

##### 4.5 处理文本文件

In [None]:
with open('cafe.txt', 'w', encoding='utf-8') as fp:
    fp.write('café')
    print(fp)
import os
print(os.stat('cafe.txt').st_size)
with open('cafe.txt') as fp:
    print(fp.encoding)
    print(fp.read())
with open('cafe.txt', encoding='utf-8') as fp:
    print(fp)
    print(fp.read())
with open('cafe.txt', 'rb') as fp:
    print(fp)
    print(fp.read())

In [None]:
# 探索编码默认值
import sys, locale
expressions = """
locale.getpreferredencoding()
type(my_file)
my_file.encoding
sys.stdout.isatty()
sys.stdout.encoding
sys.stdin.isatty()
sys.stdin.encoding
sys.stderr.isatty()
sys.stderr.encoding
sys.getdefaultencoding()
sys.getfilesystemencoding()
"""
my_file = open('dummy', 'w')
for expression in expressions.split():
    value = eval(expression)
    print(expression.rjust(30), '->', repr(value))

##### 4.6 为了正确比较而规范化Unicode字符串

In [None]:
s1 = 'café'
s2 = 'cafe\u0301'
print(s1, len(s1), s2, len(s2), s1 == s2)

- NFC（Normalization Form C）使用最少的码位构成等价的字符串，而
NFD 把组合字符分解成基字符和单独的组合字符。这两种规范化方式都
能让比较行为符合预期：

In [None]:
from unicodedata import normalize
s1 = 'café'
s2 = 'cafe\u0301'
print(s1, len(s1), s2, len(s2))
print(len(normalize('NFC', s1)), len(normalize('NFC', s2)))
print(len(normalize('NFD', s1)), len(normalize('NFD', s2)))
print(normalize('NFC', s1) == normalize('NFC', s2))
print(normalize('NFD', s1) == normalize('NFD', s2))

In [None]:
from unicodedata import normalize, name
ohm = '\u2126'
print(ohm, name(ohm))
ohm_c = normalize('NFC', ohm)
print(ohm_c, name(ohm_c))
print(ohm == ohm_c)
print(normalize('NFC', ohm) == normalize('NFC', ohm_c))

In [None]:
from unicodedata import normalize, name
half = '½'
print(normalize('NFKC', half))
four_squared = '4²'
print(normalize('NFKC', four_squared))

In [None]:
# 大小写折叠
micro = 'μ'
print(name(micro))
micro_cf = micro.casefold()
print(name(micro_cf))
print(micro, micro_cf)
eszett = 'ß'
print(name(eszett))
eszett_cf = eszett.casefold()
print(eszett, eszett_cf)

### 第三部分 把函数视作对象
#### 第五章 一等函数
在 Python 中，函数是一等对象。编程语言理论家把“一等对象”定义为满
足下述条件的程序实体：
- 在运行时创建
- 能赋值给变量或数据结构中的元素
- 能作为参数传给函数
- 能作为函数的返回结果

##### 5.1 把函数视作对象

In [None]:
# 创建并测试一个函数，然后读取它的__doc__属性，再检查它的类型
def factorial(n):
    '''returns n!'''
    return 1 if n < 2 else n * factorial(n-1)
print(factorial(42))
print(factorial.__doc__, type(factorial))

##### 5.2 高阶函数
接受函数为参数，或者把函数作为结果返回的函数是高阶函数（higher-
order function）。

In [None]:
if any([]):
    print(True)
else:
    print(False)

##### 5.3 匿名函数

In [None]:
fruits = ['strawberry', 'fig', 'apple', 'cherry', 'raspberry', 'banana']
print(sorted(fruits, key=lambda word: word[::-1]))

##### 5.4 可调用对象
除了用户定义的函数，调用运算符（即 ()）还可以应用到其他对象
上。如果想判断对象能否调用，可以使用内置的 callable() 函数。   
- 用户定义的函数
 - 使用 def 语句或 lambda 表达式创建。
- 内置函数
 - 使用 C 语言（CPython）实现的函数，如 len 或 time.strftime。  
- 内置方法
 - 使用 C 语言实现的方法，如 dict.get。  
- 方法
 - 在类的定义体中定义的函数。  
- 类
 - 调用类时会运行类的 __new__ 方法创建一个实例，然后运行
__init__ 方法，初始化实例，最后把实例返回给调用方。因为 Python
没有 new 运算符，所以调用类相当于调用函数。（通常，调用类会创建
那个类的实例，不过覆盖 __new__ 方法的话，也可能出现其他行为。）  
- 类的实例
 - 如果类定义了 __call__ 方法，那么它的实例可以作为函数调用。  
- 生成器函数
 - 使用 yield 关键字的函数或方法。调用生成器函数返回的是生成
器对象。

In [None]:
callable(str)

##### 5.5 用户定义的可调用类型
不仅 Python 函数是真正的对象，任何 Python 对象都可以表现得像函
数。为此，只需实现实例方法 \_\_call\_\_。

In [None]:
import random

class BingoCage:
    def __init__(self, items):
        self._items = list(items)
        random.shuffle(self._items)
    
    def pick(self):
        try:
            return self._items.pop()
        except IndexError:
            raise LookupError('pick from empty BingoCage')
    
    def __call__(self):
        return self.pick()

In [None]:
bingo = BingoCage(range(6))
print(callable(bingo), bingo.pick(), bingo())

##### 5.6 函数内省

In [None]:
dir(bingo)

In [None]:
print(bingo.__dict__)

In [None]:
# 列出常规对象没有而函数有的属性
class C: pass
obj = C()
def func(): pass
print(sorted(set(dir(func)) - set(dir(obj))))

##### 5.7  从定位参数到仅限关键字参数

In [4]:
def tag(name, *content, cls=None, **attrs):
    '''生成一个或多个HTML标签'''
    if cls is not None:
        attrs['class'] = cls
    if attrs:
        attr_str = ''.join(' %s="%s"' % (attr, value)
                          for attr, value in sorted(attrs.items()))
    else:
        attr_str = ''
    if content:
        return '\n'.join('<%s%s>%s</%s>' %
                        (name, attr_str, c, name) for c in content)
    else:
        return '<%s%s />' % (name, attr_str)

In [5]:
print(tag('br'))
print(tag('html',tag('body', tag('p', 'Hello World!', cls='sidebar', id=3))))
my_tag = {'name': 'img', 'title': 'Sunset Boulevard',
         'src': 'sunset.jpg', 'cls': 'framed'}
print(tag(**my_tag))

<br />
<html><body><p class="sidebar" id="3">Hello World!</p></body></html>
<img class="framed" src="sunset.jpg" title="Sunset Boulevard" />


#####  5.8 获取关于参数的信息

In [43]:
# 在指定长度附近截取字符串的函数
def clip(text:str, max_len:'int > 0'=80) -> str:
    '''在max_len前面或后面的第一个空格处截取文本
    '''
    end = None
    if len(text) > max_len:
        space_before = text.rfind(' ', 0, max_len)
        if space_before >= 0:
            end = space_before
        else:
            space_after = text.rfind(' ', max_len)
            if space_after >= 0:
                end = space_after
    if end is None: # 没找到空格
        end = len(text)
    return text[:end].rstrip()

In [44]:
# 提取关于函数参数的信息
print(clip.__defaults__)
print(clip.__code__)
print(clip.__code__.co_varnames)
print(clip.__code__.co_argcount)

(80,)
<code object clip at 0x000001D17C579AE0, file "<ipython-input-43-9b88fb3ab2ba>", line 2>
('text', 'max_len', 'end', 'space_before', 'space_after')
2


In [45]:
# 提取函数签名
from inspect import signature

sig = signature(clip)
print(sig, str(sig))
for name, param in sig.parameters.items():
    print(param.kind, ':', name, '=', param.default)

(text: str, max_len: 'int > 0' = 80) -> str (text: str, max_len: 'int > 0' = 80) -> str
POSITIONAL_OR_KEYWORD : text = <class 'inspect._empty'>
POSITIONAL_OR_KEYWORD : max_len = 80


##### 5.9 函数注解

In [46]:
print(clip.__annotations__)

{'text': <class 'str'>, 'max_len': 'int > 0', 'return': <class 'str'>}


In [47]:
# 从函数签名中提取注解
from inspect import signature

sig = signature(clip)
print(sig.return_annotation)
for param in sig.parameters.values():
    note = repr(param.annotation).ljust(13)
    print(note, ':', param.name, '=', param.default)

<class 'str'>
<class 'str'> : text = <class 'inspect._empty'>
'int > 0'     : max_len = 80


##### 5.10 支持函数式编程的包

In [48]:
# 使用reduce 和 operator.mul 函数计算阶乘
from functools import reduce
from operator import mul

def fact(n):
    return reduce(mul, range(1, n+1))

In [54]:
# 演示使用itemgetter排序一个元组列表
from operator import itemgetter
metro_data = [
    ('Tokyo', 'JP', 36.933, (35.689722, 139.691667)),
    ('Delhi NCR', 'IN', 21.935, (28.613889, 77.208889)),
    ('Mexico City', 'MX', 20.142, (19.433333, -99.133333)),
    ('New York-Newark', 'US', 20.104, (40.808611, -74.020386)),
    ('Sao Paulo', 'BR', 19.649, (-23.547778, -46.635833)),
]
for city in sorted(metro_data, key=itemgetter(1)):
    print(city)

('Sao Paulo', 'BR', 19.649, (-23.547778, -46.635833))
('Delhi NCR', 'IN', 21.935, (28.613889, 77.208889))
('Tokyo', 'JP', 36.933, (35.689722, 139.691667))
('Mexico City', 'MX', 20.142, (19.433333, -99.133333))
('New York-Newark', 'US', 20.104, (40.808611, -74.020386))


In [62]:
# 如果把多个参数传给itemgetter，它构建的函数会返回提取的值构成的元组
cc_name = itemgetter(1, 0)
for city in metro_data:
    print(cc_name(city))

<class 'operator.itemgetter'>
('JP', 'Tokyo')
('IN', 'Delhi NCR')
('MX', 'Mexico City')
('US', 'New York-Newark')
('BR', 'Sao Paulo')


In [65]:
# 定义一个namedtuple，名为metro_data，演示使用attrgetter处理它
from collections import namedtuple
LatLong = namedtuple('LatLong', 'lat log')
Metropolis = namedtuple('Metropolis', 'name cc pop coord')
metro_areas = [Metropolis(name, cc, pop, LatLong(lat, long))
               for name, cc, pop, (lat, long) in metro_data]
print(metro_areas[0])
print(metro_areas[0].coord.lat)

Metropolis(name='Tokyo', cc='JP', pop=36.933, coord=LatLong(lat=35.689722, log=139.691667))
35.689722


In [66]:
from operator import attrgetter
name_lat = attrgetter('name', 'coord.lat')
for city in sorted(metro_areas, key=attrgetter('coord.lat')):
    print(name_lat(city))

('Sao Paulo', -23.547778)
('Mexico City', 19.433333)
('Delhi NCR', 28.613889)
('Tokyo', 35.689722)
('New York-Newark', 40.808611)


In [69]:
# methodcaller 使用示例：第二个测试展示绑定额外参数的方式
from operator import methodcaller
s = 'The time has come'
upcase = methodcaller('upper')
print(upcase(s))
hiphenate = methodcaller('replace', ' ', '-')
print(hiphenate(s))

THE TIME HAS COME
The-time-has-come


In [1]:
# 使用functools.partial把一个两个参数函数改编成需要单参数的可调用对象
from operator import mul
from functools import partial

triple = partial(mul, 3)
print(triple(10))
print(list(map(triple, range(1, 10))))

30
[3, 6, 9, 12, 15, 18, 21, 24, 27]


In [3]:
# 使用partial构建一个便利的Unicode规范化函数
from unicodedata import normalize
nfc = partial(normalize, 'NFC')
s1 = 'café'
s2 = 'cafe\u0301'
print(s1, s2)
print(s1 == s2)
print((nfc(s1) == nfc(s2)))

café café
False
True


In [10]:
# 把partial应用到tag函数上
print(tag)
picture = partial(tag, 'img', cls='pic-frame')
print(picture(src='wumpus.jpeg'))
print(picture)
print(picture.func, picture.args, picture.keywords)

<function tag at 0x0000020D59640048>
<img class="pic-frame" src="wumpus.jpeg" />
functools.partial(<function tag at 0x0000020D59640048>, 'img', cls='pic-frame')
<function tag at 0x0000020D59640048> ('img',) {'cls': 'pic-frame'}
