流畅的Python，Chapter 14 可迭代的对象、迭代器和生成器

In [1]:
import re
import reprlib

# Raw string: '\w' is not a valid str escape, so a plain literal triggers an
# invalid-escape-sequence warning on modern Python.
RE_WORD = re.compile(r'\w+')


class Sentence:
    """A sentence exposed as a sequence of its words.

    The class defines ``__getitem__`` but no ``__iter__``; Python can still
    iterate over it by falling back to indexing from 0 until IndexError.
    """

    def __init__(self, text):
        """Store ``text`` and eagerly extract every ``\\w+`` match from it."""
        self.text = text
        self.words = RE_WORD.findall(text)  # <1> list of all word matches

    def __getitem__(self, index):
        """Return the word(s) at ``index`` (int or slice) of the word list."""
        return self.words[index]  # <2>

    def __len__(self):  # <3> takes no extra argument, so len(s) works
        """Return the number of words."""
        return len(self.words)

    def __repr__(self):
        """Return ``Sentence(<text>)`` with long text abbreviated by reprlib."""
        return 'Sentence(%s)' % reprlib.repr(self.text)  # <4>

In [2]:
s = Sentence('"The time has come," the Walrus said,')

In [3]:
s

Sentence('"The time ha... Walrus said,')

In [4]:
for word in s:
    print(word)

The
time
has
come
the
Walrus
said


In [5]:
list(s)

['The', 'time', 'has', 'come', 'the', 'Walrus', 'said']

In [6]:
s[0]

'The'

In [7]:
s[5]

'Walrus'

In [8]:
s[-1]

'said'

In [9]:
class Foo:
    """Minimal class that only defines ``__iter__``.

    Defining the method is enough for ``abc.Iterable`` to recognise the
    class as a virtual subclass, even though the method returns nothing
    usable.
    """

    def __iter__(self):
        """Placeholder implementation; returns None."""
        return None

In [10]:
from collections import abc
issubclass(Foo, abc.Iterable)

True

In [12]:
f = Foo()
isinstance(f, abc.Iterable)

True

In [13]:
issubclass(Sentence, abc.Iterable)

False

## 判断对象x能否迭代，最好的方法是用iter(x)函数，如果不可迭代，再处理TypeError异常。比用isinstance(x, abc.Iterable)更准确。

In [14]:
# Python从可迭代的对象中获取迭代器
s = 'ABC'
for char in s:
    print(char)

A
B
C


In [15]:
# 不用for语句，用while模拟循环
s = 'ABC'
it = iter(s)
while True:
    try:
        print(next(it))
    except StopIteration:
        del it
        break

A
B
C


In [16]:
# 标准的迭代器接口有2个方法
# __next__ 返回下一个可用的元素，如果没有了，抛出StopIteration异常
# __iter__ 返回self，一般在应该使用可迭代对象的地方使用迭代器，如for循环

In [17]:
s3 = Sentence('Pig and Pepper')
it = iter(s3)
it

<iterator at 0x10e1f4438>

In [18]:
next(it)

'Pig'

In [19]:
next(it)

'and'

In [20]:
next(it)

'Pepper'

In [21]:
next(it)

StopIteration: 

In [22]:
list(it)

[]

In [23]:
# 迭代器不能检查是否还有遗留的元素，也不能还原迭代器。
# 如果再次迭代，必须使用iter(...)，传入可迭代对象。
# 传入迭代器本身没用，因为Iterator.__iter__方法的实现方式是返回实例本身。
list(iter(s3))

['Pig', 'and', 'Pepper']

In [24]:
# 迭代器是这样的对象
# 实现了无参数的__next__ 返回下一个可用的元素，如果没有了，抛出StopIteration异常
# 实现了__iter__ 返回self，一般在应该使用可迭代对象的地方使用迭代器，如for循环.迭代器也可以迭代。

In [25]:
# BEGIN SENTENCE_ITER  使用  迭代器模式  实现Sentence类
import re
import reprlib

# Raw string: '\w' is not a valid str escape, so a plain literal triggers an
# invalid-escape-sequence warning on modern Python.
RE_WORD = re.compile(r'\w+')


class Sentence:
    """Iterable sentence implemented with the classic Iterator pattern.

    The sentence is iterable but is not its own iterator: each call to
    ``__iter__`` builds a fresh ``SentenceIterator`` over the word list.
    """

    def __init__(self, text):
        """Store ``text`` and eagerly extract every ``\\w+`` match from it."""
        self.text = text
        self.words = RE_WORD.findall(text)

    def __repr__(self):
        """Return ``Sentence(<text>)`` with long text abbreviated by reprlib."""
        return 'Sentence(%s)' % reprlib.repr(self.text)

    def __iter__(self):  # <1> instantiate and return a new iterator
        """Return a new ``SentenceIterator`` over the words."""
        return SentenceIterator(self.words)  # <2>


class SentenceIterator:
    """Iterator that walks a word sequence by position.

    Implements both halves of the iterator protocol: ``__iter__`` returns
    the iterator itself and ``__next__`` hands out one word per call.
    """

    def __init__(self, words):
        self.words = words  # sequence being traversed
        self.index = 0  # position of the next word to return

    def __iter__(self):
        """An iterator is its own iterator."""
        return self

    def __next__(self):
        """Return the next word, or raise StopIteration past the end."""
        try:
            current = self.words[self.index]
        except IndexError:
            # Indexing past the end means the sequence is exhausted.
            raise StopIteration()
        else:
            self.index += 1
            return current
# END SENTENCE_ITER
# END SENTENCE_ITER

In [26]:
issubclass(SentenceIterator, abc.Iterator)

True

In [27]:
# 符合 Python 习惯, 用生成器函数代替SentenceIterator类

import re
import reprlib

# Raw string: '\w' is not a valid str escape, so a plain literal triggers an
# invalid-escape-sequence warning on modern Python.
RE_WORD = re.compile(r'\w+')


class Sentence:
    """Iterable sentence whose ``__iter__`` is a generator function."""

    def __init__(self, text):
        """Store ``text`` and eagerly extract every ``\\w+`` match from it."""
        self.text = text
        self.words = RE_WORD.findall(text)

    def __repr__(self):
        """Return ``Sentence(<text>)`` with long text abbreviated by reprlib."""
        return 'Sentence(%s)' % reprlib.repr(self.text)

    def __iter__(self):
        """Yield the words one by one.

        Because the body contains ``yield``, calling this method returns a
        generator object, which serves as the iterator. No explicit
        ``return`` is needed: falling off the end finishes the generator.
        """
        for word in self.words:  # <1>
            yield word  # <2>

# 只要函数的定义体中有yield，该函数就是生成器函数，返回一个生成器对象。也就是说，生成器函数是生成器工厂
# 这是一个实现了迭代器接口的生成器对象。

In [28]:
def gen_123():
    """Generator function that produces 1, 2 and 3 in that order."""
    for number in (1, 2, 3):
        yield number

In [29]:
gen_123

<function __main__.gen_123()>

In [30]:
gen_123()

<generator object gen_123 at 0x10e2858b8>

In [31]:
for i in gen_123():
    print(i)

1
2
3


In [32]:
g = gen_123()
next(g)

1

In [33]:
next(g)

2

In [34]:
next(g)

3

In [35]:
next(g)

StopIteration: 

In [36]:
def gen_AB():
    """Yield 'A' then 'B', printing a trace line before each step.

    The prints make the lazy execution visible: nothing is printed until
    the consumer asks for the next value.
    """
    for banner, letter in (('start', 'A'), ('continue', 'B')):
        print(banner)
        yield letter
    # Reached only when the consumer asks for a third value.
    print('end. ')

In [37]:
# 迭代时 for 机制的作用与 g = iter(gen_AB())一样，用于获取生成器对象，然后每次迭代时调用next(g)
# 第一次调用next(g)时，打印start并停在第一个yield处
# 最后一次调用next(g)时，继续执行，打印end.，生成器函数的定义体执行完毕，生成器对象抛出StopIteration异常。for机制会捕获该异常，因此循环终止时没有报错。

for c in gen_AB():
    print('--->', c)

start
---> A
continue
---> B
end. 


In [38]:
# 惰性求值和及早求值是编程语言理论方面的技术术语
# 不再需要words列表，不再需要处理整个文本。减少内存使用量

import re
import reprlib

# Raw string: '\w' is not a valid str escape, so a plain literal triggers an
# invalid-escape-sequence warning on modern Python.
RE_WORD = re.compile(r'\w+')


class Sentence:
    """Lazy iterable sentence: words are matched only while iterating."""

    def __init__(self, text):
        """Store ``text``; no word list is built up front."""
        self.text = text  # <1>

    def __repr__(self):
        """Return ``Sentence(<text>)`` with long text abbreviated by reprlib."""
        return 'Sentence(%s)' % reprlib.repr(self.text)

    def __iter__(self):
        """Yield each ``\\w+`` match in the text, one at a time."""
        # finditer returns an iterator of match objects over self.text, so
        # matches are produced on demand; match.group() extracts the text
        # that actually matched.
        for match in RE_WORD.finditer(self.text):  # <2>
            yield match.group()  # <3>

In [39]:
res1 = [x * 3 for x in gen_AB()]

start
continue
end. 


In [40]:
for i in res1:
    print('-->', i)

--> AAA
--> BBB


In [48]:
res2 = (x * 3 for x in gen_AB())

In [49]:
res2

<generator object <genexpr> at 0x10e285e58>

In [50]:
for i in res2:
    print('-->', i)

start
--> AAA
continue
--> BBB
end. 


In [51]:
# 以上可以看出 
# 列表推导式 及早求值(eager evaluation)
# 生成器表达式 惰性求值(lazy evaluation)

# A generator is itself an iterator, so calling iter() on it invokes
# __iter__ and returns the very same generator object.
# The expression squares its own loop variable `i`; referring to any other
# name would raise NameError as soon as the generator is consumed.
a = (i ** 2 for i in range(10))
a_iter = iter(a)

# a = <generator object <genexpr> at 0x10d64c570>
# a_iter = <generator object <genexpr> at 0x10d64c570>

In [52]:
import re
import reprlib

# Raw string: '\w' is not a valid str escape, so a plain literal triggers an
# invalid-escape-sequence warning on modern Python.
RE_WORD = re.compile(r'\w+')


class Sentence:
    """Lazy iterable sentence built on a generator expression."""

    def __init__(self, text):
        """Store ``text``; no word list is built up front."""
        self.text = text

    def __repr__(self):
        """Return ``Sentence(<text>)`` with long text abbreviated by reprlib."""
        return 'Sentence(%s)' % reprlib.repr(self.text)

    def __iter__(self):
        """Return a generator producing each ``\\w+`` match in the text.

        This is an ordinary method (no ``yield``): it builds a generator
        expression and returns it.
        """
        return (match.group() for match in RE_WORD.finditer(self.text))
# 这里没有生成器函数了(没有yield)，使用生成器表达式构建生成器，然后将其返回。

In [53]:
# 内置的range函数用于生成有穷整数等差数列，
# itertools.count函数用于生成无穷整数等差数列。

class ArithmeticProgression:
    """Iterable arithmetic progression: begin, begin + step, ...

    Values are produced while they are strictly below ``end``; when ``end``
    is None the progression never stops.
    """

    def __init__(self, begin, step, end=None):
        self.begin, self.step, self.end = begin, step, end

    def __iter__(self):
        """Yield the terms, coerced to the type of ``begin + step``."""
        # Coerce the first term so every term has the same numeric type.
        coerce = type(self.begin + self.step)
        value = coerce(self.begin)
        unbounded = self.end is None
        count = 0
        while True:
            if not unbounded and not value < self.end:
                return
            yield value
            count += 1
            # Recompute from begin instead of adding step repeatedly, so
            # floating-point rounding errors do not accumulate.
            value = self.begin + self.step * count

In [54]:
ap = ArithmeticProgression(0, 1, 3)
list(ap)

[0, 1, 2]

In [55]:
ap = ArithmeticProgression(1, 0.5, 3)
list(ap)

[1.0, 1.5, 2.0, 2.5]

In [56]:
ap = ArithmeticProgression(0, 1/3, 1)
list(ap)

[0.0, 0.3333333333333333, 0.6666666666666666]

In [57]:
from fractions import Fraction
ap = ArithmeticProgression(0, Fraction(1, 3), 1)
list(ap)

[Fraction(0, 1), Fraction(1, 3), Fraction(2, 3)]

In [58]:
from decimal import Decimal
ap = ArithmeticProgression(0, Decimal('.1'), .3)
list(ap)

[Decimal('0'), Decimal('0.1'), Decimal('0.2')]

In [59]:
# 使用生成器函数比用类中实现__iter__方法简洁
def aritprog_gen(begin, step, end=None):
    """Generate begin, begin + step, ... while the term is below ``end``.

    With ``end=None`` the generator is endless. Terms are coerced to the
    type of ``begin + step``.
    """
    coerce = type(begin + step)
    value = coerce(begin)
    unbounded = end is None
    count = 0
    while True:
        if not unbounded and not value < end:
            return
        yield value
        count += 1
        # Multiply rather than accumulate to limit floating-point drift.
        value = begin + step * count

In [60]:
import itertools
gen = itertools.count(1, .5) # 永无止境
next(gen)

1

In [61]:
gen = itertools.takewhile(lambda n: n < 3, itertools.count(1, .5))
list(gen)

[1, 1.5, 2.0, 2.5]

In [62]:
import itertools


def aritprog_gen(begin, step, end=None):
    """Return an iterator over begin, begin + step, ... below ``end``.

    Not a generator function: it composes itertools building blocks and
    returns the resulting iterator. With ``end=None`` the iterator is
    endless.
    """
    start = type(begin + step)(begin)
    progression = itertools.count(start, step)
    if end is None:
        return progression
    return itertools.takewhile(lambda term: term < end, progression)

In [63]:
def chain(*iterables):
    """Yield every item of each iterable in turn, in argument order."""
    flattened = (item for source in iterables for item in source)
    for item in flattened:
        yield item

s = 'ABC'
t = tuple(range(3))
list(chain(s, t))

['A', 'B', 'C', 0, 1, 2]

In [65]:
def chain(*iterables):
    """Yield every item of each iterable in turn, in argument order.

    ``yield from`` delegates to each iterable, replacing the inner loop.
    """
    for source in iterables:
        yield from source

s = 'ABC'
t = tuple(range(3))
list(chain(s, t))

['A', 'B', 'C', 0, 1, 2]

In [66]:
all([1, 2, 3])

True

In [67]:
all([1, 0, 3])

False

In [68]:
all([])

True

In [69]:
any([1, 2, 3])

True

In [70]:
any([1, 0, 3])

True

In [71]:
any([])

False

In [72]:
g = (n for n in [0, 0.0, 7, 8])
any(g)

True

In [73]:
next(g)

8

In [74]:
from random import randint
def d6():
    """Roll a six-sided die: return a random integer from 1 to 6 inclusive."""
    roll = randint(1, 6)
    return roll

In [80]:
d6_iter = iter(d6, 1)
d6_iter

<callable_iterator at 0x10e2b7160>

In [81]:
for roll in d6_iter:
    print(roll)

3


In [82]:
a = 'abc'
a_iter = iter(a)
a_iter

<str_iterator at 0x10e2b72b0>

In [83]:
a = [1, 2, 3]
a_iter = iter(a)
a_iter

<list_iterator at 0x10e2b72e8>

In [84]:
def f():
    """Endless generator of 1, 2, 3, ... built by delegating to a helper."""

    def do_yield(n):
        """One-shot generator that yields ``n``."""
        yield n

    counter = 1
    while True:
        # `yield` only turns the function that contains it into a generator
        # function. A bare call `do_yield(counter)` would merely create a
        # generator object and discard it, so f would yield nothing and
        # spin forever. `yield from` forwards the helper's values.
        yield from do_yield(counter)
        counter += 1
 
g = f()
print(next(g))
print(next(g))
print(next(g))       

1
2
3


In [85]:
help(enumerate)

Help on class enumerate in module builtins:

class enumerate(object)
 |  enumerate(iterable, start=0)
 |  
 |  Return an enumerate object.
 |  
 |    iterable
 |      an object supporting iteration
 |  
 |  The enumerate object yields pairs containing a count (from start, which
 |  defaults to zero) and a value yielded by the iterable argument.
 |  
 |  enumerate is useful for obtaining an indexed list:
 |      (0, seq[0]), (1, seq[1]), (2, seq[2]), ...
 |  
 |  Methods defined here:
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  __iter__(self, /)
 |      Implement iter(self).
 |  
 |  __next__(self, /)
 |      Implement next(self).
 |  
 |  __reduce__(...)
 |      Return state information for pickling.
 |  
 |  ----------------------------------------------------------------------
 |  Static methods defined here:
 |  
 |  __new__(*args, **kwargs) from builtins.type
 |      Create and return a new object.  See help(type) for accurate signature.



In [86]:
for i, n in enumerate('abc'):
    print(i, n)

0 a
1 b
2 c


In [87]:
# 拓展1
# 杨辉三角 使用生成器(yield)


def triangles():
    """Endlessly yield the rows of Pascal's triangle, starting at [1]."""
    row = [1]
    while True:
        yield row
        # Pad the row with a 0 on each side and add the two shifted copies
        # element-wise to obtain the next row.
        row = [left + right for left, right in zip([0] + row, row + [0])]
    
    
# Collect and print the first ten rows, then compare them with the
# known-good rows of Pascal's triangle.
results = []
for n, t in enumerate(triangles(), start=1):
    print(t)
    results.append(t)
    if n == 10:
        break
if results != [
    [1],
    [1, 1],
    [1, 2, 1],
    [1, 3, 3, 1],
    [1, 4, 6, 4, 1],
    [1, 5, 10, 10, 5, 1],
    [1, 6, 15, 20, 15, 6, 1],
    [1, 7, 21, 35, 35, 21, 7, 1],
    [1, 8, 28, 56, 70, 56, 28, 8, 1],
    [1, 9, 36, 84, 126, 126, 84, 36, 9, 1]
]:
    print('测试失败!')
else:
    print('测试通过!')

[1]
[1, 1]
[1, 2, 1]
[1, 3, 3, 1]
[1, 4, 6, 4, 1]
[1, 5, 10, 10, 5, 1]
[1, 6, 15, 20, 15, 6, 1]
[1, 7, 21, 35, 35, 21, 7, 1]
[1, 8, 28, 56, 70, 56, 28, 8, 1]
[1, 9, 36, 84, 126, 126, 84, 36, 9, 1]
测试通过!


In [88]:
# 实现杨辉三角的10种解法--体验Python之美
# 收集了使用python实现杨辉三角的多种解法，从中可以体会python编写一个算法的不同思想和Python语法的特点。
# 解法函数是下面这样的。个人觉得第7，8，9，10这几个的实现最为巧妙。

def triangle1():
    """Endlessly yield the rows of Pascal's triangle, built cell by cell."""
    previous = [1]
    last_col = 0  # index of the last column of the row being built
    while True:
        # Edge cells are 1; inner cells add the two cells above them.
        current = [
            1 if col in (0, len(previous)) else previous[col - 1] + previous[col]
            for col in range(last_col + 1)
        ]
        previous = current
        yield current
        last_col += 1

def triangle2():
    """Endlessly yield the rows of Pascal's triangle, updating one list in place.

    Each yielded row is an independent copy, so callers may keep the rows
    they receive.
    """
    q = [1]
    while True:
        # Yield a copy: q itself is overwritten in place below.
        yield q[:]
        # Snapshot of the current row; q[i] is rewritten from these values.
        p = tuple(q)  # or can be p = q[:]
        i = 0
        while 0 <= i <= len(q) - 1:
            if i != 0:  # the first cell always stays 1
                q[i] = p[i-1] + p[i]
            i += 1
        # Extend the row once per round, after the inner loop. Appending
        # inside the loop would grow q as fast as i and never terminate.
        q.append(1)

def triangle3():
    """Endlessly yield the rows of Pascal's triangle, updating one list in place.

    Each yielded row is an independent copy, so callers may keep the rows
    they receive.
    """
    q = [1]
    while True:
        # Yield a copy: q itself is overwritten in place below.
        yield q[:]
        # Snapshot of the current row; q[i] is rewritten from these values.
        p = q[:]
        i = 1  # start at 1: the first cell always stays 1
        while 1 <= i <= len(q) - 1:
            q[i] = p[i-1] + p[i]
            i += 1
        q.append(1)

def triangle4():
    """Endlessly yield the rows of Pascal's triangle, updating one list in place.

    Each yielded row is an independent copy, so callers may keep the rows
    they receive.
    """
    q = [1]
    while True:
        # Yield a copy: q itself is overwritten in place below.
        yield q[:]
        # Snapshot of the current row; q[i] is rewritten from these values.
        p = q[:]
        for i in range(1, len(q)):  # the first cell always stays 1
            q[i] = p[i-1] + p[i]
        q.append(1)

def triangle5():
    """Endlessly yield the rows of Pascal's triangle, one new list per row."""
    p = [1]
    while True:
        yield p
        width = len(p)
        following = []
        for col in range(width + 1):
            if col in (0, width):
                # Edge cells copy the leading cell of the previous row.
                following.append(p[0])
            else:
                following.append(p[col - 1] + p[col])
        p = following

def triangle6():
    """Endlessly yield the rows of Pascal's triangle, one new list per row."""
    p = [1]
    while True:
        yield p
        # Inner cells are the sums of adjacent pairs; the edges are 1.
        inner = [left + right for left, right in zip(p, p[1:])]
        p = [1] + inner + [1]

def triangle7():
    """Endlessly yield the rows of Pascal's triangle via a zero-padded row.

    The next row is the list of adjacent-pair sums of ``[0] + row + [0]``.
    """
    p = [1]
    while True:
        yield p
        # Pad a new list rather than inserting into p: p has already been
        # handed to the caller and must not change afterwards.
        padded = [0] + p + [0]
        p = [padded[i] + padded[i+1] for i in range(len(padded) - 1)]

def triangle8():
    """Endlessly yield the rows of Pascal's triangle from two shifted copies."""
    p = [1]
    while True:
        yield p
        shifted_right = [0] + p  # row with a leading zero
        shifted_left = p + [0]  # row with a trailing zero
        # Element-wise sum of the two shifted copies is the next row.
        p = list(map(sum, zip(shifted_right, shifted_left)))

def triangle9():
    """Endlessly yield the rows of Pascal's triangle using index wrap-around.

    With a single trailing zero, ``padded[i-1]`` at ``i == 0`` wraps to that
    zero, so one padding element serves both edges.
    """
    p = [1]
    while True:
        yield p
        # Build a padded copy rather than appending to p: p has already
        # been handed to the caller and must not change afterwards.
        padded = p + [0]
        p = [padded[i-1] + padded[i] for i in range(len(padded))]

def triangle10():
    """Endlessly yield the rows of Pascal's triangle via zip of shifted rows."""
    row = [1]
    while True:
        yield row
        # Pair each cell with its right-hand neighbour (zero-padded at the
        # edges) and sum every pair.
        pairs = zip([0] + row, row + [0])
        row = list(map(sum, pairs))

def triangle11(n):
    """Return row ``n`` (1-based) of Pascal's triangle, computed recursively.

    Returns None when ``n`` is less than 1.
    """
    if n < 1:
        return None
    if n == 1:
        return [1]
    # Compute the previous row once. Recursing twice for the same row would
    # double the work at every level, i.e. exponential time in n.
    previous = triangle11(n - 1)
    return [left + right for left, right in zip([0] + previous, previous + [0])]

# test

# Print the first eleven rows from the generator version...
for n, i in enumerate(triangle9(), start=1):
    print(i)
    if n == 11:
        break

# ...and the same eleven rows from the recursive version.
for i in range(1, 12):
    print(triangle11(i))

[1]
[1, 1]
[1, 2, 1]
[1, 3, 3, 1]
[1, 4, 6, 4, 1]
[1, 5, 10, 10, 5, 1]
[1, 6, 15, 20, 15, 6, 1]
[1, 7, 21, 35, 35, 21, 7, 1]
[1, 8, 28, 56, 70, 56, 28, 8, 1]
[1, 9, 36, 84, 126, 126, 84, 36, 9, 1]
[1, 10, 45, 120, 210, 252, 210, 120, 45, 10, 1]
[1]
[1, 1]
[1, 2, 1]
[1, 3, 3, 1]
[1, 4, 6, 4, 1]
[1, 5, 10, 10, 5, 1]
[1, 6, 15, 20, 15, 6, 1]
[1, 7, 21, 35, 35, 21, 7, 1]
[1, 8, 28, 56, 70, 56, 28, 8, 1]
[1, 9, 36, 84, 126, 126, 84, 36, 9, 1]
[1, 10, 45, 120, 210, 252, 210, 120, 45, 10, 1]


In [89]:
# 拓展2
# 对于控制一些资源的访问很有用。

class Bank:
    """Bank whose ATMs dispense cash for as long as there is no crisis."""

    # Class-level default; setting it on an instance affects that bank only.
    crisis = False

    def create_atm(self):
        """Return a generator that yields '$100' until ``crisis`` is true.

        The flag is checked before every withdrawal, so an ATM created
        earlier stops as soon as the flag is set.
        """
        while True:
            if self.crisis:
                return
            yield '$100'


hsbc = Bank()
corner_street_atm = hsbc.create_atm()
print(next(corner_street_atm))
print([next(corner_street_atm) for cash in range(5)])
hsbc.crisis = True
# crisis 变成True之后，corner_street_atm 生成器 玩完了。
print(next(corner_street_atm))

$100
['$100', '$100', '$100', '$100', '$100']


StopIteration: 

In [90]:
# wall_street_atm 虽然是一个新的生成器，但是crisis 变成 True 之后，还没开始就玩完了。

wall_street_atm = hsbc.create_atm()
print(next(wall_street_atm))

StopIteration: 

In [91]:
# 恢复正常
hsbc.crisis = False
brand_new_atm = hsbc.create_atm()
print(next(brand_new_atm))

$100
