# 迭代器与生成器

## 手动遍历迭代器

In [1]:
# Drive an iterator by hand: keep calling next() until StopIteration
# reports that the underlying list is exhausted.
items = [1, 2, 3, 4]
it = iter(items)

while True:
    try:
        num = next(it)
    except StopIteration:
        break
    print(num)

1
2
3
4


## 代理迭代

In [3]:
class Node:
    '''
    Tree node holding a value and an ordered list of child nodes.

    Iterating over a node yields its direct children, because __iter__
    hands the request over to the internal child list.
    '''
    def __init__(self, value):
        self._children = []
        self._value = value

    def add_child(self, node):
        '''Append node as the last child of this node.'''
        self._children.append(node)

    def __iter__(self):
        '''Return an iterator over the direct children, in insertion order.'''
        return iter(self._children)

    def __repr__(self):
        template = 'Node({!r})'
        return template.format(self._value)

# Build a two-child tree, then loop over the root directly.
root = Node(0)
child1, child2 = Node(1), Node(2)
root.add_child(child1)
root.add_child(child2)
# Prints Node(1) followed by Node(2)
for ch in root:
    print(ch)

Node(1)
Node(2)


## 使用生成器创建新的迭代模式

In [4]:
def frange(start, stop, increment):
    '''
    Yield numbers from start towards stop (exclusive), stepping by increment.

    Works like range() but also accepts floats. Values are produced by
    repeated addition, so inexact float steps accumulate rounding error.

    Args:
        start: first value produced.
        stop: bound that is never reached; an upper bound for a positive
            increment, a lower bound for a negative one.
        increment: non-zero step added after each value.

    Raises:
        ValueError: if increment is zero. Because this is a generator, the
            error surfaces on the first next() call, not when frange() is
            called.
    '''
    if increment == 0:
        # A zero step would never reach stop and would loop forever.
        raise ValueError('frange() increment must not be zero')
    x = start
    if increment > 0:
        while x < stop:
            yield x
            x += increment
    else:
        # Negative step: count down until stop is reached or passed.
        while x > stop:
            yield x
            x += increment

# Print every half step from 0 up to, but not including, 5.
for i in frange(start=0, stop=5, increment=0.5):
    print(i)

0
0.5
1.0
1.5
2.0
2.5
3.0
3.5
4.0
4.5


## 实现迭代器协议

In [5]:
class Node:
    '''
    Tree node holding a value and an ordered list of child nodes.

    Plain iteration yields the direct children; depth_first() walks the
    whole subtree.
    '''
    def __init__(self, value):
        self._children = []
        self._value = value

    def add_child(self, node):
        '''Append node as the last child of this node.'''
        self._children.append(node)

    def __iter__(self):
        '''Return an iterator over the direct children, in insertion order.'''
        return iter(self._children)

    def depth_first(self):
        '''
        Yield this node, then each child's subtree in turn (pre-order).
        '''
        yield self
        for child in self:
            # Delegate to the child's own traversal so nesting depth is unlimited.
            yield from child.depth_first()

    def __repr__(self):
        template = 'Node({!r})'
        return template.format(self._value)
# Two-level tree: 0 -> (1 -> (3, 4), 2 -> (5)).
root = Node(0)
child1, child2 = Node(1), Node(2)
root.add_child(child1)
root.add_child(child2)
child1.add_child(Node(3))
child1.add_child(Node(4))
child2.add_child(Node(5))
# Pre-order walk: each parent is printed before its children.
for ch in root.depth_first():
    print(ch)

Node(0)
Node(1)
Node(3)
Node(4)
Node(2)
Node(5)


## 反向迭代

In [9]:
# Walk a list from its last element to its first.
a = [1, 2, 3, 4]
for x in reversed(a):
    print(x)
# Reverse iteration only works when the object's size can be determined in
# advance or when the object implements the __reversed__() special method.

class Countdown:
    '''
    Iterable that counts from start down to 1, and up again under reversed().
    '''
    def __init__(self, start):
        self.start = start

    def __reversed__(self):
        '''Yield 1, 2, ... up to and including start.'''
        rising = 1
        while rising <= self.start:
            yield rising
            rising += 1

    def __iter__(self):
        '''Yield start, start - 1, ... for as long as the value is above 0.'''
        falling = self.start
        while falling > 0:
            yield falling
            falling -= 1
# Separator line, then the countdown walked backwards (1 through 10).
print('-'*10)
for rr in reversed(Countdown(start=10)):
    print(rr)

4
3
2
1
----------
1
2
3
4
5
6
7
8
9
10


## 带有外部状态的生成器函数

In [2]:
from collections import deque

class linehistory:
    '''
    Iterable wrapper that remembers the most recent lines it has produced.

    While iterating, each (line number, line) pair is pushed onto
    self.history, a deque capped at len entries, so the caller can look
    back at recent lines mid-loop. Line numbers start at 1.
    '''
    def __init__(self, lines, len=3):
        self.history = deque(maxlen=len)
        self.lines = lines

    def clear(self):
        '''Forget every remembered line.'''
        self.history.clear()

    def __iter__(self):
        for numbered in enumerate(self.lines, start=1):
            # Record the pair before handing the line out, so the current
            # line is already in the history when the caller sees it.
            self.history.append(numbered)
            yield numbered[1]

lines = linehistory('a b c d e f'.split(' '))
for line in lines:
    if 'c' not in line:
        continue
    # On a match, dump the remembered (number, line) pairs on one row.
    for lineno, hline in lines.history:
        print('{}:{}'.format(lineno, hline), end=' ')

1:a 2:b 3:c

## 迭代器切片

In [5]:
def count(n):
    '''
    Yield n, then n + 1, n + 2, ... without ever stopping.

    The generator is endless; the caller decides how much to consume.
    '''
    current = n
    while True:
        yield current
        current += 1

import itertools
# A generator cannot be sliced with [10:20]; islice() skips the first ten
# values and stops after the twentieth.
c = count(0)
for x in itertools.islice(c, 10, 20):
    print(x)

10
11
12
13
14
15
16
17
18
19


## 跳过可迭代对象的开始部分

In [6]:
from itertools import dropwhile

lines = '#a #b c d e f'.split(' ')
# Skip the leading entries that start with '#', then print the rest.
for line in dropwhile(lambda text: text.startswith('#'), lines):
    print(line, end=' ')

c d e f

## 排列组合的迭代

In [11]:
from itertools import permutations

items = ['a', 'b', 'c']

# Every ordering of all three items, followed by every ordered pair.
for i in permutations(items):
    print(i)
print()
for i in permutations(items, r=2):
    print(i)
print()

from itertools import combinations

# Order-insensitive selections: the one full-size combination, then all pairs.
for c in combinations(items, r=3):
    print(c)
print()
for c in combinations(items, r=2):
    print(c)
print()

from itertools import combinations_with_replacement

# Like combinations(), except the same item may be chosen more than once.
for c in combinations_with_replacement(items, r=3):
    print(c)

('a', 'b', 'c')
('a', 'c', 'b')
('b', 'a', 'c')
('b', 'c', 'a')
('c', 'a', 'b')
('c', 'b', 'a')

('a', 'b')
('a', 'c')
('b', 'a')
('b', 'c')
('c', 'a')
('c', 'b')

('a', 'b', 'c')

('a', 'b')
('a', 'c')
('b', 'c')

('a', 'a', 'a')
('a', 'a', 'b')
('a', 'a', 'c')
('a', 'b', 'b')
('a', 'b', 'c')
('a', 'c', 'c')
('b', 'b', 'b')
('b', 'b', 'c')
('b', 'c', 'c')
('c', 'c', 'c')


## 序列上索引值迭代

In [12]:
my_list = ['a', 'b', 'c']
# enumerate() pairs each element with its position, counting from 0.
for idx, val in enumerate(my_list, start=0):
    print(idx, val)

0 a
1 b
2 c


## 同时迭代多个序列

In [19]:
# Two parallel coordinate lists of equal length.
xpts = [1, 5, 4, 2, 10, 7]
ypts = [101, 78, 37, 15, 62, 99]
# zip() walks both lists in lockstep, one (x, y) pair per step.
for x, y in zip(xpts, ypts):
    print(x, y)

1 101
5 78
4 37
2 15
10 62
7 99


In [18]:
# Inputs of different lengths: zip() stops once the shorter one runs out,
# so 'z' is never paired.
a = [1, 2, 3]
b = ['w', 'x', 'y', 'z']
for i in zip(a, b):
    print(i)

(1, 'w')
(2, 'x')
(3, 'y')


In [20]:
from itertools import zip_longest
# zip_longest() keeps going until the longer input ends, padding the
# shorter one with the fill value (None here).
for i in zip_longest(a, b, fillvalue=None):
    print(i)

(1, 'w')
(2, 'x')
(3, 'y')
(None, 'z')


## 不同集合上元素的迭代

In [22]:
from itertools import chain
'''
itertools.chain() 接受一个或多个可迭代对象作为输入参数。
然后创建一个迭代器，依次连续地返回每个可迭代对象中的元素。
这种方式要比先将序列合并再迭代高效得多。
'''
# Two separate containers, visited back to back without merging them first.
a = [1, 2, 3, 4]
b = ['x', 'y', 'z']
for x in chain(a, b):
    print(x)

1
2
3
4
x
y
z


## 创建数据处理管道

In [None]:
import os
import fnmatch
import gzip
import bz2
import re

def gen_find(filepat, top):
    '''
    Yield the path of every file below top whose name matches filepat.

    filepat is a shell-style wildcard (as understood by fnmatch) applied to
    file names only; the directory tree is walked with os.walk and each
    match is joined onto the directory it was found in.
    '''
    for dirpath, _subdirs, filenames in os.walk(top):
        matching = fnmatch.filter(filenames, filepat)
        for filename in matching:
            yield os.path.join(dirpath, filename)

def gen_opener(filenames):
    '''
    Open a sequence of filenames one at a time, yielding each file object.

    Names ending in '.gz' are opened with gzip, names ending in '.bz2' with
    bz2, and everything else with the builtin open(); all in text mode.
    Each file is closed as soon as the consumer asks for the next one, and
    also if the consumer abandons the generator early or an error is raised
    while the file is in use.

    An empty filenames sequence yields nothing.
    '''
    for filename in filenames:
        if filename.endswith('.gz'):
            f = gzip.open(filename, 'rt')
        elif filename.endswith('.bz2'):
            f = bz2.open(filename, 'rt')
        else:
            f = open(filename, 'rt')
        # The yield must sit inside the loop so every file is produced, and
        # inside the with block so the file is closed on every exit path.
        with f:
            yield f

def gen_concatenate(iterators):
    '''
    Yield every item of every iterable in iterators, in order, as one stream.

    Each inner iterable is only started once the previous one is exhausted.
    '''
    for source in iterators:
        for item in source:
            yield item

def gen_grep(pattern, lines):
    '''
    Yield only those lines in which the regex pattern is found.

    The pattern is compiled once, when iteration starts, and matched
    anywhere in the line (search, not match).
    '''
    found_in = re.compile(pattern).search
    # search() returns a match object or None, so it doubles as the filter
    # predicate.
    yield from filter(found_in, lines)

## 展开嵌套的序列

In [23]:
from collections.abc import Iterable
def flatten(items, ignore_types=(str, bytes)):
    '''
    Yield the leaf values of an arbitrarily nested iterable, depth first.

    Args:
        items: iterable whose elements may themselves be iterables.
        ignore_types: type or tuple of types that are treated as leaves
            even though they are iterable. The same setting applies at
            every nesting level.

    Yields:
        Each non-iterable element, and each element of an ignored type,
        in the order it is encountered.
    '''
    for x in items:
        if isinstance(x, Iterable) and not isinstance(x, ignore_types):
            if isinstance(x, str) and len(x) == 1:
                # A one-character string iterates to itself; treat it as a
                # leaf so that not ignoring str cannot recurse forever.
                yield x
            else:
                # Pass ignore_types down so nested levels honour it too.
                yield from flatten(x, ignore_types)
        else:
            yield x
# Three levels of nesting, printed as a flat 1..8 sequence.
items = [1, 2, [3, 4, [5, 6], 7], 8]
for x in flatten(items):
    print(x)

1
2
3
4
5
6
7
8


## 顺序迭代合并后的排序迭代对象

In [None]:
import heapq

# Two inputs that are each already sorted in ascending order.
a = [1, 4, 7, 10]
b = [2, 5, 6, 11]
'''
heapq.merge 可迭代特性意味着它不会立马读取所有序列。这就意味着你可以在非常长的序列中使用它，而不会有太大的开销。
'''
# merge() interleaves them lazily into a single ascending stream.
for c in heapq.merge(a, b):
    print(c)