## Iterables, iterators and generators


In [1]:
import re
import reprlib

RE_WORD = re.compile('\w+')

class Sentence:
    
    def __init__(self, text):
        self.text = text
        self.words = re.findall(RE_WORD, text)
        
    def __getitem__(self, idx):
        return self.words[idx]
    
    def __len__(self):
        return len(self.words)
    
    def __repr__(self):
        return 'Sentence(%s)' %reprlib.repr(self.text)
    
    

In [2]:
s = Sentence('"The time has come," the Walrus said,')
s

Sentence('"The time ha... Walrus said,')

In [4]:
# 由于使用了 __getitem__ 所以该类满足 序列 协议
# 所以能看做 iterator
for w in s:
    print(w)

The
time
has
come
the
Walrus
said


In [5]:
list(s)

['The', 'time', 'has', 'come', 'the', 'Walrus', 'said']

In [8]:
n = iter(s)
next(n)

'The'

In [9]:
next(n)

'time'

### Iterables versus iterators


可迭代和迭代器是有区别的，简单来说，可迭代对象包含了迭代器

可迭代至少实现了 \_\_iter\_\_ 方法

迭代器至少实现了 \_\_iter\_\_　和 \_\_next\_\_ 方法

In [18]:
# 重构刚刚的代码
RE_P = re.compile('\w+')

class Sentence:
    
    def __init__(self, text):
        self.text = text 
        self.words = re.findall(RE_P, text)
        
    def __repr__(self):
        return 'Sentence(%s)' % reprlib.repr(self.text)
    
    def __iter__(self):
        return SentenceIterator(self.words)
    

class SentenceIterator:
    
    def __init__(self, words):
        self.words = words
        self.index = 0
        
    def __next__(self):
        try:
            word = self.words[self.index]
            
        except IndexError:
            raise StopIteration()
            
        self.index += 1
        return word
    
    
    def __iter__(self):
        return self

In [19]:
s = Sentence('"The time has come," the Walrus said,')

In [14]:
s

Sentence('"The time ha... Walrus said,')

In [16]:
for i in s:
    print(i)

The
time
has
come
the
Walrus
said


In [21]:
it = iter(s)
it

<__main__.SentenceIterator at 0x7fb7b4476f98>

In [22]:
next(it)

'The'

### a generator function


In [23]:
# 继承　Ｓentence 
# 重写 iter

class SentenceG(Sentence):
    
    def __iter__(self):
        for word in self.words:
            yield word
            
        return
    
    

In [24]:
sg = SentenceG('"The time has come," the Walrus said,')
sg

Sentence('"The time ha... Walrus said,')

In [26]:
for i in sg:
    print(i)

The
time
has
come
the
Walrus
said


In [27]:
sg.__iter__()

<generator object SentenceG.__iter__ at 0x7fb7b43f1678>

### How a generator function works


In [28]:
def gen123():
    
    for i in range(3):
        yield i
        

In [30]:
gen123()

<generator object gen123 at 0x7fb7b43f1830>

In [31]:
g = gen123()
next(g)

0

In [32]:
next(g)

1

In [33]:
# example 2

def genAB():
    print('A')
    yield 'A'
    print('B')
    yield 'B'
    
    

In [34]:
ga = genAB()
next(ga)

A


'A'

In [35]:
next(ga)

B


'B'

In [36]:
next(ga)

StopIteration: 

从上面的例子可以得出：

- 含有 yield 关键字的函数自动变为　生成器
- next 会执行每两次 yield 之间的内容
- 执行完毕之后会出发 StopIteration 异常

###  lazy implementation


In [43]:
class NewSentence:
    
    def __init__(self, text):
        self.text = text
        
    def __repr__(self):
        return 'Sentence(%s)' % reprlib.repr(self.text)
    
    # __iter__ 的实现完全采用了 lazy 的思想
    # 在 init 中不用实现 list 
    def __iter__(self):
        
        for match in RE_WORD.finditer(self.text):
            yield match.group()

In [39]:
ns = NewSentence('"The time has come," the Walrus said,')
ns

Sentence('"The time ha... Walrus said,')

In [40]:
it = iter(ns)
for i in it:
    print(i)

The
time
has
come
the
Walrus
said


In [42]:
# iter 是一个生成器
next(it)

StopIteration: 

In [44]:
# 结合生成器推导，写出更 pythonic 的代码
# 继承 ns, 覆写 __iter__


class Sentence2(NewSentence):
    
    def __iter__(self):
        return (match.group() for match in re.finditer(RE_WORD, self.text))

In [45]:
s2 = Sentence2('"The time has come," the Walrus said,')

In [46]:
s2

Sentence('"The time ha... Walrus said,')

In [47]:
for i in s2:
    print(i)

The
time
has
come
the
Walrus
said


### arithmetic progression generator


In [57]:
class ArithP:
    
    def __init__(self, start, step, end):
        
        self.begin = start
        self.step = step
        self.end = end # 'None' to infinite series
        
    def __iter__(self):
        result = type(self.begin + self.step)(self.begin)
        forever = self.end is None
        index = 0
        while forever or result < self.end:
            yield result
            index += 1
            result = self.begin + self.step*index
            

In [49]:
# 针对   result = type(self.begin + self.step)(self.begin) 
# 这一句的解释

s = 2.0 + 1 # float + int
type(s)

float

In [50]:
from fractions import Fraction

s2 = 2 + Fraction(1,3)
type(s2)

fractions.Fraction

In [51]:
from decimal import Decimal
s3 = 3 + Decimal('.1')
type(s3)

decimal.Decimal

In [54]:
# 所以 type(self.begin + self.step) 
# 的作用是获取到一个数据处理方法
#　然后 type(self.begin + self.step)(self.begin)  
# 反作用于 self.begin 本身

def unif(num1, num2):
    return type(num1+num2)(num1)

r1 = unif(2, Fraction(5,5))
print(r1)
print(type(r1))

2
<class 'fractions.Fraction'>


In [55]:
r2 = unif(1, 1.5)
print(r2)

1.0


In [58]:
# 对　ArithP　进行测试
ap = [i for i in ArithP(1, 0.1, 2)]
ap

[1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7000000000000002, 1.8, 1.9]

In [59]:
ap2 = [i for i in ArithP(1, Fraction(1,3), 3)]
ap2


[Fraction(1, 1),
 Fraction(4, 3),
 Fraction(5, 3),
 Fraction(2, 1),
 Fraction(7, 3),
 Fraction(8, 3)]

### Arithmetic progression with itertools


In [1]:
import itertools

# itertools.count(start, step)
gen = itertools.count(1, 0.5)
next(gen)

1

In [2]:
next(gen)

1.5

In [64]:
# itertools.count 没有 end 参数
# 所以千万不能 list(itertools.count(1, 0.5))
# 内存立马爆炸

itt = itertools.takewhile(lambda n: n<3, itertools.count(1, 0.5))
[i for i in itt]

[1, 1.5, 2.0, 2.5]

In [1]:
# 使用函数重写 Arithp

def arithp(start, step, end):
    start = type(start+step)(start)
    ap_gen = itertools.count(start, step)
    if end is not None:
        ap_gen = itertools.takewhile(lambda x: x<end, ap_gen)
        
    # 防止把我电脑搞挂
    ap_gen = itertools.takewhile(lambda x: x<20, ap_gen)
    return ap_gen

In [5]:
list(arithp(1, 0.5, 5))

[1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5]

### Generator functions in the standard library


In [2]:
# itertools 下面的函数

def vowel(c):
    return c.lower() in 'abcde'

list(filter(vowel, 'Ackdshwsgc'))

['A', 'c', 'd', 'c']

In [5]:
import itertools
s = 'Ackdshwsgc'
# 选出不满足 vowel 条件
list(itertools.filterfalse(vowel, s))

['k', 's', 'h', 'w', 's', 'g']

In [6]:
# 只要该序列中一个不满足，返回接下来的所有元素
list(itertools.dropwhile(vowel, s))

['k', 'd', 's', 'h', 'w', 's', 'g', 'c']

In [7]:
list(itertools.takewhile(vowel, s))

['A', 'c']

In [10]:
list(itertools.compress(s, [1 for i in range(len(s)) if i%2==0]))

['A', 'c', 'k', 'd', 's']

In [11]:
list(itertools.islice(s, 1,10,2))

['c', 'd', 'h', 's', 'c']

In [12]:
import random

sample = random.sample(range(100), 10)
sample

[56, 42, 79, 69, 72, 66, 89, 8, 43, 90]

In [14]:
# 累加函数
list(itertools.accumulate(sample))

[56, 98, 177, 246, 318, 384, 473, 481, 524, 614]

### New syntax in Python 3.3: yield from


In [15]:
# yeild from 使用场景

def chain(*iterables):
    for it in iterables:
        for i in it:
            yield i
            
def chain2(*iterables):
    for it in iterables:
        yield from it
        

In [16]:
s, t = range(5), 'ABCD'
print(list(chain(s,t)))
print(list(chain2(s,t)))

[0, 1, 2, 3, 4, 'A', 'B', 'C', 'D']
[0, 1, 2, 3, 4, 'A', 'B', 'C', 'D']


### Iterable reducing functions


In [17]:
# all any max min sum 等内置函数

s = range(3)
s2 = range(1,4)

In [18]:
any(s)

True

In [19]:
all(s)

False

### A closer look at the iter function


In [23]:
# iter 可接受两个参数
# iter(callable, setinel)
# 当返回的数为 sentinel 时，程序结束

def d6():
    return random.randint(1,6)

it = iter(d6, 3)
for i in it:
    print(i)

4
2


In [24]:
it

<callable_iterator at 0x7fa3802bbf28>