In [39]:
# 14.1 

In [40]:
import re
import reprlib

# Raw string literal: "\w" is not a valid Python string escape, so the
# non-raw form triggers an invalid-escape warning on recent interpreters.
RE_WORD = re.compile(r'\w+')

class Sentence:
    """A sentence exposed as a sequence of its words.

    The class defines ``__getitem__`` and ``__len__`` but no ``__iter__``;
    ``iter()`` still works on instances because Python falls back to calling
    ``__getitem__`` with 0, 1, 2, ... until ``IndexError`` is raised.
    """

    def __init__(self, text):
        """Store ``text`` and eagerly split it into a list of words."""
        self.text = text
        # Every run of word characters (as matched by RE_WORD) is one word.
        self.words = RE_WORD.findall(text)

    def __getitem__(self, item):
        """Return the word at position ``item`` (a slice gives a list of words)."""
        return self.words[item]

    def __len__(self):
        """Return the number of words extracted from the text."""
        return len(self.words)

    def __repr__(self):
        """Return ``Sentence(...)`` with the text abbreviated by ``reprlib.repr``."""
        return 'Sentence({})'.format(reprlib.repr(self.text))

In [41]:
s = Sentence("this is not the end")

In [42]:
# Check whether a Sentence instance can be iterated

In [43]:
for item in s:
    print(item)

this
is
not
the
end


In [44]:
list(s)

['this', 'is', 'not', 'the', 'end']

In [45]:
iter(s)

<iterator at 0x1d507599da0>

In [46]:
s3 = Sentence("Pig and Pepper")
it = iter(s3)

In [47]:
next(it)

'Pig'

In [48]:
list(it)

['and', 'Pepper']

In [49]:
list(iter(s3))

['Pig', 'and', 'Pepper']

In [50]:
# 14.3 Sentence2

In [51]:
import re
import reprlib
import collections.abc as abc

# Raw string literal: "\w" is not a valid Python string escape, so the
# non-raw form triggers an invalid-escape warning on recent interpreters.
RE_WORD = re.compile(r'\w+')

# Iterator.
# An iterator must implement __next__,
# and its __iter__ must return the iterator itself.
class SentenceIterator:
    """Iterator that walks an indexable collection of words front to back."""

    def __init__(self, words):
        """Remember ``words`` and start at position 0."""
        self.index = 0
        self.words = words

    def __iter__(self):
        """Return this same object."""
        return self

    def __next__(self):
        """Return the word at the current position and advance by one.

        Raises:
            StopIteration: when the current position is past the last word.
        """
        position = self.index
        try:
            current = self.words[position]
        except IndexError:
            raise StopIteration

        # The position moves forward only after a successful lookup, so an
        # exhausted iterator keeps raising StopIteration on later calls.
        self.index = position + 1
        return current

# Iterable.
# An iterable implements only __iter__; it must not implement __next__.
class Sentence2:
    """A sentence whose iteration is delegated to a separate iterator class."""

    def __init__(self, text):
        """Store ``text`` and eagerly split it into a list of words."""
        self.text = text
        extracted = RE_WORD.findall(text)
        self.words = extracted

    def __repr__(self):
        """Return ``Sentence(...)`` with the text abbreviated by ``reprlib.repr``."""
        shortened = reprlib.repr(self.text)
        return 'Sentence({})'.format(shortened)

    def __len__(self):
        """Return the number of words extracted from the text."""
        word_count = len(self.words)
        return word_count

    def __iter__(self):
        """Return a new ``SentenceIterator`` over the words on every call."""
        fresh_iterator = SentenceIterator(self.words)
        return fresh_iterator

In [52]:
print(issubclass(SentenceIterator, abc.Iterator))
print(issubclass(Sentence2, abc.Iterable))

True
True


In [53]:
# 注意可迭代对象一定不能是自身的迭代器

In [54]:
# 14.4 更python的迭代器

In [55]:
import re
import reprlib

# Raw string literal: "\w" is not a valid Python string escape, so the
# non-raw form triggers an invalid-escape warning on recent interpreters.
RE_WORD = re.compile(r'\w+')

class Sentence3:
    """A sentence whose ``__iter__`` is a generator function over its words."""

    def __init__(self, text):
        """Store ``text`` and eagerly split it into a list of words."""
        self.text = text
        self.words = RE_WORD.findall(text)

    def __iter__(self):
        """Yield the stored words one by one.

        Because the body contains ``yield``, each call returns a new
        generator object, so no separate iterator class is needed.
        """
        for word in self.words:
            yield word

    def __len__(self):
        """Return the number of words extracted from the text."""
        return len(self.words)

    def __repr__(self):
        """Return ``Sentence(...)`` with the text abbreviated by ``reprlib.repr``."""
        return 'Sentence({})'.format(reprlib.repr(self.text))

In [56]:
# 说明yield的用法

In [57]:
# 示例1

In [58]:
def gen123():
    """Generator function that yields 1, then 2, then 3, and then finishes."""
    for number in (1, 2, 3):
        yield number

In [59]:
for i in gen123():
    print(i)

1
2
3


In [60]:
# 从上面看出gen123返回的是生成器
# 生成器是迭代器，会生成传给yield关键字的表达式的值

In [61]:
g = gen123()

In [62]:
next(g)

1

In [63]:
next(g)

2

In [64]:
next(g)

3

In [65]:
next(g)

StopIteration: 

In [66]:
# 14.5 Sentence的惰性实现

In [67]:
import re
import reprlib

# Raw string literal: "\w" is not a valid Python string escape, so the
# non-raw form triggers an invalid-escape warning on recent interpreters.
RE_WORD = re.compile(r'\w+')

class Sentence3:
    """A lazy sentence: words are searched for only while iterating."""

    def __init__(self, text):
        """Store ``text`` only; no list of words is built up front."""
        self.text = text

    def __iter__(self):
        """Yield each word of the text, finding matches on demand.

        ``finditer`` produces match objects lazily. ``match.group()`` is the
        matched text itself; ``match.groups()`` would be an empty tuple here
        because the pattern has no capture groups.
        """
        for match in RE_WORD.finditer(self.text):
            yield match.group()

    def __repr__(self):
        """Return ``Sentence(...)`` with the text abbreviated by ``reprlib.repr``."""
        return 'Sentence({})'.format(reprlib.repr(self.text))

In [68]:
# 生成器表达式可以理解为列表推导的惰性版本
def gen_AB():
    """Yield 'A' then 'B', printing a trace line around each step.

    "start" is printed before 'A' is produced, "continue" before 'B', and
    "end" once the consumer asks for a third item, which makes it visible
    when the generator body actually runs.
    """
    print("start")
    # Each pair is (value to yield, message printed when iteration resumes).
    for letter, resume_message in (('A', "continue"), ('B', "end")):
        yield letter
        print(resume_message)

In [70]:
a = [x*2 for x in gen_AB()] # 可以看出列表推导迫切地推出

start
continue
end


In [71]:
a = (x*2 for x in gen_AB()) #而生成器表达式是惰性求值

In [72]:
for i in a:
    print(i)

start
AA
continue
BB
end


In [73]:
# 第五个版本的sentence

In [74]:
import re
import reprlib

# Raw string literal: "\w" is not a valid Python string escape, so the
# non-raw form triggers an invalid-escape warning on recent interpreters.
RE_WORD = re.compile(r'\w+')

class Sentence3:
    """A lazy sentence whose ``__iter__`` returns a generator expression."""

    def __init__(self, text):
        """Store ``text`` only; no list of words is built up front."""
        self.text = text

    def __iter__(self):
        """Return a generator producing each word of the text on demand.

        ``match.group()`` extracts the matched text; without it the caller
        would receive ``re.Match`` objects instead of words.
        """
        return (match.group() for match in RE_WORD.finditer(self.text))

    def __repr__(self):
        """Return ``Sentence(...)`` with the text abbreviated by ``reprlib.repr``."""
        return 'Sentence({})'.format(reprlib.repr(self.text))

In [75]:
# 等差数列生成器
class ArithmeticProgressions:
    """Iterable arithmetic progression: begin, begin + step, begin + 2*step, ...

    Iteration stops before the first value that is not less than ``end``;
    when ``end`` is None the progression never stops.
    """

    def __init__(self, begin, step, end=None):
        """Record the first value, the increment and the optional exclusive bound."""
        self.begin, self.step, self.end = begin, step, end

    def __iter__(self):
        """Yield the terms of the progression one at a time."""
        # Coerce the first term to the type that begin + step produces, so all
        # yielded values share the type of the later terms.
        coerce = type(self.begin + self.step)
        current = coerce(self.begin)
        unbounded = self.end is None
        terms_produced = 0
        while True:
            if not unbounded and not current < self.end:
                return
            yield current
            terms_produced += 1
            # Recompute from begin each time instead of adding step repeatedly.
            current = self.begin + self.step * terms_produced

In [76]:
# 优化为函数

In [79]:
def artiprog_gen(begin, step, end=None):
    """Generate the arithmetic progression begin, begin + step, begin + 2*step, ...

    Args:
        begin: first value of the progression.
        step: increment between consecutive values.
        end: exclusive upper bound; when None the generator never stops.

    Yields:
        Each term in turn, with the first term coerced to the type of
        ``begin + step``.
    """
    result = type(begin + step)(begin)
    forever = end is None
    index = 0
    # Compare against `end` (the bound), not `step`; `forever` short-circuits
    # the comparison when no bound was given.
    while forever or result < end:
        yield result
        index += 1
        # Recompute from begin each time instead of adding step repeatedly.
        result = begin + index*step

In [82]:
# 介绍使用itertools库的count函数和takewhile函数
import itertools

gen = itertools.takewhile(lambda n: n < 10, itertools.count(0, 2))
list(gen)

[0, 2, 4, 6, 8]

In [84]:
# 使用itertools优化函数
import itertools

def aritprog_gen(begin, step, end=None):
    """Return an iterator over the progression begin, begin + step, ...

    Args:
        begin: first value of the progression.
        step: increment between consecutive values.
        end: exclusive upper bound; when None the iterator is endless.

    Returns:
        An ``itertools.count`` iterator, wrapped in ``itertools.takewhile``
        when ``end`` is given.
    """
    # Coerce the first term to the type of begin + step and actually start
    # counting from it, so the first item has the same type as the rest.
    first = type(begin + step)(begin)
    ap_gen = itertools.count(first, step)
    if end is not None:
        ap_gen = itertools.takewhile(lambda n: n < end, ap_gen)
    return ap_gen

In [91]:
# 标准库中的生成器函数
import itertools
def vowel(c):
    """Return True when the lowercased ``c`` is found in the string 'aeiou'."""
    lowered = c.lower()
    return lowered in 'aeiou'

In [None]:
# 1.用于过滤的生成器函数

In [92]:
words = 'Aardvark'

In [93]:
list(filter(vowel, words)) #产出真值元素

['A', 'a', 'a']

In [96]:
list(itertools.filterfalse(vowel, words)) #产出非真值元素

['r', 'd', 'v', 'r', 'k']

In [97]:
list(itertools.dropwhile(vowel, words))   #跳过开始为真的元素，然后产出之后的所有元素

['r', 'd', 'v', 'a', 'r', 'k']

In [98]:
list(itertools.compress(words, (1,0,1,1,0,1))) #并行处理两个课迭代对象，后者为真时，才产出前面的元素

['A', 'r', 'd', 'a']

In [100]:
# 类似切片操作，但返回的是生成器
list(itertools.islice(words, 4))

['A', 'a', 'r', 'd']

In [101]:
list(itertools.islice(words, 4, 7))

['v', 'a', 'r']

In [102]:
list(itertools.islice(words, 4, 7, 2))

['v', 'r']

In [103]:
# 2.用于映射的生成器函数

In [104]:
sample = [0, 7, 4, 8, 2, 5, 6, 1, 3, 9]

In [106]:
# With no second argument, accumulate yields running totals: each item is the sum of all elements so far
list(itertools.accumulate(sample))

[0, 7, 11, 19, 21, 26, 32, 33, 36, 45]

In [108]:
list(itertools.accumulate(sample, max))

[0, 7, 7, 8, 8, 8, 8, 8, 8, 9]

In [109]:
# 输出（序号， 元素）
list(enumerate(words, 1))

[(1, 'A'),
 (2, 'a'),
 (3, 'r'),
 (4, 'd'),
 (5, 'v'),
 (6, 'a'),
 (7, 'r'),
 (8, 'k')]

In [111]:
# map(func, it1, [it2,...,itN]) 将iter的元素传给func，产出结果
import operator
list(map(operator.mul, range(1,11), range(1,11)))

[1, 4, 9, 16, 25, 36, 49, 64, 81, 100]

In [112]:
list(map(operator.mul, range(1,11), [2, 4, 8]))

[2, 8, 24]

In [113]:
# Unlike map, itertools.starmap takes a single iterable and calls func(*item) for each of its items
list(itertools.starmap(operator.mul, enumerate(words, 1)))

['A', 'aa', 'rrr', 'dddd', 'vvvvv', 'aaaaaa', 'rrrrrrr', 'kkkkkkkk']

In [114]:
# 3.合并多个可迭代对象的生成器函数

In [115]:
# 先产第一个iter,后产生下一个
list(itertools.chain('ABC', '012'))

['A', 'B', 'C', '0', '1', '2']

In [116]:
# 一个接一个产出元素
list(itertools.chain.from_iterable(enumerate(words, 1)))

[1, 'A', 2, 'a', 3, 'r', 4, 'd', 5, 'v', 6, 'a', 7, 'r', 8, 'k']

In [119]:
# 同时输出元素，并且迭代长度为最小长度
list(zip('ABC', range(5)))

[('A', 0), ('B', 1), ('C', 2)]

In [120]:
# Yields tuples in parallel; iteration runs to the length of the longest iterable, padding with fillvalue
list(itertools.zip_longest('ABC', range(5), fillvalue='?'))

[('A', 0), ('B', 1), ('C', 2), ('?', 3), ('?', 4)]

In [124]:
# 输出笛卡尔积
list(itertools.product('AB','45',repeat=2))

[('A', '4', 'A', '4'),
 ('A', '4', 'A', '5'),
 ('A', '4', 'B', '4'),
 ('A', '4', 'B', '5'),
 ('A', '5', 'A', '4'),
 ('A', '5', 'A', '5'),
 ('A', '5', 'B', '4'),
 ('A', '5', 'B', '5'),
 ('B', '4', 'A', '4'),
 ('B', '4', 'A', '5'),
 ('B', '4', 'B', '4'),
 ('B', '4', 'B', '5'),
 ('B', '5', 'A', '4'),
 ('B', '5', 'A', '5'),
 ('B', '5', 'B', '4'),
 ('B', '5', 'B', '5')]

In [125]:
# 4.把输入元素的各个元素扩展成多个输出元素的生成器函数

In [129]:
# 从start开始计数，每次加一个step
ct = itertools.count()

In [130]:
next(ct), next(ct)

(0, 1)

In [131]:
# 不断循环产出元素
list(itertools.islice(itertools.cycle('ABCD'), 7))

['A', 'B', 'C', 'D', 'A', 'B', 'C']

In [133]:
# 重复产出指定元素
print(list(itertools.repeat(4,7)))
list(map(operator.mul, range(11), itertools.repeat(5)))

[4, 4, 4, 4, 4, 4, 4]


[0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50]

In [136]:
# 组合学/生成器函数
print(list(itertools.combinations('ABC',2))) # 输出不包括相同元素的无序组合种类
print(list(itertools.combinations_with_replacement('ABC', 2))) # 输出包括相同元素的无序组合
print(list(itertools.permutations('ABC', 2))) # 输出不包括相同元素的有序排列
list(itertools.product('ABC', repeat=2))      # 输出包括相同元素的有序排列

[('A', 'B'), ('A', 'C'), ('B', 'C')]
[('A', 'A'), ('A', 'B'), ('A', 'C'), ('B', 'B'), ('B', 'C'), ('C', 'C')]
[('A', 'B'), ('A', 'C'), ('B', 'A'), ('B', 'C'), ('C', 'A'), ('C', 'B')]


[('A', 'A'),
 ('A', 'B'),
 ('A', 'C'),
 ('B', 'A'),
 ('B', 'B'),
 ('B', 'C'),
 ('C', 'A'),
 ('C', 'B'),
 ('C', 'C')]

In [137]:
# 5.用于重新排列的生成器函数

In [142]:
# 按自定义的分类标准进行分组
for char, group in itertools.groupby("DDDDAAAASSS"):
    print(char,'-->', list(group))

D --> ['D', 'D', 'D', 'D']
A --> ['A', 'A', 'A', 'A']
S --> ['S', 'S', 'S']


In [144]:
animals = ['rebbit', 'duck', 'rat', 'giraffe', 'dolphin', 'lion', 'bear']
animals.sort(key=len)
for length, group in itertools.groupby(animals, len):
    print(length, '-->', list(group))

3 --> ['rat']
4 --> ['duck', 'lion', 'bear']
6 --> ['rebbit']
7 --> ['giraffe', 'dolphin']


In [145]:
# 产生多个迭代器
list(zip(*itertools.tee('ABC')))

[('A', 'A'), ('B', 'B'), ('C', 'C')]

In [147]:
# yield from 方法
s1 = 'ABC'
s2 = '012'

# Mimics itertools.chain
def mychain(*iterables):
    """Yield every item of each given iterable, one iterable after another."""
    for i in iterables:
        # `yield from` hands iteration over to `i` until it is exhausted.
        yield from i
list(mychain(s1, s2))

['A', 'B', 'C', '0', '1', '2']

In [148]:
# 注意any,all,max,min对生成器的影响
g = (n for n in [0, 0.0, 7, 8])
any(g)
next(g)

8

In [157]:
# iter()函数的特殊用法，添加哨兵值, 函数必须有返回值
from random import randint

def d6():
    """Return a random integer from 1 to 6 inclusive, like one roll of a die."""
    roll = randint(1, 6)
    return roll
d6_iter = iter(d6, 1) # 1为哨符，不会被打印
for i in d6_iter:
    print(i)

4
