# Chapter 14. Iterables, Iterators, and Generators

# Sentence Take #1: A Sequence of Words

In [1]:
import re
import reprlib

# Raw string: in a plain literal '\w' is an invalid escape sequence, which
# newer Python versions warn about.
RE_WORD = re.compile(r'\w+')


class Sentence:
    """A sentence exposed as a sequence of its words.

    The words are extracted eagerly in ``__init__`` with ``RE_WORD`` and kept
    in ``self.words``.  The class defines no ``__iter__``; it is iterable
    because ``iter()`` falls back to ``__getitem__`` with indexes from 0.
    """

    def __init__(self, text):
        """Store *text* and the list of all words found in it."""
        self.text = text
        self.words = RE_WORD.findall(text)

    def __getitem__(self, index):
        """Return the word (or slice of words) at *index*."""
        return self.words[index]

    def __len__(self):
        """Return the number of words in the sentence."""
        return len(self.words)

    def __repr__(self):
        # reprlib.repr abbreviates long texts so the repr stays short.
        return 'Sentence(%s)' % reprlib.repr(self.text)

In [2]:
s = Sentence('"The time has come," the Walrus said,')
s

Sentence('"The time ha... Walrus said,')

In [3]:
for word in s:
    print(word)

The
time
has
come
the
Walrus
said


In [5]:
list(s)

['The', 'time', 'has', 'come', 'the', 'Walrus', 'said']

## Why Sequences Are Iterable: The iter Function
Whenever the interpreter needs to iterate over an object `x`, it automatically calls `iter(x)`, which:
1. Checks whether the object implements `__iter__`, and calls that to obtain an iterator.
1. If `__iter__` is not implemented, but `__getitem__` is implemented, Python creates an iterator that attempts to fetch items in order, starting from index 0.
1. If that fails, Python raises `TypeError`, usually saying "C object is not iterable," where C is the class of the target object.

The best way to check if an object x is an iterator is to call `isinstance(x, abc.Iterator)` because of `Iterator.__subclasshook__`.

### Iterable
- Any object from which the `iter` built-in function can obtain an iterator.
- Objects implementing an `__iter__` method returning an `iterator` are iterable.
- Sequences are always iterable; as are objects implementing a `__getitem__` method that takes 0-based indexes.

<br>

The most accurate way to check whether an object x is iterable is to call `iter(x)` and handle a `TypeError` exception if it isn't.  
This is more accurate than using `isinstance(x, abc.Iterable)`, because `iter(x)` also considers the legacy `__getitem__` method, while the `Iterable` ABC does not.

### Iterator
- Any object that implements the `__next__` no-argument method that returns the next item in a series or raises `StopIteration` when there are no more items.
- Python iterators also implement the `__iter__` method so they are `iterable` as well.

<br>

The best way to check if an object x is an iterator is to call `isinstance(x, abc.Iterator)` because of `Iterator.__subclasshook__`

In [7]:
s = 'ABC'
for char in s:
    print(char)

A
B
C


In [8]:
s = 'ABC'
it = iter(s)
while True:
    try:
        print(next(it))
    except StopIteration:
        del it
        break

A
B
C


# Sentence Take #2: A Classic Iterator
Iterator Design Pattern
- to access an aggregate object's contents without exposing its internal representation.
- to support multiple traversals of aggregate objects.
- to provide a uniform interface for traversing different aggregate structures (that is, to support polymorphic iteration).

In [None]:
# Sentence implemented using the Iterator pattern
import re
import reprlib

# Raw string: in a plain literal '\w' is an invalid escape sequence, which
# newer Python versions warn about.
RE_WORD = re.compile(r'\w+')


class Sentence:
    """A sentence that is iterable over its words (classic Iterator pattern).

    The words are extracted eagerly in ``__init__``.  Every call to
    ``__iter__`` builds a new ``SentenceIterator`` over the stored word list.
    """

    def __init__(self, text):
        """Store *text* and the list of all words found in it."""
        self.text = text
        self.words = RE_WORD.findall(text)

    def __repr__(self):
        # reprlib.repr abbreviates long texts so the repr stays short.
        return 'Sentence(%s)' % reprlib.repr(self.text)

    def __iter__(self):
        """Return a fresh ``SentenceIterator`` over ``self.words``."""
        return SentenceIterator(self.words)


class SentenceIterator:
    """Iterator that walks a list of words from index 0 onwards.

    ``words`` is the sequence being traversed and ``index`` is the position
    of the next word to hand out.
    """

    def __init__(self, words):
        self.words = words
        self.index = 0

    def __iter__(self):
        # An iterator is its own iterable.
        return self

    def __next__(self):
        """Return the next word, or raise ``StopIteration`` past the end."""
        position = self.index
        try:
            current = self.words[position]
        except IndexError:
            raise StopIteration()
        # Only advance once a word was actually fetched.
        self.index = position + 1
        return current

In [13]:
class MyIterable:
    """Iterable holding its own list copy of the elements it was given."""

    def __init__(self, els):
        # Materialize *els* so the instance keeps its own list of the items.
        self.els = [*els]

    def __iter__(self):
        """Return a new ``MyIterator`` over the stored elements."""
        iterator = MyIterator(self.els)
        return iterator

class MyIterator:
    """Iterator over an indexable collection, starting at index 0.

    ``els`` is the collection being traversed and ``idx`` is the position of
    the next element to return.
    """

    def __init__(self, els):
        self.els = els
        self.idx = 0

    def __iter__(self):
        # An iterator is its own iterable.
        return self

    def __next__(self):
        """Return the next element, or raise ``StopIteration`` past the end."""
        try:
            item = self.els[self.idx]
        except IndexError:
            raise StopIteration()
        else:
            self.idx += 1
            return item

In [14]:
my_iterable = MyIterable(range(3))
my_it = iter(my_iterable)
my_it

<__main__.MyIterator at 0x10f0983c8>

In [15]:
my_i = iter(my_it)
my_i

<__main__.MyIterator at 0x10f0983c8>

In [16]:
next(my_i)

0

In [17]:
next(my_i)

1

In [24]:
my_iterable = MyIterable([x * 10**i for x in range(1, 4)] for i in range(3))

In [25]:
my_iter = iter(my_iterable)
next(my_iter)

[1, 2, 3]

In [26]:
my_it = iter(my_iter)
next(my_it)

[10, 20, 30]

# Sentence Take #3: A Generator Function

In [27]:
# Sentence implemented using a generator function
import re
import reprlib

# Raw string: in a plain literal '\w' is an invalid escape sequence, which
# newer Python versions warn about.
RE_WORD = re.compile(r'\w+')


class Sentence:
    """A sentence that is iterable over its words via a generator function.

    The words are extracted eagerly in ``__init__``; ``__iter__`` is a
    generator function, so each call returns a new generator over them.
    """

    def __init__(self, text):
        """Store *text* and the list of all words found in it."""
        self.text = text
        self.words = RE_WORD.findall(text)

    def __repr__(self):
        # String formatting needs '%'; '&' raised TypeError (str & str).
        return 'Sentence(%s)' % reprlib.repr(self.text)

    def __iter__(self):
        """Yield the words of the sentence one at a time."""
        for word in self.words:
            yield word


# Backward-compatible alias for the original, misspelled class name.
Senetence = Sentence

# No need for a separate iterator class

## How a Generator Function Works

In [28]:
def gen_123():
    """Generator function producing the integers 1, 2 and 3, in that order."""
    for number in (1, 2, 3):
        yield number

gen_123

<function __main__.gen_123()>

In [29]:
gen_123()

<generator object gen_123 at 0x10f06b8b8>

In [30]:
for i in gen_123():
    print(i)

1
2
3


In [31]:
g = gen_123()
next(g)

1

In [32]:
next(g)

2

In [39]:
def gen_lst():
    """Yield three list iterators: over [1, 2, 3] scaled by 1, 10 and 100."""
    for exponent in range(3):
        scale = 10 ** exponent
        row = [base * scale for base in range(1, 4)]
        yield iter(row)

In [40]:
gl = gen_lst()
next(gl)

<list_iterator at 0x10f0b5c88>

In [41]:
l = iter(gl)
next(l)

<list_iterator at 0x10f0855c0>

In [42]:
gl

<generator object gen_lst at 0x10f06ba20>

# Sentence Take #4: A Lazy Implementation

In [43]:
# Sentence implemented using a generator function calling the re.finditer generator function
import re
import reprlib

# Raw string: in a plain literal '\w' is an invalid escape sequence, which
# newer Python versions warn about.
RE_WORD = re.compile(r'\w+')


class Sentence:
    """A sentence that lazily yields its words.

    No word list is built up front: ``__iter__`` scans ``self.text`` with
    ``RE_WORD.finditer`` and produces each word on demand.
    """

    def __init__(self, text):
        """Store *text*; the words are only searched for during iteration."""
        self.text = text

    def __repr__(self):
        # reprlib.repr abbreviates long texts so the repr stays short.
        return 'Sentence(%s)' % reprlib.repr(self.text)

    def __iter__(self):
        """Yield each word of the text, matched on demand."""
        # Must read the instance attribute: a bare `text` is not defined in
        # this scope and raised NameError on the first iteration.
        for match in RE_WORD.finditer(self.text):
            yield match.group()

# Sentence Take #5: A Generator Expression

In [44]:
# Sentence implemented using a generator expression
import re
import reprlib

# Raw string: in a plain literal '\w' is an invalid escape sequence, which
# newer Python versions warn about.
RE_WORD = re.compile(r'\w+')


class Sentence:
    """A sentence that lazily yields its words via a generator expression.

    No word list is built up front: ``__iter__`` returns a generator that
    pulls matches from ``RE_WORD.finditer`` on demand.
    """

    def __init__(self, text):
        """Store *text*; the words are only searched for during iteration."""
        self.text = text

    def __repr__(self):
        # reprlib.repr abbreviates long texts so the repr stays short.
        return 'Sentence(%s)' % reprlib.repr(self.text)

    def __iter__(self):
        """Return a new generator over the words of the text."""
        return (match.group() for match in RE_WORD.finditer(self.text))

# Another Example: Arithmetic Progression Generator

In [45]:
class ArithmeticProgression:
    """Iterable arithmetic progression: begin, begin + step, begin + 2*step, ...

    Parameters:
        begin: first term of the progression.
        step:  difference between consecutive terms.
        end:   optional exclusive upper bound; when ``None`` (the default)
               the progression is unbounded.
    """

    def __init__(self, begin, step, end=None):
        self.begin = begin
        self.step = step
        self.end = end

    def __iter__(self):
        """Yield the terms of the progression while they are below ``end``."""
        # Coerce the first term to the type of `begin + step` so that it has
        # the same numeric type as the terms that follow.
        result = type(self.begin + self.step)(self.begin)
        forever = self.end is None
        index = 0
        while forever or result < self.end:
            yield result
            index += 1
            # Each term is derived from `begin` directly instead of by
            # repeated addition, so rounding errors do not accumulate.
            result = self.begin + self.step * index

In [46]:
def aritprog_gen(begin, step, end=None):
    """Generate the arithmetic progression begin, begin + step, ...

    Terms are produced while they are below *end*; with ``end=None`` the
    generator never stops on its own.
    """
    # The first term takes the type of `begin + step`, like the later terms.
    value = type(begin + step)(begin)
    count = 0
    while end is None or value < end:
        yield value
        count += 1
        value = begin + step * count

## Arithmetic Progression with itertools

In [47]:
import itertools

gen = itertools.count(1, .5)
next(gen)

1

In [48]:
next(gen)

1.5

In [50]:
gen = itertools.takewhile(lambda n: n < 3, itertools.count(1, .5))
list(gen)

[1, 1.5, 2.0, 2.5]

In [51]:
import itertools

def aritprog_gen(begin, step, end=None):
    """Return an iterator over the arithmetic progression begin, begin + step, ...

    Built from ``itertools.count``; when *end* is given the iterator is
    wrapped in ``itertools.takewhile`` so it stops at the first term that is
    not below *end*.
    """
    # The first term takes the type of `begin + step`.
    start = type(begin + step)(begin)
    progression = itertools.count(start, step)
    if end is None:
        return progression
    return itertools.takewhile(lambda value: value < end, progression)

# Generator Functions in the Standard Library

## Filtering generator functions

In [54]:
def vowel(c):
    """Return True if the character *c* is one of a, e, i, o, u (any case).

    Membership is tested against a set of single characters rather than the
    string 'aeiou': a substring test would also accept the empty string and
    multi-character runs such as 'ae'.
    """
    return c.lower() in {'a', 'e', 'i', 'o', 'u'}

s = 'Aardvark'
list(filter(vowel, s))

['A', 'a', 'a']

In [55]:
import itertools
list(itertools.filterfalse(vowel, s))

['r', 'd', 'v', 'r', 'k']

In [56]:
list(itertools.dropwhile(vowel, s))

['r', 'd', 'v', 'a', 'r', 'k']

In [57]:
list(itertools.takewhile(vowel, s))

['A', 'a']

In [58]:
list(itertools.compress(s, (1, 0, 1, 1, 0, 1)))

['A', 'r', 'd', 'a']

In [59]:
list(itertools.islice(s, 4))

['A', 'a', 'r', 'd']

In [60]:
list(itertools.islice(s, 4, 7))

['v', 'a', 'r']

In [61]:
list(itertools.islice(s, 1, 7, 2))

['a', 'd', 'a']

## Mapping generator functions

In [62]:
sample = [5, 4, 2, 8, 7, 6, 3, 0, 9, 1]
import itertools
list(itertools.accumulate(sample))

[5, 9, 11, 19, 26, 32, 35, 35, 44, 45]

In [63]:
list(itertools.accumulate(sample, min))

[5, 4, 2, 2, 2, 2, 2, 0, 0, 0]

In [64]:
list(itertools.accumulate(sample, max))

[5, 5, 5, 8, 8, 8, 8, 8, 9, 9]

In [68]:
import operator
list(itertools.accumulate(sample, operator.mul))

[5, 20, 40, 320, 2240, 13440, 40320, 0, 0, 0]

In [66]:
list(itertools.accumulate(range(1, 11), operator.mul))

[1, 2, 6, 24, 120, 720, 5040, 40320, 362880, 3628800]

In [70]:
list(enumerate('albatroz', 1))

[(1, 'a'),
 (2, 'l'),
 (3, 'b'),
 (4, 'a'),
 (5, 't'),
 (6, 'r'),
 (7, 'o'),
 (8, 'z')]

In [71]:
import operator
list(map(operator.mul, range(11), range(11)))

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100]

In [72]:
list(map(operator.mul, range(11), [2, 4, 8]))

[0, 4, 16]

In [73]:
list(map(lambda a, b: (a, b), range(11), [2, 4, 8]))

[(0, 2), (1, 4), (2, 8)]

In [77]:
import itertools
list(itertools.starmap(operator.mul, enumerate('albatroz', 1)))

['a', 'll', 'bbb', 'aaaa', 'ttttt', 'rrrrrr', 'ooooooo', 'zzzzzzzz']

In [78]:
sample = [5, 4, 2, 8, 7, 6, 3, 0, 9, 1]
list(itertools.starmap(lambda a, b: b/a, enumerate(itertools.accumulate(sample), 1)))

[5.0,
 4.5,
 3.6666666666666665,
 4.75,
 5.2,
 5.333333333333333,
 5.0,
 4.375,
 4.888888888888889,
 4.5]

## Generator functions that merge multiple input iterables

In [79]:
list(itertools.chain('ABC', range(2)))

['A', 'B', 'C', 0, 1]

In [80]:
list(itertools.chain(enumerate('ABC')))

[(0, 'A'), (1, 'B'), (2, 'C')]

In [81]:
list(itertools.chain.from_iterable(enumerate('ABC')))

[0, 'A', 1, 'B', 2, 'C']

In [82]:
list(zip('ABC', range(5)))

[('A', 0), ('B', 1), ('C', 2)]

In [83]:
list(zip('ABC', range(5), [10, 20, 30, 40]))

[('A', 0, 10), ('B', 1, 20), ('C', 2, 30)]

In [84]:
list(itertools.zip_longest('ABC', range(5)))

[('A', 0), ('B', 1), ('C', 2), (None, 3), (None, 4)]

In [85]:
list(itertools.zip_longest('ABC', range(5), fillvalue='?'))

[('A', 0), ('B', 1), ('C', 2), ('?', 3), ('?', 4)]

In [86]:
list(itertools.product('ABC', range(2)))

[('A', 0), ('A', 1), ('B', 0), ('B', 1), ('C', 0), ('C', 1)]

In [87]:
suits = 'spades hearts diamond clubs'.split()
list(itertools.product('AB', suits))

[('A', 'spades'),
 ('A', 'hearts'),
 ('A', 'diamond'),
 ('A', 'clubs'),
 ('B', 'spades'),
 ('B', 'hearts'),
 ('B', 'diamond'),
 ('B', 'clubs')]

In [88]:
list(itertools.product('ABC'))

[('A',), ('B',), ('C',)]

In [89]:
list(itertools.product('ABC', repeat=2))

[('A', 'A'),
 ('A', 'B'),
 ('A', 'C'),
 ('B', 'A'),
 ('B', 'B'),
 ('B', 'C'),
 ('C', 'A'),
 ('C', 'B'),
 ('C', 'C')]

In [90]:
list(itertools.product(range(2), repeat=3))

[(0, 0, 0),
 (0, 0, 1),
 (0, 1, 0),
 (0, 1, 1),
 (1, 0, 0),
 (1, 0, 1),
 (1, 1, 0),
 (1, 1, 1)]

In [91]:
rows = itertools.product('AB', range(2), repeat=2)
for row in rows: print(row)

('A', 0, 'A', 0)
('A', 0, 'A', 1)
('A', 0, 'B', 0)
('A', 0, 'B', 1)
('A', 1, 'A', 0)
('A', 1, 'A', 1)
('A', 1, 'B', 0)
('A', 1, 'B', 1)
('B', 0, 'A', 0)
('B', 0, 'A', 1)
('B', 0, 'B', 0)
('B', 0, 'B', 1)
('B', 1, 'A', 0)
('B', 1, 'A', 1)
('B', 1, 'B', 0)
('B', 1, 'B', 1)


## Generator functions that expand each input item into multiple output items

In [92]:
ct = itertools.count()
next(ct)

0

In [93]:
next(ct)

1

In [94]:
next(ct), next(ct), next(ct)

(2, 3, 4)

In [96]:
list(itertools.islice(itertools.count(1, .3), 3))

[1, 1.3, 1.6]

In [97]:
cy = itertools.cycle('ABC')
next(cy)

'A'

In [98]:
list(itertools.islice(cy, 7))

['B', 'C', 'A', 'B', 'C', 'A', 'B']

In [100]:
rp = itertools.repeat(7)
next(rp), next(rp)

(7, 7)

In [101]:
list(itertools.repeat(8, 4))

[8, 8, 8, 8]

In [102]:
list(map(operator.mul, range(11), itertools.repeat(5)))

[0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50]

In [103]:
list(itertools.combinations('ABC', 2))

[('A', 'B'), ('A', 'C'), ('B', 'C')]

In [104]:
list(itertools.combinations_with_replacement('ABC', 2))

[('A', 'A'), ('A', 'B'), ('A', 'C'), ('B', 'B'), ('B', 'C'), ('C', 'C')]

In [105]:
list(itertools.product('ABC', repeat=2))

[('A', 'A'),
 ('A', 'B'),
 ('A', 'C'),
 ('B', 'A'),
 ('B', 'B'),
 ('B', 'C'),
 ('C', 'A'),
 ('C', 'B'),
 ('C', 'C')]

## Rearranging generator functions

In [106]:
list(itertools.groupby('LLLLAAGGG'))

[('L', <itertools._grouper at 0x10f0c24e0>),
 ('A', <itertools._grouper at 0x10f0c2860>),
 ('G', <itertools._grouper at 0x10f0c2908>)]

In [107]:
for char, group in itertools.groupby('LLLLAAGGG'):
    print(char, '->', list(group))

L -> ['L', 'L', 'L', 'L']
A -> ['A', 'A']
G -> ['G', 'G', 'G']


In [109]:
animals = ['duck', 'eagle', 'rat', 'giraffe', 'bear', 'bat', 'dolphin', 'shark', 'lion']
animals.sort(key=len)
animals

['rat', 'bat', 'duck', 'bear', 'lion', 'eagle', 'shark', 'giraffe', 'dolphin']

In [110]:
for length, group in itertools.groupby(animals, len):
    print(length, '->', list(group))

3 -> ['rat', 'bat']
4 -> ['duck', 'bear', 'lion']
5 -> ['eagle', 'shark']
7 -> ['giraffe', 'dolphin']


In [112]:
for length, group in itertools.groupby(reversed(animals), len):
    print(length, '->', list(group))

7 -> ['dolphin', 'giraffe']
5 -> ['shark', 'eagle']
4 -> ['lion', 'bear', 'duck']
3 -> ['bat', 'rat']


In [113]:
list(itertools.tee('ABC'))

[<itertools._tee at 0x10f0dc808>, <itertools._tee at 0x10f0e26c8>]

In [114]:
g1, g2 = itertools.tee('ABC')
next(g1)

'A'

In [115]:
next(g2)

'A'

In [116]:
list(g1)

['B', 'C']

In [117]:
list(g2)

['B', 'C']

In [118]:
list(zip(*itertools.tee('ABC')))

[('A', 'A'), ('B', 'B'), ('C', 'C')]

## New Syntax in Python 3.3: yield from

In [119]:
def chain(*iterables):
    """Yield every item of each given iterable, one iterable after another."""
    for iterable in iterables:
        for item in iterable:
            yield item

In [121]:
s = 'ABC'
t = tuple(range(3))
list(chain(s, t))

['A', 'B', 'C', 0, 1, 2]

In [122]:
def chain(*iterables):
    """Yield every item of each given iterable, one iterable after another.

    Each iterable is drained by delegating to it with ``yield from``.
    """
    for iterable in iterables:
        yield from iterable

In [123]:
list(chain(s, t))

['A', 'B', 'C', 0, 1, 2]

## Iterable Reducing Functions

In [124]:
all([1, 2, 3])

True

In [125]:
all([1, 0, 3])

False

In [126]:
all([])

True

In [127]:
any([1, 2, 3])

True

In [128]:
any([1, 0, 3])

True

In [129]:
any([0, 0, 0])

False

In [130]:
any([])

False

In [131]:
g = (n for n in [0, 0.0, 7, 8])
any(g)

True

In [132]:
next(g)

8

# A Closer Look at the iter Function

In [159]:
import random

random.seed(1)

def d6():
    """Roll a six-sided die: return a random integer from 1 to 6 inclusive."""
    lowest_face, highest_face = 1, 6
    return random.randint(lowest_face, highest_face)

In [160]:
# iter(callable, sentinel)
d6_iter = iter(d6, 1)
d6_iter

<callable_iterator at 0x10f0de128>

In [161]:
for roll in d6_iter:
    print(roll)

2
5


In [None]:
with open('mydata.txt') as fp:
    # iter(callable, sentinel) keeps calling fp.readline() until it returns
    # the sentinel '\n', i.e. until the first blank line.
    for line in iter(fp.readline, '\n'):
        if not line:
            # readline() returns '' at end of file and keeps returning it, so
            # a file with no blank line would otherwise loop forever.
            break
        process_line(line)

# Generators as Coroutines
David Beazley warned in the PyCon US 2009 tutorial:
- Generators produce data for iteration
- Coroutines are consumers of data
- To keep your brain from exploding, you don't mix the two concepts together
- Coroutines are not related to iteration
- Note: There is a use of having yield produce a value in a coroutine, but it's not tied to iteration.