# Iterator from sequences

In [1]:
import re
import reprlib

RE_WORD = re.compile(r'\w+')


class Sentence:
    """A sentence exposed as a sequence of its words.

    The class defines no ``__iter__``; it is still iterable because
    ``iter()`` falls back to calling ``__getitem__`` with 0, 1, 2, ...
    until an ``IndexError`` is raised.
    """

    def __init__(self, text: str) -> None:
        self.text = text
        # Eagerly collect every run of word characters found in the text.
        self.words = RE_WORD.findall(text)

    def __getitem__(self, index):
        """Return the word (or slice of words) at ``index``."""
        return self.words[index]

    def __len__(self) -> int:
        """Return the number of words found in the text."""
        return len(self.words)

    def __repr__(self) -> str:
        # reprlib.repr abbreviates long texts with '...'.
        return f'Sentence({reprlib.repr(self.text)})'


s = Sentence('"The time has come, " the Walrus said.')
list(s) # ['The', 'time', 'has', 'come', 'the', 'Walrus', 'said']

Sentences('"The time ha... Walrus said.')

# The `iter()` function

Whenever Python needs to iterate over an object `x`, it calls `iter(x)`. This function:
- checks whether the `__iter__` method is implemented and, if so, calls it to obtain an iterator
- if `__iter__` is not implemented but `__getitem__` is, creates an iterator that fetches items by index, starting from zero
- if that also fails, raises a `TypeError`

This is an extreme example of duck typing. For goose typing, an object is only considered a subclass of `abc.Iterable` if it implements the `__iter__` method. This is ensured by the `__subclasshook__`.

## Using iter with a callable

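We can use the `iter()` function with two arguments: a callable object (e.g. a function) and a sentinel value. The callable is invoked repeatedly to produce the items; when it returns the sentinel, the iterator raises `StopIteration` instead of yielding it.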

```python
from random import randint


def d6():
    """Simulate one roll of a six-sided die, returning an int from 1 to 6."""
    return randint(1, 6)


d6_iter = iter(d6, 1) # <callable_iterator at 0x69>
for roll in d6_iter:
    print(roll) # When 1 is returned, the iteration stops
# e.g. 4, 3, 6, 3 (rolls are random; the sentinel 1 itself is never printed)
```

This only works when the callable receives no arguments.

# Iterable vs iterators

An iterable is any object from which the `iter()` function can create an iterator. Any object that implements `__iter__` is iterable. Any sequence is iterable. Any object that implements `__getitem__` and supports indexing from 0 is iterable.

The best way to check if an object is an iterator is calling `isinstance(x, abc.Iterator)`

Let's implement the `__iter__` method on the `Sentence` class we've created before. This will use the classic Iterator pattern from the Design Patterns book. This is not Pythonic yet, but the refactorings that follow will make it so.

It's tempting to implement `__next__` in the `Sentence` class, but that would be wrong, because a `Sentence` is iterable but not an iterator. We create an iterator by calling `iter(sentence)`. This is necessary because we might need more than one iterator over the same object, and each iterator needs to have its own internal state.

In [8]:
import re
import reprlib

RE_WORD = re.compile(r'\w+')


class Sentence:
    """An iterable sentence implemented with the classic Iterator pattern.

    The sentence itself is only iterable: every call to ``__iter__`` hands
    out a new ``SentenceIterator`` that keeps its own position.
    """

    def __init__(self, text: str) -> None:
        self.text = text
        # Eagerly collect every run of word characters found in the text.
        self.words = RE_WORD.findall(text)

    def __repr__(self) -> str:
        # reprlib.repr abbreviates long texts with '...'.
        return f'Sentence({reprlib.repr(self.text)})'

    def __iter__(self):
        """Return a new iterator over the words of this sentence."""
        return SentenceIterator(self.words)

class SentenceIterator:
    """Iterator that walks a list of words by index.

    The list passed in is stored by reference (no copy is made), so changes
    made to it elsewhere are visible to the iterator.
    """

    def __init__(self, words) -> None:
        self.index = 0
        self.words = words  # same object as the caller's list, not a copy

    def __iter__(self):
        # Iterators return themselves; this is what makes the class an
        # abc.Iterator in addition to having __next__.
        return self

    def __next__(self):
        """Return the word at the current position and advance by one."""
        position = self.index
        try:
            current = self.words[position]
        except IndexError:
            # Ran past the end of the list: signal exhaustion.
            raise StopIteration
        else:
            self.index = position + 1
            return current

# Two iterators over the same Sentence: each keeps its own index, but both
# hold a reference to the very same `s.words` list.
s = Sentence('"The time has come, " the Walrus said.')
s_iter1 = iter(s)
s_iter2 = iter(s)
print(next(s_iter1)) # The
print(next(s_iter2)) # The
s_iter1.words.remove('The') # mutates the list shared by `s` and both iterators
s.words # modified
print(next(s_iter1)) # Reference to the same list: index 1 is now 'has'
print(next(s_iter2)) # has



The
The
has
has


In [13]:
# pythonic way
import re
import reprlib

RE_WORD = re.compile(r'\w+')


class Sentence:
    """An iterable sentence whose ``__iter__`` is a generator function."""

    def __init__(self, text: str) -> None:
        self.text = text
        # Eagerly collect every run of word characters found in the text.
        self.words = RE_WORD.findall(text)

    def __repr__(self) -> str:
        # reprlib.repr abbreviates long texts with '...'.
        return f'Sentence({reprlib.repr(self.text)})'

    def __iter__(self): # a generator function that creates a generator object
        """Yield the words of the sentence one at a time."""
        for word in self.words:
            yield word
        # No explicit return is needed: when the body finishes, the
        # generator object raises StopIteration on the next `next()` call.

s = Sentence('"The time has come, " the Walrus said.')
s_iter1 = iter(s)
# The text contains seven words, so the first seven next() calls succeed.
print(next(s_iter1))
print(next(s_iter1))
print(next(s_iter1))
print(next(s_iter1))
print(next(s_iter1))
print(next(s_iter1))
print(next(s_iter1))
# Eighth call: the generator is exhausted.
print(next(s_iter1)) # Generates a StopIteration



The
time
has
come
the
Walrus
said


StopIteration: 

# Lazy generator expressions

In general, we want to delay the need to process a value as much as possible. Our `Sentence` class is not lazy yet, because it eagerly builds the whole `words` list in `__init__`. We can make it lazier by using `re.finditer` instead of `re.findall`.

In [None]:
# Lazy Sentence class

import re
import reprlib

RE_WORD = re.compile(r'\w+')


class Sentence:
    """A lazy iterable over the words of a text.

    No word list is stored; the words are matched on demand each time the
    sentence is iterated.
    """

    def __init__(self, text: str) -> None:
        self.text = text

    def __iter__(self):
        """Return a generator that produces the words of the text lazily.

        This is the concise generator-expression form. The equivalent
        generator-function body would be::

            for match in RE_WORD.finditer(self.text):
                yield match.group()

        Only one ``__iter__`` may be defined per class: a second definition
        silently replaces the first.
        """
        return (match.group() for match in RE_WORD.finditer(self.text))
    
            

In [None]:
# a practical example

class ArithmeticProgression:
    """Iterable arithmetic progression: begin, begin + step, begin + 2*step, ...

    Values are produced while they are strictly less than ``end``; when
    ``end`` is None the series never stops.
    """

    def __init__(self, begin, step, end=None) -> None:
        self.begin = begin
        self.step = step
        self.end = end # None -> infinite series

    def __iter__(self):
        """Yield the terms of the progression in order."""
        # Coerce the first term to the type that `begin + step` produces, so
        # every term has the same type as the later ones.
        coerce = type(self.begin + self.step)
        current = coerce(self.begin)
        unbounded = self.end is None
        steps_taken = 0
        while True:
            if not unbounded and not current < self.end:
                return
            yield current
            steps_taken += 1
            # Recompute from `begin` by multiplication rather than adding
            # `step` to the previous term.
            current = self.begin + self.step * steps_taken

# this could also be done with a generator function
def aritprog_gen(begin, step, end=None):
    """Yield begin, begin + step, begin + 2*step, ... while the term is < end.

    With ``end=None`` the generator never stops.
    """
    # The first term is coerced to the type that `begin + step` produces.
    current = type(begin + step)(begin)
    n = 0
    while end is None or current < end:
        yield current
        n += 1
        current = begin + step * n

# using the itertools module the function above could be
def aritprog_gen(begin, step, end=None):
    """Return an iterator over begin, begin + step, ... limited to terms < end.

    With ``end=None`` the unbounded ``itertools.count`` object is returned.
    """
    import itertools
    # The first term is coerced to the type that `begin + step` produces.
    start = type(begin + step)(begin)
    counter = itertools.count(start, step)
    if end is not None:
        return itertools.takewhile(lambda value: value < end, counter)
    return counter


    

In [8]:
# There are a lot of iterator functions defined in the standard lib.
import itertools
import operator

def vowel(c):
    """Return True when the single character ``c`` is a vowel (case-insensitive)."""
    # Set membership instead of a substring test, so that '' or a multi-letter
    # string such as 'ai' is not reported as a vowel.
    return c.lower() in {'a', 'e', 'i', 'o', 'u'}

# Each trailing comment shows the value of the expression on that line.

# Filtering functions
list(filter(vowel, 'Aardvark')) # ['A', 'a', 'a']
list(itertools.filterfalse(vowel, 'Aardvark')) # ['r', 'd', 'v', 'r', 'k']
list(itertools.dropwhile(vowel, 'Aardvark')) # ['r', 'd', 'v', 'a', 'r', 'k']
list(itertools.takewhile(vowel, 'Aardvark')) # ['A', 'a']
list(itertools.compress('Aardvark', (1, 0, 1, 1, 0, 1))) # ['A', 'r', 'd', 'a']
list(itertools.islice('Aardvark', 1, 7, 2)) # ['a', 'd', 'a']

# Mapping functions
sample = [5, 4, 2, 8, 7, 6, 3, 0, 9, 1]
list(itertools.accumulate(sample)) # [5, 9, 11, 19, 26, 32, 35, 35, 44, 45]
list(itertools.accumulate(sample, min)) # [5, 4, 2, 2, 2, 2, 2, 0, 0, 0]
list(itertools.accumulate(sample, max)) # [5, 5, 5, 8, 8, 8, 8, 8, 9, 9]
list(itertools.accumulate(sample, operator.mul)) # [5, 20, 40, 320, 2240, 13440, 40320, 0, 0, 0]
list(enumerate('abc')) # [(0, 'a'), (1, 'b'), (2, 'c')] -- counting starts at 0 by default
list(map(operator.mul, range(11), range(11))) # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100]
list(map(operator.mul, range(11), [2, 4, 8])) # [0, 4, 16] -- stops at the shortest input
list(itertools.starmap(operator.mul, enumerate('abc'))) # ['', 'b', 'cc'] -- 0 * 'a', 1 * 'b', 2 * 'c'

# Fusion function
list(itertools.chain('ABC', range(2))) # ['A', 'B', 'C', 0, 1]
list(zip('ABC', range(3))) # [('A', 0), ('B', 1), ('C', 2)]
list(itertools.product('ABC', range(2))) # [('A', 0), ('A', 1), ('B', 0), ('B', 1), ('C', 0), ('C', 1)]

# Expansion functions
cy = itertools.cycle('ABC')
list(itertools.islice(cy, 4)) # ['A', 'B', 'C', 'A']
list(itertools.pairwise(range(4))) # [(0, 1), (1, 2), (2, 3)]
rp = itertools.repeat(7, 4)
list(rp) # [7, 7, 7, 7]
list(map(operator.mul, range(11), itertools.repeat(5))) # [0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50]

# Combinations functions
list(itertools.combinations('ABC', 2)) # [('A', 'B'), ('A', 'C'), ('B', 'C')]
list(itertools.combinations_with_replacement('ABC', 2)) # [('A', 'A'), ('A', 'B'), ('A', 'C'), ('B', 'B'), ('B', 'C'), ('C', 'C')]
list(itertools.permutations('ABC', 2)) # [('A', 'B'), ('A', 'C'), ('B', 'A'), ('B', 'C'), ('C', 'A'), ('C', 'B')]

# Rearrangement functions
for char, group in itertools.groupby('LLLLAAAGG'):
    print(char, '->', list(group))
# L -> ['L', 'L', 'L', 'L']
# A -> ['A', 'A', 'A']
# G -> ['G', 'G']
animals = ['duck', 'eagle', 'rat', 'giraffe', 'bear', 'bat', 'dolphin', 'shark', 'lion']
animals.sort(key=len) # ['rat', 'bat', 'duck', 'bear', 'lion', 'eagle', 'shark', 'giraffe', 'dolphin']
list(reversed(animals)) # ['dolphin', 'giraffe', 'shark', 'eagle', 'lion', 'bear', 'duck', 'bat', 'rat']

g1, g2 = itertools.tee('ABC')
next(g1), next(g2) # ('A', 'A')
list(g1) # ['B', 'C']

# Reduction functions, see chapter about functional programming to see about reduce
all([1, 2, 3]) # True
all([1, 0, 3]) # False
any([1, 2, 3]) # True
any([1, 0, 3]) # True
any([0, 0, 0]) # False
    

L -> ['L', 'L', 'L', 'L']
A -> ['A', 'A', 'A']
G -> ['G', 'G']


['dolphin', 'giraffe', 'shark', 'eagle', 'lion', 'bear', 'duck', 'bat', 'rat']

# Subgenerators

You can use the `yield from` expression to delegate to a subgenerator, that is, a generator used inside another generator.

```python
def sub_gen():
    """Yield 1.1 and then 1.2."""
    for value in (1.1, 1.2):
        yield value


def gen():
    """Yield 1, then everything sub_gen() yields, then 2."""
    yield 1
    # Delegate to the subgenerator until it is exhausted.
    yield from sub_gen()
    yield 2


for x in gen():
    print(x) # 1, 1.1, 1.2, 2
```

# Type annotation for generators and iterators

For a generator function that only yields values, use `typing.Iterator` to annotate the return type

```python
from typing import Iterator
def fibbonacci() -> Iterator[int]:
    """Yield the Fibonacci numbers 0, 1, 1, 2, 3, ... without end."""
    current, following = 0, 1
    while True:
        yield current
        # Slide the window one position forward.
        current, following = following, current + following
```

A `typing.Iterator[str]` is consistent with `abc.Generator[str, None, None]`, because an iterator behaves like a generator that neither consumes nor returns values. A generator that is able to consume and return values is a coroutine.

# Coroutines (classical)

A classic coroutine is a generator function that uses `yield` as an expression (e.g. `term = yield average`), so that the caller can pass values in with `.send()`.

```python
from collections.abc import Generator

def averager() -> Generator[float, float, None]:
    """Coroutine that yields the running mean of the values sent to it.

    The first ``next()`` yields the initial mean 0.0; each ``send(value)``
    afterwards yields the mean of all values received so far.
    """
    running_sum = 0.0
    n_terms = 0
    mean = 0.0
    while True:
        # Hand the current mean to the caller and wait for the next term.
        received = yield mean
        running_sum += received
        n_terms += 1
        mean = running_sum / n_terms


coro_avg = averager()
next(coro_avg) # 0.0 # initialize the coroutine
coro_avg.send(10.0) # 10.0
coro_avg.send(30.0) # 20.0
coro_avg.send(5.0) # 15.0
coro_avg.close() # Explicitly stop the coroutine (not necessary in general)
```

Returning a value from a coroutine:

```python
from collections.abc import Generator
from typing import NamedTuple

class Result(NamedTuple):
    count: int
    average: float

class Sentinel:
    def __repr__(self):
        return f'<Sentinel>'

STOP = Sentinel()
def averager2(verboe: bool = False) -> Generator[None, float | Sentinel, Result]:
    total = 0.0
    count = 0
    average = 0.0
    while True:
        term = yield
        if verbose:
            print('received: ', term)
        if isinstance(term, Sentinel):
            break
        total += term
        count += 1
        average = total / count
    return Result(count, average)
coro_avg = averager2()
next(coro_avg)
coro_avg.send(10) # return None
coro_avg.send(30) # return None
coro_avg.send(6.5) # return None
try:
    coro_avg.send(STOP)
except StopIteration as exc:
    result = exc.value
print result # Result(count = 3, average = 15.5)
```