# Iterator from sequences

In [1]:
import re
import reprlib

RE_WORD = re.compile(r'\w+')

class Sentence:
    """A sentence viewed as a sequence of its words.

    The class implements ``__getitem__`` and ``__len__`` but no
    ``__iter__``. It is still iterable, because ``iter()`` falls back to
    calling ``__getitem__`` with integer indexes starting at 0.
    """

    def __init__(self, text) -> None:
        self.text = text
        # Eager extraction: every word is found up front and kept in a list.
        self.words = RE_WORD.findall(text)

    def __getitem__(self, index):
        """Return the word at ``index`` (delegates to the ``words`` list)."""
        return self.words[index]

    def __len__(self):
        """Return the number of words found in the text."""
        return len(self.words)

    def __repr__(self) -> str:
        # The prefix must match the class name; reprlib.repr abbreviates
        # long texts with '...'.
        return 'Sentence(%s)' % reprlib.repr(self.text)

# Sentence defines __getitem__ but no __iter__, so list() iterates it by index.
s = Sentence('"The time has come, " the Walrus said.')
list(s) # ['The', 'time', 'has', 'come', 'the', 'Walrus', 'said']

Sentences('"The time ha... Walrus said.')

# The `iter()` function

Whenever Python needs to iterate over an object `x`, it automatically calls `iter(x)`. The `iter()` built-in function:
- checks whether the object implements `__iter__` and, if so, calls it to obtain an iterator;
- if `__iter__` is not implemented but `__getitem__` is, creates an iterator that fetches items by index, starting from zero;
- if that also fails, raises `TypeError`.

This is an extreme example of duck typing. With goose typing, an object is considered a subclass of `abc.Iterable` only if it implements the `__iter__` method. This check is performed by `__subclasshook__`.

## Using iter with a callable

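We can also call the `iter()` function with two arguments: a callable object (e.g. a function), which is invoked repeatedly to produce values, and a sentinel value. When the callable returns the sentinel, the iterator raises `StopIteration` instead of yielding it.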

```python
from random import randint

def d6():
    """Roll a six-sided die: return a random integer from 1 to 6."""
    return randint(1, 6)

# iter(callable, sentinel) calls d6() repeatedly and stops as soon as
# d6() returns the sentinel (1), which is never yielded.
d6_iter = iter(d6, 1) # <callable_iterator at 0x69>
for roll in d6_iter:
    print(roll) # When 1 is returned, the iteration stops
# e.g. 4, 3, 6, 3
```

This only works when the callable takes no arguments, because `iter()` calls it without any.

# Iterable vs iterators

An iterable is any object from which the `iter()` function can obtain an iterator. Any object that implements `__iter__` is iterable. Any sequence is iterable. Any object that implements `__getitem__` and supports index 0 is iterable.

The best way to check whether an object is an iterator is to call `isinstance(x, abc.Iterator)`.

Let's implement the `__iter__` method on the `Sentence` class we created before. This version uses the classic Iterator pattern from the Design Patterns book. It is not Pythonic yet, but the refactorings that follow will make it so.

It's tempting to implement `__next__` in the `Sentence` class, but that would be wrong, because `Sentence` is an iterable, not an iterator. Instead, we create an iterator by calling `iter(sentence)`. This is necessary because we might need more than one iterator over the same object, and each iterator needs to keep its own internal state.

In [8]:
import re
import reprlib

RE_WORD = re.compile(r'\w+')

class Sentence:
    """An iterable sentence implemented with the classic Iterator pattern.

    ``Sentence`` is only the iterable: it builds a fresh iterator on every
    ``iter()`` call and never implements ``__next__`` itself.
    """

    def __init__(self, text) -> None:
        self.text = text
        # Eager extraction: every word is found up front and kept in a list.
        self.words = RE_WORD.findall(text)

    def __repr__(self) -> str:
        # The prefix must match the class name; reprlib.repr abbreviates
        # long texts with '...'.
        return 'Sentence(%s)' % reprlib.repr(self.text)

    def __iter__(self):
        """Return a new ``SentenceIterator`` over this sentence's words."""
        return SentenceIterator(self.words)

class SentenceIterator:
    """Iterator that walks a list of words by position.

    The iterator keeps a reference to the list it receives (no copy is
    made) together with the index of the next word to return.
    """

    def __init__(self, words) -> None:
        self.index = 0
        self.words = words  # same list object as the caller's, not a copy

    def __iter__(self):
        """Return the iterator itself, as ``abc.Iterator`` requires."""
        return self

    def __next__(self):
        """Return the next word, or raise ``StopIteration`` past the end."""
        position = self.index
        try:
            current = self.words[position]
        except IndexError:
            raise StopIteration
        else:
            # Advance only after a successful lookup.
            self.index = position + 1
            return current

# Two iterators over the same Sentence: each one tracks its own index.
s = Sentence('"The time has come, " the Walrus said.')
s_iter1 = iter(s)
s_iter2 = iter(s)
print(next(s_iter1)) # The
print(next(s_iter2)) # The -- s_iter2 starts at index 0 independently
# SentenceIterator keeps a reference to s.words (no copy), so removing an
# item through one iterator changes the list seen by s and by s_iter2 too.
s_iter1.words.remove('The')
s.words # modified
print(next(s_iter1)) # Reference to the same list: index 1 is now 'has'
print(next(s_iter2)) # has



The
The
has
has


In [13]:
# pythonic way
import re
import reprlib

RE_WORD = re.compile(r'\w+')

class Sentence:
    """An iterable sentence whose ``__iter__`` is a generator function."""

    def __init__(self, text) -> None:
        self.text = text
        # Eager extraction: every word is found up front and kept in a list.
        self.words = RE_WORD.findall(text)

    def __repr__(self) -> str:
        # The prefix must match the class name; reprlib.repr abbreviates
        # long texts with '...'.
        return 'Sentence(%s)' % reprlib.repr(self.text)

    def __iter__(self): # a generator function that creates a generator object
        for word in self.words:
            yield word
        # No explicit return is needed: when the function body ends, the
        # generator object raises StopIteration on the next call to next().

s = Sentence('"The time has come, " the Walrus said.')
s_iter1 = iter(s) # calling the generator function returns a generator object
print(next(s_iter1)) # The
print(next(s_iter1)) # time
print(next(s_iter1)) # has
print(next(s_iter1)) # come
print(next(s_iter1)) # the
print(next(s_iter1)) # Walrus
print(next(s_iter1)) # said
print(next(s_iter1)) # Generates a StopIteration



The
time
has
come
the
Walrus
said


StopIteration: 

# Lazy generator expressions

In general, we want to delay producing a value for as long as possible. Our `Sentence` class is not lazy yet, because it eagerly builds the `words` list with every word in the text. We can make it lazier by using `re.finditer` instead of `re.findall`.

In [None]:
# Lazy Sentence class

import re
import reprlib

RE_WORD = re.compile(r'\w+')

class Sentence:
    """Lazy sentence: words are matched on demand instead of stored in a list."""

    def __init__(self, text) -> None:
        self.text = text

    # The same iterator could be written as a generator function:
    #
    #     def __iter__(self):
    #         for match in RE_WORD.finditer(self.text):
    #             yield match.group()
    #
    # Only one ``__iter__`` can live in the class body (a second ``def``
    # silently replaces the first), so the more concise generator
    # expression is the one kept as real code.
    def __iter__(self):
        """Return a generator that produces each word of the text on demand."""
        return (match.group() for match in RE_WORD.finditer(self.text))
    
            

In [None]:
# a practical example

class ArithmeticProgression:
    """Iterable arithmetic progression: begin, begin + step, begin + 2*step, ...

    When ``end`` is ``None`` the series never stops; otherwise items are
    produced only while they compare lower than ``end``.
    """

    def __init__(self, begin, step, end=None) -> None:
        self.begin, self.step, self.end = begin, step, end

    def __iter__(self):
        """Yield the terms of the progression, one at a time."""
        # Coerce the first term to the type that ``begin + step`` produces,
        # so that every yielded value has the same type.
        coerce = type(self.begin + self.step)
        current = coerce(self.begin)
        unbounded = self.end is None
        steps_taken = 0
        while True:
            if not unbounded and not (current < self.end):
                return
            yield current
            steps_taken += 1
            # Each term is computed from ``begin`` rather than by adding
            # ``step`` to the previous term.
            current = self.begin + self.step * steps_taken

# this could also be done with a generator function
def aritprog_gen(begin, step, end=None):
    """Generate the arithmetic progression begin, begin + step, ...

    With ``end=None`` the generator never stops; otherwise it produces
    terms only while they compare lower than ``end``.
    """
    # Coerce the first term to the type that ``begin + step`` produces.
    coerce = type(begin + step)
    term = coerce(begin)
    count = 0
    while end is None or term < end:
        yield term
        count += 1
        # Each term is computed from ``begin`` rather than by adding
        # ``step`` to the previous term.
        term = begin + step * count

# using the itertools module, the function above could be written as
def aritprog_gen(begin, step, end=None):
    """Return an iterator over the arithmetic progression begin, begin + step, ...

    With ``end=None`` the iterator is endless; otherwise it stops at the
    first term that does not compare lower than ``end``.

    Note: per the ``itertools.count`` documentation, counting with
    floating-point numbers can be less accurate than multiplicative code
    such as ``start + step * i``.
    """
    import itertools

    # Coerce the first term to the type that ``begin + step`` produces.
    start = type(begin + step)(begin)
    series = itertools.count(start, step)
    if end is not None:
        def below_end(value):
            return value < end

        series = itertools.takewhile(below_end, series)
    return series

# There are a lot of iterator functions defined in the standard lib, take a look at the book for more examples

# The rest of the chapter, I will not annotate.


    