## Iterables, iterators and generators

In [1]:
#sentence_v1.py
# A class that extracts words from a text by index
import re
import reprlib

# Raw string: '\w' inside a normal string literal is an invalid escape
# sequence (DeprecationWarning since Python 3.6, SyntaxWarning since 3.12).
RE_WORD = re.compile(r'\w+')

class Sentence:
    """A sequence of the words found in a text, accessible by index.

    The class defines ``__getitem__`` and ``__len__`` but no ``__iter__``;
    instances are still iterable because ``iter()`` falls back to calling
    ``__getitem__`` with 0, 1, 2, ... until ``IndexError`` is raised.
    """

    def __init__(self, text):
        self.text = text
        # findall returns every non-overlapping match of the pattern as a list.
        self.words = RE_WORD.findall(text)

    def __getitem__(self, index):
        """Return the word stored at ``index`` in ``self.words``."""
        return self.words[index]

    def __len__(self):
        """Return the number of words extracted from the text."""
        return len(self.words)

    def __repr__(self):
        # reprlib.repr abbreviates long strings with '...'.
        return 'Sentence(%s)' % reprlib.repr(self.text)

In [4]:
s = Sentence('"The time has come," the Walrus said.')
s

Sentence('"The time ha... Walrus said.')

In [5]:
for word in s:
    print(word)

The
time
has
come
the
Walrus
said


In [6]:
list(s)

['The', 'time', 'has', 'come', 'the', 'Walrus', 'said']

In [7]:
s[0]

'The'

In [8]:
class Foo:
    """Minimal class whose only member is an ``__iter__`` method."""

    def __iter__(self):
        # Deliberately a stub: it returns None rather than a real iterator.
        return None

from collections import abc
issubclass(Foo, abc.Iterable)

True

In [9]:
f = Foo()
isinstance(f, abc.Iterable)

True

In [10]:
# __getitem__
s = 'ABC'
for char in s:
    print(char)

A
B
C


In [11]:
dir(s)

['__add__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getnewargs__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__mod__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmod__',
 '__rmul__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'capitalize',
 'casefold',
 'center',
 'count',
 'encode',
 'endswith',
 'expandtabs',
 'find',
 'format',
 'format_map',
 'index',
 'isalnum',
 'isalpha',
 'isascii',
 'isdecimal',
 'isdigit',
 'isidentifier',
 'islower',
 'isnumeric',
 'isprintable',
 'isspace',
 'istitle',
 'isupper',
 'join',
 'ljust',
 'lower',
 'lstrip',
 'maketrans',
 'partition',
 'replace',
 'rfind',
 'rindex',
 'rjust',
 'rpartition',
 'rsplit',
 'rstrip',
 'split',
 'splitlines',
 'startswith',
 'strip',
 'swapcase',
 'title',
 'translate',
 'upper',


In [12]:
# __iter__
# Hand-written equivalent of ``for char in s: print(char)``.
it = iter(s)  # obtain an iterator from the iterable
while True:
    try:
        print(next(it))  # fetch and print the next item
    except StopIteration:
        # next() raises StopIteration once the iterator is exhausted.
        del it  # drop the reference to the spent iterator
        break

A
B
C


In [13]:
#sentence_iter.py
# A classic iterator: implemented using the Iterator pattern
import re
import reprlib

# Raw string: '\w' inside a normal string literal is an invalid escape
# sequence (DeprecationWarning since Python 3.6, SyntaxWarning since 3.12).
RE_WORD = re.compile(r'\w+')

class Sentence:
    """Iterable over the words of a text, built with the classic Iterator pattern.

    The words are extracted once, in ``__init__``; every call to ``__iter__``
    returns a new ``SentenceIterator`` over that list.
    """

    def __init__(self, text):
        self.text = text
        # findall returns every non-overlapping match of the pattern as a list.
        self.words = RE_WORD.findall(text)

    def __repr__(self):
        # reprlib.repr abbreviates long strings with '...'.
        return 'Sentence(%s)' % reprlib.repr(self.text)

    def __iter__(self):
        """Return a fresh iterator positioned at the first word."""
        return SentenceIterator(self.words)
    
class SentenceIterator:
    """Iterator that walks a list of words from the first to the last."""

    def __init__(self, words):
        self.words = words
        self.index = 0  # position of the next word to hand out

    def __iter__(self):
        """An iterator is its own iterator."""
        return self

    def __next__(self):
        """Return the next word, or raise StopIteration when none are left."""
        try:
            current = self.words[self.index]
        except IndexError:
            raise StopIteration()
        else:
            self.index += 1
            return current

In [14]:
#sentence_gen.py 
#implemented using a generator function
import re
import reprlib

# Raw string: '\w' inside a normal string literal is an invalid escape
# sequence (DeprecationWarning since Python 3.6, SyntaxWarning since 3.12).
RE_WORD = re.compile(r'\w+')

class Sentence:
    """Iterable over the words of a text, implemented with a generator function.

    The words are extracted once, in ``__init__``; ``__iter__`` is a generator
    function, so each call returns a new generator over ``self.words``.
    """

    def __init__(self, text):
        self.text = text
        # findall returns every non-overlapping match of the pattern as a list.
        self.words = RE_WORD.findall(self.text)

    def __repr__(self):
        # reprlib.repr abbreviates long strings with '...'.
        return 'Sentence(%s)' % reprlib.repr(self.text)

    def __iter__(self):
        """Yield the words one by one."""
        # No trailing ``return`` needed: a generator finishes (raising
        # StopIteration for the caller) when the function body ends.
        for word in self.words:
            yield word
    

In [15]:
#sentence_gen2.py 
#A lazy implementation
#implemented using a generator function calling re.finditer gen function
import re
import reprlib

# Raw string: '\w' inside a normal string literal is an invalid escape
# sequence (DeprecationWarning since Python 3.6, SyntaxWarning since 3.12).
RE_WORD = re.compile(r'\w+')

class Sentence:
    """Lazy iterable over the words of a text.

    Only the text is stored; the words are searched for while iterating,
    one match at a time, instead of being collected into a list up front.
    """

    def __init__(self, text):
        self.text = text

    def __repr__(self):
        # reprlib.repr abbreviates long strings with '...'.
        return 'Sentence(%s)' % reprlib.repr(self.text)

    def __iter__(self):
        """Yield each word as the regular expression finds it."""
        # finditer produces match objects on demand; group() is the matched text.
        for match in RE_WORD.finditer(self.text):
            yield match.group()

In [16]:
#ArithmeticProgression class
class ArithmeticProgression:
    """Iterable arithmetic progression: begin, begin + step, begin + 2*step, ...

    Args:
        begin: first value of the series.
        step: difference between consecutive values; may be negative.
        end: exclusive bound; ``None`` (the default) makes the series infinite.

    Every item has the type of ``begin + step``; the first item is coerced
    to that type so that, e.g., ``(1, .5, 3)`` starts with ``1.0``.
    A negative ``step`` counts down and stops once a value is <= ``end``.
    """

    def __init__(self, begin, step, end=None):
        self.begin = begin
        self.step = step
        self.end = end  # None -> infinite

    def __iter__(self):
        # Coerce the first item to the type the additions will produce.
        result = type(self.begin + self.step)(self.begin)
        forever = self.end is None
        # Only evaluated for bounded series, so step types that cannot be
        # compared with 0 keep working when the series is infinite.
        descending = not forever and self.step < 0
        index = 0
        while forever or (result > self.end if descending else result < self.end):
            yield result
            index += 1
            # Recomputed from begin each time rather than by repeated addition
            # (presumably to avoid accumulating floating-point error).
            result = self.begin + self.step * index

In [18]:
ap = ArithmeticProgression(0, 1, 3)
list(ap)

[0, 1, 2]

In [19]:
ap = ArithmeticProgression(1, .5, 3)
list(ap)

[1.0, 1.5, 2.0, 2.5]

In [20]:
ap = ArithmeticProgression(0, 1/3, 1)
list(ap)

[0.0, 0.3333333333333333, 0.6666666666666666]

In [22]:
from fractions import Fraction
ap = ArithmeticProgression(0, Fraction(1, 3), 1)
list(ap)

[Fraction(0, 1), Fraction(1, 3), Fraction(2, 3)]

In [23]:
# the aritprog_gen generator
def aritprog_gen(begin, step, end=None):
    """Generate the arithmetic progression begin, begin + step, begin + 2*step, ...

    Args:
        begin: first value of the series.
        step: difference between consecutive values; may be negative.
        end: exclusive bound; ``None`` (the default) makes the series infinite.

    Yields:
        Items of the type of ``begin + step``. With a negative ``step`` the
        series counts down and stops once a value is <= ``end``.
    """
    # Coerce the first item to the type the additions will produce.
    result = type(begin + step)(begin)
    forever = end is None
    # Only evaluated for bounded series, so step types that cannot be
    # compared with 0 keep working when the series is infinite.
    descending = not forever and step < 0
    index = 0
    while forever or (result > end if descending else result < end):
        yield result
        index += 1
        # Recomputed from begin each time rather than by repeated addition
        # (presumably to avoid accumulating floating-point error).
        result = begin + step * index

### itertools  
itertools has 19 generator functions

In [24]:
import itertools
gen = itertools.count(1, .5)
next(gen)

1

In [25]:
next(gen)

1.5

In [26]:
def count(firstval=0, step=1):
    """Yield firstval, firstval + step, firstval + 2*step, ... without end."""
    current = firstval
    while True:
        yield current
        current = current + step if False else current.__class__ and current  # placeholder never used
        current += step
        

In [29]:
gen = itertools.takewhile(lambda n: n < 5, itertools.count(1, .5))
list(gen)

[1, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5]

In [30]:
def aritprog_gen1(begin, step, end=None):
    """Return an iterator over the arithmetic progression, built from itertools.

    Args:
        begin: first value of the series.
        step: difference between consecutive values; may be negative.
        end: exclusive bound; ``None`` (the default) makes the series infinite.

    Returns:
        An ``itertools.count`` object when ``end`` is None, otherwise an
        ``itertools.takewhile`` object wrapping it. Note this is a plain
        function returning an iterator, not a generator function.
    """
    # Coerce the first item to the type the additions will produce.
    first = type(begin + step)(begin)
    ap_gen = itertools.count(first, step)
    if end is None:
        return ap_gen
    if step < 0:
        # Counting down: keep items while they are still above the bound.
        return itertools.takewhile(lambda n: n > end, ap_gen)
    return itertools.takewhile(lambda n: n < end, ap_gen)

### Filtering generator functions
itertools.compress(it, selector_it)  
itertools.dropwhile(predicate, it)  
filter(predicate, it)  
itertools.filterfalse(predicate, it)  
itertools.islice(it, stop)  
itertools.takewhile(predicate, it)  

In [31]:
def vowel(c):
    """Return True if the single character ``c`` is one of a, e, i, o, u (any case)."""
    # Set membership rather than ``in 'aeiou'``: a substring test would also
    # accept the empty string and multi-character runs such as 'ei'.
    return c.lower() in {'a', 'e', 'i', 'o', 'u'}

list(filter(vowel, 'Aardvark'))

['A', 'a', 'a']

In [32]:
list(itertools.filterfalse(vowel, 'Aardvark'))

['r', 'd', 'v', 'r', 'k']

In [35]:
list(itertools.dropwhile(vowel, 'EAardvarkAE'))

['r', 'd', 'v', 'a', 'r', 'k', 'A', 'E']

In [36]:
list(itertools.takewhile(vowel, 'EAardvarkAE'))

['E', 'A', 'a']

In [37]:
# NOTE: the selectors argument here is a *string*, so compress() sees the
# characters '1', ',', '0', ... one by one. Each is a non-empty str and
# therefore truthy, which is why no item is filtered out in the output below.
# Pass real values such as (1, 0, 1, 1, 0, 1) to actually select items.
list(itertools.compress('Aardvark', '1,0,1,1,0,1'))

['A', 'a', 'r', 'd', 'v', 'a', 'r', 'k']

In [38]:
list(itertools.islice('Aardvark', 4))

['A', 'a', 'r', 'd']

### Mapping generator functions  
itertools.accumulate(it, [func])  
enumerate(iterable, start=0)  
map(func, it1, [it2]...)  
itertools.starmap(func, it)  

In [39]:
sample = [5,4,2,8,7,6,3,0,9,1]
list(itertools.accumulate(sample))  #default sum

[5, 9, 11, 19, 26, 32, 35, 35, 44, 45]

In [40]:
list(itertools.accumulate(sample, min))

[5, 4, 2, 2, 2, 2, 2, 0, 0, 0]

In [41]:
list(itertools.accumulate(sample, max))

[5, 5, 5, 8, 8, 8, 8, 8, 9, 9]

In [42]:
import operator
list(itertools.accumulate(sample, operator.mul))

[5, 20, 40, 320, 2240, 13440, 40320, 0, 0, 0]

In [43]:
list(itertools.accumulate(range(1, 11), operator.mul))

[1, 2, 6, 24, 120, 720, 5040, 40320, 362880, 3628800]

In [44]:
list(enumerate('abcdefg', 1))

[(1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e'), (6, 'f'), (7, 'g')]

In [46]:
list(map(operator.mul, range(11), range(11)))

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100]

In [47]:
list(map(lambda a: a > 3, range(11)))

[False, False, False, False, True, True, True, True, True, True, True]

In [50]:
list(map(lambda c: c.lower(), 'ABcdEf'))

['a', 'b', 'c', 'd', 'e', 'f']

In [51]:
list(itertools.starmap(operator.mul, enumerate('abcdefg', 1)))

['a', 'bb', 'ccc', 'dddd', 'eeeee', 'ffffff', 'ggggggg']

In [52]:
list(enumerate(itertools.accumulate(sample), 1))

[(1, 5),
 (2, 9),
 (3, 11),
 (4, 19),
 (5, 26),
 (6, 32),
 (7, 35),
 (8, 35),
 (9, 44),
 (10, 45)]

In [53]:
list(itertools.starmap(lambda a, b: b/a, enumerate(itertools.accumulate(sample), 1)))

[5.0,
 4.5,
 3.6666666666666665,
 4.75,
 5.2,
 5.333333333333333,
 5.0,
 4.375,
 4.888888888888889,
 4.5]

### Generator functions that merge multiple input iterables
itertools.chain(it1, ..., itN)  
itertools.chain.from_iterable(it)  
itertools.product(it1, ..., itN, repeat=1)  
zip(it1, ..., itN)  
itertools.zip_longest(it1, ..., itN, fillvalue=None)  

In [55]:
list(itertools.chain('ABC', range(2)))

['A', 'B', 'C', 0, 1]

In [56]:
list(itertools.chain(enumerate('ABC')))

[(0, 'A'), (1, 'B'), (2, 'C')]

In [57]:
list(itertools.chain.from_iterable(enumerate('ABC')))

[0, 'A', 1, 'B', 2, 'C']

In [60]:
list(zip('ABC', range(5)))

[('A', 0), ('B', 1), ('C', 2)]

In [61]:
list(zip('ABC', range(5), [10, 20, 30, 40]))

[('A', 0, 10), ('B', 1, 20), ('C', 2, 30)]

In [62]:
list(itertools.zip_longest('ABC', range(5)))

[('A', 0), ('B', 1), ('C', 2), (None, 3), (None, 4)]

In [63]:
list(itertools.zip_longest('ABC', range(5), fillvalue='?'))

[('A', 0), ('B', 1), ('C', 2), ('?', 3), ('?', 4)]

In [64]:
list(itertools.product('ABC', range(2)))

[('A', 0), ('A', 1), ('B', 0), ('B', 1), ('C', 0), ('C', 1)]

In [65]:
list(itertools.product('ABC', range(2), repeat=2))

[('A', 0, 'A', 0),
 ('A', 0, 'A', 1),
 ('A', 0, 'B', 0),
 ('A', 0, 'B', 1),
 ('A', 0, 'C', 0),
 ('A', 0, 'C', 1),
 ('A', 1, 'A', 0),
 ('A', 1, 'A', 1),
 ('A', 1, 'B', 0),
 ('A', 1, 'B', 1),
 ('A', 1, 'C', 0),
 ('A', 1, 'C', 1),
 ('B', 0, 'A', 0),
 ('B', 0, 'A', 1),
 ('B', 0, 'B', 0),
 ('B', 0, 'B', 1),
 ('B', 0, 'C', 0),
 ('B', 0, 'C', 1),
 ('B', 1, 'A', 0),
 ('B', 1, 'A', 1),
 ('B', 1, 'B', 0),
 ('B', 1, 'B', 1),
 ('B', 1, 'C', 0),
 ('B', 1, 'C', 1),
 ('C', 0, 'A', 0),
 ('C', 0, 'A', 1),
 ('C', 0, 'B', 0),
 ('C', 0, 'B', 1),
 ('C', 0, 'C', 0),
 ('C', 0, 'C', 1),
 ('C', 1, 'A', 0),
 ('C', 1, 'A', 1),
 ('C', 1, 'B', 0),
 ('C', 1, 'B', 1),
 ('C', 1, 'C', 0),
 ('C', 1, 'C', 1)]

In [66]:
suits = 'spades hearts diamonds clubs'.split()
list(itertools.product('AK', suits))

[('A', 'spades'),
 ('A', 'hearts'),
 ('A', 'diamonds'),
 ('A', 'clubs'),
 ('K', 'spades'),
 ('K', 'hearts'),
 ('K', 'diamonds'),
 ('K', 'clubs')]

In [67]:
list(itertools.product('ABC'))

[('A',), ('B',), ('C',)]

In [68]:
list(itertools.product('ABC', repeat=2))

[('A', 'A'),
 ('A', 'B'),
 ('A', 'C'),
 ('B', 'A'),
 ('B', 'B'),
 ('B', 'C'),
 ('C', 'A'),
 ('C', 'B'),
 ('C', 'C')]

### Generator functions that expand each input item into multiple output items
itertools.combinations(it, out_len)  
itertools.combinations_with_replacement(it, out_len)  
itertools.count(start=0, step=1)   #never stop  
itertools.cycle(it)   #never stop  
itertools.permutations(it, out_len=None)  
itertools.repeat(item, [times])  

In [70]:
cy = itertools.cycle('ABC')
next(cy)

'A'

In [71]:
list(itertools.islice(itertools.count(1, .3), 3))

[1, 1.3, 1.6]

In [73]:
list(itertools.islice(cy, 10))

['C', 'A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C']

In [74]:
list(itertools.repeat(8, 4))

[8, 8, 8, 8]

In [75]:
list(itertools.combinations('ABC', 2))

[('A', 'B'), ('A', 'C'), ('B', 'C')]

In [76]:
list(itertools.combinations_with_replacement('ABC', 2))

[('A', 'A'), ('A', 'B'), ('A', 'C'), ('B', 'B'), ('B', 'C'), ('C', 'C')]

In [77]:
list(itertools.permutations('ABC', 2))

[('A', 'B'), ('A', 'C'), ('B', 'A'), ('B', 'C'), ('C', 'A'), ('C', 'B')]

### Rearranging generator functions
itertools.groupby(it, key=None)  
itertools.tee(it, n=2)  #yields multiple generators  
reversed(seq)  #seq in reverse order  

In [78]:
list(itertools.groupby('LLLLAAGGG'))

[('L', <itertools._grouper at 0x1034fe320>),
 ('A', <itertools._grouper at 0x1034fe080>),
 ('G', <itertools._grouper at 0x1034fe048>)]

In [82]:
for char, group in itertools.groupby('ABCDD'):
    print(char, '->', list(group))

A -> ['A']
B -> ['B']
C -> ['C']
D -> ['D', 'D']


In [84]:
animals = ['duck', 'eagle', 'rat', 'giraffe', 'bear', 'bat', 'dolphin', 'shark', 'lion']
animals.sort(key=len)

In [85]:
animals

['rat', 'bat', 'duck', 'bear', 'lion', 'eagle', 'shark', 'giraffe', 'dolphin']

In [88]:
for length, group in itertools.groupby(animals, len):
    print(length, '->', list(group))

3 -> ['rat', 'bat']
4 -> ['duck', 'bear', 'lion']
5 -> ['eagle', 'shark']
7 -> ['giraffe', 'dolphin']


In [90]:
for length, group in itertools.groupby(reversed(animals), len):
    print(length, '->', list(group))

7 -> ['dolphin', 'giraffe']
5 -> ['shark', 'eagle']
4 -> ['lion', 'bear', 'duck']
3 -> ['bat', 'rat']


In [91]:
list(itertools.tee('ABC'))

[<itertools._tee at 0x10345f108>, <itertools._tee at 0x1034fadc8>]

In [94]:
g1, g2 = itertools.tee('ABC')

In [95]:
list(g1)

['A', 'B', 'C']

In [96]:
list(g2)

['A', 'B', 'C']

In [97]:
list(zip(*itertools.tee('ABC')))

[('A', 'A'), ('B', 'B'), ('C', 'C')]

### yield from

In [99]:
# yield
def chain(*iterable):
    """Yield every item of each given iterable, one iterable after another."""
    for source in iterable:
        for item in source:
            yield item

s = 'ABC'
list(chain(s, range(3)))

['A', 'B', 'C', 0, 1, 2]

In [100]:
# yield from
def chain(*iterable):
    """Yield every item of each given iterable, delegating with ``yield from``."""
    for source in iterable:
        yield from source

list(chain(s, range(3)))

['A', 'B', 'C', 0, 1, 2]

### Built-in functions that read iterables and return single values
all(it)  
any(it)  
max(it, [key], [default])  
min(it, [key], [default])  
sum(it, start=0)  
functools.reduce(func, it, [initial])  #returns the result of applying func cumulatively to the items  

In [101]:
all([1, 2, 3])

True

In [102]:
all([1, 0, 2])

False

In [103]:
any([1, 2, 3])

True

In [104]:
any([1, 0, 2])

True

In [105]:
any([])

False

In [106]:
all([])

True

In [107]:
# functools.reduce is roughly equivalent to the following
def reduce(func, iterable, initialize=None):
    """Fold ``iterable`` into a single value by applying ``func`` cumulatively.

    Args:
        func: callable taking (accumulated value, next item) and returning
            the new accumulated value.
        iterable: the items to combine.
        initialize: starting value; ``None`` (the default) means "start from
            the first item of ``iterable``".

    Returns:
        The final accumulated value.

    Raises:
        TypeError: if ``iterable`` is empty and no ``initialize`` is given.
    """
    it = iter(iterable)
    if initialize is None:
        try:
            # Seed from the iterator, not the iterable: next() on a list (or
            # any non-iterator) raises TypeError, and consuming from ``it``
            # makes the loop below resume at the second item.
            value = next(it)
        except StopIteration:
            # Do not leak StopIteration out of a plain function.
            raise TypeError(
                'reduce() of empty iterable with no initial value') from None
    else:
        value = initialize
    for element in it:
        value = func(value, element)
    return value

### iter function

In [113]:
from random import randint
def d6():
    """Roll a six-sided die: return a random integer from 1 to 6 inclusive."""
    roll = randint(1, 6)
    return roll

d6_iter = iter(d6, 1)
d6_iter

<callable_iterator at 0x103501898>

In [114]:
for roll in d6_iter:
    print(roll)

4
5
3


In [115]:
# a trick: the two-argument form iter(callable, sentinel)
# Calls fp.readline repeatedly and stops when it returns the sentinel ''.
# NOTE: readline() returns '' only at end of file (a blank line is '\n'), so
# this loop processes every line; use '\n' as the sentinel to stop at the
# first blank line instead.
with open('mydata.txt') as fp:
    for line in iter(fp.readline, ''):
        process_line(line)

FileNotFoundError: [Errno 2] No such file or directory: 'mydata.txt'

### Further reading
https://www.python.org/dev/peps/pep-0342/   Coroutines via Enhanced Generators  
https://www.python.org/dev/peps/pep-0255/   Simple Generators
http://www.dabeaz.com/coroutines/  
https://docs.python.org/3/whatsnew/3.3.html#pep-380-syntax-for-delegating-to-a-subgenerator  
https://docs.python.org/3/library/itertools.html  
