# Generators & Comprehensions - Part 2

In [1]:
import itertools

In [2]:
print(itertools.__doc__)

Functional tools for creating and using iterators.

Infinite iterators:
count(start=0, step=1) --> start, start+step, start+2*step, ...
cycle(p) --> p0, p1, ... plast, p0, p1, ...
repeat(elem [,n]) --> elem, elem, elem, ... endlessly or up to n times

Iterators terminating on the shortest input sequence:
accumulate(p[, func]) --> p0, p0+p1, p0+p1+p2
chain(p, q, ...) --> p0, p1, ... plast, q0, q1, ...
chain.from_iterable([p, q, ...]) --> p0, p1, ... plast, q0, q1, ...
compress(data, selectors) --> (d[0] if s[0]), (d[1] if s[1]), ...
dropwhile(pred, seq) --> seq[n], seq[n+1], starting when pred fails
groupby(iterable[, keyfunc]) --> sub-iterators grouped by value of keyfunc(v)
filterfalse(pred, seq) --> elements of seq where pred(elem) is False
islice(seq, [start,] stop [, step]) --> elements from
       seq[start:stop:step]
pairwise(s) --> (s[0],s[1]), (s[1],s[2]), (s[2], s[3]), ...
starmap(fun, seq) --> fun(*seq[0]), fun(*seq[1]), ...
tee(it, n=2) --> (it1, it2 , ... itn) splits one it

In [3]:
help(itertools.islice)

Help on class islice in module itertools:

class islice(builtins.object)
 |  islice(iterable, stop) --> islice object
 |  islice(iterable, start, stop[, step]) --> islice object
 |  
 |  Return an iterator whose next() method returns selected values from an
 |  iterable.  If start is specified, will skip all preceding elements;
 |  otherwise, start defaults to zero.  Step defaults to one.  If
 |  specified as another value, step determines how many values are
 |  skipped between successive calls.  Works like a slice() on a list
 |  but returns an iterator.
 |  
 |  Methods defined here:
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  __iter__(self, /)
 |      Implement iter(self).
 |  
 |  __next__(self, /)
 |      Implement next(self).
 |  
 |  __reduce__(...)
 |      Return state information for pickling.
 |  
 |  __setstate__(...)
 |      Set state information for unpickling.
 |  
 |  -------------------------------------------------------------

In [4]:
a = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [5]:
# islice lazily yields the items at positions 3 through 6 of `a`; as with
# the slice a[3:7], the stop position 7 is excluded.
it = itertools.islice(a, 3, 7)

In [6]:
list(it)

[3, 4, 5, 6]

In [7]:
# A list can be looped over any number of times: both loops below print
# every element.
b = [10, 20, 30]
for x in b:
    print(x)
for x in b:
    print(x)

10
20
30
10
20
30


In [8]:
# An iterator can be consumed only once: the first loop exhausts `it`, so
# the second loop over the same iterator prints nothing.
it = iter(b)
for x in it:
    print(x)
for x in it:
    print(x)

10
20
30


In [9]:
help(enumerate)

Help on class enumerate in module builtins:

class enumerate(object)
 |  enumerate(iterable, start=0)
 |  
 |  Return an enumerate object.
 |  
 |    iterable
 |      an object supporting iteration
 |  
 |  The enumerate object yields pairs containing a count (from start, which
 |  defaults to zero) and a value yielded by the iterable argument.
 |  
 |  enumerate is useful for obtaining an indexed list:
 |      (0, seq[0]), (1, seq[1]), (2, seq[2]), ...
 |  
 |  Methods defined here:
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  __iter__(self, /)
 |      Implement iter(self).
 |  
 |  __next__(self, /)
 |      Implement next(self).
 |  
 |  __reduce__(...)
 |      Return state information for pickling.
 |  
 |  ----------------------------------------------------------------------
 |  Class methods defined here:
 |  
 |  __class_getitem__(...) from builtins.type
 |      See PEP 585
 |  
 |  --------------------------------------------------------

In [10]:
a = [10, 20, 30]
b = [1, 2, 3]
# The leftmost `for` clause is the outer loop: for each x in a, y runs
# through all of b.
[x + y for x in a for y in b]

[11, 12, 13, 21, 22, 23, 31, 32, 33]

In [11]:
[x + y
 for x in a
 for y in b]

[11, 12, 13, 21, 22, 23, 31, 32, 33]

In [12]:
result = []
for x in a:
    for y in b:
        result.append(x + y)
result

[11, 12, 13, 21, 22, 23, 31, 32, 33]

In [13]:
{x + y: [x, y] for x in a for y in b}

{11: [10, 1],
 12: [10, 2],
 13: [10, 3],
 21: [20, 1],
 22: [20, 2],
 23: [20, 3],
 31: [30, 1],
 32: [30, 2],
 33: [30, 3]}

In [14]:
matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
# Wrong clause order: the leftmost `for` is the outermost loop, so `row` is
# looked up before the `for row in matrix` clause has bound it.
print(value for value in row for row in matrix)  # People often write this wrong code.

NameError: name 'row' is not defined

In [15]:
matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
# Correct clause order: `row` is bound by the first `for` before the second
# `for` iterates over it. The `*` unpacks the generator so that print
# receives each value as a separate argument.
print(*(value for row in matrix for value in row))

1 2 3 4 5 6 7 8 9


In [16]:
[value
 for row in matrix
 for value in row]

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [17]:
result = []
for row in matrix:
    for value in row: 
        result.append(value)
result

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [18]:
a = [1, 17, 4]
b = [2, 3, 0]
c = [1, 9, 8, 5]

In [19]:
sums = set()
for x in a:
    for y in b:
        for z in c:
            sums.add(x + y + z)
print(sums)

{2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 29}


In [20]:
# Comprehensions are often much shorter than using explicit loops.
sumsc = {x + y + z for x in a for y in b for z in c}
print(sumsc)

{2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 29}


Variable scoping:

- Loop variables in comprehensions (with any number of `for` clauses)
  are scoped locally to the comprehension.

- Loop variables in `for` loops are not scoped locally to the loop: they
  remain bound in the enclosing scope after the loop finishes.

In [21]:
words = ['hello', 'bobcat', 'ow']

In [22]:
# With a loop (usually the slowest).
total = 0
for word in words:
    total += len(word)
total

13

In [23]:
# With a generator expression (usually faster than a loop):
sum(len(word) for word in words)

13

In [24]:
# With map (usually the fastest because len is written in C):
sum(map(len, words))

13

In [25]:
# But usually these performance considerations should be secondary to
# other considerations: correctness and readability.

In [26]:
a = [10, 20, 30, 40, 50, 60]
b = [1, 2, 3, 4]
print([x + y for x in a for y in b])

[11, 12, 13, 14, 21, 22, 23, 24, 31, 32, 33, 34, 41, 42, 43, 44, 51, 52, 53, 54, 61, 62, 63, 64]


In [27]:
# These addends tip poorly and are banned from our restaurant.
deny = {3, 20, 50}

In [28]:
[x + y
 for x in a if x not in deny
 for y in b if y not in deny]

[11, 12, 14, 31, 32, 34, 41, 42, 44, 61, 62, 64]

In [29]:
[x + y
 for x in a if x not in deny
 for y in b if y not in deny and x != y * 10]

[12, 14, 31, 32, 34, 41, 42, 61, 62, 64]

In [30]:
# Raises an error: this `if` belongs to the `for x` clause, so it is
# evaluated before the `for y` clause below has assigned `y`.
[x + y
 for x in a if x not in deny and x != y * 10
 for y in b if y not in deny]

UnboundLocalError: local variable 'y' referenced before assignment

In [31]:
# A `for` clause may be followed by several `if` clauses; an element is kept
# only when all of them pass, so this gives the same result as joining the
# conditions with `and`.
[x + y
 for x in a if x not in deny
 for y in b if y not in deny
 if x != y * 10]

[12, 14, 31, 32, 34, 41, 42, 61, 62, 64]

In [32]:
[x + y
 for x in a if x not in deny
 for y in b if y not in deny if x != y * 10]

[12, 14, 31, 32, 34, 41, 42, 61, 62, 64]

In [33]:
sums = []
for x in a: 
    if x not in deny: 
        for y in b: 
            if y not in deny and x != y * 10: 
                sums.append(x + y)
print(sums)

[12, 14, 31, 32, 34, 41, 42, 61, 62, 64]


In [34]:
sums = []
for x in a: 
    if x not in deny: 
        for y in b: 
            if y not in deny: 
                if x != y * 10: 
                    sums.append(x + y)
print(sums)

[12, 14, 31, 32, 34, 41, 42, 61, 62, 64]


In [35]:
sums = []
for x in a: 
    if x in deny: 
        continue
    for y in b: 
        if y in deny or x == y * 10: 
            continue
        sums.append(x + y)
print(sums)

[12, 14, 31, 32, 34, 41, 42, 61, 62, 64]


In [36]:
sums = []
for x in a: 
    if x in deny: 
        continue
    for y in b: 
        if y in deny:
            continue
        if x == y * 10:
            continue 
        sums.append(x + y)
print(sums)

[12, 14, 31, 32, 34, 41, 42, 61, 62, 64]


In [37]:
filtered = []
for x in (1, 5, 2, 7, 3, 15, 17, 100, 9, 10):
    if x % 5 == 0:
        continue
    filtered.append(x)
filtered

[1, 2, 7, 3, 17, 9]

In [38]:
filtered = []
for x in (1, 5, 2, 7, 3, 15, 17, 100, 9, 10):
    if x % 5 != 0:
        filtered.append(x)
filtered 

[1, 2, 7, 3, 17, 9]

In [39]:
dir(itertools)

['__doc__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 '_grouper',
 '_tee',
 '_tee_dataobject',
 'accumulate',
 'chain',
 'combinations',
 'combinations_with_replacement',
 'compress',
 'count',
 'cycle',
 'dropwhile',
 'filterfalse',
 'groupby',
 'islice',
 'pairwise',
 'permutations',
 'product',
 'repeat',
 'starmap',
 'takewhile',
 'tee',
 'zip_longest']

In [40]:
help(itertools.product)

Help on class product in module itertools:

class product(builtins.object)
 |  product(*iterables, repeat=1) --> product object
 |  
 |  Cartesian product of input iterables.  Equivalent to nested for-loops.
 |  
 |  For example, product(A, B) returns the same as:  ((x,y) for x in A for y in B).
 |  The leftmost iterators are in the outermost for-loop, so the output tuples
 |  cycle in a manner similar to an odometer (with the rightmost element changing
 |  on every iteration).
 |  
 |  To compute the product of an iterable with itself, specify the number
 |  of repetitions with the optional repeat keyword argument. For example,
 |  product(A, repeat=4) means the same as product(A, A, A, A).
 |  
 |  product('ab', range(3)) --> ('a',0) ('a',1) ('a',2) ('b',0) ('b',1) ('b',2)
 |  product((0,1), (0,1), (0,1)) --> (0,0,0) (0,0,1) (0,1,0) (0,1,1) (1,0,0) ...
 |  
 |  Methods defined here:
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  __iter__(self, /)

In [41]:
list(itertools.product((1,2),(1,2)))

[(1, 1), (1, 2), (2, 1), (2, 2)]

In [42]:
list(itertools.product((1,2), repeat=2))

[(1, 1), (1, 2), (2, 1), (2, 2)]

In [43]:
a = 'pqr'
b = 'xyz'
c = '123'
list(itertools.product(a, b, c))

[('p', 'x', '1'),
 ('p', 'x', '2'),
 ('p', 'x', '3'),
 ('p', 'y', '1'),
 ('p', 'y', '2'),
 ('p', 'y', '3'),
 ('p', 'z', '1'),
 ('p', 'z', '2'),
 ('p', 'z', '3'),
 ('q', 'x', '1'),
 ('q', 'x', '2'),
 ('q', 'x', '3'),
 ('q', 'y', '1'),
 ('q', 'y', '2'),
 ('q', 'y', '3'),
 ('q', 'z', '1'),
 ('q', 'z', '2'),
 ('q', 'z', '3'),
 ('r', 'x', '1'),
 ('r', 'x', '2'),
 ('r', 'x', '3'),
 ('r', 'y', '1'),
 ('r', 'y', '2'),
 ('r', 'y', '3'),
 ('r', 'z', '1'),
 ('r', 'z', '2'),
 ('r', 'z', '3')]

In [44]:
# product also accepts one-shot iterators, and yields the same 27 tuples as
# it does for the strings themselves.
a_it = iter(a)
b_it = iter(b)
c_it = iter(c)
list(itertools.product(a_it, b_it, c_it))

[('p', 'x', '1'),
 ('p', 'x', '2'),
 ('p', 'x', '3'),
 ('p', 'y', '1'),
 ('p', 'y', '2'),
 ('p', 'y', '3'),
 ('p', 'z', '1'),
 ('p', 'z', '2'),
 ('p', 'z', '3'),
 ('q', 'x', '1'),
 ('q', 'x', '2'),
 ('q', 'x', '3'),
 ('q', 'y', '1'),
 ('q', 'y', '2'),
 ('q', 'y', '3'),
 ('q', 'z', '1'),
 ('q', 'z', '2'),
 ('q', 'z', '3'),
 ('r', 'x', '1'),
 ('r', 'x', '2'),
 ('r', 'x', '3'),
 ('r', 'y', '1'),
 ('r', 'y', '2'),
 ('r', 'y', '3'),
 ('r', 'z', '1'),
 ('r', 'z', '2'),
 ('r', 'z', '3')]

In [45]:
# Nested `for` clauses do not restart an exhausted iterator. c_it is used up
# during the first (x, y) pair and b_it during the first x, so every later
# inner loop is empty and only three tuples are produced.
a_it = iter(a)
b_it = iter(b)
c_it = iter(c)
[(x, y, z) for x in a_it for y in b_it for z in c_it]

[('p', 'x', '1'), ('p', 'x', '2'), ('p', 'x', '3')]

In [46]:
# A generator expression is evaluated lazily, so the endless
# itertools.count() in the outer clause is not a problem: each next() call
# computes just one more pair.
it = ((index, value) for index in itertools.count() for value in 'abc')
next(it)

(0, 'a')

In [47]:
next(it)

(0, 'b')

In [48]:
next(it)

(0, 'c')

In [49]:
next(it)

(1, 'a')

In [50]:
next(it)

(1, 'b')

In [51]:
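# Do not run: itertools.product(itertools.count(), 'abc')
# product reads each input iterable to the end before it yields anything,
# so with an infinite input such as count() the call never produces a result.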

In [52]:
a = [10, 20, 30, 40, 50, 60]
b = [1, 2, 3, 4]
print([x + y for x in a for y in b])

[11, 12, 13, 14, 21, 22, 23, 24, 31, 32, 33, 34, 41, 42, 43, 44, 51, 52, 53, 54, 61, 62, 63, 64]


In [53]:
m = [x + y for x, y in itertools.product(a, b)]
print(m)

[11, 12, 13, 14, 21, 22, 23, 24, 31, 32, 33, 34, 41, 42, 43, 44, 51, 52, 53, 54, 61, 62, 63, 64]


In [54]:
m = [sum(xy) for xy in itertools.product(a, b)]
print(m)

[11, 12, 13, 14, 21, 22, 23, 24, 31, 32, 33, 34, 41, 42, 43, 44, 51, 52, 53, 54, 61, 62, 63, 64]


In [55]:
m = list(map(sum, itertools.product(a, b)))
print(m)

[11, 12, 13, 14, 21, 22, 23, 24, 31, 32, 33, 34, 41, 42, 43, 44, 51, 52, 53, 54, 61, 62, 63, 64]


In [56]:
help(itertools.chain)

Help on class chain in module itertools:

class chain(builtins.object)
 |  chain(*iterables) --> chain object
 |  
 |  Return a chain object whose .__next__() method returns elements from the
 |  first iterable until it is exhausted, then elements from the next
 |  iterable, until all of the iterables are exhausted.
 |  
 |  Methods defined here:
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  __iter__(self, /)
 |      Implement iter(self).
 |  
 |  __next__(self, /)
 |      Implement next(self).
 |  
 |  __reduce__(...)
 |      Return state information for pickling.
 |  
 |  __setstate__(...)
 |      Set state information for unpickling.
 |  
 |  ----------------------------------------------------------------------
 |  Class methods defined here:
 |  
 |  __class_getitem__(...) from builtins.type
 |      See PEP 585
 |  
 |  from_iterable(iterable, /) from builtins.type
 |      Alternative chain() constructor taking a single iterable argument that evaluates lazily.

In [57]:
help(sum)

Help on built-in function sum in module builtins:

sum(iterable, /, start=0)
    Return the sum of a 'start' value (default: 0) plus an iterable of numbers
    
    When the iterable is empty, return the start value.
    This function is intended specifically for use with numeric values and may
    reject non-numeric types.



In [58]:
row1 = (1, 2, 3)
row2 = (4, 5, 6)
row3 = (7, 8, 9)

In [59]:
trow1 = (row1[0], row2[0], row3[0])
trow1

(1, 4, 7)

In [60]:
it1 = iter(row1)
it2 = iter(row2)
it3 = iter(row3)

In [61]:
trow1 = (next(it1), next(it2), next(it3)) 

In [62]:
trow1

(1, 4, 7)

In [63]:
# matrix = ((1, 2, 3), (4, 5, 6), (7, 8, 9))

In [64]:
for element in row3: 
    print(element)

7
8
9


In [65]:
for index, element in enumerate(row3):
    print(f'{index}: {element}')

0: 7
1: 8
2: 9


In [66]:
words = ['first', 'second', 'third']

In [67]:
for word, element in zip(words, row3):
    print(f'{word}: {element}')

first: 7
second: 8
third: 9


In [68]:
entries = ['seven', 'eight', 'nine']

In [69]:
# first: 7 (seven)
for word, element, entry in zip(words, row3, entries):
    print(f'{word}: {element} ({entry})')

first: 7 (seven)
second: 8 (eight)
third: 9 (nine)


In [70]:
# Print out the rows, one per line, of the transpose of a matrix
# whose rows are row1, row2, and row3.
for element1, element2, element3 in zip(row1, row2, row3): 
    print(element1, element2, element3)

1 4 7
2 5 8
3 6 9


In [71]:
# Now, do the same thing, but actually create each row of the transpose
# and print it out. (The rows of the transpose are also tuples.)
for element1, element2, element3 in zip(row1, row2, row3): 
    trow = (element1, element2, element3)
    print(trow)

(1, 4, 7)
(2, 5, 8)
(3, 6, 9)


In [72]:
# Now, do the same thing, but use only one loop variable, not three.
for elements in zip(row1, row2, row3):
    print(elements)    

(1, 4, 7)
(2, 5, 8)
(3, 6, 9)


In [73]:
# bad: works, but unpacking into a set display is a roundabout way to convert
a = [10, 20, 30, 40, 50]
s = {*a}
s

{10, 20, 30, 40, 50}

In [74]:
# better: calling set() states the conversion directly
a = [10, 20, 30, 40, 50]
s = set(a)
s

{10, 20, 30, 40, 50}

In [75]:
# A set has no defined element order, so the tuple built from it need not
# follow the order of the list the set came from.
t = tuple(s)
t

(40, 10, 50, 20, 30)

In [76]:
l = list(t)
l

[40, 10, 50, 20, 30]

In [77]:
sq = (x**2 for x in range(1, 6))

In [78]:
sqs = set(sq)
sqs

{1, 4, 9, 16, 25}

In [79]:
a = [10, 20, 30, 40, 50]
b = enumerate(a)

In [80]:
le = list(b) 

In [81]:
le

[(0, 10), (1, 20), (2, 30), (3, 40), (4, 50)]

In [82]:
a = 'spam'
b = 'quux'
z = zip(a, b)

In [83]:
zt = tuple(z)

In [84]:
zt

(('s', 'q'), ('p', 'u'), ('a', 'u'), ('m', 'x'))

In [85]:
a = [1, 7, 14, 7, 1, 1, 1, 9, 1, 15, 14, 3, 1, 1, 7]

In [86]:
s = {*a}

In [87]:
s

{1, 3, 7, 9, 14, 15}

In [88]:
s = set(a)

In [89]:
s

{1, 3, 7, 9, 14, 15}