# itertools module
- collection of advanced iteration tools
- [doc](https://docs.python.org/3.6/library/itertools.html)

In [1]:
import itertools

In [2]:
# cartesian product: every item of the first list paired with every item of the second

names = ['jack', 'jill']
words = ['hill', 'up', 'water']
[*itertools.product(names, words)]

[('jack', 'hill'),
 ('jack', 'up'),
 ('jack', 'water'),
 ('jill', 'hill'),
 ('jill', 'up'),
 ('jill', 'water')]

# do nested loops with easy breaks

In [3]:
# product walks the cartesian product in linear order
# (the rightmost range varies fastest), so a single break leaves both "loops"

pairs = itertools.product(range(4), range(10, 13))
for x, y in pairs:
    print(x, y)
    if (x, y) == (3, 11):
        break

0 10
0 11
0 12
1 10
1 11
1 12
2 10
2 11
2 12
3 10
3 11


# groupby
- something like the Linux `uniq` command: it groups runs of adjacent equal items, so sort the input first
- lazy function

In [4]:
# groupby only merges adjacent equal items, hence the sort before grouping
ordered = sorted([1, 2, 3, 1, 1, 2, 1, 3, 7, 3])
for k, g in itertools.groupby(ordered):
    members = list(g)
    print(k, members)


1 [1, 1, 1, 1]
2 [2, 2]
3 [3, 3, 3]
7 [7]


# chain
- takes an arbitrary number of args,
- and iterates over each arg, from left to right
- note chain can take any mix of iterable types
- lazy function

In [5]:
# chain walks through each argument in turn, whatever its iterable type
list(itertools.chain('foo', [1, 2, 3], 'bar'))

['f', 'o', 'o', 1, 2, 3, 'b', 'a', 'r']

In [6]:
# chain.from_iterable takes ONE iterable of iterables and flattens it by one level

parts = ('foo', [1, 2, 3], 'bar')
[*itertools.chain.from_iterable(parts)]

['f', 'o', 'o', 1, 2, 3, 'b', 'a', 'r']

# compress

In [7]:
# similar to numpy boolean indexing: keep the items whose selector is truthy

selectors = [1, 0, 0, 1, 0]
[*itertools.compress(range(5), selectors)]

[0, 3]

# Dot product again! (zip_longest)

In [8]:
# another way to do a padded "dot product" (zip):
# zip_longest runs until the longest input ends, padding the shorter ones with fillvalue

rows = ([1, 2, 3, 4], [1], [4, 5])
list(itertools.zip_longest(*rows, fillvalue=10))

[(1, 1, 4), (2, 10, 5), (3, 10, 10), (4, 10, 10)]

# combinations 
- iterates over all possible subsets of a given size that can be made from an iterable
- remember that sets are not ordered, so you will not see both (0,1,2) and (2,1,0) in the output
- can make subsets with or without replacement
- lazy function 

In [9]:
# every 3-element subset of 0..3
[*itertools.combinations(range(4), 3)]

[(0, 1, 2), (0, 1, 3), (0, 2, 3), (1, 2, 3)]

In [10]:
# stop at the first 3-element subset whose members add up to 5
subsets = itertools.combinations(range(4), 3)
comb = next(candidate for candidate in subsets if sum(candidate) == 5)
print(comb)

(0, 2, 3)


In [11]:
# a lazy sequence of lazy iterables: one combinations object per subset size

x = [1, 2, 3]

sizes = range(len(x) + 1)
(itertools.combinations(x, r) for r in sizes)

<generator object <genexpr> at 0x00000208C41AE9E8>

In [12]:
# materializing the outer generator still leaves each inner combinations object lazy
[itertools.combinations(x, r) for r in range(len(x) + 1)]

[<itertools.combinations at 0x208c41f37c8>,
 <itertools.combinations at 0x208c41f35e8>,
 <itertools.combinations at 0x208c41f3458>,
 <itertools.combinations at 0x208c41f33b8>]

In [13]:
# laziness gets out of control sometimes!
# power set grouped by subset size: every inner iterable has to be materialized too

[list(itertools.combinations(x, r)) for r in range(len(x) + 1)]

[[()], [(1,), (2,), (3,)], [(1, 2), (1, 3), (2, 3)], [(1, 2, 3)]]

In [14]:
# power set again, flattened into a single list
# maybe a little nicer

by_size = (itertools.combinations(x, r) for r in range(len(x) + 1))
list(itertools.chain.from_iterable(by_size))

[(), (1,), (2,), (3,), (1, 2), (1, 3), (2, 3), (1, 2, 3)]

In [15]:
# no replacement: the only 3-element subset of 3 items is the whole set

[*itertools.combinations(range(3), 3)]

[(0, 1, 2)]

In [16]:
# with replacement: an element may appear more than once in a subset
[*itertools.combinations_with_replacement(range(3), 3)]

[(0, 0, 0),
 (0, 0, 1),
 (0, 0, 2),
 (0, 1, 1),
 (0, 1, 2),
 (0, 2, 2),
 (1, 1, 1),
 (1, 1, 2),
 (1, 2, 2),
 (2, 2, 2)]

# permutations
- order DOES matter
- lazy function

In [17]:
# every ordering of the three items
[*itertools.permutations(range(3))]

[(0, 1, 2), (0, 2, 1), (1, 0, 2), (1, 2, 0), (2, 0, 1), (2, 1, 0)]

In [18]:
# ordered arrangements of length 2 drawn from the three items
[*itertools.permutations(range(3), 2)]

[(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]

In [19]:
# repeat yields the same value forever; the finite range stops the loop after 4 items

g = itertools.repeat(2)
for e, value in zip(range(4), g):
    print(value)

2
2
2
2


In [20]:
# cycle replays its input endlessly; take just the first 13 items

c = itertools.cycle('larry')

list(itertools.islice(c, 13))

['l', 'a', 'r', 'r', 'y', 'l', 'a', 'r', 'r', 'y', 'l', 'a', 'r']

In [21]:
# can use repeat with zip, because zip terminates when the shortest input terminates
# (zip a fresh repeat(2), not the cycle of letters bound to c: 'l' ** 0 is a TypeError)

[b**e for b, e in zip(itertools.repeat(2), range(4))]

[1, 2, 4, 8]

In [22]:
# count is an infinite, lazy arithmetic progression;
# enumerate supplies the index used to break out of the loop

progression = itertools.count(start=3, step=5)
for j, c in enumerate(progression):
    if j > 10:
        break
    print(j, c)


0 3
1 8
2 13
3 18
4 23
5 28
6 33
7 38
8 43
9 48
10 53


# 'slices' of generators

In [23]:
# takewhile yields items from the beginning of a sequence until the predicate first fails

def below_30(value):
    return value < 30

g = itertools.takewhile(below_30, itertools.count(start=3, step=5))
list(g)

[3, 8, 13, 18, 23, 28]

In [24]:
# dropwhile skips items at the beginning of a sequence while the predicate holds,
# then yields everything that follows; take the first 20 of those

g = itertools.dropwhile(lambda x: x < 30, itertools.count(start=3, step=5))
list(itertools.islice(g, 20))

[33,
 38,
 43,
 48,
 53,
 58,
 63,
 68,
 73,
 78,
 83,
 88,
 93,
 98,
 103,
 108,
 113,
 118,
 123,
 128]

In [25]:
# g wraps an infinite count, so it never runs out: there is always another item

next(g)

133

In [26]:
# islice(iterable, start, stop, step) slices any iterator, like seq[4:10:2]

hundreds = itertools.count(start=100)
[*itertools.islice(hundreds, 4, 10, 2)]

[104, 106, 108]

In [27]:
# running totals of the input
# lazy

[*itertools.accumulate([1, 4, 7, 4, 3, 1, 2, 9])]

[1, 5, 12, 16, 19, 20, 22, 31]

In [28]:
# tee splits one iterable into N independent iterators

clones = itertools.tee(range(5), 3)
g1, g2, g3 = clones
list(clones)

[<itertools._tee at 0x208c4288188>,
 <itertools._tee at 0x208c4288388>,
 <itertools._tee at 0x208c4288408>]

In [29]:
# advance g1 twice and g2 once, then take one item from each iterator
for it in (g1, g1, g2):
    next(it)
[next(it) for it in (g1, g2, g3)]

[2, 1, 0]

In [30]:
# list collects whatever each iterator has left

[list(it) for it in (g1, g2, g3)]

[[3, 4], [2, 3, 4], [1, 2, 3, 4]]

In [31]:
# pull out parts of a list
# another case of too much laziness: filter is lazy, so each result is wrapped in list

t1, t2, t3 = itertools.tee(range(20), 3)

evens = filter(lambda x: x % 2 == 0, t1)
above_ten = filter(lambda x: x > 10, t2)
below_seven = filter(lambda x: x < 7, t3)
[list(part) for part in (evens, above_ten, below_seven)]

[[0, 2, 4, 6, 8, 10, 12, 14, 16, 18],
 [11, 12, 13, 14, 15, 16, 17, 18, 19],
 [0, 1, 2, 3, 4, 5, 6]]