# The itertools Module
(lecture based on the material by [Dr. Gregory Watson](https://nyu-cds.github.io/python-itertools/))

In this lecture we will learn about:

- The concept of iterators
- Infinite Iterators
- Finite Iterators
- Combinatoric Generators

The __itertools__ module implements a number of iterator building blocks that provide fast, memory efficient tools.

An iterator is an object that provides two methods:

- `__next__`, which returns the next value from the iterator (and raises `StopIteration` once no values are left)
- `__iter__`, which returns the iterator itself

An iterator behaves like a list of values, with some important differences:

- The values are generated on demand (the full sequence is never stored in memory)
- The values can only be accessed in sequence (not like an array)
- The values can only be accessed once

In [16]:
# iter() wraps the string in an iterator that hands out one character at a time
it = iter('PYTHON')

# each __next__() call consumes exactly one value
print(it.__next__())
print(it.__next__())
# the comprehension drains everything that is still left in the iterator
l = [i for i in it]
print(l)

# at this point the iterator is exhausted, so asking for one more value
# raises StopIteration (see the end of this cell's output)
print(it.__next__())

P
Y
['T', 'H', 'O', 'N']


StopIteration: 

In [None]:
# Build a one-shot iterator over the characters of the course code.
it = iter('DS-GA-3001.001')

# The first pass pairs every character with its position and drains `it`.
first_pass = list(enumerate(it))
print(first_pass)

# `it` is exhausted now, so a second pass finds nothing left to enumerate.
second_pass = list(enumerate(it))
print(second_pass)

## Infinite Iterators
The __itertools__ module comes with three iterators that can iterate infinitely.
- useful for generating numbers or cycling over iterables of unknown length
- infinite iterators need to be stopped

#### itertools.count(start,step)

In [21]:
from itertools import count

# count(10, 3) never stops on its own, so the loop must break out explicitly
# once the values pass 30.
for i in count(10, 3):
    if i > 30:
        break
    print(i)

10
13
16
19
22
25
28


#### itertools.islice(seq, [start,] stop [, step])

In [28]:
from itertools import islice

# One argument (stop): the first 7 values.
first_seven = islice(count(10, 3), 7)
print(list(first_seven))

# Two arguments (start, stop): the values at positions 5 and 6.
positions_5_to_6 = islice(count(10, 3), 5, 7)
print(list(positions_5_to_6))

# Three arguments (start, stop, step): every second value from position 2 up to 7.
every_second = islice(count(10, 3), 2, 8, 2)
print(list(every_second))

[10, 13, 16, 19, 22, 25, 28]
[25, 28]
[16, 22, 28]


In [32]:
# stop=None means "run to the end": skip the first two letters, keep the rest.
tail = islice('ABCDEFG', 2, None)
print(list(tail))

['C', 'D', 'E', 'F', 'G']


#### itertools.cycle(seq)

In [29]:
from itertools import cycle

# cycle() repeats 'a', 'b', 'c' forever; islice() cuts the stream off after 10 values.
first_ten = islice(cycle('abc'), 10)
print(list(first_ten))

['a', 'b', 'c', 'a', 'b', 'c', 'a', 'b', 'c', 'a']


In [36]:
# The same kind of iterator can be consumed in more than one way.
l = ['advanced', 'python', 'for', 'data', 'science']

# 1) Let a for-loop pull the values out of the iterator.
liter = islice(cycle(l), 10)
for i in liter:
    print(i)

print('-----')

# 2) Ask for the values one by one with next().
liter = islice(cycle(l), 10)  # why do we have to restate liter?
for i in range(10):
    word = next(liter)
    print(word)

advanced
python
for
data
science
advanced
python
for
data
science
-----
advanced
python
for
data
science
advanced
python
for
data
science


#### itertools.repeat(elem [,times])

In [37]:
from itertools import repeat

# repeat() yields the same object again and again, here exactly 5 times.
copies = repeat('abcd', 5)
print(list(copies))

['abcd', 'abcd', 'abcd', 'abcd', 'abcd']


## Finite Iterators
itertools also has a number of iterators that terminate.

#### itertools.accumulate(seq [, func])

In [39]:
from itertools import accumulate
import operator

# Without a function, accumulate() keeps a running sum:
# 0, 0+1, 1+2, 3+3, 6+4, ...
running_sums = accumulate(range(10))
print(list(running_sums))

# Any two-argument function can be supplied; operator.mul gives running products:
# 1, 1*2, 2*3, 6*4
running_products = accumulate(range(1, 5), operator.mul)
print(list(running_products))

[0, 1, 3, 6, 10, 15, 21, 28, 36, 45]
[1, 2, 6, 24]


In [40]:
# accumulate() is not restricted to numbers: the characters are combined with +
prefixes = accumulate('abc')
print(list(prefixes))

['a', 'ab', 'abc']


#### itertools.chain(*seq)
Chains several iterables together into one flat sequence

In [119]:
from itertools import chain

# Build n identical rows; each row holds the first 10 values of the
# endlessly repeating sequence 'a', 'ab', 'abc'.
n = 10
li = list(accumulate('abc'))
l = []
for i in range(n):
    row = islice(cycle(li), 10)
    l.append(list(row))

# Show the first two rows.
print(l[0])
print(l[1])

['a', 'ab', 'abc', 'a', 'ab', 'abc', 'a', 'ab', 'abc', 'a']
['a', 'ab', 'abc', 'a', 'ab', 'abc', 'a', 'ab', 'abc', 'a']


In [114]:
%%timeit -n 10 
# Timed variant 1: flatten l by unpacking it, so every sub-list becomes
# one positional argument of chain().
lflat1 = list(chain(*l))

#print(lflat1[:20])

['a', 'ab', 'abc', 'a', 'ab', 'abc', 'a', 'ab', 'abc', 'a', 'ab', 'abc', 'a', 'ab', 'abc', 'a', 'ab', 'abc', 'a', 'ab']


In [115]:
%%timeit -n 10

# Timed variant 2: flatten l by hand, extending one result list with each
# sub-list in turn.
lflat2 = []
for i in range(n):
    lflat2.extend(l[i])

#print(lflat2[:20])

['a', 'ab', 'abc', 'a', 'ab', 'abc', 'a', 'ab', 'abc', 'a', 'ab', 'abc', 'a', 'ab', 'abc', 'a', 'ab', 'abc', 'a', 'ab']


In [120]:
%%timeit -n 10

# Timed variant 3: the same manual flattening, written with the in-place
# += operator instead of extend().
lflat2 = []
for i in range(n):
    lflat2 += l[i]

#print(lflat2[:20])

['a', 'ab', 'abc', 'a', 'ab', 'abc', 'a', 'ab', 'abc', 'a', 'a', 'ab', 'abc', 'a', 'ab', 'abc', 'a', 'ab', 'abc', 'a']


In [109]:
%%timeit -n 10 
# Timed variant 4: chain.from_iterable() receives the list of sub-lists
# itself, so no argument unpacking is needed.
lflat1 = list(chain.from_iterable(l))

#print(lflat1[:10])

99.3 ms ± 2.12 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


#### itertools.compress(seq, selectors)
Useful for filtering an iterable using a second iterable

In [121]:
from itertools import compress

letters = 'ABCDEFG'
bools = [True, False, True, True, False]

# The selector list is shorter than the data: compress() simply stops when
# the shorter of the two inputs runs out, so the sizes do not need to match.
selected = compress(letters, bools)
print(list(selected))

['A', 'C', 'D']


#### itertools.dropwhile(predicate, seq) and itertools.takewhile(predicate, seq)
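- `dropwhile` skips the leading elements for which the predicate is True, then returns every remaining element (the predicate is not checked again)
- `takewhile` returns the leading elements for which the predicate is True and stops at the first element for which it is False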

In [144]:
from itertools import dropwhile, takewhile

values = [6, 7, 8, 9, 1, 2, 3, 10]


def above_five(x):
    """Return True for values strictly greater than 5."""
    return x > 5


# dropwhile discards the leading run that satisfies the predicate and keeps
# everything after it (including the final 10).
print(list(dropwhile(above_five, values)))
# takewhile keeps only that leading run and stops at the first failure.
print(list(takewhile(above_five, values)))

[1, 2, 3, 10]
[6, 7, 8, 9]


#### itertools.filterfalse(predicate, seq) 

In [123]:
from itertools import filterfalse

# filterfalse keeps exactly the elements for which the predicate is False,
# i.e. here every value that is NOT smaller than 5.
values = [6, 7, 8, 9, 1, 2, 3, 10]
not_below_five = filterfalse(lambda x: x < 5, values)
print(list(not_below_five))

[6, 7, 8, 9, 10]


#### itertools.groupby(seq, key=None)
Return consecutive keys and groups from the seq.

In [132]:
from itertools import groupby
 
numbers = range(15)

# Integer division by 5 maps 0-4 to key 0, 5-9 to key 1 and 10-14 to key 2.
for key, group in groupby(numbers, key=lambda value: value // 5):
    members = list(group)
    print(key, members)

# groups are formed from consecutive elements

0 [0, 1, 2, 3, 4]
1 [5, 6, 7, 8, 9]
2 [10, 11, 12, 13, 14]


In [133]:
# The parity flips on every element, so each run of equal keys has length
# one and groupby emits one single-element group per number.
for key, group in groupby(numbers, key=lambda value: value % 2):
    members = list(group)
    print(key, members)

0 [0]
1 [1]
0 [2]
1 [3]
0 [4]
1 [5]
0 [6]
1 [7]
0 [8]
1 [9]
0 [10]
1 [11]
0 [12]
1 [13]
0 [14]


In [140]:
# Tag every number with its parity. Sorting the (parity, number) pairs puts
# all even numbers before all odd ones, so equal keys become consecutive.
even_odd = sorted((x % 2, x) for x in numbers)
print(even_odd)

# Group on the parity tag and print only the numbers of each group.
for key, group in groupby(even_odd, key=lambda pair: pair[0]):
    members = [number for _, number in group]
    print(key, members)

[(0, 0), (0, 2), (0, 4), (0, 6), (0, 8), (0, 10), (0, 12), (0, 14), (1, 1), (1, 3), (1, 5), (1, 7), (1, 9), (1, 11), (1, 13)]
0 [0, 2, 4, 6, 8, 10, 12, 14]
1 [1, 3, 5, 7, 9, 11, 13]


#### itertools.starmap(function, seq)

In [141]:
from itertools import starmap

# starmap unpacks each tuple into the positional arguments of the function.
pairs = [(2, 3), (4, 5)]
sums = starmap(lambda a, b: a + b, pairs)
for item in sums:
    print(item)

5
9


In [142]:
def add(a, b):
    """Return the result of ``a + b``."""
    total = a + b
    return total


# A named function works with starmap just like a lambda does.
pairs = [(2, 3), (4, 5)]
for item in starmap(add, pairs):
    print(item)

5
9


#### itertools.tee(seq, n=2)
Creates n iterators from the seq

In [148]:
from itertools import tee
data = 'ABCDE'
# Split the single input into three iterators that can each be read in full.
iters = tee(data, 3)

for i, branch in enumerate(iters):
    print('iterator ', i)
    for item in branch:
        print(item)

iterator  0
A
B
C
D
E
iterator  1
A
B
C
D
E
iterator  2
A
B
C
D
E


#### itertools.zip_longest(*seq, fillvalue=None)

In [154]:
from itertools import zip_longest

# The shorter input is padded with fillvalue until the longest one is used up.
pairs = zip_longest('ABCD', 'xy', fillvalue='BLANK')
print(list(pairs))

# With three inputs the longest one (range(5)) decides the length.
triples = zip_longest('ABCD', 'xy', range(5), fillvalue='BLANK')
print(list(triples))

[('A', 'x'), ('B', 'y'), ('C', 'BLANK'), ('D', 'BLANK')]
[('A', 'x', 0), ('B', 'y', 1), ('C', 'BLANK', 2), ('D', 'BLANK', 3), ('BLANK', 'BLANK', 4)]
[(['ABCD'],), (['xy'],), ([range(0, 5)],)]


In [156]:
# useful to create dictionaries: keys without a partner get the fill value
l = [['xy'], ['zk']]

dc = {key: value for key, value in zip_longest('ABCD', l, fillvalue='BLANK')}
print(dc)

{'A': ['xy'], 'B': ['zk'], 'C': 'BLANK', 'D': 'BLANK'}


## Combinatoric Generators
Iterators that can be used for creating combinations and permutations of data

#### itertools.combinations(seq, r) 
#### itertools.combinations_with_replacement(seq, r)

In [162]:
from itertools import combinations, combinations_with_replacement

# Every way of choosing r letters, ignoring order, for r = 2 and r = 3.
for size in (2, 3):
    print(list(combinations('WXYZ', size)))

[('W', 'X'), ('W', 'Y'), ('W', 'Z'), ('X', 'Y'), ('X', 'Z'), ('Y', 'Z')]
[('W', 'X', 'Y'), ('W', 'X', 'Z'), ('W', 'Y', 'Z'), ('X', 'Y', 'Z')]


In [160]:
# Each combination is a tuple of letters; print the letters glued together.
for item in combinations('WXYZ', 2):
    print(*item, sep='')

WX
WY
WZ
XY
XZ
YZ


In [166]:
# With replacement a letter may be chosen more than once (WW, XX, ...).
pairs_with_repeats = combinations_with_replacement('WXYZ', 2)
for item in pairs_with_repeats:
    word = ''.join(item)
    print(word)

WW
WX
WY
WZ
XX
XY
XZ
YY
YZ
ZZ


#### itertools.product(*seq, repeat=1)
Produces the cartesian product of sequences

In [167]:
from itertools import product

# One (negative, positive) pair per axis; product() picks one value from
# each pair in every possible way.
arrays = [(-k, k) for k in (1, 3, 5)]
cp = [*product(*arrays)]
print(cp)

[(-1, -3, -5), (-1, -3, 5), (-1, 3, -5), (-1, 3, 5), (1, -3, -5), (1, -3, 5), (1, 3, -5), (1, 3, 5)]


#### itertools.permutations(iterable, r=None)

In [168]:
from itertools import permutations

# Unlike combinations, order matters here: both WX and XW are produced.
ordered_pairs = permutations('WXYZ', 2)
for item in ordered_pairs:
    print(*item, sep='')

WX
WY
WZ
XW
XY
XZ
YW
YX
YZ
ZW
ZX
ZY


In [171]:
import numpy as np

# Fill a 4-dimensional array one cell at a time, using product() to generate
# every index tuple (i0, i1, i2, i3).
side = 3  # length of every axis
ndim = 4  # number of axes

# The indices run from 0 to side-1 on every axis, so each axis needs `side`
# entries; with a 2x2x2x2 array the index value 2 raises IndexError.
M = np.zeros((side,) * ndim)

# product(range(side), repeat=ndim) yields (0, 0, 0, 0), (0, 0, 0, 1), ...,
# (2, 2, 2, 2): the same tuples as product((0,1,2),(0,1,2),(0,1,2),(0,1,2)).
cids = product(range(side), repeat=ndim)
for i in cids:
    M[i] = 1  # indexing with a full-length tuple addresses a single element

print(M)

[[[[ 1.  1.  1.]
   [ 1.  1.  1.]
   [ 1.  1.  1.]]

  [[ 1.  1.  1.]
   [ 1.  1.  1.]
   [ 1.  1.  1.]]

  [[ 1.  1.  1.]
   [ 1.  1.  1.]
   [ 1.  1.  1.]]]


 [[[ 1.  1.  1.]
   [ 1.  1.  1.]
   [ 1.  1.  1.]]

  [[ 1.  1.  1.]
   [ 1.  1.  1.]
   [ 1.  1.  1.]]

  [[ 1.  1.  1.]
   [ 1.  1.  1.]
   [ 1.  1.  1.]]]


 [[[ 1.  1.  1.]
   [ 1.  1.  1.]
   [ 1.  1.  1.]]

  [[ 1.  1.  1.]
   [ 1.  1.  1.]
   [ 1.  1.  1.]]

  [[ 1.  1.  1.]
   [ 1.  1.  1.]
   [ 1.  1.  1.]]]]
