# The itertools Module
(lecture based on the material by [Dr. Gregory Watson](https://nyu-cds.github.io/python-itertools/))

In this lecture we will learn about:

- The concept of iterators
- Infinite Iterators
- Finite Iterators
- Combinatoric Generators

The __itertools__ module implements a number of iterator building blocks that provide fast, memory efficient tools.

An iterator is an object that provides two methods:

- `__next__` which returns the next value from the iterator
- `__iter__` which returns the iterator itself

An iterator behaves like a list of values, with some important differences:

- The values are generated on demand (the sequence is not stored in memory)
- The values can only be accessed in sequence (not like an array)
- The values can only be accessed once

In [None]:
it = iter('PYTHON')

# next(it) is the idiomatic spelling of it.__next__()
print(next(it))
print(next(it))
# list() drains whatever the iterator still has to offer
l = list(it)
print(l)

# at this point the iterator is done: asking for one more value raises
# StopIteration; catch it so the rest of the notebook keeps running
try:
    print(next(it))
except StopIteration:
    print('StopIteration: the iterator is exhausted')

In [None]:
it = iter('DS-GA-3001.001')

# the first pass consumes every character of the string
first_pass = list(enumerate(it))
print(first_pass)

# At this point the iterator is done, so a second pass finds nothing left

second_pass = list(enumerate(it))
print(second_pass)

## Infinite Iterators
The __itertools__ module comes with three iterators that can iterate infinitely.
- useful for generating numbers or cycling over iterables of unknown length
- infinite iterators need to be stopped

#### itertools.count(start,step)

In [None]:
from itertools import count

# count(10, 3) yields 10, 13, 16, ... forever, so the loop must stop itself
for value in count(10, 3):
    if value > 30:
        break
    print(value)

#### itertools.islice(seq, [start,] stop [, step])

In [None]:
from itertools import islice

# stop only: the first 7 items
first_seven = islice(count(10, 3), 7)
print(list(first_seven))

# start and stop: items at positions 5 and 6
fifth_to_seventh = islice(count(10, 3), 5, 7)
print(list(fifth_to_seventh))

# start, stop and step: every second item from position 2 up to position 7
every_other = islice(count(10, 3), 2, 8, 2)
print(list(every_other))

In [None]:
# stop=None means "until the input runs out": skip two letters, keep the rest
tail = islice('ABCDEFG', 2, None)
print(list(tail))

#### itertools.cycle(seq)

In [None]:
from itertools import cycle

# cycle() never stops by itself, so islice() caps it at ten items
endless_abc = cycle('abc')
print(list(islice(endless_abc, 10)))
# without list() only the lazy islice object itself would be printed:
#print(islice(cycle('abc'),10))

In [None]:
# iterators can be used in different ways
# (the cells below walk over this word list in three different styles)
l = [
    'advanced',
    'python',
    'for',
    'data',
    'science',
]

In [None]:
%%timeit -n 1
# islice caps the endless cycle at 10 items, so the for loop terminates
liter = islice(cycle(l), 10)
for word in liter:
    print(word)

In [None]:
%%timeit -n 1
liter = islice(cycle(l), 10)  # why do we have to restate liter?
# pull the items one at a time with next() instead of looping over liter
for _ in range(10):
    print(next(liter))

In [None]:
%%timeit -n 1
# cycle() is infinite, so the loop bound is the only thing that stops it;
# take 10 items, the same amount the two islice-based cells above print,
# so that the three timings are comparable
liter = cycle(l)
for _ in range(10):
    print(liter.__next__())

#### itertools.repeat(elem [,times])

In [None]:
from itertools import repeat

# the second argument bounds the otherwise endless repetition
five_copies = repeat('abcde', 5)
print(list(five_copies))
#print(list(repeat(l,5)))

## Finite Iterators
itertools also has a number of iterators that terminate.

#### itertools.accumulate(seq [, func])

In [None]:
from itertools import accumulate
import operator

# the default is a running sum: 0, 0+1, 1+2, 3+3, 6+4,...
running_sums = accumulate(range(10))
print(list(running_sums))

# any two-argument function works, e.g. a running product: 1, 1*2, 2*3, 6*4
running_products = accumulate(range(1, 5), operator.mul)
print(list(running_products))

In [None]:
# it can also handle non-numeric lists: the default + concatenates strings
letter_prefixes = accumulate('abc')
print(list(letter_prefixes))
repeated_words = accumulate(repeat('abc', 3))
print(list(repeated_words))

#### itertools.chain(*seq)
Flattens a sequence of sequences into a single sequence

In [None]:
from itertools import chain

n = 100000
li = list(accumulate('abc'))
# build the n rows directly instead of pre-allocating empty lists and
# overwriting them by index; every row is its own list holding the first
# 10 items of the endless cycle over li
l = [list(islice(cycle(li), 10)) for _ in range(n)]

print(l[0])
print(l[1])

In [None]:
%%timeit -n 10 
# chain(*l) unpacks every sub-list into its own positional argument
lflat1 = list(chain(*l))

# printing is kept out of the timed body (as in the other flattening cells):
# it would run on every timing iteration and distort the measurement
#print(lflat1[:20])

In [None]:
%%timeit -n 10

# grow one flat list by appending each row's items in place
lflat2 = []
for row in range(n):
    lflat2.extend(l[row])

#print(lflat2[:20])

In [None]:
%%timeit -n 10

# same idea as the extend() version, spelled with the += operator
lflat3 = []
for row in range(n):
    lflat3 += l[row]

#print(lflat3[:20])

In [None]:
%%timeit -n 10 
# from_iterable takes the outer list itself, so no argument unpacking happens
flat_items = chain.from_iterable(l)
lflat1 = list(flat_items)

#print(lflat1[:10])

#### itertools.compress(seq, selectors)
Useful for filtering an iterable using a second iterable

In [None]:
from itertools import compress

letters = 'ABCDEFG'
bools = [True, False, True, True, False]

# notice the sizes do not need to match: the shorter input ends the result

kept = compress(letters, bools)
print(list(kept))

In [None]:
def is_A_or_B(c):
    """Return True when c is the letter 'A' or the letter 'B'."""
    return c in ('A', 'B')


# compress needs one flag per letter, so apply the test to each of them
selectors = map(is_A_or_B, letters)
print(list(compress(letters, selectors)))

#### itertools.dropwhile(predicate, seq) and itertools.takewhile(predicate, seq)
- dropwhile: drops elements as long as the predicate is True, then returns every remaining element
- takewhile: returns elements as long as the predicate is True, then stops

In [None]:
from itertools import dropwhile, takewhile

values = [6, 7, 8, 9, 1, 2, 3, 10]


def above_five(x):
    """Predicate shared by both calls below."""
    return x > 5


# dropwhile discards the leading run that passes the test and keeps the rest
print(list(dropwhile(above_five, values)))
# takewhile keeps the leading run that passes the test and then stops
print(list(takewhile(above_five, values)))

#### itertools.filterfalse(predicate, seq) 

In [None]:
from itertools import filterfalse

values = [6, 7, 8, 9, 1, 2, 3, 10]
# keep only the items for which the test x < 5 is false
not_small = filterfalse(lambda x: x < 5, values)
print(list(not_small))

#### itertools.groupby(seq, key=None)
Return consecutive keys and groups from the seq.

In [None]:
from itertools import groupby

numbers = range(15)


def block_of_five(x):
    """Key function: integer-divide by 5 (0 for 0-4, 1 for 5-9, 2 for 10-14)."""
    return x // 5


# groups are formed from consecutive elements
for block_id, members in groupby(numbers, key=block_of_five):
    print(block_id, list(members))

In [None]:
# a new group starts every time the key (here the parity) changes
for parity, members in groupby(numbers, lambda x: x % 2):
    print(parity, list(members))

In [None]:
# sorting the (parity, number) pairs puts equal parities next to each other,
# so groupby can collect each parity into a single group
even_odd = sorted((x % 2, x) for x in numbers)
print(even_odd)

for parity, pairs in groupby(even_odd, lambda pair: pair[0]):
    print(parity, [value for _, value in pairs])

#### itertools.starmap(function, seq)

In [None]:
from itertools import starmap

pairs = [(2, 3), (4, 5)]
# starmap unpacks each tuple into the arguments of the lambda
for total in starmap(lambda a, b: a + b, pairs):
    print(total)

In [None]:
def add(a, b):
    """Return a + b."""
    return a + b


# the same computation with a named function in place of a lambda
pairs = [(2, 3), (4, 5)]
for total in starmap(add, pairs):
    print(total)

#### itertools.tee(seq, n=2)
Creates n iterators from the seq

In [None]:
from itertools import tee

data = 'ABCDE'
iters = tee(data, 3)

# each branch returned by tee() is an independent iterator over the same
# data; enumerate walks the branches without repeating the hard-coded count
for i, branch in enumerate(iters):
    print('iterator ', i)
    for item in branch:
        print(item)

#### itertools.zip_longest(*seq, fillvalue=None)

In [None]:
from itertools import zip_longest

# 'xy' runs out first, so its missing slots are padded with the fill value
pairs = zip_longest('ABCD', 'xy', fillvalue='BLANK')
print(list(pairs))

# with three inputs the longest one, range(5), decides the length
triples = zip_longest('ABCD', 'xy', range(5), fillvalue='BLANK')
print(list(triples))

In [None]:
# useful to create dictionaries

l = [['xy'], ['zk']]

# keys come from 'ABCD'; keys without a partner in l get the fill value
padded_pairs = zip_longest('ABCD', l, fillvalue='BLANK')
dc = dict(padded_pairs)
print(dc)

## Combinatoric Generators
Iterators that can be used for creating combinations and permutations of data

#### itertools.combinations(seq, r) 
#### itertools.combinations_with_replacement(seq, r)

In [None]:
from itertools import combinations, combinations_with_replacement

# all 2-letter, then all 3-letter, selections; each one keeps input order
for size in (2, 3):
    print(list(combinations('WXYZ', size)))

In [None]:
# glue each tuple of letters back into a two-character string
for word in map(''.join, combinations('WXYZ', 2)):
    print(word)

In [None]:
# with replacement a letter may be paired with itself (WW, XX, ...)
for word in map(''.join, combinations_with_replacement('WXYZ', 2)):
    print(word)

#### itertools.product(*seq, repeat=1)
Produces the cartesian product of sequences

In [None]:
from itertools import product

arrays = [(-1, 1), (-3, 3), (-5, 5)]
# the * hands each tuple to product() as a separate factor; every result
# takes one value from each of the three tuples
combos = product(*arrays)
cp = list(combos)
print(cp)

#### itertools.permutations(iterable, r=None)

In [None]:
from itertools import permutations

# unlike combinations, order matters here: both WX and XW are produced
for word in map(''.join, permutations('WXYZ', 2)):
    print(word)

In [None]:
import numpy as np

M = np.zeros((3, 3, 3, 3))
# every 4-tuple of indices drawn from 0, 1, 2 -- the same sequence as passing
# (0, 1, 2) to product() four times
cids = product(range(3), repeat=4)
for cell in cids:
    print(cell)
    # a 4-tuple index addresses exactly one element of the 4-D array
    M[cell] = 1

print(M)