## Python Itertools
A set of iterator building blocks <br>
Inspired by iterator concepts from SML and Haskell <br>
Very fast and widely used in many areas of python development

In [121]:
import itertools as itl

In [122]:
dir(itl)

['__doc__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 '_grouper',
 '_tee',
 '_tee_dataobject',
 'accumulate',
 'chain',
 'combinations',
 'combinations_with_replacement',
 'compress',
 'count',
 'cycle',
 'dropwhile',
 'filterfalse',
 'groupby',
 'islice',
 'permutations',
 'product',
 'repeat',
 'starmap',
 'takewhile',
 'tee',
 'zip_longest']

### Accumulate elements of an iterator 
Make an iterator that returns accumulated sums, or accumulated results of other binary functions <br>
If func is supplied, it should be a function of two arguments. <br> 
Elements of the input iterable may be any type that can be accepted as arguments to func.

### <font color=red> itertools.accumulate(iterable[, func, *, initial=None]) </font>

In [123]:
test1 = [1,2,3,4]
test2 = (1,2,3,4)
test3 = {1,2,3,4}
print(list(itl.accumulate(test1)))
print(list(itl.accumulate(test2)))
print(list(itl.accumulate(test3)))

[1, 3, 6, 10]
[1, 3, 6, 10]
[1, 3, 6, 10]


In [124]:
# now we will pass a function as the second argument
import operator
print(test1)
print(list(itl.accumulate(test1,operator.mul,initial=15)))
# when an initial value is supplied, the output has one extra element:
# the initial value itself comes first

[1, 2, 3, 4]
[15, 15, 30, 90, 360]


In [125]:
data = [3, 4, 6, 2, 1, 9, 0, 7, 5, 8]
# running maximum of data; this result is computed but not displayed,
# because only the value of the final expression appears in the output
list(itl.accumulate(data, max))
cashflows = [1000, -90, -90, -90, -90]
# each step multiplies the running balance by 1.05 and then adds the next payment
list(itl.accumulate(cashflows, lambda bal, pmt: bal*1.05 + pmt))

[1000, 960.0, 918.0, 873.9000000000001, 827.5950000000001]

### Chain in iterator 
Make an iterator that returns elements from the first iterable until it is exhausted <br> 
then proceeds to the next iterable, until all of the iterables are exhausted

### <font color=red> itertools.chain(*iterables) </font>

In [126]:
print(tuple(itl.chain('dot','py')))
print(list(itl.chain([1,2,3],[4,5,6])))

('d', 'o', 't', 'p', 'y')
[1, 2, 3, 4, 5, 6]


In [127]:
# what chain actually does
def xxchain(*iterables):
    """Yield every element of each argument iterable, one iterable after another.

    xxchain('ABC', 'DEF') --> A B C D E F
    """
    for source in iterables:
        # delegate to the current iterable until it is exhausted
        yield from source

In [128]:
list(xxchain([1,2,3],[11]))

[1, 2, 3, 11]

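### <font color=red> itertools.chain.from_iterable(iterable) </font>
The main difference from chain is that it takes a single argument, an iterable whose items are themselves iterables, and evaluates it lazily instead of taking each iterable as a separate positional argument. <br>
For example, chain.from_iterable(['ABC', 'DEF']) yields A B C D E F.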

In [129]:
def chain(*iterables):
    """Yield the elements of each positional iterable in turn.

    chain('ABC', 'DEF') --> A B C D E F
    """
    for it in iterables:
        yield from it


def xxfrom_iterable(iterables):
    """Yield the elements of each iterable found inside a single iterable.

    Pure-Python equivalent of itertools.chain.from_iterable: the inputs
    arrive as ONE argument (an iterable of iterables) rather than as
    separate positional arguments, and that outer iterable is consumed
    lazily, one inner iterable at a time.

    xxfrom_iterable(['ABC', 'DEF']) --> A B C D E F
    """
    for it in iterables:
        yield from it

### <font color=red> itertools.compress(data, selectors) </font>
Make an iterator that filters elements from data returning only those that have a corresponding element in selectors that evaluates to True. <br> Stops when either the data or selectors iterables has been exhausted.

In [130]:
print(list(itl.compress('ABCDEF', [1,0,1,0,1,1])))
print(list(itl.compress(['A','B','C'], [1,0,1,0,1,1])))

['A', 'C', 'E', 'F']
['A', 'C']


### <font color=red> itertools.count(start=0, step=1) </font>
Make an iterator that returns evenly spaced values starting with number start. <br> 
Often used as an argument to map() to generate consecutive data points. <br> 
Also, used with zip() to add sequence numbers. <br>
count() creates an infinite iterator; do not loop over it directly with a for loop unless the loop has its own exit condition <br>
    
#### These are infinite iterators 

In [131]:
def xxcount(start=0, step=1):
    """Endlessly yield values beginning at start, each one step more than the last.

    xxcount(10)       --> 10 11 12 13 14 ...
    xxcount(2.5, 0.5) --> 2.5 3.0 3.5 ...
    """
    value = start
    yield value
    # never terminates: the caller decides how many values to pull
    while True:
        value += step
        yield value

In [132]:
itz = iter(itl.count(0,2))
[next(itz) for _ in range(10)]

[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]

In [133]:
next(itz)

20

### <font color=red> itertools.cycle(iterable) </font>
Make an iterator returning elements from the iterable and saving a copy of each. <br>
When the iterable is exhausted, return elements from the saved copy; this repeats indefinitely. <br>
Note, this may require significant auxiliary storage (depending on the length of the iterable).<br>
cycle() creates an infinite iterator; do not loop over it directly with a for loop unless the loop has its own exit condition <br>
    
#### These are infinite iterators 

In [134]:
def xxcycle(iterable):
    """Yield the items of iterable, then replay the remembered items forever.

    xxcycle('ABCD') --> A B C D A B C D A B C D ...
    """
    cache = []
    for item in iterable:
        yield item
        cache.append(item)
    # an empty input leaves the cache empty, so the generator simply ends
    while cache:
        yield from cache

#### Difference between iterable object and iterator object
| Iterable Object | Iterator Object |
| --------------- | --------------- | 
| Can be passed to iter() and is accepted as a parameter by many itertools functions | The result of calling iter() on an iterable, or of calling an itertools function or a generator |
| Iterable objects have an `__iter__` method and usually do not have a `__next__` method | Iterator objects have a `__next__` method and also an `__iter__` method that returns the iterator itself |
| Examples: lists, dictionaries, tuples | Examples: the result of iter(any iterable object), generators (functions that use yield) |

In [135]:
itz = itl.cycle('ABCD')

In [136]:
next(itz)

'A'

In [137]:
# we will do next again 
next(itz)

'B'

In [138]:
test2 = itl.cycle([11,12,13])
print([next(test2) for _ in range(10)])

[11, 12, 13, 11, 12, 13, 11, 12, 13, 11]


### <font color=red> itertools.repeat(object[, times]) </font>
Make an iterator that returns object over and over again.<br> 
Runs indefinitely unless the times argument is specified. <br> 
Used as argument to map() for invariant parameters to the called function. <br>
Also used with zip() to create an invariant part of a tuple record.
    
#### These are infinite iterators 

In [139]:
list(map(pow, range(10), itl.repeat(2)))

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

In [140]:
list(itl.repeat(2,4))

[2, 2, 2, 2]

### <font color=red> itertools.dropwhile(predicate, iterable) </font>
Make an iterator that drops elements from the iterable as long as the predicate is true; <br>
afterwards, returns every element. <br>
Note, the iterator does not produce any output until the predicate first becomes false, so it may have a lengthy start-up time. <br> 

In [141]:
def xxdropwhile(predicate, iterable):
    """Skip leading items while predicate holds, then yield everything that follows.

    xxdropwhile(lambda x: x<5, [1,4,6,4,1]) --> 6 4 1
    """
    remaining = iter(iterable)
    for item in remaining:
        if predicate(item):
            continue
        # first item that fails the predicate: emit it and stop testing
        yield item
        break
    # the same iterator is reused, so this resumes right after that item
    yield from remaining

In [142]:
li = [2, 4, 5, 7, 10] 
print(list(itl.dropwhile(lambda x : x % 2 == 0, li))) 

[5, 7, 10]


In [143]:
list(itl.dropwhile(lambda x:x**2>10, [1,2,3,4,5]))

[1, 2, 3, 4, 5]

### <font color=red> itertools.filterfalse(predicate, iterable) </font>
Make an iterator that filters elements from iterable returning only those for which the predicate is False

In [144]:
list(itl.filterfalse(lambda x:x>2,[1,2,3,4,5]))

[1, 2]

### <font color=red> itertools.groupby(iterable, key=None) </font>
Make an iterator that returns consecutive keys and groups from the iterable.<br>
The key is a function computing a key value for each element. <br>
If not specified or is None, key defaults to an identity function and returns the element unchanged. <br>
Generally, the iterable needs to already be sorted on the same key function.

In [145]:
a_list = [("Animal", "cat"), 
          ("Animal", "dog"), 
          ("Bird", "peacock"), 
          ("Bird", "pigeon")]
an_iterator = itl.groupby(a_list, lambda x : x[0])
  
for key, group in an_iterator:
    key_and_group = {key : list(group)}
    print(key_and_group)

{'Animal': [('Animal', 'cat'), ('Animal', 'dog')]}
{'Bird': [('Bird', 'peacock'), ('Bird', 'pigeon')]}


A. Group consecutive items together <br>
B. Group all occurrences of an item, given a sorted iterable <br>
C. Specify how to group items with a key function <br>

In [146]:
[k for k, g in itl.groupby('AAAABBBCCDAABBB')]

['A', 'B', 'C', 'D', 'A', 'B']

In [147]:
[list(g) for k, g in itl.groupby('AAAABBBCCD')]

[['A', 'A', 'A', 'A'], ['B', 'B', 'B'], ['C', 'C'], ['D']]

In [148]:
students = [
    {'mark': '65','grade': 'C'},
    {'mark': '86','grade': 'A'},
    {'mark': '73','grade': 'B'},
    {'mark': '49','grade': 'D'},
    {'mark': '91','grade': 'A'},
    {'mark': '79','grade': 'B'}
]
# NOTE: students is not sorted by 'grade', and groupby starts a new group
# every time the key changes, so 'A' and 'B' each show up twice in the output
for key, value in itl.groupby(students,
                          key = lambda x : x['grade']):
    print(key)
    for k in value:
        print(k)

C
{'mark': '65', 'grade': 'C'}
A
{'mark': '86', 'grade': 'A'}
B
{'mark': '73', 'grade': 'B'}
D
{'mark': '49', 'grade': 'D'}
A
{'mark': '91', 'grade': 'A'}
B
{'mark': '79', 'grade': 'B'}


In [149]:
# Now using itemgetter 
from operator import itemgetter
for key, value in itl.groupby(students,
                          key = itemgetter('grade')):
    print(key)
    for k in value:
        print(k)

C
{'mark': '65', 'grade': 'C'}
A
{'mark': '86', 'grade': 'A'}
B
{'mark': '73', 'grade': 'B'}
D
{'mark': '49', 'grade': 'D'}
A
{'mark': '91', 'grade': 'A'}
B
{'mark': '79', 'grade': 'B'}


In [150]:
lst = [{'date':'2008-04-23','value':'1'},
{'date':'2008-04-01','value':'8'},
{'date':'2008-04-05','value':'3'},
{'date':'2009-04-19','value':'5'},
{'date':'2009-04-21','value':'8'},
{'date':'2010-09-09','value':'3'},
{'date':'2010-09-10','value':'4'}]

for k,v in itl.groupby(lst,key=lambda x:x['date'][:7]):
    print (k, list(v))

2008-04 [{'date': '2008-04-23', 'value': '1'}, {'date': '2008-04-01', 'value': '8'}, {'date': '2008-04-05', 'value': '3'}]
2009-04 [{'date': '2009-04-19', 'value': '5'}, {'date': '2009-04-21', 'value': '8'}]
2010-09 [{'date': '2010-09-09', 'value': '3'}, {'date': '2010-09-10', 'value': '4'}]


### <font color=red> itertools.islice(iterable, start, stop[, step]) </font>
Make an iterator that returns selected elements from the iterable. <br>
If start is non-zero, then elements from the iterable are skipped until start is reached. <br>
Afterward, elements are returned consecutively unless step is set higher than one which results in items being skipped. <br>
If stop is None, then iteration continues until the iterator is exhausted, <br>
if at all; otherwise, it stops at the specified position. Unlike regular slicing, islice() does not support negative values for start, stop, or step.

In [151]:
li = [2, 4, 5, 7, 8, 10, 20] 
print(list(itl.islice(li, 1, 6, 2)))

[4, 7, 10]


In [152]:
# a generator expression can only be iterated once
iterator = (x**2 for x in range(10))
# islice pulls the first five squares out of the generator (skipping two,
# returning three), so only the later squares are left in it afterwards
list(itl.islice(iterator, 2, 5))

[4, 9, 16]

In [153]:
list(iterator)

[25, 36, 49, 64, 81]

In [154]:
# this is clearly incorrect
# NOTE(review): unclear what is "incorrect" here; li still holds all of its
# items after the islice call on it earlier. Confirm the intended point.
li

[2, 4, 5, 7, 8, 10, 20]

In [155]:
li[2:3]

[5]

### <font color=red> itertools.permutations(iterable, r=None) </font>
Return successive r length permutations of elements in the iterable.<br>

If r is not specified or is None, then r defaults to the length of the iterable and all possible full-length permutations are generated.

In [156]:
list(itl.permutations('ABCD',2))

[('A', 'B'),
 ('A', 'C'),
 ('A', 'D'),
 ('B', 'A'),
 ('B', 'C'),
 ('B', 'D'),
 ('C', 'A'),
 ('C', 'B'),
 ('C', 'D'),
 ('D', 'A'),
 ('D', 'B'),
 ('D', 'C')]

In [157]:
list(itl.permutations('ABCD'))

[('A', 'B', 'C', 'D'),
 ('A', 'B', 'D', 'C'),
 ('A', 'C', 'B', 'D'),
 ('A', 'C', 'D', 'B'),
 ('A', 'D', 'B', 'C'),
 ('A', 'D', 'C', 'B'),
 ('B', 'A', 'C', 'D'),
 ('B', 'A', 'D', 'C'),
 ('B', 'C', 'A', 'D'),
 ('B', 'C', 'D', 'A'),
 ('B', 'D', 'A', 'C'),
 ('B', 'D', 'C', 'A'),
 ('C', 'A', 'B', 'D'),
 ('C', 'A', 'D', 'B'),
 ('C', 'B', 'A', 'D'),
 ('C', 'B', 'D', 'A'),
 ('C', 'D', 'A', 'B'),
 ('C', 'D', 'B', 'A'),
 ('D', 'A', 'B', 'C'),
 ('D', 'A', 'C', 'B'),
 ('D', 'B', 'A', 'C'),
 ('D', 'B', 'C', 'A'),
 ('D', 'C', 'A', 'B'),
 ('D', 'C', 'B', 'A')]

In [158]:
list(map("".join, itl.permutations('1234')))

['1234',
 '1243',
 '1324',
 '1342',
 '1423',
 '1432',
 '2134',
 '2143',
 '2314',
 '2341',
 '2413',
 '2431',
 '3124',
 '3142',
 '3214',
 '3241',
 '3412',
 '3421',
 '4123',
 '4132',
 '4213',
 '4231',
 '4312',
 '4321']

### <font color=red> itertools.product(*iterables, repeat=1) </font>
In the terms of Mathematics Cartesian Product of two sets is defined as the set of all ordered pairs (a, b) where a belongs to A and b belongs to B
<ol>
<li>itertools.product(*iterables, repeat=1):
It returns the cartesian product of the provided iterable with itself for the number of times specified by the optional keyword “repeat”. For example, product(arr, repeat=3) means the same as product(arr, arr, arr)
</li>
<li>itertools.product(*iterables):
It returns the cartesian product of all the iterable provided as the argument. For example, product(arr1, arr2, arr3).
</li>    
</ol>

In [159]:
arr1 = [1, 2, 3]
arr2 = [5, 6, 7]
print(list(itl.product(arr1, arr2)))

[(1, 5), (1, 6), (1, 7), (2, 5), (2, 6), (2, 7), (3, 5), (3, 6), (3, 7)]


### <font color=red> itertools.starmap(function, iterable) </font>

In [160]:

li =[(2, 3), (3, 1), (4, 6), (5, 3), (6, 5), (7, 2)]
 
list(itl.starmap(lambda x, y:x + y, li))

[5, 4, 10, 8, 11, 9]

In [161]:
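# starmap is needed here because map() would pass each tuple to the function as a single argument; starmap unpacks every tuple into separate arguments, i.e. function(*args)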

### <font color=red> itertools.tee(iterable, n=2) </font>

In [162]:
li = [2, 4, 6, 7, 8, 10, 20] 
[list(i) for i in itl.tee(li, 3)]

[[2, 4, 6, 7, 8, 10, 20], [2, 4, 6, 7, 8, 10, 20], [2, 4, 6, 7, 8, 10, 20]]

In [163]:
li = [2, 4, 6, 7, 8, 10, 20] 
[list(i) for i in itl.tee(iter(li), 3)]

[[2, 4, 6, 7, 8, 10, 20], [2, 4, 6, 7, 8, 10, 20], [2, 4, 6, 7, 8, 10, 20]]

### <font color=red> itertools.zip_longest(*iterables, fillvalue=None) </font>

In [164]:
list(zip([1,2,3],(11,12,13,14)))

[(1, 11), (2, 12), (3, 13)]

In [165]:
list(itl.zip_longest([1,2,3],(11,12,13,14),fillvalue='*'))

[(1, 11), (2, 12), (3, 13), ('*', 14)]

### <font color=red> itertools.combinations(iterable, r) </font>

In [169]:
letters ="code"

In [171]:
list(map(''.join,itl.combinations(letters, 2)))

['co', 'cd', 'ce', 'od', 'oe', 'de']

In [172]:
list(map(''.join,itl.permutations(letters, 2)))

['co', 'cd', 'ce', 'oc', 'od', 'oe', 'dc', 'do', 'de', 'ec', 'eo', 'ed']

In [173]:
list(map(''.join,itl.combinations_with_replacement(letters, 2)))

['cc', 'co', 'cd', 'ce', 'oo', 'od', 'oe', 'dd', 'de', 'ee']