# The itertools module
- every itertools function returns a lazy iterator (tee returns a tuple of them)
-
- Slicing -> islice
- Selecting and Filtering -> dropwhile, takewhile,compress,filterfalse
- chaining and Teeing -> chain, tee
- Mapping and Reducing -> starmap , accumulate
- infinite iterators -> count, cycle, repeat
- zipping -> zip_longest
- combinatorics -> product, permutations, combinations, combinations_with_replacement.

### Aggregators
- Functions that iterate through an iterable and return a single value that takes into account every element of the iterable.
- min(iterable)
- max(iterable)
- sum(iterable)
- any(iterable) -> returns True if any element in iterable is truthy
- all(iterable) -> returns True if all the elements in iterable are truthy
### Associated Truth Values
- Every object in Python has an associated truth value.
- Every object is truthy, except
    - None
    - False
    - 0 in any numeric type(0,0.0,0+0j)
    - empty sequence(list,tuple,string)
    - empty mapping or set (dict,set)
    - instances of custom classes that implement `__bool__` or `__len__`,
      when that method returns False or 0.
### Leveraging the any and all Functions
- Often, we are not particularly interested in the direct truth value of the elements in an iterable
- we want to know if any or all satisfy some condition.
- A function that takes a single argument and returns True or False is called a predicate.

# Aggregators

In [1]:
# eg:
l = [1,20,3,4,50,60,7]
pred = lambda x:x<10

# using list comprehension
result = [pred(x) for x in l]
print(all(result))
print(any(result))

# using map
result = map(pred,l)
print(all(result))

# using generator
result = (pred(x) for x in l)
print(all(result))

False
True
False
False


In [2]:
class Myseq:
    """Minimal sequence-like class showing that ``bool()`` falls back to ``__len__``.

    No ``__bool__`` is defined, so the truth value of an instance is decided
    by ``__len__``: an instance is falsy exactly when its length is 0.
    """

    def __init__(self, n):
        """Store ``n``, the length this object reports."""
        self.n = n

    def __len__(self):
        """Return the stored length ``n``."""
        return self.n

    def __getitem__(self, index):
        """Return the placeholder item ``None`` for any in-range index.

        Raises:
            IndexError: if ``index`` is not in ``0 <= index < n``. Raising
                here is what makes ``for`` loops over the instance stop.
        """
        if not 0 <= index < self.n:
            raise IndexError('Myseq index out of range')
        return None

my_seq = Myseq(0)
bool(my_seq)

False

In [3]:
my_seq = Myseq(1)
bool(my_seq)

True

In [4]:
from numbers import Number

print(isinstance(10,Number))
print(isinstance(10.5,Number))
print(isinstance(10/2,Number))
print(isinstance(2+3j,Number))

True
True
True
True


In [5]:
l = [10,20,30,40]
pred = lambda x:isinstance(x,Number)

print(all(map(pred,l)))

True


In [6]:
l = [10,20,30,40,'hello']
pred = lambda x:isinstance(x,Number)

print(all(map(pred,l)))

False


# itertools.islice
- it returns a lazy iterator
- any iterable
- islice(iterable,start,stop,step)

In [7]:
from itertools import islice

l = [1,2,3,4,5,6,7,8,9,10,11]
result = islice(l,0,3)
list(result)

[1, 2, 3]

In [8]:
list(result)  # empty: result is an iterator and was already exhausted by the previous list() call

[]

# filter (built-in function, not part of itertools)
- filter(predicate,iterable)
- returns all elements where predicate(element) is truthy
- predicate can be None, in which case it is the identity function f(x) -> x
- filter returns a lazy iterator.
    - which means it doesn't build a list first; elements are tested one at a time as you iterate.
- it is lazy, so it will get exhausted.

In [33]:
fi = filter(lambda x:x<4,[1,2,3,4,6,7,9])
list(fi)

[1, 2, 3]

In [35]:
fi = filter(None,[0,'','hello',100,False])
list(fi)

['hello', 100]

# itertools.filterfalse
- This works the same way as the filter function
    - but instead of retaining elements where the predicate evaluates to True,
    - it retains elements where the predicate evaluates to False.

In [36]:
from itertools import filterfalse

fi = filterfalse(lambda x:x<4,[1,2,3,4,6,7,9])
list(fi)

[4, 6, 7, 9]

In [37]:
fi = filterfalse(None,[0,'','hello',100,False])
list(fi)

[0, '', False]

# itertools.compress
- returns a lazy iterator.
- This is not a compressor in the sense of say a zip archive.
- It is basically a way of filtering one iterable, using the truthiness of items in another iterable.

In [41]:
from itertools import compress
 
data = ['a','b','c','d','e']

selectors = [True,False,1,0]

list(compress(data,selectors))

['a', 'c']

# itertools.takewhile
- it returns an iterator that will yield items while pred(item) is Truthy
- At that point the iterator is exhausted.
    - even if there are more items in the iterable whose predicate would be truthy.

In [11]:
from itertools import takewhile

res = takewhile(lambda x:x<5,[1,2,10,3,4,6])
list(res)

[1, 2]

# itertools.dropwhile
- returns an iterator that will start iterating ( and yield all remaining elements) once pred(item) becomes Falsy

In [46]:
from itertools import dropwhile

res = dropwhile(lambda x:x<5, [1,3,5,2,1])
list(res)

[5, 2, 1]

# itertools.count
- it is an infinite iterator
- similar to range -> start,step
- different from range -> no stop -> infinite
- start and step can be any numeric type(float,complex,Decimal,bool)

In [9]:
from itertools import count

f = count(10,2)
[next(f) for _ in range(10)]

[10, 12, 14, 16, 18, 20, 22, 24, 26, 28]

In [10]:
f = count(10.5,0.1)
[round(next(f),4) for _ in range(10)]

[10.5, 10.6, 10.7, 10.8, 10.9, 11.0, 11.1, 11.2, 11.3, 11.4]

In [25]:
f = takewhile(lambda x:round(x,2)<10.8,count(10.5,0.1))
list(f)

[10.5, 10.6, 10.7]

# itertools.cycle
- It allows us to loop over a finite iterable indefinitely.
- If the argument of cycle is itself an iterator, that iterator becomes exhausted after the first pass,
  but cycle will still produce an infinite sequence because it saves a copy of each item.

In [28]:
from itertools import cycle

l = cycle(['a','b','c'])
[next(l) for _ in range(10)]

['a', 'b', 'c', 'a', 'b', 'c', 'a', 'b', 'c', 'a']

# itertools.repeat
- it simply yields the same value indefinitely (or a fixed number of times if a count is given).
- it produces the same object on every iteration, not a copy.

In [10]:
from itertools import repeat

l = repeat('span')
[next(l) for _ in range(10)]

['span',
 'span',
 'span',
 'span',
 'span',
 'span',
 'span',
 'span',
 'span',
 'span']

In [11]:
l = repeat('span',3)
next(l)
next(l)
next(l)
next(l)

StopIteration: 

# itertools.chain
- This is analogous to sequence concatenation, but not the same.
- dealing with iterables (including iterators.)
- chaining is itself a lazy iterator.
- variable number of positional arguments -> each arguments must be an iterable.

In [12]:
l1 = (i for i in range(4))
l2 = (i for i in range(4,8))
l3 = (i for i in range(8,12))

for gen in l1,l2,l3:
    for item in gen:
        print(item)

0
1
2
3
4
5
6
7
8
9
10
11


In [13]:
# using iterator

def chain_iterable(*iterables):
    """Yield every item of each given iterable, one iterable after another."""
    for source in iterables:
        for element in source:
            yield element

l1 = (i for i in range(4))
l2 = (i for i in range(4,8))
l3 = (i for i in range(8,12))

for item in chain_iterable(l1,l2,l3):
    print(item)

0
1
2
3
4
5
6
7
8
9
10
11


In [14]:
# the better way
from itertools import chain

l1 = (i for i in range(4))
l2 = (i for i in range(4,8))
l3 = (i for i in range(8,12))

for item in chain(l1,l2,l3):
    print(item)

0
1
2
3
4
5
6
7
8
9
10
11


In [15]:
l1 = (i for i in range(4))
l2 = (i for i in range(4,8))
l3 = (i for i in range(8,12))

lists = [l1,l2,l3]

for item in chain(*lists): # unpacking is not lazy.
    print(item)

0
1
2
3
4
5
6
7
8
9
10
11


In [16]:
def square():
    """Yield three generators covering 0-3, 4-7 and 8-11.

    Each yielded generator stands in for an expensive computation that
    should only run when it is actually needed.
    """
    for start in (0, 4, 8):
        yield (i for i in range(start, start + 4))

for item in chain(*square()):
    print(item)

0
1
2
3
4
5
6
7
8
9
10
11


In [17]:
# the alternate construction
c = chain.from_iterable(square()) # nothing get called unless you iterate through it.

for item in c:
    print(item)

    # but iterator get exhaused, we need to make another iterator.

0
1
2
3
4
5
6
7
8
9
10
11


In [18]:
from itertools import tee

def squares(n):
    """Lazily yield the squares of 0, 1, ..., n - 1."""
    yield from (value ** 2 for value in range(n))

gen = squares(10)
list(gen)

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

In [19]:
gen = squares(10)
iters = tee(gen,3) # tee returns a tuple of 3 independent iterators over the same data.
print(list(iters))  # list() of the tuple shows the tee objects; it does not consume them

iter1,iter2,iter3 = iters # each one is an independent iterator over gen's values.
print(list(iter1))
print(list(iter2))
print(list(iter3))

[<itertools._tee object at 0x000001CCDBF59940>, <itertools._tee object at 0x000001CCDBF59400>, <itertools._tee object at 0x000001CCDBF59440>]
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]


In [20]:
# we can tee list types.
l = [1,2,3,4]
lists = tee(l,2) # they are now lazy iterators.
lists[0]

<itertools._tee at 0x1ccdbf5d580>

In [21]:
list(lists[0])

[1, 2, 3, 4]

In [26]:
list(lists[0])

[]

In [27]:
list(lists[1])

[1, 2, 3, 4]

In [28]:
list(lists[1])

[]

# itertools.starmap
- starmap is very similar to map
- it unpacks every sub element of the iterable argument, and passes that to the map function.
- used if we have to unpack element.
- it allows us to pass iterable of iterables.
- useful for mapping a multi-argument function on an iterable of iterables

In [1]:
l = [[1,2],[3,4]] # iterable of iterables.
ll = [[1,2,3],[10,20,30],[100,200,300]]

In [2]:
# using map
print(map(lambda item:item[0]*item[1],l))

print(list(map(lambda item:item[0]*item[1],l)))

print(list(map(lambda item:item[0]+item[1]+item[2],ll)))

<map object at 0x0000024464915C40>
[2, 12]
[6, 60, 600]


In [3]:
# using generator
import operator

print((operator.mul(*item) for item in l))

print(list((operator.mul(*item) for item in l)))

print(list((operator.add(*item) for item in ll)))

<generator object <genexpr> at 0x0000024464917120>
[2, 12]


TypeError: add expected 2 arguments, got 3

In [4]:
# using starmap
from itertools import starmap

print(starmap(operator.mul,l)) # it is lazy iterator.

print(list(starmap(operator.mul,l)))

print(list(starmap(lambda x,y,z:x+y+z,ll)))

<itertools.starmap object at 0x000002446481CE80>
[2, 12]
[6, 60, 600]


# itertools.accumulate
- lazy iterator.
- it is similar to the reduce function.
- but it returns a (lazy) iterator producing all the intermediate results.
- argument order is not the same: accumulate(iterable, func) vs reduce(func, iterable).

In [5]:
from itertools import accumulate
list1 = [10,20,30,40]
print(' The summation of list 1 is :')
print(list(accumulate(list1,lambda x,y:x+y))) # sum at every loop and print

 The summation of list 1 is :
[10, 30, 60, 100]


# zip
- lazy iterator.
- it takes a variable number of positional arguments, each of which is an iterable.
- it returns an iterator that produces tuples containing the elements of the iterables, iterated one at a time.
- It stops immediately once one of the iterables has been completely iterated over.

In [6]:
l1 = [1,2,3,4]
l2 = [10,20,30,40]
l3 = 'python'
l = zip(l1,l2,l3)
print(l)
print(next(l))
print(list(l))
list(l)

<zip object at 0x000002446491D200>
(1, 10, 'p')
[(2, 20, 'y'), (3, 30, 't'), (4, 40, 'h')]


[]

# itertools.zip_longest
- if we want to zip, but based on the longest iterable.
    - may need to provide a default value for the 'holes' -> fillvalue.

In [7]:
from itertools import zip_longest

print(zip_longest(l1,l2,l3))
l =zip_longest(l1,l2,l3,fillvalue='zero')
print(next(l))
print('-----------------------')
print(list(l))
list(l)

<itertools.zip_longest object at 0x0000024464905EF0>
(1, 10, 'p')
-----------------------
[(2, 20, 'y'), (3, 30, 't'), (4, 40, 'h'), ('zero', 'zero', 'o'), ('zero', 'zero', 'n')]


[]

# itertools.groupby
- sometime if we want to loop over an iterable of elements.
    - but, we want to group those elements as we iterate through them
- lazy iterator
-
- if we iterate the 1st group and then want the 3rd group, Python has to iterate through the 2nd group to reach the 3rd.
- advancing to the next group actually iterates through all remaining elements of the current 'sub-iterator' first.

In [85]:
def gen_group():
    """Yield (key, value) pairs for keys 1-3, three consecutive values per key starting at the key."""
    for key in range(1, 4):
        yield from ((key, member) for member in range(key, key + 3))
g = gen_group()
print(list(g))
list(g)

[(1, 1), (1, 2), (1, 3), (2, 2), (2, 3), (2, 4), (3, 3), (3, 4), (3, 5)]


[]

In [8]:
from itertools import groupby

def gen_group():
    """Yield (key, value) pairs already ordered by key, ready to be grouped on the key.

    Keys run over 1, 2, 3; each key is paired with key, key + 1 and key + 2.
    """
    pairs = ((key, key + offset) for key in (1, 2, 3) for offset in range(3))
    yield from pairs
g = gen_group()

groups = groupby(g,key=lambda x:x[0])
for group in groups:
    print(group[0],list(group[1]))

list(g) # all consumed.

1 [(1, 1), (1, 2), (1, 3)]
2 [(2, 2), (2, 3), (2, 4)]
3 [(3, 3), (3, 4), (3, 5)]


[]

In [9]:
group = [
    (1,10,100),
    (1,11,101),
    (1,12,102),
    (2,20,200),
    (2,21,201),
    (3,30,300),
    (3,31,301),
    (3,32,302),
]

In [10]:
from itertools import groupby

g = groupby(group,lambda x:x[0]) # group by 1st element.
for key,grp in g:
    print(key)
    for i in grp:
        print(i)

1
(1, 10, 100)
(1, 11, 101)
(1, 12, 102)
2
(2, 20, 200)
(2, 21, 201)
3
(3, 30, 300)
(3, 31, 301)
(3, 32, 302)


In [35]:
g = groupby(group,lambda x:x[0])
print(next(g))
print(next(g))

(1, <itertools._grouper object at 0x000001C3BB5EED90>)
(2, <itertools._grouper object at 0x000001C3BC058F70>)


In [39]:
import itertools

with open('../File/assignment/Monthly_Attendance.csv') as f:
    for row in itertools.islice(f,0,20):
        print(row,end='')

School,MonthCode,CalMonth,GradeLevel,GradeSort,RosterCount,Absent,Present,Released
01M015,5,Jan,1,1,31,28,574,0
01M015,5,Jan,2,2,26,48,445,0
01M015,5,Jan,3,3,31,43,568,0
01M015,5,Jan,4,4,24,29,442,0
01M015,5,Jan,5,5,19,12,368,0
01M015,5,Jan,0K,0,30,43,537,0
01M015,5,Jan,PK,-1,17,22,265,15
01M015,6,Feb,1,1,30,16,404,0
01M015,6,Feb,2,2,25,17,333,0
01M015,6,Feb,3,3,32,15,419,0
01M015,6,Feb,4,4,24,21,315,0
01M015,6,Feb,5,5,19,9,257,0
01M015,6,Feb,0K,0,30,20,380,0
01M015,6,Feb,PK,-1,15,17,193,0
01M015,7,Mar,1,1,30,48,593,0
01M015,7,Mar,2,2,25,37,513,0
01M015,7,Mar,3,3,31,31,651,0
01M015,7,Mar,4,4,25,30,509,0
01M015,7,Mar,5,5,19,12,406,0


In [44]:
# count how many rows (records) each school has in the file

from collections import defaultdict

stu = defaultdict(int)
with open('../File/assignment/Monthly_Attendance.csv') as f:
    next(f)
    for row in f:
        School,*_ = row.strip('\n').split(',')
        stu[School] += 1

for key,value in stu.items():
    print(key,value)

01M015 70
01M019 70
01M020 70
01M034 100
01M063 70
01M064 70
01M110 70
01M134 70
01M140 100
01M142 70
01M184 100
01M188 100
01M292 40
01M301 30
01M315 70
01M332 30
01M361 70
01M363 70
01M364 70
01M378 30
01M448 40
01M450 70
01M458 40
01M509 40
01M515 30
01M539 130
01M650 40
01M696 40
01M839 31
02M001 71
02M002 70
02M003 27


In [98]:
# we can simplify using group by
from itertools import groupby

def count_group(group):
    """Return the number of items in ``group``, consuming it if it is an iterator."""
    return sum(1 for _ in group)

# Count the rows per school by grouping consecutive lines on their first CSV field.
with open('../File/assignment/Monthly_Attendance.csv') as f:
    next(f)  # skip the header row
    # groupby only merges adjacent lines with equal keys, so this relies on
    # the file already being ordered by school -- TODO confirm for the full file.
    g = groupby(f,key=lambda x:x.strip('\n').split(',')[0])

    # Alternative using the count_group helper:
    # count = ((key,count_group(group)) for key,group in g)
    # NOTE: the name `count` rebinds itertools.count if it was imported earlier in the session.
    count = ((key,sum(map(lambda x:1,group))) for key,group in g)
    # The generator is lazy, so it has to be consumed while the file is still open.
    print(list(count))

[('01M015', 70), ('01M019', 70), ('01M020', 70), ('01M034', 100), ('01M063', 70), ('01M064', 70), ('01M110', 70), ('01M134', 70), ('01M140', 100), ('01M142', 70), ('01M184', 100), ('01M188', 100), ('01M292', 40), ('01M301', 30), ('01M315', 70), ('01M332', 30), ('01M361', 70), ('01M363', 70), ('01M364', 70), ('01M378', 30), ('01M448', 40), ('01M450', 70), ('01M458', 40), ('01M509', 40), ('01M515', 30), ('01M539', 130), ('01M650', 40), ('01M696', 40), ('01M839', 31), ('02M001', 71), ('02M002', 70), ('02M003', 27)]


# itertools.product
- Cartesian product.

In [6]:
def cartisian_product(a,b):
    """Lazily yield every pair (x, y) with x taken from ``a`` and y from ``b``.

    Pairs come out with ``a`` as the outer loop, so all pairs for the first
    element of ``a`` are produced before moving to the second.
    """
    yield from ((left, right) for left in a for right in b)

list(cartisian_product([1,2,3],['a','b','c']))

[(1, 'a'),
 (1, 'b'),
 (1, 'c'),
 (2, 'a'),
 (2, 'b'),
 (2, 'c'),
 (3, 'a'),
 (3, 'b'),
 (3, 'c')]

In [26]:
from itertools import product

l1 = [1,2,3]
l2 = ['a','b','c']

p = product(l1,l2)
list(p)

[(1, 'a'),
 (1, 'b'),
 (1, 'c'),
 (2, 'a'),
 (2, 'b'),
 (2, 'c'),
 (3, 'a'),
 (3, 'b'),
 (3, 'c')]

In [7]:
# product can have more than 2 arguments
l3 = [100,200]
p = product(l1,l2,l3)
list(p)

[(1, 'a', 100),
 (1, 'a', 200),
 (1, 'b', 100),
 (1, 'b', 200),
 (1, 'c', 100),
 (1, 'c', 200),
 (2, 'a', 100),
 (2, 'a', 200),
 (2, 'b', 100),
 (2, 'b', 200),
 (2, 'c', 100),
 (2, 'c', 200),
 (3, 'a', 100),
 (3, 'a', 200),
 (3, 'b', 100),
 (3, 'b', 200),
 (3, 'c', 100),
 (3, 'c', 200)]

In [28]:
SUITS = 'SHDC'
RANKS = tuple(map(str,range(2,11)))+ tuple('JQKA')

deck = [f'{suit}{rank}' for suit,rank in product(SUITS,RANKS)]
deck[0:6]

['S2', 'S3', 'S4', 'S5', 'S6', 'S7']

# itertools.permutations(iterable,r=None)
- r is the size of permutation
- r = None means the length of each permutation is the length of the iterable.
- Elements of the iterable are considered unique based on their position, not their value.

In [25]:
from itertools import permutations

l = [1,5,7,1] # 1st and last '1' are different.
p  = permutations(l)
list(p)

[(1, 5, 7, 1),
 (1, 5, 1, 7),
 (1, 7, 5, 1),
 (1, 7, 1, 5),
 (1, 1, 5, 7),
 (1, 1, 7, 5),
 (5, 1, 7, 1),
 (5, 1, 1, 7),
 (5, 7, 1, 1),
 (5, 7, 1, 1),
 (5, 1, 1, 7),
 (5, 1, 7, 1),
 (7, 1, 5, 1),
 (7, 1, 1, 5),
 (7, 5, 1, 1),
 (7, 5, 1, 1),
 (7, 1, 1, 5),
 (7, 1, 5, 1),
 (1, 1, 5, 7),
 (1, 1, 7, 5),
 (1, 5, 1, 7),
 (1, 5, 7, 1),
 (1, 7, 1, 5),
 (1, 7, 5, 1)]

# itertools.combinations
- the order of elements within a combination is not considered.
- if the input is sorted, the combination tuples are produced in sorted order
- combinations of length r can be picked from a set.
- Elements of the iterable are considered unique based on their position, not their value.

In [23]:
from itertools import combinations
from itertools import combinations_with_replacement

l = [1,2,3,8,9]

print(list(combinations(l,r=4)))

list(combinations_with_replacement(l,r=3))

[(1, 2, 3, 8), (1, 2, 3, 9), (1, 2, 8, 9), (1, 3, 8, 9), (2, 3, 8, 9)]


[(1, 1, 1),
 (1, 1, 2),
 (1, 1, 3),
 (1, 1, 8),
 (1, 1, 9),
 (1, 2, 2),
 (1, 2, 3),
 (1, 2, 8),
 (1, 2, 9),
 (1, 3, 3),
 (1, 3, 8),
 (1, 3, 9),
 (1, 8, 8),
 (1, 8, 9),
 (1, 9, 9),
 (2, 2, 2),
 (2, 2, 3),
 (2, 2, 8),
 (2, 2, 9),
 (2, 3, 3),
 (2, 3, 8),
 (2, 3, 9),
 (2, 8, 8),
 (2, 8, 9),
 (2, 9, 9),
 (3, 3, 3),
 (3, 3, 8),
 (3, 3, 9),
 (3, 8, 8),
 (3, 8, 9),
 (3, 9, 9),
 (8, 8, 8),
 (8, 8, 9),
 (8, 9, 9),
 (9, 9, 9)]