In [1]:
import itertools

### Aggregators

\- are functions that iterate through an iterable and return a single value that (*usually*) takes into account every element of the iterable.<br>
Examples of aggregators: `min`, `max`, `sum`, `any`, `all`.

Every object in Python has an associated **truth value**. Every object has a **True** truth value, except
- **None**,
- **False**,
- **0** in any numeric type (*e.g. 0, 0.0, 0+0j*),
- empty sequences (*e.g. list, tuple, string*),
- empty mappings and sets (*e.g. dictionary, set*),
- instances of custom classes whose `__bool__` method returns **False**, or (when `__bool__` is not defined) whose `__len__` method returns **0**

which have a **False** truth value.

A function that takes a single argument and returns True or False is called a **predicate**.

In [2]:
any([False, 0, '', None, dict(), set(), tuple()])  # all these objects have False truth value

False

In [3]:
all([True, '0', (None,), [None], 10])              # all these objects have True truth value

True

<br>

**Example 1**: whether all given objects are numbers

In [4]:
from numbers import Number

In [5]:
ls = [10, 20, 30, 40, '50']

all(isinstance(n, Number) for n in ls)  # here we use generator expression

False

In [6]:
ls = [10, 20, 30, 40, '50']

all(map(lambda n: isinstance(n, Number), ls))  # 'lambda n: isinstance(n, Number)' is a predicate

False

<br>

**Example 2**: verifying content of the file.<br>
Check that every line in the file is at least 3 characters long (plus the trailing '\n')

In [7]:
with open('working_files/car-brands-1.txt') as fh:
    # rows read from the file keep their line terminator, hence the threshold of 4 rather than 3
    # NOTE(review): a final line without a trailing '\n' would be measured one character short
    result = all(len(row) >= 4 for row in fh)  # here we use a generator expression
    
print(result)

True


In [8]:
with open('working_files/car-brands-1.txt') as fh:
    # map applies the predicate lazily to each row; rows keep their trailing '\n', hence 4 rather than 3
    result = all(map(lambda row: len(row) >= 4, fh))
    
print(result)

True


<br>

### Slicing

In [9]:
import math
from itertools import islice

In [10]:
# simple generator
def factorials(n):
    """Lazily yield the factorials 0!, 1!, ..., (n-1)!."""
    yield from map(math.factorial, range(n))

In [11]:
# take the items at 0-based positions 3 through 9 of the generator output (stop index 10 is excluded)
list(islice(factorials(100), 3, 10))

[6, 24, 120, 720, 5040, 40320, 362880]

In [12]:
# using step
list(islice(factorials(100), 3, 10, 2))

[6, 120, 5040, 362880]

<br>

### Selecting and Filtering

(`filter`, `filterfalse`, `compress`, `takewhile`, `dropwhile`)

#### filter and filterfalse

In [13]:
def gen_cubes(n):
    """Yield i**3 for i in range(n), printing a trace line right before each value is produced."""
    def traced_cube(i):
        # the trace is emitted only when the consumer asks for the next value
        print(f'yielding {i}')
        return i**3

    yield from map(traced_cube, range(n))

def is_odd(x):          # predicate
    """Return True when x leaves a remainder of 1 on division by 2."""
    remainder = x % 2
    return remainder == 1

is_odd(4), is_odd(81)

(False, True)

In [14]:
filtered = filter(is_odd, gen_cubes(10))

list(filtered)

yielding 0
yielding 1
yielding 2
yielding 3
yielding 4
yielding 5
yielding 6
yielding 7
yielding 8
yielding 9


[1, 27, 125, 343, 729]

In [15]:
filteredfalse = itertools.filterfalse(is_odd, gen_cubes(10))

list(filteredfalse)

yielding 0
yielding 1
yielding 2
yielding 3
yielding 4
yielding 5
yielding 6
yielding 7
yielding 8
yielding 9


[0, 8, 64, 216, 512]

#### dropwhile and takewhile

In [16]:
from math import sin, pi

def sine_wave(n):
    """Yield n samples of sin(x), rounded to 2 decimals, for x evenly spaced over [0, 2*pi].

    The first sample is taken at 0 and, for n > 1, the last one at (approximately) 2*pi.
    For n == 1 only the sample at 0 is produced; for n <= 0 nothing is yielded.
    """
    start = 0
    max_ = 2 * pi
    # a single sample spans no interval: avoid dividing by n - 1 == 0
    step = (max_ - start) / (n - 1) if n > 1 else 0

    for _ in range(n):
        yield round(sin(start), 2)
        start += step
        
        
list(sine_wave(15))

[0.0,
 0.43,
 0.78,
 0.97,
 0.97,
 0.78,
 0.43,
 0.0,
 -0.43,
 -0.78,
 -0.97,
 -0.97,
 -0.78,
 -0.43,
 -0.0]

In [17]:
res_takenwhile = itertools.takewhile(lambda x: 0 <= x <= 0.9, sine_wave(15))

list(res_takenwhile)

# next(res_takenwhile)    # StopIteration:

[0.0, 0.43, 0.78]

<br>

In [18]:
res_dropwhile = itertools.dropwhile(lambda x: 0 <= x <= 0.9, sine_wave(15))

list(res_dropwhile)

# next(res_dropwhile)    # StopIteration:

[0.97, 0.97, 0.78, 0.43, 0.0, -0.43, -0.78, -0.97, -0.97, -0.78, -0.43, -0.0]

#### compress

Filtering one iterable using the contents of another iterable

In [19]:
data =      ['a', 'b', 'c', 'd', 'e', 'f', 'g']
selectors = [True, False, 0, '', None, 1]        # one item shorter than data: there is no selector for 'g'

list(zip(data, selectors))

[('a', True), ('b', False), ('c', 0), ('d', ''), ('e', None), ('f', 1)]

In [20]:
# compress via comprehension
[item for item, truth_value in zip(data, selectors) if truth_value]

['a', 'f']

In [21]:
res_compress = itertools.compress(data, selectors)

list(res_compress)

['a', 'f']

<br>

from [FAQ](https://www.udemy.com/course/python-3-deep-dive-part-2/learn/lecture/10516094#questions/5917774). Converter using compress:

In [22]:
def binary_decimal_converter(binary):
    """Convert a binary number to its decimal value using itertools.compress.

    binary: a sliceable sequence of binary digits, most significant first,
            e.g. the string '0110' or the list [0, 1, 1, 0].
    Returns the integer value (0 for an empty sequence).
    Raises ValueError if a digit is not 0 or 1 (or cannot be converted by int()).
    """
    # least-significant digit first, so the digit at position k lines up with 2**k
    bits_reversed = [int(digit) for digit in binary[::-1]]
    # compress only tests truthiness, so a digit such as 2 would silently count as 1
    if any(bit not in (0, 1) for bit in bits_reversed):
        raise ValueError(f'not a binary number: {binary!r}')
    decimal_values = (2**n for n in itertools.count())
    # compress stops when the (finite) selectors run out, which bounds the infinite powers
    return sum(itertools.compress(decimal_values, bits_reversed))


print(binary_decimal_converter('0110'))                # 6

print(binary_decimal_converter('001100100111111001'))  # 51705

print(binary_decimal_converter([0, 1, 1, 1, 1]))       # 15

6
51705
15


<br>

### Infinite iterators


(`count`, `cycle`, `repeat`)

In [23]:
from itertools import count, cycle, repeat, islice

#### count

In [24]:
g1 = count(10)

list(islice(g1, 5))

[10, 11, 12, 13, 14]

In [25]:
g2 = count(1, 0.5)

list(islice(g2, 5))

[1, 1.5, 2.0, 2.5, 3.0]

In [26]:
g3 = count(10+20j, 1+2j)

list(islice(g3, 5))

[(10+20j), (11+22j), (12+24j), (13+26j), (14+28j)]

In [27]:
from decimal import Decimal

g4 = count(Decimal('0'), Decimal('0.1'))

list(islice(g4, 5))

[Decimal('0'), Decimal('0.1'), Decimal('0.2'), Decimal('0.3'), Decimal('0.4')]

<br>

#### cycle

In [28]:
g1 = cycle(('A', 'T', 'G', 'C'))

list(islice(g1, 6))

['A', 'T', 'G', 'C', 'A', 'T']

In [29]:
def nucleobases():
    """Yield the letters 'A', 'T', 'G', 'C' once, in that order."""
    yield from ('A', 'T', 'G', 'C')
    
ncl = nucleobases()

list(ncl)

['A', 'T', 'G', 'C']

In [30]:
list(ncl)   # iterator now is exhausted

[]

In [31]:
ncl = nucleobases()

g2 = cycle(ncl)

list(islice(g2, 10))

['A', 'T', 'G', 'C', 'A', 'T', 'G', 'C', 'A', 'T']

<br>

#### repeat

In [32]:
g = repeat('mutation')

for _ in range(3):
    print(next(g))

mutation
mutation
mutation


In [33]:
g = repeat('mutation', 4)

list(g)

['mutation', 'mutation', 'mutation', 'mutation']

<p style="color: red">Caveat:</p>

In [34]:
g = repeat([], 4)

g_list = list(g)

g_list

[[], [], [], []]

In [35]:
g_list[0] is g_list[1]

True

In [36]:
g_list[0].append(20)

g_list

[[20], [20], [20], [20]]

*And the same with `itertools.cycle`*

<br>

### Chaining and Teeing


(`chain`, `chain.from_iterable`, `tee`)

#### chain

In [37]:
from itertools import chain, tee

In [38]:
l1 = (i**2 for i in range(4))
l2 = (i**2 for i in range(4,8))
l3 = (i**2 for i in range(8,12))

for item in chain(l1, l2, l3):
    print(item, end=' ')

0 1 4 9 16 25 36 49 64 81 100 121 

<br>

#### tee

In [39]:
ls = [1, 2, 3, 4]
lists = tee(ls, 2)

lists   # 'tee' returns a tuple of iterators over the data, not lists

(<itertools._tee at 0x7fd628441fc8>, <itertools._tee at 0x7fd628441f08>)

<br>

### Mapping

**Mapping** - applying a callable to each element of an iterable (`map`, `starmap`)

`starmap` is very similar to `map`, but it unpacks each element of the iterable argument and passes the unpacked values to the function as separate positional arguments.

In [40]:
mp = map(lambda x: x**2, range(5))

print(type(mp))
print(iter(mp) is mp)
print('__next__' in dir(mp))

list(mp)

<class 'map'>
True
True


[0, 1, 4, 9, 16]

In [41]:
list(mp)  # mp is exhausted

[]

<br>

In [42]:
def add(t):
    """Return the sum of the first two items of the indexable t."""
    first, second = t[0], t[1]
    return first + second

In [43]:
tested_ls = [(0, 0), [1, 1], range(2,4)]

list(map(add, tested_ls))

[0, 2, 5]

<br>

In [44]:
def add2(x, y):
    """Return x + y."""
    total = x + y
    return total

In [45]:
# we can calculate this manually
[add2(*t) for t in tested_ls]

[0, 2, 5]

In [46]:
# but easier way is to use itertools.starmap
from itertools import starmap

list(starmap(add2, tested_ls))

[0, 2, 5]

<br>

### Reducing

**Accumulation** - reducing an iterable down to a single value (*e.g.* `sum`, `min`, `max`, `reduce`, `accumulate`)

`accumulate` is very similar to `reduce`, but it returns a (*lazy*) iterator producing all the intermediate results.<br>Note that the argument order differs: `reduce(function, iterable[, initializer])` versus `accumulate(iterable[, function])`.

In [47]:
from functools import reduce

In [48]:
reduce(lambda x, y: x*y, [1, 2, 3, 4])

24

In [49]:
reduce(lambda x, y: x*y, [1, 2, 3, 4], 10)  # with initial value

240

<br>

In [50]:
from itertools import accumulate
import operator

In [51]:
list(accumulate([10, 20, 30]))  # by default used operation of addition

[10, 30, 60]

In [52]:
list(accumulate([1, 2, 3, 4], operator.mul))

[1, 2, 6, 24]

The `initial` keyword argument of `itertools.accumulate` is available only since Python 3.8 ([docs.python.org](https://docs.python.org/3/library/itertools.html#itertools.accumulate)). In Python 3.7 and earlier we can imitate it by prepending the initial value with `chain`.

In [53]:
list(chain([10], [1, 2, 3, 4]))

[10, 1, 2, 3, 4]

In [54]:
list(accumulate(chain([10], [1, 2, 3, 4]), operator.mul))

[10, 10, 20, 60, 240]

<br>

### Zipping

`zip` is based on the shortest iterable. It stops immediately once one of the iterables has been completely iterated over.

`zip_longest` is based on the longest iterable. You can provide a default value for the "holes" (by default, *None*)

In [55]:
ls1 = [11, 12, 13, 14]
ls2 = [21, 22, 23]
ls3 = [31, 32]

res = zip(ls1, ls2, ls3)

print(iter(res) is res)
print('__next__' in dir(res))

list(res)

True
True


[(11, 21, 31), (12, 22, 32)]

In [56]:
from itertools import zip_longest

In [57]:
ls1 = [11, 12, 13, 14]
ls2 = [21, 22, 23]
ls3 = [31, 32]

list(zip_longest(ls1, ls2, ls3, fillvalue='N/A'))

[(11, 21, 31), (12, 22, 32), (13, 23, 'N/A'), (14, 'N/A', 'N/A')]

<br>

### Grouping

In [58]:
ls = [
    (1, 10, 100),
    (1, 11, 101),
    (1, 12, 102),
    
    (2, 20, 200),
    (2, 21, 201),
    
    (3, 30, 300),
    (3, 31, 301),
    (3, 32, 302)
]

In [59]:
groups = itertools.groupby(ls, lambda tp: tp[0])

for group_key, sub_iter in groups:
    print(group_key, list(sub_iter))

1 [(1, 10, 100), (1, 11, 101), (1, 12, 102)]
2 [(2, 20, 200), (2, 21, 201)]
3 [(3, 30, 300), (3, 31, 301), (3, 32, 302)]


<br>

### Combinatorics

#### itertools.product

In [60]:
ls1 = [1, 2, 3]
ls2 = ['a', 'b', 'c', 'd']

list(itertools.product(ls1, ls2))

[(1, 'a'),
 (1, 'b'),
 (1, 'c'),
 (1, 'd'),
 (2, 'a'),
 (2, 'b'),
 (2, 'c'),
 (2, 'd'),
 (3, 'a'),
 (3, 'b'),
 (3, 'c'),
 (3, 'd')]

<br>
More complicated example:

In [61]:
# classic approach

def matrix(n):
    """Yield (row, col, row*col) for every row and col in 1..n, row-major, using nested loops."""
    upper = n + 1
    for row in range(1, upper):
        for col in range(1, upper):
            yield row, col, row * col

list(matrix(3))

[(1, 1, 1),
 (1, 2, 2),
 (1, 3, 3),
 (2, 1, 2),
 (2, 2, 4),
 (2, 3, 6),
 (3, 1, 3),
 (3, 2, 6),
 (3, 3, 9)]

In [62]:
# rewrite this using itertools.product

def matrix(n):
    """Yield (row, col, row*col) for every row and col in 1..n, pairing the indices with product."""
    side = range(1, n + 1)
    for row, col in itertools.product(side, side):
        yield row, col, row * col

list(matrix(3)) 

[(1, 1, 1),
 (1, 2, 2),
 (1, 3, 3),
 (2, 1, 2),
 (2, 2, 4),
 (2, 3, 6),
 (3, 1, 3),
 (3, 2, 6),
 (3, 3, 9)]

In [63]:
# alternative

def matrix(n):
    """Return a generator of (i, j, i*j) for every i and j in 1..n, in row-major order."""
    index_pairs = itertools.product(range(1, n+1), range(1, n+1))
    return ((i, j, i*j) for i, j in index_pairs)

list(matrix(3))

[(1, 1, 1),
 (1, 2, 2),
 (1, 3, 3),
 (2, 1, 2),
 (2, 2, 4),
 (2, 3, 6),
 (3, 1, 3),
 (3, 2, 6),
 (3, 3, 9)]

In [64]:
# generalize the previous approach: product's `repeat` argument pairs the range
# with itself, so no itertools.tee copies of the range are needed

def matrix(n):
    """Return a generator of (i, j, i*j) for every i and j in 1..n, in row-major order.

    product(iterable, repeat=2) is equivalent to product(iterable, iterable).
    """
    return ((i, j, i*j)
            for i, j in itertools.product(range(1, n+1), repeat=2))

list(matrix(3))

[(1, 1, 1),
 (1, 2, 2),
 (1, 3, 3),
 (2, 1, 2),
 (2, 2, 4),
 (2, 3, 6),
 (3, 1, 3),
 (3, 2, 6),
 (3, 3, 9)]

<br>

#### itertools.permutations, itertools.combinations & itertools.combinations_with_replacement

In [65]:
st1 = 'abc'
list(itertools.permutations(st1))

[('a', 'b', 'c'),
 ('a', 'c', 'b'),
 ('b', 'a', 'c'),
 ('b', 'c', 'a'),
 ('c', 'a', 'b'),
 ('c', 'b', 'a')]

In [66]:
list(itertools.permutations(st1, 2))

[('a', 'b'), ('a', 'c'), ('b', 'a'), ('b', 'c'), ('c', 'a'), ('c', 'b')]

<br>

In [67]:
list(itertools.combinations(st1, 2))

[('a', 'b'), ('a', 'c'), ('b', 'c')]

In [68]:
list(itertools.combinations(st1, 1))

[('a',), ('b',), ('c',)]

<br>

In [69]:
list(itertools.combinations_with_replacement(st1, 2))

[('a', 'a'), ('a', 'b'), ('a', 'c'), ('b', 'b'), ('b', 'c'), ('c', 'c')]