https://realpython.com/python-itertools/

In [1]:
list(zip([1, 2, 3], ['a', 'b', 'c']))

[(1, 'a'), (2, 'b'), (3, 'c')]

[1, 2, 3] and ['a', 'b', 'c'], like all lists, are iterable, which means they can return their elements one at a time. Technically, any Python object that implements the .__iter__() or .__getitem__() methods is iterable.

In [2]:
iter([1, 2, 3, 4])

<list_iterator at 0x107bfedc0>

Under the hood, the zip() function works, in essence, by calling iter() on each of its arguments, then advancing each iterator returned by iter() with next() and aggregating the results into tuples. The iterator returned by zip() iterates over these tuples.

The map() built-in function is another “iterator operator” that, in its simplest form, applies a single-parameter function to each element of an iterable one element at a time:

In [3]:
list(map(len, ['abc', 'de', 'fghi']))

[3, 2, 4]

In [4]:
list(map(sum, zip([1, 2, 3], [4, 5, 6])))

[5, 7, 9]

In [5]:
def naive_grouper(inputs, n):
    """Split a sliceable sequence into consecutive tuples of length n.

    Only complete groups are produced: any trailing items that do not fill
    a whole group are left out. The result is built eagerly as a list, and
    ``inputs`` must support both ``len()`` and slicing.

    Raises ZeroDivisionError when ``n`` is 0.
    """
    full_groups = len(inputs) // n
    groups = []
    for index in range(full_groups):
        start = index * n
        groups.append(tuple(inputs[start:start + n]))
    return groups

nums = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
naive_grouper(nums, 2)

[(1, 2), (3, 4), (5, 6), (7, 8), (9, 10)]

In [6]:
def naive_grouper(inputs, n):
    """Return a list of n-item tuples cut from consecutive slices of inputs.

    ``inputs`` needs ``len()`` and slice support. Items past the last
    complete group are discarded. Every tuple is created up front, so the
    whole result is held in memory at once.

    Raises ZeroDivisionError when ``n`` is 0.
    """
    # Number of leading items that fit into complete groups.
    usable = (len(inputs) // n) * n
    return [tuple(inputs[start:start + n]) for start in range(0, usable, n)]


for _ in naive_grouper(range(100000000), 10):
    pass

The list and tuple implementation in naive_grouper() requires approximately 4.5GB of memory to process range(100000000). Working with iterators drastically improves this situation. Consider the following:

In [7]:
def better_grouper(inputs, n):
    """Lazily group the items of an iterable into tuples of length n.

    A single iterator over ``inputs`` is handed to zip() n times, so each
    tuple zip() assembles consumes n consecutive items from that one
    iterator. Grouping stops at the first incomplete tuple, so leftover
    items are dropped. Returns the zip iterator without materializing it.
    """
    source = iter(inputs)
    # The same iterator object repeated n times, not n separate iterators.
    return zip(*((source,) * n))

There’s a lot going on in this little function, so let’s break it down with a concrete example. The expression [iter(inputs)] * n creates a list of n references to the same iterator:

In [8]:
nums = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
iters = [iter(nums)] * 2
list(id(itr) for itr in iters)  # IDs are the same.

[4424134560, 4424134560]

Next, zip(*iters) returns an iterator over pairs of corresponding elements of each iterator in iters. When the first element, 1, is taken from the “first” iterator, the “second” iterator now starts at 2 since it is just a reference to the “first” iterator and has therefore been advanced one step. So, the first tuple produced by zip() is (1, 2).

At this point, “both” iterators in iters start at 3, so when zip() pulls 3 from the “first” iterator, it gets 4 from the “second” to produce the tuple (3, 4). This process continues until zip() finally produces (9, 10) and “both” iterators in iters are exhausted:

In [10]:
nums = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
list(better_grouper(nums, 2))

[(1, 2), (3, 4), (5, 6), (7, 8), (9, 10)]

In [2]:
def better_grouper(inputs, n):
    """Return an iterator of n-item tuples drawn in order from inputs.

    Every argument passed to zip() is the same iterator object, so filling
    one tuple advances that iterator n times. Nothing is materialized up
    front, and items that cannot fill a final complete tuple are dropped.
    """
    shared = iter(inputs)
    # n references to one iterator; zip() pulls from it round-robin.
    columns = [shared for _ in range(n)]
    return zip(*columns)


for _ in better_grouper(range(100000000), 10):
    pass

In [3]:
nums = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
list(better_grouper(nums, 4))
[(1, 2, 3, 4), (5, 6, 7, 8)]

[(1, 2, 3, 4), (5, 6, 7, 8)]

In [4]:
import itertools as it
x = [1, 2, 3, 4, 5]
y = ['a', 'b', 'c']
list(zip(x, y))
[(1, 'a'), (2, 'b'), (3, 'c')]
list(it.zip_longest(x, y))
[(1, 'a'), (2, 'b'), (3, 'c'), (4, None), (5, None)]

[(1, 'a'), (2, 'b'), (3, 'c'), (4, None), (5, None)]

With this in mind, replace zip() in better_grouper() with zip_longest():

In [4]:
import itertools as it

def grouper(inputs, n, fillvalue=None):
    """Lazily group the items of an iterable into tuples of length n.

    One iterator over ``inputs`` is passed to zip_longest() n times, so
    each tuple takes n consecutive items. When the items run out partway
    through a tuple, the remaining slots are filled with ``fillvalue``.
    Returns the zip_longest iterator without materializing it.
    """
    source = iter(inputs)
    # The same iterator object repeated n times, not n separate iterators.
    repeated = (source,) * n
    return it.zip_longest(*repeated, fillvalue=fillvalue)
#Now you get a better result:

nums = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
print(list(grouper(nums, 4)))


[(1, 2, 3, 4), (5, 6, 7, 8), (9, 10, None, None)]


In [10]:
list(it.combinations([1, 2, 3], 2))


[(1, 2), (1, 3), (2, 3)]

In [6]:
list(it.combinations_with_replacement([1, 2], 2))
[(1, 1), (1, 2), (2, 2)]
#Compare that to combinations():

list(it.combinations([1, 2], 2))

[(1, 2)]

In [7]:
list(it.permutations(['a', 'b', 'c']))

[('a', 'b', 'c'),
 ('a', 'c', 'b'),
 ('b', 'a', 'c'),
 ('b', 'c', 'a'),
 ('c', 'a', 'b'),
 ('c', 'b', 'a')]

In [None]:
>>> counter = it.count()
>>> list(next(counter) for _ in range(5))
[0, 1, 2, 3, 4]
You can start counting from any number you like by setting the start keyword argument, which defaults to 0. You can even set a step keyword argument to determine the interval between numbers returned from count()—this defaults to 1.

With count(), iterators over even and odd integers become literal one-liners:

>>> evens = it.count(step=2)
>>> list(next(evens) for _ in range(5))
[0, 2, 4, 6, 8]

>>> odds = it.count(start=1, step=2)
>>> list(next(odds) for _ in range(5))
[1, 3, 5, 7, 9]
Ever since Python 3.1, the count() function also accepts non-integer arguments:

>>> count_with_floats = it.count(start=0.5, step=0.75)
>>> list(next(count_with_floats) for _ in range(5))
[0.5, 1.25, 2.0, 2.75, 3.5]
You can even pass it negative numbers:

>>> negative_count = it.count(start=-1, step=-0.5)
>>> list(next(negative_count) for _ in range(5))
[-1, -1.5, -2.0, -2.5, -3.0]

In [11]:
list(zip(it.count(), ['a', 'b', 'c']))

[(0, 'a'), (1, 'b'), (2, 'c')]

In [13]:
list(it.repeat(1, 5))  # 1, 1, 1, 1, 1

[1, 1, 1, 1, 1]

In [None]:
# cycle() repeats its input forever, so list(it.cycle(...)) never returns and
# keeps allocating memory. Take a finite prefix with islice() instead.
list(it.islice(it.cycle([1, -1, 5]), 6))  # 1, -1, 5, 1, -1, 5
list(it.islice(it.cycle(['a', 'b', 'c']), 7))  # a, b, c, a, b, c, a

In [2]:
import operator
import itertools as it
list(it.accumulate([1, 2, 3, 4, 5], operator.add))


[1, 3, 6, 10, 15]

In [3]:
list(it.accumulate([1, 2, 3, 4, 5]))

[1, 3, 6, 10, 15]

In [4]:
list(it.accumulate([9, 21, 17, 5, 11, 12, 2, 6], min))

[9, 9, 9, 5, 5, 5, 2, 2]

In [5]:
list(it.accumulate([1, 2, 3, 4, 5], lambda x, y: (x + y) / 2))

[1, 1.5, 2.25, 3.125, 4.0625]

In [7]:
ranks = ['A', 'K', 'Q', 'J', '10', '9', '8', '7', '6', '5', '4', '3', '2']
suits = ['H', 'D', 'C', 'S']


def cards():
    """Yield every (rank, suit) pair built from ``ranks`` and ``suits``.

    Pairs are produced rank by rank: every suit is paired with the first
    rank before moving on to the next rank.
    """
    yield from ((rank, suit) for rank in ranks for suit in suits)

cards = ((rank, suit) for rank in ranks for suit in suits)

In [6]:
it.product([1, 2], ['a', 'b'])  # (1, 'a'), (1, 'b'), (2, 'a'), (2, 'b')

<itertools.product at 0x10932c240>

In [8]:
list(it.product(ranks, suits))

[('A', 'H'),
 ('A', 'D'),
 ('A', 'C'),
 ('A', 'S'),
 ('K', 'H'),
 ('K', 'D'),
 ('K', 'C'),
 ('K', 'S'),
 ('Q', 'H'),
 ('Q', 'D'),
 ('Q', 'C'),
 ('Q', 'S'),
 ('J', 'H'),
 ('J', 'D'),
 ('J', 'C'),
 ('J', 'S'),
 ('10', 'H'),
 ('10', 'D'),
 ('10', 'C'),
 ('10', 'S'),
 ('9', 'H'),
 ('9', 'D'),
 ('9', 'C'),
 ('9', 'S'),
 ('8', 'H'),
 ('8', 'D'),
 ('8', 'C'),
 ('8', 'S'),
 ('7', 'H'),
 ('7', 'D'),
 ('7', 'C'),
 ('7', 'S'),
 ('6', 'H'),
 ('6', 'D'),
 ('6', 'C'),
 ('6', 'S'),
 ('5', 'H'),
 ('5', 'D'),
 ('5', 'C'),
 ('5', 'S'),
 ('4', 'H'),
 ('4', 'D'),
 ('4', 'C'),
 ('4', 'S'),
 ('3', 'H'),
 ('3', 'D'),
 ('3', 'C'),
 ('3', 'S'),
 ('2', 'H'),
 ('2', 'D'),
 ('2', 'C'),
 ('2', 'S')]

The cut() function is pretty simple, but it suffers from a couple of problems. When you slice a list, you make a copy of the original list and return a new list with the selected elements. With a deck of only 52 cards, this increase in space complexity is trivial, but you could reduce the memory overhead using itertools. To do this, you’ll need three functions: itertools.tee(), itertools.islice(), and itertools.chain().

In [20]:
iterator1, iterator2, *sd = it.tee([1, 2, 3, 4, 5], 5)
print(list(iterator1))
#[1, 2, 3, 4, 5]
#print(list(iterator1))  # iterator1 is now exhausted.
#[]
print(list(iterator2))  # iterator2 works independently of iterator1
#[1, 2, 3, 4, 5].
#print(list(iterator2))

[1, 2, 3, 4, 5]
[1, 2, 3, 4, 5]


In [None]:
# Slice from index 2 to 4
list(it.islice('ABCDEFG', 2, 5))
['C', 'D', 'E']

# Slice from beginning to index 4, in steps of 2
list(it.islice([1, 2, 3, 4, 5], 0, 5, 2))
[1, 3, 5]

# Slice from index 3 to the end
list(it.islice(range(10), 3, None))
[3, 4, 5, 6, 7, 8, 9]

# Slice from beginning to index 3
list(it.islice('ABCDE', 4))
['A', 'B', 'C', 'D']

In [22]:
print(list(it.chain('ABC', 'DEF')))
['A', 'B', 'C', 'D', 'E', 'F']

list(it.chain([1, 2], [3, 4, 5, 6], [7, 8, 9]))
[1, 2, 3, 4, 5, 6, 7, 8, 9]

['A', 'B', 'C', 'D', 'E', 'F']


[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [23]:
list(it.chain.from_iterable([[1, 2, 3], [4, 5, 6]]))
[1, 2, 3, 4, 5, 6]
# There’s no reason the argument of chain.from_iterable() needs to be finite. 
# You could emulate the behavior of cycle(), for example:

cycle = it.chain.from_iterable(it.repeat('abc'))
list(it.islice(cycle, 8))
['a', 'b', 'c', 'a', 'b', 'c', 'a', 'b']

['a', 'b', 'c', 'a', 'b', 'c', 'a', 'b']

In [None]:
only_positives = it.filterfalse(lambda x: x <= 0, [0, 1, -1, 2, -2])
list(only_positives)
[1, 2]

In [24]:
it.takewhile(lambda x: x < 3, [0, 1, 2, 3, 4])  # 0, 1, 2
# The dropwhile() function does exactly the opposite. 
# It returns an iterator beginning at the first element for which the predicate returns False:

it.dropwhile(lambda x: x < 3, [0, 1, 2, 3, 4])  # 3, 4

<itertools.dropwhile at 0x109e2a240>