[Reference1](https://towardsdatascience.com/tour-of-python-itertools-2af84db18a5e) <br>
[Reference2](https://docs.python.org/3/library/itertools.html#itertools-recipes)

# Compress

In [1]:
dates = [
    "2020-01-01",
    "2020-02-04",
    "2020-02-01",
    "2020-01-24",
    "2020-01-08",
    "2020-02-10",
    "2020-02-15",
    "2020-02-11",
]

counts = [1, 4, 3, 8, 0, 7, 9, 2]

from itertools import compress
bools = [n > 3 for n in counts]
print(list(compress(dates, bools)))  # Compress returns iterator!

['2020-02-04', '2020-01-24', '2020-02-10', '2020-02-15']


# Accumulate

In [2]:
from itertools import accumulate
import operator

data = [3, 4, 1, 3, 5, 6, 9, 0, 1]

list(accumulate(data, max))  # running maximum

list(accumulate(range(1, 11), operator.mul))  # Factorial

[1, 2, 6, 24, 120, 720, 5040, 40320, 362880, 3628800]

# Cycle

In [6]:
# Cycling through players
from itertools import cycle

players = ["John", "Ben", "Martin", "Peter"]

next_player = cycle(players).__next__
player = next_player()

player = next_player()

# Infinite Spinner
import time

for c in cycle('/-\|'):
    print(c, end = '\r')
    time.sleep(0.2)

# Tee

In [4]:
from itertools import tee

def pairwise(iterable):
    """
    s -> (s0, s1), (s1, s2), (s2, s3), ...
    """
    a, b = tee(iterable, 2)
    next(b, None)
    return zip(a, b)

# more_itertools

# Divide

In [5]:
from more_itertools import divide
data = ["first", "second", "third", "fourth", "fifth", "sixth", "seventh"]

[list(l) for l in divide(3, data)]

[['first', 'second', 'third'], ['fourth', 'fifth'], ['sixth', 'seventh']]

# Partition

In [7]:
# Split based on age
from datetime import datetime, timedelta
from more_itertools import partition

dates = [ 
    datetime(2015, 1, 15),
    datetime(2020, 1, 16),
    datetime(2020, 1, 17),
    datetime(2019, 2, 1),
    datetime(2020, 2, 2),
    datetime(2018, 2, 4)
]

is_old = lambda x: datetime.now() - x < timedelta(days=30)
old, recent = partition(is_old, dates)
list(old)
#  [datetime.datetime(2015, 1, 15, 0, 0), datetime.datetime(2019, 2, 1, 0, 0), datetime.datetime(2018, 2, 4, 0, 0)]
list(recent)
#  [datetime.datetime(2020, 1, 16, 0, 0), datetime.datetime(2020, 1, 17, 0, 0), datetime.datetime(2020, 2, 2, 0, 0)]


# Split based on file extension
files = [
    "foo.jpg",
    "bar.exe",
    "baz.gif",
    "text.txt",
    "data.bin",
]

ALLOWED_EXTENSIONS = ('jpg','jpeg','gif','bmp','png')
is_allowed = lambda x: x.split(".")[1] in ALLOWED_EXTENSIONS

allowed, forbidden = partition(is_allowed, files)
list(allowed)
#  ['bar.exe', 'text.txt', 'data.bin']
list(forbidden)
#  ['foo.jpg', 'baz.gif']

['foo.jpg', 'baz.gif']

# Consecutive_groups

In [8]:
# Consecutive Groups of dates
import datetime
import more_itertools
  
dates = [ 
    datetime.datetime(2020, 1, 15),
    datetime.datetime(2020, 1, 16),
    datetime.datetime(2020, 1, 17),
    datetime.datetime(2020, 2, 1),
    datetime.datetime(2020, 2, 2),
    datetime.datetime(2020, 2, 4)
]

ordinal_dates = []
for d in dates:
    ordinal_dates.append(d.toordinal())

groups = [list(map(datetime.datetime.fromordinal, group)) for group in more_itertools.consecutive_groups(ordinal_dates)]

In [9]:
groups

[[datetime.datetime(2020, 1, 15, 0, 0),
  datetime.datetime(2020, 1, 16, 0, 0),
  datetime.datetime(2020, 1, 17, 0, 0)],
 [datetime.datetime(2020, 2, 1, 0, 0), datetime.datetime(2020, 2, 2, 0, 0)],
 [datetime.datetime(2020, 2, 4, 0, 0)]]

# Side_effect

In [15]:
import more_itertools
num_events = 0

def _increment_num_events(_):
    nonlocal num_events
    num_events += 1

# Iterator that will be consumed
event_iterator = more_itertools.side_effect(_increment_num_events, events)

more_itertools.consume(event_iterator)

print(num_events)

SyntaxError: ignored

# Collapse

In [16]:
import more_itertools
import os

# Get flat list of all files and directories
# list(more_itertools.collapse(list(os.walk("/home/martin/Downloads"))))

# Get all nodes of tree into flat list
tree = [40, [25, [10, 3, 17], [32, 30, 38]], [78, 50, 93]]  # [Root, SUB_TREE_1, SUB_TREE_2, ..., SUB_TREE_n]
list(more_itertools.collapse(tree))

[40, 25, 10, 3, 17, 32, 30, 38, 78, 50, 93]

# Split_at

In [17]:
import more_itertools

lines = [
    "erhgedrgh",
    "erhgedrghed",
    "esdrhesdresr",
    "ktguygkyuk",
    "-------------",
    "srdthsrdt",
    "waefawef",
    "ryjrtyfj",
    "-------------",
    "edthedt",
    "awefawe",
]

list(more_itertools.split_at(lines, lambda x: '-------------' in x))

[['erhgedrgh', 'erhgedrghed', 'esdrhesdresr', 'ktguygkyuk'],
 ['srdthsrdt', 'waefawef', 'ryjrtyfj'],
 ['edthedt', 'awefawe']]

# Bucket

In [18]:
# Split based on Object Type
import more_itertools

class Cube:
    pass

class Circle:
    pass

class Triangle:
    pass

shapes = [Circle(), Cube(), Circle(), Circle(), Cube(), Triangle(), Triangle()]
s = more_itertools.bucket(shapes, key=lambda x: type(x))
list(s[Cube])
list(s[Circle])

[<__main__.Circle at 0x7fe652181f50>,
 <__main__.Circle at 0x7fe652181dd0>,
 <__main__.Circle at 0x7fe652181fd0>]

# Map_reduce

In [19]:
from more_itertools import map_reduce
data = 'This sentence has words of various lengths in it, both short ones and long ones'.split()

keyfunc = lambda x: len(x)
result = map_reduce(data, keyfunc)
print(result)

valuefunc = lambda x: 1
result = map_reduce(data, keyfunc, valuefunc)
print(result)

reducefunc = sum
result = map_reduce(data, keyfunc, valuefunc, reducefunc)
print(result)

defaultdict(None, {4: ['This', 'both', 'ones', 'long', 'ones'], 8: ['sentence'], 3: ['has', 'it,', 'and'], 5: ['words', 'short'], 2: ['of', 'in'], 7: ['various', 'lengths']})
defaultdict(None, {4: [1, 1, 1, 1, 1], 8: [1], 3: [1, 1, 1], 5: [1, 1], 2: [1, 1], 7: [1, 1]})
defaultdict(None, {4: 5, 8: 1, 3: 3, 5: 2, 2: 2, 7: 2})


# Sort_together

In [20]:
from more_itertools import sort_together
cols = [
    ("John", "Ben", "Andy", "Mary"),
    ("1994-02-06", "1985-04-01", "2000-06-25", "1998-03-14"),
    ("2020-01-06", "2019-03-07", "2020-01-08", "2018-08-15")
]

sort_together(cols, key_list=(1, 2))

[('Ben', 'John', 'Mary', 'Andy'),
 ('1985-04-01', '1994-02-06', '1998-03-14', '2000-06-25'),
 ('2019-03-07', '2020-01-06', '2018-08-15', '2020-01-08')]

# Seekable

In [21]:
from more_itertools import seekable

data = "This is example sentence for seeking back and forth".split()

it = seekable(data)
for word in it:
    ...

next(it)

it.seek(3)
next(it)

StopIteration: ignored

# Filter_except

In [22]:
from more_itertools import filter_except

data = ['1.5', '6', 'not-important', '11', '1.23E-7', 'remove-me', '25', 'trash']
list(map(float, filter_except(float, data, TypeError, ValueError)))

[1.5, 6.0, 11.0, 1.23e-07, 25.0]

# Unique_to_each

In [23]:
from more_itertools import unique_to_each

# Graph (adjacency list)
graph = {'A': {'B', 'E'}, 'B': {'A', 'C'}, 'C': {'B'}, 'D': {'E'}, 'E': {'A', 'D'}}

unique_to_each({'B', 'E'}, {'A', 'C'}, {'B'}, {'E'}, {'A', 'D'})

[[], ['C'], [], [], ['D']]

# Numeric_range

In [24]:
from more_itertools import numeric_range
import datetime
from decimal import Decimal

list(numeric_range(Decimal('1.7'), Decimal('3.5'), Decimal('0.3')))
#  [Decimal('1.7'), Decimal('2.0'), Decimal('2.3'), Decimal('2.6'), Decimal('2.9'), Decimal('3.2')]

start = datetime.datetime(2020, 2, 10)
stop = datetime.datetime(2020, 2, 15)
step = datetime.timedelta(days=2)
list(numeric_range(start, stop, step))

[datetime.datetime(2020, 2, 10, 0, 0),
 datetime.datetime(2020, 2, 12, 0, 0),
 datetime.datetime(2020, 2, 14, 0, 0)]

# Make_decorator

In [25]:
from more_itertools import make_decorator
from more_itertools import map_except

mapper_except = make_decorator(map_except, result_index=1)

@mapper_except(float, ValueError, TypeError)
def read_file(f):
    ... # Read mix of text and numbers from file
    return ['1.5', '6', 'not-important', '11', '1.23E-7', 'remove-me', '25', 'trash']

list(read_file("file.txt"))

[1.5, 6.0, 11.0, 1.23e-07, 25.0]