# itertools

In [1]:
import itertools as it

#### chain

In [2]:
for i in it.chain(range(4), ['a','b','c']):
    print i,

0 1 2 3 a b c


In [3]:
%timeit list(it.chain.from_iterable([range(4), ['a','b','c']]))

The slowest run took 5.51 times longer than the fastest. This could mean that an intermediate result is being cached 
100000 loops, best of 3: 2.3 µs per loop


In [4]:
%%timeit
y = []
for x in [range(4), ['a','b','c']]:
    y.extend(x)
y

1000000 loops, best of 3: 1.34 µs per loop


#### izip

In [5]:
for i in it.izip(range(3), ['a','b', 'c','d'], ['A','B','C']):
    print i,

(0, 'a', 'A') (1, 'b', 'B') (2, 'c', 'C')


#### imap

In [6]:
for i in it.imap(lambda x : 2*x, xrange(5)):
    print i,

0 2 4 6 8


In [7]:
for i in it.imap(lambda x,y : (x, y, x+y), xrange(5), xrange(5,100)):
    print '%d + %d = %d' %i

0 + 5 = 5
1 + 6 = 7
2 + 7 = 9
3 + 8 = 11
4 + 9 = 13


In [8]:
from operator import add
for i in it.imap(add, xrange(5), xrange(5,100)):
    print i,

5 7 9 11 13


#### starmap

In [9]:
values = [(0, 5), (1, 6), (2, 7), (3, 8), (4, 9)]
for i in it.starmap(lambda x,y:(x, y, x+y), values):
    print '%d + %d = %d' % i

0 + 5 = 5
1 + 6 = 7
2 + 7 = 9
3 + 8 = 11
4 + 9 = 13


#### count

In [10]:
counter = it.count(1)
for i in range(50):
    total_iter = next(counter)
    if total_iter % 10 == 0:
        print 'total iter %d' %total_iter

total iter 10
total iter 20
total iter 30
total iter 40
total iter 50


In [11]:
for i in it.izip(it.count(1), ['a','b','c']):
    print i,

(1, 'a') (2, 'b') (3, 'c')


#### cycle

In [12]:
for i in it.izip(xrange(5), it.cycle(['a','b'])):
    print i,

(0, 'a') (1, 'b') (2, 'a') (3, 'b') (4, 'a')


#### repeat

In [13]:
for i in it.repeat('hello', 5):
    print i,

hello hello hello hello hello


In [14]:
for i in it.imap(pow, xrange(10), it.repeat(2)):
    print i,

0 1 4 9 16 25 36 49 64 81


#### ifilter

In [15]:
%timeit -n100 list(it.ifilter(lambda x : x < 1, [-1, 0, 1, 2, 3, 4, 1, -2 ]))

100 loops, best of 3: 3.51 µs per loop


In [16]:
%timeit -n100 list(x for x in [-1, 0, 1, 2, 3, 4, 1, -2 ] if x < 1)

100 loops, best of 3: 4.96 µs per loop


#### islice

In [17]:
# islice() does not support negative values for start, stop, or step
# If stop is None, then iteration continues until the iterator is exhausted
print list(it.islice('ABCDEFG', 2))
print list(it.islice('ABCDEFG', 2, 4))
print list(it.islice('ABCDEFG', 2, None))
print list(it.islice('ABCDEFG', 0, None, 2))

['A', 'B']
['C', 'D']
['C', 'D', 'E', 'F', 'G']
['A', 'C', 'E', 'G']


In [18]:
def moving_average(d, n=3):
    from itertools import islice
    len_d = len(d)
    for start in xrange(0, len_d-n+1):
        yield sum(islice(d, start, start+n))/float(n)
list(moving_average([40, 30, 50, 46, 39, 44], n=3))

[40.0, 42.0, 45.0, 43.0]

#### compress

In [19]:
list(it.compress('ABCDEF', [True, False, True, False, True, True]))

['A', 'C', 'E', 'F']

#### dropwhile /  takewhile

In [20]:
print list(it.dropwhile(lambda x: x < 5, [1,4,6,4,1]))
print list(it.takewhile(lambda x: x < 5, [1,4,6,4,1]))

[6, 4, 1]
[1, 4]


#### groupby

In [21]:
from operator import itemgetter
import itertools as it

d = dict(a=1, b=2, c=1, d=2, e=1, f=2, g=3)
di = sorted(d.iteritems(), key=itemgetter(1))
for k, g in it.groupby(di, key=itemgetter(1)):
#     print k, list(g)
#     print k, map(itemgetter(0), g)
    key, value = k, [each[0] for each in g] #  much fast
    print key, value

1 ['a', 'c', 'e']
2 ['b', 'd', 'f']
3 ['g']


In [22]:
from collections import defaultdict

d = dict(a=1, b=2, c=1, d=2, e=1, f=2, g=3)
result = defaultdict(list)
for k,v in d.iteritems():
    result[v].append(k)
for key in result:
    print key, result[key]

1 ['a', 'c', 'e']
2 ['b', 'd', 'f']
3 ['g']


#### product

In [23]:
%timeit -n100 list(it.product('ABC', [1, 2]))  # 笛卡尔积

100 loops, best of 3: 5.38 µs per loop


In [24]:
%timeit -n100 [(x, y) for x in 'ABC' for y in [1, 2]]

100 loops, best of 3: 5.64 µs per loop


In [25]:
list(it.product('AB', repeat=3))  # list(it.product('AB','AB','AB'))  

[('A', 'A', 'A'),
 ('A', 'A', 'B'),
 ('A', 'B', 'A'),
 ('A', 'B', 'B'),
 ('B', 'A', 'A'),
 ('B', 'A', 'B'),
 ('B', 'B', 'A'),
 ('B', 'B', 'B')]

#### permutations

In [26]:
list(it.permutations('ABC', 2))  # 排列

[('A', 'B'), ('A', 'C'), ('B', 'A'), ('B', 'C'), ('C', 'A'), ('C', 'B')]

#### combinations

In [27]:
list(it.combinations('ABC', 2))  # 组合

[('A', 'B'), ('A', 'C'), ('B', 'C')]

#### tee

In [33]:
def pairwalk1(iterable):
    a, b = it.tee(iterable)
    return it.izip(it.islice(a, 0, None), it.islice(b, 1, None))

In [60]:
def pairwalk2(iterable):
    a, b = it.tee(iterable, 2)
    next(b, None)   # next 默认值为None的用法值得学习
    return it.izip(a, b)

In [64]:
iterable = range(100000)

In [65]:
%timeit list(pairwalk1(iterable))

100 loops, best of 3: 13.4 ms per loop


In [66]:
%timeit list(pairwalk2(iterable))

100 loops, best of 3: 11.6 ms per loop
