31 引数に対してイテレータを使うときには確実さを優先する

In [1]:
# Create the sample data file: one integer per line.
path = 'my_numbers.txt'
with open(path, 'w') as f:
    f.writelines(f'{i}\n' for i in (15, 35, 80))

In [7]:
def normalize(numbers):
    """Return each value of ``numbers`` as a percentage of their sum.

    ``numbers`` is traversed twice: once by ``sum`` and once to build the
    result. A one-shot iterator is therefore already exhausted on the second
    pass and yields an empty list.
    """
    total = sum(numbers)
    return [100 * value / total for value in numbers]
def read_visits(data_path):
    """Lazily yield one integer per line of the text file at ``data_path``.

    The file is opened when iteration starts and closed when the generator
    finishes.
    """
    with open(data_path) as handle:
        yield from map(int, handle)

it = read_visits('my_numbers.txt')
print(list(it))
print(list(it))

[15, 35, 80]
[]


In [8]:
#新たなイテレータをそのたびに生成
def normalize_func(get_iter):
    """Return percentage shares using a factory that produces fresh iterators.

    ``get_iter`` is called with no arguments twice: once to compute the
    total and once to compute each value's share of it.
    """
    total = sum(get_iter())
    return [100 * value / total for value in get_iter()]

In [9]:
path = 'my_numbers.txt'
percentages = normalize_func(lambda : read_visits(path))
print(percentages)
assert sum(percentages)==100.0

[11.538461538461538, 26.923076923076923, 61.53846153846154]


In [10]:
#もっとよい方法：イテレータプロトコルを実装した新たなコンテナクラスを提供する
class ReadVisits:
    """Re-iterable container over the integers stored one per line in a file.

    Each call to ``iter()`` opens the file again, so the same object can be
    traversed any number of times.
    """

    def __init__(self, data_path):
        self.data_path = data_path

    def __iter__(self):
        """Open ``self.data_path`` and yield each line converted to ``int``."""
        with open(self.data_path) as handle:
            yield from map(int, handle)

visits = ReadVisits(path)
percentages = normalize(visits)
print(percentages)

[11.538461538461538, 26.923076923076923, 61.53846153846154]


In [11]:
#iterがイテレータかコンテナかを判別する
def normalize_defensive(numbers):
    """Return percentage shares of ``numbers``, rejecting one-shot iterators.

    Raises:
        TypeError: if ``iter(numbers)`` returns ``numbers`` itself, which is
            how an iterator (as opposed to a container) behaves.
    """
    is_container = iter(numbers) is not numbers
    if not is_container:
        raise TypeError('Must supply a container')

    total = sum(numbers)
    return [100 * value / total for value in numbers]

In [12]:
from collections.abc import Iterator
def normalize_defensive(numbers):
    """Return percentage shares of ``numbers``, rejecting one-shot iterators.

    Raises:
        TypeError: if ``numbers`` is an instance of ``collections.abc.Iterator``.
    """
    if isinstance(numbers, Iterator):
        raise TypeError('Must supply a container')

    total = sum(numbers)
    return [100 * value / total for value in numbers]

In [13]:
visits = ReadVisits(path)
normalize_defensive(visits)

[11.538461538461538, 26.923076923076923, 61.53846153846154]

In [14]:
visits = [15, 35, 80]
# Build the iterator from this cell's own data instead of relying on an `it`
# left over from an earlier cell; passing an iterator must raise TypeError.
it = iter(visits)
normalize_defensive(it)

TypeError: Must supply a container

32 大きなリスト内包表記にはジェネレータ式を考える

In [15]:
import random

# Create a sample file of 10 lines, each made of 0 to 100 'a' characters.
with open('my_file.txt', 'w') as f:
    for _ in range(10):
        line_length = random.randint(0, 100)
        f.write('a' * line_length + '\n')

In [16]:
#リスト内包表記
# The context manager closes the file as soon as the lengths are collected,
# rather than leaving the handle open until it is garbage collected.
with open('my_file.txt') as f:
    value = [len(x) for x in f]
print(value)

[49, 94, 4, 88, 99, 41, 12, 83, 16, 72]


In [19]:
#ジェネレータ表記
it = (len(x) for x in open('my_file.txt'))
print(it)

<generator object <genexpr> at 0x7f57054102e0>


In [20]:
print(next(it))
print(next(it))

49
94


In [21]:
#ジェネレータを連鎖させる
roots = ((x, x**0.5) for x in it)
print(next(roots))

(4, 2.0)


33 yield fromで複数のジェネレータを作る

In [22]:
def move(period, speed):
    """Yield ``speed`` once for each of ``period`` steps."""
    yield from (speed for _ in range(period))

def pause(delay):
    """Yield ``0`` once for each of ``delay`` steps."""
    yield from (0 for _ in range(delay))

In [23]:
#yield from 入れ子になった子ジェネレータからすべての値を生成できる
def animate_composed():
    """Yield every value of three child generators in sequence.

    The sequence is move(4, 5.0), then pause(3), then move(2, 3.0).
    """
    segments = (move(4, 5.0), pause(3), move(2, 3.0))
    for segment in segments:
        yield from segment

34 sendでジェネレータにデータを注入するのは避ける

In [24]:
import math
def transmit(output):
    """Print ``output`` right-aligned with one decimal, or a notice if it is None."""
    if output is not None:
        print(f'Output: {output:>5.1f}')
        return
    print('Output is None')
def wave_cascading(amplitude_it, steps):
    """Yield one sine-wave sample per step, scaled by successive amplitudes.

    One full period (2*pi) is divided into ``steps`` equal angles. For each
    angle the next value is pulled from ``amplitude_it`` and multiplied by the
    sine of that angle.
    """
    step_size = 2 * math.pi / steps
    for step in range(steps):
        fraction = math.sin(step * step_size)
        yield next(amplitude_it) * fraction

def complex_wave_cascading(amplitude_it):
    """Chain three waves of 3, 4 and 5 steps, all fed from ``amplitude_it``."""
    for steps in (3, 4, 5):
        yield from wave_cascading(amplitude_it, steps)

def run_cascading():
    """Transmit one composed-wave sample for every entry in a fixed amplitude list."""
    amplitudes = [7] * 3 + [2] * 4 + [10] * 5
    wave = complex_wave_cascading(iter(amplitudes))
    # Pull exactly as many samples as there are amplitudes.
    for _ in amplitudes:
        transmit(next(wave))

In [25]:
run_cascading()

Output:   0.0
Output:   6.1
Output:  -6.1
Output:   0.0
Output:   2.0
Output:   0.0
Output:  -2.0
Output:   0.0
Output:   9.5
Output:   5.9
Output:  -5.9
Output:  -9.5


In [26]:
class MyError(Exception):
    """Custom exception used to demonstrate throwing into a generator."""

def my_generator():
    """Yield 1, 2 and 3 in order, with no exception handling of its own."""
    for value in (1, 2, 3):
        yield value

it = my_generator()
print(next(it))
print(next(it))
print(it.throw(MyError('test error')))

1
2


MyError: test error

In [27]:
def my_generator():
    """Yield 1, 2, 3, 4, recovering from a MyError thrown in at ``yield 2``.

    If MyError is raised at ``yield 2``, a message is printed, ``yield 3`` is
    skipped and the generator continues with ``yield 4``.
    """
    yield 1
    try:
        # Only this yield is protected by the except clause below.
        yield 2
    except MyError:
        print('Got MyError!')
    else:
        # Reached only when no exception was raised at ``yield 2``.
        yield 3
    yield 4

In [28]:
it = my_generator()
print(next(it))
print(next(it))
print(it.throw(MyError('test error')))

1
2
Got MyError!
4


In [32]:
#時々リセットできるタイマーのプログラム
class Timer:
    """Countdown that can be rewound to its starting period during iteration."""

    def __init__(self, period):
        self.period = period
        self.current = period

    def reset(self):
        """Restore the remaining count to the full period."""
        self.current = self.period

    def __iter__(self):
        """Decrement the remaining count and yield it until it becomes falsy."""
        while self.current:
            self.current = remaining = self.current - 1
            yield remaining

In [34]:
RESETS = [
    False, False, False, True, False, True, False,
    False, False, False, False, False, False, False]
def check_for_reset():
    """Remove and return the first entry of the module-level RESETS list."""
    # Poll for external event
    return RESETS.pop(0)

def announce(remaining):
    """Print how many ticks are left."""
    print(f'{remaining} ticks remaining')
def run():
    """Iterate a Timer(4), announcing each tick and resetting it when requested."""
    timer = Timer(4)
    for current in timer:
        # reset() rewinds the timer's remaining count while the loop is running.
        if check_for_reset():
            timer.reset()
        announce(current)

run()

3 ticks remaining
2 ticks remaining
1 ticks remaining
0 ticks remaining
3 ticks remaining
2 ticks remaining
3 ticks remaining
2 ticks remaining
1 ticks remaining
0 ticks remaining


36 イテレータとジェネレータの作業ではitertoolsを使う

イテレータをつなげる

In [35]:
import itertools

#chain 複数のイテレータをつなげて1つのシーケンスにする
it = itertools.chain([1,2,3],[4,5,6])
print(list(it))

[1, 2, 3, 4, 5, 6]


In [36]:
#repeat 1つの値を何回も出力したり第2引数で指定した最大繰り返し回数で出力する
it = itertools.repeat('hello',3)
print(list(it))

['hello', 'hello', 'hello']


In [37]:
#cycle イテレータの要素を何回も繰り返す
it = itertools.cycle([1,2])
result = [next(it) for _ in range(10)]
print(result)

[1, 2, 1, 2, 1, 2, 1, 2, 1, 2]


In [38]:
#tee 1つのイテレータを分割して、第2引数で指定した個数の並列イテレータにする
it1, it2, it3 = itertools.tee(['first', 'second'], 3)
print(list(it1))
print(list(it2))
print(list(it3))

['first', 'second']
['first', 'second']
['first', 'second']


In [40]:
#zip_longest 長さが異なるイテレータの場合はイテレータが終了したらプレースホルダー値を返す
keys = ['one','two','three']
values = [1,2]
normal = list(zip(keys, values))
print('zip: ', normal)
it = itertools.zip_longest(keys, values, fillvalue='nope')
longest = list(it)
print('zip_longest:', longest)

zip:  [('one', 1), ('two', 2)]
zip_longest: [('one', 1), ('two', 2), ('three', 'nope')]


イテレータの要素をふるい分ける

In [41]:
#islice インデックスでイテレータをスライスする　start, end, strideを指定できる

values = [i+1 for i in range(10)]
first_five = itertools.islice(values, 5)
print('first five: ', list(first_five))

middle_odds = itertools.islice(values, 2, 8, 2)
print('Middle odds: ', list(middle_odds))


first five:  [1, 2, 3, 4, 5]
Middle odds:  [3, 5, 7]


In [42]:
#takewhile 述語関数がFalseを返すまでイテレータの要素を返す
values = [i+1 for i in range(10)]
less_than_seven = lambda x : x<7
it = itertools.takewhile(less_than_seven, values)
print(list(it))

[1, 2, 3, 4, 5, 6]


In [44]:
#dropwhile 述語関数が初めてFalseを返すまでイテレータの要素をスキップし、それ以降の要素をすべて返す
values = [i+1 for i in range(10)]
less_than_seven = lambda x: x<7
it = itertools.dropwhile(less_than_seven, values)
print(list(it))

[7, 8, 9, 10]


In [46]:
#filterfalse 述語関数がFalseになるイテレータの全要素を返す
values = [i+1 for i in range(10)]
evens = lambda x: x%2==0
filter_result = filter(evens, values)
print('Filter:  ', list(filter_result))

filter_false_result = itertools.filterfalse(evens, values)
print('Filter false:', list(filter_false_result))

Filter:   [2, 4, 6, 8, 10]
Filter false: [1, 3, 5, 7, 9]


イテレータで得た要素を組み合わせる

In [47]:
#accumulate 入力値に対する累積結果を出力
values = [i+1 for i in range(10)]
sum_reduce = itertools.accumulate(values)
print('Sum  ', list(sum_reduce))

def sum_modulo_20(first, second):
    """Return the sum of the two arguments reduced modulo 20."""
    return (first + second) % 20

module_reduce = itertools.accumulate(values, sum_modulo_20)
print('Module:', list(module_reduce))


Sum   [1, 3, 6, 10, 15, 21, 28, 36, 45, 55]
Module: [1, 3, 6, 10, 15, 1, 8, 16, 5, 15]


In [48]:
#product 1つ以上のイテレータからの要素の直積を返す
single = itertools.product([1,2], repeat=2)
print('Single: ', list(single))
multiple = itertools.product([1,2],['a','b'])
print('Multiple:', list(multiple))

Single:  [(1, 1), (1, 2), (2, 1), (2, 2)]
Multiple: [(1, 'a'), (1, 'b'), (2, 'a'), (2, 'b')]


In [50]:
#permutations イテレータからN個の要素を取り出してできる順列を返す
it = itertools.permutations([1,2,3,4],2)
print(list(it))

[(1, 2), (1, 3), (1, 4), (2, 1), (2, 3), (2, 4), (3, 1), (3, 2), (3, 4), (4, 1), (4, 2), (4, 3)]


In [51]:
#combinations イテレータからN個の要素を取り出したときに可能なすべての組み合わせを返す
it = itertools.combinations([1,2,3,4],2)
print(list(it))

[(1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)]


In [52]:
#combinations_with_replacement 取り出した値を元に戻す形式で同じ要素の反復を許す
it = itertools.combinations_with_replacement([1,2,3,4],2)

In [53]:
help(itertools)

Help on built-in module itertools:

NAME
    itertools - Functional tools for creating and using iterators.

DESCRIPTION
    Infinite iterators:
    count(start=0, step=1) --> start, start+step, start+2*step, ...
    cycle(p) --> p0, p1, ... plast, p0, p1, ...
    repeat(elem [,n]) --> elem, elem, elem, ... endlessly or up to n times
    
    Iterators terminating on the shortest input sequence:
    accumulate(p[, func]) --> p0, p0+p1, p0+p1+p2
    chain(p, q, ...) --> p0, p1, ... plast, q0, q1, ...
    chain.from_iterable([p, q, ...]) --> p0, p1, ... plast, q0, q1, ...
    compress(data, selectors) --> (d[0] if s[0]), (d[1] if s[1]), ...
    dropwhile(pred, seq) --> seq[n], seq[n+1], starting when pred fails
    groupby(iterable[, keyfunc]) --> sub-iterators grouped by value of keyfunc(v)
    filterfalse(pred, seq) --> elements of seq where pred(elem) is False
    islice(seq, [start,] stop [, step]) --> elements from
           seq[start:stop:step]
    starmap(fun, seq) --> fun(*seq[0

In [62]:
from collections import namedtuple, defaultdict

Grade = namedtuple('Grade', ('score', 'weight'))
class Subject:
    """Collection of weighted grades for a single subject."""

    def __init__(self):
        self._grades = []

    def report_grade(self, score, weight):
        """Record one score together with its weight."""
        entry = Grade(score, weight)
        self._grades.append(entry)

    def average_grade(self):
        """Return the weighted mean of all recorded scores.

        Raises ZeroDivisionError when the recorded weights sum to zero, which
        includes the case where no grade has been reported.
        """
        weighted_sum = 0
        weight_sum = 0
        # Each entry is a (score, weight) named tuple.
        for score, weight in self._grades:
            weighted_sum += score * weight
            weight_sum += weight
        return weighted_sum / weight_sum

class Student:
    """Set of subjects, keyed by name, studied by one student."""

    def __init__(self):
        # A Subject is created automatically the first time a name is looked up.
        self._subjects = defaultdict(Subject)

    def get_subject(self, name):
        """Return the entry stored under ``name`` in the subject mapping."""
        return self._subjects[name]

    def average_grade(self):
        """Return the unweighted mean of every subject's average grade.

        Raises ZeroDivisionError when the student has no subjects.
        """
        averages = [subject.average_grade() for subject in self._subjects.values()]
        total = 0
        for average in averages:
            total += average
        return total / len(averages)

class Gradebook:
    """Registry of students keyed by name."""
    def __init__(self):
        # defaultdict builds a new Student the first time a name is looked up.
        self._students = defaultdict(Student)

    def get_student(self, name):
        """Return the Student stored under ``name``, creating it on first access."""
        return self._students[name]


In [63]:
book = Gradebook()
albert = book.get_student('Albert Einstein')
# Named `math_subject` so the imported `math` module, which wave_cascading
# relies on, is not rebound to a Subject for the rest of the session.
math_subject = albert.get_subject('Math')
math_subject.report_grade(75, 0.05)
math_subject.report_grade(65, 0.15)
math_subject.report_grade(70, 0.80)
gym = albert.get_subject('Gym')
gym.report_grade(100, 0.4)
gym.report_grade(85, 0.6)
print(albert.average_grade())

80.25


38 単純なインターフェースにはクラスの代わりに関数を使う

In [64]:
#関数を渡すことによって振る舞いをカスタマイズできる組み込みapi フック
names = ['Socrates', 'Archimedes','Plato','Aristotle']
names.sort(key=len)
print(names)

['Plato', 'Socrates', 'Aristotle', 'Archimedes']


In [65]:
def log_missing():
    """Print a notice and return 0; used below as a defaultdict default factory."""
    print('Key added')
    return 0

current = {"green":12, "blue":3}
increments=[
    ('red',5),
    ('blue',17),
    ('orange',9),
]
result = defaultdict(log_missing, current)
print('Before:', dict(result))
for key, amount in increments:
    result[key]+=amount
print('After: ', dict(result))

Before: {'green': 12, 'blue': 3}
Key added
Key added
After:  {'green': 12, 'blue': 20, 'red': 5, 'orange': 9}


In [66]:
def increment_with_report(current, increments):
    """Apply ``increments`` to a copy of ``current`` and count newly added keys.

    Args:
        current: mapping of key to starting amount; it is not modified.
        increments: iterable of ``(key, amount)`` pairs to add.

    Returns:
        A ``(result, added_count)`` tuple: the resulting defaultdict and the
        number of keys that were absent when first incremented.
    """
    new_keys = 0

    def on_missing():
        # Stateful default factory: the closure counts each missing-key lookup.
        nonlocal new_keys
        new_keys += 1
        return 0

    totals = defaultdict(on_missing, current)
    for name, delta in increments:
        totals[name] += delta

    return totals, new_keys

result, count=  increment_with_report(current, increments)
assert count==2

In [67]:
#__call__をつかってオブジェクトを関数で呼び出す
class BetterCountMissing:
    """Callable object that returns 0 and counts how many times it was called."""
    def __init__(self):
        # Number of calls made so far.
        self.added = 0

    def __call__(self):
        """Increment the call counter and return 0."""
        self.added +=1
        return 0

counter = BetterCountMissing()
assert counter() ==0
assert callable(counter)

In [68]:
counter = BetterCountMissing()
result = defaultdict(counter, current)
for key, amount in increments:
    result[key] +=amount

assert counter.added == 2

39 @classmethodポリモルフィズムを使ってオブジェクトをジェネリックに構築する

In [69]:
class InputData:
    """Interface for a source of input data; subclasses implement ``read``."""
    def read(self):
        """Return the input's contents; must be overridden by subclasses."""
        raise NotImplementedError

class PathInputData(InputData):
    """InputData whose contents come from a file on disk."""
    def __init__(self, path):
        super().__init__()
        self.path = path

    def read(self):
        """Return the entire text of the file at ``self.path``."""
        with open(self.path) as f:
            return f.read()


In [70]:
class Worker:
    """Base class for a map/reduce step bound to one input.

    ``result`` starts as None; subclasses implement ``map`` and ``reduce``.
    """
    def __init__(self,input_data):
        self.input_data = input_data
        self.result = None

    def map(self):
        """Process ``self.input_data``; must be overridden by subclasses."""
        raise NotImplementedError

    def reduce(self,other):
        """Combine ``other`` into this worker; must be overridden by subclasses."""
        raise NotImplementedError

class LineCountWorker(Worker):
    """Worker that counts newline characters in its input."""
    def map(self):
        """Read the input and store its number of newline characters in ``result``."""
        data = self.input_data.read()
        self.result = data.count('\n')

    def reduce(self, other):
        """Add ``other.result`` to this worker's ``result`` in place."""
        self.result += other.result

In [71]:
import os
def generate_inputs(data_dir):
    """Yield a PathInputData for every entry listed in ``data_dir``."""
    for entry in os.listdir(data_dir):
        entry_path = os.path.join(data_dir, entry)
        yield PathInputData(entry_path)

def create_workers(input_list):
    """Return a list with one LineCountWorker per item of ``input_list``."""
    return [LineCountWorker(input_data) for input_data in input_list]

In [72]:
from threading import Thread

def execute(workers):
    """Run every worker's ``map`` in its own thread, then fold the results.

    All threads are started and joined before any reduction happens. Every
    other worker is then reduced into the first one, whose ``result`` is
    returned. Raises ValueError when ``workers`` is empty.
    """
    map_threads = [Thread(target=worker.map) for worker in workers]
    for thread in map_threads:
        thread.start()
    for thread in map_threads:
        thread.join()

    combined, *others = workers
    for other in others:
        combined.reduce(other)
    return combined.result

def mapreduce(data_dir):
    """Build inputs from ``data_dir``, wrap them in workers and execute them."""
    return execute(create_workers(generate_inputs(data_dir)))


In [73]:
import os
import random


def write_test_files(tmpdir):
    """Create ``tmpdir`` if needed and fill it with 100 randomly sized files.

    The files are named "0" through "99". Each one holds between 0 and 100
    newline characters, and any existing file with the same name is
    overwritten.
    """
    # exist_ok lets the cell be re-run without failing on the existing directory.
    os.makedirs(tmpdir, exist_ok=True)
    for i in range(100):
        with open(os.path.join(tmpdir, str(i)), 'w') as f:
            f.write('\n' * random.randint(0, 100))

tmpdir = 'test_inputs'
write_test_files(tmpdir)

In [74]:
result = mapreduce(tmpdir)
print(f'There are {result} lines')

There are 5468 lines


In [None]:
#上のコードはジェネリックではなかった
#サブクラスを書き換えたら関数でも書き換えなければいけない

#クラスメソッドポリモルフィズムを使う。構築されたオブジェクトにではなくクラス全体について適用される