#  辞書やタプルで複雑なデータを扱いそうな時はヘルパークラスを使用する

In [7]:
import collections
# Immutable record pairing one score with the weight it carries in an average.
Grade = collections.namedtuple('Grade', ['score', 'weight'])

class Subject:
    """Collect weighted grades for one subject and report their average."""

    def __init__(self):
        # Grade records in the order they were reported.
        self._grades = []

    def report_grade(self, score, weight):
        """Record one score together with the weight it carries."""
        entry = Grade(score, weight)
        self._grades.append(entry)

    def average_grade(self):
        """Return the weighted mean of every recorded grade.

        Raises ZeroDivisionError when the recorded weights sum to zero,
        which includes the case where nothing has been reported yet.
        """
        weighted_sum = 0
        weight_sum = 0
        for entry in self._grades:
            weighted_sum += entry.score * entry.weight
            weight_sum += entry.weight
        return weighted_sum / weight_sum
    
class Student:
    """Hold one student's subjects, keyed by subject name."""

    def __init__(self):
        self._subjects = {}

    def subject(self, name):
        """Return the Subject stored under *name*, creating it on first use."""
        try:
            return self._subjects[name]
        except KeyError:
            created = Subject()
            self._subjects[name] = created
            return created

    def average_grade(self):
        """Return the unweighted mean of each subject's average grade.

        Raises ZeroDivisionError when the student has no subjects.
        """
        averages = [entry.average_grade() for entry in self._subjects.values()]
        running = 0
        for value in averages:
            running += value
        return running / len(averages)
    
class Gradebook:
    """Top-level container mapping student names to Student objects."""

    def __init__(self):
        self._students = {}

    def student(self, name):
        """Return the Student stored under *name*, creating it on first use."""
        try:
            return self._students[name]
        except KeyError:
            created = Student()
            self._students[name] = created
            return created

# Walk the hierarchy Gradebook -> Student -> Subject and record one grade.
book = Gradebook()
albert = book.student('Albert Einstein')
math = albert.subject('Math')
math.report_grade(score=80, weight=0.1)

# With a single grade recorded, the average equals that grade's score.
print(albert.average_grade())

80.0


# 単純なインターフェースにはクラスの代わりに関数を使う

- ヘルパークラスに`__call__`メソッドを定義すると、そのインスタンスを関数のように呼び出せる。ここではそれを利用して、辞書内に存在していなかったキーの数を数える処理をしている
- defaultdictには、キーが存在しない場合に呼び出される関数(デフォルト値を返すもの)を渡すことができる

In [10]:
# Existing colour counts, and the (colour, amount) increments to apply to them.
current = dict(green=12, blue=3)
increments = [
    ('red', 5),
    ('blue', 17),
    ('orange', 9),
]

class BetterCountMissing:
    """Callable that returns 0 and counts how many times it has been called."""

    def __init__(self):
        # Number of calls made so far.
        self.added = 0

    def __call__(self):
        """Count this call and return the default value 0."""
        self.added = self.added + 1
        return 0

# The counter instance is the default factory: defaultdict calls it once for
# every key that is missing from `current`.
counter = BetterCountMissing()
result = collections.defaultdict(counter, current)
for key, amount in increments:
    result[key] = result[key] + amount

# 'red' and 'orange' were missing from `current`; 'blue' was already there.
assert counter.added == 2

# @classmethodポリモーフィズムによってオブジェクトをジェネリックに構築

In [None]:
class GenericInputData(object):
    """Abstract interface for one unit of input data.

    Subclasses override both methods; the base versions only signal that
    the method has not been implemented.
    """

    def read(self):
        """Return this input's data. Must be overridden."""
        # Was the misspelled `NotImplmentError`, which raised NameError.
        raise NotImplementedError

    @classmethod
    def generate_inputs(cls, config):
        """Produce input objects described by *config*. Must be overridden."""
        raise NotImplementedError

class PathInputData(GenericInputData):
    """Input data backed by a single file on disk."""

    def __init__(self, path):
        super().__init__()
        self.path = path

    def read(self):
        """Return the entire contents of the file at ``self.path``."""
        # Use a context manager so the handle is closed deterministically
        # instead of being left for the garbage collector.
        with open(self.path) as handle:
            return handle.read()

    @classmethod
    def generate_inputs(cls, config):
        """Yield one instance per directory entry of ``config['data_dir']``."""
        data_dir = config['data_dir']
        for name in os.listdir(data_dir):
            yield cls(os.path.join(data_dir, name))

class GenericWorker(object):
    """Abstract map/reduce worker bound to one piece of input data.

    Subclasses implement ``map`` (compute ``self.result`` from
    ``self.input_data``) and ``reduce`` (fold another worker's result into
    this one).
    """

    def __init__(self, input_data):
        # create_workers() builds workers as cls(input_data), so the
        # constructor has to accept and store the input; without it that
        # call raised TypeError and subclasses had no self.input_data.
        self.input_data = input_data
        self.result = None

    def map(self):
        """Compute this worker's result. Must be overridden."""
        # Was the misspelled `NotImplmentError`, which raised NameError.
        raise NotImplementedError

    def reduce(self, other):
        """Merge *other*'s result into this worker. Must be overridden."""
        raise NotImplementedError

    @classmethod
    def create_workers(cls, input_class, config):
        """Return a list with one worker per input from *input_class*.

        ``input_class.generate_inputs(config)`` supplies the inputs.
        """
        return [cls(input_data)
                for input_data in input_class.generate_inputs(config)]
    
class LineCountWorker(GenericWorker):
    """Worker that counts newline characters in its input data."""

    def map(self):
        """Store the number of '\\n' characters in the input as the result."""
        self.result = self.input_data.read().count('\n')

    def reduce(self, other):
        """Add *other*'s count to this worker's count."""
        self.result += other.result

def execute(workers):
    """Run every worker's map step in its own thread, then reduce.

    All threads are started and joined first; afterwards each remaining
    worker is folded into the first one, whose result is returned.
    Raises IndexError when *workers* is empty.
    """
    threads = []
    for worker in workers:
        threads.append(Thread(target=worker.map))
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

    head = workers[0]
    for other in workers[1:]:
        head.reduce(other)
    return head.result
        
def mapreduce(worker_class, input_class, config):
    """Create workers via *worker_class* and return the executed result.

    The workers come from ``worker_class.create_workers(input_class, config)``
    and are handed straight to ``execute``.
    """
    return execute(worker_class.create_workers(input_class, config))

with TemporaryDirectory() as tmpdir:
    write_test_files(tmpdir)
    config = dict(data_dir=tmpdir)
    # Any GenericWorker subclass (here LineCountWorker) and any
    # GenericInputData subclass (here PathInputData) can be swapped in
    # and mapreduce still works.
    result = mapreduce(LineCountWorker, PathInputData, config)
print('There are', result, 'lines')

# 親クラスの使用はsuperを用いる

- 親クラスの初期化はsuperを使用する
- 継承の順序によって処理の順序が変わる。
 - `super().__init__()`の後に書いた処理は、後で継承されているクラスのものから先に実行される。 `GoodWay(TimesFiveCorrect, PlusTwoCorrect)`の場合はPlusTwoCorrect,TimesFiveCorrectの順
  - (5 + 2) * 5 = 35
 - 同様に、 `GoodWay(PlusTwoCorrect, TimesFiveCorrect)`の場合はTimesFiveCorrect,PlusTwoCorrectの順
  - (5 * 5) + 2 = 27

In [15]:
class MyBaseClass:
    """Common base that simply stores the value it is given."""

    def __init__(self, value):
        self.value = value
        
class TimesFiveCorrect(MyBaseClass):
    """Multiply the stored value by five after the rest of the chain has run."""

    def __init__(self, value):
        # Everything later in the MRO initialises first; then we scale.
        super().__init__(value=value)
        self.value *= 5
        
class PlusTwoCorrect(MyBaseClass):
    """Add two to the stored value after the rest of the chain has run."""

    def __init__(self, value):
        # Everything later in the MRO initialises first; then we add.
        super().__init__(value=value)
        self.value += 2
        
class GoodWay(TimesFiveCorrect, PlusTwoCorrect):
    """Diamond-inheritance example with TimesFiveCorrect listed first."""

    def __init__(self, value):
        super().__init__(value=value)

from pprint import pprint

# Show the resulting value and the method resolution order behind it.
foo = GoodWay(5)
print(foo.value)
pprint(GoodWay.mro())

class GoodWay(PlusTwoCorrect, TimesFiveCorrect):
    """Same diamond with the base order swapped (PlusTwoCorrect first)."""

    def __init__(self, value):
        super().__init__(value=value)
        
from pprint import pprint

# Same experiment against the redefined GoodWay with swapped bases.
foo = GoodWay(5)
print(foo.value)
pprint(GoodWay.mro())

35
[<class '__main__.GoodWay'>,
 <class '__main__.TimesFiveCorrect'>,
 <class '__main__.PlusTwoCorrect'>,
 <class '__main__.MyBaseClass'>,
 <class 'object'>]
27
[<class '__main__.GoodWay'>,
 <class '__main__.PlusTwoCorrect'>,
 <class '__main__.TimesFiveCorrect'>,
 <class '__main__.MyBaseClass'>,
 <class 'object'>]


# 多重継承よりはmix-inを使用する

- mix-inは属性を保持せずにメソッドのみが定義されているクラス

In [25]:
class ToDictMixin:
    """Mix-in that converts an object's attributes into a plain dict."""

    def to_dict(self):
        """Return a dict built from ``self.__dict__``, converted recursively."""
        return self._traverse_dict(self.__dict__)

    def _traverse_dict(self, instance_dict):
        """Return a new dict with every value passed through ``_traverse``."""
        return {
            name: self._traverse(name, item)
            for name, item in instance_dict.items()
        }

    def _traverse(self, key, value):
        """Convert one value; *key* is the attribute name it is stored under.

        ToDictMixin instances become dicts via their own ``to_dict``, dicts
        and lists are converted element by element, and anything else is
        returned unchanged.
        """
        if isinstance(value, ToDictMixin):
            return value.to_dict()
        if isinstance(value, dict):
            return self._traverse_dict(value)
        if isinstance(value, list):
            # List elements are converted under the list's own key.
            return [self._traverse(key, element) for element in value]
        return value

class BinaryTree(ToDictMixin):
    """Binary tree node whose attributes can be dumped with ``to_dict``."""

    def __init__(self, value, left=None, right=None):
        # Assigned left to right, so attribute order stays value, left, right.
        self.value, self.left, self.right = value, left, right
        
# Small three-level tree to exercise the recursive to_dict conversion.
tree = BinaryTree(
    10,
    left=BinaryTree(7, right=BinaryTree(9)),
    right=BinaryTree(13, left=BinaryTree(11)),
)
pprint(tree.to_dict())

class BinaryTreeWithParent(BinaryTree):
    """Binary tree node that also keeps a reference to its parent node."""

    def __init__(self, value, left=None, right=None, parent=None):
        super().__init__(value, left=left, right=right)
        self.parent = parent

    def _traverse(self, key, value):
        """Override: emit only the parent's value instead of recursing into it.

        Following the parent link like any other attribute would loop
        forever (parent -> child -> parent ...).
        """
        is_parent_link = (key == 'parent'
                          and isinstance(value, BinaryTreeWithParent))
        if not is_parent_link:
            return super()._traverse(key, value)
        return value.value

# Build a chain root -> left -> left.right with back-references to parents.
root = BinaryTreeWithParent(10)
root.left = BinaryTreeWithParent(7, parent=root)
root.left.right = BinaryTreeWithParent(value=9, parent=root.left)
pprint(root.to_dict())

class NamedSubTree(ToDictMixin):
    """Pair a name with a tree node; serialisable through ``to_dict``."""

    def __init__(self, name, tree_with_parent):
        # Assigned left to right, so attribute order stays name, then tree.
        self.name, self.tree_with_parent = name, tree_with_parent
        
# Wrap the deepest node of the tree built above and dump it.
my_tree = NamedSubTree(name='foobar', tree_with_parent=root.left.right)
pprint(my_tree.to_dict())

import json

class JsonMixin:
    """Mix-in adding JSON (de)serialisation.

    Relies on the host class accepting the decoded JSON object's keys as
    keyword arguments and on it providing a ``to_dict`` method.
    """

    @classmethod
    def from_json(cls, data):
        """Build an instance from a JSON object string."""
        return cls(**json.loads(data))

    def to_json(self):
        """Return ``self.to_dict()`` encoded as a JSON string."""
        payload = self.to_dict()
        return json.dumps(payload)
    
class DatacenterRack(ToDictMixin, JsonMixin):
    """A rack made of one switch and a list of machines.

    Parameters
    ----------
    switch : dict or None
        Keyword arguments for ``Switch``; ``None`` means no arguments.
    machines : list of dict or None
        One dict of ``Machine`` keyword arguments per machine; ``None``
        means no machines.
    """

    def __init__(self, switch=None, machines=None):
        # The signature advertises None defaults, but `Switch(**None)` and
        # iterating over None both raise TypeError; treat None as empty.
        switch_kwargs = {} if switch is None else switch
        machine_specs = [] if machines is None else machines
        self.switch = Switch(**switch_kwargs)
        self.machines = [Machine(**kwargs) for kwargs in machine_specs]
        
class Switch(ToDictMixin, JsonMixin):
    """Network switch described by its port count and speed."""

    def __init__(self, ports=None, speed=None):
        # Assigned left to right, so attribute order stays ports, speed.
        self.ports, self.speed = ports, speed

class Machine(ToDictMixin, JsonMixin):
    """Machine described by its core count, RAM and disk size."""

    def __init__(self, cores=None, ram=None, disk=None):
        # Assigned left to right, so attribute order stays cores, ram, disk.
        self.cores, self.ram, self.disk = cores, ram, disk
        
# JSON description of a rack: one switch plus three machines.
serialized = """{
    "switch": {"ports": 5, "speed": 1e9},
    "machines": [
        {"cores": 8, "ram": 32e9, "disk": 5e12},
        {"cores": 4, "ram": 16e9, "disk": 1e12},
        {"cores": 2, "ram": 4e9, "disk": 500e9}
    ]
}"""

# Decode into objects, then encode the objects back into JSON.
deserialized = DatacenterRack.from_json(data=serialized)
roundtrip = deserialized.to_json()
pprint(roundtrip)

{'left': {'left': None,
          'right': {'left': None, 'right': None, 'value': 9},
          'value': 7},
 'right': {'left': {'left': None, 'right': None, 'value': 11},
           'right': None,
           'value': 13},
 'value': 10}
{'left': {'left': None,
          'parent': 10,
          'right': {'left': None, 'parent': 7, 'right': None, 'value': 9},
          'value': 7},
 'parent': None,
 'right': None,
 'value': 10}
{'name': 'foobar',
 'tree_with_parent': {'left': None, 'parent': 7, 'right': None, 'value': 9}}
('{"machines": [{"ram": 32000000000.0, "cores": 8, "disk": 5000000000000.0}, '
 '{"ram": 16000000000.0, "cores": 4, "disk": 1000000000000.0}, {"ram": '
 '4000000000.0, "cores": 2, "disk": 500000000000.0}], "switch": {"ports": 5, '
 '"speed": 1000000000.0}}')


# カスタムコンテナ型はcollections.abcを継承する

In [27]:
class BinaryNode:
    """Plain binary tree node: a value plus optional left/right children."""

    def __init__(self, value, left=None, right=None):
        self.value, self.left, self.right = value, left, right
        
class IndexableNode(BinaryNode):
    """Binary node that supports ``node[i]`` by in-order position."""

    def _search(self, count, index):
        """Walk this subtree in order (left, self, right) looking for *index*.

        *count* is the in-order position of the next node to be visited.
        Returns ``(found, count)``: *found* is the node at position *index*,
        or None if this subtree does not contain it, and *count* is the
        position counter after the walk. When nothing matches, every node
        is visited, so *count* grows by the number of nodes in the subtree.
        """
        # Nodes are compared against None explicitly rather than by
        # truthiness: a subclass that defines __len__ would otherwise have
        # it invoked for every such test.
        found = None
        if self.left is not None:
            found, count = self.left._search(count, index)
        if found is None and count == index:
            found = self
        else:
            count += 1
        if found is None and self.right is not None:
            found, count = self.right._search(count, index)
        return found, count

    def __getitem__(self, index):
        """Return the value at in-order position *index*.

        Raises IndexError when no node sits at that position.
        """
        found, _ = self._search(0, index)
        if found is None:
            raise IndexError('Index out of range')
        return found.value
    
class SequenceNode(IndexableNode):
    """Indexable node that also reports the size of its subtree via len()."""

    def __len__(self):
        # Searching for index None takes the final counter from a full walk.
        return self._search(0, None)[1]

from collections.abc import Sequence
    
class BetterNode(SequenceNode, Sequence):
    """SequenceNode combined with the ``Sequence`` abstract base class.

    ``Sequence`` supplies mixin methods such as ``index``, ``count``,
    ``__contains__`` and ``__iter__`` on top of ``__getitem__``/``__len__``.
    """

# In-order traversal of this tree yields 2, 5, 6, 7, 10, 11, 15.
tree = BetterNode(
    10,
    left=BetterNode(
        5,
        left=BetterNode(2),
        right=BetterNode(6, right=BetterNode(7)),
    ),
    right=BetterNode(15, left=BetterNode(11)),
)

# Direct attribute access and positional indexing.
print('LRR =', tree.left.right.right.value)
print('Index 0 =', tree[0])
print('Index 1 =', tree[1])

# Methods provided by the Sequence base class.
print('11 in the tree?', 11 in tree)
print('17 in the tree?', 17 in tree)
print('Tree is', list(tree))
print('Three has Nodes', len(tree))
print('Index of 7 is', tree.index(7))
print('Count of 10 is', tree.count(10))

LRR = 7
Index 0 = 2
Index 1 = 5
11 in the tree? True
17 in the tree? False
Tree is [2, 5, 6, 7, 10, 11, 15]
Three has Nodes 7
Index of 7 is 3
Count of 10 is 1
