In [1]:
import sys

In [5]:
class BetterCountMissing:
    """Callable default factory that counts how many times it is invoked.

    Every call returns ``0`` and increments ``count``. When an instance is
    handed to ``defaultdict`` as its factory, ``count`` therefore records how
    many missing keys were filled in.
    """

    def __init__(self) -> None:
        # Number of times the instance has been called so far.
        self.count = 0

    def __call__(self) -> int:
        """Record one more call and return the default value ``0``."""
        self.count += 1
        return 0

from collections import defaultdict
# Known colours are seeded up front, so only lookups of absent keys reach the factory.
current = dict(green=12, blue=3)
counter = BetterCountMissing()
d = defaultdict(counter, current)
d['red']  # absent key: the factory is called once and 0 is stored
counter.count


1

In [8]:
class GenericInputData:
    """Interface for one unit of input; subclasses must supply both methods."""

    def read(self):
        """Return this input's contents. Must be overridden."""
        raise NotImplementedError()

    @classmethod
    def generate_inputs(cls, config):
        """Produce the input objects described by ``config``. Must be overridden."""
        raise NotImplementedError()


class PathInputData(GenericInputData):
    """Input whose contents live in a file on disk."""

    def __init__(self, path):
        """Remember ``path``; the file is not opened until ``read`` is called."""
        super().__init__()
        self.path = path

    def read(self):
        """Return the entire file at ``self.path`` as text."""
        with open(self.path) as f:
            return f.read()

    @classmethod
    def generate_inputs(cls, config):
        """Yield one instance per directory entry of ``config["data_dir"]``.

        Entries are not filtered, so a sub-directory also produces an instance
        (whose ``read`` would then fail).

        Raises:
            KeyError: If ``config`` has no ``"data_dir"`` key.
            OSError: If the directory cannot be listed.
        """
        # Imported locally so the class works even when the notebook cell that
        # runs ``import os`` has not been executed yet.
        import os

        data_dir = config["data_dir"]
        for name in os.listdir(data_dir):
            yield cls(os.path.join(data_dir, name))



In [9]:
class GenericWorker:
    """Base class for one map/reduce work unit bound to a single input."""

    def __init__(self, input_data):
        """Attach ``input_data``; ``result`` stays ``None`` until ``map`` runs."""
        self.input_data = input_data
        self.result = None

    def map(self):
        """Compute this worker's partial result. Must be overridden."""
        raise NotImplementedError()

    def reduce(self, other):
        """Merge ``other``'s result into this worker. Must be overridden."""
        raise NotImplementedError()

    @classmethod
    def create_workers(cls, input_class, config):
        """Return a list with one ``cls`` instance per input from ``input_class``."""
        return [cls(item) for item in input_class.generate_inputs(config)]



class LineCounterWorker(GenericWorker):
    """Worker that counts the newline characters in its input."""

    def map(self):
        """Read the input and store its newline count in ``self.result``."""
        self.result = self.input_data.read().count("\n")

    def reduce(self, other):
        """Add ``other``'s count to this worker's running total."""
        self.result += other.result


In [10]:
import os

def generate_inputs(data_dir):
    """Lazily yield a ``PathInputData`` for every entry listed in ``data_dir``."""
    yield from (
        PathInputData(os.path.join(data_dir, entry))
        for entry in os.listdir(data_dir)
    )
""
def create_workers(input_list):
    """Return a list holding one ``LineCounterWorker`` per item of ``input_list``."""
    return [LineCounterWorker(item) for item in input_list]


In [11]:
from threading import Thread


def execute(workers):
    """Run every worker's ``map`` in its own thread, then reduce to one result.

    Args:
        workers: Non-empty iterable of objects exposing ``map()``,
            ``reduce(other)`` and a ``result`` attribute.

    Returns:
        The ``result`` of the first worker after every other worker has been
        reduced into it, in iteration order.

    Raises:
        ValueError: If ``workers`` is empty.
    """
    # The workers are iterated twice below, so materialize them once.
    workers = list(workers)
    if not workers:
        raise ValueError("execute() requires at least one worker")

    threads = [Thread(target=w.map) for w in workers]
    # Start every thread before waiting on any of them. Joining right after
    # each start would run the map steps strictly one at a time.
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

    first, *rest = workers
    for worker in rest:
        first.reduce(worker)
    return first.result

def mapreduce(worker_class: "type[GenericWorker]", input_class, config):
    """Build workers for ``config`` and run the full map/reduce pipeline.

    Args:
        worker_class: Worker *class* (not an instance); its ``create_workers``
            classmethod is called to build the workers.
        input_class: Passed through to ``worker_class.create_workers``.
        config: Passed through to ``worker_class.create_workers``.

    Returns:
        Whatever ``execute`` returns for the created workers.
    """
    workers = worker_class.create_workers(input_class, config)
    return execute(workers)


In [12]:
# Fixture generator, currently disabled. When enabled it creates `tmpdir` and
# fills it with 100000 files, each holding between 0 and 1000 newline
# characters.
# NOTE(review): presumably it was run once and then commented out; as written,
# os.makedirs(tmpdir) has no exist_ok=True and fails if the directory exists.
#
# import os, random
#
# def write_test_files(tmpdir):
#     os.makedirs(tmpdir)
#
#     for i in range(100000):
#         with open(os.path.join(tmpdir, str(i)), "w") as f:
#             f.write("\n" * random.randint(0, 1000))
#
# Directory used as the map/reduce data directory. It must already exist on
# disk because the generator call below is commented out.
tmpdir = "test_inputs"
# write_test_files(tmpdir)

In [13]:
import time
config = {'data_dir': tmpdir}
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() can jump if the system clock is adjusted.
t = time.perf_counter()
mapreduce(LineCounterWorker, PathInputData, config)
print(time.perf_counter() - t)

14.526331186294556


In [77]:
class BaseClass:
    """Root of the demo hierarchy; stores ``value`` unchanged."""

    def __init__(self, value):
        self.value = value

class TimeSeven(BaseClass):
    """Multiplies ``value`` by seven after the next ``__init__`` in the MRO has run."""

    def __init__(self, value):
        # super() follows the MRO of type(self), so under multiple inheritance
        # the next initializer is not necessarily BaseClass.
        super().__init__(value)
        self.value *= 7


class PlusFive(BaseClass):
    """Adds five to ``value`` after the next ``__init__`` in the MRO has run."""

    def __init__(self, value):
        # super() follows the MRO of type(self), so under multiple inheritance
        # the next initializer is not necessarily BaseClass.
        super().__init__(value)
        self.value += 5

class OneWay(PlusFive, TimeSeven):
    """Multiple-inheritance demo; MRO is OneWay, PlusFive, TimeSeven, BaseClass, object."""

    def __init__(self, value):
        # super(TimeSeven, self) starts the lookup *after* TimeSeven in the MRO,
        # so only BaseClass.__init__ runs: neither the +5 nor the *7 step is
        # applied and ``value`` is stored unchanged (OneWay(100).value == 100).
        super(TimeSeven, self).__init__(value)

# Show the stored value and the method resolution order of OneWay.
print(OneWay(100).value)
print("MRO -> ", OneWay.mro())

# Alternative definitions kept for comparison (disabled): PlusFive calling
# BaseClass.__init__ directly instead of super(), and OneWay listing
# TimeSeven before PlusFive.
# class PlusFive(BaseClass):
#     def __init__(self, value):
#         BaseClass.__init__(self, value)
#         self.value += 5
#
#
# class OneWay(TimeSeven, PlusFive):
#     def __init__(self, value):
#         super(TimeSeven, self).__init__(value)


100
MRO ->  [<class '__main__.OneWay'>, <class '__main__.PlusFive'>, <class '__main__.TimeSeven'>, <class '__main__.BaseClass'>, <class 'object'>]


In [73]:
# NOTE(review): the output recorded for this cell (735, with TimeSeven ahead of
# PlusFive in the MRO) does not match the OneWay(PlusFive, TimeSeven) class
# defined in this notebook, and its execution count (73) is lower than that of
# the defining cell (77) — presumably it comes from an earlier definition.
# Re-run after a kernel restart to refresh it.
print(OneWay(100).value)
print("MRO -> ", OneWay.mro())

735
MRO ->  [<class '__main__.OneWay'>, <class '__main__.TimeSeven'>, <class '__main__.PlusFive'>, <class '__main__.BaseClass'>, <class 'object'>]


In [78]:
# Single-inheritance MRO for comparison: TimeSeven, BaseClass, object.
TimeSeven.mro()

[__main__.TimeSeven, __main__.BaseClass, object]

In [98]:
class ToDictMixin:
    """Mixin that serializes an object's attributes into plain nested dicts.

    Attributes that are themselves ``ToDictMixin`` instances, dicts or lists
    are converted recursively; every other value is passed through untouched.
    """

    def to_dict(self):
        """Return a dict built from this instance's ``__dict__``."""
        return self._traverse_dict(self.__dict__)

    def _traverse_dict(self, instance_dict):
        """Return a new dict with every value of ``instance_dict`` converted."""
        return {name: self._traverse(item) for name, item in instance_dict.items()}

    def _traverse(self, value):
        """Convert a single value according to its type."""
        if isinstance(value, ToDictMixin):
            return value.to_dict()
        if isinstance(value, dict):
            return self._traverse_dict(value)
        if isinstance(value, list):
            return list(map(self._traverse, value))
        # Anything else (scalars, tuples, arbitrary objects) is returned as-is.
        return value



In [99]:
class BinaryTree(ToDictMixin):
    """Binary tree node that can be dumped to nested dicts via ``to_dict``."""

    def __init__(self, value, left=None, right=None):
        # Fixed sample payload so list-of-dict traversal shows up in the output.
        self.l = [{'a': 'b'} for _ in range(2)]
        self.value, self.left, self.right = value, left, right

# Sample tree: 10 at the root, 7 -> 9 on the left, 13 -> 11 on the right.
tree = BinaryTree(
    10,
    left=BinaryTree(7, right=BinaryTree(9)),
    right=BinaryTree(13, left=BinaryTree(11)),
)

In [100]:
# Serialize the whole tree: each node becomes a dict and child nodes are nested.
print(tree.to_dict())

{'l': [{'a': 'b'}, {'a': 'b'}], 'value': 10, 'left': {'l': [{'a': 'b'}, {'a': 'b'}], 'value': 7, 'left': None, 'right': {'l': [{'a': 'b'}, {'a': 'b'}], 'value': 9, 'left': None, 'right': None}}, 'right': {'l': [{'a': 'b'}, {'a': 'b'}], 'value': 13, 'left': {'l': [{'a': 'b'}, {'a': 'b'}], 'value': 11, 'left': None, 'right': None}, 'right': None}}
