# 第五章，类与接口

### 第37条，用组合起来的类来实现多层结构，不要用嵌套的内置类型

In [3]:
# 字典与相关的内置类型用起来很方便，但同时也容易遭到滥用导致代码出问题
# 如果遇到比较复杂的需求，不要再嵌套字典、元组、集合、列表等内置类型，而是应当编写一批新类，让这些类形成一套体系
from collections import namedtuple
from collections import defaultdict

Grade = namedtuple('Grade', ('score', 'weight'))


class Subject:
    """One subject, holding the weighted grades reported for it."""

    def __init__(self):
        # Grade records, kept in the order they were reported.
        self._grade = []

    def report_grade(self, score, weight):
        """Record a single score together with its weight."""
        entry = Grade(score, weight)
        self._grade.append(entry)

    def average_grade(self):
        """Return the weighted average of all reported grades.

        Raises:
            ZeroDivisionError: if the weights add up to zero, which
                includes the case where nothing has been reported.
        """
        weighted_sum = 0
        weight_sum = 0
        for score, weight in self._grade:
            weighted_sum += score * weight
            weight_sum += weight
        return weighted_sum / weight_sum

# 定义学生类，内部维护一个字典，其中键（key）是科目名称，值（value）是一个Subject对象
class Student:
    """A student whose grades are grouped by subject name."""

    def __init__(self) -> None:
        # Looking up an unknown subject name creates a fresh Subject.
        self._subject = defaultdict(Subject)

    def get_subject(self, name):
        """Return the subject stored under ``name``."""
        subjects = self._subject
        return subjects[name]

    def average_grade(self):
        """Return the plain (unweighted) mean of the per-subject averages.

        Raises:
            ZeroDivisionError: if the student has no subjects yet.
        """
        running_total = 0
        subject_count = 0
        for each in self._subject.values():
            running_total += each.average_grade()
            subject_count += 1
        return running_total / subject_count

# 定义一个成绩册类，把每位学生的名字和这位学生的Student对象关联起来
# 如果成绩册里还没有记录过这位学生，那么在调用get_student方法时，Gradebook会构造一个默认的Student对象
class Gradebook:
    """Grade book that maps a student's name to a Student object."""

    def __init__(self) -> None:
        # A name that has not been seen yet yields a default-constructed
        # Student on first lookup.
        self._student = defaultdict(Student)

    def get_student(self, name):
        """Return the student stored under ``name``."""
        students = self._student
        return students[name]

In [4]:
# Build a grade book holding one student with two subjects, then print
# the student's overall average.
book = Gradebook()
albert = book.get_student('Albert Einstein')

math = albert.get_subject('Math')
gym = albert.get_subject('gym')

# Each pair is (score, weight).
for score, weight in ((75, 0.05), (65, 0.15), (70, 0.8)):
    math.report_grade(score, weight)
for score, weight in ((100, 0.4), (85, 0.6)):
    gym.report_grade(score, weight)

print(albert.average_grade())

80.25


### 第38条，让简单的接口接受函数，而不是类的实例

头等对象(first-class object)指的是在一种编程语言中能够被当做参数传递、赋值给变量、存储在数据结构中的对象。

Python中，许多挂钩都是无状态的函数，带有明确的参数与返回值。

In [6]:
def log_missing():
    """Print a notice and return 0, the default value for a new key."""
    default_value = 0
    print('Key added')
    return default_value

from collections import defaultdict

# Start from the existing counts; log_missing supplies 0 (and prints a
# notice) each time an increment touches a key that is not present yet.
current = {'green': 12, 'blue': 3}
increment = [('red', 5), ('blue', 17), ('orange', 9)]
result = defaultdict(log_missing, current)

print('Before:', dict(result))
for key, amount in increment:
    result[key] = result[key] + amount
print('After:', dict(result))

Before: {'green': 12, 'blue': 3}
Key added
Key added
After: {'green': 12, 'blue': 20, 'red': 5, 'orange': 9}


In [None]:
# 有状态的闭包（stateful closure）
def increment_with_report(current, increments):
    """Apply ``increments`` on top of ``current`` and count the new keys.

    Args:
        current: mapping with the starting values.
        increments: iterable of ``(key, amount)`` pairs.

    Returns:
        A ``(result, added)`` tuple: ``result`` is a defaultdict with the
        updated values, and ``added`` is how many times a missing key had
        to be given the default value 0.
    """
    missing_total = 0

    def missing():
        # The closure keeps its state in the enclosing function's local.
        nonlocal missing_total
        missing_total += 1
        return 0

    result = defaultdict(missing, current)
    for name, delta in increments:
        result[name] += delta

    return result, missing_total

In [9]:
# 使用__call__特殊方法，使类的对象能够像函数那样调用
class BetterCountMissing:
    """Callable object that counts how many times it has been called."""

    def __init__(self):
        # Number of calls made so far.
        self.added = 0

    def __call__(self):
        """Record one more call and return the default value 0."""
        self.added = self.added + 1
        return 0
    
# An instance can be called like a function and yields the default 0.
counter = BetterCountMissing()
assert callable(counter)
assert counter() == 0

current = {'green': 12, 'blue': 3}
increment = [('red', 5), ('blue', 17), ('orange', 9)]

# A fresh counter serves as the default factory; it ends up called once
# for each key that was missing from ``current``.
counter = BetterCountMissing()
result = defaultdict(counter, current)
for key, amount in increment:
    result[key] = result[key] + amount
assert counter.added == 2

### 第39条，通过@classmethod多态来构造统一体系中的各类对象
多态机制可以使同一体系中的多个类按照各自独有的方式来实现同一个方法。这意味着这些类都可以满足同一套接口，或者都可以当做某个抽象类来使用，同时，它们又能在这个前提下，实现各自的功能。

In [13]:
# 以下为一个案例，实现了一套MapReduce流程，并且以一个通用的类来表示输入数据。

class InputData:
    """Common base class for the input sources of the MapReduce example."""

    def read(self):
        """Return this input's data; subclasses are expected to override.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        # The base class only reserves the method: calling it on a subclass
        # that has not overridden it fails with NotImplementedError.
        raise NotImplementedError()

class PathInputData(InputData):
    """Input source whose data is the content of a file."""

    def __init__(self, path) -> None:
        super().__init__()
        # Location of the file that read() opens.
        self.path = path

    def read(self):
        """Return the whole text content of the file at ``self.path``."""
        with open(self.path) as source:
            contents = source.read()
        return contents
        
class Worker:
    """Base class for a MapReduce worker bound to one input."""

    def __init__(self, input_data):
        # No result exists until a subclass's map() stores one.
        self.result = None
        self.input_data = input_data

    def map(self):
        """Process ``self.input_data``; subclasses must override."""
        raise NotImplementedError()

    def reduce(self, other):
        """Combine ``other``'s result into this one; subclasses must override."""
        raise NotImplementedError()
    
class LineCountWorker(Worker):
    """Worker that counts newline characters in its input."""

    def map(self):
        """Read the input and store its newline count in ``self.result``."""
        text = self.input_data.read()
        newline_total = text.count('\n')
        self.result = newline_total

    def reduce(self, other):
        """Add ``other.result`` onto this worker's own result."""
        self.result += other.result

import os

def generate_inputs(data_dir):
    """Yield a PathInputData for every entry listed in ``data_dir``."""
    for entry in os.listdir(data_dir):
        full_path = os.path.join(data_dir, entry)
        yield PathInputData(full_path)

def create_workers(input_list):
    """Return a list with one LineCountWorker per item of ``input_list``."""
    return [LineCountWorker(input_data) for input_data in input_list]

from threading import Thread

def execute(workers):
    """Run every worker's map step on its own thread, then merge the results.

    All threads are started and then joined before any reduce call. The
    remaining workers are folded into the first one, in order, and the
    first worker's result is returned.

    Raises:
        ValueError: if ``workers`` is empty (nothing to unpack).
    """
    threads = []
    for w in workers:
        threads.append(Thread(target=w.map))
    for t in threads:
        t.start()
    for t in threads:
        t.join()

    head, *tail = workers
    for other in tail:
        head.reduce(other)
    return head.result

def mapreduce(data_dir):
    """Run the whole pipeline for ``data_dir`` and return the final result.

    Inputs are generated from the directory, one worker is created per
    input, and the workers are executed.
    """
    return execute(create_workers(generate_inputs(data_dir)))

In [25]:
# 多线程入门
import time
import requests
import threading            # 多线程
import multiprocessing      # 多进程

# t = threading.Thread(target='函数名', args=('函数参数'))
# t.start()

# t = multiprocessing.Process(target='函数名', args=('函数参数'))
# t.start()

def task(t):
    """Sleep for one second, then print a completion message.

    The argument ``t`` is accepted but not used.
    """
    pause_seconds = 1
    time.sleep(pause_seconds)
    print("任务结束")

# daemon=False: the program waits for this thread to finish even after the
# main thread's code has run to the end. With daemon=True the thread would
# be stopped as soon as the main thread ends.
t = threading.Thread(target=task, args=(11,), daemon=False)
t.start()
print('主线程结束')

主线程结束


任务结束


In [2]:
# 自定义线程类，通过重写run方法，设置每个线程的任务内容。
import threading

class DouYinThread(threading.Thread):
    """Thread that downloads one URL and saves the response body to a file.

    Construct it with ``args=(file_name, video_url)``.
    """

    # Seconds to wait for the server before giving up; without a timeout
    # the thread could block forever on an unresponsive host.
    timeout = 30

    def run(self):
        """Fetch ``video_url`` and write the response bytes to ``file_name``.

        Raises:
            requests.HTTPError: if the server answers with a 4xx/5xx status.
        """
        # NOTE: ``_args`` is the private Thread attribute that stores the
        # ``args`` tuple given to the constructor.
        file_name, video_url = self._args
        res = requests.get(video_url, timeout=self.timeout)
        # Fail loudly instead of saving an error page as if it were the video.
        res.raise_for_status()
        with open(file_name, mode='wb') as f:
            f.write(res.content)


# Placeholder arguments: replace them with a real target path and URL.
t = DouYinThread(args=('file_name', 'video_url'))
t.start()

In [None]:
import threading

# Lock taken around every update of the shared counter.
lock_object = threading.RLock()

number = 0  # shared counter modified by _add and _sub
loop = 1000000


def _add(count):
    """Increase the global ``number`` by one, ``count`` times, under the lock."""
    global number
    # ``with`` releases the lock even if the loop raises.
    with lock_object:
        for _ in range(count):
            number += 1


def _sub(count):
    """Decrease the global ``number`` by one, ``count`` times, under the lock.

    This function used to be a second definition of ``_add``, which replaced
    the incrementing version above.
    """
    global number
    with lock_object:
        for _ in range(count):
            number -= 1

In [8]:
# 单例模式，每次实例化对象，用的是同一个内存地址
import threading
class Singleton:
    """Class whose constructor always hands back the same shared object.

    ``__init__`` still runs on every ``Singleton(...)`` call, so the shared
    object's ``name`` is overwritten by the most recent call.
    """

    instance = None  # the shared object, created on first construction
    lock = threading.RLock()  # guards creation of the shared object

    def __init__(self, name):
        self.name = name

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating it on the first call."""
        # Fast path without the lock. Compare against None explicitly so an
        # instance that evaluates as falsy is still recognised as existing.
        if cls.instance is not None:
            return cls.instance

        with cls.lock:
            # Check again under the lock: another thread may have created
            # the instance while this one was waiting.
            if cls.instance is None:
                cls.instance = object.__new__(cls)
            return cls.instance
    
# Both constructor calls are expected to hand back the same object.
obj1, obj2 = Singleton('alex'), Singleton('Bob')

# The two printed reprs let you compare the memory addresses.
print(obj1, obj2)

# 注意：如果去掉 __new__ 中的加锁与二次检查，上述单例模式在多线程中会出现问题（可能创建出多个实例）；另外每次调用都会重新执行 __init__，name 会被最后一次调用覆盖

<__main__.Singleton object at 0x000001E7E32E0D60> <__main__.Singleton object at 0x000001E7E32E0D60>
