### 和threading的比较

多进程 Multiprocessing 和多线程 threading 类似, 都是在 python 中用来并行运算的. 
不过既然有了 threading, 为什么 Python 还要出一个 multiprocessing 呢? 原因很简单, 就是用来弥补 threading 的一些劣势, 比如 GIL (全局解释器锁): 它让同一时刻只有一个线程在执行 Python 字节码, 所以多线程无法利用多核来加速计算密集型任务, 而多进程中每个进程都有自己的解释器和 GIL.

使用 multiprocessing 也非常简单, python 把 multiprocessing 和 threading 的使用方法做得几乎一样.

In [1]:
import multiprocessing as mp
#import threading as td

def job(a, d):
    """Print a fixed marker string; both arguments are accepted but unused."""
    print("aaaaa")

# Entry-point guard: multiprocessing code must be launched from under this
# check so that the launch code is not executed again when the module is
# imported rather than run as a script.
if __name__ == '__main__':
    worker = mp.Process(target=job, args=(1, 2))
    worker.start()
    worker.join()

注意：上面的运行在notebook中会没有结果，需要在terminal或prompt中运行才有结果

### Queue功能

In [None]:
import multiprocessing as mp

def job(q):
    """Sum ``i + i**2 + i**3`` for ``i`` in ``range(1000)`` and put it on ``q``."""
    total = sum(i + i**2 + i**3 for i in range(1000))
    q.put(total)  # hand the result back through the queue

if __name__ == '__main__':
    q = mp.Queue()
    # args must be a tuple: the trailing comma makes (q,) a one-element tuple
    # instead of a plain parenthesised q.
    p1 = mp.Process(target=job, args=(q,))
    p2 = mp.Process(target=job, args=(q,))
    p1.start()
    p2.start()
    # Fetch the results before joining. A child that has put data on a queue
    # does not exit until that data has been flushed to the pipe, so joining
    # first can deadlock if the parent has not consumed the items yet.
    res1 = q.get()
    res2 = q.get()
    p1.join()
    p2.join()
    print(res1 + res2)

还是需要在terminal或prompt中执行

### 效率对比

- 创建多进程 multiprocessing

In [None]:
import multiprocessing as mp
import threading as td
import time

def job(q, n=1000000):
    """Put the sum of ``i + i**2 + i**3`` for ``i`` in ``range(n)`` on ``q``.

    Args:
        q: Queue-like object with a ``put`` method that receives the result.
        n: Number of loop iterations; the default keeps the original
            benchmark workload of one million steps.
    """
    res = 0
    for i in range(n):
        res += i + i**2 + i**3
    q.put(res)  # hand the result back through the queue

def multicore():
    """Run ``job`` in two separate processes and print the combined result.

    Each child puts one partial sum on a shared ``mp.Queue``; the parent adds
    the two values and prints them after the ``'multicore:'`` label.
    """
    q = mp.Queue()
    # args must be a tuple: the trailing comma makes (q,) a one-element tuple.
    p1 = mp.Process(target=job, args=(q,))
    p2 = mp.Process(target=job, args=(q,))
    p1.start()
    p2.start()
    # Fetch the results before joining. A child that has put data on a queue
    # does not exit until that data has been flushed to the pipe, so joining
    # first can deadlock if the parent has not consumed the items yet.
    res1 = q.get()
    res2 = q.get()
    p1.join()
    p2.join()
    print('multicore:', res1 + res2)
  
def normal(n=1000000, repeats=2):
    """Compute the benchmark sum serially and print it.

    Adds ``i + i**2 + i**3`` for ``i`` in ``range(n)``, ``repeats`` times over,
    in the calling thread, then prints the total after the ``'normal:'`` label.

    Args:
        n: Iterations per pass; defaults to the original one million.
        repeats: Number of passes; defaults to the original two, matching the
            two workers used by the threaded and multi-process variants.
    """
    res = 0
    for _ in range(repeats):
        for i in range(n):
            res += i + i**2 + i**3
    print('normal:', res)
    
def multithread():
    """Run ``job`` in two threads and print the sum of their two results."""
    q = mp.Queue()  # a multiprocessing queue also works fine between threads
    # args must be a tuple: the trailing comma makes (q,) a one-element tuple.
    workers = [td.Thread(target=job, args=(q,)) for _ in range(2)]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    first = q.get()
    second = q.get()
    print('multithread:', first + second)
    
    
if __name__ == '__main__':
    # Run the three variants back to back; each one is timed from the
    # checkpoint taken right after the previous variant finished.
    stages = (
        ('normal time:', normal),
        ('multithread time:', multithread),
        ('multicore time:', multicore),
    )
    checkpoint = time.time()
    for label, run in stages:
        run()
        now = time.time()
        print(label, now - checkpoint)
        checkpoint = now

normal: 499999666667166666000000
normal time: 1.5457499027252197
multithread: 499999666667166666000000
multithread time: 1.557366132736206


在prompt中运行的结果为

normal: 499999666667166666000000

**normal time: 1.5386521816253662**

multithread: 499999666667166666000000

**multithread time: 1.5308480262756348**

multicore: 499999666667166666000000

**multicore time: 0.9713168144226074**

运行时间是 多进程 < 多线程 ≈ 普通: 多进程明显最快; 受 GIL 限制, 多线程和普通单线程的耗时几乎相同 (上面两次运行中二者互有快慢, 差别只是测量误差).

### 进程池

In [None]:
import multiprocessing as mp

def job(x):
    """Return ``x`` multiplied by itself."""
    squared = x * x
    return squared

def multicore():
    """Demonstrate ``Pool.map`` and ``Pool.apply_async`` with ``job``.

    Prints the squares of 0..9 computed with ``map``, the single result of
    ``job(2)``, and the squares of 0..9 again computed with one
    ``apply_async`` call per input.
    """
    # processes=3 starts three worker processes; when omitted, the pool size
    # defaults to the number of CPUs reported by the OS.
    # The with-block shuts the workers down on exit, even if a call raises.
    with mp.Pool(processes=3) as pool:
        res = pool.map(job, range(10))
        print(res)  # prints [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

        res = pool.apply_async(job, (2,))
        print(res.get())  # prints 4, i.e. 2*2

        # apply_async handles one call at a time, so submit one task per input
        multi_res = [pool.apply_async(job, (i,)) for i in range(10)]
        # collect each result; prints [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
        print([pending.get() for pending in multi_res])


if __name__ == '__main__':
    multicore()

### 共享内存

多进程中每个进程都有自己独立的内存空间, 一个进程对全局变量的修改对其他进程不可见, 所以不能靠全局变量来交换数据, 需要使用共享内存

各参数代表的数据类型

| Type code | C Type             | Python Type       | Minimum size in bytes |
| --------- | ------------------ | ----------------- | --------------------- |
| `'b'`     | signed char        | int               | 1                     |
| `'B'`     | unsigned char      | int               | 1                     |
| `'u'`     | Py_UNICODE         | Unicode character | 2                     |
| `'h'`     | signed short       | int               | 2                     |
| `'H'`     | unsigned short     | int               | 2                     |
| `'i'`     | signed int         | int               | 2                     |
| `'I'`     | unsigned int       | int               | 2                     |
| `'l'`     | signed long        | int               | 4                     |
| `'L'`     | unsigned long      | int               | 4                     |
| `'q'`     | signed long long   | int               | 8                     |
| `'Q'`     | unsigned long long | int               | 8                     |
| `'f'`     | float              | float             | 4                     |
| `'d'`     | double             | float             | 8                     |

In [2]:
import multiprocessing as mp

# Shared-memory scalar: the first argument is the type code ('d' = double).
value = mp.Value("d", 1)
# Shared-memory array of ints; it must be built from a flat (one-dimensional)
# list, nested lists are not accepted.
array = mp.Array("i", [1, 3, 4])


### 进程锁

In [None]:
# 不加进程锁
import multiprocessing as mp
import time

def job(v,num):
    """Add ``num`` to ``v.value`` ten times, printing the value after each step.

    No lock is taken here, on purpose: when several processes run this on the
    same shared value, the read-modify-write done by ``+=`` is not atomic, so
    their updates and prints can interleave.
    """
    for _ in range(10):
        time.sleep(0.1)  # pause 0.1 s before every update
        v.value += num   # add num to the value held in shared memory
        print(v.value)

def multicore():
    """Start two processes that update one shared integer without a lock."""
    shared = mp.Value('i', 0)  # integer stored in shared memory
    # Different increments (1 and 3) make it visible how the two processes
    # compete for the shared value.
    procs = [mp.Process(target=job, args=(shared, step)) for step in (1, 3)]
    for proc in procs:
        proc.start()
    for proc in procs:
        proc.join()


if __name__ == '__main__':
    multicore()

输出结果为：
1
1
4
5
8
9
12
13
16
17
20
21
24
25
28
29
32
33
36
37


这是在争夺内存，因此需要加上锁

In [None]:
# 加进程锁
import multiprocessing as mp
import time

def job(v, num, l):
    """Add ``num`` to ``v.value`` ten times while holding the lock ``l``.

    The lock is held for the whole loop, so all ten updates of one caller
    finish before another caller using the same lock can start.

    Args:
        v: Shared value object exposing a ``value`` attribute.
        num: Amount added to ``v.value`` on every step.
        l: Lock shared by all callers; used as a context manager so that it
            is released even if the loop body raises.
    """
    with l:
        for _ in range(10):
            time.sleep(0.1)
            v.value += num  # add num to the value held in shared memory
            print(v.value)
    
def multicore():
    """Start two processes that update one shared integer under a shared lock."""
    lock = mp.Lock()
    shared = mp.Value('i', 0)
    # The lock has to be passed to every process explicitly.
    procs = [mp.Process(target=job, args=(shared, step, lock)) for step in (1, 3)]
    for proc in procs:
        proc.start()
    for proc in procs:
        proc.join()


if __name__ == '__main__':
    multicore()

运行的可能结果有两种

3
6
9
12
15
18
21
24
27
30
31
32
33
34
35
36
37
38
39
40

或

1
2
3
4
5
6
7
8
9
10
13
16
19
22
25
28
31
34
37
40