# 1、GIL (Global Interpreter Lock) 全局解释器锁

Python（CPython）中的一个线程对应于C语言层面的一个线程（即操作系统线程）。
GIL使得同一时刻只有一个线程在CPU上执行字节码，因此无法将多个线程映射到多个CPU核心上并行执行。

- 解释器会根据已执行的字节码数量以及时间片，让当前线程释放GIL；因此GIL并不能保证线程安全（见下面的例子）
- 线程遇到IO操作的时候会主动释放GIL

In [None]:
# Counter shared by the worker threads: add() increments it, desc() decrements it.
total = 0

def add():
    """Increment the module-level ``total`` by one, 1,000,000 times.

    The read-modify-write on ``total`` is not protected by any lock.
    """
    global total
    for _ in range(1000000):
        total = total + 1

def desc():
    """Decrement the module-level ``total`` by one, 1,000,000 times.

    The read-modify-write on ``total`` is not protected by any lock.
    """
    global total
    for _ in range(1000000):
        total = total - 1
        
import threading

# Run add() and desc() concurrently, one thread each.
thread1 = threading.Thread(target=add)
thread2 = threading.Thread(target=desc)
for worker in (thread1, thread2):
    worker.start()

# Wait for both threads to finish before reading the shared counter.
for worker in (thread1, thread2):
    worker.join()

# The two loops cancel out arithmetically, but the updates to ``total`` are
# unsynchronized, so the printed value is not guaranteed to be 0.
print(total)

# 2、多线程编程
操作系统能够调度的最小单元是线程

对于IO操作来说，多进程和多线程的性能差别不大

## 2.1 创建多线程的方式

In [None]:
# 1、通过Thread类实例化

import time
import threading

def get_detail_html(url):
    """Simulate crawling an article detail page.

    ``url`` is accepted but not used; the fetch is simulated by a
    two-second sleep between a start message and an end message.
    """
    simulated_fetch_seconds = 2
    print('get detail html started')
    time.sleep(simulated_fetch_seconds)
    print('get detail html end')
    
def get_detail_url(url):
    """Simulate crawling the article list page.

    ``url`` is accepted but not used; the fetch is simulated by a
    two-second sleep between a start message and an end message.
    """
    simulated_fetch_seconds = 2
    print('get detail url started')
    time.sleep(simulated_fetch_seconds)
    print('get detail url end')
    
def main():
    """Run both simulated crawlers in their own threads and print the elapsed time.

    The clock starts just before the threads are started and stops once
    both have been joined.
    """
    workers = [
        threading.Thread(target=get_detail_html, args=('',)),
        threading.Thread(target=get_detail_url, args=('',)),
    ]
    start_time = time.time()
    for worker in workers:
        worker.start()

    # Block until both crawlers are done so the timing covers all the work.
    for worker in workers:
        worker.join()
    elapsed = time.time() - start_time
    print('last time: {}'.format(elapsed))
    
main()

In [None]:
# 2、通过继承Thread类来实现多线程

class GetDetailHtml(threading.Thread):
    """Thread subclass that simulates crawling an article detail page."""

    def __init__(self, name):
        """Create the thread, passing ``name`` through as the thread name."""
        super().__init__(name=name)

    def run(self):
        """Thread body: print a start message, sleep two seconds, print an end message.

        ``run()`` is the method a ``Thread`` subclass overrides; ``start()``
        invokes it in the new thread.
        """
        simulated_fetch_seconds = 2
        print('get detail html started')
        time.sleep(simulated_fetch_seconds)
        print('get detail html end')

class GetDetailUrl(threading.Thread):
    """Thread subclass that simulates crawling the article list page."""

    def __init__(self, name):
        """Create the thread, passing ``name`` through as the thread name."""
        super().__init__(name=name)

    def run(self):
        """Thread body: print a start message, sleep four seconds, print an end message.

        ``run()`` is the method a ``Thread`` subclass overrides; ``start()``
        invokes it in the new thread.
        """
        simulated_fetch_seconds = 4
        print('get detail url started')
        time.sleep(simulated_fetch_seconds)
        print('get detail url end')

def main():
    """Run the two ``Thread`` subclasses in parallel and print the elapsed time.

    The clock starts just before the threads are started and stops once
    both have been joined.
    """
    # Thread names mirror the class they belong to (the original spelled the
    # first one 'get_detal_html').
    t1 = GetDetailHtml('get_detail_html')
    t2 = GetDetailUrl('get_detail_url')
    start_time = time.time()
    t1.start()
    t2.start()

    # Block until both threads are done so the timing covers all the work.
    t1.join()
    t2.join()
    print('last time: {}'.format(time.time() - start_time))

main()

## 2.2 线程间的通信

In [None]:
# 1、共享变量
import time
import threading

detail_url_list = []

def get_detail_html(detail_url_list):
    """Consume URLs from a shared list forever, simulating a detail-page crawl.

    Each URL is removed from the end of ``detail_url_list``, reported with a
    message, and followed by a two-second sleep that stands in for the fetch.
    This function never returns.

    Args:
        detail_url_list: Mutable list shared with the producer thread (and
            possibly other consumers); items are removed with ``pop()``.
    """
    while True:
        # Pop first and handle the empty case, instead of checking the list
        # and then popping: another consumer can take the last item between
        # the check and the pop, which would raise IndexError and kill this
        # thread.
        try:
            url = detail_url_list.pop()
        except IndexError:
            # Nothing to do yet; sleep briefly rather than spinning so the
            # producer thread gets a chance to run.
            time.sleep(0.05)
            continue
        # Crawl the article detail page (simulated).
        print('get detail html: {}'.format(url))
        time.sleep(2)

def get_detail_url(detail_url_list):
    """Produce batches of URLs into a shared list forever.

    Each round simulates crawling the article list page with a four-second
    sleep, then appends ``detail_url_0`` .. ``detail_url_19`` to
    ``detail_url_list``. This function never returns.
    """
    batch_size = 20
    while True:
        # Crawl the article list page (simulated).
        print('get detail url started')
        time.sleep(4)
        for index in range(batch_size):
            detail_url = 'detail_url_{}'.format(index)
            detail_url_list.append(detail_url)
        print('get detail url end')
    
def main():
    """Start two consumer threads and one producer thread on the shared URL list.

    The consumers run ``get_detail_html`` and the producer runs
    ``get_detail_url``, all on the module-level ``detail_url_list``. Once
    every thread has been joined, the elapsed time is printed.

    Note: as written in this file, both worker functions loop forever, so
    the joins below do not return and the final message is never reached.
    """
    # Start the clock before any thread runs; taking it after the joins
    # (as the original did) always reports an elapsed time of about zero.
    start_time = time.time()

    thread_detail_url = threading.Thread(target=get_detail_url, args=(detail_url_list,))
    html_threads = []

    for _ in range(2):
        html_thread = threading.Thread(target=get_detail_html, args=(detail_url_list,))
        html_thread.start()
        html_threads.append(html_thread)

    thread_detail_url.start()
    thread_detail_url.join()

    for t in html_threads:
        t.join()

    print('last time: {}'.format(time.time() - start_time))
    
main()

**Note:** 共享变量的方式存在线程安全问题，需要通过对共享变量加锁来解决。

In [None]:
# 2、queue
from queue import Queue
# 1、共享变量
import time
import threading


def get_detail_html(queue):
    """Consume URLs from ``queue`` forever, simulating a detail-page crawl.

    Each iteration takes one item with ``queue.get()``, reports it with a
    message, and sleeps two seconds to stand in for the fetch. This function
    never returns.
    """
    simulated_fetch_seconds = 2
    while True:
        detail_url = queue.get()
        # Crawl the article detail page (simulated).
        print('get detail html: {}'.format(detail_url))
        time.sleep(simulated_fetch_seconds)

def get_detail_url(queue):
    """Produce batches of URLs into ``queue`` forever.

    Each round simulates crawling the article list page with a four-second
    sleep, then puts ``detail_url_0`` .. ``detail_url_19`` on ``queue``.
    This function never returns.
    """
    batch_size = 20
    while True:
        # Crawl the article list page (simulated).
        print('get detail url started')
        time.sleep(4)
        for index in range(batch_size):
            detail_url = 'detail_url_{}'.format(index)
            queue.put(detail_url)
        print('get detail url end')
    
def main():
    """Start two consumer threads and one producer thread on a shared ``Queue``.

    A ``Queue`` with ``maxsize=1000`` is created and handed to two
    ``get_detail_html`` consumers and one ``get_detail_url`` producer. Once
    every thread has been joined, the elapsed time is printed.

    Note: as written in this file, both worker functions loop forever, so
    the joins below do not return and the final message is never reached.
    """
    # Start the clock before any thread runs; taking it after the joins
    # (as the original did) always reports an elapsed time of about zero.
    start_time = time.time()

    detail_url_queue = Queue(maxsize=1000)

    thread_detail_url = threading.Thread(target=get_detail_url, args=(detail_url_queue,))
    html_threads = []

    for _ in range(2):
        html_thread = threading.Thread(target=get_detail_html, args=(detail_url_queue,))
        html_thread.start()
        html_threads.append(html_thread)

    thread_detail_url.start()
    thread_detail_url.join()

    for t in html_threads:
        t.join()

    print('last time: {}'.format(time.time() - start_time))
    
main()