## Python 线程之间的通信

- 基于全局变量通信

In [3]:
import time
import threading

# 可以提取到 variables.py
detail_url_list = []

def get_detail_html(url):
    global detail_url_list
    while True:
        if len(detail_url_list):
            url = detail_url_list.pop()
            print("enter {} detail html".format(url))
            time.sleep(2)
            print("get {} detail html".format(url))
            break
        else:
            time.sleep(1)

def get_detail_url(url):
    global detail_url_list
    print("enter detail url")
    time.sleep(4)
    for i in range(20):
        detail_url_list.append("http://projectsedu.com/{id}".format(id=i))
    print("get detail url")

if __name__ == "__main__":
    thread1 = threading.Thread(target=get_detail_url, args=("",))
    detail_threads = []
    for i in range(10):
        html_thread = threading.Thread(target=get_detail_html, args=("",))
        html_thread.start()
        detail_threads.append(html_thread)
    
    start_time = time.time()
    thread1.start()
    thread1.join()

    for thread in detail_threads:
        thread.join()

    print("last time: {}".format(time.time() - start_time))


enter detail url
get detail url
enter http://projectsedu.com/19 detail html
enter http://projectsedu.com/18 detail html
enter http://projectsedu.com/17 detail html
enter http://projectsedu.com/16 detail html
enter http://projectsedu.com/15 detail html
enter http://projectsedu.com/14 detail html
enter http://projectsedu.com/13 detail html
enter http://projectsedu.com/12 detail html
enter http://projectsedu.com/11 detail html
enter http://projectsedu.com/10 detail html
get http://projectsedu.com/13 detail html
get http://projectsedu.com/15 detail html
get http://projectsedu.com/12 detail html
get http://projectsedu.com/16 detail html
get http://projectsedu.com/11 detail html
get http://projectsedu.com/14 detail html
get http://projectsedu.com/18 detail html
get http://projectsedu.com/17 detail html
get http://projectsedu.com/10 detail html
get http://projectsedu.com/19 detail html
last time: 6.011427164077759


- 基于全局变量通信（加锁版本）

In [None]:
import time
import threading

# 可以提取到 variables.py
detail_url_list = []
list_lock = threading.Lock()  # 创建一个锁（OS 的 mutex 锁）

def get_detail_html(url):
    global detail_url_list
    while True:
        with list_lock:  # 在访问 shared resource 时加锁
            if len(detail_url_list):
                url = detail_url_list.pop()
                print("enter {} detail html".format(url))
        time.sleep(2)
        print("get {} detail html".format(url))
        break

def get_detail_url(url):
    global detail_url_list
    print("enter detail url")
    time.sleep(4)
    with list_lock:  # 在访问 shared resource 时加锁
        for i in range(20):
            detail_url_list.append("http://projectsedu.com/{id}".format(id=i))
    print("get detail url")

if __name__ == "__main__":
    thread1 = threading.Thread(target=get_detail_url, args=("",))
    detail_threads = []
    for i in range(10):
        html_thread = threading.Thread(target=get_detail_html, args=("",))
        html_thread.start()
        detail_threads.append(html_thread)  # Append to the list
    start_time = time.time()
    thread1.start()
    thread1.join()

    for thread in detail_threads:  # Iterate over the list of threads
        thread.join()

    print("last time: {}".format(time.time() - start_time))


enter detail url
get  detail html
get  detail html
get  detail html
get  detail html
get  detail html
get  detail html
get  detail html
get  detail html
get  detail html
get  detail html
get detail url
last time: 4.00635027885437


- 通过 queue 进行线程间同步

In [9]:
import time
import threading
from queue import Queue

def get_detail_html(queue):
    while True:
        # 为空则阻塞，队列为空时线程会在这里等待
        url = queue.get()
        if url is None:
            break  # 通过发送 None 来通知线程停止
        print("enter {} detail html".format(url))
        time.sleep(2)
        print("get {} detail html".format(url))
        queue.task_done()  # 处理完任务后标记任务完成

def get_detail_url(queue):
    print("enter detail url")
    time.sleep(4)
    for i in range(20):
        queue.put("http://projectsedu.com/{id}".format(id=i))  # 向队列中添加 URL
    print("get detail url")

if __name__ == "__main__":
    detail_url_queue = Queue(maxsize=1000)
    detail_threads = []  # 定义一个线程列表
    start_time = time.time()
    
    # 创建并启动 10 个线程来处理 HTML 获取任务
    for i in range(10):
        html_thread = threading.Thread(target=get_detail_html, args=(detail_url_queue,))
        html_thread.start()
        detail_threads.append(html_thread)
    
    # 启动线程获取 URL
    thread1 = threading.Thread(target=get_detail_url, args=(detail_url_queue,))
    thread1.start()
    thread1.join()  # 等待获取 URL 的线程完成
    
    # 使用 None 来通知所有工作线程停止
    for _ in range(10):  # 启动的线程数量
        detail_url_queue.put(None)
    
    # 等待所有线程完成任务
    for thread in detail_threads:
        thread.join()

    print("last time: {}".format(time.time() - start_time))


enter detail url
get detail urlenter http://projectsedu.com/0 detail html

enter http://projectsedu.com/1 detail html
enter http://projectsedu.com/2 detail html
enter http://projectsedu.com/3 detail html
enter http://projectsedu.com/4 detail html
enter http://projectsedu.com/5 detail html
enter http://projectsedu.com/6 detail html
enter http://projectsedu.com/7 detail html
enter http://projectsedu.com/8 detail html
enter http://projectsedu.com/9 detail html
get http://projectsedu.com/8 detail html
enter http://projectsedu.com/10 detail html
get http://projectsedu.com/1 detail html
enter http://projectsedu.com/11 detail html
get http://projectsedu.com/3 detail html
enter http://projectsedu.com/12 detail html
get http://projectsedu.com/0 detail html
enter http://projectsedu.com/13 detail html
get http://projectsedu.com/5 detail html
enter http://projectsedu.com/14 detail html
get http://projectsedu.com/7 detail html
enter http://projectsedu.com/15 detail html
get http://projectsedu.com/6