# **Thread safe**

### **Definition**

Thread safety is a computer programming concept applicable to multi-threaded code. Thread-safe code only manipulates shared data structures in a manner that ensures that all threads behave properly and fulfill their design specifications without unintended interaction.


# **Python threading example**

In [1]:
import threading


def threadFun(a, b):
    print(a,b)


t = threading.Thread(target=threadFun, args=(100, 200,))

t.start()
t.join()


100 200


In [23]:
import requests

from bs4 import BeautifulSoup 


urls = [f"https://ww.cnblogs.com/#p{page}" for page in range(1, 50+1)]

def parseHtml(html):
    soup = BeautifulSoup(html,"html.parser")
    links = soup.find_all("a",class_="post-item-title")
    return [(link['href'],link.get_text()) for link in links  ]


def craw(url):
    # r = requests.get(url)
    # parseHtml(r)
    # print(url, len(r.text))
    return requests.get(url).text

def mul():
    threads = [threading.Thread(target=craw, args=(url,)) for url in urls]

    for thread in threads:
        thread.start()
        
    for thread in threads:
        thread.join()


# mul()

In [29]:

import queue
import time
import random


def do_craw(urlqueue:queue.Queue, htmlqueue:queue.Queue):
    while True:
        url = urlqueue.get()
        html = craw(url)
        htmlqueue.put(html)
        print(threading.current_thread().name,
              f"crawl {url}", "URL queue size", urlqueue.qsize())
        time.sleep(random.randint(1, 2))


def do_parse(htmlqueue:queue.Queue, fileout):
    while True:
        html = htmlqueue.get()
        res = parseHtml(html)
        for re in res:
            fileout.write(str(re)+"\n")
        print(threading.current_thread().name,
              f"crawl {url}", "res queue size",len(res),
              f"html queue size", htmlqueue.qsize())
        time.sleep(random.randint(1, 2))


In [30]:
if __name__ == '__main__':
    urlqueue = queue.Queue()
    htmlqueue = queue.Queue()
    for url in urls:
        urlqueue.put(url)

    for idx in range(2):
        t = threading.Thread(
            target=do_craw,
            args=(urlqueue, htmlqueue),
            name=f"craw {idx}")
        t.start()

    # fout = open("crawl.txt", "w")
    # for idx in range(4):
    #     t = threading.Thread(
    #         target=do_parse,
    #         args=(urlqueue, htmlqueue),
    #         name=f"craw {idx}")
    #     t.start()


craw 1 crawl https://ww.cnblogs.com/#p2 URL queue size 48
craw 0 crawl https://ww.cnblogs.com/#p1 URL queue size 48
craw 0 crawl https://ww.cnblogs.com/#p3 URL queue size 47
craw 1 crawl https://ww.cnblogs.com/#p4 URL queue size 46
craw 0 crawl https://ww.cnblogs.com/#p5 URL queue size 45
craw 1 crawl https://ww.cnblogs.com/#p6 URL queue size 44
craw 1 crawl https://ww.cnblogs.com/#p7 URL queue size 42
craw 0 crawl https://ww.cnblogs.com/#p8 URL queue size 42
craw 1 crawl https://ww.cnblogs.com/#p9 URL queue size 40
craw 0 crawl https://ww.cnblogs.com/#p10 URL queue size 40
craw 1 crawl https://ww.cnblogs.com/#p11 URL queue size 39
craw 0 crawl https://ww.cnblogs.com/#p12 URL queue size 38
craw 1 crawl https://ww.cnblogs.com/#p13 URL queue size 36
craw 0 crawl https://ww.cnblogs.com/#p14 URL queue size 36
craw 1 crawl https://ww.cnblogs.com/#p15 URL queue size 34
craw 0 crawl https://ww.cnblogs.com/#p16 URL queue size 34
craw 0 crawl https://ww.cnblogs.com/#p17 URL queue size 33
craw 1

In [34]:
import asyncio


async def task1():
    print("Send first email")
    # asyncio.create_task(task2())
    await asyncio.sleep(5)
    print("First Email reply")


async def task2():
    print("Send second email")
    asyncio.create_task(task3())
    await asyncio.sleep(2)
    print("Second Email reply")


async def task3():
    print("Send third email")
    await asyncio.sleep(2)
    print("Third Email reply")


asyncio.run(task1())


RuntimeError: asyncio.run() cannot be called from a running event loop