In [None]:
from IPython.core.interactiveshell import InteractiveShell
# Ask IPython to display the value of every top-level expression in a cell,
# not only the last one.
InteractiveShell.ast_node_interactivity = "all"

In [None]:
import os 
import time

https://foofish.net/thread.html

In [None]:
def decrement(n):
    """Count ``n`` down to zero one step at a time (pure CPU-bound busy work).

    Returns ``None``; a non-positive ``n`` makes the loop a no-op.
    """
    remaining = n
    while remaining > 0:
        remaining -= 1
        
# Baseline: time one countdown of 100,000,000 steps on the main thread.
start = time.time()
decrement(100000000)
cost = time.time() - start
cost  # bare expression so the notebook displays the elapsed seconds

In [None]:
import threading

# Same total work as the single-threaded run, split across two threads
# (50,000,000 steps each) and timed end to end.
start = time.time()

t1 = threading.Thread(target=decrement, args=[50000000])
t2 = threading.Thread(target=decrement, args=[50000000])

t1.start() # start the thread and run its task
t2.start() # same as above

t1.join() # the main thread blocks until t1 finishes, then carries on
t2.join() # same as above

cost = time.time() - start
cost  # bare expression so the notebook displays the elapsed seconds

**是什么原因导致多线程不快反慢的呢**

原因就在于 GIL ，在 Cpython 解释器（Python语言的主流解释器）中，有一把全局解释锁（Global Interpreter Lock），在解释器解释执行 Python 代码时，先要得到这把锁，意味着，任何时候只可能有一个线程在执行代码，其它线程要想获得 CPU 执行代码指令，就必须先获得这把锁，如果锁被其它线程占用了，那么该线程就只能等待，直到占有该锁的线程释放锁才有执行代码指令的可能。
    
因此，这也就是为什么两个线程一起执行反而更加慢的原因，因为同一时刻，只有一个线程在运行，其它线程只能等待，即使是多核CPU，也没办法让多个线程「并行」地同时执行代码，只能是交替执行，因为多线程涉及到上下文切换、锁机制处理（获取锁，释放锁等），所以，多线程执行不快反慢。

**什么时候 GIL 被释放呢？**

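当一个线程遇到 I/O 任务时，将释放GIL。计算密集型（CPU-bound）线程执行 100 次解释器的计步（ticks）时（计步可粗略看作 Python 虚拟机的指令），也会释放 GIL。可以通过 sys.setcheckinterval()设置计步长度，sys.getcheckinterval() 查看计步长度。相比单线程，这些都是多线程带来的额外开销。（注：按计步释放 GIL 是 Python 2 的行为；从 Python 3.2 起改为按时间片切换，默认间隔 5 毫秒，对应接口是 sys.setswitchinterval() / sys.getswitchinterval()，sys.setcheckinterval() 已在 Python 3.9 中移除。）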

**CPython 解释器为什么要这样设计？**

多线程是为了适应现代计算机硬件高速发展充分利用多核处理器的产物，通过多线程使得 CPU 资源可以被高效利用起来，Python 诞生于1991年，那时候硬件配置远没有今天这样豪华，现在一台普通服务器32核64G内存都已是司空见惯的事，但是多线程有个问题，怎么解决共享数据的同步、一致性问题，因为，对于多个线程访问共享数据时，可能有两个线程同时修改一个数据情况，如果没有合适的机制保证数据的一致性，那么程序最终导致异常，所以，Python之父就搞了个全局的线程锁，不管你数据有没有同步问题，反正一刀切，上个全局锁，保证数据安全。这也就是多线程鸡肋的原因，因为它没有细粒度的控制数据的安全，而是用一种简单粗暴的方式来解决。

这种解决办法放在90年代，其实是没什么问题的，毕竟，那时候的硬件配置还很简陋，单核 CPU 还是主流，多线程的应用场景也不多，大部分时候还是以单线程的方式运行，单线程不需要涉及线程的上下文切换，效率反而比多线程更高（在多核环境下，不适用此规则）。所以，采用 GIL 的方式来保证数据的一致性和安全，未必不可取，至少在当时是一种成本很低的实现方式。

**那么把 GIL 去掉可行吗？**

还真有人这么干过，但是结果令人失望，在1999年Greg Stein 和Mark Hammond 两位哥们就创建了一个去掉 GIL 的 Python 分支，在所有可变数据结构上把 GIL 替换为更为细粒度的锁。然而，做过了基准测试之后，去掉GIL的 Python 在单线程条件下执行效率将近慢了2倍。

Python之父表示：基于以上的考虑，去掉GIL没有太大的价值而不必花太多精力。

https://www.liaoxuefeng.com/wiki/1016959663602400/1017629247922688

启动与CPU核心数量相同的N个线程，在4核CPU上可以监控到CPU占用率仅有102%，也就是仅使用了一核。

但是用C、C++或Java来改写相同的死循环，直接可以把全部核心跑满，4核就跑到400%，8核就跑到800%，为什么Python不行呢？

因为Python的线程虽然是真正的线程，但解释器执行代码时，有一个GIL锁：Global Interpreter Lock，任何Python线程执行前，必须先获得GIL锁，然后，每执行100条字节码，解释器就自动释放GIL锁，让别的线程有机会执行。这个GIL全局锁实际上把所有线程的执行代码都给上了锁，所以，多线程在Python中只能交替执行，即使100个线程跑在100核CPU上，也只能用到1个核。

GIL是Python解释器设计的历史遗留问题，通常我们用的解释器是官方实现的CPython，要真正利用多核，除非重写一个不带GIL的解释器。

所以，在Python中，可以使用多线程，但不要指望能有效利用多核。如果一定要通过多线程利用多核，那只能通过C扩展来实现，不过这样就失去了Python简单易用的特点。

不过，也不用过于担心，Python虽然不能利用多线程实现多核任务，但可以通过多进程实现多核任务。多个Python进程有各自独立的GIL锁，互不影响。

# Multiprocessing

##  `os.fork()` 

<span style="font-family:New York Times; font-size:1em; color:green;">
    
os.fork() works by calling the underlying OS function fork()

With the return value of fork() we can decide in which process we are: 0 means that we are in the child process while a positive return value means that we are in the parent process. A negative return value means that an error occurred while trying to fork. 
    
Unix/Linux操作系统提供了一个fork()系统调用，它非常特殊。普通的函数调用，调用一次，返回一次，但是fork()调用一次，返回两次，因为操作系统自动把当前进程（称为父进程）复制了一份（称为子进程），然后，分别在父进程和子进程内返回。子进程永远返回0，而父进程返回子进程的ID。这样做的理由是，一个父进程可以fork出很多子进程，所以，父进程要记下每个子进程的ID，而子进程只需要调用getppid()就可以拿到父进程的ID。

In [None]:
# Demonstrates os.fork(): everything after the fork() call runs in both the
# parent and the child, which tell themselves apart by fork()'s return value.
print('Process (%s) start...' % os.getpid())

# Python's os module wraps common system calls, fork included, so a child
# process can be created easily from a Python program.
pid = os.fork() 
print('pid = {}, os.getpid={}'.format(pid, os.getpid()))
if pid == 0:
    # fork() returned 0: this branch runs in the child.
    print('I am child process (%s) and my parent is %s.' % (os.getpid(), os.getppid()))
else:
    # Non-zero return value: this branch runs in the parent; pid is the child's ID.
    print('I (%s) just created a child process (%s).' % (os.getpid(), pid))
    #print('I (%s) just created a child process (%s).' % (os.getpid(), os.getpid()))

In [None]:
from multiprocessing import Process

# Code executed by the child process.
def run_proc(name):
    """Print a banner containing ``name`` and the PID of the current process."""
    pid = os.getpid()
    banner = 'Run child process {} {} ...'.format(name, pid)
    print(banner)

# Spawn one child process that runs run_proc('TEST') and wait for it to exit.
if __name__=='__main__':
    print('Parent process %s.' % os.getpid())
    p = Process(target=run_proc, args=('TEST', ))
    print('Child process will start.')
    p.start()
    # Block the parent until the child has finished.
    p.join()
    print('Child process end.')

In [None]:
def consumer():
    """Coroutine-style generator that "consumes" every value sent to it.

    The first ``send(None)`` yields ``''``. Each truthy value sent afterwards
    is printed and answered with ``'200 OK'``; a falsy value ends the
    generator.
    """
    reply = ''
    n = yield reply
    while n:
        print('[CONSUMER] Consuming %s...' % n)
        reply = '200 OK'
        n = yield reply

def produce(c):
    """Drive generator ``c``: prime it, send it 1..5, then close it.

    Prints a line before each send and another with the value the generator
    yields back.
    """
    c.send(None)  # prime the generator so it is paused at its first yield
    for n in range(1, 6):
        print('[PRODUCER] Producing %s...' % n)
        answer = c.send(n)
        print('[PRODUCER] Consumer return: %s' % answer)
    c.close()

# Create the consumer generator and let the producer drive it.
c = consumer()
produce(c)


Multithreading in Python 

将进程挂起(Suspend) 而非 阻塞(Block)

如果用sleep() 进程将阻塞

假设进程下有两个线程 那么这两个线程会继续运行

Search for specific process in htop

In [None]:
from multiprocessing import Pool
import time, random

def long_time_task(name):
    """Simulate a slow task: sleep for a random time under 3 s and report it.

    Prints the task name with the current PID when it starts, and the measured
    duration when it finishes.
    """
    pid = os.getpid()
    print('Run task {} ({})...'.format(name, pid))
    began = time.time()
    nap = random.random() * 3
    time.sleep(nap)
    elapsed = time.time() - began
    print('Task %s runs %0.2f seconds.' % (name, elapsed))

# Run five tasks on a pool of four worker processes and wait for all of them.
if __name__=='__main__':
    print('Parent process %s.' % os.getpid())
    p = Pool(4)
    for i in range(5):
        # apply_async queues the task and returns without waiting for it.
        p.apply_async(long_time_task, args=(i,))
    print('Waiting for all subprocesses done...')
    # close() stops new submissions; it has to be called before join().
    p.close()
    p.join()
    print('All subprocesses done.')

## [Python Multiprocessing: Pool vs Process – Comparative Analysis](https://www.ellicium.com/python-multiprocessing-pool-process/)

In [None]:
# View more python learning tutorial on my Youtube and Youku channel!!!

# Youtube video tutorial: https://www.youtube.com/channel/UCdyjiB5H8Pu7aDTNVXTTpcg
# Youku video tutorial: http://i.youku.com/pythontutorial

import multiprocessing as mp
import threading as td
import time

def job(q):
    """CPU-bound work: put the sum of i + i**2 + i**3 for i in 0..999999 on ``q``."""
    total = sum(i + i**2 + i**3 for i in range(1000000))
    q.put(total)  # hand the result back through the queue

def multicore():
    """Run ``job`` in two child processes and print the sum of both results.

    Each child puts exactly one result on a shared ``mp.Queue``; the parent
    collects both and prints them as ``multicore: <sum>``.
    """
    q = mp.Queue()
    workers = [mp.Process(target=job, args=(q,)) for _ in range(2)]
    for worker in workers:
        worker.start()
    # Drain the queue BEFORE joining. A process that has put items on a
    # multiprocessing queue may not exit until they are flushed to the pipe,
    # so joining first can deadlock (see the multiprocessing programming
    # guidelines, "Joining processes that use queues").
    total = sum(q.get() for _ in workers)
    for worker in workers:
        worker.join()
    print('multicore:' , total)

def normal():
    """Single-threaded baseline: do the work of two ``job`` runs and print the total."""
    res = sum(i + i**2 + i**3 for _ in range(2) for i in range(1000000))
    print('normal:', res)

def multithread():
    """Run ``job`` in two threads and print the sum of both results.

    Both threads share one ``mp.Queue``; after they are joined, the two
    results are read back and printed as ``multithread: <sum>``.
    """
    q = mp.Queue()
    threads = [td.Thread(target=job, args=(q,)) for _ in range(2)]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()
    first = q.get()
    second = q.get()
    print('multithread:', first + second)

# Time the three variants back to back; each timestamp marks the end of one
# variant and the start of the next.
if __name__ == '__main__':
    st = time.time()
    normal()
    st1= time.time()
    print('normal time:', st1 - st)
    multithread()
    st2 = time.time()
    print('multithread time:', st2 - st1)
    multicore()
    print('multicore time:', time.time()-st2)

## 进程间通信
<span style="font-family:New York Times; font-size:1em; color:green;">
Process之间肯定是需要通信的，操作系统提供了很多机制来实现进程间的通信。Python的multiprocessing模块包装了底层的机制，提供了Queue、Pipes等多种方式来交换数据。

我们以Queue为例，在父进程中创建两个子进程，一个往Queue里写数据，一个从Queue里读数据：

In [None]:
from multiprocessing import Process, Queue
import time, random

# Code executed by the writer process:
def write(q):
    """Put 'A', 'B' and 'C' on ``q``, sleeping a random fraction of a second after each."""
    print('Process to write: %s' % os.getpid())
    for value in ('A', 'B', 'C'):
        print('Put %s to queue...' % value)
        q.put(value)
        pause = random.random()
        time.sleep(pause)

# Code executed by the reader process:
def read(q):
    """Print every value taken from ``q``, forever.

    Never returns on its own: it blocks in ``q.get(True)`` until an item
    arrives, prints it, and loops again.
    """
    pid = os.getpid()
    print('Process to read: %s' % pid)
    while True:
        print('Get %s from queue.' % q.get(True))

if __name__=='__main__':
    # The parent creates the Queue and passes it to each child process:
    q = Queue()
    pw = Process(target=write, args=(q,))
    pr = Process(target=read, args=(q,))
    # Start child process pw (the writer):
    pw.start()
    # Start child process pr (the reader):
    pr.start()
    # Wait for pw to finish:
    pw.join()
    # pr runs an infinite loop, so it cannot be waited for; force-terminate it:
    pr.terminate()

# Multithreading

In [None]:
from threading import Thread
from queue import Queue
class WorkerThread(Thread):
    """Thread that processes items fed to it through a private queue.

    Items passed to ``send`` are printed by the thread in order. ``close``
    enqueues a ``None`` sentinel and blocks until every queued item,
    sentinel included, has been acknowledged with ``task_done``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.input_queue = Queue()

    def send(self, item):
        """Queue ``item`` for the worker."""
        self.input_queue.put(item)

    def close(self):
        """Ask the worker to stop and wait until the queue has been drained."""
        self.input_queue.put(None)
        self.input_queue.join()

    def run(self):
        """Print queued items until the ``None`` sentinel arrives."""
        pending = self.input_queue
        item = pending.get()
        while item is not None:
            # In real code, replace this with useful work.
            print(item)
            pending.task_done()
            item = pending.get()
        # Done: acknowledge the sentinel so that close() can return.
        pending.task_done()

# Start a worker, feed it three items, then shut it down.
if __name__=="__main__":
    w=WorkerThread()
    w.start()
    w.send("Mark")
    w.send("好")
    w.send("？")
    # close() returns only after the worker has acknowledged everything queued.
    w.close()



## `concurrent.futures`

* [PYTHON: A quick introduction to the `concurrent.futures` module](http://masnun.com/2016/03/29/python-a-quick-introduction-to-the-concurrent-futures-module.html)

    The `concurrent.futures` module is part of the standard library which provides a high level API for launching async tasks
    
* [ThreadPoolExecutor线程池](https://www.jianshu.com/p/b9b3d66aa0be)
* [https://realpython.com/python-concurrency/](https://realpython.com/python-concurrency/)
* [http://python.jobbole.com/87272/](http://python.jobbole.com/87272/)

### `ThreadPoolExecutor`

In [None]:
from concurrent.futures import ThreadPoolExecutor
from time import sleep

def return_after_5_secs(message, delay=5):
    """Sleep for ``delay`` seconds, then return ``message`` unchanged.

    Args:
        message: Any value; handed back as the result.
        delay: Seconds to sleep before returning. Defaults to 5, which is the
            behaviour the function name describes.

    Returns:
        The ``message`` argument.
    """
    sleep(delay)
    return message

pool = ThreadPoolExecutor(3)
# pool.submit(func, *args, **kwargs) schedules func(*args, **kwargs) on the
# pool and returns a Future at once. Arguments are passed individually, not
# wrapped in a tuple.
future = pool.submit(return_after_5_secs, "hello")
print(future.done())  # whether the future has resolved yet
sleep(5)
# NOTE(review): the task and this sleep both last 5 s, so whether done() is
# already True here depends on thread timing; result() below blocks until the
# task has finished either way.
print(future.done())
print(future.result())
# Release the pool's worker threads now that the demo is over.
pool.shutdown()

In [None]:
from concurrent.futures import ThreadPoolExecutor
import time

# The ``times`` argument simulates how long a network request takes.
def get_html(times):
    """Sleep for ``times`` seconds, print a completion line and return ``times``."""
    time.sleep(times)
    message = "get page {}s finished".format(times)
    print(message)
    return times

executor = ThreadPoolExecutor(max_workers=2)
# submit() hands the callable to the thread pool and returns immediately without blocking
task1 = executor.submit(get_html, (3))
task2 = executor.submit(get_html, (2))
# done() reports whether a task has finished
print(task1.done())
# cancel() tries to cancel a task; it only succeeds if the task has not yet been placed in the pool
print(task2.cancel())
time.sleep(4)
print(task1.done())
# result() returns the task's return value
print(task1.result())

# Expected output
# False  # task1 has not finished yet
# False  # task2 could not be cancelled because it is already in the pool
# get page 2s finished
# get page 3s finished
# True  # printed after "get page 3s finished", so task1 must be done by now
# 3     # the return value of task1

### `as_completed`

In [None]:
from concurrent.futures import ThreadPoolExecutor, as_completed
import time

# The ``times`` argument simulates how long a network request takes.
def get_html(times):
    """Sleep for ``times`` seconds, print a completion line and return ``times``."""
    time.sleep(times)
    message = "get page {}s finished".format(times)
    print(message)
    return times

executor = ThreadPoolExecutor(max_workers=2)
urls = [3, 2, 4] # not real URLs: each number is passed to get_html as its sleep time
all_task = [executor.submit(get_html, (url)) for url in urls]

# Handle each future as it completes rather than in submission order.
for future in as_completed(all_task):
    data = future.result()
    print("in main: get page {}s success".format(data))

# Expected output
# get page 2s finished
# in main: get page 2s success
# get page 3s finished
# in main: get page 3s success
# get page 4s finished
# in main: get page 4s success

### `wait`

<span style="font-family:New York Times; font-size:1em; color:green;">wait方法接收3个参数，等待的任务序列、超时时间以及等待条件。等待条件return_when默认为ALL_COMPLETED，表明要等待所有的任务都结束。可以看到运行结果中，确实是所有任务都完成了，主线程才打印出main。等待条件还可以设置为FIRST_COMPLETED，表示第一个任务完成就停止等待。

In [None]:
from concurrent.futures import ThreadPoolExecutor, wait, ALL_COMPLETED, FIRST_COMPLETED
import time

# The ``times`` argument simulates how long a network request takes.
def get_html(times):
    """Sleep for ``times`` seconds, print a completion line and return ``times``."""
    time.sleep(times)
    message = "get page {}s finished".format(times)
    print(message)
    return times

executor = ThreadPoolExecutor(max_workers=2)
urls = [3, 2, 4] # not real URLs: each number is passed to get_html as its sleep time
all_task = [executor.submit(get_html, (url)) for url in urls]
# Block the main thread until every submitted task has finished.
wait(all_task, return_when=ALL_COMPLETED)
print("main")
# Expected output
# get page 2s finished
# get page 3s finished
# get page 4s finished
# main

In [None]:
def get_html(times):
    """Sleep for ``times`` seconds, print a completion line and return ``times``."""
    time.sleep(times)
    message = "get page {}s finished".format(times)
    print(message)
    return times

executor = ThreadPoolExecutor(max_workers=2)
urls = [3, 2, 4] # not real URLs: each number is passed to get_html as its sleep time
all_task = [executor.submit(get_html, (url)) for url in urls]
# Stop waiting as soon as the first task has finished.
wait(all_task, return_when=FIRST_COMPLETED)
print("main")

https://stackoverflow.com/questions/57600892/why-is-python-multithreading-in-this-example-so-slow

In [None]:
import time

start_time = time.time()


def testThread(num):
    """Write 500 progressively longer digit strings to the global file ``a``.

    The ``num`` argument is ignored. Each step appends one digit (``i % 10``)
    to the text and writes the whole accumulated text again.
    """
    text = ""
    for step in range(500):
        text += str(step % 10)
        a.write(text)


def main():
    """Call ``testThread`` 3000 times in sequence on the current thread."""
    for task_id in range(3000):
        testThread(task_id)


if __name__ == '__main__':
    # testThread writes through the module-level name ``a``. The with-block
    # makes sure the file is flushed and closed once main() returns; it was
    # previously left open.
    with open('single.txt', 'w') as a:
        main()
    print(time.time() - start_time)

In [None]:
import threading
from concurrent.futures import ThreadPoolExecutor
from multiprocessing.pool import ThreadPool
import time

start_time = time.time()


def testThread(num):
    """Write 500 progressively longer digit strings to the global file ``a``.

    The ``num`` argument is ignored. Each write is made while holding
    ``global_lock``, so writes from different threads do not interleave.
    """
    text = ""
    for step in range(500):
        text += str(step % 10)
        with global_lock:
            a.write(text)


def main():
    """Run ``testThread`` for task ids 0..2999 on a pool of four threads."""
    # map() blocks until every call has finished. testThread returns nothing
    # useful, so the result list is not kept.
    with ThreadPool(4) as executor:
        executor.map(testThread, range(3000))

    # Alternative with the same shape using concurrent.futures:
    # with ThreadPoolExecutor() as executor:
    #    executor.map(testThread, range(3000))


if __name__ == '__main__':
    # The lock has to exist before main() starts the worker threads.
    global_lock = threading.Lock()
    # testThread writes through the module-level name ``a``. The with-block
    # makes sure the file is flushed and closed once main() returns; it was
    # previously left open.
    with open('multi.txt', 'w') as a:
        main()
    print(time.time() - start_time)

https://stackoverflow.com/questions/57618697/time-sleep-not-working-as-expected-when-adding-end-to-print

In [None]:
import time,threading

# Space-separated character codes, converted to a list of ints below.
msg = "68 111 110 117 116 115 32 97 114 101 32 98 111 109 98 "
msg = list(map(int, msg.split()))

def print_msg():
    """Print each character of ``msg`` on its own line, pausing 0.2 s after each."""
    for code in msg:
        print(chr(code))
        time.sleep(0.2)

threading.Thread(target=print_msg).start()

In [None]:
import time, threading

# Space-separated character codes, converted to a list of ints below.
msg = "68 111 110 117 116 115 32 97 114 101 32 98 111 109 98 "
msg = [int(x) for x in msg.split()]

def print_msg():
    """Print the characters of ``msg`` on one line, one every 0.2 s."""
    for c in msg:
        # end="" keeps the characters on one line; flush=True pushes each one
        # out straight away instead of leaving it in the output buffer.
        print(chr(c), end="", flush=True)
        time.sleep(0.2)

# Run the printer on a background thread.
threading.Thread(target=print_msg).start()

# Parallel computation

<span style="font-family:New York Times; font-size:1em; color:green;">
    
we would want to use the `ProcessPoolExecutor` for CPU intensive tasks. The `ThreadPoolExecutor` is better suited for network operations or I/O.

In [None]:
import queue
import threading
import urllib.request, urllib.error, urllib.parse

# Called by each worker thread.
def get_url(q, url):
    """Fetch ``url`` and put the raw response body on ``q``.

    Args:
        q: Queue-like object; the body is handed over with ``q.put``.
        url: URL accepted by ``urllib.request.urlopen``.

    Raises:
        Whatever ``urlopen`` or ``read`` raises (e.g. ``urllib.error.URLError``);
        in that case nothing is put on ``q``.
    """
    # The context manager closes the response even if read() fails; it was
    # previously never closed.
    with urllib.request.urlopen(url) as response:
        body = response.read()
    q.put(body)

theurls = ["http://google.com", "http://yahoo.com"]

q = queue.Queue()

# Start one daemon thread per URL; each one puts its response body on q.
for u in theurls:
    t = threading.Thread(target=get_url, args = (q,u))
    t.daemon = True
    t.start()

# Only one item is taken from the queue: the body from whichever thread
# finishes first.
s = q.get()
print(s)

In [None]:
# Sample records: a list of 3-tuples of strings.
Result = [('80407', 'about power supply of opertional amplifier', '11 hours ago'), ('80405', '5V Regulator Power Dissipation', '11 hours ago')]

In [None]:
# Display the first record.
Result[0]

In [None]:
# One record per line, fields separated by ';'.
print(*(';'.join(record) for record in Result), sep='\n')

In [None]:
def findClosestPair(arr0, threshold=20):
    """Greedily pair neighbouring values of ``arr0`` that are closer than ``threshold``.

    The values are sorted and scanned from smallest to largest. When two
    consecutive sorted values differ by less than ``threshold`` they form a
    pair, unless the larger of the two is at least as close to the next value,
    in which case that later pair is taken instead. A value belongs to at most
    one pair.

    Args:
        arr0: Sequence of distinct, hashable numbers.
        threshold: Exclusive upper bound on the gap inside a pair. Defaults
            to 20.

    Returns:
        The string ``"No repeated elements"`` if ``arr0`` contains duplicate
        values. Otherwise a tuple ``(result_indexes, result_value)``:
        ``result_indexes`` holds ``[i, j]`` positions into ``arr0`` with
        ``i < j``, and ``result_value`` holds the matching
        ``[arr0[i], arr0[j]]`` values.
    """
    if len(arr0) != len(set(arr0)):
        # Message text kept unchanged for existing callers; this is the path
        # taken when the input contains duplicates.
        return ("No repeated elements")

    ordered = sorted(arr0)
    n = len(ordered)
    pairs = []
    x = 0
    # Scan through the last adjacent pair (n - 2, n - 1). The loop previously
    # stopped at n - 3, so a close pair at the very end was never examined.
    while x < n - 1:
        gap = ordered[x + 1] - ordered[x]
        if gap >= threshold:
            x += 1
        elif x + 2 < n and gap >= ordered[x + 2] - ordered[x + 1]:
            # The middle value is at least as close to its right neighbour.
            pairs.append([ordered[x + 1], ordered[x + 2]])
            x += 3
        else:
            pairs.append([ordered[x], ordered[x + 1]])
            x += 2

    # Values are unique here, so a dict gives the same positions as
    # arr0.index() without rescanning the list for every lookup.
    position = {value: index for index, value in enumerate(arr0)}
    result_indexes = [sorted((position[low], position[high])) for low, high in pairs]
    result_value = [[arr0[i], arr0[j]] for i, j in result_indexes]
    return (result_indexes, result_value)

# Sample input; the bare call lets the notebook display the returned value.
arr0 = [40, 55, 190, 80, 175, 187]
findClosestPair(arr0)

In [None]:

# Step-by-step version of the pairing scan, run on the global ``arr0``.
arr0s = sorted(arr0)
n = len(arr0)
z = []
x = 0
while x<n-2:
    if arr0s[x+1]-arr0s[x] < 20:
        if arr0s[x+1]-arr0s[x] < arr0s[x+2]-arr0s[x+1]:
            z.append([arr0s[x], arr0s[x+1]])
            x+=2
        else:
            z.append([arr0s[x+1], arr0s[x+2]])
            x+=3
    else:
        x+=1

# Post-processing runs once, after the scan. It used to sit inside the loop,
# which recomputed it on every pass and left result_indexes / result_value
# undefined (NameError at the prints) whenever the loop body never ran.
# Map each paired value back to its position in arr0.
result_indexes = [[arr0.index(i[0]), arr0.index(i[1])]  for i in z]

# Put every index pair in ascending order.
for i, j in enumerate(result_indexes):
    if j[0]>j[1]:
        result_indexes[i] = [j[1], j[0]]
result_value = [[arr0[i[0]], arr0[i[1]]] for i in result_indexes]
print(result_indexes)
#[[0, 1], [2, 5]]
print(result_value)
#[[40, 55], [190, 187]]

## Timed out input

In [None]:
import time
from threading import Thread

# Stays None until the user has typed something.
answer = None

def check():
    """Print "Too Slow" if no answer has been entered 2 seconds from now."""
    time.sleep(2)
    if answer is None:
        print ("Too Slow")

# Start the watchdog before blocking on input().
Thread(target = check).start()

# input() keeps waiting even after "Too Slow" has been printed; the watchdog
# only reports the timeout.
answer = input("Input something: ")

In [None]:
a = 3
pw = 'pw'


# Keep asking for up to 5 seconds.
t_end = time.time() + 5
print("Inter the pass word")
while time.time() < t_end:
    # input() blocks, so the deadline is only re-checked between attempts.
    what = input(":")
    if pw == what:
        print(a)
        break
    print("Wrong, Input again!")
else:
    # Reached only when the deadline passes without a correct password; a
    # successful break used to print "Time is up" as well.
    print("Time is up")



In [None]:
# Read-evaluate-print loop: keeps prompting until input() raises EOFError.
# NOTE(review): eval() executes whatever expression is typed at the prompt.
# That is unsafe for untrusted input; consider ast.literal_eval if only
# literals are expected.
try:
    while True:
        data = eval(input('prompt:'))
        print('READ:', data)
except EOFError as e:
    # End of input: report the exception and stop.
    print(e)