# Multi-threading and Multi-Processing

### Recommended readings:

1. [Difference Between Multithreading vs Multiprocessing in Python](https://www.geeksforgeeks.org/difference-between-multithreading-vs-multiprocessing-in-python/)
2. [Multithreading vs. Multiprocessing in Python](https://towardsdatascience.com/multithreading-vs-multiprocessing-in-python-3afeb73e105f)
3. [Python guide: Using multiprocessing versus multithreading](https://medium.com/capital-one-tech/python-guide-using-multiprocessing-versus-multithreading-55c4ea1788cd)

In [None]:
import time

"""
NOTE: if you want a better understanding of how the
multi-threaded / multi-process programs execute, uncomment the `print(..)` lines.
"""

# Delay in seconds passed to `time.sleep` before each element is processed by
# the worker functions in this notebook.
idle_time = 0.2

def calc_square(numbers: list) -> list:
    """Return the squares of ``numbers``, preserving their order.

    Sleeps for ``idle_time`` seconds before handling each element, so the
    call blocks for at least ``len(numbers) * idle_time`` seconds.

    Args:
        numbers: Values to square; each must support ``*`` with itself.

    Returns:
        A new list holding ``n * n`` for every ``n`` in ``numbers``.
    """
    print("calculate square numbers")

    def delayed_square(value):
        # Pause first, then compute: one sleep per element.
        time.sleep(idle_time)
        # print(f"square of {value} =>> {value*value}")
        return value * value

    return [delayed_square(value) for value in numbers]

def calc_cube(numbers: list) -> list:
    """Return the cubes of ``numbers``, preserving their order.

    Sleeps for ``idle_time`` seconds before handling each element, so the
    call blocks for at least ``len(numbers) * idle_time`` seconds.

    Args:
        numbers: Values to cube; each must support ``*`` with itself.

    Returns:
        A new list holding ``n * n * n`` for every ``n`` in ``numbers``.
    """
    print("calculate cube numbers")

    def delayed_cube(value):
        # Pause first, then compute: one sleep per element.
        time.sleep(idle_time)
        # print(f"cube of {value} =>> {value*value*value}")
        return value * value * value

    return [delayed_cube(value) for value in numbers]

In [4]:
def generate_prime_numbers(n):
    """Generates a list of the first n prime numbers.

    Uses trial division, but only by the primes found so far and only while
    ``p * p <= candidate``. This stays in integer arithmetic (no float square
    root) and skips composite divisors, which can never be the smallest
    factor of a number.

    Args:
        n: How many primes to produce. Values <= 0 yield an empty list.

    Returns:
        The first ``n`` primes in ascending order, starting with 2.
    """
    primes = []
    candidate = 2
    while len(primes) < n:
        is_prime = True
        for p in primes:
            if p * p > candidate:
                # No prime factor <= sqrt(candidate) exists: candidate is prime.
                break
            if candidate % p == 0:
                is_prime = False
                break
        if is_prime:
            primes.append(candidate)
        candidate += 1
    return primes

# Generate a list of the first 100 prime numbers; this list is the shared
# input for the examples that follow.
prime_numbers = generate_prime_numbers(100)
# Bare expression so the notebook echoes the list as the cell's output.
prime_numbers

[2,
 3,
 5,
 7,
 11,
 13,
 17,
 19,
 23,
 29,
 31,
 37,
 41,
 43,
 47,
 53,
 59,
 61,
 67,
 71,
 73,
 79,
 83,
 89,
 97,
 101,
 103,
 107,
 109,
 113,
 127,
 131,
 137,
 139,
 149,
 151,
 157,
 163,
 167,
 173,
 179,
 181,
 191,
 193,
 197,
 199,
 211,
 223,
 227,
 229,
 233,
 239,
 241,
 251,
 257,
 263,
 269,
 271,
 277,
 281,
 283,
 293,
 307,
 311,
 313,
 317,
 331,
 337,
 347,
 349,
 353,
 359,
 367,
 373,
 379,
 383,
 389,
 397,
 401,
 409,
 419,
 421,
 431,
 433,
 439,
 443,
 449,
 457,
 461,
 463,
 467,
 479,
 487,
 491,
 499,
 503,
 509,
 521,
 523,
 541]

In [8]:
# Sequential baseline: run both workers one after the other in this process.
start_time = time.time()

squares = calc_square(prime_numbers)
cubes = calc_cube(prime_numbers)


# Elapsed wall-clock seconds for the two back-to-back calls.
print(f"Done in : {time.time() - start_time}")

calculate square numbers
calculate cube numbers
Done in : 40.05647683143616


# Threading

In [None]:
# Using multi-threading
import threading


start_time = time.time()


# Create threads (`args` must be a tuple, hence the trailing comma).
# NOTE: `threading.Thread` does not expose the target's return value, so the
# lists returned by `calc_square` / `calc_cube` are discarded here.
thread_1 = threading.Thread(target=calc_square, args=(prime_numbers,))
thread_2 = threading.Thread(target=calc_cube, args=(prime_numbers, ))


# Start both threads; from here on they run concurrently with this code.
thread_1.start()
thread_2.start()


# `.join()` blocks the calling thread until the target thread has terminated.
# In short: wait for both workers to finish before measuring the elapsed time.
thread_1.join()
thread_2.join()


print(f"Done in : {time.time() - start_time}")

calculate square numbers
calculate cube numbers
Done in : 20.026913166046143


# Multi-processing

In [None]:
# Multi-processing
import multiprocessing


start_time = time.time()


# Create processes (`args` must be a tuple, hence the trailing comma).
# NOTE: the lists returned by the targets are not sent back to this process.
process_1 = multiprocessing.Process(target=calc_square, args=(prime_numbers, ))
process_2 = multiprocessing.Process(target=calc_cube, args=(prime_numbers, ))


# Start both child processes; they run in parallel with this one.
process_1.start()
process_2.start()


# Block until each child process has exited.
process_1.join()
process_2.join()


print(f"Done in : {time.time() - start_time}")

"""
NOTE: You can open the task manager to check whether the multiple processes
are executed or not. In this example, you should see two processes of this 
python script (`python.exe`). 
"""

calculate square numbers
calculate cube numbers
Done in : 20.046017169952393


In [22]:
"""
Every process has its own address space (vmem) => variables are not shareable.
Let's try this to check if process can write data to a global variable.
"""
start_time = time.time()
# Module-level list owned by this (parent) process; the worker defined below
# appends to the global of the same name.
square_results = ["Outside"]

def calc_square_sharing(numbers: list) -> list:
    """Square ``numbers`` and also record each square in the global list.

    Besides building and returning its own list, the function appends every
    square to the module-level ``square_results``. When it runs as a
    ``multiprocessing.Process`` target, those appends happen in the child
    process, and the parent's list stays unchanged (see the cell output).

    Args:
        numbers: Values to square.

    Returns:
        A new list holding ``n * n`` for every ``n`` in ``numbers``.
    """
    global square_results
    print("calculate square numbers")
    local_squares = []
    for value in numbers:
        time.sleep(idle_time)           # idle_time = 0.2
        squared = value * value
        local_squares.append(squared)
        square_results.append(squared)
    return local_squares



# Run the worker in a separate process and wait for it to finish.
p_sharing = multiprocessing.Process(target=calc_square_sharing, args=(prime_numbers, ))
p_sharing.start()
p_sharing.join()

print(f"Done in : {time.time() - start_time}")
# Printed from the parent process, so this shows the parent's `square_results`.
print(f"square_results: {square_results}")

calculate square numbers
Done in : 20.045997142791748
square_results: ['Outside']


In [24]:
"""
Now, let's check if process can write data to a within-process-variable.
"""

start_time = time.time()
# Reset the parent's module-level list before starting the worker process.
square_results = ["Outside"]


def calc_square_within(numbers: list) -> list:
    """Square ``numbers``, record each square globally, then print the global.

    Same work as appending to the module-level ``square_results`` while
    squaring, but it additionally prints ``square_results`` from inside the
    function, i.e. as seen by whichever process executes it.

    Args:
        numbers: Values to square.

    Returns:
        A new list holding ``n * n`` for every ``n`` in ``numbers``.
    """
    global square_results
    print("calculate square numbers")
    local_squares = []
    for value in numbers:
        time.sleep(idle_time)           # idle_time = 0.2
        squared = value * value
        local_squares.append(squared)
        square_results.append(squared)
    # Show the list as this process sees it, before returning.
    print(f"within-process-variable: {square_results}")
    return local_squares


# Run the worker in a separate process and wait for it to finish.
p_within = multiprocessing.Process(target=calc_square_within, args=(prime_numbers, ))
p_within.start()
p_within.join()

print(f"Done in : {time.time() - start_time}")
# Printed from the parent process, so this shows the parent's `square_results`.
print(f"square_results: {square_results}")

calculate square numbers
within-process-variable: ['Outside', 4, 9, 25, 49, 121, 169, 289, 361, 529, 841, 961, 1369, 1681, 1849, 2209, 2809, 3481, 3721, 4489, 5041, 5329, 6241, 6889, 7921, 9409, 10201, 10609, 11449, 11881, 12769, 16129, 17161, 18769, 19321, 22201, 22801, 24649, 26569, 27889, 29929, 32041, 32761, 36481, 37249, 38809, 39601, 44521, 49729, 51529, 52441, 54289, 57121, 58081, 63001, 66049, 69169, 72361, 73441, 76729, 78961, 80089, 85849, 94249, 96721, 97969, 100489, 109561, 113569, 120409, 121801, 124609, 128881, 134689, 139129, 143641, 146689, 151321, 157609, 160801, 167281, 175561, 177241, 185761, 187489, 192721, 196249, 201601, 208849, 212521, 214369, 218089, 229441, 237169, 241081, 249001, 253009, 259081, 271441, 273529, 292681]
Done in : 20.051899433135986
square_results: ['Outside']


## Sharing data between multiple processes

In [28]:
# Using array

start_time = time.time()

# Use shared memory to share data between processes: an array of C ints
# (typecode 'i') with one slot per input number.
shm_res_arr = multiprocessing.Array(
                                    typecode_or_type='i', 
                                    size_or_initializer=len(prime_numbers), 
                                    )

def calc_square_shm(numbers: list, shm_results) -> None:
    """Write the square of each number into ``shm_results`` at the same index.

    Nothing is returned; the results are delivered only through
    ``shm_results``. Sleeps ``idle_time`` seconds before each element.

    Args:
        numbers: Values to square.
        shm_results: Index-assignable buffer with at least ``len(numbers)``
            slots. In this notebook it is a ``multiprocessing.Array`` of
            typecode ``'i'``.
    """
    print("calculate square numbers")
    for idx, n in enumerate(numbers):
        time.sleep(idle_time)
        shm_results[idx] = n * n
        
# Pass the shared array to the child so it can write results into it.
p_shm = multiprocessing.Process(target=calc_square_shm, args=(prime_numbers, shm_res_arr))
p_shm.start()
p_shm.join()

print(f"Done in : {time.time() - start_time}")
# `[:]` copies the shared array's contents into a regular list for printing.
print(f"square_results: {shm_res_arr[:]}")

"""
NOTE: using `[:]` operators to print all elements in an array. 
"""

calculate square numbers
Done in : 20.04759168624878
square_results: [4, 9, 25, 49, 121, 169, 289, 361, 529, 841, 961, 1369, 1681, 1849, 2209, 2809, 3481, 3721, 4489, 5041, 5329, 6241, 6889, 7921, 9409, 10201, 10609, 11449, 11881, 12769, 16129, 17161, 18769, 19321, 22201, 22801, 24649, 26569, 27889, 29929, 32041, 32761, 36481, 37249, 38809, 39601, 44521, 49729, 51529, 52441, 54289, 57121, 58081, 63001, 66049, 69169, 72361, 73441, 76729, 78961, 80089, 85849, 94249, 96721, 97969, 100489, 109561, 113569, 120409, 121801, 124609, 128881, 134689, 139129, 143641, 146689, 151321, 157609, 160801, 167281, 175561, 177241, 185761, 187489, 192721, 196249, 201601, 208849, 212521, 214369, 218089, 229441, 237169, 241081, 249001, 253009, 259081, 271441, 273529, 292681]


'\nNOTE: using `[:]` operators to print all elements in an array. \n'

In [None]:
"""
NOTE: Now, add a value to calculate during a process.
"""

start_time = time.time()

# Use shared memory to share data between processes: an array of C ints
# (typecode 'i') with one slot per input number.
shm_results = multiprocessing.Array(
                                    typecode_or_type='i', 
                                    size_or_initializer=len(prime_numbers), 
                                    )
# Shared C double (typecode 'd') starting at 0.0; the worker below adds each
# stored square to it.
shm_values = multiprocessing.Value('d', 0.0)

def calc_square_shm(numbers: list, results, values) -> None:
    """Store each square in ``results`` and accumulate their sum in ``values``.

    Nothing is returned; the output is delivered only through the two
    shared objects. Sleeps ``idle_time`` seconds before each element.

    Args:
        numbers: Values to square.
        results: Index-assignable buffer with at least ``len(numbers)``
            slots. In this notebook it is a ``multiprocessing.Array('i', ...)``.
        values: Object with a numeric ``value`` attribute used as the
            running total. In this notebook it is a
            ``multiprocessing.Value('d', 0.0)``.
    """
    print("calculate square numbers")
    for idx, n in enumerate(numbers):
        time.sleep(idle_time)
        results[idx] = n * n
        # Add the value read back from the buffer, i.e. what was stored.
        values.value += results[idx]

# Pass both shared objects to the child so it can write into them.
p_shm = multiprocessing.Process(target=calc_square_shm, args=(prime_numbers, shm_results, shm_values))
p_shm.start()
p_shm.join()

print(f"Done in : {time.time() - start_time}")
# `[:]` copies the shared array's contents into a regular list for printing.
print(f"shm_results: {shm_results[:]}")
# `.value` reads the current content of the shared double.
print(f"shm_values result: {shm_values.value}")
"""
NOTE: using `[:]` operators to print all elements in an array. 
"""

calculate square numbers
Done in : 20.053504467010498
shm_results: [4, 9, 25, 49, 121, 169, 289, 361, 529, 841, 961, 1369, 1681, 1849, 2209, 2809, 3481, 3721, 4489, 5041, 5329, 6241, 6889, 7921, 9409, 10201, 10609, 11449, 11881, 12769, 16129, 17161, 18769, 19321, 22201, 22801, 24649, 26569, 27889, 29929, 32041, 32761, 36481, 37249, 38809, 39601, 44521, 49729, 51529, 52441, 54289, 57121, 58081, 63001, 66049, 69169, 72361, 73441, 76729, 78961, 80089, 85849, 94249, 96721, 97969, 100489, 109561, 113569, 120409, 121801, 124609, 128881, 134689, 139129, 143641, 146689, 151321, 157609, 160801, 167281, 175561, 177241, 185761, 187489, 192721, 196249, 201601, 208849, 212521, 214369, 218089, 229441, 237169, 241081, 249001, 253009, 259081, 271441, 273529, 292681]
shm_values result: 8384727.0


'\nNOTE: using `[:]` operators to print all elements in an array. \n'

## Sharing data between Processes using Queue

In [32]:
from multiprocessing import Queue
"""
NOTE: 
A Queue Data Structure is a fundamental concept in computer science used for storing and managing data in a specific order. It follows the principle of "First in, First out" (FIFO), where the first element added to the queue is the first one to be removed.
"""


start_time = time.time()

# Queue the child process uses to send its squares back to this process.
square_queue = Queue()

def calc_square_shm(numbers: list, queue) -> None:
    """Put the square of each number onto ``queue``, in input order.

    Nothing is returned; exactly one item per element of ``numbers`` is
    placed on the queue. Sleeps ``idle_time`` seconds before each element.

    Args:
        numbers: Values to square.
        queue: Object with a ``put(item)`` method. In this notebook it is a
            ``multiprocessing.Queue``.
    """
    print("calculate square numbers")
    # The position of each element is not needed, so iterate directly.
    for n in numbers:
        time.sleep(idle_time)
        queue.put(n * n)

p_shm = multiprocessing.Process(target=calc_square_shm, args=(prime_numbers, square_queue))
p_shm.start()

# Drain the queue BEFORE joining the producer. A process that has put items on
# a multiprocessing queue waits, before exiting, until its buffered items have
# been flushed to the underlying pipe, so joining first can deadlock once the
# data no longer fits in the pipe buffer.
# The worker puts exactly one item per input number, so the item count is
# known up front. This also avoids `Queue.empty()`, which is not reliable for
# multiprocessing queues.
# The timeout turns a crashed worker into a `queue.Empty` error instead of a
# silent hang; 30 extra seconds is a generous allowance for process start-up.
get_timeout = idle_time + 30
queued_squares = [square_queue.get(timeout=get_timeout) for _ in prime_numbers]
p_shm.join()

print(f"Done in : {time.time() - start_time}")

for square in queued_squares:
    # Release result taken from the queue
    print(square)

print(f"Done in with release time : {time.time() - start_time}")

calculate square numbers


Done in : 20.058963775634766
4
9
25
49
121
169
289
361
529
841
961
1369
1681
1849
2209
2809
3481
3721
4489
5041
5329
6241
6889
7921
9409
10201
10609
11449
11881
12769
16129
17161
18769
19321
22201
22801
24649
26569
27889
29929
32041
32761
36481
37249
38809
39601
44521
49729
51529
52441
54289
57121
58081
63001
66049
69169
72361
73441
76729
78961
80089
85849
94249
96721
97969
100489
109561
113569
120409
121801
124609
128881
134689
139129
143641
146689
151321
157609
160801
167281
175561
177241
185761
187489
192721
196249
201601
208849
212521
214369
218089
229441
237169
241081
249001
253009
259081
271441
273529
292681
Done in with release time : 20.072868585586548


## Multiprocessing Lock

In [53]:
# Number of loop iterations (one +1 or -1 update each) performed by `deposit`
# and `withdraw` below.
total_days = 100

def deposit(balance):
    """Add 1 to ``balance.value`` on each of ``total_days`` iterations.

    Sleeps ``idle_time`` seconds before every update. The update is a separate
    read followed by a write with no lock held across the two, so another
    process may change the balance in between.

    Args:
        balance: Object with a numeric ``value`` attribute. In this notebook
            it is a ``multiprocessing.Value('i', ...)``.
    """
    for _ in range(total_days):
        time.sleep(idle_time)
        balance.value += 1

def withdraw(balance):
    """Subtract 1 from ``balance.value`` on each of ``total_days`` iterations.

    Sleeps ``idle_time`` seconds before every update. The update is a separate
    read followed by a write with no lock held across the two, so another
    process may change the balance in between.

    Args:
        balance: Object with a numeric ``value`` attribute. In this notebook
            it is a ``multiprocessing.Value('i', ...)``.
    """
    for _ in range(total_days):
        time.sleep(idle_time)
        balance.value -= 1


# Shared C int (typecode 'i') holding the balance, starting at 200.
balance = multiprocessing.Value('i', 200)

start_time = time.time()

# One process deposits while the other withdraws, both on the same `balance`.
d = multiprocessing.Process(target=deposit, args=(balance, ))
w = multiprocessing.Process(target=withdraw, args=(balance, ))


d.start()
w.start()


# Wait for both processes before reading the final balance.
d.join()
w.join()


print(f"Done in : {time.time() - start_time}")
print(f"Current balance is {balance.value}")


"""
NOTE: 
The value is 200 happens in normal cases, because the program is add 1 and minus 1 to the same variable. 
However, it would be a case that the result might be 201, or else. As a matter of fact, sometimes the process is slower that allows other process to access the same variables many times
"""

Done in : 20.04433536529541
Current balance is 200


'\nNOTE: \nThe value is 200 happens in normal cases, because the program is add 1 and minus 1 to the same variable. \nHowever, it would be a case that the result might be 201, or else. As a matter of fact, sometimes the process is slower that allows other process to access the same variables many times\n'

In [55]:
"""
Now, let's create a lock
"""

# Number of loop iterations (one +1 or -1 update each) performed by
# `deposit_with_lock` and `withdraw_with_lock` below.
total_days = 100

def deposit_with_lock(balance, lock):
    """Add 1 to ``balance.value`` ``total_days`` times, holding ``lock`` per update.

    Sleeps ``idle_time`` seconds before every update. The sleep happens
    outside the lock, so the lock is held only for the read-modify-write.

    Args:
        balance: Object with a numeric ``value`` attribute. In this notebook
            it is a ``multiprocessing.Value('i', ...)``.
        lock: Lock supporting the context-manager protocol. In this notebook
            it is a ``multiprocessing.Lock``, shared with the withdrawing
            process.
    """
    for _ in range(total_days):
        time.sleep(idle_time)
        # `with` releases the lock even if the update raises, which a bare
        # acquire()/release() pair would not.
        with lock:
            balance.value = balance.value + 1

def withdraw_with_lock(balance, lock):
    """Subtract 1 from ``balance.value`` ``total_days`` times, holding ``lock`` per update.

    Sleeps ``idle_time`` seconds before every update. The sleep happens
    outside the lock, so the lock is held only for the read-modify-write.

    Args:
        balance: Object with a numeric ``value`` attribute. In this notebook
            it is a ``multiprocessing.Value('i', ...)``.
        lock: Lock supporting the context-manager protocol. In this notebook
            it is a ``multiprocessing.Lock``, shared with the depositing
            process.
    """
    for _ in range(total_days):
        time.sleep(idle_time)
        # `with` releases the lock even if the update raises, which a bare
        # acquire()/release() pair would not.
        with lock:
            balance.value = balance.value - 1

# Shared C int (typecode 'i') holding the balance, starting at 200.
balance = multiprocessing.Value('i', 200)
# A single lock handed to both processes so their updates cannot interleave.
lock = multiprocessing.Lock()
start_time = time.time()

d = multiprocessing.Process(target=deposit_with_lock, args=(balance, lock))
w = multiprocessing.Process(target=withdraw_with_lock, args=(balance, lock))


d.start()
w.start()


# Wait for both processes before reading the final balance.
d.join()
w.join()


print(f"Done in : {time.time() - start_time}")
print(f"Current balance is {balance.value}")


"""
NOTE: Now, the value is always 200
"""

Done in : 20.044021368026733
Current balance is 200


'\nNOTE: Now, the value is always 200\n'

## Multi-processing with Map-Reduce

In [69]:
from multiprocessing import Pool

"""
The above function returns a list which is quite in contrast to 
the natural logic of MapReduce. I will re-define the logic in 
the below code.
"""

def calc_square_per_var(n):
    """Return ``n`` multiplied by itself.

    Works on a single value rather than a list, so it can be used as the
    per-element function of ``Pool.map``.
    """
    squared = n * n
    return squared

start_time = time.time()
# With no arguments, Pool() sizes itself from the machine's CPU count.
pool = Pool()
# map() applies the function to every element across the worker processes and
# returns the results as a list in input order.
results = pool.map(func=calc_square_per_var, iterable=prime_numbers)
# close() stops new work from being submitted; join() waits for workers to exit.
pool.close()
pool.join()     # Again, to make sure the process is done safely
# NOTE(review): `calc_square_per_var` does not call `time.sleep`, unlike
# `calc_square`, so this timing is not a like-for-like comparison with the
# sleeping versions timed elsewhere in this notebook.
print(f"Done in: {time.time() - start_time}")

Done in: 0.08231568336486816


In [70]:
# Time the sequential `calc_square` (which sleeps `idle_time` per element) on
# the same input.
start_time = time.time()
results = calc_square(prime_numbers)
print(f"Done in: {time.time() - start_time}")

calculate square numbers
Done in: 20.025547742843628
