<a href="https://colab.research.google.com/github/Mohan5353/Python/blob/main/MultiThreading_and_MultiProcessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## System Information

In [None]:
# Number of CPUs and Threads in Linux.
# Refer: https://linux.die.net/man/1/lscpu
!ls cpu



ls: cpu: No such file or directory


In [None]:
# for Windows
! wminc cpu

zsh:1: command not found: wminc


In [None]:
# for Mac
# NOTE: the printed hardware overview contains the machine's serial number and
# hardware UUID fields; clear or redact this cell's output before sharing.
!system_profiler SPHardwareDataType;

Hardware:

    Hardware Overview:

      Model Name: MacBook Pro
      Model Identifier: MacBookPro17,1
      Model Number: MYD82HN/A
      Chip: Unknown
      Total Number of Cores: 8 (4 performance and 4 efficiency)
      Memory: 8 GB
      System Firmware Version: 8422.141.2
      OS Loader Version: 8422.141.2
      Serial Number (system): <redacted>
      Hardware UUID: <redacted>
      Provisioning UDID: <redacted>
      Activation Lock Status: Enabled



## Generating Random Numbers

In [None]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np

# Seed NumPy's global generator so that every "Restart & Run All" draws the
# same data and the means printed further down are reproducible.
np.random.seed(42)

# Generate 100 million uniform random data points in [0, 1).
n = 100000000
d = np.random.rand(n)
print(d.shape)

(100000000,)


## Simple Computation

In [None]:
import time
def mean(data=None):
    """Return the arithmetic mean of a 1-D sequence using an explicit Python loop.

    The element-by-element loop is deliberate: it is the slow, single-core
    baseline of this notebook. NumPy's built-in sum would be much faster.

    Parameters
    ----------
    data : indexable sequence of numbers, optional
        Values to average. When omitted, the notebook-level array ``d`` is used,
        which keeps the original zero-argument call ``mean()`` working.

    Returns
    -------
    The sum of all values divided by their count.

    Raises
    ------
    ZeroDivisionError
        If the sequence is empty.
    """
    values = d if data is None else data
    count = len(values)  # equals d.size for the 1-D array used in this notebook

    # Named `total` so the built-in sum() is not shadowed.
    total = 0
    for i in range(count):
        total += values[i]

    return total / count


# Time the execution.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() follows the wall clock and can jump if it is adjusted.
start_time = time.perf_counter()
m = mean() # compute mean of 100MM numbers.
end_time = time.perf_counter()
print ("Time Consumed : ",end_time-start_time)
print(m)

Time Consumed :  18.124929904937744
0.49996838540312877


## Using  MultiProcessing

In [None]:
#Refer: https://docs.python.org/3/library/multiprocessing.html
from multiprocess import Process, Queue
import math

def mean_MP(s, e, q, data=None):
    """Compute the mean of elements ``s..e`` (both ends inclusive) and put it on ``q``.

    Intended as the target of a worker process: the result is communicated
    through the queue instead of a return value.

    Parameters
    ----------
    s : int
        Index of the first element of the chunk.
    e : int
        Index of the last element of the chunk (inclusive).
    q : queue-like object
        Anything with a ``put`` method; receives the chunk mean.
    data : indexable sequence of numbers, optional
        Values to read from. When omitted, the notebook-level array ``d`` is used.

    Raises
    ------
    ZeroDivisionError
        If the chunk is empty (``e == s - 1``).
    """
    values = d if data is None else data

    # Sum using a for loop. NumPy's built-in sum would be faster; the loop is
    # kept so the work matches the single-process baseline.
    total = 0  # not named `sum`, to avoid shadowing the built-in
    for i in range(s, e + 1):
        total += values[i]

    # Mean of the chunk, which holds e - s + 1 elements.
    q.put(total / (e - s + 1))

# Split the index range into two contiguous, non-overlapping chunks.
# mean_MP treats both bounds as inclusive, so the chunks are [0, n1-1] and
# [n1, n-1]. Using (0, n1) and (n1+1, n-1) would make the first chunk two
# elements longer than the second and bias the plain average taken below.
n1 = n // 2

q = Queue() #Queues are thread and process safe. For communicating between processes and threads.

p1 = Process(target=mean_MP, args=(0, n1 - 1, q))
p2 = Process(target=mean_MP, args=(n1, n - 1, q))


# Time the execution (perf_counter is the monotonic clock meant for intervals).
start_time = time.perf_counter()

p1.start()
p2.start()

# Collect exactly one result per worker *before* joining. q.get() blocks until
# an item is available, whereas polling q.empty() is not reliable, and the
# multiprocessing guidelines recommend draining a queue before joining the
# processes that feed it.
partial_means = [q.get() for _ in range(2)]

p1.join() # Wait till p1 finishes
p2.join()

# The two chunks have equal length when n is even, so the plain average of the
# partial means is the overall mean. (For odd n the chunks differ by one element.)
m = sum(partial_means) / 2

end_time = time.perf_counter()
print ("Time Consumed : ",end_time-start_time)
print(m)


Time Consumed :  7.235105037689209
0.49996838540309685


## Using MultiThreading

In [None]:
#Refer: https://docs.python.org/3/library/threading.html
from threading import Thread


# One result slot per thread; each thread writes only to its own index.
means = [0, 0]

def mean_MT(s, e, threadNum, data=None):
    """Compute the mean of elements ``s..e`` (both ends inclusive) into ``means[threadNum]``.

    Intended as the target of a worker thread: the result is stored in the
    shared module-level list ``means`` instead of being returned.

    Parameters
    ----------
    s : int
        Index of the first element of the chunk.
    e : int
        Index of the last element of the chunk (inclusive).
    threadNum : int
        Slot of ``means`` that receives the chunk mean.
    data : indexable sequence of numbers, optional
        Values to read from. When omitted, the notebook-level array ``d`` is used.

    Raises
    ------
    ZeroDivisionError
        If the chunk is empty (``e == s - 1``).
    """
    values = d if data is None else data

    # Sum using a for loop. NumPy's built-in sum would be faster; the loop is
    # kept so the work matches the single-threaded baseline.
    total = 0  # not named `sum`, to avoid shadowing the built-in
    for i in range(s, e + 1):
        total += values[i]

    # Mean of the chunk; `means` is a variable shared between the threads.
    means[threadNum] = total / (e - s + 1)

# Split the index range into two contiguous, non-overlapping chunks.
# mean_MT treats both bounds as inclusive: thread 0 gets [0, n1-1] (n1 elements)
# and thread 1 gets [n1, n-1] (n - n1 elements).
n1 = n // 2

t1 = Thread(target=mean_MT, args=(0, n1 - 1, 0))  # Third param is the thread number
t2 = Thread(target=mean_MT, args=(n1, n - 1, 1))

# Time the execution (perf_counter is the monotonic clock meant for intervals).
start_time = time.perf_counter()

t1.start()
t2.start()

t1.join() # Wait till t1 finishes
t2.join()

# Weight each partial mean by its chunk length so the combined value is the
# true overall mean even when the two chunks differ in size (odd n).
m = (means[0] * n1 + means[1] * (n - n1)) / n

end_time = time.perf_counter()
print ("Time Consumed : ",end_time-start_time)
print(m)



Time Consumed :  14.255736112594604
0.49996838540309685


## Using Caching

In [None]:

# Transparent, fast on-disk caching of function return values.
# Refer: https://joblib.readthedocs.io/en/latest/
import numpy as np
from joblib import Memory

# Directory handed to joblib as the cache location.
cachedir = './'
mem = Memory(cachedir)

# Vandermonde matrix built from [0, 1, 2], converted to floating point.
a = np.vander(np.arange(3)).astype("float")

# Wrap np.square with the cache, then make the first (computed) call.
square = mem.cache(np.square)
b = square(a)

# Show the input followed by its element-wise square.
print(a, b, sep="\n")

[[0. 0. 1.]
 [1. 1. 1.]
 [4. 2. 1.]]
[[ 0.  0.  1.]
 [ 1.  1.  1.]
 [16.  4.  1.]]


In [None]:
# Call the cache-wrapped `square` again with the same argument `a`; joblib is
# expected to return the stored result rather than recompute it.
c = square(a)
# The above call did not trigger an evaluation

## Joblib

In [None]:
# Refer: https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html

import time
from math import sqrt # inbuilt fucntion

def f(i, x=10000):
    """Return ``sqrt(i ** 2)`` after a deliberately slow busy loop.

    The nested loops exist only to burn CPU time so that the serial and
    parallel runs of this function can be compared; their result is discarded.

    Parameters
    ----------
    i : number
        Value whose square is square-rooted.
    x : int, optional
        Bound of the outer busy loop. The inner loop body runs
        ``x * (x - 1) / 2`` times in total. The default of 10000 is the
        workload used throughout the notebook; pass a small value for a quick call.

    Returns
    -------
    float
        ``sqrt(i ** 2)``.
    """
    # some computations that take time
    p = 1
    for j in range(x):
        for k in range(j):
            p *= k  # value is never used; this is pure busy work

    return sqrt(i ** 2)

# Find f(x) of first n numbers, one call after another (serial baseline).
n = 10

# perf_counter() is the monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() follows the wall clock.
start_time = time.perf_counter()

for i in range(n):
    f(i)

end_time = time.perf_counter()
print ("Time Consumed : ",end_time-start_time)

Time Consumed :  17.74006199836731


### Parallel execution of function calls

In [None]:
from joblib import Parallel, delayed

# perf_counter() is the monotonic clock intended for measuring elapsed time.
start_time = time.perf_counter()

# Evaluate f(0) ... f(n-1) with two joblib workers; `a` collects the results in order.
a = Parallel(n_jobs=2)(delayed(f)(i) for i in range(n))

# Why we need delayed(): https://stackoverflow.com/questions/42220458/what-does-the-delayed-function-do-when-used-with-joblib-in-python

end_time = time.perf_counter()
print ("Time Consumed : ",end_time-start_time)

Time Consumed :  10.814059019088745


In [None]:
# Multi threading: same two workers, but as threads instead of processes.
# f is pure-Python CPU-bound work, so threads are not expected to beat the
# serial loop here.
start_time = time.perf_counter()  # monotonic clock intended for elapsed-time measurement

a = Parallel(n_jobs=2,prefer="threads")(delayed(f)(i ** 2) for i in range(n))

end_time = time.perf_counter()
print ("Time Consumed : ",end_time-start_time)

Time Consumed :  17.827778816223145


In [None]:
# 6 jobs

from joblib import Parallel, delayed

# perf_counter() is the monotonic clock intended for measuring elapsed time.
start_time = time.perf_counter()

# Same workload as above, spread over six joblib workers.
a = Parallel(n_jobs=6)(delayed(f)(i ** 2) for i in range(n))

# Why we need delayed(): https://stackoverflow.com/questions/42220458/what-does-the-delayed-function-do-when-used-with-joblib-in-python

end_time = time.perf_counter()
print ("Time Consumed : ",end_time-start_time)

Time Consumed :  6.102927923202515


# Matrix Multiplication : Numpy vs Numba

In [None]:
# Dimensions: A is (m x n) and B is (n x c), so their product is (m x c).
m = 1000
n = 1500
c = 1200

# Random integer matrices with entries drawn from [1, 50).
A = np.random.randint(low=1, high=50, size=(m, n))
B = np.random.randint(low=1, high=50, size=(n, c))

## Basic Matrix Multiplication

In [None]:
import time
def basic_mat_mult(A,B):
    """Multiply two 2-D arrays with three nested Python loops.

    This is the unvectorised reference implementation: every output cell is
    accumulated one product at a time.

    Parameters
    ----------
    A : 2-D array of shape (rows, inner)
    B : 2-D array of shape (inner, cols)

    Returns
    -------
    numpy.ndarray
        Array of shape (rows, cols) holding the matrix product, created with
        ``np.zeros`` and therefore of floating-point dtype.

    Raises
    ------
    AssertionError
        If the inner dimensions of ``A`` and ``B`` differ.
    """
    n_rows, n_inner = A.shape[0], A.shape[1]
    assert n_inner == B.shape[0]
    n_cols = B.shape[1]

    res = np.zeros((n_rows, n_cols))
    for i in range(n_rows):
        for k in range(n_inner):
            a_ik = A[i, k]  # constant across the innermost loop
            for j in range(n_cols):
                res[i, j] += a_ik * B[k, j]
    return res

In [None]:
# perf_counter() is the monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() follows the wall clock.
start_time = time.perf_counter()

res = basic_mat_mult(A, B)

end_time = time.perf_counter()

print ("Time Consumed : ",end_time-start_time)

Time Consumed :  928.7259409427643


## Numpy Implementation

In [None]:
# perf_counter() is the monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() follows the wall clock.
start_time = time.perf_counter()

# Vectorised matrix product computed inside NumPy.
res = np.matmul(A, B)

end_time = time.perf_counter()

print ("Time Consumed : ",end_time-start_time)

Time Consumed :  1.7552309036254883


## Numba Implementation

In [None]:
from numba import njit,prange

In [None]:
@njit(parallel=True)
def numba_mat_mult(A,B):
    """Multiply two 2-D arrays with a Numba-compiled, row-parallel triple loop.

    Parameters
    ----------
    A : 2-D array of shape (rows, inner)
    B : 2-D array of shape (inner, cols)

    Returns
    -------
    Array of shape (rows, cols) holding the matrix product, created with
    ``np.zeros``.
    """
    assert A.shape[1] == B.shape[0]
    res = np.zeros((A.shape[0], B.shape[1]))
    # Only the outer loop is a prange. Numba parallelises just the outermost
    # prange and runs nested ones as ordinary sequential loops, so writing the
    # inner loops as range() states what actually happens. Each parallel
    # iteration writes to its own row res[i, :], so the accumulation is race-free.
    for i in prange(A.shape[0]):
        for k in range(A.shape[1]):
            for j in range(B.shape[1]):
                res[i,j] += A[i,k] * B[k,j]
    return res

# perf_counter() is the monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() follows the wall clock.
# NOTE: if this is the first call of numba_mat_mult for these argument types,
# the measured time also includes Numba's JIT compilation.
start_time = time.perf_counter()

res = numba_mat_mult(A, B)

end_time = time.perf_counter()

print ("Time Consumed : ",end_time-start_time)

Time Consumed :  0.6493539810180664
