In [1]:
"""
This notebook gives an easy usage example of the MP class and shows its outstanding performance, using matrix multiplication as an example.
"""

from mp import MP
%load_ext autoreload
%autoreload 2
NUM_THREADS=12 # The number of pthreads; best kept consistent with the number of CPUs you allocated
mlp=MP(NUM_THREADS)


In [2]:
from time import time

import numpy as np

N = 500   # side length of the square test matrices
SEED = 0  # fixed seed so that re-running the notebook regenerates the same matrices

# Seed NumPy's global legacy RNG (the one np.random.randn draws from); without
# this, every kernel restart produces different inputs and different results.
np.random.seed(SEED)
a = np.random.randn(N, N)
b = np.random.randn(N, N)

In [3]:
# Baseline: naive triple-loop matrix multiplication, one scalar multiply-add
# per innermost iteration (loop order i, k, j).
c = np.zeros_like(a)
st = time()
for i in range(N):
    for k in range(N):
        for j in range(N):
            c[i, j] += a[i, k] * b[k, j]
ed = time()
print(c)
print("Result Sum:", c.sum())
print("Result Std:", c.std())
print("Time:", ed - st)

[[-38.54724656  36.01183984 -12.96472016 ...  31.72509322  14.56102282
    3.51573581]
 [ 33.07941645 -12.8237037    7.56872683 ...   0.29008016 -24.90881766
  -65.54496598]
 [  6.00512148  45.22889745  -4.83857164 ... -19.4634574   -5.219751
  -16.40322885]
 ...
 [ -1.32718804 -22.37264872   2.60526424 ...  13.13904165  10.48330414
  -52.77687479]
 [ -9.25239185 -26.88676563 -10.46947294 ...   3.8999377   -1.03615562
    5.04060175]
 [ -7.18062409   9.09205416  20.04932519 ...   1.75636194  -3.5820881
   -6.17233997]]
Result Sum: -11997.284146891883
Result Std: 22.25576049932783
Time: 83.5760977268219


In [4]:
# Faster version: NumPy vectorises the innermost loop, so each (i, k) step
# adds a scaled row of b to row i of c in a single array operation.
c = np.zeros_like(a)
st = time()
for i in range(N):
    a_row = a[i]
    for k in range(N):
        c[i] += a_row[k] * b[k]
ed = time()
print(c)
print("Result Sum:", c.sum())
print("Result Std:", c.std())
print("Time:", ed - st)

[[-38.54724656  36.01183984 -12.96472016 ...  31.72509322  14.56102282
    3.51573581]
 [ 33.07941645 -12.8237037    7.56872683 ...   0.29008016 -24.90881766
  -65.54496598]
 [  6.00512148  45.22889745  -4.83857164 ... -19.4634574   -5.219751
  -16.40322885]
 ...
 [ -1.32718804 -22.37264872   2.60526424 ...  13.13904165  10.48330414
  -52.77687479]
 [ -9.25239185 -26.88676563 -10.46947294 ...   3.8999377   -1.03615562
    5.04060175]
 [ -7.18062409   9.09205416  20.04932519 ...   1.75636194  -3.5820881
   -6.17233997]]
Result Sum: -11997.284146891883
Result Std: 22.25576049932783
Time: 0.46635890007019043


In [6]:
# The inner FOR loop can be rewritten as a function. It should have similar performance to the block above.
c=np.zeros_like(a)

def calc_a_line(i):
    """Compute one output row of the matrix product.

    Reads the notebook-level ``a``, ``b`` and ``N`` and returns a new
    length-``N`` array equal to the sum over ``k`` in ``range(N)`` of
    ``a[i][k] * b[k]``.

    Args:
        i: index of the row of ``a`` (and of the result) to compute.

    Returns:
        A freshly allocated NumPy array holding the accumulated row.
    """
    row = np.zeros(N)
    a_row = a[i]
    for k in range(N):
        # Each step adds row k of b, scaled by a[i][k], in one vector operation.
        row += a_row[k] * b[k]
    return row

# Serial run: fill c one row at a time using calc_a_line.
st = time()
for i in range(N):
    c[i] = calc_a_line(i)
ed = time()

print(c)
print("Result Sum:", c.sum())
print("Result Std:", c.std())
print("Time:", ed - st)  # Time: 0.4195728302001953

[[-38.54724656  36.01183984 -12.96472016 ...  31.72509322  14.56102282
    3.51573581]
 [ 33.07941645 -12.8237037    7.56872683 ...   0.29008016 -24.90881766
  -65.54496598]
 [  6.00512148  45.22889745  -4.83857164 ... -19.4634574   -5.219751
  -16.40322885]
 ...
 [ -1.32718804 -22.37264872   2.60526424 ...  13.13904165  10.48330414
  -52.77687479]
 [ -9.25239185 -26.88676563 -10.46947294 ...   3.8999377   -1.03615562
    5.04060175]
 [ -7.18062409   9.09205416  20.04932519 ...   1.75636194  -3.5820881
   -6.17233997]]
Result Sum: -11997.284146891883
Result Std: 22.25576049932783
Time: 0.4195728302001953


In [10]:
# Use MPG to accelerate:
c=np.zeros_like(a)

# NOTE(review): this repeats the calc_a_line definition from the previous cell;
# consider defining it once and reusing it.
def calc_a_line(i):
    """Compute one output row of the matrix product.

    Reads the notebook-level ``a``, ``b`` and ``N`` and returns a new
    length-``N`` array equal to the sum over ``k`` in ``range(N)`` of
    ``a[i][k] * b[k]``.

    Args:
        i: index of the row of ``a`` (and of the result) to compute.

    Returns:
        A freshly allocated NumPy array holding the accumulated row.
    """
    row = np.zeros(N)
    a_row = a[i]
    for k in range(N):
        # Each step adds row k of b, scaled by a[i][k], in one vector operation.
        row += a_row[k] * b[k]
    return row
mlp = MP(NUM_THREADS, save_log=False)

st = time()
# Queue one task per output row, then collect all results.
for i in range(N):
    mlp.append(calc_a_line, i)
res = mlp.ret()

# NOTE(review): this assumes mlp.ret() yields entries in submission order and
# that each entry holds the function's return value under "res" — confirm against MP.
for i, ri in enumerate(res):
    c[i] = ri["res"]
ed = time()

print(c)
print("Result Sum:", c.sum())
print("Result Std:", c.std())
print("Time:", ed - st)  # Time: 0.13549447059631348, same result, about 3 times faster!

[[-38.54724656  36.01183984 -12.96472016 ...  31.72509322  14.56102282
    3.51573581]
 [ 33.07941645 -12.8237037    7.56872683 ...   0.29008016 -24.90881766
  -65.54496598]
 [  6.00512148  45.22889745  -4.83857164 ... -19.4634574   -5.219751
  -16.40322885]
 ...
 [ -1.32718804 -22.37264872   2.60526424 ...  13.13904165  10.48330414
  -52.77687479]
 [ -9.25239185 -26.88676563 -10.46947294 ...   3.8999377   -1.03615562
    5.04060175]
 [ -7.18062409   9.09205416  20.04932519 ...   1.75636194  -3.5820881
   -6.17233997]]
Result Sum: -11997.284146891883
Result Std: 22.25576049932783
Time: 0.13549447059631348


In [20]:
# How about when the matrix is larger?
M=5000  # side length of the larger square matrices
# Two M x M operands (about 200 MB each, assuming randn returns float64).
al=np.random.randn(M,M)
bl=np.random.randn(M,M)

def calc_a_line_large(i):
    """Compute one output row of the product of the large matrices.

    Reads the notebook-level ``al``, ``bl`` and ``M`` and returns a new
    length-``M`` array equal to the sum over ``k`` in ``range(M)`` of
    ``al[i][k] * bl[k]``.

    Args:
        i: index of the row of ``al`` (and of the result) to compute.

    Returns:
        A freshly allocated NumPy array holding the accumulated row.
    """
    row = np.zeros(M)
    al_row = al[i]
    for k in range(M):
        # Each step adds row k of bl, scaled by al[i][k], in one vector operation.
        row += al_row[k] * bl[k]
    return row

# traditional: serial row-by-row computation, used as the timing baseline
# Allocate the output before starting the clock so that the timed region
# covers the same work as the MPG run, which also allocates before timing.
cl = np.zeros_like(al)
st = time()
for i in range(M):
    cl[i] = calc_a_line_large(i)
ed = time()
td_time = ed - st
print(cl)
print("Result Sum:", np.sum(cl))
print("Result Std:", np.std(cl))
print("Traditional time:", td_time)

# MPG: the same row tasks, dispatched through MP
cl = np.zeros_like(al)

mlp = MP(NUM_THREADS, save_log=False)

st = time()
# Queue one task per output row, then collect all results.
for i in range(M):
    mlp.append(calc_a_line_large, i)
res = mlp.ret()

# NOTE(review): this assumes mlp.ret() yields entries in submission order and
# that each entry holds the function's return value under "res" — confirm against MP.
for i, ri in enumerate(res):
    cl[i] = ri["res"]

ed = time()
mpg_time = ed - st
print(cl)
print("Result Sum:", np.sum(cl))
print("Result Std:", np.std(cl))
print("MPG time:", mpg_time)
if mpg_time < td_time:
    print(f"MPG is {td_time/mpg_time}x faster than traditional!")
    # with more CPU on cluster, it can be faster!!!
else:
    # Report both measurements instead of discarding them, so a slower run can be diagnosed.
    print(f"MPG was not faster here: {mpg_time}s vs {td_time}s for traditional.")

[[-148.27804742  -21.07393565  121.61612768 ...  -54.31319525
    24.73537458  -50.41059962]
 [ 101.01427547   46.2548303   -39.30043569 ...   25.70911567
   -51.15757428  -29.11510455]
 [ 123.55361766  -37.44681831  -19.96556746 ...  -44.57204772
    34.46393536  107.19803579]
 ...
 [  83.75175845   87.28065218  119.05340351 ...   42.11870558
    80.35652569  -40.50169971]
 [  19.64125197 -108.75562702  -93.25897317 ...  -46.50976526
    -7.04327434   -0.35402183]
 [ -43.35918438   79.00445047  -34.54033525 ...  -55.6742761
   -83.76952225   43.64067504]]
Result Sum: -99082.72300692147
Result Std: 70.70402219370665
Traditional time: 110.14724016189575
[[-148.27804742  -21.07393565  121.61612768 ...  -54.31319525
    24.73537458  -50.41059962]
 [ 101.01427547   46.2548303   -39.30043569 ...   25.70911567
   -51.15757428  -29.11510455]
 [ 123.55361766  -37.44681831  -19.96556746 ...  -44.57204772
    34.46393536  107.19803579]
 ...
 [  83.75175845   87.28065218  119.05340351 ...   42.11

In [24]:
# Log-saving feature of MPG:
# NOTE(review): cl is reset to zeros here, but this cell never copies `res` into it.
cl=np.zeros_like(al)
mlp=MP(NUM_THREADS,save_log=True)  # same task submission as the earlier MPG run, but with save_log=True
for i in range(M):
    mlp.append(calc_a_line_large,i)    
res=mlp.ret()
# The executed function name, args, execution time for each pthread, and the function result or raised error will be saved in a table. See logs/20230111-190259.csv

In [None]:
# MPG works best when handling large datasets. For such examples, see the projects "adni_muse" and "epi_reg".

In [22]:
# Extra: by the way, NumPy's official np.matmul is still the best here (QAQ),
# while MPG can be used in more situations.
# No pre-allocation is needed: np.matmul returns a new array that is bound to cl.
st = time()
cl = np.matmul(al, bl)
ed = time()

np_time = ed - st
print(cl)
print("Result Sum:", np.sum(cl))
print("Result Std:", np.std(cl))
print("numpy time:", np_time)

[[-148.27804742  -21.07393565  121.61612768 ...  -54.31319525
    24.73537458  -50.41059962]
 [ 101.01427547   46.2548303   -39.30043569 ...   25.70911567
   -51.15757428  -29.11510455]
 [ 123.55361766  -37.44681831  -19.96556746 ...  -44.57204772
    34.46393536  107.19803579]
 ...
 [  83.75175845   87.28065218  119.05340351 ...   42.11870558
    80.35652569  -40.50169971]
 [  19.64125197 -108.75562702  -93.25897317 ...  -46.50976526
    -7.04327434   -0.35402183]
 [ -43.35918438   79.00445047  -34.54033525 ...  -55.6742761
   -83.76952225   43.64067504]]
Result Sum: -99082.72300692371
Result Std: 70.70402219370665
numpy time: 1.2069101333618164
