In [1]:
import os
import time
from multiprocessing.pool import ThreadPool as Pool
#from multiprocessing import Pool

import numpy as np
import pandas as pd

- Restart the Jupyter kernel to clear all processes
- multiprocessing.pool.ThreadPool will work inside Jupyter instead of multiprocessing.Pool
    - https://superfastpython.com/threadpool-vs-pool-in-python/
    
- CPU Bound, I/O Bound, Cache Bound, Memory Bound:
    - https://stackoverflow.com/questions/868568/what-do-the-terms-cpu-bound-and-i-o-bound-mean

### Multiprocessing Computationally Simple Functions:

In [11]:
# Task identifiers 0..999, one per simulated job.
data_samples = list(range(1000))

def simple_func(sample_num):
    """Simulate a wait-dominated task by sleeping for 2.0 to 2.5 seconds.

    Parameters
    ----------
    sample_num : object
        Identifier of the task; it is only used in the progress message.

    Returns
    -------
    None
    """
    t = 2 + np.random.rand()*0.5
    time.sleep(t)
    # Emit the text and its newline in ONE write. A plain print() writes the
    # message and the trailing '\n' separately, which let lines from
    # concurrently running threads get glued together in the cell output.
    print(f'process {sample_num} executed after {t} s\n', end='')

# One worker thread per task (capped at 1000) so that all sleeps overlap.
pool = Pool(max(1, min(len(data_samples), 1000)))
t_ = time.time()
try:
    pending = pool.map_async(simple_func, data_samples)
    pool.close()   # no further tasks will be submitted
    pool.join()    # block until every submitted task has finished
    # Retrieve the result so that an exception raised inside a worker is
    # re-raised here; a discarded AsyncResult hides such failures.
    pending.get()
finally:
    # Harmless after a normal join(); releases the threads if we got
    # interrupted or a task failed.
    pool.terminate()
print(f'{len(data_samples)} tasks executed in {time.time() -t_} s')

process 79 executed after 2.0053774010783005 sprocess 59 executed after 2.002237964859456 s

process 46 executed after 2.0106260887209757 s
process 25 executed after 2.010575645766801 s
process 200 executed after 2.002991881389689 s
process 7 executed after 2.0257722483527436 sprocess 313 executed after 2.015190677675018 sprocess 286 executed after 2.015013475685625 sprocess 453 executed after 2.00982346061717 sprocess 574 executed after 2.0060495706388246 sprocess 535 executed after 2.014660883032735 sprocess 262 executed after 2.0111415358613094 sprocess 211 executed after 2.008454148272096 s
process 328 executed after 2.0196114404145598 s
process 534 executed after 2.009158239858886 s
process 136 executed after 2.0178569878369577 s

process 54 executed after 2.01828227259302 s
process 445 executed after 2.008044259522733 s
process 178 executed after 2.0137658615411147 s
process 584 executed after 2.006561438818197 s
process 643 executed after 2.004873513020439 s
process 112 executed

### Multiprocessing Computationally Expensive Functions:

In [None]:
# Flat list of 1012 task identifiers (0..1011). The previous nested form
# [[...]] held a single element, so only one task was dispatched and
# len(data_samples) reported 1.
data_samples = list(range(1012))

def heavy_func(sample_num):
    """Run a CPU-heavy dummy workload and print how long it took.

    The workload evaluates ``i**i`` for every ``i`` in ``range(1000)``; the
    values themselves are thrown away.

    Parameters
    ----------
    sample_num : object
        Identifier of the task; it is only used in the progress message.

    Returns
    -------
    None
    """
    # perf_counter() is the clock intended for measuring short intervals.
    start = time.perf_counter()
    # Only the cost of the computation matters, not its result.
    _ = [i**i for i in range(1000)]
    elapsed = time.perf_counter() - start
    # Single write (text + newline together) so that lines printed by
    # concurrent threads cannot interleave.
    print(f'Process {sample_num} executed after {elapsed} s\n', end='')

# Never start more worker threads than there are tasks (upper bound 1012).
pool = Pool(max(1, min(len(data_samples), 1012)))
t_ = time.time()
try:
    # Draining the iterator waits for every task and re-raises the first
    # exception thrown inside a worker.
    for result in pool.imap_unordered(heavy_func, data_samples):
        pass
    print(f'{len(data_samples)} tasks executed in {time.time() -t_} s')
finally:
    # Always release the worker threads, even when a task failed.
    pool.close()
    pool.join()
#https://docs.python.org/3/library/multiprocessing.html#multiprocessing.pool.Pool
#https://jupyter-tutorial.readthedocs.io/en/stable/performance/multiprocessing.html

### Multiprocessing on I/O-heavy Functions:

In [None]:
# Target file names '0.pkl' ... '49.pkl', one per write task.
file_names = [f'{idx}.pkl' for idx in range(50)]

# Simulated orderbook data: every row repeats the same five values derived
# from i four times (20 columns), with one row per i in 0 .. 72000*2 - 1.
global_datasample = [
    [i*2, i, i*0.02221, i, i*0.3] * 4
    for i in np.arange(72000*2)
]
global_df = pd.DataFrame(global_datasample)
def io_func(sample_num):
    """Pickle the shared ``global_df`` into ``temp_dir`` and print the write time.

    Parameters
    ----------
    sample_num : str
        Name of the file to create inside ``temp_dir`` (the pool passes the
        entries of ``file_names`` here, e.g. ``'0.pkl'``).

    Returns
    -------
    None
    """
    # to_pickle() does not create missing directories; make sure the target
    # exists before the timer starts so the measurement covers the write only.
    os.makedirs('temp_dir', exist_ok=True)
    t = time.time()
    # The argument is the file name. The body previously referenced an
    # undefined name `fn`, which raised NameError on every call.
    global_df.to_pickle('temp_dir/' + sample_num)
    # Single write (text + newline together) so that lines printed by
    # concurrent threads cannot interleave.
    print(f'Process executed after {time.time() - t} s\n', end='')

# One worker thread per file, but never more than 50.
pool = Pool(max(1, min(len(file_names), 50)))
t_ = time.time()
try:
    # Draining the iterator waits for every write and re-raises the first
    # exception thrown inside a worker.
    for result in pool.imap_unordered(io_func, file_names):
        pass
    print(f'{len(file_names)} tasks executed in {time.time() -t_} s')
finally:
    # Always release the worker threads, even when a write failed.
    pool.close()
    pool.join()


### Sources:

    - Pool docs:        https://docs.python.org/3/library/multiprocessing.html#multiprocessing.pool.Pool
    - Pool Cheat Sheet: https://marvelous-writer-6152.ck.page/bff2b11214
    - Pool Tutorial:    https://superfastpython.com/multiprocessing-pool-python/
    
    - Source of Code:   https://jupyter-tutorial.readthedocs.io/en/stable/performance/multiprocessing.html


In [62]:
# --- configuration -----------------------------------------------------
L = 6                   # blocks
N = 12                  # heads
E = 768                 # embedding dimension
B = 2048                # batch size
T = 512                 # sequence length
TOKS = 128100           # vocabulary size (number of tokens)
param_bytes = 4         # a float32 value occupies 4 bytes
bytes_to_gigs = 10**9   # one (decimal) gigabyte is a billion bytes

# --- counts ------------------------------------------------------------
# Per-block term times the number of blocks, plus the TOKS x E table.
model_params = L*(4*E**2 + 2*E*4*E + 4*E) + TOKS*E
# Per-position term scaled by every position of the batch (B*T).
act_params = (2*TOKS + L*(14*E + N*T)) * B * T
backprop_model_params = model_params * 3
backprop_act_params = act_params
# Equals 4*model_params + 2*act_params.
total_params = sum((model_params, act_params,
                    backprop_model_params, backprop_act_params))

# --- result ------------------------------------------------------------
gigabytes_used = param_bytes * total_params / bytes_to_gigs
gigabytes_used

3001.818759168

### Notes on Multithreading for Fetching:

    - Program was tested on a local Windows machine
    - Porting to AWS should be possible, threading in Dash should work too.
    - The task is I/O-bound: it is limited by request (read) time and puts only a light load on the CPU.
        - A ThreadPool is usually better suited to an I/O-bound task than a Pool
        - https://superfastpython.com/threadpool-vs-pool-in-python/
         
    - The thread number is limited to 1024 per program. 
        - Having more than 1000 tasks will often crash the program --> (ZMQError: Too many open files)
        - About 50 tasks are needed for HFT fetching

#### Tests:

    - For local 6 CPU cores and freq of 50 ms
    - And on Notebook 'Test Local Multiprocessing':
    
    - Saving 50 different currencies each with   6000 rows (~every 5  mins) in parallel takes  550 ms!
    - Saving 50 different currencies each with  18000 rows (~every 15 mins) in parallel takes  800 ms!
    - Saving 50 different currencies each with  36000 rows (~every 30 mins) in parallel takes 1100 ms!
    - Saving 50 different currencies each with  54000 rows (~every 45 mins) in parallel takes 1500 ms!
    - Saving 50 different currencies each with  72000 rows (~every 1   h  ) in parallel takes 2000 ms!
    - Saving 50 different currencies each with 108000 rows (~every 1.5 h  ) in parallel takes 3200 ms!
    - Saving 50 different currencies each with 144000 rows (~every 2   h  ) in parallel takes 7200 ms!
    
    --> Saving every 30 mins is recommended
    - This means 48 files get saved per day per currency; These have to get concatenated afterwards.

In [63]:
# NOTE(review): scratch arithmetic — what 7800 and 32 stand for is not
# documented anywhere in this notebook; TODO label the operands or remove.
7800/32

243.75

In [54]:
# Model parameter count expressed in millions.
model_params/1000000

140.86656

In [36]:
# NOTE(review): the recorded output (1536) corresponds to E = 384, not to the
# E = 768 assigned in the configuration cells of this notebook (which gives
# 3072) — stale output; re-run after Restart & Run All.
4*E

1536

In [42]:
# --- configuration -----------------------------------------------------
L = 12                  # blocks
N = 12                  # heads
E = 768                 # embedding dimension
B = 32                  # batch size
T = 512                 # sequence length
TOKS = 30255            # vocabulary size (number of tokens)
param_bytes = 4         # a float32 value occupies 4 bytes
bytes_to_gigs = 10**9   # one (decimal) gigabyte is a billion bytes

# --- counts ------------------------------------------------------------
# Per-block term times the number of blocks, plus the TOKS x E table.
model_params = L*(4*E**2 + 2*E*4*E + 4*E) + TOKS*E
# Per-position term scaled by every position of the batch (B*T).
act_params = (2*TOKS + L*(14*E + N*T)) * B * T
backprop_model_params = model_params * 3
backprop_act_params = act_params
# Equals 4*model_params + 2*act_params.
total_params = sum((model_params, act_params,
                    backprop_model_params, backprop_act_params))

# --- result ------------------------------------------------------------
gigabytes_used = param_bytes * total_params / bytes_to_gigs
gigabytes_used

36.237594624

In [43]:
# Display the combined count (model + activations + their backprop terms)
# that the estimate cell converts into gigabytes.
total_params

9059398656