In [1]:
from src.process_parallel import *
from src.parallelizer import Parallelizer
import requests
import time
from tqdm import tqdm

# Parallelizer

In [2]:
# Build a list of 100 websites (for simplicity, the same URL repeated 100 times).
websites = ["https://octoshrew.com" for _ in range(100)]

# Wrap every URL in its own list so that each entry is a separate argument list.
parallel_websites = [[url] for url in websites]

# Pairs of numbers used as inputs for the multiplication example.
multi = [
    [0, 1], [1, 2], [2, 3], [3, 4],
    [5, 6], [6, 7], [7, 8], [8, 9],
]

In [3]:
# Create the Parallelizer object; functions and their inputs are added to it below.
parallelizer = Parallelizer()


# define some functions
def scrape(website, timeout=10):
    """Download a page and return its raw body.

    Args:
        website: URL to fetch.
        timeout: Seconds to wait for the server to connect / send data
            before giving up. Without a timeout ``requests`` waits
            indefinitely, so a single stalled server would block its
            worker thread forever.

    Returns:
        The response body as ``bytes``.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    return requests.get(website, timeout=timeout).content

def multiply(x, y):
    """Return the product of ``x`` and ``y``."""
    product = x * y
    return product

# Add the functions and their inputs to the parallelizer.
# Each input is a list of argument lists: a single-URL list per scrape call,
# an [x, y] pair per multiply call.
parallelizer.add_function(scrape, parallel_websites)
parallelizer.add_function(multiply, multi)

In [4]:
# Execute everything that was added above; pick whatever thread count you like.
result = parallelizer.run(n_threads=30)

Results complete


In [5]:
# Show the outputs of the multiply calls, looked up by the function's name.
result["multiply"]

[0, 2, 6, 12, 30, 42, 56, 72]

# Process One Function in Parallel

Running it traditionally (one request after another) takes roughly 12–14 seconds (may vary depending on your internet speed).

In [3]:
# Baseline: fetch the websites one after another.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() can jump if the system clock is adjusted.
start = time.perf_counter()
responses = [requests.get(website) for website in tqdm(websites)]
stop = time.perf_counter()
print(f"Execution time: {stop-start}")

100%|██████████| 100/100 [00:12<00:00,  7.95it/s]

Execution time: 12.618801832199097





With the convenient `process_parallel` function, we can get it done in ~2 seconds with fewer lines of code.

In [4]:
# Same requests as the sequential baseline, this time run with n_threads=50.
# perf_counter() is a monotonic clock, so the elapsed time cannot be skewed
# by system clock adjustments the way time.time() can.
start = time.perf_counter()
results = process_parallel(requests.get, parallel_websites, n_threads=50)
stop = time.perf_counter()
print(f"Execution time: {stop-start}")

Results complete
Execution time: 2.408428907394409


# Find first returned result

This time around, let's say we have multiple websites, and all of them have the information we need (say we want to serve fast information about the weather and have multiple websites we can get that info from). We want to query all of the websites and then take the result from the one that responds the fastest:

In [5]:
# Query every website and time how long it takes to get a result back.
# perf_counter() is a monotonic clock, so the elapsed time cannot be skewed
# by system clock adjustments the way time.time() can.
start = time.perf_counter()
results = process_first(requests.get, parallel_websites, n_threads=100)
stop = time.perf_counter()
print(f"Execution time: {stop-start}")

Execution time: 0.28972387313842773


Note: this is also extremely useful in situations such as pathfinding and other computational problems where you can explore multiple options but only care about the one that returns a result first.

# Parallelization with multiple functions

This works similarly to the previous `process_parallel` function, but here different functions can be passed at the same time: one function per argument list.

In [7]:
def funcA(x, y):
    """Return the product of ``x`` and ``y``."""
    return x * y


def funcB(x, y):
    """Return the sum of ``x`` and ``y``."""
    return x + y

# Argument lists and the functions to run, given as two lists of equal length.
args = [
    [1, 2],
    [3, 4],
    [5, 6],
    [7, 8],
]
funcs = [
    funcA,
    funcB,
    funcA,
    funcB,
]

results = process_parallel_multifunc(funcs, args, timeout=5)

# Print the last entry of the results.
print(results[-1])

Results complete
15
