In [430]:
import numpy as np
import time

In [431]:
NUM_WORKERS = 8
arr = np.arange(5_000_000)
# Chunk length handed to each worker. Rounded UP (ceil division) so that
# NUM_WORKERS consecutive slices of `size` elements always cover the whole
# array; floor division would leave the tail unsearched whenever len(arr) is
# not a multiple of NUM_WORKERS.
size = -(-len(arr) // NUM_WORKERS)
N = -1  # Worst-case scenario: search for an element that is not in the array

# Maps approach name -> elapsed seconds; filled in by the benchmark cells.
approach_time = {}

In [432]:
def compute_final_index(results, size):
    """Translate the first per-chunk hit into an index in the full array.

    ``results`` is walked in chunk order. Each entry is either ``None`` (no
    hit in that chunk) or a position relative to the start of its chunk, and
    chunk ``i`` is taken to begin at ``i * size``.

    Returns the global index of the first non-``None`` entry, or ``None`` when
    every entry is ``None`` (or ``results`` is empty).
    """
    # Lazy generator: entries after the first hit are never consumed.
    global_hits = (
        local_pos + chunk_no * size
        for chunk_no, local_pos in enumerate(results)
        if local_pos is not None
    )
    return next(global_hits, None)

In [433]:
from threading import Thread

def worker(arr, num, results, result_idx):
    """Linear-search ``arr`` for ``num`` and publish the first hit.

    The position of the first matching element (relative to the start of
    ``arr``) is written to ``results[result_idx]`` and the scan stops. If
    nothing matches, ``results`` is left unmodified.
    """
    length = len(arr)
    for idx in range(length):
        if num == arr[idx]:
            results[result_idx] = idx
            return

threads = []
results = [None] * NUM_WORKERS

# perf_counter() is a monotonic, high-resolution clock intended for timing
# short intervals; time.time() follows the wall clock and may jump.
start_time = time.perf_counter()

for i in range(NUM_WORKERS):
    start = i * size
    # The last worker runs to the end of the array so that a remainder (when
    # len(arr) is not a multiple of the chunk size) is still searched.
    end = len(arr) if i == NUM_WORKERS - 1 else start + size
    t = Thread(target=worker, args=(arr[start:end], N, results, i))
    threads.append(t)
    t.start()

# Wait for every worker before reading the shared results list.
for t in threads:
    t.join()

elem_idx = compute_final_index(results, size)

end_time = time.perf_counter()

print(f"Element {N} found at index: {elem_idx}" if elem_idx is not None else "Element not found")
print(f"Time taken (thread): {end_time - start_time:.4f} seconds")
approach_time['thread'] = end_time - start_time

Element not found
Time taken (thread): 0.3672 seconds


In [434]:
from multiprocessing import Process, Manager

def worker(arr, num, results, result_idx):
    """Scan ``arr`` from the front and record where ``num`` first occurs.

    On the first match, its offset within ``arr`` is stored in
    ``results[result_idx]`` and the function returns. When no element
    matches, nothing is written to ``results``.
    """
    for offset in range(len(arr)):
        if not (num == arr[offset]):
            continue
        results[result_idx] = offset
        return

processes = []

# Use the manager as a context manager so its server process is shut down when
# the cell finishes instead of lingering for the lifetime of the kernel.
with Manager() as manager:
    results = manager.list([None] * NUM_WORKERS)

    # perf_counter() is a monotonic, high-resolution clock intended for timing
    # short intervals; time.time() follows the wall clock and may jump.
    start_time = time.perf_counter()

    # Take care with the overhead of creating processes (especially if the
    # task is small, e.g. 50,000 elements).
    for i in range(NUM_WORKERS):
        start = i * size
        # The last worker runs to the end of the array so that a remainder
        # (when len(arr) is not a multiple of the chunk size) is still searched.
        end = len(arr) if i == NUM_WORKERS - 1 else start + size
        p = Process(target=worker, args=(arr[start:end], N, results, i))
        processes.append(p)
        p.start()

    # NOTE(review): exit codes are not checked, so a worker that crashes is
    # indistinguishable from one that found nothing.
    for p in processes:
        p.join()

    # Must run while the manager is still alive: `results` is a proxy object.
    elem_idx = compute_final_index(results, size)

    end_time = time.perf_counter()

print(f"Element {N} found at index: {elem_idx}" if elem_idx is not None else "Element not found")
print(f"Time taken (process): {end_time - start_time:.4f} seconds")
approach_time['process'] = end_time - start_time

Element not found
Time taken (process): 0.0740 seconds


In [435]:
from concurrent.futures import ThreadPoolExecutor

def worker(args):
    """Return the position of the first match inside one chunk, or ``None``.

    ``args`` is a 2-item sequence ``(chunk, target)``, packed into a single
    argument so the function can be used with ``executor.map``. The returned
    position is relative to the start of ``chunk``.
    """
    chunk, target = args
    hit = None
    for idx in range(len(chunk)):
        if target == chunk[idx]:
            hit = idx
            break
    return hit


# perf_counter() is a monotonic, high-resolution clock intended for timing
# short intervals; time.time() follows the wall clock and may jump.
start_time = time.perf_counter()

# Chunk i starts at i * size. The final boundary is len(arr), so the last chunk
# also picks up any remainder when len(arr) is not a multiple of the chunk size.
bounds = [i * size for i in range(NUM_WORKERS)] + [len(arr)]

with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
    results = executor.map(
        worker,
        [(arr[lo:hi], N) for lo, hi in zip(bounds, bounds[1:])]
    )
# Leaving the with-block waits for all submitted work, so `results` can be
# consumed here without blocking on unfinished tasks.
elem_idx = compute_final_index(results, size)

end_time = time.perf_counter()

print(f"Element {N} found at index: {elem_idx}" if elem_idx is not None else "Element not found")
print(f"Time taken (thread pool): {end_time - start_time:.4f} seconds")
approach_time['thread_pool'] = end_time - start_time

Element not found
Time taken (thread pool): 0.3250 seconds


In [436]:
from concurrent.futures import ProcessPoolExecutor

def worker(args):
    """Search one chunk for a value; return its first position or ``None``.

    ``args`` unpacks to ``(haystack, needle)``. The result is the offset of
    the first element equal to ``needle`` within ``haystack``; ``None`` means
    the chunk contains no match.
    """
    haystack, needle = args
    for position in range(len(haystack)):
        if not (needle == haystack[position]):
            continue
        return position
    return None


# perf_counter() is a monotonic, high-resolution clock intended for timing
# short intervals; time.time() follows the wall clock and may jump.
start_time = time.perf_counter()

# Chunk i starts at i * size. The final boundary is len(arr), so the last chunk
# also picks up any remainder when len(arr) is not a multiple of the chunk size.
bounds = [i * size for i in range(NUM_WORKERS)] + [len(arr)]

with ProcessPoolExecutor(max_workers=NUM_WORKERS) as executor:
    results = executor.map(
        worker,
        [(arr[lo:hi], N) for lo, hi in zip(bounds, bounds[1:])]
    )
# Leaving the with-block waits for all submitted work, so `results` can be
# consumed here without blocking on unfinished tasks.
elem_idx = compute_final_index(results, size)

end_time = time.perf_counter()

print(f"Element {N} found at index: {elem_idx}" if elem_idx is not None else "Element not found")
print(f"Time taken (process pool): {end_time - start_time:.4f} seconds")
approach_time['process_pool'] = end_time - start_time

Element not found
Time taken (process pool): 0.1495 seconds


In [437]:
# Serial approach for comparison
# Reset the result first: otherwise a "not found" run would report whatever
# value an earlier cell happened to leave in elem_idx.
elem_idx = None

# perf_counter() is a monotonic, high-resolution clock intended for timing
# short intervals; time.time() follows the wall clock and may jump.
start_time = time.perf_counter()

for pos in range(len(arr)):
    if N == arr[pos]:
        elem_idx = pos
        break  # stop at the first match, as the parallel workers do

end_time = time.perf_counter()

print(f"Element {N} found at index: {elem_idx}" if elem_idx is not None else "Element not found")
print(f"Time taken (serial): {end_time - start_time:.4f} seconds")
approach_time['serial'] = end_time - start_time

Element not found
Time taken (serial): 0.4127 seconds


In [438]:
# Raw timings, in the order the approaches were recorded.
print("Approach time comparison:")
for label, elapsed in approach_time.items():
    print(f"{label}: {elapsed:.4f} seconds")

# Speedup of every non-serial approach relative to the serial baseline.
print("\nSpeedup:")
for label, elapsed in approach_time.items():
    if label == 'serial':
        continue
    speedup = approach_time['serial'] / elapsed
    print(f"{label}: {speedup:.2f}x")

Approach time comparison:
thread: 0.3672 seconds
process: 0.0740 seconds
thread_pool: 0.3250 seconds
process_pool: 0.1495 seconds
serial: 0.4127 seconds

Speedup:
thread: 1.12x
process: 5.58x
thread_pool: 1.27x
process_pool: 2.76x
