<a href="https://colab.research.google.com/github/InNoobWeTrust/made-up-noob-algo/blob/main/parallel_programming/parallel_programming_practice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
'''
# Problem statement:
Given a sequence of numbers, find the missing number.

Example data: 1 2 3 5

Assumption:
- sorted in ascending order
- consecutive numbers differ by a fixed, known step
- data length n is a fixed constant
- fixed size, can be large (but manageable size)

Edge case:
- Array can start from any number
- No number overflow (int64)
'''

import random
import sys
import numpy as np

NUM_ITEMS = 10
def prepare_test_data(n = NUM_ITEMS, step = 1):
    '''
    TDD approach to problem: define test datasets first
    This kind of data is easy to augment

    Builds an arithmetic sequence of n + 1 numbers from a random
    non-negative start, then removes one randomly chosen interior item.

    Args:
        n: number of items in the returned list. n == 1 leaves no interior
           item to remove, so it falls back to NUM_ITEMS.
        step: difference between consecutive generated numbers (non-zero).

    Returns:
        (arr, missing): the list with one item removed, and the removed item.
        The first and last generated numbers are never the removed one.

    Raises:
        ValueError: if n < 1 or step == 0. random.randint also raises it
            when n * step is larger than sys.maxsize.
    '''
    if n < 1:
        # Without this guard random.choice fails on an empty range with an
        # IndexError that says nothing about the real cause
        raise ValueError(f'n must be at least 1, got {n}')
    if step == 0:
        raise ValueError('step must not be zero')
    if n == 1:
        n = NUM_ITEMS
    # randomly pick a start number, leaving room so the last item still
    # fits below sys.maxsize
    rand_start = random.randint(0, sys.maxsize - n * step)
    # generate n + 1 items so that n remain after the removal below
    arr = list(range(rand_start, rand_start + (n + 1) * step, step))
    # randomly remove a number in the middle (indices 1 .. n - 1, so neither
    # the first nor the last item can be picked)
    rand_idx = random.choice(range(1, n))
    missing = arr.pop(rand_idx)
    return arr, missing

In [2]:
'''
Check the generated test data
'''
# Build a small sample: 5 items spaced 2 apart, with one interior item removed
test_arr, missing_num = prepare_test_data(5, 2)
# Show the sample and the number that was removed from it
print('Test data:', test_arr)
print('Missing number:', missing_num)

Test data: [1776779146027347821, 1776779146027347823, 1776779146027347825, 1776779146027347827, 1776779146027347831]
Missing number: 1776779146027347829


In [3]:
'''
Intuition:
- We know the step size
- The first and last numbers of the array are never the missing number
- If two consecutive numbers in the array do not differ by exactly the
  step size, then a number is missing between them. Simple?
'''

def validate_increasing(a, b, step = 1):
    '''
    Tell whether `b` directly follows `a` in a sequence growing by `step`.

    Returns True when stepping back from `b` by `step` lands exactly on `a`,
    False otherwise.
    '''
    expected_previous = b - step
    return expected_previous == a

def check_missing_number(arr, step = 1):
    '''
    Scan every pair of neighbouring items and report the first gap.

    Returns the value expected right after the left item of the first pair
    that fails `validate_increasing`, or None when every pair passes
    (which includes inputs with fewer than 2 items).
    '''
    # Lazily yield the expected value for each broken pair, in array order
    gaps = (
        arr[pos] + step
        for pos in range(len(arr) - 1)
        if not validate_increasing(arr[pos], arr[pos + 1], step)
    )
    # Only the first broken pair matters; None when there is none
    return next(gaps, None)

import os
from multiprocessing.pool import ThreadPool

def check_missing_parallel(arr, step = 1):
    '''
    Find the missing number by scanning overlapping windows in a thread pool.

    - Split the data into contiguous windows with an overlap of 1 item, so
      the pair straddling each window boundary is checked as well.
    - Then for each window, let a pool thread run `check_missing_number`.
      Then wait and collect the results.
    - Return the first hit in array order.

    Args:
        arr: sliceable sequence of numbers expected to grow by `step`.
        step: expected difference between consecutive items.

    Returns:
        A one-element list holding the missing number, or None when no gap
        is found (this includes inputs with fewer than 2 items).
    '''
    num_pairs = len(arr) - 1
    if num_pairs < 1:
        # Nothing to compare, and ThreadPool rejects a worker count of 0
        return None
    # Never use more workers than there are adjacent pairs to check
    num_proc = min(os.cpu_count() or 2, num_pairs)
    # Ceiling division, so the trailing remainder is covered too
    window = -(-num_pairs // num_proc)
    # Window starts advance by `window`; each slice takes one extra item so
    # neighbouring windows share their boundary element
    splits = [arr[i : i + window + 1] for i in range(0, num_pairs, window)]
    with ThreadPool(num_proc) as pool:
        candidates = pool.map(lambda w: check_missing_number(w, step), splits)
    # Compare against None explicitly: a missing value of 0 is falsy and
    # must not be thrown away
    found = [c for c in candidates if c is not None]
    return found[:1] or None

In [4]:
%time
check_missing_number(test_arr)

CPU times: user 5 µs, sys: 0 ns, total: 5 µs
Wall time: 8.82 µs


1776779146027347822

In [5]:
%time
check_missing_parallel(test_arr)

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 8.11 µs


[1776779146027347822]