# Multiprocessing using Pools 
A simple framework for assessing the impact of multiprocessing on runtime on a multi-core machine. 

In [3]:
import time
import math
import multiprocessing
from multiprocessing import Pool

# A function for timing a job that uses a pool of processes.
#  f is a function that takes a single argument
#  data is an array of arguments on which f will be mapped
#  pool_size is the number of processes in the pool. 
def pool_process(f, data, pool_size):
    tp1 = time.time()
    pool = Pool(processes=pool_size) # initialize the Pool.
    result = pool.map(f, data)       # map f to the data using the Pool of processes to do the work 
    pool.close() # No more processes
    pool.join()  # Wait for the pool processing to complete. 
    #print("Results", result)
    print("Overall Time:", int(time.time()-tp1))
 

In [None]:
def my_func(x):
    s = math.sqrt(x)
    return s

In [None]:
# This verbose version shows which process in the pool is running each task. 
def my_func_verbose(x):
    s = math.sqrt(x)
    print("Task", multiprocessing.current_process(), x, s)
    return s

In [None]:
dataRange = range(10)

Use the pool_process function to apply my_func to the data in dataRange.  
This task is so light it requires very little processing time. 

In [None]:
#pool_process(my_func, dataRange, 1)
pool_process(my_func_verbose, dataRange, 2)

## A naive function for checking primes 

In [2]:
def check_prime(num):
    t1 = time.time()
    res = False
    if num > 0:
        # check for factors
        for i in range(2,num):
            if (num % i) == 0:
                print(num,"is not a prime number")
                print(i,"times",num//i,"is",num)
                print("Time:", int(time.time()-t1))
                break
        else:
            #print(num,"is a prime number")
            #print("Time:", time.time()-t1) 
            res = True
            # if input number is less than
            # or equal to 1, it is not prime
    return res


In [None]:
check_prime(15488801, )

In [None]:
check_prime(15488803)

## Multiprocessing Test 1 (check_prime function)

After ‘connecting’ the **check_prime** function to the Pool processing function, we will test it with 3 different input sizes (called **test_data**) to quantify the speedup achieved with multiple cores. The idea is to, for each **test_data**, check the performance obatained by 1, 2, 3 and 4 cores.

The different inputs will be:
- 1 prime number
- 30 prime numbers
- 70 prime numbers

The prime numbers were obtained from the website provided for the assignment.

In [19]:
# Setting different test_data variables

# 1 prime number
test_data1 = [60817387]

# 30 prime numbers
test_data2 = [5312207,302053,4580413,9406697,6975407,3130243,7880009,3330311,7041193,3097531,8566289,681997,8974241,1712311,9644909,8831479,9208561,2867077,7923737,2385553,9360979,2663497,5808083,8896171,3112919,1789783,2890127,4653511,7813661,3494669]

# 70 prime numbers
test_data3 = [8823413,1466771,1726379,8822159,2445769,1891429,1633171,9382757,7183783,1127183,234383,7032919,545599,6390653,8782919,4240069,9888803,268883,6961541,7662401,6874187,3587317,9087343,722489,2805071,884699,9549539,8492927,572707,2682359,2058197,2656663,9193193,1179281,283769,886339,8140529,1406089,4469753,2978047,3090679,6871679,5228351,9309341,1124239,1111853,1325227,5806987,9939647,6836369,3624289,1987483,779021,6896371,945899,3842431,9197509,1801549,3563407,7296871,1229209,5981851,2361379,4935607,1975609,1170437,1405721,7081603,2814179,1493071]

### Checking performances for test_data1

In [6]:
# 1 core
pool_process(check_prime, test_data1, 1)

Overall Time: 4


In [7]:
# 2 cores
pool_process(check_prime, test_data1, 2)

Overall Time: 4


In [8]:
# 3 cores
pool_process(check_prime, test_data1, 3)

Overall Time: 4


In [9]:
# 4 cores
pool_process(check_prime, test_data1, 4)

Overall Time: 4


### Checking performances for test_data2

In [20]:
# 1 core
pool_process(check_prime, test_data2, 1)

Overall Time: 11


In [21]:
# 2 cores
pool_process(check_prime, test_data2, 2)

Overall Time: 6


In [22]:
# 3 cores
pool_process(check_prime, test_data2, 3)

Overall Time: 6


In [23]:
# 4 cores
pool_process(check_prime, test_data2, 4)

Overall Time: 7


### Checking performances for test_data3

In [24]:
# 1 core
pool_process(check_prime, test_data3, 1)

Overall Time: 21


In [25]:
# 2 cores
pool_process(check_prime, test_data3, 2)

Overall Time: 12


In [26]:
# 3 cores
pool_process(check_prime, test_data3, 3)

Overall Time: 12


In [27]:
# 4 cores
pool_process(check_prime, test_data3, 4)

Overall Time: 13


**Note:** The results of these evaluations, alongside with the lessons that we could learn from them, are documented in a separate PDF file.

## Multiprocessing Test 2 (alternative function)
We now execute the same performance test using a different function related to integer numbers: the aim is to ask the machine to **find the maximum, the minimum in each sequence (list) contained in an array and return the greatest common divisor between each pair**. Each sequence is made by a custom number of randomly generated integers. 

The different inputs will be:
- 10 lists
- 30 lists
- 50 lists
- 100 lists

The method is defined as follows.

In [4]:
def gcd(my_list):
    """Find max and min, then compute greatest common divisor between the two"""

    def find_max(arr):
        maxi = 0
        for i in arr:
            if i > maxi:
                maxi = i
        return maxi

    def find_min(arr):
        mini = arr[0]
        for i in arr:
            if i < mini:
                mini = i
        return mini

    a = find_max(my_list)
    #print("Max in the list is: ", a)
    b = find_min(my_list)
    #print("Min in the list is: ", b)
    
    if (b == 0):
        return a
    else:
        my_newlist = []
        my_newlist.append(b)
        my_newlist.append(a % b)
        return gcd(my_newlist)

We used the **random** python library to develop a simple function to create a list of lists containing random integers, so we can use it on our test function **(gdc)** to assess how its performance changes when using multiprocessing.

In [5]:
import random
def random_integer_list(n, n2):
    """Generates a list containing n lists of n2 random integers each"""
    
    result_list = []
    # iterates over n number of lists
    for i in range(n):
        #creates a single list of n2 random integers
        random_list = random.sample(range(10**12), n2)
        result_list.append(random_list)
    return result_list

In [25]:
# Different test_data variables that will be used

# 30 lists
#test_data1 = random_integer_list(30, 10**6)

# 50 lists
#test_data2 = random_integer_list(50, 10**6)

# 70 lists
#test_data3 = random_integer_list(70, 10**6)

# 100 lists
#test_data4 = random_integer_list(100, 10**6)

### Checking performances for test_data1

In [8]:
# 1 core
pool_process(gcd, random_integer_list(30, 10**6), 1)

Overall Time: 6


In [9]:
# 2 cores
pool_process(gcd, random_integer_list(30, 10**6), 2)

Overall Time: 4


In [10]:
# 3 cores
pool_process(gcd, random_integer_list(30, 10**6), 3)

Overall Time: 4


In [11]:
# 4 cores
pool_process(gcd, random_integer_list(30, 10**6), 4)

Overall Time: 4


### Checking performances for test_data2

In [12]:
# 1 core
pool_process(gcd, random_integer_list(50, 10**6), 1)

Overall Time: 11


In [13]:
# 2 cores
pool_process(gcd, random_integer_list(50, 10**6), 2)

Overall Time: 7


In [14]:
# 3 cores
pool_process(gcd, random_integer_list(50, 10**6), 3)

Overall Time: 7


In [15]:
# 4 cores
pool_process(gcd, random_integer_list(50, 10**6), 4)

Overall Time: 7


### Checking performances for test_data3

In [16]:
# 1 core
pool_process(gcd, random_integer_list(70, 10**6), 1)

Overall Time: 15


In [17]:
# 2 cores
pool_process(gcd, random_integer_list(70, 10**6), 2)

Overall Time: 12


In [18]:
# 3 cores
pool_process(gcd, random_integer_list(70, 10**6), 3)

Overall Time: 11


In [19]:
# 4 cores
pool_process(gcd, random_integer_list(70, 10**6), 4)

Overall Time: 10


### Checking performances for test_data4

In [20]:
# 4 cores
pool_process(gcd, random_integer_list(100, 10**6), 1)

Overall Time: 37


In [22]:
# 4 cores
pool_process(gcd, random_integer_list(100, 10**6), 2)

Overall Time: 19


In [23]:
# 4 cores
pool_process(gcd, random_integer_list(100, 10**6), 3)

Overall Time: 19


In [24]:
# 4 cores
pool_process(gcd, random_integer_list(100, 10**6), 4)

Overall Time: 18


**Note:** The results of these evaluations, alongside with the lessons that we could learn from them, are documented in a separate PDF file.