# Execution Benchmarking

In this module, we will look at how threading and multiprocessing perform on several different tasks.

In [1]:
from utils.timer import DecoTimer

import os
import requests
import uuid
from queue import Queue
import logging

from threading import Thread
from multiprocessing.pool import Pool


In [2]:
# Emit INFO-level records so the per-file progress messages logged below are shown.
logging.basicConfig(level=logging.INFO)
# Module-level logger used by the helper functions defined in this notebook.
logger = logging.getLogger(__name__)

### 1. Download Images

In [3]:
# Number of pictures every benchmark in this notebook works on.
num_of_pics = 20
# One 800x800 picsum URL per picture index.
image_endpoints = [
    f'https://picsum.photos/800/800?image={picture_index}'
    for picture_index in range(num_of_pics)
]
# Random 8-character hex stems used as local file names (first 8 hex digits of a UUID4).
filenames = [uuid.uuid4().hex[:8] for _unused in range(num_of_pics)]

def download_image(image_url, filename):
    """Download ``image_url`` and save the response body as ``images/<filename>.bmp``.

    The body is written to disk chunk by chunk. Reading ``response.content``
    (as the previous version did) loads the whole payload into memory even
    when ``stream=True`` is set, so it did not actually limit memory use.

    Args:
        image_url: URL to fetch.
        filename: File stem (without extension) inside the ``images/`` directory.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.RequestException: on connection problems or timeout.
        OSError: if the target file cannot be written.
    """
    # The context manager releases the connection even if writing fails.
    with requests.get(
        image_url,
        stream=True,
        headers={'Cache-Control': 'no-cache'},
        timeout=30,  # do not let one stalled request block a benchmark forever
    ) as response:
        # Fail loudly instead of saving an HTML error page as an image.
        response.raise_for_status()
        with open(f'images/{filename}.bmp', 'wb') as handler:
            for chunk in response.iter_content(chunk_size=8192):
                handler.write(chunk)
    logger.info(f'writing {filename}')

def create_image_dir():
    """Create the ``images/`` directory in the current working directory.

    Uses ``os.makedirs`` instead of shelling out to ``mkdir -p`` so it works
    on any platform and raises on real failures instead of ignoring them.
    An already existing directory is not an error.

    Returns:
        The relative path of the directory (``"images"``), so the caller can
        keep it in a variable.

    Raises:
        OSError: if the directory cannot be created.
    """
    os.makedirs("images", exist_ok=True)
    return "images"

def remove_images():
    """Delete the files stored directly inside ``images/``.

    Replaces the former ``rm images/*`` shell call with standard-library code,
    so it works without a POSIX shell and real failures raise instead of being
    ignored. Like the shell glob, it skips dot-files and leaves
    sub-directories alone. A missing ``images/`` directory is treated as
    "nothing to clean up".

    Raises:
        OSError: if a file exists but cannot be removed.
    """
    try:
        with os.scandir("images") as entries:
            targets = [
                entry.path
                for entry in entries
                if not entry.name.startswith(".")
                and not entry.is_dir(follow_symlinks=False)
            ]
    except FileNotFoundError:
        return
    for path in targets:
        os.remove(path)

# Make sure images/ exists before any of the download cells below write into it.
download_dir = create_image_dir()

#### 1.1 Single Threaded

In [5]:

# Baseline: fetch every image one after another on the main thread.
with DecoTimer("Download Image - Single Thread"):
    for download_job in zip(image_endpoints, filenames):
        download_image(*download_job)


>>>>Starting Function Download Image - Single Thread...


INFO:__main__:writing 2d65d646
INFO:__main__:writing 3c72af7b
INFO:__main__:writing a78d53a0
INFO:__main__:writing 6b145f94
INFO:__main__:writing a6b52912
INFO:__main__:writing a1343615
INFO:__main__:writing 234ade86
INFO:__main__:writing 9d50f75b
INFO:__main__:writing 22cc0a70
INFO:__main__:writing 1946a05d
INFO:__main__:writing 74cced27
INFO:__main__:writing ff786e05
INFO:__main__:writing bb31b409
INFO:__main__:writing 251e4326
INFO:__main__:writing aa3d339d
INFO:__main__:writing c11b37e8
INFO:__main__:writing 2e2aafe1
INFO:__main__:writing 66df0be7
INFO:__main__:writing f5abd9a8
INFO:__main__:writing 76dfca0b


<<Finished function Download Image - Single Thread in 20.550068140029907 seconds


#### 1.2 Multithreading Example

In [15]:
class DownloadWorker(Thread):
    """Worker thread that downloads the images described by queue items.

    Every item taken from the queue must be an ``(image_endpoint, filename)``
    tuple; it is passed on to ``download_image``.
    """

    def __init__(self, queue):
        """Store the shared work queue this worker consumes from."""
        super().__init__()
        self.queue = queue  # work queue of (image_endpoint, filename) tuples

    def run(self):
        """Consume queue items forever, one download per item.

        A failing item is logged and skipped. Previously the exception ended
        the thread, and once every worker had died ``queue.join()`` in the
        main thread would block forever on the remaining items.
        """
        while True:
            job = self.queue.get()
            try:
                # Unpack inside the try so a malformed item still reaches task_done().
                image_endpoint, filename = job
                download_image(image_endpoint, filename)
            except Exception:
                logger.exception('failed to download %r', job)
            finally:
                # Always acknowledge the item so queue.join() can return.
                self.queue.task_done()

with DecoTimer("Download Image - Multithreading with 4 Workers"):
    queue = Queue()
    # Start four consumers; marking them as daemons lets the main thread exit
    # even though each worker blocks forever waiting for more work.
    for _ in range(4):
        worker = DownloadWorker(queue)
        worker.daemon = True
        worker.start()
    # Feed the (endpoint, filename) pairs to the workers from the main thread.
    for download_job in zip(image_endpoints, filenames):
        queue.put(download_job)
    # Block until every queued item has been acknowledged with task_done().
    queue.join()

INFO:__main__:writing 0b74dfed


>>>>Starting Function Download Image - Multithreading with 4 Workers...


INFO:__main__:writing 63a7c567
INFO:__main__:writing 896f3cb8
INFO:__main__:writing 6a4f4b62
INFO:__main__:writing 62d4211d
INFO:__main__:writing 12b75344
INFO:__main__:writing 156b9285
INFO:__main__:writing a5dab5f3
INFO:__main__:writing a12089a8
INFO:__main__:writing 64e84ef4
INFO:__main__:writing 3927960b
INFO:__main__:writing c654d4e9
INFO:__main__:writing dadeaa9d
INFO:__main__:writing 9c8ffd24
INFO:__main__:writing b4bf86c2
INFO:__main__:writing 1e8874b9
INFO:__main__:writing 662e7747
INFO:__main__:writing 17d1b34e
INFO:__main__:writing 51061329
INFO:__main__:writing 029844e0


<<Finished function Download Image - Multithreading with 4 Workers in 2.2167422771453857 seconds


#### 1.3 Multiprocessing Example

In [17]:

with DecoTimer("Download Image - Multiprocessing with 4 Workers"):
    # One (endpoint, filename) argument tuple per download.
    args = [(endpoint, stem) for endpoint, stem in zip(image_endpoints, filenames)]
    # starmap unpacks each tuple into download_image(endpoint, filename).
    with Pool(4) as pool:
        pool.starmap(download_image, args)


>>>>Starting Function Download Image - Multiprocessing with 4 Workers...


INFO:__main__:writing 12b75344
INFO:__main__:writing 6a4f4b62
INFO:__main__:writing 896f3cb8
INFO:__main__:writing 62d4211d
INFO:__main__:writing 0b74dfed
INFO:__main__:writing a5dab5f3
INFO:__main__:writing 156b9285
INFO:__main__:writing dadeaa9d
INFO:__main__:writing 64e84ef4
INFO:__main__:writing 63a7c567
INFO:__main__:writing c654d4e9
INFO:__main__:writing a12089a8
INFO:__main__:writing b4bf86c2
INFO:__main__:writing 3927960b
INFO:__main__:writing 662e7747
INFO:__main__:writing 1e8874b9
INFO:__main__:writing 51061329
INFO:__main__:writing 9c8ffd24
INFO:__main__:writing 029844e0
INFO:__main__:writing 17d1b34e


<<Finished function Download Image - Multiprocessing with 4 Workers in 2.072489023208618 seconds


### 2. Load Image and convert it to Numpy Array

<i>Make sure you have the images downloaded in the images/ directory from the previous step.</i>

In [8]:
from PIL import Image
import numpy as np

def image_read_as_nparray(filename):
    """Load ``images/<filename>.bmp`` and return its pixels as a NumPy array.

    Args:
        filename: File stem (without extension) inside the ``images/`` directory.

    Returns:
        Array of shape ``(height, width, 3)`` holding the RGB channel values.
        It is built from ``getdata()``, so it uses NumPy's default integer
        dtype rather than ``uint8``.

    Raises:
        FileNotFoundError: if the image file does not exist.
    """
    # The context manager closes the underlying file once the pixels are read.
    with Image.open(f'images/{filename}.bmp') as image:
        logger.info(f'loading image {filename}')
        # PIL reports size as (width, height); NumPy wants rows (height) first.
        # The previous reshape(size[0], size[1], 3) was only right for square images.
        width, height = image.size
        # Force three channels so the reshape below cannot fail on grayscale/RGBA files.
        pixels = np.array(image.convert('RGB').getdata())
    return pixels.reshape(height, width, 3)

#### 2.1 Single Threaded Example

In [9]:
# Baseline: decode every image sequentially on the main thread.
with DecoTimer("Loading Images and Convert it to Numpy Array - Single Threaded Example"):
    images = list(map(image_read_as_nparray, filenames))


INFO:__main__:loading image 2d65d646


>>>>Starting Function Loading Images and Convert it to Numpy Array - Single Threaded Example...


INFO:__main__:loading image 3c72af7b
INFO:__main__:loading image a78d53a0
INFO:__main__:loading image 6b145f94
INFO:__main__:loading image a6b52912
INFO:__main__:loading image a1343615
INFO:__main__:loading image 234ade86
INFO:__main__:loading image 9d50f75b
INFO:__main__:loading image 22cc0a70
INFO:__main__:loading image 1946a05d
INFO:__main__:loading image 74cced27
INFO:__main__:loading image ff786e05
INFO:__main__:loading image bb31b409
INFO:__main__:loading image 251e4326
INFO:__main__:loading image aa3d339d
INFO:__main__:loading image c11b37e8
INFO:__main__:loading image 2e2aafe1
INFO:__main__:loading image 66df0be7
INFO:__main__:loading image f5abd9a8
INFO:__main__:loading image 76dfca0b


<<Finished function Loading Images and Convert it to Numpy Array - Single Threaded Example in 12.624919176101685 seconds


#### 2.2 Multithreading Example

In [10]:
class LoadingWorker(Thread):
    """Worker thread that loads images named on one queue and emits arrays on another.

    Items taken from ``in_queue`` are file stems passed to
    ``image_read_as_nparray``; each resulting array is put on ``out_queue``.
    """

    def __init__(self, in_queue, out_queue):
        """Store the input (file stems) and output (arrays) queues."""
        super().__init__()
        self.in_queue = in_queue  # file stems waiting to be loaded
        self.out_queue = out_queue  # loaded arrays, in completion order

    def run(self):
        """Consume file stems forever, loading one image per item.

        A failing item is logged and skipped (nothing is put on ``out_queue``
        for it). Previously the exception ended the thread, and once every
        worker had died ``in_queue.join()`` would block forever.
        """
        while True:
            filename = self.in_queue.get()
            try:
                self.out_queue.put(image_read_as_nparray(filename))
            except Exception:
                logger.exception('failed to load image %r', filename)
            finally:
                # Always acknowledge the item so in_queue.join() can return.
                self.in_queue.task_done()

with DecoTimer("Loading Images and Convert it to Numpy Array - Multithreading Example"):
    in_queue, out_queue = Queue(), Queue()
    # Four daemon workers read file stems from in_queue and push arrays to out_queue.
    for _ in range(4):
        worker = LoadingWorker(in_queue, out_queue)
        worker.daemon = True
        worker.start()

    for filename in filenames:
        in_queue.put(filename)
    # Wait until every file stem has been processed.
    in_queue.join()

    # Drain the results; they arrive in completion order, not in `filenames` order.
    images = []
    while not out_queue.empty():
        try:
            images.append(out_queue.get())
        finally:
            out_queue.task_done()
    out_queue.join()

        
    
    

INFO:__main__:loading image 2d65d646
INFO:__main__:loading image 6b145f94
INFO:__main__:loading image 3c72af7b
INFO:__main__:loading image a78d53a0


>>>>Starting Function Loading Images and Convert it to Numpy Array - Multithreading Example...


INFO:__main__:loading image a6b52912
INFO:__main__:loading image 234ade86
INFO:__main__:loading image a1343615
INFO:__main__:loading image 9d50f75b
INFO:__main__:loading image 22cc0a70
INFO:__main__:loading image 1946a05d
INFO:__main__:loading image 74cced27
INFO:__main__:loading image 251e4326
INFO:__main__:loading image bb31b409
INFO:__main__:loading image ff786e05
INFO:__main__:loading image aa3d339d
INFO:__main__:loading image 2e2aafe1
INFO:__main__:loading image c11b37e8
INFO:__main__:loading image f5abd9a8
INFO:__main__:loading image 66df0be7
INFO:__main__:loading image 76dfca0b


<<Finished function Loading Images and Convert it to Numpy Array - Multithreading Example in 13.617112159729004 seconds


#### 2.3 Multiprocessing Example

In [11]:
# Decode the images in a pool of four worker processes; map keeps `filenames` order.
with DecoTimer("Load Images and Convert it to Numpy Array - Multiprocessing Example"), Pool(4) as pool:
    images = pool.map(image_read_as_nparray, filenames)
        

>>>>Starting Function Load Images and Convert it to Numpy Array - Multiprocessing Example...


INFO:__main__:loading image 2d65d646
INFO:__main__:loading image a78d53a0
INFO:__main__:loading image a6b52912
INFO:__main__:loading image 234ade86
INFO:__main__:loading image 9d50f75b
INFO:__main__:loading image 3c72af7b
INFO:__main__:loading image a1343615
INFO:__main__:loading image 6b145f94
INFO:__main__:loading image 22cc0a70
INFO:__main__:loading image 74cced27
INFO:__main__:loading image 1946a05d
INFO:__main__:loading image bb31b409
INFO:__main__:loading image aa3d339d
INFO:__main__:loading image ff786e05
INFO:__main__:loading image 251e4326
INFO:__main__:loading image c11b37e8
INFO:__main__:loading image 2e2aafe1
INFO:__main__:loading image f5abd9a8
INFO:__main__:loading image 66df0be7
INFO:__main__:loading image 76dfca0b


<<Finished function Load Images and Convert it to Numpy Array - Multiprocessing Example in 8.577088117599487 seconds


### 3. Grayscale Image

In [32]:
def grayscale_for_loop(image, n):
    """Grayscale ``image`` in place with an explicit per-pixel Python loop.

    Each pixel's first three channels are replaced by their average. The input
    is modified, which is why the notebook reloads ``images`` before every
    benchmark.

    Args:
        image: Indexable ``rows x columns x channels`` pixel container
            (a NumPy array in this notebook); modified in place.
        n: Position of the image in the batch; only image 0 is displayed.

    Returns:
        The PIL image built from the grayscaled pixels.
    """
    # Iterating yields writable rows/pixels, so no index arithmetic is needed.
    for row in image:
        for pixel in row:
            average = (pixel[0] + pixel[1] + pixel[2]) / 3
            pixel[0] = pixel[1] = pixel[2] = average
    im = Image.fromarray(np.uint8(image))
    # Use the module logger like the other helpers instead of the root logger.
    logger.info('Grayscaled image')
    if n == 0:
        im.show()  # only display one image to prove that the image is grayscaled
    return im

#### 3.1 Single Threaded (using For-Loop) 51-53 seconds

In [33]:
# Reload the pixel arrays from disk with a 4-process pool; the grayscale
# functions modify their input arrays in place.
with DecoTimer("Resetting Images"), Pool(4) as pool:
    images = pool.map(image_read_as_nparray, filenames)
# Display the first reloaded image as a visual check.
reloaded_preview = Image.fromarray(np.uint8(images[0]))
reloaded_preview.show()

>>>>Starting Function Resetting Images...


INFO:__main__:loading image a78d53a0
INFO:__main__:loading image 2d65d646
INFO:__main__:loading image a6b52912
INFO:__main__:loading image 234ade86
INFO:__main__:loading image 6b145f94
INFO:__main__:loading image 3c72af7b
INFO:__main__:loading image 9d50f75b
INFO:__main__:loading image a1343615
INFO:__main__:loading image 22cc0a70
INFO:__main__:loading image 74cced27
INFO:__main__:loading image bb31b409
INFO:__main__:loading image 1946a05d
INFO:__main__:loading image aa3d339d
INFO:__main__:loading image ff786e05
INFO:__main__:loading image 251e4326
INFO:__main__:loading image c11b37e8
INFO:__main__:loading image 2e2aafe1
INFO:__main__:loading image f5abd9a8
INFO:__main__:loading image 66df0be7
INFO:__main__:loading image 76dfca0b


<<Finished function Resetting Images in 8.712460041046143 seconds


In [34]:
# Baseline: grayscale every image sequentially on the main thread.
with DecoTimer("Grayscale Image - Single Threaded For Loop"):
    for n in range(len(images)):
        image = images[n]
        im = grayscale_for_loop(image, n)

>>>>Starting Function Grayscale Image - Single Threaded For Loop...


INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image


<<Finished function Grayscale Image - Single Threaded For Loop in 52.40959429740906 seconds


#### 3.2 Multithreading Example (using For-Loop) 55-58 seconds

In [37]:
# Reload the pixel arrays from disk with a 4-process pool; the grayscale
# functions modify their input arrays in place.
with DecoTimer("Resetting Images"), Pool(4) as pool:
    images = pool.map(image_read_as_nparray, filenames)
# Display the first reloaded image as a visual check.
reloaded_preview = Image.fromarray(np.uint8(images[0]))
reloaded_preview.show()

>>>>Starting Function Resetting Images...


INFO:__main__:loading image a78d53a0
INFO:__main__:loading image a6b52912
INFO:__main__:loading image 2d65d646
INFO:__main__:loading image 234ade86
INFO:__main__:loading image 6b145f94
INFO:__main__:loading image a1343615
INFO:__main__:loading image 9d50f75b
INFO:__main__:loading image 3c72af7b
INFO:__main__:loading image 22cc0a70
INFO:__main__:loading image 74cced27
INFO:__main__:loading image 1946a05d
INFO:__main__:loading image bb31b409
INFO:__main__:loading image aa3d339d
INFO:__main__:loading image ff786e05
INFO:__main__:loading image 2e2aafe1
INFO:__main__:loading image 251e4326
INFO:__main__:loading image c11b37e8
INFO:__main__:loading image f5abd9a8
INFO:__main__:loading image 66df0be7
INFO:__main__:loading image 76dfca0b


<<Finished function Resetting Images in 8.421438932418823 seconds


In [38]:
class GrayscaleWorker(Thread):
    """Worker thread that grayscales the images described by queue items.

    Every item taken from the queue must be an ``(n, image)`` tuple; it is
    passed on as ``grayscale_for_loop(image, n)``.
    """

    def __init__(self, queue):
        """Store the shared work queue this worker consumes from."""
        super().__init__()
        self.queue = queue  # work queue of (n, image) tuples

    def run(self):
        """Consume queue items forever, grayscaling one image per item.

        A failing item is logged and skipped. Previously the exception ended
        the thread, and once every worker had died ``queue.join()`` in the
        main thread would block forever on the remaining items.
        """
        while True:
            job = self.queue.get()
            try:
                # Unpack inside the try so a malformed item still reaches task_done().
                n, image = job
                # The returned PIL image is not needed here; the array is changed in place.
                grayscale_for_loop(image, n)
            except Exception:
                logger.exception('failed to grayscale a queued image')
            finally:
                # Always acknowledge the item so queue.join() can return.
                self.queue.task_done()

with DecoTimer("Grayscale Image - Multithreading For Loop"):
    queue = Queue()
    # Start four daemon consumers that block on the queue waiting for work.
    for _ in range(4):
        worker = GrayscaleWorker(queue)
        worker.daemon = True
        worker.start()

    # Queue one (index, array) item per image, then wait for all of them.
    for indexed_image in enumerate(images):
        queue.put(indexed_image)
    queue.join()

>>>>Starting Function Grayscale Image - Multithreading For Loop...


INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image


<<Finished function Grayscale Image - Multithreading For Loop in 55.79206895828247 seconds


#### 3.3 Multiprocessing Example (using For-Loop) 28-30 seconds

In [39]:
# Reload the pixel arrays from disk with a 4-process pool; the grayscale
# functions modify their input arrays in place.
with DecoTimer("Resetting Images"), Pool(4) as pool:
    images = pool.map(image_read_as_nparray, filenames)
# Display the first reloaded image as a visual check.
reloaded_preview = Image.fromarray(np.uint8(images[0]))
reloaded_preview.show()

>>>>Starting Function Resetting Images...


INFO:__main__:loading image a78d53a0
INFO:__main__:loading image a6b52912
INFO:__main__:loading image 2d65d646
INFO:__main__:loading image 234ade86
INFO:__main__:loading image a1343615
INFO:__main__:loading image 3c72af7b
INFO:__main__:loading image 9d50f75b
INFO:__main__:loading image 6b145f94
INFO:__main__:loading image 22cc0a70
INFO:__main__:loading image 74cced27
INFO:__main__:loading image bb31b409
INFO:__main__:loading image aa3d339d
INFO:__main__:loading image 1946a05d
INFO:__main__:loading image ff786e05
INFO:__main__:loading image 251e4326
INFO:__main__:loading image c11b37e8
INFO:__main__:loading image 2e2aafe1
INFO:__main__:loading image f5abd9a8
INFO:__main__:loading image 66df0be7
INFO:__main__:loading image 76dfca0b


<<Finished function Resetting Images in 8.24036192893982 seconds


In [40]:

# Grayscale in four worker processes; each task is an (image, index) pair
# that starmap unpacks into grayscale_for_loop(image, n).
with DecoTimer("Grayscale Image - MultiProcessing Example "), Pool(4) as pool:
    im_arr = pool.starmap(
        grayscale_for_loop,
        [(image_pixels, position) for position, image_pixels in enumerate(images)],
    )
        

>>>>Starting Function Grayscale Image - MultiProcessing Example ...


INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image


<<Finished function Grayscale Image - MultiProcessing Example  in 28.870956897735596 seconds


#### 3.4 Numpy Vector Operations (Single Thread, Eliminate Inner Loops) 48~51 seconds

In [77]:
# Reload the pixel arrays from disk with a 4-process pool; the grayscale
# functions modify their input arrays in place.
with DecoTimer("Resetting Images"), Pool(4) as pool:
    images = pool.map(image_read_as_nparray, filenames)
# Display the first reloaded image as a visual check.
reloaded_preview = Image.fromarray(np.uint8(images[0]))
reloaded_preview.show()

>>>>Starting Function Resetting Images...


INFO:__main__:loading image 2d65d646
INFO:__main__:loading image a78d53a0
INFO:__main__:loading image 234ade86
INFO:__main__:loading image a6b52912
INFO:__main__:loading image 9d50f75b
INFO:__main__:loading image 3c72af7b
INFO:__main__:loading image 6b145f94
INFO:__main__:loading image a1343615
INFO:__main__:loading image 22cc0a70
INFO:__main__:loading image 74cced27
INFO:__main__:loading image bb31b409
INFO:__main__:loading image 1946a05d
INFO:__main__:loading image aa3d339d
INFO:__main__:loading image ff786e05
INFO:__main__:loading image 251e4326
INFO:__main__:loading image c11b37e8
INFO:__main__:loading image 2e2aafe1
INFO:__main__:loading image f5abd9a8
INFO:__main__:loading image 66df0be7
INFO:__main__:loading image 76dfca0b


<<Finished function Resetting Images in 8.567822217941284 seconds


In [80]:
def grayscale_vector(image, n):
    average = ((image[:, :, 0] + image[:, :, 1] + image[:, :, 2]) / 3).astype(int)
    image[:, :, 0] = image[:, :, 1] = image[:, :, 2] = average
    im = Image.fromarray(np.uint8(image)).show()
    logging.info('Grayscaled image')
    if n == 0:
        im.show()
    return im

In [85]:
with DecoTimer("Single Threaded with Numpy Vectorization, Eliminate Inner Loop"):
    for n, image in enumerate(images):
        im = grayscale_for_loop(image, n)

>>>>Starting Function Single Threaded with Numpy Vectorization, Eliminate Inner Loop...


INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image
INFO:root:Grayscaled image


<<Finished function Single Threaded with Numpy Vectorization, Eliminate Inner Loop in 48.89266324043274 seconds


#### 3.5 Numpy Vector Operation (Single Thread, Vectorized Across All Images):    **Less than 1 Second**

In [103]:
# Reload the pixel arrays from disk with a 4-process pool; the grayscale
# functions modify their input arrays in place.
with DecoTimer("Resetting Images"), Pool(4) as pool:
    images = pool.map(image_read_as_nparray, filenames)
# Display the first reloaded image as a visual check.
reloaded_preview = Image.fromarray(np.uint8(images[0]))
reloaded_preview.show()

>>>>Starting Function Resetting Images...


INFO:__main__:loading image 234ade86
INFO:__main__:loading image a78d53a0
INFO:__main__:loading image a6b52912
INFO:__main__:loading image 2d65d646
INFO:__main__:loading image a1343615
INFO:__main__:loading image 6b145f94
INFO:__main__:loading image 9d50f75b
INFO:__main__:loading image 3c72af7b
INFO:__main__:loading image 22cc0a70
INFO:__main__:loading image 74cced27
INFO:__main__:loading image bb31b409
INFO:__main__:loading image aa3d339d
INFO:__main__:loading image 1946a05d
INFO:__main__:loading image ff786e05
INFO:__main__:loading image 251e4326
INFO:__main__:loading image c11b37e8
INFO:__main__:loading image 2e2aafe1
INFO:__main__:loading image f5abd9a8
INFO:__main__:loading image 66df0be7
INFO:__main__:loading image 76dfca0b


<<Finished function Resetting Images in 8.952479124069214 seconds


In [104]:
with DecoTimer("Vectorized Across All Images"):
    # Stack the per-image arrays into one array indexed [image, row, column, channel].
    npimages = np.array(images)
    channel_sum = npimages[:, :, :, 0] + npimages[:, :, :, 1] + npimages[:, :, :, 2]
    average = (channel_sum / 3).astype(int)
    # Write the per-pixel average back into each of the three channels.
    for channel in (0, 1, 2):
        npimages[:, :, :, channel] = average

    # Build a PIL image per frame; only the first one is displayed.
    for n, frame in enumerate(npimages):
        im = Image.fromarray(np.uint8(frame))
        if n == 0:
            im.show()


>>>>Starting Function Vectorized Across All Images...
<<Finished function Vectorized Across All Images in 0.7732219696044922 seconds


In [None]:
# Final step: delete the files that were downloaded into images/.
remove_images() # clean up