In [1]:
import os
import time
import cv2
import dlib
import glob
import random
import nltk
import numpy as np
from functools import reduce
import concurrent.futures
import xml.etree.ElementTree as ET
from nltk.stem import PorterStemmer, LancasterStemmer
from nltk.tokenize import sent_tokenize, word_tokenize

In [2]:
def sleep_fun(seconds):
    """Announce the requested pause on stdout, then block for that long.

    Args:
        seconds: Duration handed to ``time.sleep``.

    Returns:
        None.
    """
    message = "Sleeping for {} second(s)".format(seconds)
    print(message)
    time.sleep(seconds)

In [3]:
# Baseline: run the sleeps one after another in the calling thread.
sleep_times = [1,2,3]
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() follows the wall clock and can jump if it is adjusted.
start = time.perf_counter()
for seconds in sleep_times:
    sleep_fun(seconds)
end = time.perf_counter()
print("Series computation: {} secs ".format(end - start))

Sleeping for 1 second(s)
Sleeping for 2 second(s)
Sleeping for 3 second(s)
Series computation: 6.007047891616821 secs 


In [4]:
# Same sleeps as the serial run, dispatched to a thread pool.
start = time.perf_counter()
with concurrent.futures.ThreadPoolExecutor() as executor:
    # executor.map() only re-raises a worker's exception when that result is
    # retrieved from the returned iterator. Draining it here makes failures
    # visible instead of silently reporting a timing for work that crashed.
    list(executor.map(sleep_fun, sleep_times))
end = time.perf_counter()
print("Multithreading computation: {} secs ".format(end - start))

Sleeping for 1 second(s)
Sleeping for 2 second(s)
Sleeping for 3 second(s)
Multithreading computation: 3.0117909908294678 secs 


In [6]:
def calculation(number):
    """Multiply together ``number`` distinct random integers.

    The integers are drawn without replacement from ``range(10000000)`` using
    the module-level ``random`` generator, so results are reproducible only if
    the caller seeds ``random`` beforehand.

    Args:
        number: How many integers to sample; must satisfy
            ``0 <= number <= 10000000``.

    Returns:
        The product of the sampled integers as a Python int. For
        ``number == 0`` the empty product ``1`` is returned.

    Raises:
        ValueError: Propagated from ``random.sample`` when ``number`` is
            negative or larger than the population.
    """
    random_list = random.sample(range(10000000), number)
    # The explicit initial value 1 makes an empty sample well-defined instead
    # of raising TypeError; it does not change the product for other inputs.
    return reduce(lambda x, y: x*y, random_list, 1)

In [7]:
# Baseline: run the three products one after another.
numbers = [200000, 200000, 200000]
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() follows the wall clock and can jump if it is adjusted.
start = time.perf_counter()
for sample_size in numbers:
    # Only the last product stays bound to `result`; the loop exists for timing.
    result = calculation(sample_size)
end = time.perf_counter()
print("Series computation: {} sec".format(end - start))

Series computation: 39.45596265792847 sec


In [8]:
# Same workload as the serial run, dispatched to a thread pool.
start = time.perf_counter()
with concurrent.futures.ThreadPoolExecutor() as executor:
    # executor.map() only re-raises a worker's exception when that result is
    # retrieved from the returned iterator. Draining it here makes failures
    # visible instead of silently reporting a timing for work that crashed.
    list(executor.map(calculation, numbers))
end = time.perf_counter()
print("Multithreading computation: {} secs ".format(end - start))

Multithreading computation: 40.47984790802002 secs 


In [9]:
# Same workload as the serial run, dispatched to a pool of worker processes.
# NOTE(review): the workers must be able to resolve `calculation`; under the
# "spawn" start method a function defined in a notebook cell may not be
# importable in the child process - confirm on the target platform.
start = time.perf_counter()
with concurrent.futures.ProcessPoolExecutor() as executor:
    # executor.map() only re-raises a worker's exception when that result is
    # retrieved from the returned iterator. Draining it here makes failures
    # visible instead of silently reporting a timing for work that crashed.
    list(executor.map(calculation, numbers))
end = time.perf_counter()
print("MultiProcessing computation: {} secs ".format(end - start))

MultiProcessing computation: 13.550559282302856 secs 


# Example from Computer Vision

In [10]:
def face_detection(image_path):
    """Detect faces in one image and save a copy with each detection outlined.

    Relies on the module-level ``face_detector`` callable, which must be
    defined before this function is called.

    Args:
        image_path: Path of the image file to load with ``cv2.imread``.

    Returns:
        None. When at least one face is found, an annotated copy is written to
        ``./Datasets/face_processed/<original file name>``. Images that cannot
        be read, or in which nothing is detected, produce no output file.
    """
    output_dir = "./Datasets/face_processed/"

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # rather than raising, so guard before touching the array.
        print("Skipping unreadable image: {}".format(image_path))
        return

    # NOTE(review): the image is passed to the detector in the channel order
    # produced by cv2.imread - confirm the detector does not expect RGB.
    faces = face_detector(image)
    if len(faces) == 0:
        return

    # Copy only once we know there is something to draw.
    annotated = image.copy()
    for face in faces:
        top_left = (face.left(), face.top())
        bottom_right = (face.right(), face.bottom())
        annotated = cv2.rectangle(annotated, top_left, bottom_right, (255,0,0), 5)

    # Make sure the destination exists; exist_ok keeps concurrent workers from
    # racing each other on directory creation.
    os.makedirs(output_dir, exist_ok=True)
    output_path = output_dir + os.path.basename(image_path)
    if not cv2.imwrite(output_path, annotated):
        print("Could not write annotated image: {}".format(output_path))

In [11]:
# face_detection() looks this detector up as a module-level global.
face_detector = dlib.get_frontal_face_detector()
# sorted() fixes the processing order; glob itself returns paths in whatever
# order the filesystem yields them.
images = sorted(glob.glob("./Datasets/face_raw/*.jpg"))
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() follows the wall clock and can jump if it is adjusted.
start = time.perf_counter()
for image_path in images:
    face_detection(image_path)
end = time.perf_counter()
print("Series computation: {} seconds".format(end - start))

Series computation: 16.28821086883545 seconds


In [12]:
# Same images as the serial run, dispatched to a pool of worker processes.
# NOTE(review): the workers must be able to resolve `face_detection` and the
# global `face_detector`; under the "spawn" start method names defined in
# notebook cells may not exist in the child process - confirm on the target
# platform.
start = time.perf_counter()
with concurrent.futures.ProcessPoolExecutor() as executor:
    # executor.map() only re-raises a worker's exception when that result is
    # retrieved from the returned iterator. Draining it here makes failures
    # visible instead of silently reporting a timing for work that crashed.
    list(executor.map(face_detection, images))
end = time.perf_counter()
print("Multiprocessing computation: {} sec".format(end - start))

Multiprocessing computation: 3.3354105949401855 sec


In [13]:
# Same images as the serial run, dispatched to a thread pool.
start = time.perf_counter()
with concurrent.futures.ThreadPoolExecutor() as executor:
    # executor.map() only re-raises a worker's exception when that result is
    # retrieved from the returned iterator. Draining it here makes failures
    # visible instead of silently reporting a timing for work that crashed.
    list(executor.map(face_detection, images))
end = time.perf_counter()
print("Multithreading computation: {} sec".format(end - start))

Multithreading computation: 14.369969844818115 sec


# Example from NLP

In [None]:
# Make sure you unzip the blogs.zip file in ./Datasets/blog_xml/ before running this segment

In [15]:
def stemSentence(sentence, stemmer):
    """Return ``sentence`` with every token replaced by its stem.

    The text is split with ``word_tokenize``; each token is passed through
    ``stemmer.stem`` and followed by a single space, so any non-empty result
    ends with a trailing space.

    Args:
        sentence: Text to tokenize and stem.
        stemmer: Object exposing a ``stem(word)`` method.

    Returns:
        The stemmed tokens concatenated into one string.
    """
    return "".join(stemmer.stem(token) + " " for token in word_tokenize(sentence))

In [16]:
def xml_process(xml_path):
    """Stem every ``<post>`` of one XML file and append the result to a text file.

    The output goes to ``./Datasets/blog_processed/<stem of xml_path>.txt``,
    one stemmed post per line. The file is opened in append mode, so running
    this again on the same input adds the lines a second time.

    Processing is best-effort: any ordinary error (unparsable XML, I/O failure,
    tokenizer problem, ...) is reported on stdout and the file is skipped, so a
    batch run continues with the remaining files.

    Args:
        xml_path: Path of the XML document to process.

    Returns:
        None.
    """
    output_dir = "./Datasets/blog_processed/"
    try:
        root = ET.parse(xml_path).getroot()
        # An element without text has .text == None; skip those posts rather
        # than letting one of them abort the whole file.
        posts = [post.text for post in root.iter("post") if post.text is not None]
        if not posts:
            return

        porter = PorterStemmer()
        # Stem everything before touching the disk so that a failure midway
        # does not leave a partially written output file behind.
        stemmed_posts = [stemSentence(post, porter) for post in posts]

        file_name = os.path.splitext(os.path.basename(xml_path))[0] + ".txt"
        os.makedirs(output_dir, exist_ok=True)
        # Open once per document instead of once per sentence.
        with open(output_dir + file_name, "a+") as file:
            for stemmed in stemmed_posts:
                file.write(stemmed)
                file.write("\n")
    except Exception as exc:
        # Catch Exception (not a bare except) so KeyboardInterrupt/SystemExit
        # still propagate, and report the failure instead of hiding it.
        print("Skipping {}: {!r}".format(xml_path, exc))

In [17]:
# Baseline: process the XML files one after another.
xml_files = glob.glob("./Datasets/blog_xml/*.xml")
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() follows the wall clock and can jump if it is adjusted.
start = time.perf_counter()
for xml_path in xml_files:
    xml_process(xml_path)
end = time.perf_counter()
print("Series computation: {} seconds".format(end - start))

Series computation: 7.939080715179443 seconds


In [20]:
# Same files as the serial run, dispatched to a pool of worker processes.
# NOTE(review): the workers must be able to resolve `xml_process`; under the
# "spawn" start method a function defined in a notebook cell may not be
# importable in the child process - confirm on the target platform.
start = time.perf_counter()
with concurrent.futures.ProcessPoolExecutor() as executor:
    # executor.map() only re-raises a worker's exception when that result is
    # retrieved from the returned iterator. Draining it here makes failures
    # visible instead of silently reporting a timing for work that crashed.
    list(executor.map(xml_process, xml_files))
end = time.perf_counter()
print("Multiprocessing computation: {} sec".format(end - start))

Multiprocessing computation: 1.252767562866211 sec


In [19]:
# Same files as the serial run, dispatched to a thread pool.
start = time.perf_counter()
with concurrent.futures.ThreadPoolExecutor() as executor:
    # executor.map() only re-raises a worker's exception when that result is
    # retrieved from the returned iterator. Draining it here makes failures
    # visible instead of silently reporting a timing for work that crashed.
    list(executor.map(xml_process, xml_files))
end = time.perf_counter()
print("Multithreading computation: {} sec".format(end - start))

Multithreading computation: 9.674808025360107 sec
