## Reference : https://docs.python.org/3/library/multiprocessing.html

# Simple Word Counter Code

In [5]:
import time
import multiprocessing
from itertools import groupby
import re 

In [1]:
def split_lines(lines):
    return lines.split(' ')

def convert_lower(lines_list):
    return list(map(lambda x: x.lower(), lines_list))

def trim_lines(lines_list):
    return list(map(lambda x: x.strip(), lines_list))

def remove_empty(words_list):
    return list(filter(lambda x: x!='', words_list))

def group_words(words_list):
    return groupby(sorted(words_list))

def get_word_count(grouped_words):
    return list(map(lambda x: (x[0], len(list(x[1]))),grouped_words))

def remove_punctuation(my_string):
    return re.sub(r'[^\w\s]','',my_string)

def wordcount(text):
    trim_split_punct = trim_lines(split_lines(remove_punctuation(text)))
    sanitize_words_list = convert_lower(remove_empty(trim_split_punct))
    word_count_dict = get_word_count(group_words(sanitize_words_list))
    return word_count_dict

In [2]:
# Decorating to return execution time of function
def time_deco(my_func):
    def inner_wrap(*args, **kwargs):
        start = time.time()
        my_func(*args, **kwargs)
        end = time.time()
        print("Time taken is {} seconds".format(end-start))
    return inner_wrap


In [3]:
multiple_strs = ["This is a string with a single string"]*120000

In [6]:
# Run wordcount linearly

@time_deco
def linear_word_count(multiple_strs):
    total_word_count = {}
    for single_str in multiple_strs:
        single_word_count = wordcount(single_str)
        total_word_count.update(single_word_count)
    return total_word_count
        
linear_word_count(multiple_strs)    


Time taken is 1.530799150466919 seconds


In [11]:
@time_deco
def multiprocess_word_count(multiple_strs):
    total_word_count = {}
    with multiprocessing.Pool(4) as workers:
        result_iter = workers.map(wordcount, multiple_strs)
        for single_word_count in result_iter:
            total_word_count.update(single_word_count)
    return total_word_count

multiprocess_word_count(multiple_strs)

Time taken is 0.8768208026885986 seconds


## Also take a look at map_async and imap_unordered from multiprocessing module and compare their performances