In [1]:
from word2number import w2n
import threading
import multiprocessing
from threading import Thread
from multiprocessing import Process
import concurrent.futures
from concurrent import futures
import time
import math

In [2]:
def preprocess(content):
    content = content.replace("Fourty", "Forty")
    content = content.replace("Tweleve", "Twelve")
    content = content.replace("fourty", "forty")
    content = content.replace("tweleve", "twelve")
    
    return content

In [3]:
class Task:
    def __init__(self, path):
        self.path = path
        self.numbers = []
        
    def get_sum(self):
        with open(self.path) as file:
            content = file.read()
            content = preprocess(content)
            lines = [s for s in content.splitlines() if s.strip()]
            
        self.numbers = [w2n.word_to_num(num) for num in lines]
        
        return sum(self.numbers)
    
    def get_count(self, number):
        with open(self.path) as file:
            content = file.read()
            content = preprocess(content)
            lines = [s for s in content.splitlines() if s.strip()]
            
        counts = [str(num).count(str(number)) for num in self.numbers]
        
        return sum(counts)

In [4]:
t = Task("test/1.txt")
print(t.get_sum())
print(t.get_count(1))

23746560
215040


## One way to use threading

In [10]:
class ThreadedTask:
    def __init__(self, path, number, results):
        t = Task(path)
        s = t.get_sum()
        c = t.get_count(number)
        results[path] = (s,c)
        print(f"Sum = {s} and Count = {c} of path {path}")

In [11]:
start = time.perf_counter()
results = {}

for num in range(1,11):
    path = f"test/{num}.txt"
    t = threading.Thread(target=ThreadedTask, args=(path, 1, results))
    t.start()
    finish = time.perf_counter()
    print(f'Thread {num} started at {round(finish-start, 2)} second(s)')

t.join()
time.sleep(10)

sums = [val[0] for val in results.values()]
counts = [val[1] for val in results.values()]

print(f"Total sum is = {sum(sums)}")
print(f"Total count is = {sum(counts)}")

finish = time.perf_counter()

print(f'Finished in {round(finish-start, 2)} second(s)')

Thread 1 started at 0.0 second(s)
Thread 2 started at 0.02 second(s)
Thread 3 started at 0.16 second(s)
Thread 4 started at 0.81 second(s)
Thread 5 started at 0.94 second(s)
Thread 6 started at 1.15 second(s)
Thread 7 started at 1.81 second(s)
Thread 8 started at 2.24 second(s)
Thread 9 started at 2.61 second(s)
Thread 10 started at 4.15 second(s)
Sum = 7915520 and Count = 71680 of path test/6.txt
Sum = 15831040 and Count = 143360 of path test/2.txt
Sum = 15831040 and Count = 143360 of path test/7.txt
Sum = 23746560 and Count = 215040 of path test/1.txt
Sum = 15831040 and Count = 143360 of path test/10.txt
Sum = 15831040 and Count = 143360 of path test/8.txt
Sum = 31662080 and Count = 286720 of path test/5.txt
Sum = 31662080 and Count = 286720 of path test/9.txt
Sum = 31662080 and Count = 286720 of path test/3.txt
Sum = 31662080 and Count = 286720 of path test/4.txt
Total sum is = 221634560
Total count is = 2007040
Finished in 23.77 second(s)


## Second way to use threading

In [36]:
def ThreadedTaskFunc(path):
    t = Task(path)
    s = t.get_sum()
    c = t.get_count(1)
    print(f"Sum = {s} and Count = {c} of path {path}")
    return (s,c)

In [42]:
start = time.perf_counter()
results = []
total_sum = 0
total_count = 0
    
with concurrent.futures.ThreadPoolExecutor() as executor:
    for num in range(1,11):
        path = f"test/{num}.txt"
        results.append(executor.submit(ThreadedTaskFunc, path))
        finish = time.perf_counter()
        print(f'Thread {num} started at {round(finish-start, 2)} second(s)')
        
for job in futures.as_completed(results):
    result = job.result()
    total_sum += result[0]
    total_count += result[1]


time.sleep(10)
    
print(f"Total sum is = {total_sum}")
print(f"Total count is = {total_count}")

finish = time.perf_counter()

print(f'Finished in {round(finish-start, 2)} second(s)')

Thread 1 started at 0.01 second(s)
Thread 2 started at 0.07 second(s)
Thread 3 started at 0.1 second(s)
Thread 4 started at 0.11 second(s)
Thread 5 started at 0.27 second(s)
Thread 6 started at 0.56 second(s)
Thread 7 started at 0.72 second(s)
Thread 8 started at 1.56 second(s)
Thread 9 started at 1.56 second(s)
Thread 10 started at 1.56 second(s)
Sum = 7915520 and Count = 71680 of path test/6.txt
Sum = 15831040 and Count = 143360 of path test/7.txt
Sum = 15831040 and Count = 143360 of path test/2.txt
Sum = 15831040 and Count = 143360 of path test/8.txt
Sum = 31662080 and Count = 286720 of path test/3.txt
Sum = 23746560 and Count = 215040 of path test/1.txt
Sum = 15831040 and Count = 143360 of path test/10.txt
Sum = 31662080 and Count = 286720 of path test/4.txt
Sum = 31662080 and Count = 286720 of path test/5.txt
Sum = 31662080 and Count = 286720 of path test/9.txt
Total sum is = 221634560
Total count is = 2007040
Finished in 26.43 second(s)


## Third way to use threading

In [11]:
class MultiTask(threading.Thread):
    def __init__(self, path, threadID, number):
        threading.Thread.__init__(self)
        self.path = path
        self.threadID = threadID
        self.number = number
        self.numbers = []
        self.res_sum = 0
        self.res_count = 0
        
    # Overrided Method
    def run(self):
        print(f"Starting {self.threadID}")
        self.res_sum = self.get_sum()
        self.res_count = self.get_count(self.number)
        print(f"Sum = {self.res_sum} and Count = {self.res_count} of path {self.path}")
        
    def get_sum(self):
        with open(self.path) as file:
            content = file.read()
            content = preprocess(content)
            lines = [s for s in content.splitlines() if s.strip()]
            
        self.numbers = [w2n.word_to_num(num) for num in lines]
        
        return sum(self.numbers)
    
    def get_count(self, number):
        with open(self.path) as file:
            content = file.read()
            content = preprocess(content)
            lines = [s for s in content.splitlines() if s.strip()]
            
        counts = [str(num).count(str(number)) for num in self.numbers]
        
        return sum(counts)

In [12]:
start = time.perf_counter()
results = {}
total_sum = 0
total_count = 0
    
for num in range(1,11):
    path = f"test/{num}.txt"
    
    thread = MultiTask(path, num, 1)
    thread.start()
    thread.join()
    results[path] = (thread.res_sum, thread.res_count)
    finish = time.perf_counter()
    print(f'Thread {num} started at {round(finish-start, 2)} second(s)')
    
thread.join()
time.sleep(5)
    
sums = [val[0] for val in results.values()]
counts = [val[1] for val in results.values()]

print(f"Total sum is = {sum(sums)}")
print(f"Total count is = {sum(counts)}")

finish = time.perf_counter()

print(f'Finished in {round(finish-start, 2)} second(s)')

Starting 1
Sum = 23746560 and Count = 215040 of path test/1.txt
Thread 1 started at 1.68 second(s)
Starting 2
Sum = 15831040 and Count = 143360 of path test/2.txt
Thread 2 started at 2.73 second(s)
Starting 3
Sum = 31662080 and Count = 286720 of path test/3.txt
Thread 3 started at 4.92 second(s)
Starting 4
Sum = 31662080 and Count = 286720 of path test/4.txt
Thread 4 started at 7.19 second(s)
Starting 5
Sum = 31662080 and Count = 286720 of path test/5.txt
Thread 5 started at 9.53 second(s)
Starting 6
Sum = 7915520 and Count = 71680 of path test/6.txt
Thread 6 started at 10.29 second(s)
Starting 7
Sum = 15831040 and Count = 143360 of path test/7.txt
Thread 7 started at 12.13 second(s)
Starting 8
Sum = 15831040 and Count = 143360 of path test/8.txt
Thread 8 started at 13.74 second(s)
Starting 9
Sum = 31662080 and Count = 286720 of path test/9.txt
Thread 9 started at 16.57 second(s)
Starting 10
Sum = 15831040 and Count = 143360 of path test/10.txt
Thread 10 started at 18.24 second(s)
Tota

## Fourth way of multithreading

In [14]:
def ThreadedTaskMap(*args):
    t = Task(args[0])
    s = t.get_sum()
    c = t.get_count(1)
    print(f"Sum = {s} and Count = {c} of path {args[0]}")
    return (s,c)

In [15]:
start = time.perf_counter()
results = []
total_sum = 0
total_count = 0
    
with concurrent.futures.ThreadPoolExecutor() as executor:
    for num in range(1,11):
        path = f"test/{num}.txt"
        results = executor.map(ThreadedTaskMap, [path])
        finish = time.perf_counter()
        print(f'Thread {num} started at {round(finish-start, 2)} second(s)')
        
for result in results:
    total_sum += result[0]
    total_count += result[1]


time.sleep(10)
    
print(f"Total sum is = {total_sum}")
print(f"Total count is = {total_count}")

finish = time.perf_counter()

print(f'Finished in {round(finish-start, 2)} second(s)')

Thread 1 started at 0.0 second(s)
Thread 2 started at 0.07 second(s)
Thread 3 started at 0.08 second(s)
Thread 4 started at 0.29 second(s)
Thread 5 started at 0.35 second(s)
Thread 6 started at 0.47 second(s)
Thread 7 started at 0.65 second(s)
Thread 8 started at 0.77 second(s)
Thread 9 started at 0.8 second(s)
Thread 10 started at 0.97 second(s)
Sum = 7915520 and Count = 71680 of path test/6.txt
Sum = 15831040 and Count = 143360 of path test/2.txt
Sum = 15831040 and Count = 143360 of path test/7.txt
Sum = 15831040 and Count = 143360 of path test/8.txt
Sum = 23746560 and Count = 215040 of path test/1.txt
Sum = 31662080 and Count = 286720 of path test/5.txt
Sum = 31662080 and Count = 286720 of path test/9.txt
Sum = 15831040 and Count = 143360 of path test/10.txt
Sum = 31662080 and Count = 286720 of path test/4.txt
Sum = 31662080 and Count = 286720 of path test/3.txt
Total sum is = 15831040
Total count is = 143360
Finished in 26.06 second(s)


# Multiprocessing

## One way to use multiprocessing

In [14]:
def ProcessedTaskFunc(path):
    t = Task(path)
    s = t.get_sum()
    c = t.get_count(1)
    print(f"Sum = {s} and Count = {c} of path {path}")
    return (s,c)

In [15]:
start = time.perf_counter()
results = []
total_sum = 0
total_count = 0
    
with concurrent.futures.ProcessPoolExecutor() as executor:
    for num in range(1,11):
        path = f"test/{num}.txt"
        results.append(executor.submit(ProcessedTaskFunc, path))
        finish = time.perf_counter()
        print(f'Process {num} started at {round(finish-start, 2)} second(s)')
        
for job in futures.as_completed(results):
    result = job.result()
    total_sum += result[0]
    total_count += result[1]


time.sleep(10)
    
print(f"Total sum is = {total_sum}")
print(f"Total count is = {total_count}")

finish = time.perf_counter()

print(f'Finished in {round(finish-start, 2)} second(s)')

Process 1 started at 0.04 second(s)
Process 2 started at 0.04 second(s)
Process 3 started at 0.04 second(s)
Process 4 started at 0.04 second(s)
Process 5 started at 0.04 second(s)
Process 6 started at 0.05 second(s)
Process 7 started at 0.05 second(s)
Process 8 started at 0.05 second(s)
Process 9 started at 0.05 second(s)
Process 10 started at 0.05 second(s)
Sum = 15831040 and Count = 143360 of path test/2.txt
Sum = 23746560 and Count = 215040 of path test/1.txt
Sum = 31662080 and Count = 286720 of path test/4.txt
Sum = 31662080 and Count = 286720 of path test/3.txt
Sum = 7915520 and Count = 71680 of path test/6.txt
Sum = 15831040 and Count = 143360 of path test/8.txt
Sum = 15831040 and Count = 143360 of path test/7.txt
Sum = 31662080 and Count = 286720 of path test/5.txt
Sum = 15831040 and Count = 143360 of path test/10.txt
Sum = 31662080 and Count = 286720 of path test/9.txt
Total sum is = 221634560
Total count is = 2007040
Finished in 17.91 second(s)


## Second way to use multiprocessing

In [8]:
def ProcessedTaskMap(*args):
    t = Task(args[0])
    s = t.get_sum()
    c = t.get_count(1)
    print(f"Sum = {s} and Count = {c} of path {args[0]}")
    return (s,c)

In [9]:
start = time.perf_counter()
results = []
total_sum = 0
total_count = 0
    
with concurrent.futures.ProcessPoolExecutor() as executor:
    for num in range(1,11):
        path = f"test/{num}.txt"
        results = executor.map(ProcessedTaskMap, [path])
        finish = time.perf_counter()
        print(f'Process {num} started at {round(finish-start, 2)} second(s)')
        
for result in results:
    total_sum += result[0]
    total_count += result[1]


time.sleep(10)
    
print(f"Total sum is = {total_sum}")
print(f"Total count is = {total_count}")

finish = time.perf_counter()

print(f'Finished in {round(finish-start, 2)} second(s)')

Process 1 started at 0.04 second(s)
Process 2 started at 0.04 second(s)
Process 3 started at 0.04 second(s)
Process 4 started at 0.04 second(s)
Process 5 started at 0.04 second(s)
Process 6 started at 0.04 second(s)
Process 7 started at 0.04 second(s)
Process 8 started at 0.04 second(s)
Process 9 started at 0.04 second(s)
Process 10 started at 0.04 second(s)
Sum = 15831040 and Count = 143360 of path test/2.txt
Sum = 23746560 and Count = 215040 of path test/1.txt
Sum = 31662080 and Count = 286720 of path test/3.txt
Sum = 31662080 and Count = 286720 of path test/4.txt
Sum = 7915520 and Count = 71680 of path test/6.txt
Sum = 15831040 and Count = 143360 of path test/7.txt
Sum = 15831040 and Count = 143360 of path test/8.txt
Sum = 31662080 and Count = 286720 of path test/5.txt
Sum = 15831040 and Count = 143360 of path test/10.txt
Sum = 31662080 and Count = 286720 of path test/9.txt
Total sum is = 15831040
Total count is = 143360
Finished in 18.08 second(s)


## Third way to use multiprocessing

In [2]:
from multiprocessing import Pool

In [3]:
def f(x):
    return x*x

if __name__ == '__main__':
    with Pool(5) as p:
        print(p.map(f, [1, 2, 3]))

[1, 4, 9]
