In [19]:
# !pip install pathos  # Multi processing
# !pip install tqdm  # Progress bar
# !pip install TextBlob  # Sentiment analysis

from pathos.multiprocessing import ProcessPool
from textblob import TextBlob
from tqdm import tqdm
import time
import urllib.request

# Create a process pool

Pools are a group of processes where you will send tasks. Inside you will define the number of processes to create. By defualt it will be number of CPU cores, however you **can** define more than that. 

Scheduling more processes than you have CPU cores can increase performance where the processes run into wait times or I/O (Input/Output)

In [2]:
pool = ProcessPool(nodes=3)

# Functions

Map methods provided:

    map         - blocking and ordered worker pool        [returns: list]
    imap        - non-blocking and ordered worker pool    [returns: iterator]
    uimap       - non-blocking and unordered worker pool  [returns: iterator]
    amap        - asynchronous worker pool                [returns: object]

Blocking: handles jobs in batches rather than 1 by 1

Ordered: Batches must be completed in order

In [3]:
# pool.map(function to run, data to run it on, other arguments)

pool.map(pow, [1,2,3,4], [5,6,7,8])

[1, 64, 2187, 65536]

In [4]:
# Iterate through the returned data using imap
for x in pool.imap(pow, [1,2,3,4], [5,6,7,8]):
  print(x)
  # computes function on data and other arguments pair by pair

1
64
2187
65536


In [5]:
# do an asynchronous map, then get the results

results = pool.amap(pow, [1,2,3,4], [5,6,7,8])

while not results.ready():
    time.sleep(5); print(".", end=' ')

. 

# Build your function
First lets build a function that can take a line of text and produce the sentiment

In [20]:
def get_sentiment(text):
    from textblob import TextBlob
    blob = TextBlob(text)
    score = blob.sentiment.polarity
    return score

Then we will need a function that will download the poems for us

In [21]:
def download_poem(url):

    poems = []
    
    with urllib.request.urlopen(url) as f: 
        for line in f:
            line = line.decode("utf-8")
            line = line.strip()
            if line:
                poems.append(line)

    return poems

Let's check out what one of these poems look like

In [22]:
test_url = 'https://raw.githubusercontent.com/okfn/openmilton/master/miltondata/texts/poems.txt'

poem = download_poem(test_url)

print(len(poem))

print(poem[:10])

6203
['The Poetical Works of John Milton', 'PREFACE by the Rev. H. C. Beeching, M. A.', "This edition of Milton's Poetry is a reprint, as careful as Editor", 'and Printers have been able to make it, from the earliest printed', 'copies of the several poems.  First the 1645 volume of the', 'Minor Poems has been printed entire; then follow in order the', 'poems added in the reissue of 1673; the Paradise Lost, from the', "edition of 1667; and the Paradise Regain'd and Samson", 'Agonistes from the edition of 1671.', 'The most interesting portion of the book must be reckoned the']


Finally we will build out main function that puts the whole process together

In [23]:
def process_poems(url):
    
    scores = []

    poem = download_poem(url)
    
    for line in poem:
        scores.append(get_sentiment(line))
    
    return scores

In [27]:
def process_poems(url):
    
    scores = []

    # poem = download_poem(url)
    poem = []
    
    with urllib.request.urlopen(url) as f: 
        for line in f:
            line = line.decode("utf-8")
            line = line.strip()
            if line:
                poem.append(line)

    # return poems
    
    for line in poem:
        scores.append(get_sentiment(line))
    
    return scores

Let's build a hard task, like having to download and process multiple poems

In [24]:
urls = ['https://raw.githubusercontent.com/okfn/openmilton/master/miltondata/texts/poems.txt']

# Duplicating the list to make it larger

for _ in range(0, 3):  # using _ means it is an unused variable
    urls += urls

print(len(urls))

8


Now let's test how long it takes to process the sentiment for each line of our poems dataset

We can use TQDM to show us the progress of any for-loop operation

In [25]:
# Serial Processing
scores = []

for url in tqdm(urls, position=0):  # position=0 forces the bars into the same line when printing
    scores += process_poems(url)

100%|██████████| 8/8 [00:12<00:00,  1.59s/it]


In [29]:
# Pool
scores = []

for score in tqdm(pool.uimap(process_poems, urls), total=len(urls), position=0):
    print(score)
    scores += score

 12%|█▎        | 1/8 [00:02<00:14,  2.10s/it]

[0.0, 0.0, -0.1, 0.5, 0.125, -0.025, 0.0, 0.0, 0.0, 0.5, 0.25, -0.09999999999999999, 0.1, 0.0, 0.0, 0.0, -0.25, 0.0, 0.0, 0.5333333333333333, 0.0, 0.4, -0.125, -0.25, 0.0, 0.05000000000000002, 0.0, -0.25, 0.041666666666666664, 0.0, 0.25, 0.0, 0.0, 0.0, 0.0, 0.1875, -0.10833333333333334, 0.0, 0.0, -0.4, -0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.35, -0.075, 0.0, 0.10000000000000002, 0.0, 0.0, 0.0, -0.012499999999999997, 0.0, 0.0, 0.0, 0.1, 0.03333333333333333, 0.0, 0.125, -0.3333333333333333, 0.6, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.2, 0.375, 0.0, 0.25, 0.6, 0.0, 0.2, -0.05, 0.08333333333333333, 0.7, -0.05, 0.0, 0.0, -0.3, 0.0, 0.21428571428571427, 0.35, 0.0, 0.6, 0.0, 0.0, 0.0, 0.0, -0.30000000000000004, 0.0, 0.0, 0.0, 0.21428571428571427, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.5, 0.0, 0.0, 0.0, -0.8, 0.0, 0.0, 0.0, 0.1, 0.0, 0.375, -0.033333333333333326, 0.0, -0.0125, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.25, 0.0, 0.0, 0.0, 1.0, 0.25, 0.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.062

 25%|██▌       | 2/8 [00:04<00:12,  2.15s/it]

[0.0, 0.0, -0.1, 0.5, 0.125, -0.025, 0.0, 0.0, 0.0, 0.5, 0.25, -0.09999999999999999, 0.1, 0.0, 0.0, 0.0, -0.25, 0.0, 0.0, 0.5333333333333333, 0.0, 0.4, -0.125, -0.25, 0.0, 0.05000000000000002, 0.0, -0.25, 0.041666666666666664, 0.0, 0.25, 0.0, 0.0, 0.0, 0.0, 0.1875, -0.10833333333333334, 0.0, 0.0, -0.4, -0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.35, -0.075, 0.0, 0.10000000000000002, 0.0, 0.0, 0.0, -0.012499999999999997, 0.0, 0.0, 0.0, 0.1, 0.03333333333333333, 0.0, 0.125, -0.3333333333333333, 0.6, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.2, 0.375, 0.0, 0.25, 0.6, 0.0, 0.2, -0.05, 0.08333333333333333, 0.7, -0.05, 0.0, 0.0, -0.3, 0.0, 0.21428571428571427, 0.35, 0.0, 0.6, 0.0, 0.0, 0.0, 0.0, -0.30000000000000004, 0.0, 0.0, 0.0, 0.21428571428571427, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.5, 0.0, 0.0, 0.0, -0.8, 0.0, 0.0, 0.0, 0.1, 0.0, 0.375, -0.033333333333333326, 0.0, -0.0125, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.25, 0.0, 0.0, 0.0, 1.0, 0.25, 0.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.062

 38%|███▊      | 3/8 [00:04<00:07,  1.49s/it]

[0.0, 0.0, -0.1, 0.5, 0.125, -0.025, 0.0, 0.0, 0.0, 0.5, 0.25, -0.09999999999999999, 0.1, 0.0, 0.0, 0.0, -0.25, 0.0, 0.0, 0.5333333333333333, 0.0, 0.4, -0.125, -0.25, 0.0, 0.05000000000000002, 0.0, -0.25, 0.041666666666666664, 0.0, 0.25, 0.0, 0.0, 0.0, 0.0, 0.1875, -0.10833333333333334, 0.0, 0.0, -0.4, -0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.35, -0.075, 0.0, 0.10000000000000002, 0.0, 0.0, 0.0, -0.012499999999999997, 0.0, 0.0, 0.0, 0.1, 0.03333333333333333, 0.0, 0.125, -0.3333333333333333, 0.6, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.2, 0.375, 0.0, 0.25, 0.6, 0.0, 0.2, -0.05, 0.08333333333333333, 0.7, -0.05, 0.0, 0.0, -0.3, 0.0, 0.21428571428571427, 0.35, 0.0, 0.6, 0.0, 0.0, 0.0, 0.0, -0.30000000000000004, 0.0, 0.0, 0.0, 0.21428571428571427, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.5, 0.0, 0.0, 0.0, -0.8, 0.0, 0.0, 0.0, 0.1, 0.0, 0.375, -0.033333333333333326, 0.0, -0.0125, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.25, 0.0, 0.0, 0.0, 1.0, 0.25, 0.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.062

 62%|██████▎   | 5/8 [00:06<00:02,  1.07it/s]

[0.0, 0.0, -0.1, 0.5, 0.125, -0.025, 0.0, 0.0, 0.0, 0.5, 0.25, -0.09999999999999999, 0.1, 0.0, 0.0, 0.0, -0.25, 0.0, 0.0, 0.5333333333333333, 0.0, 0.4, -0.125, -0.25, 0.0, 0.05000000000000002, 0.0, -0.25, 0.041666666666666664, 0.0, 0.25, 0.0, 0.0, 0.0, 0.0, 0.1875, -0.10833333333333334, 0.0, 0.0, -0.4, -0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.35, -0.075, 0.0, 0.10000000000000002, 0.0, 0.0, 0.0, -0.012499999999999997, 0.0, 0.0, 0.0, 0.1, 0.03333333333333333, 0.0, 0.125, -0.3333333333333333, 0.6, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.2, 0.375, 0.0, 0.25, 0.6, 0.0, 0.2, -0.05, 0.08333333333333333, 0.7, -0.05, 0.0, 0.0, -0.3, 0.0, 0.21428571428571427, 0.35, 0.0, 0.6, 0.0, 0.0, 0.0, 0.0, -0.30000000000000004, 0.0, 0.0, 0.0, 0.21428571428571427, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.5, 0.0, 0.0, 0.0, -0.8, 0.0, 0.0, 0.0, 0.1, 0.0, 0.375, -0.033333333333333326, 0.0, -0.0125, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.25, 0.0, 0.0, 0.0, 1.0, 0.25, 0.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.062

 75%|███████▌  | 6/8 [00:06<00:01,  1.29it/s]

[0.0, 0.0, -0.1, 0.5, 0.125, -0.025, 0.0, 0.0, 0.0, 0.5, 0.25, -0.09999999999999999, 0.1, 0.0, 0.0, 0.0, -0.25, 0.0, 0.0, 0.5333333333333333, 0.0, 0.4, -0.125, -0.25, 0.0, 0.05000000000000002, 0.0, -0.25, 0.041666666666666664, 0.0, 0.25, 0.0, 0.0, 0.0, 0.0, 0.1875, -0.10833333333333334, 0.0, 0.0, -0.4, -0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.35, -0.075, 0.0, 0.10000000000000002, 0.0, 0.0, 0.0, -0.012499999999999997, 0.0, 0.0, 0.0, 0.1, 0.03333333333333333, 0.0, 0.125, -0.3333333333333333, 0.6, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.2, 0.375, 0.0, 0.25, 0.6, 0.0, 0.2, -0.05, 0.08333333333333333, 0.7, -0.05, 0.0, 0.0, -0.3, 0.0, 0.21428571428571427, 0.35, 0.0, 0.6, 0.0, 0.0, 0.0, 0.0, -0.30000000000000004, 0.0, 0.0, 0.0, 0.21428571428571427, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.5, 0.0, 0.0, 0.0, -0.8, 0.0, 0.0, 0.0, 0.1, 0.0, 0.375, -0.033333333333333326, 0.0, -0.0125, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.25, 0.0, 0.0, 0.0, 1.0, 0.25, 0.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.062

 88%|████████▊ | 7/8 [00:06<00:00,  1.48it/s]

[0.0, 0.0, -0.1, 0.5, 0.125, -0.025, 0.0, 0.0, 0.0, 0.5, 0.25, -0.09999999999999999, 0.1, 0.0, 0.0, 0.0, -0.25, 0.0, 0.0, 0.5333333333333333, 0.0, 0.4, -0.125, -0.25, 0.0, 0.05000000000000002, 0.0, -0.25, 0.041666666666666664, 0.0, 0.25, 0.0, 0.0, 0.0, 0.0, 0.1875, -0.10833333333333334, 0.0, 0.0, -0.4, -0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.35, -0.075, 0.0, 0.10000000000000002, 0.0, 0.0, 0.0, -0.012499999999999997, 0.0, 0.0, 0.0, 0.1, 0.03333333333333333, 0.0, 0.125, -0.3333333333333333, 0.6, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.2, 0.375, 0.0, 0.25, 0.6, 0.0, 0.2, -0.05, 0.08333333333333333, 0.7, -0.05, 0.0, 0.0, -0.3, 0.0, 0.21428571428571427, 0.35, 0.0, 0.6, 0.0, 0.0, 0.0, 0.0, -0.30000000000000004, 0.0, 0.0, 0.0, 0.21428571428571427, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.5, 0.0, 0.0, 0.0, -0.8, 0.0, 0.0, 0.0, 0.1, 0.0, 0.375, -0.033333333333333326, 0.0, -0.0125, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.25, 0.0, 0.0, 0.0, 1.0, 0.25, 0.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.062

100%|██████████| 8/8 [00:07<00:00,  1.04it/s]

[0.0, 0.0, -0.1, 0.5, 0.125, -0.025, 0.0, 0.0, 0.0, 0.5, 0.25, -0.09999999999999999, 0.1, 0.0, 0.0, 0.0, -0.25, 0.0, 0.0, 0.5333333333333333, 0.0, 0.4, -0.125, -0.25, 0.0, 0.05000000000000002, 0.0, -0.25, 0.041666666666666664, 0.0, 0.25, 0.0, 0.0, 0.0, 0.0, 0.1875, -0.10833333333333334, 0.0, 0.0, -0.4, -0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.35, -0.075, 0.0, 0.10000000000000002, 0.0, 0.0, 0.0, -0.012499999999999997, 0.0, 0.0, 0.0, 0.1, 0.03333333333333333, 0.0, 0.125, -0.3333333333333333, 0.6, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.2, 0.375, 0.0, 0.25, 0.6, 0.0, 0.2, -0.05, 0.08333333333333333, 0.7, -0.05, 0.0, 0.0, -0.3, 0.0, 0.21428571428571427, 0.35, 0.0, 0.6, 0.0, 0.0, 0.0, 0.0, -0.30000000000000004, 0.0, 0.0, 0.0, 0.21428571428571427, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.5, 0.0, 0.0, 0.0, -0.8, 0.0, 0.0, 0.0, 0.1, 0.0, 0.375, -0.033333333333333326, 0.0, -0.0125, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.25, 0.0, 0.0, 0.0, 1.0, 0.25, 0.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.062


