In [None]:
!pip install pathos #Multi processing
!pip install tqdm #Progress bar
!pip install TextBlob #Sentiment analysis

from pathos.multiprocessing import ProcessPool
from textblob import TextBlob
from tqdm import tqdm 



# Create a process pool

A pool is a group of processes to which you send tasks. When creating the pool you define the number of processes it should contain. By default this is the number of CPU cores; however, you **can** define more than that. 

Scheduling more processes than you have CPU cores can increase performance when the processes spend much of their time waiting, for example on network or disk I/O.

In [None]:
# Create a pool with 3 worker nodes; the map calls in the cells below send their tasks to it
pool = ProcessPool(nodes=3)

# Functions

Map methods provided:


    map         - blocking and ordered worker pool        [returns: list]
    imap        - non-blocking and ordered worker pool    [returns: iterator]
    uimap       - non-blocking and unordered worker pool  [returns: iterator]
    amap        - asynchronous worker pool                [returns: object]

Blocking: the call does not return until all jobs have finished and every result is available

Ordered: results are returned in the same order as the inputs, regardless of which job finishes first

In [None]:
# pool.map(function to run, data to run it on, other arguments)
# Each extra list supplies one more positional argument, matched up by position:
# here the calls are pow(1, 5), pow(2, 6), pow(3, 7) and pow(4, 8).

pool.map(pow, [1,2,3,4], [5,6,7,8])

[1, 64, 2187, 65536]

In [None]:
# imap returns an iterator, so the results can be consumed one at a time
powers = pool.imap(pow, [1, 2, 3, 4], [5, 6, 7, 8])
for value in powers:
    print(value)

1
64
2187
65536


In [None]:
# Do an asynchronous map, then get the results
import time

# amap hands back a result object instead of the finished list
results = pool.amap(pow, [1,2,3,4], [5,6,7,8])

# Poll until the workers are done, printing a dot for every wait
while not results.ready():
  time.sleep(5)
  print(".", end=' ')

# Actually retrieve the computed values so they are shown as the cell output
results.get()

. 

# Build your function
First, let's build a function that takes a line of text and returns its sentiment score

In [None]:
  def get_sentiment(text):
      """Return the TextBlob sentiment polarity score for ``text``."""
      # Imported inside the function body, presumably so the function is
      # self-contained when it is shipped to a worker process.
      from textblob import TextBlob
      return TextBlob(text).sentiment.polarity

Then we will need a function that will download the poems for us

In [None]:
import urllib.request

def download_poem(url):
    """Download a text file and return its non-empty lines.

    Args:
        url: Address of a UTF-8 encoded text file.

    Returns:
        A list of strings, one per non-blank line, with surrounding
        whitespace removed.
    """
    poems = []
    with urllib.request.urlopen(url) as f:
        for line in f:
            # The response yields bytes; decode before cleaning up whitespace
            line = line.decode("utf-8").strip()
            if line:
                poems.append(line)
    # Return the list built above (the original returned the undefined name `poem`)
    return poems

Let's check out what one of these poems looks like

In [None]:
# Plain-text poems file used to try out download_poem
test_url = 'https://raw.githubusercontent.com/okfn/openmilton/master/miltondata/texts/poems.txt'
poem = download_poem(test_url)

# Show how many lines were returned, then preview the first ten
print(len(poem))
print(poem[:10])

6203
['The Poetical Works of John Milton', 'PREFACE by the Rev. H. C. Beeching, M. A.', "This edition of Milton's Poetry is a reprint, as careful as Editor", 'and Printers have been able to make it, from the earliest printed', 'copies of the several poems.  First the 1645 volume of the', 'Minor Poems has been printed entire; then follow in order the', 'poems added in the reissue of 1673; the Paradise Lost, from the', "edition of 1667; and the Paradise Regain'd and Samson", 'Agonistes from the edition of 1671.', 'The most interesting portion of the book must be reckoned the']


Finally, we will build our main function that puts the whole process together

In [None]:
def process_poems(url):
    """Download the text at ``url`` and score the sentiment of every line.

    Returns a list with one sentiment score per downloaded line, in the
    order the lines were returned by ``download_poem``.
    """
    return [get_sentiment(line) for line in download_poem(url)]

Let's build a hard task, like having to download and process multiple poems

In [None]:
urls = ['https://raw.githubusercontent.com/okfn/openmilton/master/miltondata/texts/poems.txt']

# Double the list three times (1 -> 2 -> 4 -> 8 entries) so there is more work to do
for _ in range(3):
  urls *= 2

print(len(urls))

8


Now let's test how long it takes to process the sentiment for each line of our poems dataset

We can use TQDM to show us the progress of any for-loop operation

In [None]:
#Serial Processing
scores = []

# position=0 forces the bars into the same line when printing
for url in tqdm(urls, position=0):
    # Accumulate into `scores`, the list initialised above
    # (the original wrote to the undefined name `score`)
    scores += process_poems(url)


100%|██████████| 8/8 [00:21<00:00,  2.78s/it]


In [None]:
#Parallel Processing
scores = []

# uimap returns an iterator, so tqdm is given `total` explicitly to size the bar
parallel_results = pool.uimap(process_poems, urls)
for line_scores in tqdm(parallel_results, total=len(urls), position=0):
  scores.extend(line_scores)

100%|██████████| 8/8 [00:16<00:00,  2.33s/it]
