In [27]:
from spacy import displacy
from textblob import TextBlob
from textblob import Word
import spacy
# Load spaCy's small English pipeline by its full package name. The 'en'
# shortcut link only exists on spaCy 2.x installs and cannot be loaded on
# spaCy 3+, whereas the package name works on both.
nlp = spacy.load('en_core_web_sm')

# Deep Dive into TextBlob

In [25]:
# TextBlob is a high-level natural language toolkit layered on top of NLTK,
# the popular (but more complicated) Python NLP library.
example = TextBlob(
    '''Like other fields of data science, natural language 
                   processing converts language into machine-understandable tokens. 
                   This becomes valuable when computers begin to parse news articles, 
                   social media, and other media outlets to better understand the world.'''
)

# Show the sentence split first, then the individual word tokens, separated
# by a few blank lines.
print(
    "___Sentences___",
    example.sentences,
    "\n\n",
    "___Words___",
    example.words,
    sep="\n",
)

___Sentences___
[Sentence("Like other fields of data science, natural language 
                   processing converts language into machine-understandable tokens."), Sentence("This becomes valuable when computers begin to parse news articles, 
                   social media, and other media outlets to better understand the world.")]



___Words___
['Like', 'other', 'fields', 'of', 'data', 'science', 'natural', 'language', 'processing', 'converts', 'language', 'into', 'machine-understandable', 'tokens', 'This', 'becomes', 'valuable', 'when', 'computers', 'begin', 'to', 'parse', 'news', 'articles', 'social', 'media', 'and', 'other', 'media', 'outlets', 'to', 'better', 'understand', 'the', 'world']


In [34]:
# NOTE: these conversions seem to be case sensitive, so lowercase the text
# before doing any detailed TextBlob work.
print(
    # TextBlob can convert a word from singular to plural ...
    Word("child").pluralize(),
    # ... or from plural back to singular.
    Word("cacti").singularize(),
    # A list of definitions can also be looked up for a token.
    Word("giraffe").define(),
    # Lemmatizing as a verb ("v") reduces an inflected form to its base form.
    Word("went").lemmatize("v"),
    sep="\n",
)

children
cactus
['tallest living quadruped; having a spotted coat and small horns and very long neck and legs; of savannahs of tropical Africa']
go


In [58]:
# What makes TextBlob particularly powerful is that the word-level commands
# can also be applied to whole lists of words.
list_test = TextBlob("cat dog person car")
print(list_test.words.pluralize())

# Additionally, TextBlob can attempt to correct misspelled text.
spel_check = TextBlob("Sometims in the commets poeple don't spel so gooood.")
print(spel_check.correct())

# The same concept works on a single word; spellcheck() yields
# (candidate, confidence) pairs.
print(Word("respunsiblt").spellcheck())

# Translation is best-effort: translate() calls an online service and was
# deprecated in TextBlob 0.16 and removed in 0.18. Guard the call so a missing
# method (AttributeError) or a network failure (OSError) is reported instead
# of aborting the cell before the final example runs.
try:
    print(TextBlob("A man, a plan, a canal, Panama").translate(to = "es"))
except (AttributeError, OSError) as translate_error:
    print("Translation unavailable: {0!r}".format(translate_error))

# TextBlob word lists can also be sliced as if they were lists or Python strings.
print(example.words[2:6])


['cats', 'dogs', 'people', 'cars']
Sometimes in the comments people don't spell so good.
[('responsible', 1.0)]
Un hombre, un plan, un canal, Panamá.
['fields', 'of', 'data', 'science']


In [59]:
# Moving toward the predictive analytics side: TextBlob automatically tags
# each token with a suggested part of speech (`pos_tags` is the long-form
# name of the `tags` attribute).
print(example.pos_tags)

# A legend for all of the POS tags can be found here:
# https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html

# In many cases, POS taggers are built on sequence models such as Markov
# chains or recurrent networks, and they also leverage dictionaries.

[('Like', 'IN'), ('other', 'JJ'), ('fields', 'NNS'), ('of', 'IN'), ('data', 'NNS'), ('science', 'NN'), ('natural', 'JJ'), ('language', 'NN'), ('processing', 'NN'), ('converts', 'NNS'), ('language', 'NN'), ('into', 'IN'), ('machine-understandable', 'JJ'), ('tokens', 'NNS'), ('This', 'DT'), ('becomes', 'VBZ'), ('valuable', 'JJ'), ('when', 'WRB'), ('computers', 'NNS'), ('begin', 'VBP'), ('to', 'TO'), ('parse', 'VB'), ('news', 'NN'), ('articles', 'NNS'), ('social', 'JJ'), ('media', 'NNS'), ('and', 'CC'), ('other', 'JJ'), ('media', 'NNS'), ('outlets', 'NNS'), ('to', 'TO'), ('better', 'RBR'), ('understand', 'VB'), ('the', 'DT'), ('world', 'NN')]


In [16]:
# TextBlob can infer sentiment, and it copes with typically negative words
# that are used as intensifiers (e.g. "terribly great").
positive = TextBlob("West Monroe Partners is a great company!")
negative = TextBlob("Chicago winter weather is terrible!")
pos_neg = TextBlob("West Monroe Partners is a terribly great company!")

# Fill the report template with each blob's sentiment, in order.
print(
    "Positive: {0}\nNegative: {1}\n\nAlso Positive!: {2}\n".format(
        *(blob.sentiment for blob in (positive, negative, pos_neg))
    )
)

Positive: Sentiment(polarity=1.0, subjectivity=0.75)
Negative: Sentiment(polarity=-1.0, subjectivity=1.0)

Also Positive!: Sentiment(polarity=1.0, subjectivity=0.75)



# Now let's look at spaCy, an industrial-strength alternative to TextBlob built for production-ready solutions

In [2]:
# spaCy offers many features beyond TextBlob, including a highly visual
# rendering of tags that are not necessarily parts of speech.

doc = nlp(
    'I just bought 2 shares at 9 a.m. because the stock went up 30% in just 2 days according to the WSJ'
)
# Render the entity view inline in the notebook.
displacy.render(doc, jupyter=True, style='ent')

In [61]:
# Even more interesting is spaCy's ability to connect the tokens of a sentence
# according to their role in it; the result is rendered as a directed graph.

doc = nlp(
    'West Monroe Partners completes many fascinating deals with prominent private equity firms.'
)
# Render the dependency view inline, passing a custom 'distance' option.
displacy.render(
    doc,
    jupyter=True,
    style='dep',
    options={'distance': 80},
)

# With so many options and capabilities, the models that can be applied to technical metadata are numerous. 

## For a snapshot of a custom built POS tagger and NLG story writer, visit BA_Source_Code on the WMP Azure DevOps Page