In [1]:
 # One-time setup: uncomment the next line to install TextBlob into the kernel's environment.
 # %pip install textblob

Note: you may need to restart the kernel to use updated packages.


# Sentiment Analysis

In [2]:
from textblob import TextBlob

# Score one sentence; .sentiment yields a Sentiment(polarity, subjectivity) result.
text = TextBlob("I hope you are enjoying this tutorial.")
print(text.sentiment)

Sentiment(polarity=0.5, subjectivity=0.6)


In [3]:
# Tokenization: .words splits the text into its individual word tokens.
text = TextBlob("I am a fan of Apple Products")
print(text.words)

['I', 'am', 'a', 'fan', 'of', 'Apple', 'Products']


In [4]:
# Spelling correction: correct() produces the text with misspelled words fixed.
# The corrected result is only printed here, never assigned back to `text`.
text = TextBlob("I love Machne Learnin")
print(text.correct())

I love Machine Learning


In [6]:
# `text` is whatever the most recently run cell bound it to -- presumably the
# "I love Machne Learnin" blob from the spelling-correction cell (verify on re-run).
text.sentiment

Sentiment(polarity=0.5, subjectivity=0.6)

In [7]:
# The Sentiment result exposes its fields by name; read just the polarity score.
text.sentiment.polarity

0.5

# Tokenization

In [9]:
# The excerpt is written as three adjacent string literals, which Python
# concatenates into one string before it is handed to TextBlob.
zen = TextBlob(
    "Beautiful is better than ugly. "
    "Explicit is better than implicit. "
    "Simple is better than complex."
)
zen.words  # word tokens of the whole blob (the displayed list has no punctuation)

WordList(['Beautiful', 'is', 'better', 'than', 'ugly', 'Explicit', 'is', 'better', 'than', 'implicit', 'Simple', 'is', 'better', 'than', 'complex'])

In [10]:
# Sentence-level tokenization: .sentences yields one Sentence object per sentence.
zen.sentences

[Sentence("Beautiful is better than ugly."),
 Sentence("Explicit is better than implicit."),
 Sentence("Simple is better than complex.")]

In [12]:
# Each Sentence carries its own .sentiment, so the blob can be scored sentence by sentence.
for sentence in zen.sentences:
    print(sentence.sentiment)

Sentiment(polarity=0.2166666666666667, subjectivity=0.8333333333333334)
Sentiment(polarity=0.5, subjectivity=0.5)
Sentiment(polarity=0.06666666666666667, subjectivity=0.41904761904761906)


# Word Inflection and Lemmatization


#### Each word in `TextBlob.words` or `Sentence.words` is a `Word` object (a subclass of `str`; `unicode` on Python 2) with useful methods, e.g. for word inflection.

```python
>>> sentence = TextBlob('Use 4 spaces per indentation level.')
>>> sentence.words
WordList(['Use', '4', 'spaces', 'per', 'indentation', 'level'])
>>> sentence.words[2].singularize()
'space'
>>> sentence.words[-1].pluralize()
'levels'
```

# The `lemmatize` method

In [15]:
from textblob import Word

In [16]:
# Wrap a single token in Word to use its word-level helpers; lemmatize() is
# called here without a part-of-speech argument.
w = Word("octopi")
w.lemmatize()

'octopus'

In [19]:
# Lemmatize as a verb so the inflected form is mapped back to its base verb.
w = Word("went")
w.lemmatize("v")  # Pass in WordNet part of speech (verb)

'go'

# Parsing

In [20]:
# "Parsing" here simply means analysing the text.

In [21]:
# parse() returns the sentence with every token annotated as slash-separated fields
# (the exact tag scheme depends on the parser TextBlob is configured with -- TODO confirm).
b = TextBlob("And now for something completely different.")
print(b.parse())

And/CC/O/O now/RB/B-ADVP/O for/IN/B-PP/B-PNP something/NN/B-NP/I-PNP completely/RB/B-ADJP/O different/JJ/I-ADJP/O ././O/O


# TextBlobs Are Like Python Strings

In [24]:
# TextBlobs support string-style slicing and case methods.
# A notebook cell only auto-displays its LAST expression, so the slice must be
# printed explicitly; as a bare expression its result was silently discarded.
print(zen[0:19])
zen.upper()


TextBlob("BEAUTIFUL IS BETTER THAN UGLY. EXPLICIT IS BETTER THAN IMPLICIT. SIMPLE IS BETTER THAN COMPLEX.")

In [25]:
# Substring search works as on a str; the result is a character offset into the text.
zen.find("Simple")

65

# n-grams

In [26]:
# ngrams(n=3) slides a 3-word window over the text, giving one WordList per window.
blob = TextBlob("Now is better than never.")
blob.ngrams(n=3)

[WordList(['Now', 'is', 'better']),
 WordList(['is', 'better', 'than']),
 WordList(['better', 'than', 'never'])]

# Get Start and End Indices of Sentences

In [27]:
 # Each Sentence records where it sits inside the parent blob via .start / .end.
 # (The pasted REPL "..." continuation prompts were removed: they are not Python
 # syntax and only ran because IPython strips them.)
 for s in zen.sentences:
     print(s)
     print("---- Starts at index {}, Ends at index {}".format(s.start, s.end))

Beautiful is better than ugly.
---- Starts at index 0, Ends at index 30
Explicit is better than implicit.
---- Starts at index 31, Ends at index 64
Simple is better than complex.
---- Starts at index 65, Ends at index 95
