In [None]:
# Create a textblob
!pip install textblob



In [None]:
from textblob import TextBlob

In [None]:
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [None]:
import nltk
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [None]:
wiki = TextBlob("Python is a high-level, general-purpose programming language.")

In [None]:
wiki.tags

[('Python', 'NNP'),
 ('is', 'VBZ'),
 ('a', 'DT'),
 ('high-level', 'JJ'),
 ('general-purpose', 'JJ'),
 ('programming', 'NN'),
 ('language', 'NN')]

In [None]:
import nltk
nltk.download('brown')

[nltk_data] Downloading package brown to /root/nltk_data...
[nltk_data]   Package brown is already up-to-date!


True

In [None]:
wiki.noun_phrases

WordList(['python'])

In [None]:
testimonial = TextBlob("Textblob is amazingly simple to use. What great fun!")
testimonial.sentiment

Sentiment(polarity=0.39166666666666666, subjectivity=0.4357142857142857)

In [None]:
testimonial.sentiment.subjectivity

0.4357142857142857

In [None]:
zen = TextBlob("Beautiful is better than ugly. "
"Explicit is better than implicit. "
"Simple is better than complex.")
zen.words

WordList(['Beautiful', 'is', 'better', 'than', 'ugly', 'Explicit', 'is', 'better', 'than', 'implicit', 'Simple', 'is', 'better', 'than', 'complex'])

In [None]:
zen.sentences

[Sentence("Beautiful is better than ugly."),
 Sentence("Explicit is better than implicit."),
 Sentence("Simple is better than complex.")]

In [None]:
for sentence in zen.sentences:
  print(sentence)

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.


Word Inflection and Lemmatization

In [None]:
a = TextBlob("Use 4 spaces per indentation level")
a.words

WordList(['Use', '4', 'spaces', 'per', 'indentation', 'level'])

In [None]:
a.words[2].singularize()

'space'

In [None]:
a.words[0].pluralize()

'Uses'

In [None]:
import nltk
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [None]:
from textblob import Word
q = Word("lions")
q.lemmatize()

'lion'

In [None]:
q = Word("went")
q.lemmatize("v")

'go'

WordNet Integration

In [None]:
Word("went").definitions

['change location; move, travel, or proceed, also metaphorically',
 'follow a procedure or take a course',
 'move away from a place into another direction',
 'enter or assume a certain state or condition',
 'be awarded; be allotted',
 'have a particular form',
 'stretch out over a distance, space, time, or scope; run or extend between two points or beyond a certain point',
 'follow a certain course',
 'be abolished or discarded',
 'be or continue to be in a certain condition',
 'make a certain noise or sound',
 'perform as expected when applied',
 'to be spent or finished',
 'progress by being changed',
 'continue to live through hardship or adversity',
 'pass, fare, or elapse; of a certain state of affairs or action',
 'pass from physical life and lose all bodily attributes and functions necessary to sustain life',
 'be in the right place or situation',
 'be ranked or compare',
 'begin or set in motion',
 "have a turn; make one's move in a game",
 'be contained in',
 'be sounded, play

In [None]:
from textblob.wordnet import Synset
octopus = Synset("octopus.n.02")
shrimp = Synset("shrimp.n.03")
octopus.path_similarity(shrimp)

0.1111111111111111

WordLists

In [None]:
animals = TextBlob("cat dog octopus")
animals.words

WordList(['cat', 'dog', 'octopus'])

In [None]:
animals.words.pluralize()

WordList(['cats', 'dogs', 'octopodes'])

Spelling Correction

In [None]:
g = TextBlob("It is a verry goood weather today")
print(g.correct())

It is a very good weather today


In [None]:
k = Word("sincce")
k.spellcheck()

[('since', 1.0)]

Get Word and Noun Phrase Frequencies

In [None]:
sent = TextBlob("She sales sea shells at the sea shore")
sent.word_counts['sea']

2

In [None]:
sent.words.count('sea')

2

In [None]:
sent.words.count('Sea', case_sensitive=True)

0

Translation and Language Detection

In [None]:
!pip install textblob



In [None]:
!pip install googletrans==4.0.0-rc1



In [None]:
from googletrans import Translator

translator = Translator()
text = "Something is better than nothing"
translated = translator.translate(text, dest='hi')
print(translated.text)

कुछ होना कुछ नहीं होने से बेहतर है


In [None]:
from googletrans import Translator
from textblob import TextBlob

translator = Translator()
chinese_blob = TextBlob("有总比没有好")
chinese_text = chinese_blob.raw
translated_en = translator.translate(chinese_text, src='zh-CN', dest='en')
print(translated_en)

Translated(src=zh-cn, dest=en, text=There is always better than not, pronunciation=None, extra_data="{'confiden...")


In [None]:
d = TextBlob("Something is better than nothing")
d.detect_language()

HTTPError: HTTP Error 400: Bad Request

In [None]:
zen[0:15]

TextBlob("Beautiful is be")

In [None]:
zen.upper()

TextBlob("BEAUTIFUL IS BETTER THAN UGLY. EXPLICIT IS BETTER THAN IMPLICIT. SIMPLE IS BETTER THAN COMPLEX.")

In [None]:
zen.find('than')

20

In [None]:
a_blob = TextBlob('apple')
s_blob = TextBlob('samsung')
a_blob < s_blob

True

In [None]:
a_blob == 'apple'

True

In [None]:
a_blob + ' and ' + s_blob

TextBlob("apple and samsung")

In [None]:
"{} {} is a better company than {} {}".format(a_blob, 'Iphone', s_blob, 'Pixel')

'apple Iphone is a better company than samsung Pixel'

n-grams

In [None]:
blob = TextBlob("Now is better than ever.")
blob.ngrams(n=3)

[WordList(['Now', 'is', 'better']),
 WordList(['is', 'better', 'than']),
 WordList(['better', 'than', 'ever'])]

In [None]:
for k in zen.sentences:
  print(k)
  print("---- Starts at index {}, Ends at index {}".format(k.start, k.end))

Beautiful is better than ugly.
---- Starts at index 0, Ends at index 30
Explicit is better than implicit.
---- Starts at index 31, Ends at index 64
Simple is better than complex.
---- Starts at index 65, Ends at index 95


In [None]:
# Loading Data and Creating a Classifier
train = [
     ('I love this sandwich.', 'pos'),
     ('This is an amazing place!', 'pos'),
     ('I feel very good about these beers.', 'pos'),
]

test = [
     ('The beer was good.', 'pos'),
     ('I do not enjoy my job', 'neg')
]

In [None]:
from textblob.classifiers import NaiveBayesClassifier
cl = NaiveBayesClassifier(train)

In [None]:
# Classifying Text
cl.classify("This is an amazing library!")

'pos'

In [None]:
prob_dist = cl.prob_classify("I am feeling good.")
prob_dist.max()

'pos'

In [None]:
round(prob_dist.prob("pos"), 2)

1.0

In [None]:
round(prob_dist.prob("neg"), 2)

0

Classifying TextBlobs

In [None]:
from textblob import TextBlob
blob = TextBlob("Alcohol is good. But the hangover is horrible.", classifier=cl)
blob.classify()

'pos'

In [None]:
for b in blob.sentences:
  print(b)
  print(b.classify())

Alcohol is good.
pos
But the hangover is horrible.
pos


Evaluating Classifiers

In [None]:
cl.accuracy(test)

0.5

In [None]:
cl.show_informative_features(5)

Most Informative Features


In [None]:
data = [
    ('I feel happy this morning.', 'pos'),
    ('This is my best work.', 'pos'),
    ("What an awesome view", 'pos'),
    ('I do not like this restaurant', 'neg'),
    ('I am tired of this stuff.', 'neg'),
]

In [None]:
cl.update(data)

True

In [None]:
cl.accuracy(data)

1.0

Feature Extractors

In [None]:
def end_word_extractor(document):
  tokens = document.split()
  first_word, last_word = tokens[0], tokens[-1]
  feats = {}
  feats["first({0})".format(first_word)] = True
  feats["last({0})".format(last_word)] = False
  return feats

In [None]:
features = end_word_extractor("I love")
assert features == {"last(love)": False, "first(I)": True}

In [None]:
cl2 = NaiveBayesClassifier(train, feature_extractor=end_word_extractor)

In [None]:
blob = TextBlob("I'm excited to try my new classifier.", classifier=cl2)
blob.classify()

'pos'