In [1]:
from textblob import TextBlob

In [2]:
# Part-of-speech tagging: `.tags` yields (word, tag) pairs. The recorded
# output has no entry for the sentence-final period.
test = TextBlob('Python is a high-level, general-purpose programming language.')
test.tags

[('Python', 'NNP'),
 ('is', 'VBZ'),
 ('a', 'DT'),
 ('high-level', 'JJ'),
 ('general-purpose', 'JJ'),
 ('programming', 'NN'),
 ('language', 'NN')]

In [3]:
# Noun-phrase extraction on the same blob; the recorded phrase is lower-cased.
test.noun_phrases

WordList(['python'])

In [4]:
# Rebind `test` to a new two-sentence blob and show its sentiment, which is
# reported as a (polarity, subjectivity) pair.
test = TextBlob('Textblob is amazingly simple to use. What great fun!')
test.sentiment

Sentiment(polarity=0.39166666666666666, subjectivity=0.4357142857142857)

In [71]:
# NOTE(review): the execution count (71) shows this cell was last run out of
# order, after `test` had been rebound to a list by the In [33] cell. On a
# fresh top-to-bottom run `test` is still the TextBlob from In [4], so this
# cell only repeats that cell's sentiment and is a candidate for removal.
test.sentiment

Sentiment(polarity=0.39166666666666666, subjectivity=0.4357142857142857)

In [5]:
# Individual fields of the sentiment result are available as attributes.
test.sentiment.polarity

0.39166666666666666

In [6]:
# Three short sentences in one blob; the adjacent string literals are
# concatenated by Python into a single string before TextBlob sees them.
zen = TextBlob(
    "Beautiful is better than ugly. "
    "Explicit is better than implicit. "
    "Simple is better than complex."
)

In [7]:
# Word tokenization; the recorded WordList contains no punctuation tokens.
zen.words

WordList(['Beautiful', 'is', 'better', 'than', 'ugly', 'Explicit', 'is', 'better', 'than', 'implicit', 'Simple', 'is', 'better', 'than', 'complex'])

In [8]:
# Sentence segmentation: one Sentence object per sentence in the blob.
zen.sentences

[Sentence("Beautiful is better than ugly."),
 Sentence("Explicit is better than implicit."),
 Sentence("Simple is better than complex.")]

In [9]:
# Each Sentence exposes its own sentiment, so polarity can be compared
# sentence by sentence.
for senten in zen.sentences:
    print(senten.sentiment)

Sentiment(polarity=0.2166666666666667, subjectivity=0.8333333333333334)
Sentiment(polarity=0.5, subjectivity=0.5)
Sentiment(polarity=0.06666666666666667, subjectivity=0.41904761904761906)


In [10]:
# Word list for a sentence containing a number; '4' is kept as its own token.
sentence = TextBlob('Use 4 spaces per indentation level.')
sentence.words

WordList(['Use', '4', 'spaces', 'per', 'indentation', 'level'])

In [11]:
# Index 2 is 'spaces' in the word list above.
sentence.words[2].singularize()

'space'

In [12]:
# Index 5 is 'level' in the word list above.
sentence.words[5].pluralize()

'levels'

In [13]:
from textblob import Word
# Lemmatize a single Word with no part-of-speech argument.
w = Word('octopi')
w.lemmatize()

'octopus'

In [14]:
# Passing 'v' asks for the verb reading (presumably the WordNet POS tag),
# which is what maps 'went' to 'go'.
w = Word('went')
w.lemmatize('v')

'go'

In [15]:
# `.synsets` lists the synsets for the word; the capitalised input still
# resolves to the octopus.n.* entries.
w = Word('Octopus')
w.synsets

[Synset('octopus.n.01'), Synset('octopus.n.02')]

In [16]:
from textblob.wordnet import VERB
# Restrict the lookup with pos=VERB; every recorded result is a .v. synset.
Word('hacking').get_synsets(pos=VERB)

[Synset('chop.v.05'),
 Synset('hack.v.02'),
 Synset('hack.v.03'),
 Synset('hack.v.04'),
 Synset('hack.v.05'),
 Synset('hack.v.06'),
 Synset('hack.v.07'),
 Synset('hack.v.08')]

In [17]:
# `.definitions` returns a list of definition strings for the word.
Word('Cricket').definitions

['leaping insect; male makes chirping noises by rubbing the forewings together',
 'a game played with a ball and bat by two teams of 11 players; teams take turns trying to score runs',
 'play cricket']

In [18]:
from textblob.wordnet import Synset
# Build two specific synsets by name and compute their path similarity.
octopus = Synset('octopus.n.02')
shrimp = Synset('shrimp.n.03')
octopus.path_similarity(shrimp)

0.1111111111111111

In [19]:
# pluralize() on a WordList pluralizes each word (note 'octopus' -> 'octopodes'
# in the recorded output).
animals = TextBlob('cat dog octopus')
animals.words.pluralize()

WordList(['cats', 'dogs', 'octopodes'])

In [20]:
# correct() returns a new TextBlob with the misspellings replaced; `b` itself
# is not reassigned here.
b = TextBlob('I havv a goood spelling')
b.correct()

TextBlob("I have a good spelling")

In [21]:
# spellcheck() on a single Word returns (candidate, score) pairs; the score is
# presumably a confidence in [0, 1].
w = Word('falibility')
w.spellcheck()

[('fallibility', 1.0)]

In [22]:
# word_counts is looked up with a lower-case key; the recorded count of 3
# includes the capitalised 'Ekki'.
monty = TextBlob(
    "We are no longer the Knights who say Ni. "
    "We are now the Knights who say Ekki ekki ekki PTANG."
)
monty.word_counts['ekki']

3

In [23]:
# Translate an English blob to Spanish.
# NOTE(review): translate() was deprecated in TextBlob 0.16.0 and removed in
# 0.18.0 — confirm against the installed TextBlob version; on newer releases
# this cell will not run.
en_blob = TextBlob(u'Simple is better than complex.')
en_blob.translate(to='es')

TextBlob("Simple es mejor que complejo.")

In [24]:
# Translate with an explicit source language (from_lang) as well as a target.
# NOTE(review): translate() was deprecated in TextBlob 0.16.0 and removed in
# 0.18.0 — confirm against the installed TextBlob version.
chinese_blob = TextBlob(u"美丽优于丑陋")
chinese_blob.translate(from_lang="zh-CN", to='en')

TextBlob("Beauty is better than ugly")

In [25]:
# Language detection returns a short language code for the blob's text.
# NOTE(review): detect_language() was deprecated in TextBlob 0.16.0 and
# removed in 0.18.0 — confirm against the installed TextBlob version.
b = TextBlob(u"بسيط هو أفضل من مجمع")
b.detect_language()

'ar'

In [26]:
# parse() returns one string in which each token carries slash-separated
# annotations. `b` is reused by the next four cells.
b = TextBlob("And now for something completely different.")
b.parse()

'And/CC/O/O now/RB/B-ADVP/O for/IN/B-PP/B-PNP something/NN/B-NP/I-PNP completely/RB/B-ADJP/O different/JJ/I-ADJP/O ././O/O'

In [27]:
# Slicing a blob returns another TextBlob, not a plain str.
b[:20]

TextBlob("And now for somethin")

In [28]:
# upper() returns a new upper-cased TextBlob; the result is not assigned, so
# `b` keeps its original text.
b.upper()

TextBlob("AND NOW FOR SOMETHING COMPLETELY DIFFERENT.")

In [29]:
# -1 means "not found": `b` still holds the mixed-case text, and the search
# for the upper-case needle does not match it.
b.find('DIFFERENT')

-1

In [30]:
# Word trigrams over the blob; each n-gram is returned as a WordList.
b.ngrams(n=3)

[WordList(['And', 'now', 'for']),
 WordList(['now', 'for', 'something']),
 WordList(['for', 'something', 'completely']),
 WordList(['something', 'completely', 'different'])]

In [31]:
# Labelled training sentences as (text, label) pairs: five 'pos', five 'neg'.
train = [
    ('I love this sandwich.', 'pos'),
    ('this is an amazing place!', 'pos'),
    ('I feel very good about these beers.', 'pos'),
    ('this is my best work.', 'pos'),
    ("what an awesome view", 'pos'),
    ('I do not like this restaurant', 'neg'),
    ('I am tired of this stuff.', 'neg'),
    ("I can't deal with this", 'neg'),
    ('he is my sworn enemy!', 'neg'),
    ('my boss is horrible.', 'neg'),
]

In [32]:
from textblob.classifiers import NaiveBayesClassifier
# Train a Naive Bayes classifier on the labelled `train` pairs.
cl = NaiveBayesClassifier(train)

In [33]:
# Labelled evaluation sentences as (text, label) pairs.
# NOTE(review): this rebinds `test`, which the earlier cells use for a
# TextBlob; re-running any earlier `test.*` cell after this one fails with
# AttributeError. A distinct name would avoid the clash, but later cells
# reference `test`, so the name is kept here.
test = [
    ('the beer was good.', 'pos'),
    ('I do not enjoy my job', 'neg'),
    ("I ain't feeling dandy today.", 'neg'),
    ("I feel amazing!", 'pos'),
    ('Gary is a friend of mine.', 'pos'),
    ("I can't believe I'm doing this.", 'neg'),
]

In [34]:
# Classify a single raw string with the trained classifier.
cl.classify('This is an amazing library!')

'pos'

In [35]:
# prob_classify() returns a probability distribution over the labels; max()
# gives the most likely one. `prob_dist` is reused by the next two cells.
prob_dist = cl.prob_classify('This one is a doozy')
prob_dist.max()

'pos'

In [36]:
# Probability assigned to the 'pos' label, rounded to two decimals.
round(prob_dist.prob('pos'),2)

0.63

In [37]:
# Probability assigned to the 'neg' label, rounded to two decimals.
round(prob_dist.prob('neg'),2)

0.37

In [38]:
# Attach the trained classifier to a blob; classify() then labels the whole
# text as one document.
blob = TextBlob('The beer is good. But the hangover is horrible.',classifier = cl)
blob.classify()

'pos'

In [39]:
# Classify sentence by sentence; the two sentences receive different labels
# even though the blob as a whole was labelled 'pos'.
for s in blob.sentences:
    print(s + ' : ' + s.classify())

The beer is good. : pos
But the hangover is horrible. : neg


In [40]:
# Accuracy of `cl` on the labelled `test` pairs (before the update below).
cl.accuracy(test)

0.8333333333333334

In [41]:
# Print the features with the largest label-likelihood ratios.
cl.show_informative_features()

Most Informative Features
            contains(my) = True              neg : pos    =      1.7 : 1.0
            contains(an) = False             neg : pos    =      1.6 : 1.0
             contains(I) = True              neg : pos    =      1.4 : 1.0
             contains(I) = False             pos : neg    =      1.4 : 1.0
            contains(my) = False             pos : neg    =      1.3 : 1.0
          contains(deal) = False             pos : neg    =      1.2 : 1.0
           contains(not) = False             pos : neg    =      1.2 : 1.0
          contains(what) = False             neg : pos    =      1.2 : 1.0
          contains(work) = False             neg : pos    =      1.2 : 1.0
       contains(awesome) = False             neg : pos    =      1.2 : 1.0


In [42]:
# Extra labelled examples used to update the existing classifier.
new_data = [
    ('She is my best friend.', 'pos'),
    ("I'm happy to have a new friend.", 'pos'),
    ("Stay thirsty, my friend.", 'pos'),
    ("He ain't from around here.", 'neg'),
]

In [43]:
# update() adds the new examples to the existing classifier in place.
# Re-running this cell adds the same examples again, so it is not idempotent.
cl.update(new_data)

True

In [44]:
# Re-evaluate on the same `test` pairs after the update.
cl.accuracy(test)

1.0

In [45]:
def end_word_extractor(document):
    """Build a feature dict from the first and last tokens of a document.

    The document is split on whitespace; only the first and last resulting
    tokens contribute features.

    Args:
        document: Text to extract features from (a plain string).

    Returns:
        dict: ``{"first(<token>)": True, "last(<token>)": False}`` for a
        document with at least one token, or an empty dict when the document
        is empty or whitespace-only.
    """
    tokens = document.split()
    if not tokens:
        # No first/last word exists; return no features instead of letting
        # tokens[0] raise IndexError.
        return {}
    first_word, last_word = tokens[0], tokens[-1]
    return {
        "first({0})".format(first_word): True,
        # The token is already encoded in the key. The False value is kept
        # exactly as before so existing trained classifiers see the same
        # (name, value) pairs.
        "last({0})".format(last_word): False,
    }
# Sanity-check the extractor on a short sentence.
features = end_word_extractor("I feel happy")
features

{'first(I)': True, 'last(happy)': False}

In [46]:
# Train a second classifier that uses the custom feature extractor.
# NOTE(review): cl2 is trained on `test` (the evaluation list), not `train` —
# confirm this is intended. 'exited' in the sample text looks like a typo for
# 'excited'.
cl2 = NaiveBayesClassifier(test,feature_extractor = end_word_extractor)
blob = TextBlob('I\'m exited to use my new classifier',classifier = cl2)
blob.classify()

'pos'

In [47]:
from textblob.sentiments import NaiveBayesAnalyzer

In [48]:
# Swap the default sentiment analyzer for NaiveBayesAnalyzer via `analyzer=`.
blob = TextBlob("I love this library",analyzer=NaiveBayesAnalyzer())

In [56]:
# With this analyzer the result carries a classification plus p_pos / p_neg
# instead of polarity / subjectivity.
blob.sentiment

Sentiment(classification='pos', p_pos=0.7996209910191279, p_neg=0.2003790089808724)

In [57]:
from nltk.tokenize import TabTokenizer
# Custom tokenizer passed at construction: the recorded tokens are split at
# the tab characters only, so spaces stay inside each token.
tokenizer = TabTokenizer()
blob = TextBlob('This id\ta rather tubby\tblob',tokenizer=tokenizer)
blob.tokens

WordList(['This id', 'a rather tubby', 'blob'])

In [58]:
from nltk.tokenize import BlanklineTokenizer
# A tokenizer can also be supplied per call to tokenize(); here the text is
# split at the blank line.
tokenizer = BlanklineTokenizer()
blob = TextBlob("A token\n\nof appreciation")
blob.tokenize(tokenizer)

WordList(['A token', 'of appreciation'])

In [67]:
from textblob.np_extractors import ConllExtractor
# Use ConllExtractor for noun-phrase extraction via `np_extractor=`.
extractor = ConllExtractor()
blob = TextBlob('Karma is not going to be in his house. draft the party\'s election manifesto',np_extractor=extractor)
blob.noun_phrases

WordList(['karma', 'election manifesto'])

In [69]:
from textblob.taggers import NLTKTagger
# Use NLTKTagger for part-of-speech tags via `pos_tagger=`.
nltk_tagger = NLTKTagger()
blob = TextBlob('This string is tagged',pos_tagger=nltk_tagger)
blob.pos_tags

[('This', 'DT'), ('string', 'NN'), ('is', 'VBZ'), ('tagged', 'VBN')]