http://textblob.readthedocs.io/en/dev/quickstart.html

In [1]:
from textblob import TextBlob

In [2]:
# Wrap a single sentence in a TextBlob; the next cell inspects its tags and noun phrases.
wiki = TextBlob("Python is a high-level, general-purpose programming language.")

In [3]:
# Part-of-speech tags come back as (word, tag) pairs.
print(wiki.tags)
print("--------------------")
# Noun phrases extracted from the same blob.
print(wiki.noun_phrases)

[('Python', 'NNP'), ('is', 'VBZ'), ('a', 'DT'), ('high-level', 'JJ'), ('general-purpose', 'JJ'), ('programming', 'NN'), ('language', 'NN')]
--------------------
['python']


In [4]:
# Sentiment analysis: .sentiment exposes polarity and subjectivity fields.
testimonial = TextBlob("Textblob is amazingly simple to use. What great fun!")
print(testimonial.sentiment)
# Individual fields can be read directly from the Sentiment result.
print(testimonial.sentiment.polarity)

Sentiment(polarity=0.39166666666666666, subjectivity=0.4357142857142857)
0.39166666666666666


In [32]:
# Three sentences joined by implicit string-literal concatenation.
zen = TextBlob(
    "Beautiful is better than ugly. "
    "Explicit is better than implicit. "
    "Simple is better than complex."
)
print(zen.words)  # flat list of words across all sentences
print(zen.sentences)  # one Sentence object per sentence
# Sentiment is reported separately for each sentence.
for sentence in zen.sentences:
    print(sentence.sentiment)

['Beautiful', 'is', 'better', 'than', 'ugly', 'Explicit', 'is', 'better', 'than', 'implicit', 'Simple', 'is', 'better', 'than', 'complex']
[Sentence("Beautiful is better than ugly."), Sentence("Explicit is better than implicit."), Sentence("Simple is better than complex.")]
Sentiment(polarity=0.2166666666666667, subjectivity=0.8333333333333334)
Sentiment(polarity=0.5, subjectivity=0.5)
Sentiment(polarity=0.06666666666666667, subjectivity=0.41904761904761906)


In [35]:
# Word inflection: singularize the third word and pluralize the last one.
sentence = TextBlob('Use 4 spaces per indentation level.')
print(sentence.words)
print(sentence.words[2].singularize())
print(sentence.words[-1].pluralize())

['Use', '4', 'spaces', 'per', 'indentation', 'level']
space
levels


In [45]:
from textblob import Word
# Lemmatize without specifying a part of speech.
w = Word("octopi")
print(w.lemmatize())
# Lemmatize a verb form by naming the part of speech explicitly.
we = Word("went")
print(we.lemmatize("v")) # Pass in part of speech (verb)

octopus
go


In [58]:
from textblob import Word
from textblob.wordnet import VERB
# All synsets for a word, regardless of part of speech.
word = Word("octopus")
print(word.synsets)
print("---------------------")
# Restrict the synsets to verbs only via the pos argument.
print(Word("hack").get_synsets(pos=VERB))

[Synset('octopus.n.01'), Synset('octopus.n.02')]
---------------------
[Synset('chop.v.05'), Synset('hack.v.02'), Synset('hack.v.03'), Synset('hack.v.04'), Synset('hack.v.05'), Synset('hack.v.06'), Synset('hack.v.07'), Synset('hack.v.08')]


In [61]:
# List the definitions TextBlob returns for a word.
Word("python").definitions

['large Old World boas',
 'a soothsaying spirit or a person who is possessed by such a spirit',
 '(Greek mythology) dragon killed by Apollo at Delphi']

In [62]:
from textblob.wordnet import Synset
# Build two synsets by name and score how similar they are.
octopus = Synset('octopus.n.02')
shrimp = Synset('shrimp.n.03')
octopus.path_similarity(shrimp)

0.1111111111111111

In [65]:
# pluralize() called on the whole word list returns a WordList of plural forms.
animals = TextBlob("cat dog octopus")
words = animals.words
words.pluralize()

WordList(['cats', 'dogs', 'octopodes'])

In [77]:
# Spelling correction: correct() returns the text with misspelled words fixed.
b = TextBlob("I havv goood speling!")
print(b.correct())

I have good spelling!


In [84]:
# Spell check a single word: spellcheck() returns (candidate, score) pairs.
from textblob import Word
w = Word('claime')
w.spellcheck()

[('claim', 0.4095238095238095),
 ('claims', 0.38095238095238093),
 ('claimed', 0.20952380952380953)]

In [115]:
# Word and noun-phrase frequencies over a two-part string literal.
monty = TextBlob(
    "Hey python We are no longer the Knights who say Ni. "
    "We are now the Knights who say Ekki ekki ekki PTANG. Python is amazing"
)
print(monty.word_counts['ekki'])  # dictionary-style lookup
print(monty.words.count('ekki'))  # count() without extra arguments
print(monty.words.count('ekki', case_sensitive=True))  # exact-case matches only
print(monty.noun_phrases.count('python'))  # the same count() on noun phrases

3
3
2
1


In [122]:
# Translate one English sentence into Hindi, French and Spanish.
# NOTE(review): translate() appears to rely on an online translation service and
# is deprecated/removed in newer TextBlob releases — confirm against the
# installed version before relying on this cell.
en_blob = TextBlob(u'you have my word.')
print(en_blob.translate(to='hi'))
print(en_blob.translate(to='fr'))
print(en_blob.translate(to='es'))

आपसे मेरा वादा है।
vous avez ma parole.
tienes mi palabra.


In [123]:
# Translate with the source language given explicitly through from_lang.
# NOTE(review): translate() is deprecated/removed in newer TextBlob releases —
# confirm against the installed version.
chinese_blob = TextBlob(u"美丽优于丑陋")
chinese_blob.translate(from_lang="zh-CN", to='en')

TextBlob("Beauty is better than ugly")

In [124]:
# Ask TextBlob to identify the language of the text.
# NOTE(review): detect_language() is deprecated/removed in newer TextBlob
# releases — confirm against the installed version.
b = TextBlob(u"بسيط هو أفضل من مجمع")
b.detect_language()

'ar'

In [127]:
# parse() yields a slash-delimited annotation for every token.
b = TextBlob("And now for something completely different.")
print(b.parse())

And/CC/O/O now/RB/B-ADVP/O for/IN/B-PP/B-PNP something/NN/B-NP/I-PNP completely/RB/B-ADJP/O different/JJ/I-ADJP/O ././O/O


In [129]:
# TextBlob supports string-style operations; `zen` is rebuilt here and reused by a later cell.
zen = TextBlob(
    "Beautiful is better than ugly. "
    "Explicit is better than implicit. "
    "Simple is better than complex."
)
print(zen[0:19])  # slicing
print(zen.upper())  # upper-casing
print(zen.find("Simple"))  # substring search, returns an index

Beautiful is better
BEAUTIFUL IS BETTER THAN UGLY. EXPLICIT IS BETTER THAN IMPLICIT. SIMPLE IS BETTER THAN COMPLEX.
65


In [130]:
# Blobs can be ordered with the usual comparison operators.
apple_blob = TextBlob('apples')
banana_blob = TextBlob('bananas')
apple_blob < banana_blob

True

In [133]:
# ngrams(n=3) returns every run of three consecutive words as a WordList.
blob = TextBlob("Now is better than never.")
blob.ngrams(n=3)

[WordList(['Now', 'is', 'better']),
 WordList(['is', 'better', 'than']),
 WordList(['better', 'than', 'never'])]

In [134]:
# Uses `zen` from the earlier cell; each sentence exposes start and end indices.
for s in zen.sentences:
    print(s)
    print("---- Starts at index {}, Ends at index {}".format(s.start, s.end))

Beautiful is better than ugly.
---- Starts at index 0, Ends at index 30
Explicit is better than implicit.
---- Starts at index 31, Ends at index 64
Simple is better than complex.
---- Starts at index 65, Ends at index 95


## Building a Text Classification System

http://textblob.readthedocs.io/en/dev/classifiers.html#classifiers

In [6]:
# Labelled (text, label) pairs used to fit the classifier.
train = [
    ("I love this sandwich.", "pos"),
    ("this is an amazing place!", "pos"),
    ("I feel very good about these beers.", "pos"),
    ("this is my best work.", "pos"),
    ("what an awesome view", "pos"),
    ("I do not like this restaurant", "neg"),
    ("I am tired of this stuff.", "neg"),
    ("I can't deal with this", "neg"),
    ("he is my sworn enemy!", "neg"),
    ("my boss is horrible.", "neg"),
]

# Held-out (text, label) pairs used to score the classifier.
test = [
    ("the beer was good.", "pos"),
    ("I do not enjoy my job", "neg"),
    ("I ain't feeling dandy today.", "neg"),
    ("I feel amazing!", "pos"),
    ("Gary is a friend of mine.", "pos"),
    ("I can't believe I'm doing this.", "neg"),
]

In [7]:
from textblob.classifiers import NaiveBayesClassifier
# Fit a Naive Bayes classifier on the labelled training pairs.
cl = NaiveBayesClassifier(train)

In [9]:
# Predict the label of an unseen sentence.
cl.classify("This is an amazing library!")

'pos'

In [181]:
# prob_classify() returns a distribution over the labels instead of a single label.
prob_dist = cl.prob_classify("I'm excited to try my new classifier.")
print(prob_dist.max())
# Per-label probabilities, rounded to four decimals for display.
print(round(prob_dist.prob("pos"), 4))
print(round(prob_dist.prob("neg"), 4))

pos
0.9573
0.0427


In [167]:
# Attach the trained classifier to a blob, then classify the whole text and each sentence.
blob = TextBlob("The beer is good. But the hangover is horrible.", classifier=cl)
print(blob.classify())
for s in blob.sentences:
    print(s)
    print(s.classify())

pos
The beer is good.
pos
But the hangover is horrible.
neg


In [171]:
# Score the classifier against the held-out test pairs.
cl.accuracy(test)

0.8333333333333334

In [172]:
# Print the five most informative features of the trained classifier.
cl.show_informative_features(5)

Most Informative Features
            contains(my) = True              neg : pos    =      1.7 : 1.0
            contains(an) = False             neg : pos    =      1.6 : 1.0
             contains(I) = True              neg : pos    =      1.4 : 1.0
             contains(I) = False             pos : neg    =      1.4 : 1.0
            contains(my) = False             pos : neg    =      1.3 : 1.0


In [174]:
# Extra labelled examples fed to the existing classifier.
new_data = [
    ("She is my best friend.", "pos"),
    ("I'm happy to have a new friend.", "pos"),
    ("Stay thirsty, my friend.", "pos"),
    ("He ain't from around here.", "neg"),
]
# update() mutates `cl`, so re-running this cell adds the same examples again.
print(cl.update(new_data))
# Re-score on the same test pairs after the update.
cl.accuracy(test)

True


1.0

In [176]:
def end_word_extractor(document):
    """Build first-word / last-word features for a document.

    Args:
        document: Text to featurize; it is split on whitespace.

    Returns:
        dict: ``{"first(<first token>)": True, "last(<last token>)": False}``,
        or an empty dict when the document contains no tokens.
    """
    tokens = document.split()
    if not tokens:
        # Empty or whitespace-only text has no first/last word; indexing
        # tokens[0] would raise IndexError, so report no features instead.
        return {}
    first_word, last_word = tokens[0], tokens[-1]
    return {
        "first({0})".format(first_word): True,
        # The last-word feature is deliberately stored as False; the
        # notebook's own assertion on this function expects that value.
        "last({0})".format(last_word): False,
    }
# Sanity-check the extractor on a three-word sentence.
features = end_word_extractor("I feel happy")
assert features == {'last(happy)': False, 'first(I)': True}

In [177]:
# Display the feature dict produced by end_word_extractor.
features

{'first(I)': True, 'last(happy)': False}

In [180]:
# Train a second classifier that uses the custom feature extractor.
# NOTE(review): cl2 is fitted on `test`, not `train` — confirm this is intended.
cl2 = NaiveBayesClassifier(test, feature_extractor=end_word_extractor)
blob = TextBlob("I'm excited to try my new classifier.", classifier=cl2)
blob.classify()

'pos'

## Overriding Models and the Blobber Class

http://textblob.readthedocs.io/en/dev/advanced_usage.html#advanced

In [3]:
from textblob import TextBlob
from textblob.sentiments import NaiveBayesAnalyzer
# Swap the sentiment analyzer; this one reports a classification with p_pos / p_neg.
blob = TextBlob("I like this library", analyzer=NaiveBayesAnalyzer())
blob.sentiment

Sentiment(classification='pos', p_pos=0.7579480331391911, p_neg=0.2420519668608092)

In [5]:
from nltk.tokenize import TabTokenizer
# Pass a custom tokenizer to the constructor; tokens are split on tab characters.
tokenizer = TabTokenizer()
blob = TextBlob("This is\ta rather tabby\tblob.", tokenizer=tokenizer)
blob.tokens

WordList(['This is', 'a rather tabby', 'blob.'])

In [7]:
from textblob import TextBlob
from nltk.tokenize import BlanklineTokenizer
# Here the tokenizer is supplied per call to tokenize() rather than to the constructor.
tokenizer = BlanklineTokenizer()
blob = TextBlob("A token\n\nof appreciation")
blob.tokenize(tokenizer)

WordList(['A token', 'of appreciation'])

In [9]:
# Noun phrase chunker
from textblob import TextBlob
from textblob.np_extractors import ConllExtractor
extractor = ConllExtractor()
blob = TextBlob("Python is a high-level programming language. Subhrajit loves python", np_extractor=extractor)
blob.noun_phrases

WordList(['python', 'high-level programming language', 'subhrajit'])

In [10]:
from textblob import TextBlob
from textblob.taggers import NLTKTagger
# Override the part-of-speech tagger with the NLTK-backed one.
nltk_tagger = NLTKTagger()
blob = TextBlob("Tag! You're It!", pos_tagger=nltk_tagger)
blob.pos_tags

[('Tag', 'NN'), ('You', 'PRP'), ("'re", 'VBP'), ('It', 'PRP')]

In [1]:
from textblob import TextBlob
from textblob.parsers import PatternParser
# Pass the parser explicitly through the parser argument.
blob = TextBlob("Parsing is fun.", parser=PatternParser())
blob.parse()

'Parsing/VBG/B-VP/O is/VBZ/I-VP/O fun/NN/B-NP/O ././O/O'

In [2]:
from textblob import Blobber
from textblob.taggers import NLTKTagger
# A Blobber is configured once and then called to create blobs (see the next cell).
tb = Blobber(pos_tagger=NLTKTagger())

In [3]:
# Blobs made by the same Blobber share a single tagger instance (identity check below).
blob1 = tb("This is a blob.")
blob2 = tb("This is another blob.")
blob1.pos_tagger is blob2.pos_tagger

True

### Tag Details

In [4]:
import pandas as pd
# Load the pipe-delimited tag reference table; the path is relative to the working directory.
tags = pd.read_csv("tags_details.dsv",sep="|")

In [18]:
# Look up the details for the JJ tag with a single .loc selection (row mask, column label).
tags.loc[tags["tag_name"] == "JJ", "tag_details"]

5    third ill-mannered pre-war regrettable oiled c...
Name: tag_details, dtype: object