# Create a TextBlob

In [1]:
from textblob import TextBlob, Word
from textblob.wordnet import VERB

In [2]:
# Wrap the raw text in a TextBlob; the bare name on the last line displays its repr.
wiki = TextBlob("Python is a high-level, general-purpose programming language.")
wiki

TextBlob("Python is a high-level, general-purpose programming language.")

# Part-of-speech Tagging

In [3]:
# Part-of-speech tags come back as (word, tag) tuples; punctuation tokens are not listed.
wiki.tags

[('Python', 'NNP'),
 ('is', 'VBZ'),
 ('a', 'DT'),
 ('high-level', 'JJ'),
 ('general-purpose', 'JJ'),
 ('programming', 'NN'),
 ('language', 'NN')]

# Noun Phrase Extraction

In [4]:
# Noun phrases are returned as a WordList; the phrase text is lowercased in the result.
wiki.noun_phrases

WordList(['python'])

# Sentiment Analysis

In [5]:
# .sentiment yields a Sentiment(polarity, subjectivity) pair of floats.
# Per the TextBlob docs, polarity lies in [-1.0, 1.0] and subjectivity in [0.0, 1.0].
testimonial = TextBlob("Textblob is amazingly simple to use. What great fun!")
testimonial.sentiment

Sentiment(polarity=0.39166666666666666, subjectivity=0.4357142857142857)

In [6]:
# Individual fields of the Sentiment result are available as attributes.
testimonial.sentiment.polarity

0.39166666666666666

# Tokenization

In [7]:
# Three sentences, separated by single spaces, form one blob.
zen = TextBlob(" ".join([
    "Beautiful is better than ugly.",
    "Explicit is better than implicit.",
    "Simple is better than complex.",
]))

# .words gives the word tokens across all sentences; punctuation is not included.
zen.words

WordList(['Beautiful', 'is', 'better', 'than', 'ugly', 'Explicit', 'is', 'better', 'than', 'implicit', 'Simple', 'is', 'better', 'than', 'complex'])

In [8]:
# Sentence-level split: each item is a Sentence object that keeps its terminal punctuation.
zen.sentences

[Sentence("Beautiful is better than ugly."),
 Sentence("Explicit is better than implicit."),
 Sentence("Simple is better than complex.")]

# Word Inflection and Lemmatization
- Lemmatization is the process of grouping together the inflected forms of a word so they can be analysed as a single item (for example, the verb form "went" is reduced to its lemma "go").

In [9]:
# Tokenize a short sentence; the numeral '4' is kept as a word and the final period is dropped.
sentence = TextBlob('Use 4 spaces per indentation level.')
sentence.words

WordList(['Use', '4', 'spaces', 'per', 'indentation', 'level'])

In [10]:
# words[2] is 'spaces'; singularize() returns its singular form.
sentence.words[2].singularize()

'space'

In [11]:
# words[-2] is 'indentation'; pluralize() returns its plural form.
sentence.words[-2].pluralize()

'indentations'

In [12]:
# words[-1] is 'level', the last token of the sentence.
sentence.words[-1].pluralize()

'levels'

In [13]:
# lemmatize() maps the irregular plural "octopi" back to its lemma.
w = Word("octopi")
w.lemmatize()

'octopus'

In [14]:
# Rebind w to an irregular past-tense verb form.
w = Word("went")

# "v" selects the WordNet verb part of speech, so "went" resolves to its base verb.
w.lemmatize("v")

'go'

# WordNet Integration

In [15]:
# .synsets lists the WordNet Synset objects that match the word.
word = Word("octopus")
word.synsets

[Synset('octopus.n.01'), Synset('octopus.n.02')]

In [16]:
# Restrict the synset lookup to verbs by passing the WordNet VERB constant as pos.
Word("hack").get_synsets(pos=VERB)

[Synset('chop.v.05'),
 Synset('hack.v.02'),
 Synset('hack.v.03'),
 Synset('hack.v.04'),
 Synset('hack.v.05'),
 Synset('hack.v.06'),
 Synset('hack.v.07'),
 Synset('hack.v.08')]

In [17]:
# .definitions returns plain-text definitions (two for "octopus", matching its two synsets).
Word("octopus").definitions

['tentacles of octopus prepared as food',
 'bottom-living cephalopod having a soft oval body with eight long tentacles']

# WordLists
- A `WordList` is a Python list with additional methods, such as `pluralize()`, that operate on every word it contains.

In [18]:
# .words on a blob produces a WordList of its tokens.
animals = TextBlob("cat dog octopus")
animals.words

WordList(['cat', 'dog', 'octopus'])

In [19]:
# WordList.pluralize() pluralizes every element and returns the results as a WordList.
animals.words.pluralize()

WordList(['cats', 'dogs', 'octopodes'])

# Spelling Correction

In [20]:
# correct() returns a TextBlob in which the misspelled words have been replaced.
b = TextBlob("I havv goood speling!")
b.correct()

TextBlob("I have good spelling!")

In [21]:
# spellcheck() returns a list of (candidate, confidence) tuples for a single word.
w = Word('falibility')
w.spellcheck()

[('fallibility', 1.0)]

# Get Word and Noun Phrase Frequencies

In [22]:
# Two sentences, separated by a single space, form one blob.
monty = TextBlob(" ".join([
    "We are no longer the Knights who say Ni.",
    "We are now the Knights who say Ekki ekki ekki PTANG.",
]))

# word_counts is keyed by lowercased word, so 'Ekki' and 'ekki' are tallied together.
monty.word_counts['ekki']

3

In [23]:
# The same tally through the WordList: count() ignores case by default.
monty.words.count('ekki')

3

In [24]:
# case_sensitive=True counts exact matches only, so the capitalized 'Ekki' is excluded.
monty.words.count('ekki', case_sensitive=True)

2

In [25]:
# count() works on the noun-phrase WordList as well.
wiki.noun_phrases.count('python')

1

# Translation and Language Detection

In [26]:
# Translate English text to Spanish ('es').
# NOTE(review): translate() relies on an external translation service, so this cell needs
# network access; the method was deprecated in TextBlob 0.16.0 and removed in 0.18.0 —
# confirm the installed version before relying on it.
en_blob = TextBlob(u'Simple is better than complex.')
en_blob.translate(to='es')

TextBlob("Simple es mejor que complejo.")

In [27]:
# Translate with an explicit source language (Simplified Chinese) instead of auto-detection.
# NOTE(review): translate() was deprecated in TextBlob 0.16.0 and removed in 0.18.0 —
# confirm the installed version supports it.
chinese_blob = TextBlob(u"美丽优于丑陋")
chinese_blob.translate(from_lang="zh-CN", to='en')

TextBlob("Beautiful is better than ugly")

In [28]:
# detect_language() returns a language code string ('ar' for this Arabic text).
# NOTE(review): detect_language() was deprecated in TextBlob 0.16.0 and removed in 0.18.0 —
# confirm the installed version supports it.
b = TextBlob(u"بسيط هو أفضل من مجمع")
b.detect_language()

'ar'

# Parsing

In [29]:
# parse() returns one string in which every token carries slash-separated annotations
# (e.g. 'now/RB/B-ADVP/O'); tokens are separated by spaces.
b = TextBlob("And now for something completely different.")
b.parse()

'And/CC/O/O now/RB/B-ADVP/O for/IN/B-PP/B-PNP something/NN/B-NP/I-PNP completely/RB/B-ADJP/O different/JJ/I-ADJP/O ././O/O'

# n-grams

In [30]:
# ngrams(n=3) slides a three-word window over the text and returns a list of WordLists.
blob = TextBlob("Now is better than never.")
blob.ngrams(n=3)

[WordList(['Now', 'is', 'better']),
 WordList(['is', 'better', 'than']),
 WordList(['better', 'than', 'never'])]

# Get Start and End Indices of Sentences

In [31]:
# Print each sentence of `zen` followed by its character offsets within the parent blob.
# In the output, each sentence's end index is one less than the next sentence's start index.
for s in zen.sentences:
    print(s)
    print("---- Starts at index {}, Ends at index {}".format(s.start, s.end))

Beautiful is better than ugly.
---- Starts at index 0, Ends at index 30
Explicit is better than implicit.
---- Starts at index 31, Ends at index 64
Simple is better than complex.
---- Starts at index 65, Ends at index 95
