1. pip install -U textblob 
2. python -m textblob.download_corpora

## Basic

In [None]:
from textblob import TextBlob

In [3]:
## creating a textblob object
blob = TextBlob("Analytics Vidhya is a great platform to learn data science.")

In [4]:
## textblobs are like python strings

blob[1:5]

TextBlob("naly")

In [5]:
blob.upper()

TextBlob("ANALYTICS VIDHYA IS A GREAT PLATFORM TO LEARN DATA SCIENCE.")

In [6]:
blob2 = TextBlob("It also helps community through blogs, hackathons, discussions,etc.")

In [7]:
## concat

blob + " And " + blob2

TextBlob("Analytics Vidhya is a great platform to learn data science. And It also helps community through blogs, hackathons, discussions,etc.")

## Tokenization

In [48]:
blob = TextBlob("Analytics Vidhya is a great platform to learn data science. \n It helps community through blogs, hackathons, discussions,etc.")

In [49]:
blob.sentences

[Sentence("Analytics Vidhya is a great platform to learn data science."),
 Sentence("It helps community through blogs, hackathons, discussions,etc.")]

In [31]:
blob.sentences[0]

Sentence("Analytics Vidhya is a great platform to learn data science.")

In [33]:
for words in blob.sentences[0].words:
    print (words)

Analytics
Vidhya
is
a
great
platform
to
learn
data
science


## Noun phrase extraction

In [140]:
blob = TextBlob("Analytics Vidhya is a great platform to learn data science.")
for np in blob.noun_phrases:
    print (np)

analytics vidhya
great platform
data science


As we can see that isn't correct but we were working with machines.

## POS tagging

In [62]:
for words, tag in blob.tags:
    print (words, tag)

Analytics NNS
Vidhya NNP
is VBZ
a DT
great JJ
platform NN
to TO
learn VB
data NNS
science NN


## Sentiment Analysis

In [64]:
print (blob)
blob.sentiment

Analytics Vidhya is a great platform to learn data science.


Sentiment(polarity=0.8, subjectivity=0.75)

## Word Inflection and Lemmatization

In [141]:
blob = TextBlob("Analytics Vidhya is a great platform to learn data science. \n It helps community through blogs, hackathons, discussions,etc.")
print (blob.sentences[1].words[1])
print (blob.sentences[1].words[1].singularize())

helps
help


In [51]:
from textblob import Word
w = Word('Platform')
w.pluralize()

'Platforms'

In [65]:
## using tags
for word,pos in blob.tags:
    if pos == 'NN':
        print (word.pluralize())

platforms
sciences


In [54]:
## lemmatization

w = Word('running')
w.lemmatize("v")  ## v here represents verb

'run'

## Ngrams

In [69]:
for ngram in blob.ngrams(2):
    print (ngram)

['Analytics', 'Vidhya']
['Vidhya', 'is']
['is', 'a']
['a', 'great']
['great', 'platform']
['platform', 'to']
['to', 'learn']
['learn', 'data']
['data', 'science']


## Spelling correction

In [73]:
blob = TextBlob('Analytics Vidhya is a gret platfrm to learn data scence')
blob.correct()

TextBlob("Analytics Vidhya is a great platform to learn data science")

In [74]:
blob.words[4].spellcheck()

[('great', 0.5351351351351351),
 ('get', 0.3162162162162162),
 ('grew', 0.11216216216216217),
 ('grey', 0.026351351351351353),
 ('greet', 0.006081081081081081),
 ('fret', 0.002702702702702703),
 ('grit', 0.0006756756756756757),
 ('cret', 0.0006756756756756757)]

## Creating a short summary from a text

In [143]:
import random

blob = TextBlob('Analytics Vidhya is a thriving community for data driven industry. This platform allows \
    people to know more about analytics from its articles, Q&A forum, and learning paths. Also, we help \
    professionals & amateurs to sharpen their skillsets by providing a platform to participate in Hackathons.')

In [149]:
nouns = list()
for word, tag in blob.tags:
    if tag == 'NN':
        nouns.append(word.lemmatize())

print ("This text is about...")
for item in random.sample(nouns, 5):
    word = Word(item)
    print (word.pluralize())

This text is about...
communities
platforms
forums
platforms
industries


## Language Translation

In [117]:
##blob = TextBlob('Analytics Vidhya is a great source of learning data science.')

In [118]:
##blob.translate(to ='es')

TextBlob("Analytics Vidhya es una gran fuente de aprendizaje de ciencia de datos.")

In [155]:
blob = TextBlob('هذا رائع')

In [156]:
blob.detect_language()

'ar'

In [157]:
blob.translate(from_lang='ar', to ='en')

TextBlob("that's cool")

In [158]:
blob.translate(to= 'en')

TextBlob("that's cool")

## Text Classification using textblob

In [130]:
training = [
            ('Tom Holland is a terrible spiderman.','pos'),
            ('a terrible Javert (Russell Crowe) ruined Les Miserables for me...','pos'),
            ('The Dark Knight Rises is the greatest superhero movie ever!','neg'),
            ('Fantastic Four should have never been made.','pos'),
            ('Wes Anderson is my favorite director!','neg'),
            ('Captain America 2 is pretty awesome.','neg'),
            ('Let\s pretend "Batman and Robin" never happened..','pos'),
            ]
testing = [
           ('Superman was never an interesting character.','pos'),
           ('Fantastic Mr Fox is an awesome film!','neg'),
           ('Dragonball Evolution is simply terrible!!','pos')
           ]

In [159]:
from textblob import classifiers

classifier = classifiers.NaiveBayesClassifier(training)

In [160]:
print (classifier.accuracy(testing))
classifier.show_informative_features(3)

1.0
Most Informative Features
            contains(is) = True              neg : pos    =      2.9 : 1.0
      contains(terrible) = False             neg : pos    =      1.8 : 1.0
         contains(never) = False             neg : pos    =      1.8 : 1.0


In [138]:
blob = TextBlob('the weather is terrible!', classifier=classifier)
print (blob.classify())

neg
