1. pip install -U textblob 
2. python -m textblob.download_corpora

## Basic

In [2]:
from textblob import TextBlob

In [4]:
## creating a textblob object
blob = TextBlob("Logitech is a great platform to implement data science.")

In [5]:
## textblobs are like python strings

blob[1:5]

TextBlob("ogit")

In [6]:
blob.upper()

TextBlob("LOGITECH IS A GREAT PLATFORM TO IMPLEMENT DATA SCIENCE.")

In [9]:
blob2 = TextBlob("It also helps employee through conducting session, discussions,etc.")

In [10]:
## concat

blob + " And " + blob2

TextBlob("Logitech is a great platform to implement data science. And It also helps employee through conducting session, discussions,etc.")

## Tokenization

In [14]:
blob = TextBlob("Logitech is a great platform to implement data science. It also helps employee through conducting session, discussions,etc.")

In [15]:
blob.sentences

[Sentence("Logitech is a great platform to implement data science."),
 Sentence("It also helps employee through conducting session, discussions,etc.")]

In [16]:
blob.sentences[0]

Sentence("Logitech is a great platform to implement data science.")

In [17]:
blob.sentences[0].words

WordList(['Logitech', 'is', 'a', 'great', 'platform', 'to', 'implement', 'data', 'science'])

In [18]:
for words in blob.sentences[0].words:
    print (words)

Logitech
is
a
great
platform
to
implement
data
science


## Noun phrase extraction

In [19]:
blob = TextBlob("Logitech is a great platform to implement data science.")
for np in blob.tags:
    print (np)

('Logitech', 'NNP')
('is', 'VBZ')
('a', 'DT')
('great', 'JJ')
('platform', 'NN')
('to', 'TO')
('implement', 'VB')
('data', 'NNS')
('science', 'NN')


As we can see that isn't correct but we were working with machines.

## POS tagging

In [37]:
for words, tag in blob.tags:
    print (words, tag)

Analytics NNS
Vidhya NNP
is VBZ
a DT
great JJ
platform NN
to TO
learn VB
data NNS
science NN


## Sentiment Analysis

In [38]:
print (blob)
blob.sentiment

Analytics Vidhya is a great platform to learn data science.


Sentiment(polarity=0.8, subjectivity=0.75)

## Word Inflection and Lemmatization

In [21]:
blob = TextBlob("Logitech is a great platform to implement data science. It also helps employee through conducting session, discussions,etc.")
print (blob.sentences[1].words[1])
print (blob.sentences[1].words[1].singularize())

also
also


In [22]:
from textblob import Word
w = Word('Platform')
w.pluralize()

'Platforms'

In [23]:
## using tags
for word,pos in blob.tags:
    if pos == 'NN':
        print (word.pluralize())

platforms
sciences
employees
sessions


In [25]:
## lemmatization

w = Word('bought')
w.lemmatize()  ## v here represents verb

'bought'

## Ngrams

In [27]:
blob = TextBlob("I went to New York")
for ngram in blob.ngrams(2):
    print (ngram)

['I', 'went']
['went', 'to']
['to', 'New']
['New', 'York']


## Spelling correction

In [28]:
blob = TextBlob('Logitech is a gret platfrm to implemet data scence')
blob.correct()

TextBlob("Logitech is a great platform to implement data science")

In [29]:
blob.words[3].spellcheck()

[('great', 0.5351351351351351),
 ('get', 0.3162162162162162),
 ('grew', 0.11216216216216217),
 ('grey', 0.026351351351351353),
 ('greet', 0.006081081081081081),
 ('fret', 0.002702702702702703),
 ('grit', 0.0006756756756756757),
 ('cret', 0.0006756756756756757)]

## Creating a short summary from a text

In [30]:
import random

blob = TextBlob('Logitech is a thriving Organization for data driven industry. This Company allows \
    people to know more about technology from its work, forum, and learning paths. Also, we help \
    professionals & amateurs to sharpen their skillsets.')

In [31]:
nouns = list()
for word, tag in blob.tags:
    if tag == 'NN':
        nouns.append(word.lemmatize())

print ("This text is about...")
for item in random.sample(nouns, 5):
    word = Word(item)
    print (word.pluralize())

This text is about...
forums
technologies
industries
works
Organizations


## Language Translation

In [32]:
blob = TextBlob('Hi How are you?')

In [33]:
blob.translate(to ='es')

TextBlob("¿Hola como estás?")

In [34]:
blob1 = TextBlob('هذا رائع')

In [35]:
blob.detect_language()

'en'

In [39]:
blob = TextBlob("Bonjour")
blob.detect_language()


'fr'

In [40]:
blob1.translate(from_lang='ar', to ='en')

TextBlob("that's cool")

In [41]:
blob.translate(to= 'en')

TextBlob("Hello")

## Text Classification using textblob

In [105]:
training = [
            ('Tom Holland is a terrible spiderman.','neg'),
            ('a terrible Javert (Russell Crowe) ruined Les Miserables for me...','neg'),
            ('The Dark Knight Rises is the greatest superhero movie ever!','pos'),
            ('Fantastic Four should have never been made.','neg'),
            ('Wes Anderson is my favorite director!','pos'),
            ('Captain America 2 is pretty awesome.','pos'),
            ('Let\s pretend "Batman and Robin" never happened..','neg'),
            ]
testing = [
           ('Superman was never an interesting character.','neg'),
           ('Fantastic Mr Fox is an awesome film!','pos'),
           ('Dragonball Evolution is simply terrible!!','neg')
           ]

In [106]:
from textblob import classifiers

classifier = classifiers.NaiveBayesClassifier(training)

In [107]:
print (classifier.accuracy(testing))
classifier.show_informative_features(3)

1.0
Most Informative Features
            contains(is) = True              pos : neg    =      2.9 : 1.0
         contains(never) = False             pos : neg    =      1.8 : 1.0
             contains(a) = False             pos : neg    =      1.8 : 1.0


In [110]:
blob = TextBlob('Robin ', classifier=classifier)
print (blob.classify())

neg


In [54]:
!python summarize.py < The_Hunger_Games.txt

This text is about...
sights
heads
okays
flowers
packages
mounds
worlds
arenas
lives
Someones
