In [3]:
from textblob import TextBlob
string1=TextBlob("Pratap")

In [3]:
string2=TextBlob("Kumar")

In [8]:
string1[1:5]

TextBlob("rata")

In [9]:
string1.upper()

TextBlob("PRATAP")

In [13]:
string1.upper() +"  "+ string2.upper()

TextBlob("PRATAP  KUMAR")

In [None]:
#NLP TASKS USING TEXT BLOB
#1.Tokenization

In [None]:
#Tokenization refers to divided text or a sentence into a sequence of tokens

In [4]:
Text=TextBlob("India is a popular tourist destination with its history, architecture and geographical splendor.\n There are several languages spoken all over India.")

In [None]:
#Now, this textblob can be tokenized into a sentence and further into words. Let’s look at the code shown below.

In [6]:
Text.sentences

[Sentence("India is a popular tourist destination with its history, architecture and geographical splendor."),
 Sentence("There are several languages spoken all over India.")]

In [15]:
T1=Text.sentences[0]  ## extracting only first sentence

In [12]:
## printing words of second sentence in single line format

print(Text.sentences[0].words)

['India', 'is', 'a', 'popular', 'tourist', 'destination', 'with', 'its', 'history', 'architecture', 'and', 'geographical', 'splendor']


In [28]:
for words in Text.sentences[1].words:  ## printing words of second sentence line by line
 print(words)

There
are
several
languages
spoken
all
over
India


In [30]:
2.#Noun Phrase Extraction

for np in Text.sentences[0].noun_phrases:
    print(np)

india
popular tourist destination
geographical splendor


In [32]:
#3 Part-of-speech Tagging
#Part-of-speech tagging or grammatical tagging is a method to mark words present in a text on the basis of its definition and context. In simple words, it tells whether a word is a noun, or an adjective, or a verb, etc. This is just a complete version of noun phrase extraction, where we want to find all the the parts of speech in a sentence.
for words, tag in Text.sentences[0].tags:
    
    print(words,tag)


India NNP
is VBZ
a DT
popular JJ
tourist NN
destination NN
with IN
its PRP$
history NN
architecture NN
and CC
geographical JJ
splendor NN


In [33]:
#Here NNP is Noun,JJ is Adjective, IN  is Conjection etc.You can check the full list of tags from https://www.clips.uantwerpen.be/pages/mbsp-tags

In [35]:
#4 Words Inflection and Lemmatization
#Inflection is a process of word formation in which characters are added to the base form of a word to express grammatical meanings. Word inflection in TextBlob is very simple, i.e., the words we tokenized from a textblob can be easily changed into singular or plural


print(Text.sentences[1].words[3])
print(Text.sentences[1].words[3].singularize())

languages
language


In [37]:
#TextBlob library also offers an in-build object known as Word. We just need to create a word object and then apply a function directly to it as shown below.

from textblob import Word

T1=Word("string")
T1.pluralize()

'strings'

In [39]:
#using tags

for words,pos in Text.sentences[0].tags:
    if pos=="NN":
        print(words.pluralize())


tourists
destinations
histories
architectures
splendors


In [40]:
#Words can be lemmatized using the lemmatize function.
## lemmatization
w = Word('walking')
w.lemmatize("v") ## v here represents verb

'walk'

In [45]:
#5 N-grams
#A combination of multiple words together are called N-Grams. N grams (N > 1) are generally more informative as compared to words, and can be used as features for language modelling.  N-grams can be easily accessed in TextBlob using the ngrams function, which returns a tuple of n successive words.

for ngram in Text.sentences[0].ngrams(2):
    print(ngram)

['India', 'is']
['is', 'a']
['a', 'popular']
['popular', 'tourist']
['tourist', 'destination']
['destination', 'with']
['with', 'its']
['its', 'history']
['history', 'architecture']
['architecture', 'and']
['and', 'geographical']
['geographical', 'splendor']


In [47]:
#6 Sentiment Analysis
#Sentiment analysis is basically the process of determining the attitude or the emotion of the writer, i.e., whether it is positive or negative or neutral.

#The sentiment function of textblob returns two properties, polarity, and subjectivity.

#Polarity is float which lies in the range of [-1,1] where 1 means positive statement and -1 means a negative statement. Subjective sentences generally refer to personal opinion, emotion or judgment whereas objective refers to factual information. Subjectivity is also a float which lies in the range of [0,1].

#Let’s check the sentiment of our Text






In [50]:
print(Text.sentences[0])

print(Text.sentences[0].sentiment)

India is a popular tourist destination with its history, architecture and geographical splendor.
Sentiment(polarity=0.6, subjectivity=0.9)


In [51]:
#We can see that polarity is 0.6, which means that the statement is positive and 0.9 subjectivity refers that mostly it is a public opinion and not a factual information.

In [52]:
#7. Spelling Correction

Text1=TextBlob("India is a poplar touist destinatin with its histary")
Text1.correct()

TextBlob("India is a popular tourist destination with its history")

In [58]:
#We can also check the list of suggested word and its confidence using the spellcheck function.

Text1.words[3].spellcheck()

[('popular', 0.9925373134328358), ('polar', 0.007462686567164179)]

In [72]:
#8.Translation and Language Detection
Text=TextBlob(" j'aime mon inde ")


In [74]:
Text.detect_language()  #It is French

'fr'

In [75]:
#Now translate that word into English

Text.translate(from_lang='fr' ,to='en')

TextBlob("i like my india")

In [76]:
Text.translate(to='en')

TextBlob("i like my india")

In [None]:
#Text classification using TextBlob

In [78]:
#Let’s build a simple text classification model using TextBlob. For this, first, we need to prepare a training and testing data.

training = [
('Tom Holland is a terrible spiderman.','pos'),
('a terrible Javert (Russell Crowe) ruined Les Miserables for me...','pos'),
('The Dark Knight Rises is the greatest superhero movie ever!','neg'),
('Fantastic Four should have never been made.','pos'),
('Wes Anderson is my favorite director!','neg'),
('Captain America 2 is pretty awesome.','neg'),
('Let\s pretend "Batman and Robin" never happened..','pos'),
]
testing = [
('Superman was never an interesting character.','pos'),
('Fantastic Mr Fox is an awesome film!','neg'),
('Dragonball Evolution is simply terrible!!','pos')
]


In [79]:
#Textblob provides in-build classifiers module to create a custom classifier. So, let’s quickly import it and create a basic classifier.

from textblob import classifiers
classifier = classifiers.NaiveBayesClassifier(training)
 

#As you can see above, we have passed the training data into the classifier.

#Note that here we have used Naive Bayes classifier, but TextBlob also offers Decision tree classifier which is as shown below.

## decision tree classifier
dt_classifier = classifiers.DecisionTreeClassifier(training)
#Now, let’s check the accuracy of this classifier on the testing dataset and also TextBlob provides us to check the most informative features.

print (classifier.accuracy(testing))
classifier.show_informative_features(3)

1.0
Most Informative Features
            contains(is) = True              neg : pos    =      2.9 : 1.0
      contains(terrible) = False             neg : pos    =      1.8 : 1.0
         contains(never) = False             neg : pos    =      1.8 : 1.0


In [None]:
#So, based on the training on the above dataset, our classifier has provided us the right result.

#Note that here we could have done some preprocessing and data cleaning but here my aim was to give you an intuition that how we can do text classification using TextBlob.

#Pros and Cons
#Pros:
#Since, it is built on the shoulders of NLTK and Pattern, therefore making it simple for beginners by providing an intuitive interface to NLTK.
#It provides language translation and detection which is powered by Google Translate ( not provided with Spacy).
#Cons:
#It is little slower in the comparison to spacy but faster than NLTK. (Spacy > TextBlob > NLTK)
#It does not provide features like dependency parsing, word vectors etc. which is provided by spacy.
 

In [None]:
#End Notes
#I hope that you that a fun time learning about this library. TextBlob, actually provided a very easy interface for beginners to learn basic NLP tasks.

#I would recommend every beginner to start with this library and then in order to do advance work you can learn spacy as well. We will still be using TextBlob for initial prototyping in the almost every NLP project.