In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from textblob import TextBlob
#import spacy
from wordcloud import WordCloud
import nltk
nltk.download('punkt')
nltk.download('brown')
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Rishav\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package brown to
[nltk_data]     C:\Users\Rishav\AppData\Roaming\nltk_data...
[nltk_data]   Package brown is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\Rishav\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Rishav\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

### TextBlob is python library nd offer  simple API to access its methods and perform basic NLP task

A good thing about TextBlob is that they are just like python strings.

In [2]:
#Basic String Operation
string1 = TextBlob("Edlightened - Knowledge Decode")
string1[:11]

TextBlob("Edlightened")

In [3]:
string1.upper()

TextBlob("EDLIGHTENED - KNOWLEDGE DECODE")

In [4]:
string2 = TextBlob("AIML")
string1 + " " + string2

TextBlob("Edlightened - Knowledge Decode AIML")

In [5]:
!pip install -U textblob

Requirement already up-to-date: textblob in c:\users\rishav\anaconda3\envs\tensorflow_env\lib\site-packages (0.15.3)


#### Tokenization

In [6]:
blob = TextBlob("Edlightened is a great platform to learn data science. \n It helps community through blogs, hackathons, discussions, etc.")
print(blob.sentences)

[Sentence("Edlightened is a great platform to learn data science."), Sentence("It helps community through blogs, hackathons, discussions, etc.")]


### Tokenization leads the sentence into words

In [7]:
for words in blob.sentences[0].words:
    print(words)

Edlightened
is
a
great
platform
to
learn
data
science


In [8]:
blob = TextBlob("Enlightened is a great pltform to learn data science")
for np in blob.noun_phrases:
    print(np)

enlightened
great pltform
data science


In [9]:
for words, tag in blob.tags:
    print(words, tag)

Enlightened VBN
is VBZ
a DT
great JJ
pltform NN
to TO
learn VB
data NNS
science NN


####  This link would show what does this tags mean
https://www.clips.uantwerpen.be/pages/mbsp-tags

In [10]:
blob = TextBlob("Edlightened is a great platform to learn Data Science. \n It helps community through blogs, hackathons, discussions, etc")
print(blob.sentences[1].words[1])
print(blob.sentences[1].words[1].singularize())

helps
help


### TextBlob offers an object known word

In [11]:
from textblob import Word
w = Word('Platform')
w.pluralize()

'Platforms'

### We can use tags to inflect a particular words as shown

In [12]:
for word, pos in blob.tags:
    if pos == 'NN':
        print(word.pluralize())

platforms
communities


### Words can be lemmatized using the lemmatize function

In [13]:
w = Word('running')
w.lemmatize("v") #v verb here

'run'

##### A combination of multiple words together are called N-Grams, N grams(N>1) are generally more informative as compared to words, and can be used as features for language modelling.
N-grams can be easily accessed in TextBlob using ngrams function, which returns a tuple of n successive words.

In [14]:
for ngram in blob.ngrams(2):
    print(ngram)

['Edlightened', 'is']
['is', 'a']
['a', 'great']
['great', 'platform']
['platform', 'to']
['to', 'learn']
['learn', 'Data']
['Data', 'Science']
['Science', 'It']
['It', 'helps']
['helps', 'community']
['community', 'through']
['through', 'blogs']
['blogs', 'hackathons']
['hackathons', 'discussions']
['discussions', 'etc']


###### Sentiment analysis is basically the process of determning the attitude or the emotion of the writer, i.e., whether it is positive or negative or neutral. the sentiment function of textblob returns two properties, polarity, and subjectivity. Polarity is float which lies in range of [-1, 1] where 1 means a negative statement. Subjective sentence generally refers to personal opinnion, emotion or judgement whereas object refers to fctual informtion. Subjectivity is also  float which lies in range of [0, 1]

In [15]:
print(blob)
blob.sentiment

Edlightened is a great platform to learn Data Science. 
 It helps community through blogs, hackathons, discussions, etc


Sentiment(polarity=0.8, subjectivity=0.75)

###### This throws polarity to 0.8 which means that statement is positive and 0.75 subjectivity refers the mostly it is public opinion and not  facctual information

In [16]:
# Spelling correction is feature which TextBlob offers, we can be accessed using the correct function as shown below
blob = TextBlob('Edlightened is a gret platfor to learn data sciene')
blob.correct()

TextBlob("Enlightened is a great platform to learn data science")

###### We can check the list of suggested word  and its confidece using the spellcheck function.

In [17]:
blob.words[4].spellcheck()

[('platform', 1.0)]

####### Summary of Text

In [18]:
import random
blob = TextBlob('Edlightened is a thriving community for data driven industry. This platform allows \
people to know more about analytics form its article, Q&A forums, and learning paths. Also, we help \
professionals & amatures to sharpen their skillsets by providing a platform to participate in Hackathons.')
nouns = list()
for word, tag in blob.tags:
    if tag == 'NN':
        nouns.append(word.lemmatize())
print("This text is about...")
for item in random.sample(nouns, 5):
    word = Word(item)
    print(word.pluralize())

This text is about...
communities
platforms
platforms
articles
industries


###### What we did above that we extracted out a list of nouns from text to give a gerneral idea to the the things that the text related to

In [19]:
blob = TextBlob("मैं यहां हूं")
blob.detect_language()

'hi'

In [20]:
blob.translate(from_lang='hi', to='en')
blob.translate(to='en')

TextBlob("I am here")

In [21]:
training = [
("Tom Holland is a terrible spiderman.","neg"),
("a terrible Javert (Russell Crowe) ruined Les Miserables for me...","neg"),
("The Dark Knight Rises is the greatest superhero movie ever!","pos"),
("Fantastic Four should have never been made.","neg"),
("Wes Anderson is my favourite director!","pos"),
("Captain America 2 is pretty awesome.","pos"),
("Let\s pretend 'Batman and Robin' never happened..","neg")
]

In [22]:
testing = [
("Superman was never an interesting character.","neg"),
("Fantastic Mr Fox is an awesome film!","pos"),
("Dragonball Evolution is simple terrible!!","neg"),
]

###### TextBlob provides in-build classifiers module to create a customer classifier. So let's quickly import it and create a basic classifier.

In [23]:
from textblob import classifiers
classifier = classifiers.NaiveBayesClassifier(training)

##### Naive Bayes classifier and TextBlob

In [24]:
dt_classifier = classifiers.DecisionTreeClassifier(training)

In [25]:
print(classifier.accuracy(testing))
classifier.show_informative_features(3)

1.0
Most Informative Features
            contains(is) = True              pos : neg    =      2.9 : 1.0
             contains(a) = False             pos : neg    =      1.8 : 1.0
         contains(never) = False             pos : neg    =      1.8 : 1.0


In [26]:
blob = TextBlob('The weather is Beautiful!', classifier = classifier)
print(blob.classify())

pos
