# TextBlob

In [1]:
!pip install textblob



In [2]:
# One-time setup: download the NLTK corpora TextBlob relies on
# (brown, punkt, wordnet, averaged_perceptron_tagger, conll2000, movie_reviews).
!python -m textblob.download_corpora

[nltk_data] Downloading package brown to
[nltk_data]     /Users/ahmadbasha/nltk_data...
[nltk_data]   Package brown is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/ahmadbasha/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/ahmadbasha/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/ahmadbasha/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package conll2000 to
[nltk_data]     /Users/ahmadbasha/nltk_data...
[nltk_data]   Package conll2000 is already up-to-date!
[nltk_data] Downloading package movie_reviews to
[nltk_data]     /Users/ahmadbasha/nltk_data...
[nltk_data]   Package movie_reviews is already up-to-date!
Finished.


In [3]:
# First, the import.
import textblob
from textblob import TextBlob
from textblob import Word

# from textblob.wordnet import VERB
from textblob.classifiers import NaiveBayesClassifier

In [4]:
# Build a TextBlob from a raw string; the next cells inspect it.
wiki = TextBlob("Python is a high-level, general-purpose programming language.")

In [5]:
wiki

TextBlob("Python is a high-level, general-purpose programming language.")

In [6]:
# .words gives the word tokens as a WordList: punctuation is dropped,
# while hyphenated words such as 'high-level' stay in one piece.
wiki.words

WordList(['Python', 'is', 'a', 'high-level', 'general-purpose', 'programming', 'language'])

# Tokenization

In [7]:
# Three sentences; the abbreviation "Mr." contains a period that is not a
# sentence boundary, which makes it a useful check for the tokenizers below.
zen = TextBlob("Mr. Beautiful is better, than ugly. "
               "Explicit is better than implicit. "
               "Simple is better than complex.")

In [8]:
zen.words # word tokenization: punctuation is stripped, so "Mr." becomes 'Mr'

WordList(['Mr', 'Beautiful', 'is', 'better', 'than', 'ugly', 'Explicit', 'is', 'better', 'than', 'implicit', 'Simple', 'is', 'better', 'than', 'complex'])

In [9]:
zen.words.count('Better') # count() ignores case by default, so 'Better' matches all three 'better' tokens

3

In [10]:
# Sentence tokenization: the period after "Mr" is not treated as a sentence end.
zen.sentences

[Sentence("Mr. Beautiful is better, than ugly."),
 Sentence("Explicit is better than implicit."),
 Sentence("Simple is better than complex.")]

# Words and noun phrase counts

# Using word_counts dictionary.

In [11]:
# Sample text with mixed-case repeats ("Ekki ekki ekki") and an all-caps token
# ("PTANG") to show how the counting helpers treat letter case.
monty = TextBlob("We are no longer the Knights who say Delhi India. "
                 "We are now the Knights who say Ekki ekki ekki PTANG.")

In [12]:
# word_counts is a defaultdict(int) mapping each lowercased word to its frequency.
monty.word_counts 

defaultdict(int,
            {'we': 2,
             'are': 2,
             'no': 1,
             'longer': 1,
             'the': 2,
             'knights': 2,
             'who': 2,
             'say': 2,
             'delhi': 1,
             'india': 1,
             'now': 1,
             'ekki': 3,
             'ptang': 1})

In [13]:
# 'Ekki' and 'ekki' are counted together under the lowercased key.
monty.word_counts['ekki']

3

In [14]:
# Dictionary lookup is exact: the keys are lowercased ('ptang'), so the
# upper-case key 'PTANG' is missing and the defaultdict(int) yields 0.
monty.word_counts['PTANG']

0

In [15]:
# Case-sensitive count: only tokens spelled exactly 'PTANG' are matched.
monty.words.count('PTANG', case_sensitive=True)

1

# Words Inflection and Lemmatization

In [16]:
# Text mixing plural nouns, verb forms and place names to exercise the inflection helpers.
sentence = TextBlob('Uses 4 spaces per indentation levels. We are no longer the Knights who says Delhi India Germany Bharat.')

In [17]:
sentence.words

WordList(['Uses', '4', 'spaces', 'per', 'indentation', 'levels', 'We', 'are', 'no', 'longer', 'the', 'Knights', 'who', 'says', 'Delhi', 'India', 'Germany', 'Bharat'])

In [18]:
# singularize() is applied to every token, not only to plural nouns, so some
# results are wrong (in the output below: 'India' -> 'Indium', 'says' -> 'say').
sentence.words.singularize()

WordList(['Use', '4', 'space', 'per', 'indentation', 'level', 'We', 'are', 'no', 'longer', 'the', 'Knight', 'who', 'say', 'Delhi', 'Indium', 'Germany', 'Bharat'])

In [19]:
sentence.words[2].singularize()

'space'

In [20]:
# pluralize() is likewise applied to every token: words that are already plural
# or are not nouns produce nonsense such as 'Usess', 'spacess' and 'thes'.
sentence.words.pluralize()

WordList(['Usess', '4s', 'spacess', 'pers', 'indentations', 'levelss', 'Wes', 'ares', 'noes', 'longers', 'thes', 'Knightss', 'whoes', 'sayss', 'Delhis', 'Indias', 'Germanys', 'Bharats'])

In [21]:
# A slice of a WordList can be inflected on its own; [-4:-1] selects
# 'Delhi', 'India', 'Germany' and excludes the final token 'Bharat'.
sentence.words[-4:-1].pluralize()

WordList(['Delhis', 'Indias', 'Germanys'])

In [22]:
# Lemmatize a Word built with a noun part-of-speech tag ('n'); lemmatize() is
# called without arguments here.
w = Word("stripes", pos_tag = 'n')
w.lemmatize()

'stripe'

In [23]:
# Here the part of speech is passed to lemmatize() directly ('v' for verb),
# which reduces the past tense 'went' to its base form 'go'.
w = Word("went")
w.lemmatize('v') 

'go'

# POS tagging

In [24]:
# Rebind `zen` (this time without the "Mr." prefix) and tag each token.
# .tags yields (word, tag) pairs; the tags look like Penn Treebank codes.
# NOTE(review): the tagger is not perfect - in the output below 'ugly' is
# tagged RB and 'Beautiful' NNP.
zen = TextBlob("Beautiful is better than ugly. "
               "Explicit is better than implicit. "
               "Simple is better than complex.")
zen.tags

[('Beautiful', 'NNP'),
 ('is', 'VBZ'),
 ('better', 'JJR'),
 ('than', 'IN'),
 ('ugly', 'RB'),
 ('Explicit', 'NNP'),
 ('is', 'VBZ'),
 ('better', 'JJR'),
 ('than', 'IN'),
 ('implicit', 'NN'),
 ('Simple', 'NN'),
 ('is', 'VBZ'),
 ('better', 'JJR'),
 ('than', 'IN'),
 ('complex', 'JJ')]

In [25]:
# Show each token (lowercased) next to its part-of-speech tag.
for token, tag in zen.tags:
    print(f"{token.lower()} => {tag}")

beautiful => NNP
is => VBZ
better => JJR
than => IN
ugly => RB
explicit => NNP
is => VBZ
better => JJR
than => IN
implicit => NN
simple => NN
is => VBZ
better => JJR
than => IN
complex => JJ


# Noun Phrase Extraction

In [26]:
# Sample paragraph used for noun-phrase extraction.
# Adjacent string literals are concatenated by the parser, so the text contains
# single spaces between words. Backslash continuations inside one literal would
# instead embed each continuation line's indentation into the text.
document = (
    "In computer science, artificial intelligence (AI), "
    "sometimes called machine intelligence, is intelligence "
    "demonstrated by machines, in contrast to the natural intelligence "
    "displayed by humans and animals. Computer science defines AI "
    'research as the study of "intelligent agents": any device that '
    "perceives its environment and takes actions that maximize its "
    "chance of successfully achieving its goals.[1] Colloquially, "
    'the term "artificial intelligence" is used to describe machines '
    'that mimic "cognitive" functions that humans associate with other '
    'human minds, such as "learning" and "problem solving".[2]'
)

In [27]:
# Extract the noun phrases from the paragraph and print them one per line.
text_blob_object = TextBlob(document)

for phrase in text_blob_object.noun_phrases:
    print(phrase)

computer science
artificial intelligence
ai
machine intelligence
natural intelligence
computer
science defines
ai
intelligent agents
colloquially
artificial intelligence
describe machines
human minds


In [28]:
# noun_phrases is a WordList of lowercased phrases; a phrase that occurs more
# than once in the text (e.g. 'ai') appears once per occurrence.
text_blob_object.noun_phrases

WordList(['computer science', 'artificial intelligence', 'ai', 'machine intelligence', 'natural intelligence', 'computer', 'science defines', 'ai', 'intelligent agents', 'colloquially', 'artificial intelligence', 'describe machines', 'human minds'])

# Spelling Correction

Spelling correction is based on Peter Norvig’s “How to Write a Spelling Corrector” as implemented in the pattern library. According to the TextBlob documentation it is about 70% accurate, so the corrected text should always be reviewed.

In [29]:
# Correct the misspelled text one sentence at a time.
b = TextBlob("I havv written goood speling!. speling corection is based. howw tooo writt a speling corect ")

for sent in b.sentences:
    print(sent.correct())

I have written good spelling!.
spelling correction is based.
how took write a spelling correct


In [30]:
# Correcting the whole blob at once applies the same fixes, including the
# wrong one ('tooo' -> 'took').
b.correct()

TextBlob("I have written good spelling!. spelling correction is based. how took write a spelling correct ")

In [31]:
# NOTE(review): correction appears to work word by word without sentence
# context, so a correctly spelled word can be replaced by another valid word
# (in the output below: 'fries' -> 'fires', 'wer' -> 'her').
b = TextBlob("Thh frenh fries wer good at the resturat")

b.correct()

TextBlob("The french fires her good at the restaurant")

In [32]:
# `Word` is already imported in the imports cell at the top of the notebook,
# so it is not imported again here.
w = Word('falibility')

# spellcheck() returns a list of (candidate, confidence) tuples;
# a confidence of 1.0 means there is a single, certain candidate.
w.spellcheck()

[('fallibility', 1.0)]

In [33]:
# List every candidate correction with its confidence, one word at a time.
b = TextBlob("Th frenh fries wer good at the resturt")

for token in b.words:
    print(token.spellcheck(), '\n\n')

[('Oh', 0.47953216374269003), ('Ah', 0.2596491228070175), ('Eh', 0.10409356725146199), ('H', 0.09239766081871345), ('Th', 0.05964912280701754), ('Ch', 0.0035087719298245615), ('Wh', 0.0011695906432748538)] 


[('french', 0.8699186991869918), ('fresh', 0.13008130081300814)] 


[('fires', 0.4222222222222222), ('cries', 0.34444444444444444), ('flies', 0.08888888888888889), ('tries', 0.05555555555555555), ('dries', 0.044444444444444446), ('furies', 0.011111111111111112), ('frise', 0.011111111111111112), ('fried', 0.011111111111111112), ('frees', 0.011111111111111112)] 


[('her', 0.4200985848306567), ('were', 0.34099220861822227), ('we', 0.15153442518683416), ('war', 0.07004293210367309), ('per', 0.0068373350294164414), ('wet', 0.0047702337414533315), ('wear', 0.0023851168707266657), ('web', 0.0016695818095086659), ('er', 0.0003975194784544443), ('der', 0.0003975194784544443), ('wee', 0.00023851168707266656), ('weir', 0.0001590077913817777), ('ver', 0.0001590077913817777), ('yer', 7.950389

# Translation and Language Detection


One of the most powerful capabilities of the TextBlob library is translating text from one language to another. On the backend, the TextBlob translator uses the __Google Translate API__, so it needs network access.



https://cloud.google.com/translate/docs/languages

#### Before running the translation cells, read this issue first:
https://github.com/sloria/TextBlob/issues/397

# English sample sentence used as input for the translation example below.
en_blob = TextBlob('Simple is better than complex.')

In [40]:
# Translation is left disabled (see the GitHub issue linked above); 'ar' would request Arabic.
#en_blob.translate(to = 'ar')

In [41]:
# Chinese sample sentence (roughly "Beautiful is better than ugly").
chinese_blob = TextBlob("美丽优于丑陋")

# Translation call left disabled; it would translate from Simplified Chinese to English.
#chinese_blob.translate(from_lang="zh-CN", to='en') # en == english

In [44]:
# Arabic sample sentence (roughly "Simple is better than complex").
b = TextBlob("بسيط هو أفضل من مجمع")
# Language detection is left disabled along with the translation calls.
# b.detect_language()

## n-grams

An n-gram is a sequence of n consecutive words in a sentence. For instance, for the sentence "I love watching football", the 2-grams are (I love), (love watching) and (watching football).

N-Grams can play a crucial role in text classification.

The TextBlob.ngrams() method returns a list of WordList objects, each holding n successive words.

In [45]:
blob = TextBlob("Now is better than never.")

In [46]:
# n=2 yields the bigrams; the trailing period is not part of any n-gram.
blob.ngrams(n=2)

[WordList(['Now', 'is']),
 WordList(['is', 'better']),
 WordList(['better', 'than']),
 WordList(['than', 'never'])]

# WordNet Integration

In [48]:
# synsets lists the WordNet senses of "pass"; the middle part of each synset
# name marks the part of speech (.n. noun, .v. verb, .a. adjective).
word = Word("pass")
word.synsets

[Synset('base_on_balls.n.01'),
 Synset('pass.n.02'),
 Synset('pass.n.03'),
 Synset('pass.n.04'),
 Synset('pass.n.05'),
 Synset('pass.n.06'),
 Synset('pass.n.07'),
 Synset('pass.n.08'),
 Synset('pass.n.09'),
 Synset('pass.n.10'),
 Synset('bye.n.01'),
 Synset('pass.n.12'),
 Synset('pass.n.13'),
 Synset('crack.n.09'),
 Synset('pass.n.15'),
 Synset('passing.n.07'),
 Synset('pas.n.01'),
 Synset('pass.v.01'),
 Synset('travel_by.v.01'),
 Synset('legislate.v.01'),
 Synset('elapse.v.01'),
 Synset('pass.v.05'),
 Synset('run.v.03'),
 Synset('pass.v.07'),
 Synset('happen.v.01'),
 Synset('pass.v.09'),
 Synset('spend.v.01'),
 Synset('guide.v.05'),
 Synset('communicate.v.01'),
 Synset('evanesce.v.01'),
 Synset('pass.v.14'),
 Synset('exceed.v.02'),
 Synset('pass.v.16'),
 Synset('pass.v.17'),
 Synset('pass.v.18'),
 Synset('sink.v.03'),
 Synset('pass.v.20'),
 Synset('fall.v.21'),
 Synset('pass.v.22'),
 Synset('authorize.v.01'),
 Synset('die.v.01'),
 Synset('excrete.v.01'),
 Synset('passing.a.02')]

In [49]:
# definitions gives the dictionary gloss of each sense
# (they appear to follow the same order as word.synsets).
word.definitions

['(baseball) an advance to first base by a batter who receives four balls',
 '(military) a written leave of absence',
 '(American football) a play that involves one player throwing the ball to a teammate',
 'the location in a range of mountains of a geological formation that is lower than the surrounding peaks',
 'any authorization to pass or go somewhere',
 'a document indicating permission to do something without restrictions',
 'a flight or run by an aircraft over a target',
 'a bad or difficult situation or state of affairs',
 'a difficult juncture',
 'one complete cycle of operations (as by a computer)',
 'you advance to the next round in a tournament without playing an opponent',
 'a permit to enter or leave a military installation',
 'a complimentary ticket',
 'a usually brief attempt',
 '(sports) the act of throwing the ball to another member of your team',
 'success in satisfying a test or requirement',
 '(ballet) a step in dancing (especially in classical ballet)',
 'go acros

## Sentiment Analysis

The sentiment property returns a named tuple of the form Sentiment(polarity, subjectivity). 

Polarity is a float value within the range [-1.0 to 1.0] where 

    0 indicates neutral, 
    +1 indicates a very positive sentiment and 
    -1 represents a very negative sentiment.

Subjectivity is a float value within the range [0.0 to 1.0] where 

    0.0 is very objective and 
    1.0 is very subjective. 



**Polarity score  [- 1, 1 ]** 

1.   closer to -1 means a more negative sentiment
2.   closer to +1 means a more positive sentiment



**Subjectivity Score  [0,1 ]** 
 

1.   closer to 1 means more personal opinion
2.   closer to 0 means more factual information

In [50]:
TextBlob("so the two together did the job").sentiment

Sentiment(polarity=0.0, subjectivity=0.0)

In [51]:
TextBlob("the movie was worst").sentiment

Sentiment(polarity=-1.0, subjectivity=1.0)

In [52]:
# Negation is taken into account: adding "not" moves the polarity of
# "the movie was worst" from -1.0 to 0.5.
TextBlob("the movie was not worst").sentiment

Sentiment(polarity=0.5, subjectivity=1.0)

In [53]:
TextBlob("I am happy").sentiment

Sentiment(polarity=0.8, subjectivity=1.0)

In [54]:
# The intensifier "very" raises the polarity of "I am happy" from 0.8 to 1.0.
TextBlob("I am very happy").sentiment

Sentiment(polarity=1.0, subjectivity=1.0)

In [56]:
# Statements without opinion words score 0.0 on both axes; the score says
# nothing about whether the statement is true.
# NOTE(review): the second blob is named testimonial4 but is printed as 'Sentiment 2'.
testimonial1 = TextBlob("so the two together did the job")
testimonial4 = TextBlob("Mumbai is a city in the UK")

print('Sentiment 1: ', testimonial1.sentiment)
print('Sentiment 2: ', testimonial4.sentiment)

Sentiment 1:  Sentiment(polarity=0.0, subjectivity=0.0)
Sentiment 2:  Sentiment(polarity=0.0, subjectivity=0.0)


In [57]:
print('Polarity: ', testimonial1.sentiment.polarity)

Polarity:  0.0


In [58]:
print('Subjectivity: ', testimonial1.sentiment.subjectivity)

Subjectivity:  0.0


# Text classification (`from textblob.classifiers import NaiveBayesClassifier`)

In [59]:
# Labelled training sentences as (text, label) pairs: five positive, five negative.
train = [
    # positive examples
    ("I love this sandwich.", "pos"),
    ("this is an amazing place!", "pos"),
    ("I feel very good about these beers.", "pos"),
    ("this is my best work.", "pos"),
    ("what an awesome view", "pos"),
    # negative examples
    ("I do not like this restaurant", "neg"),
    ("I am tired of this stuff.", "neg"),
    ("I can't deal with this", "neg"),
    ("he is my sworn enemy!", "neg"),
    ("my boss is horrible.", "neg"),
]

In [61]:
# Held-out (text, label) pairs used to measure the classifier's accuracy.
test = [
    ("the beer was good.", "pos"),
    ("I do not enjoy my job", "neg"),
    ("I ain't feeling dandy today.", "neg"),
    ("I feel amazing!", "pos"),
    ("Gary is a friend of mine.", "pos"),
    ("I can't believe I'm doing this.", "neg"),
]

In [62]:
cl = NaiveBayesClassifier(train) # train a Naive Bayes classifier on the 10 labelled sentences in `train`

In [63]:
# Loading Data from Files
# You can also load data from common file formats including CSV, JSON, and TSV.

# CSV files should be formatted like so:

# I love this sandwich.,pos
# This is an amazing place!,pos
# I do not like this restaurant,neg

In [64]:
# Classifying Text
# Call the classify(text) method to use the classifier.

cl.classify("This is best library!")

'pos'

In [69]:
# You can get the label probability distribution with the prob_classify(text) method.
prob_dist = cl.prob_classify("This one's a doozy.")

prob_dist.prob('pos')

0.631147540983605

In [70]:
round(prob_dist.prob("pos"), 2)

0.63

In [71]:
round(prob_dist.prob("neg"), 2)

0.37

In [72]:
# Fraction of the 6 labelled `test` sentences classified correctly (5 of 6 here).
cl.accuracy(test)

0.8333333333333334

# Updating Classifiers with New Data

In [73]:
# Extra labelled (text, label) pairs used to update the trained classifier.
new_data = [
    ("She is my best friend.", "pos"),
    ("I'm happy to have a new friend.", "pos"),
    ("Stay thirsty, my friend.", "pos"),
    ("He ain't from around here.", "neg"),
]

In [74]:
# Feed the additional labelled examples to the existing classifier; the call returns True.
cl.update(new_data)

True

In [75]:
# Accuracy on the same 6 test sentences after the update (1.0 in the output below).
cl.accuracy(test)

1.0

In [76]:
# NOTE(review): surprising result - this clearly negative sentence is labelled 'pos'.
# None of the training sentences contains the word "bad", which is likely why;
# with so little training data the labels should not be trusted.
cl.classify("He is a bad boy")

'pos'

In [77]:
prob_dist = cl.prob_classify("He is a bad boy")

In [78]:
prob_dist.prob('pos')

0.7451205155145105

In [79]:
prob_dist.prob('neg')

0.2548794844854889