![WhatsApp%20Image%202023-06-13%20at%2003.07.10.jpeg](attachment:WhatsApp%20Image%202023-06-13%20at%2003.07.10.jpeg)

# Tokenization & Part-of-speech Tagging

In [None]:
from textblob import TextBlob

# Adjacent string literals are joined by Python into ONE text. Separating them
# with commas would instead pass the 2nd and 3rd strings as extra positional
# arguments of TextBlob (tokenizer, pos_tagger), which is not what is wanted.
zen = TextBlob(
    "Beautiful is better than ugly. "
    "Explicit is better than implicit. "
    "Simple is better than complex."
)
print(zen.words)      # word tokens
print(zen.sentences)  # sentence objects

wiki = TextBlob("Python is a high-level, general-purpose programming language.")
print(wiki.tags)          # (word, part-of-speech tag) pairs
print(wiki.noun_phrases)  # extracted noun phrases

['Beautiful', 'is', 'better', 'than', 'ugly', 'Explicit', 'is', 'better', 'than', 'implicit', 'Simple', 'is', 'better', 'than',
'complex']                           
[Sentence("Beautiful is better than ugly."), Sentence("Explicit is better than implicit."), Sentence("Simple is better than complex.")]                           
[('Python', 'NNP'), ('is', 'VBZ'), ('a', 'DT'), ('high-level', 'JJ'), ('general-purpose', 'JJ'), ('programming', 'NN'), ('language', 'NN')]
['python']

# Tokenization

In [None]:
from nltk.tokenize import BlanklineTokenizer
from nltk.tokenize import TabTokenizer
from textblob import TextBlob

# Option 1: hand a custom tokenizer to the constructor; blob.tokens then uses it.
# Both separators must be real tab characters ("\t"); "\b" would be a backspace.
tokenizer = TabTokenizer()
blob = TextBlob("This is\ta rather tabby\tblob.", tokenizer=tokenizer)
print(blob.tokens)

# Option 2: build a default blob and pass the tokenizer to tokenize() on demand.
tokenizer = BlanklineTokenizer()
blob = TextBlob("A token\n\nof appreciation")
print(blob.tokenize(tokenizer))

['This is', 'a rather tabby', 'blob.']                             
['A token', 'of appreciation']

# Part-of-speech Tagging

In [None]:
from textblob import TextBlob

# Implicit string concatenation replaces the backslash continuations, which
# break as soon as any whitespace follows the backslash.
document = (
    "In computer science, artificial intelligence (AI), "
    "sometimes called machine intelligence, is intelligence "
    "demonstrated by machines, in contrast to the natural intelligence"
)
text_blob_object = TextBlob(document)

# Print every token together with its part-of-speech tag.
for word, pos in text_blob_object.tags:
    print(word + " => " + pos)

In => IN
computer => NN                             
science => NN                           
artificial => JJ                                      
intelligence => NN                                 
AI => NNP                         
sometimes => RB                                  
called => VBD                                          
machine => NN                                                       
intelligence => NN                                        
is => VBZ                                 
intelligence => NN                                   
demonstrated => VBN                                   
by => IN                                 
machines => NNS                                   
in => IN                                         
contrast => NN                                        
to => TO                                
the => DT                                             
natural => JJ                                     
intelligence => NN                                      

# Tokenization Arabic

In [None]:
from textblob import TextBlob

# Arabic sample text: the blob is expected to split into two sentences,
# plus the individual words.
zen = TextBlob("اجتهد قطف ثمرة عملك. المحبة عنوان السلام.  ")
for tokens in (zen.words, zen.sentences):
    print(tokens)

# Words Inflection and Lemmatization

Lemmatization refers to reducing the word to its root form as found in a dictionary


In [None]:
from textblob import TextBlob
from textblob import Word

sentence = TextBlob('Use 4 spaces per indentation level.')
print(sentence.words)
print(sentence.words[2].singularize())   # third word  -> singular form
print(sentence.words[-1].pluralize())    # last word   -> plural form

# Lemmatize a noun (default part of speech).
w = Word("octopi")
print(w.lemmatize())

# Lemmatize a verb: rebind the SAME name that is used in the print below,
# otherwise the previous word ("octopi") would be lemmatized again.
w = Word("went")
print(w.lemmatize("v"))

['Use', '4', 'spaces', 'per', 'indentation', 'level']                           
space                         
levels

octopus                              
go

# Noun Phrase Extraction

In [None]:
from textblob import TextBlob

# Implicit string concatenation replaces the backslash continuations, which
# break as soon as any whitespace follows the backslash.
document = (
    "In computer science, artificial intelligence (AI), "
    "sometimes called machine intelligence, is intelligence "
    "demonstrated by machines, in contrast to the natural intelligence"
)
text_blob_object = TextBlob(document)

# Print each noun phrase found in the document on its own line.
for noun_phrase in text_blob_object.noun_phrases:
    print(noun_phrase)

computer science                                      
artificial intelligence                                              
ai                                                   
machine intelligence                                                 
natural intelligence

# Sentiment Analysis

Sentiment analysis is basically the process of determining the attitude or the emotion of the writer, i.e., whether it is positive or negative or neutral.
The sentiment function of textblob returns two properties, polarity, and subjectivity.
Polarity is a float which lies in the range [-1, 1], where 1 means a positive statement and -1 means a negative statement.
Subjective sentences generally refer to personal opinion, emotion or judgment whereas objective refers to factual information. Subjectivity is also a float which lies in the range of [0, 1].


In [None]:
from textblob import TextBlob

# Sentiment of a whole text: the full tuple first, then its two fields.
testimonial = TextBlob("Textblob is amazingly simple to use. What great fun!")
print(testimonial.sentiment)
print(testimonial.sentiment.polarity)
print(testimonial.sentiment.subjectivity)

# Sentiment per sentence. Every literal must be closed and end with a space
# so the concatenated text keeps its sentence boundaries.
zen = TextBlob(
    "Beautiful is better than ugly. "
    "Explicit is better than implicit. "
    "Simple is better than complex."
)
for sentence in zen.sentences:
    print(sentence.sentiment)

Sentiment(polarity=0.39166666666666666, subjectivity=0.4357142857142857)                                
0.39166666666666666
0.4357142857142857

Sentiment (polarity=0.2166666666666667, subjectivity=0.8333333333333334)                                  
Sentiment (polarity=0.5, subjectivity=0.5)                                      
Sentiment(polarity=0.06666666666666667, subjectivity=0.41904761904761906)                                        

In [None]:
from textblob import TextBlob
from textblob.sentiments import NaiveBayesAnalyzer

# Use the Naive Bayes analyzer instead of the default sentiment analyzer.
nb_analyzer = NaiveBayesAnalyzer()
blob = TextBlob("I love this library", analyzer=nb_analyzer)
result = blob.sentiment
print(result)

Sentiment(classification='pos', p_pos=0.7996209910191279, p_neg=0.2003790089808724)

# WordLists & Spelling Correction

In [None]:
from textblob import TextBlob
from textblob import Word

# Word lists: show the words, then their plural forms.
animals = TextBlob("cat dog octopus")
animal_words = animals.words
print(animal_words)
print(animal_words.pluralize())

# Spell-check single words; each call prints (suggestion, confidence) pairs.
for candidate in ('falibility', 'The'):
    w = Word(candidate)
    print(w.spellcheck())

# Correct the spelling of a whole text.
b = TextBlob("I havv goood speling!")
corrected = b.correct()
print(corrected)

['cat', 'dog', 'octopus']                              
['cats', 'dogs', 'octopodes']

[('fallibility', 1.0)]                                
[('The', 0.8303848428566982), ('He', 0.1286717785363728),
('She', 0.04094337860692904)]

I have good spelling!

# Get Word and Noun Phrase Frequencies

In [None]:
from textblob import TextBlob

# The two literals are concatenated into one text; a comma between them would
# pass the second sentence as TextBlob's tokenizer argument instead.
monty = TextBlob(
    "We are no longer the Knights who say Ni. "
    "We are now the Knights who say Ekki ekki ekki PTANG."
)
print(monty.word_counts['ekki'])                       # 3 (case-insensitive)
print(monty.words.count('ekki'))                       # 3 (case-insensitive)
print(monty.words.count('ekki', case_sensitive=True))  # 2 ("Ekki" is excluded)

wiki = TextBlob("Python is a high-level, general-purpose programming language.")
print(wiki.noun_phrases.count('python'))               # 1

# Parsing

In [None]:
from textblob import TextBlob

# Parse the sentence and print the resulting tagged string.
b = TextBlob("And now for something completely different.")
parsed = b.parse()
print(parsed)

And/CC/O/O now/RB/B-ADVP/O for/IN/B-PP/B-PNP something/NN/B-NP/I-PNP completely/RB/B-ADJP/O different/JJ/I-ADJP/O ././O/O

# TextBlobs Are Like Python Strings

In [None]:
from textblob import TextBlob

# One concatenated text; the trailing spaces keep the sentences separated.
zen = TextBlob(
    "Beautiful is better than ugly. "
    "Explicit is better than implicit. "
    "Simple is better than complex."
)
print(zen[0:19])           # slicing works like on a str
print(zen.upper())         # call the method; without () the bound method is printed
print(zen.find("Simple"))  # index of the first occurrence

apple_blob = TextBlob('apples')
banana_blob = TextBlob('bananas')
print(apple_blob < banana_blob)   # blobs compare with each other ...
print(apple_blob == 'apples')     # ... and with plain strings

# Concatenation and string formatting.
print(apple_blob + ' and ' + banana_blob)
print("{0} and {1}".format(apple_blob, banana_blob))

Beautiful is better                           
BEAUTIFUL IS BETTER THAN UGLY. EXPLICIT IS BETTER THAN IMPLICIT. SIMPLE IS BETTER THAN COMPLEX.                        
65                           
True                            
True                          
apples and bananas
apples and bananas

# N-grams

In [None]:
from textblob import TextBlob

blob = TextBlob("Now is better than never.")

# Print the 2-grams, 3-grams and 4-grams of the sentence, in that order.
for size in (2, 3, 4):
    print(blob.ngrams(n=size))

[WordList(['Now', 'is']), WordList(['is', 'better']), WordList (['better', 'than']), WordList(['than', 'never'])]            
[WordList(['Now', 'is', 'better']), WordList(['is', 'better', 'than']), WordList(['better', 'than', 'never'])]
[WordList(['Now', 'is', 'better', 'than']), WordList(['is', 'better', 'than', 'never'])]

# Get Start and End Indices of Sentences

In [None]:
from textblob import TextBlob

# One concatenated text; a comma between the literals would pass the later
# strings as other TextBlob arguments instead of extending the text.
zen = TextBlob(
    "Beautiful is better than ugly. "
    "Explicit is better than implicit. "
    "Simple is better than complex."
)

# Each sentence exposes its start and end offsets via .start and .end.
for s in zen.sentences:
    print(s)
    print("---- Starts at index {}, Ends at index {}".format(s.start, s.end))

Beautiful is better than ugly.                     
---- Starts at index 0, Ends at index 30                              
Explicit is better than implicit.                                       
---- Starts at index 31, Ends at index 64
Simple is better than complex.                                           
---- Starts at index 65, Ends at index 95

# Definitions & Similarity

In [None]:
from textblob import Word
from textblob.wordnet import Synset

# Dictionary definitions of a word.
print(Word("octopus").definitions)

# Similarity between two specific synsets.
octopus = Synset('octopus.n.02')
shrimp = Synset('shrimp.n.03')
print(octopus.path_similarity(shrimp))

['tentacles of octopus prepared as food', 'bottom-living cephalopod having a soft oval body with eight long tentacles']

0.1111111111111111

# Translation and Language Detection

In [None]:
from textblob import TextBlob

# NOTE(review): detect_language()/translate() look deprecated or removed in
# newer TextBlob releases - confirm against the installed version.
blob = TextBlob("هذا الطعام بارد")

detected = blob.detect_language()
print(detected)

# Translate with an explicit source language, then without one.
explicit_source = blob.translate(from_lang='ar', to='en')
print(explicit_source)
implicit_source = blob.translate(to='en')
print(implicit_source)

ar
This food is cold                                   
This food is cold

# Text Classification

In [None]:
from textblob import TextBlob
from textblob.classifiers import NaiveBayesClassifier

# Labelled training data: (text, label) pairs.
train = [
    ('I love this sandwich.', 'pos'),
    ('this is an amazing place!', 'pos'),
    ('I feel very good about these beers.', 'pos'),
    ('this is my best work.', 'pos'),
    ("what an awesome view", 'pos'),
    ('I do not like this restaurant', 'neg'),
    ('I am tired of this stuff.', 'neg'),
    ("I can't deal with this", 'neg'),
    ('he is my sworn enemy!', 'neg'),
    ('my boss is horrible.', 'neg'),
]
# Held-out data used only for the accuracy check at the end.
test = [
    ('the beer was good.', 'pos'),
    ('I do not enjoy my job', 'neg'),
    ("I ain't feeling dandy today.", 'neg'),
    ("I feel amazing!", 'pos'),
    ('Gary is a friend of mine.', 'pos'),
    ("I can't believe I'm doing this.", 'neg'),
]

cl = NaiveBayesClassifier(train)

# Classifying text
print(cl.classify("This is an amazing library!"))

# You can get the label probability distribution with the prob_classify(text) method.
prob_dist = cl.prob_classify("This one's a doozy.")
print(prob_dist.max())
print(round(prob_dist.prob("pos"), 2))
print(round(prob_dist.prob("neg"), 2))

# Classifying TextBlobs: pass the trained classifier (cl) to the constructor.
blob = TextBlob("The beer is good. But the hangover is horrible.", classifier=cl)
print(blob.classify())
for s in blob.sentences:
    print(s)
    print(s.classify())

# Evaluating classifiers
# To compute the accuracy on our test set, use the accuracy(test_data) method.
print(cl.accuracy(test))

pos


pos                                 
0.63                                   
0.37           


pos                         
The beer is good.                                      
pos                               
But the hangover is horrible.                           
neg


0.8333333333333334

# Sentiment Analysis Project 1

project link:
https://stackabuse.com/python-for-nlp-introduction-to-the-textblob-library/

Dataset link:                                                 
https://www.kaggle.com/sdxingaijing/topicamodel-Ida-algorithm/data                                     
https://www.kaggle.com/snap/amazon-fine-food-reviews?select=Reviews.csv


Dataset name:
Amazon Fine Food Reviews                                      
Analyze ~500,000 food reviews from Amazon

In [None]:
import pandas as pd
import numpy as np
from textblob import TextBlob

import seaborn as sns
import matplotlib.pyplot as plt

# Load the reviews and keep only the first 20,000 rows.
reviews_datasets = pd.read_csv('Reviews.csv')
reviews_datasets = reviews_datasets.head(20000)

# dropna() returns a NEW frame, so the result has to be assigned back;
# calling it without assignment leaves the data unchanged. The original index
# labels are kept, so label-based lookups on the remaining rows still work.
reviews_datasets = reviews_datasets.dropna()
print(reviews_datasets.head())

# Distribution of the 'Score' column: density-style plot, then per-value counts.
# NOTE(review): distplot is deprecated in recent seaborn releases - confirm
# against the installed version before upgrading.
sns.distplot(reviews_datasets['Score'])
plt.show()
sns.countplot(x='Score', data=reviews_datasets)
plt.show()

Id ...                  Text                                                                 
0 1 ... I have bought several of the Vitality canned d...                                                
1 2 ... Product arrived labeled as Jumbo Salted Peanut...                                     
2 3 ... This is a confection that has been around a fe...                                         
3 4 ... If you are looking for the secret ingredient i...                                                  
4 5 ... Great taffy at a great price. There was a wid...

![WhatsApp%20Image%202023-07-08%20at%2022.47.13.jpeg](attachment:WhatsApp%20Image%202023-07-08%20at%2022.47.13.jpeg)

In [None]:
# Show the review stored under index label 350 and its TextBlob sentiment.
# The column key is exactly 'Text' (no trailing space).
print(reviews_datasets['Text'][350])
text_blob_object = TextBlob(reviews_datasets['Text'][350])
print(text_blob_object.sentiment)

These chocolate covered espresso beans are wonderful! The chocolate is very dark and rich and the "bean" inside is a very delightful blend of flavors with just enough caffine to really give it a zing.                                 
Sentiment (polarity=0.39666666666666667, subjectivity=0.6616666666666667)

In [None]:
def find_pol(review):
    """Return the TextBlob sentiment polarity of ``review``."""
    return TextBlob(review).sentiment.polarity


# Score every review text and store the result in a new column.
reviews_datasets['Sentiment_Polarity'] = reviews_datasets['Text'].apply(find_pol)
print(reviews_datasets.head())

# Distribution of the computed polarity values.
sns.distplot(reviews_datasets['Sentiment_Polarity'])
plt.show()

Id ... Sentiment_Polarity
0 1 ... 0.450000                             
1 2 ... -0.033333                                
2 3 ... 0.133571                                
3 4 ... 0.166667                                  
4 5 ... 0.483333                       

![WhatsApp%20Image%202023-07-08%20at%2022.47.14.jpeg](attachment:WhatsApp%20Image%202023-07-08%20at%2022.47.14.jpeg)

In [None]:
# Bar chart of 'Sentiment_Polarity' (y axis) for each 'Score' value (x axis).
sns.barplot(
    data=reviews_datasets,
    x='Score',
    y='Sentiment_Polarity',
)
plt.show()

![WhatsApp%20Image%202023-07-08%20at%2022.47.14%20%281%29.jpeg](attachment:WhatsApp%20Image%202023-07-08%20at%2022.47.14%20%281%29.jpeg)

In [None]:
# Reviews whose polarity is exactly -1: boolean-mask indexing uses [...] and ==.
most_negative = reviews_datasets[reviews_datasets.Sentiment_Polarity == -1].Text.head()
print(most_negative)
print(reviews_datasets['Text'][545])

# Reviews whose polarity is exactly 1.
most_positive = reviews_datasets[reviews_datasets.Sentiment_Polarity == 1].Text.head()
print(most_positive)
print(reviews_datasets['Text'][106])
print(reviews_datasets['Text'][223])

# Sentiment Analysis Project 2

project link:                                           
https://pythonprogramming.net/sentiment-analysis-python-textblob-vader/          

Dataset link:
https://pythonprogramming.net/static/downloads/short_reviews/positive.txt                          
https://pythonprogramming.net/static/downloads/short_reviews/negative.txt                        

In [None]:
from textblob import TextBlob

# Positive file: a line counts as correct when its polarity is strictly > 0.
# split('\n') always yields at least one element, so the counts below are
# never zero and the divisions at the end are safe.
pos_count = 0
pos_correct = 0
with open("positive.txt", "r") as f:  # "r" = read mode
    for line in f.read().split('\n'):
        analysis = TextBlob(line)
        if analysis.sentiment.polarity > 0:
            pos_correct += 1
        pos_count += 1

# Negative file: a line counts as correct when its polarity is <= 0.
neg_count = 0
neg_correct = 0
with open("negative.txt", "r") as f:
    for line in f.read().split('\n'):
        analysis = TextBlob(line)
        if analysis.sentiment.polarity <= 0:
            neg_correct += 1
        neg_count += 1

print("Positive accuracy = {}% via {} samples".format(pos_correct/pos_count*100.0, pos_count))
print("Negative accuracy = {}% via {} samples".format(neg_correct/neg_count*100.0, neg_count))

Positive accuracy = 71.11777944486121% via 5332 samples                                 
Negative accuracy = 55.8702175543886% via 5332 samples

# References:

https://pythonprogramming.net/sentiment-analysis-python-textblob-vader/

https://textblob.readthedocs.io/en/dev/quickstart.html

https://textblob.readthedocs.io/en/dev/advanced_usage.html

https://textblob.readthedocs.io/en/dev/classifiers.html

https://stackabuse.com/python-for-nlp-introduction-to-the-textblob-library/

https://www.kaggle.com/sdxingaijing/topicamodel-Ida-algorithm/data

https://www.kaggle.com/snap/amazon-fine-food-reviews?select=Reviews.csv

https://pythonprogramming.net/sentiment-analysis-python-textblob-vader/

https://pythonprogramming.net/static/downloads/short_reviews/positive.txt

https://pythonprogramming.net/static/downloads/short_reviews/negative.txt