In [11]:
from textblob import TextBlob

In [12]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [15]:
import nltk
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\Renesa\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping taggers\averaged_perceptron_tagger.zip.


True

### Convert the text into textblob object

In [13]:

analysis= TextBlob("Textblob surely has some interesting features!!")
#To figure out what all can be done with the textblob object
print(dir(analysis))

['__add__', '__class__', '__contains__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_cmpkey', '_compare', '_create_sentence_objects', '_strkey', 'analyzer', 'classifier', 'classify', 'correct', 'detect_language', 'ends_with', 'endswith', 'find', 'format', 'index', 'join', 'json', 'lower', 'ngrams', 'noun_phrases', 'np_counts', 'np_extractor', 'parse', 'parser', 'polarity', 'pos_tagger', 'pos_tags', 'raw', 'raw_sentences', 'replace', 'rfind', 'rindex', 'sentences', 'sentiment', 'sentiment_assessments', 'serialized', 'split', 'starts_with', 'startswith', 'string', 'strip', 'stripped', 'subjectivity', 'tags', 'title', 'to_json', 'tokenize', 'tokenizer', 'tokens', 'tra

In [9]:
print(analysis.translate(from_lang="en",to='hi'))

TextBlob निश्चित रूप से कुछ दिलचस्प विशेषताएं हैं !!


In [16]:
#To get POS tags of every text element in the sentence
print(analysis.tags)

[('Textblob', 'NNP'), ('surely', 'RB'), ('has', 'VBZ'), ('some', 'DT'), ('interesting', 'JJ'), ('features', 'NNS')]


In [17]:
print(analysis.sentiment)

Sentiment(polarity=0.640625, subjectivity=0.6944444444444444)


#### Both subjectivity & polarity are considered for sentiment analysis

In [36]:
pos_count= 0
pos_correct= 0

In [37]:
with open('positive.txt', 'r') as f:
    for line in f.read().split('\n'):
        analysis_text= TextBlob(line)
        if analysis_text.sentiment.subjectivity>0.8:
            if analysis_text.sentiment.polarity> 0:
                pos_correct+=1
            pos_count+=1
        

In [38]:
neg_count= 0
neg_correct= 0

In [39]:
with open('negative.txt', 'r') as fneg:
    for line in fneg.read().split('\n'):
        analysis_textneg= TextBlob(line)
        if analysis_textneg.sentiment.subjectivity>0.8:
            if analysis_textneg.sentiment.polarity<= 0:
                neg_correct+=1
            neg_count+=1

In [40]:
print("Positive accuracy: {}% via {} samples".format(pos_correct/pos_count*100.0, pos_count))
print("Negative accuracy: {}% via {} samples".format(neg_correct/neg_count*100.0, neg_count))

Positive accuracy: 78.72928176795581% via 724 samples
Negative accuracy: 64.568345323741% via 556 samples


#### Considering the neutrality factor with polarity for the sentiment analysis 

In [60]:
%%time
pos_count= 0
pos_correct= 0
with open('positive.txt', 'r') as f:
    for line in f.read().split('\n'):
        analysis_text= TextBlob(line)
        if analysis_text.sentiment.polarity>=0.5:
            if analysis_text.sentiment.polarity> 0:
                pos_correct+=1
            pos_count+=1
            
neg_count= 0
neg_correct= 0
with open('negative.txt', 'r') as fneg:
    for line in fneg.read().split('\n'):
        analysis_textneg= TextBlob(line)
        if analysis_textneg.sentiment.polarity<=-0.5:
            if analysis_textneg.sentiment.polarity<= 0:
                neg_correct+=1
            neg_count+=1
            
print("Positive accuracy: {}% via {} samples".format(pos_correct/pos_count*100.0, pos_count))
print("Negative accuracy: {}% via {} samples".format(neg_correct/neg_count*100.0, neg_count))

Positive accuracy: 100.0% via 766 samples
Negative accuracy: 100.0% via 282 samples
CPU times: total: 2.08 s
Wall time: 2.08 s


### Convert the text into vader sentiment's SentimentIntensityAnalyzer object

In [42]:
analyzer= SentimentIntensityAnalyzer()
vs= analyzer.polarity_scores("VADER Sentiment looks interesting, I have high hopes.")
print(vs)

{'neg': 0.0, 'neu': 0.522, 'pos': 0.478, 'compound': 0.6705}


#### Compound score is considered for sentiment analysis

In [43]:
pos_count= 0
pos_correct= 0
with open('positive.txt', 'r') as f:
    for line in f.read().split('\n'):
        vs= analyzer.polarity_scores(line)
        if vs['compound']> 0:
            pos_correct+=1
        pos_count+=1
        
neg_count= 0
neg_correct= 0
with open('negative.txt', 'r') as fneg:
    for line in fneg.read().split('\n'):
        vsneg= analyzer.polarity_scores(line)
        if vsneg['compound']<= 0:
            neg_correct+=1
        neg_count+=1

In [44]:
print("Positive accuracy: {}% via {} samples".format(pos_correct/pos_count*100.0, pos_count))
print("Negative accuracy: {}% via {} samples".format(neg_correct/neg_count*100.0, neg_count))

Positive accuracy: 70.18004501125282% via 5332 samples
Negative accuracy: 57.389347336834206% via 5332 samples


#### Both compound score & neutrality factors are considered for sentiment analysis

In [47]:
threshold= 0.5

pos_count= 0
pos_correct= 0
with open('positive.txt', 'r') as f:
    for line in f.read().split('\n'):
        vs= analyzer.polarity_scores(line)
        if vs['compound']<= -threshold or vs['compound']>= threshold:
            if vs['compound']> 0:
                pos_correct+=1
            pos_count+=1
        
neg_count= 0
neg_correct= 0
with open('negative.txt', 'r') as fneg:
    for line in fneg.read().split('\n'):
        vsneg= analyzer.polarity_scores(line)
        if vsneg['compound']<= -threshold or vsneg['compound']>= threshold:
            if vsneg['compound']> 0:
                neg_correct+=1
            neg_count+=1

In [48]:
print("Positive accuracy: {}% via {} samples".format(pos_correct/pos_count*100.0, pos_count))
print("Negative accuracy: {}% via {} samples".format(neg_correct/neg_count*100.0, neg_count))

Positive accuracy: 87.66037735849056% via 2650 samples
Negative accuracy: 50.43859649122807% via 1824 samples


#### Both positive & negative scores are considered for sentiment analysis

In [59]:
%%time
pos_count= 0
pos_correct= 0
with open('positive.txt', 'r') as f:
    for line in f.read().split('\n'):
        vs= analyzer.polarity_scores(line)
        if not vs['neg']> 0.1:
            if vs['pos']-vs['neg']> 0:
                pos_correct+=1
            pos_count+=1
            
neg_count= 0
neg_correct= 0
with open('negative.txt', 'r') as fneg:
    for line in fneg.read().split('\n'):
        vsneg= analyzer.polarity_scores(line)
        if not vsneg['pos']> 0.1:
            if vsneg['pos']-vsneg['neg']<= 0:
                neg_correct+=1
            neg_count+=1
print("Positive accuracy: {}% via {} samples".format(pos_correct/pos_count*100.0, pos_count))
print("Negative accuracy: {}% via {} samples".format(neg_correct/neg_count*100.0, neg_count))            

Positive accuracy: 81.0302734375% via 4096 samples
Negative accuracy: 89.26862611073138% via 2926 samples
CPU times: total: 1.27 s
Wall time: 1.29 s
