### Installing NLTK toolkit

In [2]:
!pip install nltk



You are using pip version 9.0.1, however version 19.1.1 is available.
You should consider upgrading via the 'python -m pip install --upgrade pip' command.


### Importing Moduls

In [4]:
import nltk
from nltk.sentiment import vader

In [5]:
# instatitae vased to be used
sia = vader.SentimentIntensityAnalyzer()

In [6]:
# to get polarity scores on the sentiments
sia.polarity_scores("What a terrible restaurant")

{'compound': -0.4767, 'neg': 0.608, 'neu': 0.392, 'pos': 0.0}

In [7]:
#finding a polarity scores for a single word 
sia.polarity_scores("terrible")

{'compound': -0.4767, 'neg': 1.0, 'neu': 0.0, 'pos': 0.0}

In [8]:
#finding polarity of emoticons
sia.polarity_scores(":D")

{'compound': 0.5106, 'neg': 0.0, 'neu': 0.0, 'pos': 1.0}

In [9]:
sia.polarity_scores(":/")

{'compound': -0.34, 'neg': 1.0, 'neu': 0.0, 'pos': 0.0}

In [10]:
# finding punctuation score
sia.polarity_scores("the food was good")

{'compound': 0.4404, 'neg': 0.0, 'neu': 0.508, 'pos': 0.492}

In [12]:
# finding punctuation score after exclamation mark compund score has risen up
sia.polarity_scores("the food was good!")

{'compound': 0.4926, 'neg': 0.0, 'neu': 0.484, 'pos': 0.516}

## Sentiment Analysis on Rule Based Approach vader

In [28]:
import os
positive = "rt-polaritydata/rt-polarity.pos"

In [29]:
with open(positive,'r') as f:
    positive = f.readlines()

In [30]:
positive[0]

'the rock is destined to be the 21st century\'s new " conan " and that he\'s going to make a splash even greater than arnold schwarzenegger , jean-claud van damme or steven segal . \n'

In [31]:
positive[10]

'this is a film well worth seeing , talking and singing heads and all . \n'

In [32]:
len(positive)

5331

In [33]:
negative = "rt-polaritydata/rt-polarity.neg"

In [34]:
with open(negative,'r') as f:
    negative = f.readlines()

In [45]:
negative[0]

'simplistic , silly and tedious . \n'

In [46]:
sia = vader.SentimentIntensityAnalyzer()
def vaderSentiment(review):
  return sia.polarity_scores(review)['compound']

In [48]:
review = "this is the best restaurant in the city"

In [49]:
vaderSentiment(review)

0.6369

In [51]:
def getReviewSentiments(sentimentCalculator): 
  negReviewResult = [sentimentCalculator(oneNegativeReview) for oneNegativeReview in negative]
  posReviewResult = [sentimentCalculator(onePositiveReview) for onePositiveReview in positive]
  return {'results-on-positive':posReviewResult, 'results-on-negative':negReviewResult}

In [53]:
vaderResults = getReviewSentiments(vaderSentiment)

In [54]:
vaderResults.keys()

dict_keys(['results-on-positive', 'results-on-negative'])

In [55]:
len(vaderResults['results-on-negative'])

5331

In [52]:
def runDiagnostics(reviewResult):
  positiveReviewsResult = reviewResult['results-on-positive']
  negativeReviewsResult = reviewResult['results-on-negative']
  pctTruePositive = float(sum(x > 0 for x in positiveReviewsResult))/len(positiveReviewsResult)
  pctTrueNegative = float(sum(x < 0 for x in negativeReviewsResult))/len(negativeReviewsResult)
  totalAccurate = float(sum(x > 0 for x in positiveReviewsResult)) + float(sum(x < 0 for x in negativeReviewsResult))
  total = len(positiveReviewsResult) + len(negativeReviewsResult)
  print("Accuracy on positive reviews = " +"%.2f" % (pctTruePositive*100) + "%")
  print("Accurance on negative reviews = " +"%.2f" % (pctTrueNegative*100) + "%")
  print("Overall accuracy = " + "%.2f" % (totalAccurate*100/total) + "%")

In [56]:
runDiagnostics(getReviewSentiments(vaderSentiment))

Accuracy on positive reviews = 69.44%
Accurance on negative reviews = 40.09%
Overall accuracy = 54.76%


### sentiment analysis based on sentiwordnet

In [64]:
from nltk.corpus import sentiwordnet as swn

In [65]:
swn.senti_synsets('dog')

<filter at 0x1973823b6d8>

In [59]:
def superNaiveSentiment(review):
 reviewPolarity = 0.0
 numExceptions = 0
 for word in review.lower().split():
   weight = 0.0
   try:
     common_meaning = swn.senti_synsets(word)[0]
     if common_meaning.pos_score()>common_meaning.neg_score():
        weight = weight + common_meaning.pos_score()
     elif common_meaning.pos_score()<common_meaning.neg_score():
        weight = weight - common_meaning.neg_score()
   except:
       numExceptions = numExceptions + 1
   #print "Word: " + word + " weight: " + str(weight)
   reviewPolarity = reviewPolarity + weight
 return reviewPolarity


In [66]:
runDiagnostics(getReviewSentiments(superNaiveSentiment))

Accuracy on positive reviews = 0.00%
Accurance on negative reviews = 0.00%
Overall accuracy = 0.00%


In [67]:
from string import punctuation
from nltk.corpus import stopwords

In [68]:
stopwords= set(stopwords.words('english')+list(punctuation))

In [69]:
list(punctuation)

['!',
 '"',
 '#',
 '$',
 '%',
 '&',
 "'",
 '(',
 ')',
 '*',
 '+',
 ',',
 '-',
 '.',
 '/',
 ':',
 ';',
 '<',
 '=',
 '>',
 '?',
 '@',
 '[',
 '\\',
 ']',
 '^',
 '_',
 '`',
 '{',
 '|',
 '}',
 '~']

In [70]:
stopwords

{'!',
 '"',
 '#',
 '$',
 '%',
 '&',
 "'",
 '(',
 ')',
 '*',
 '+',
 ',',
 '-',
 '.',
 '/',
 ':',
 ';',
 '<',
 '=',
 '>',
 '?',
 '@',
 '[',
 '\\',
 ']',
 '^',
 '_',
 '`',
 'a',
 'about',
 'above',
 'after',
 'again',
 'against',
 'ain',
 'all',
 'am',
 'an',
 'and',
 'any',
 'are',
 'aren',
 "aren't",
 'as',
 'at',
 'be',
 'because',
 'been',
 'before',
 'being',
 'below',
 'between',
 'both',
 'but',
 'by',
 'can',
 'couldn',
 "couldn't",
 'd',
 'did',
 'didn',
 "didn't",
 'do',
 'does',
 'doesn',
 "doesn't",
 'doing',
 'don',
 "don't",
 'down',
 'during',
 'each',
 'few',
 'for',
 'from',
 'further',
 'had',
 'hadn',
 "hadn't",
 'has',
 'hasn',
 "hasn't",
 'have',
 'haven',
 "haven't",
 'having',
 'he',
 'her',
 'here',
 'hers',
 'herself',
 'him',
 'himself',
 'his',
 'how',
 'i',
 'if',
 'in',
 'into',
 'is',
 'isn',
 "isn't",
 'it',
 "it's",
 'its',
 'itself',
 'just',
 'll',
 'm',
 'ma',
 'me',
 'mightn',
 "mightn't",
 'more',
 'most',
 'mustn',
 "mustn't",
 'my',
 'myself',
 'need

In [71]:
def naiveSentiment(review):
 reviewPolarity = 0.0
 numExceptions = 0
 for word in review.lower().split():
   numMeanings = 0
   if word in stopwords:
     continue
   weight = 0.0
   try:
     for meaning in swn.senti_synsets(word):
       if meaning.pos_score() > meaning.neg_score():
          weight = weight + (meaning.pos_score() - meaning.neg_score())
          numMeanings = numMeanings + 1
       elif meaning.pos_score() < meaning.neg_score():
          weight = weight - (meaning.neg_score() - meaning.pos_score())
          numMeanings = numMeanings + 1
   except: 
       numExceptions = numExceptions + 1
   if numMeanings > 0:
     reviewPolarity = reviewPolarity + (weight/numMeanings)
 return reviewPolarity

In [72]:
runDiagnostics(getReviewSentiments(naiveSentiment))

Accuracy on positive reviews = 75.56%
Accurance on negative reviews = 42.79%
Overall accuracy = 59.17%
