## Sentiment analysis on the same dataset using two different techniques:

1. Using NLTK and VADER (Valence Aware Dictionary and sEntiment Reasoner)

2. Using TextBlob

In [0]:
# Importing required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import nltk
from textblob import TextBlob
import spacy
%matplotlib inline

In [4]:
# Download the 'vader_lexicon' NLTK resource needed for the VADER analysis in this notebook
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


True

In [6]:
# Read the tab-separated Amazon reviews file and preview the first rows
reviews_path = "/content/amazonreviews.tsv"
df = pd.read_csv(reviews_path, sep="\t")
df.head()

Unnamed: 0,label,review
0,pos,Stuning even for the non-gamer: This sound tra...
1,pos,The best soundtrack ever to anything.: I'm rea...
2,pos,Amazing!: This soundtrack is my favorite music...
3,pos,Excellent Soundtrack: I truly like this soundt...
4,pos,"Remember, Pull Your Jaw Off The Floor After He..."


In [7]:
# Class balance: number of reviews per sentiment label
df['label'].value_counts()

neg    5097
pos    4903
Name: label, dtype: int64

In [0]:
# Find reviews that are missing (NaN / any non-string value) or that contain
# nothing but whitespace, and collect their index labels in `blanks`.
blanks = []
for index, review in df['review'].items():
  # A non-string value means the cell is null; an empty result after
  # strip() means the text is empty or whitespace-only.
  if not isinstance(review, str) or not review.strip():
    blanks.append(index)

In [9]:
# Show the collected indices; an empty list means no review was flagged as blank
blanks

[]

## Using NLTK and VADER method

In [0]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [0]:
# Create one analyzer instance that is reused for every review below
sia = SentimentIntensityAnalyzer()

In [0]:
# Compute the VADER polarity scores for each review
# (the bound method is passed directly; it is called once per review text)
df['scores'] = df['review'].apply(sia.polarity_scores)

In [14]:
# Preview the frame, now including the per-review `scores` dictionaries
df.head()

Unnamed: 0,label,review,scores
0,pos,Stuning even for the non-gamer: This sound tra...,"{'neg': 0.088, 'neu': 0.669, 'pos': 0.243, 'co..."
1,pos,The best soundtrack ever to anything.: I'm rea...,"{'neg': 0.018, 'neu': 0.837, 'pos': 0.145, 'co..."
2,pos,Amazing!: This soundtrack is my favorite music...,"{'neg': 0.04, 'neu': 0.692, 'pos': 0.268, 'com..."
3,pos,Excellent Soundtrack: I truly like this soundt...,"{'neg': 0.09, 'neu': 0.615, 'pos': 0.295, 'com..."
4,pos,"Remember, Pull Your Jaw Off The Floor After He...","{'neg': 0.0, 'neu': 0.746, 'pos': 0.254, 'comp..."


In [15]:
# Pull the 'compound' value out of each score dictionary into its own column.
# The column key is kept exactly as spelled because a later cell reads it.
df['commpound'] = df['scores'].map(lambda score_dict: score_dict['compound'])
df.head()

Unnamed: 0,label,review,scores,commpound
0,pos,Stuning even for the non-gamer: This sound tra...,"{'neg': 0.088, 'neu': 0.669, 'pos': 0.243, 'co...",0.9454
1,pos,The best soundtrack ever to anything.: I'm rea...,"{'neg': 0.018, 'neu': 0.837, 'pos': 0.145, 'co...",0.8957
2,pos,Amazing!: This soundtrack is my favorite music...,"{'neg': 0.04, 'neu': 0.692, 'pos': 0.268, 'com...",0.9858
3,pos,Excellent Soundtrack: I truly like this soundt...,"{'neg': 0.09, 'neu': 0.615, 'pos': 0.295, 'com...",0.9814
4,pos,"Remember, Pull Your Jaw Off The Floor After He...","{'neg': 0.0, 'neu': 0.746, 'pos': 0.254, 'comp...",0.9781


In [20]:
# Turn the compound value into a binary label:
# strictly above 0.49 -> "pos", everything else -> "neg"
is_positive = df['commpound'] > 0.49
df['compound_score'] = np.where(is_positive, "pos", "neg")
df.head()

Unnamed: 0,label,review,scores,commpound,compound_score
0,pos,Stuning even for the non-gamer: This sound tra...,"{'neg': 0.088, 'neu': 0.669, 'pos': 0.243, 'co...",0.9454,pos
1,pos,The best soundtrack ever to anything.: I'm rea...,"{'neg': 0.018, 'neu': 0.837, 'pos': 0.145, 'co...",0.8957,pos
2,pos,Amazing!: This soundtrack is my favorite music...,"{'neg': 0.04, 'neu': 0.692, 'pos': 0.268, 'com...",0.9858,pos
3,pos,Excellent Soundtrack: I truly like this soundt...,"{'neg': 0.09, 'neu': 0.615, 'pos': 0.295, 'com...",0.9814,pos
4,pos,"Remember, Pull Your Jaw Off The Floor After He...","{'neg': 0.0, 'neu': 0.746, 'pos': 0.254, 'comp...",0.9781,pos


In [0]:
from sklearn.metrics import classification_report,accuracy_score,confusion_matrix

In [21]:
# Compare the VADER-derived labels with the ground-truth labels:
# per-class report, overall accuracy, then the confusion matrix
vader_truth = df['label']
vader_pred = df['compound_score']
print(classification_report(vader_truth, vader_pred))
print('\n')
print(accuracy_score(vader_truth, vader_pred))
print('\n')
print(confusion_matrix(vader_truth, vader_pred))

              precision    recall  f1-score   support

         neg       0.80      0.67      0.73      5097
         pos       0.70      0.83      0.76      4903

    accuracy                           0.74     10000
   macro avg       0.75      0.75      0.74     10000
weighted avg       0.75      0.74      0.74     10000



0.7447


[[3398 1699]
 [ 854 4049]]


## Using TextBlob

In [23]:
# Compute the TextBlob sentiment polarity for each review
df['polarity'] = df['review'].map(lambda text: TextBlob(text).sentiment.polarity)
df.head()

Unnamed: 0,label,review,scores,commpound,compound_score,polarity
0,pos,Stuning even for the non-gamer: This sound tra...,"{'neg': 0.088, 'neu': 0.669, 'pos': 0.243, 'co...",0.9454,pos,-0.021875
1,pos,The best soundtrack ever to anything.: I'm rea...,"{'neg': 0.018, 'neu': 0.837, 'pos': 0.145, 'co...",0.8957,pos,0.261111
2,pos,Amazing!: This soundtrack is my favorite music...,"{'neg': 0.04, 'neu': 0.692, 'pos': 0.268, 'com...",0.9858,pos,0.274691
3,pos,Excellent Soundtrack: I truly like this soundt...,"{'neg': 0.09, 'neu': 0.615, 'pos': 0.295, 'com...",0.9814,pos,0.272727
4,pos,"Remember, Pull Your Jaw Off The Floor After He...","{'neg': 0.0, 'neu': 0.746, 'pos': 0.254, 'comp...",0.9781,pos,0.324802


In [29]:
# Label distribution among the reviews whose polarity is exactly zero
df.loc[df['polarity'] == 0.00, 'label'].value_counts()

neg    106
pos     55
Name: label, dtype: int64

In [30]:
# Turn the polarity value into a binary label:
# strictly above 0.01 -> "pos", everything else -> "neg"
has_positive_polarity = df['polarity'] > 0.01
df['polarity_score'] = np.where(has_positive_polarity, "pos", "neg")
df.head()

Unnamed: 0,label,review,scores,commpound,compound_score,polarity,polarity_score
0,pos,Stuning even for the non-gamer: This sound tra...,"{'neg': 0.088, 'neu': 0.669, 'pos': 0.243, 'co...",0.9454,pos,-0.021875,neg
1,pos,The best soundtrack ever to anything.: I'm rea...,"{'neg': 0.018, 'neu': 0.837, 'pos': 0.145, 'co...",0.8957,pos,0.261111,pos
2,pos,Amazing!: This soundtrack is my favorite music...,"{'neg': 0.04, 'neu': 0.692, 'pos': 0.268, 'com...",0.9858,pos,0.274691,pos
3,pos,Excellent Soundtrack: I truly like this soundt...,"{'neg': 0.09, 'neu': 0.615, 'pos': 0.295, 'com...",0.9814,pos,0.272727,pos
4,pos,"Remember, Pull Your Jaw Off The Floor After He...","{'neg': 0.0, 'neu': 0.746, 'pos': 0.254, 'comp...",0.9781,pos,0.324802,pos


In [31]:
# Compare the TextBlob-derived labels with the ground-truth labels:
# per-class report, overall accuracy, then the confusion matrix
textblob_truth = df['label']
textblob_pred = df['polarity_score']
print(classification_report(textblob_truth, textblob_pred))
print('\n')
print(accuracy_score(textblob_truth, textblob_pred))
print('\n')
print(confusion_matrix(textblob_truth, textblob_pred))

              precision    recall  f1-score   support

         neg       0.89      0.47      0.62      5097
         pos       0.63      0.94      0.76      4903

    accuracy                           0.70     10000
   macro avg       0.76      0.71      0.69     10000
weighted avg       0.76      0.70      0.69     10000



0.7017


[[2414 2683]
 [ 300 4603]]


### ***VADER appears to perform better than TextBlob on this particular dataset (accuracy 0.7447 vs. 0.7017).***