In [1]:
import nltk
nltk.download('vader_lexicon') # VADER sentiment lexicon used by SentimentIntensityAnalyzer

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [2]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer



In [3]:
sid = SentimentIntensityAnalyzer() # one analyzer instance, reused for every polarity_scores call below

In [4]:
# A short positive review; polarity_scores returns a dict with
# 'neg', 'neu', 'pos' and 'compound' entries
rev_1 = "This is a really good movie!"
sid.polarity_scores(rev_1)

{'compound': 0.54, 'neg': 0.0, 'neu': 0.534, 'pos': 0.466}

In [5]:
# A longer positive review with capitalised words and repeated exclamation marks
rev_2 = "This is one of the really good and BEST MOVIE ever made in 2021!!!"
sid.polarity_scores(rev_2)

{'compound': 0.8825, 'neg': 0.0, 'neu': 0.564, 'pos': 0.436}

In [6]:
# A negative review, to check that the compound score turns negative
rev_3 = "Such a bad movie! Wasted 2 hours and no fun!! BADDDDDDDDDDD"
sid.polarity_scores(rev_3)

{'compound': -0.7562, 'neg': 0.541, 'neu': 0.277, 'pos': 0.183}

`# Let's use VADER to analyse Amazon's product reviews`

In [7]:
import pandas as pd

In [8]:
# Load the tab-separated reviews file (one label and one review per row)
# and preview the first few rows
reviews_path = '/content/amazonreviews.tsv'
df = pd.read_csv(reviews_path, sep='\t')
df.head()

Unnamed: 0,label,review
0,pos,Stuning even for the non-gamer: This sound tra...
1,pos,The best soundtrack ever to anything.: I'm rea...
2,pos,Amazing!: This soundtrack is my favorite music...
3,pos,Excellent Soundtrack: I truly like this soundt...
4,pos,"Remember, Pull Your Jaw Off The Floor After He..."


In [9]:
# How many reviews carry each label
df['label'].value_counts()

neg    5097
pos    4903
Name: label, dtype: int64

In [10]:
# Count missing values in each column
df.isna().sum()

label     0
review    0
dtype: int64

In [11]:
# Drop any row with a missing label or review so that every remaining review can be scored
df.dropna(inplace=True)

In [12]:
# Collect the index labels of reviews that are empty or whitespace-only.
#
# Only the 'review' column is iterated (instead of unpacking whole rows), so
# this cell keeps working when df has more than two columns, e.g. when it is
# re-run after extra columns have been added to df.
# `not rv.strip()` is used instead of `rv.isspace()` because str.isspace()
# returns False for a zero-length string, which would let "" slip through.
blanks = [
    i
    for i, rv in df['review'].items()
    if isinstance(rv, str) and not rv.strip()
]

In [13]:
blanks
# No blank reviews were found (the list is empty). Had any index been
# collected, the following line would remove those rows:
## df.drop(blanks, inplace=True)

[]

In [14]:
# Score one sample review (row position 1029) and check its polarity
sid.polarity_scores(df['review'].iloc[1029])

{'compound': 0.8315, 'neg': 0.024, 'neu': 0.842, 'pos': 0.134}

In [15]:
# Show the text of the review at row position 1029
df['review'].iloc[1029]

"best album I've heard in a while: On my first listen to 'Catch Without Arms,' I was a little disappointed. I had been hoping for something that was closer in style to Leitmotif, but that's not what they delivered. But after listening to it a few times, I've gotten over the fact that their style has diverged a bit from their original sound. And, now I think that I do like 'Catch Without Arms' even more than their previous efforts."



```
# Let's apply scores to the reviews
```



In [16]:
# Run VADER on every review; each entry of 'scores' is the full dict
# returned by polarity_scores
df['scores'] = df['review'].apply(sid.polarity_scores)

In [17]:
# Preview the first ten rows, now including the 'scores' column of dicts
df.head(10)

Unnamed: 0,label,review,scores
0,pos,Stuning even for the non-gamer: This sound tra...,"{'neg': 0.088, 'neu': 0.669, 'pos': 0.243, 'co..."
1,pos,The best soundtrack ever to anything.: I'm rea...,"{'neg': 0.018, 'neu': 0.837, 'pos': 0.145, 'co..."
2,pos,Amazing!: This soundtrack is my favorite music...,"{'neg': 0.04, 'neu': 0.692, 'pos': 0.268, 'com..."
3,pos,Excellent Soundtrack: I truly like this soundt...,"{'neg': 0.09, 'neu': 0.615, 'pos': 0.295, 'com..."
4,pos,"Remember, Pull Your Jaw Off The Floor After He...","{'neg': 0.0, 'neu': 0.746, 'pos': 0.254, 'comp..."
5,pos,an absolute masterpiece: I am quite sure any o...,"{'neg': 0.014, 'neu': 0.737, 'pos': 0.249, 'co..."
6,neg,"Buyer beware: This is a self-published book, a...","{'neg': 0.124, 'neu': 0.806, 'pos': 0.069, 'co..."
7,pos,Glorious story: I loved Whisper of the wicked ...,"{'neg': 0.064, 'neu': 0.588, 'pos': 0.349, 'co..."
8,pos,A FIVE STAR BOOK: I just finished reading Whis...,"{'neg': 0.113, 'neu': 0.712, 'pos': 0.174, 'co..."
9,pos,Whispers of the Wicked Saints: This was a easy...,"{'neg': 0.033, 'neu': 0.777, 'pos': 0.19, 'com..."




```
# If we just want one value as its own column, let's extract the compound score into a 'compound' column
```



In [18]:
# Pull the 'compound' entry out of each score dict into its own numeric column
df['compound'] = [score_dict['compound'] for score_dict in df['scores']]
df.head()

Unnamed: 0,label,review,scores,compound
0,pos,Stuning even for the non-gamer: This sound tra...,"{'neg': 0.088, 'neu': 0.669, 'pos': 0.243, 'co...",0.9454
1,pos,The best soundtrack ever to anything.: I'm rea...,"{'neg': 0.018, 'neu': 0.837, 'pos': 0.145, 'co...",0.8957
2,pos,Amazing!: This soundtrack is my favorite music...,"{'neg': 0.04, 'neu': 0.692, 'pos': 0.268, 'com...",0.9858
3,pos,Excellent Soundtrack: I truly like this soundt...,"{'neg': 0.09, 'neu': 0.615, 'pos': 0.295, 'com...",0.9814
4,pos,"Remember, Pull Your Jaw Off The Floor After He...","{'neg': 0.0, 'neu': 0.746, 'pos': 0.254, 'comp...",0.9781


In [19]:
# Compound score stored for the review at row position 1029
df['compound'].iloc[1029]

0.8315



```
# Converting the compound scores into pos/neg labels so they can be compared to the true labels
```



In [20]:
# Turn the numeric compound score into a predicted label:
# a score of 0 or above becomes 'pos', anything below 0 becomes 'neg'
df['compute_score'] = (
    df['compound']
    .ge(0)
    .map({True: 'pos', False: 'neg'})
)
df.head()

Unnamed: 0,label,review,scores,compound,compute_score
0,pos,Stuning even for the non-gamer: This sound tra...,"{'neg': 0.088, 'neu': 0.669, 'pos': 0.243, 'co...",0.9454,pos
1,pos,The best soundtrack ever to anything.: I'm rea...,"{'neg': 0.018, 'neu': 0.837, 'pos': 0.145, 'co...",0.8957,pos
2,pos,Amazing!: This soundtrack is my favorite music...,"{'neg': 0.04, 'neu': 0.692, 'pos': 0.268, 'com...",0.9858,pos
3,pos,Excellent Soundtrack: I truly like this soundt...,"{'neg': 0.09, 'neu': 0.615, 'pos': 0.295, 'com...",0.9814,pos
4,pos,"Remember, Pull Your Jaw Off The Floor After He...","{'neg': 0.0, 'neu': 0.746, 'pos': 0.254, 'comp...",0.9781,pos




```
# Let's check how accurately the labels derived from VADER's compound score match the true labels.
```



In [21]:
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score

In [22]:
# Fraction of reviews whose VADER-derived label equals the true label
accuracy_score(y_true=df['label'], y_pred=df['compute_score'])

0.7091

In [23]:
# Per-class precision, recall and F1 of the VADER-derived labels
print(classification_report(y_true=df['label'], y_pred=df['compute_score']))

              precision    recall  f1-score   support

         neg       0.86      0.51      0.64      5097
         pos       0.64      0.91      0.75      4903

    accuracy                           0.71     10000
   macro avg       0.75      0.71      0.70     10000
weighted avg       0.75      0.71      0.70     10000



In [24]:
# Rows are the true labels and columns the predicted labels, both in the order neg, pos
print(confusion_matrix(y_true=df['label'], y_pred=df['compute_score']))

[[2623 2474]
 [ 435 4468]]
