In [None]:
# VADER

### Vader (Valence Aware Dictionary and Sentiment Reasoner)

+ VADER is a lexicon-based ***(a lexicon is the vocabulary of a person, language, or branch of knowledge; basically a dictionary)*** and rule-based sentiment analysis tool (a pre-built library) that is specifically attuned to sentiments expressed in social media
+ It is used for sentiment analysis of text that can carry either polarity (or a mix of both).
    + for example: **positive & negative**
+ VADER is used to quantify how much positive or negative emotion the text has, as well as the intensity of that emotion (it returns numerical values to be interpreted)

### Advantages
+ It does not require any training data
+ It can understand the sentiment of text containing:
    + emoticons
    + slang
    + conjunctions (e.g. a contrastive "but")
    + capitalized words
    + punctuation
    + etc.
+ It works very well on social media text

In [None]:
!pip install nltk[twitter]
# Twitter is gonna use twython library

Collecting twython
  Downloading twython-3.9.1-py3-none-any.whl (33 kB)
Installing collected packages: twython
Successfully installed twython-3.9.1


In [None]:
# VADER scores text against a lexicon (dictionary); fetch it once via NLTK's downloader
import nltk

nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


True

In [None]:
# SentimentIntensityAnalyzer is the class exposed by the nltk.sentiment.vader module
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Build a single analyzer object; the cells below reuse it through the name `vader`
vader = SentimentIntensityAnalyzer()

In [None]:
# Use the analyzer object to check whether a given text is +ve or -ve

# 1. Example of +ve Review
sample = 'I really love Podd and Earth'
# vader.polarity_scores() returns a dict with the keys 'neg', 'neu', 'pos' and 'compound'
# (see the output below); the values are floats that tell how +ve or -ve the text is
vader.polarity_scores(sample)

{'compound': 0.6697, 'neg': 0.0, 'neu': 0.471, 'pos': 0.529}

### Compound Score
The compound score is computed by summing the valence scores of each word found in the lexicon, adjusting the sum according to VADER's rules, and then normalizing the result to lie between:
+ -1 (most extreme negative)
+ +1 (most extreme positive)

The closer the compound score is to +1, the more positive the text; the closer it is to -1, the more negative the text.

In [None]:
# 2. Example of a -ve review (double quotes avoid having to escape the apostrophe)
sample = "I really don't like pizza"
vader.polarity_scores(sample)

{'compound': -0.3374, 'neg': 0.443, 'neu': 0.557, 'pos': 0.0}

In [None]:
# 3. Example of a review intended to be neutral
# NOTE: VADER does not score this sentence as purely neutral; the output below
# reports pos = 0.322 and a compound score of 0.2263, i.e. mildly positive.
sample = 'Yesterday, football match was okay'
vader.polarity_scores(sample)

{'compound': 0.2263, 'neg': 0.0, 'neu': 0.678, 'pos': 0.322}

# Use VADER on .tsv (tab separated value) dataset

In [None]:
# Import dataset: upload the file into the Colab session
# (google.colab is only importable when running on Google Colab)
from google.colab import files
upload = files.upload ()

Saving amazonreviews.tsv to amazonreviews.tsv


In [None]:
# List the working directory to confirm that amazonreviews.tsv has been uploaded
!ls

amazonreviews.tsv  sample_data


# EDA

In [None]:
import pandas as pd

# Load the reviews into a DataFrame; the file is a .tsv, so the delimiter is a tab
data = pd.read_csv('amazonreviews.tsv', delimiter='\t')
data

Unnamed: 0,label,review
0,pos,Stuning even for the non-gamer: This sound tra...
1,pos,The best soundtrack ever to anything.: I'm rea...
2,pos,Amazing!: This soundtrack is my favorite music...
3,pos,Excellent Soundtrack: I truly like this soundt...
4,pos,"Remember, Pull Your Jaw Off The Floor After He..."
...,...,...
9995,pos,A revelation of life in small town America in ...
9996,pos,Great biography of a very interesting journali...
9997,neg,Interesting Subject; Poor Presentation: You'd ...
9998,neg,Don't buy: The box looked used and it is obvio...


In [None]:
# Check the number of records: shape is (rows, columns)
data.shape

(10000, 2)

In [None]:
# Handle missing values: this does not merely check for them, it drops every row
# that contains a missing value and modifies `data` in place
data.dropna(inplace = True) # remove all rows with missing values

In [None]:
# Recheck the shape after dropping rows with missing values
data.shape
# The shape is unchanged, so there were no missing values

(10000, 2)

In [None]:
# List the column names of the dataset
data.columns

Index(['label', 'review'], dtype='object')

In [None]:
# Count how many reviews carry each label (+ve vs. -ve)
data['label'].value_counts()

neg    5097
pos    4903
Name: label, dtype: int64

In [None]:
# Lambda functions
# Also called anonymous functions.
# Key property:
#   1. A lambda accepts any number of arguments, but its body is exactly one expression.

# Single argument:
#   - the x on the left of `=` is the name the function is bound to
#   - the x right after `lambda` is the argument
#   - `x + 10` is the expression whose value is returned
x = lambda x: x + 10

# Calling x(5) passes 5 as the argument, so x + 10 becomes 5 + 10 = 15
print(x(5))

# Multiple arguments: the three values are added together (10 + 5 + 6 = 21)
x = lambda x, y, z: x + y + z
print(x(10, 5, 6))

15
21


#### Use lambda for sentiment analysis

In [None]:
# Score every review with VADER and keep the resulting score dict in a new 'scores' column.
# apply() calls the given function once per review; the bound method is passed directly.
data['scores'] = data['review'].apply(vader.polarity_scores)

In [None]:
# Display the frame to confirm that the 'scores' column has been added
data

Unnamed: 0,label,review,scores
0,pos,Stuning even for the non-gamer: This sound tra...,"{'neg': 0.088, 'neu': 0.669, 'pos': 0.243, 'co..."
1,pos,The best soundtrack ever to anything.: I'm rea...,"{'neg': 0.018, 'neu': 0.837, 'pos': 0.145, 'co..."
2,pos,Amazing!: This soundtrack is my favorite music...,"{'neg': 0.04, 'neu': 0.692, 'pos': 0.268, 'com..."
3,pos,Excellent Soundtrack: I truly like this soundt...,"{'neg': 0.09, 'neu': 0.615, 'pos': 0.295, 'com..."
4,pos,"Remember, Pull Your Jaw Off The Floor After He...","{'neg': 0.0, 'neu': 0.746, 'pos': 0.254, 'comp..."
...,...,...,...
9995,pos,A revelation of life in small town America in ...,"{'neg': 0.017, 'neu': 0.846, 'pos': 0.136, 'co..."
9996,pos,Great biography of a very interesting journali...,"{'neg': 0.0, 'neu': 0.868, 'pos': 0.132, 'comp..."
9997,neg,Interesting Subject; Poor Presentation: You'd ...,"{'neg': 0.096, 'neu': 0.768, 'pos': 0.137, 'co..."
9998,neg,Don't buy: The box looked used and it is obvio...,"{'neg': 0.091, 'neu': 0.909, 'pos': 0.0, 'comp..."


In [None]:
# Show only the 'review' and 'scores' columns side by side
data[['review', 'scores']]

Unnamed: 0,review,scores
0,Stuning even for the non-gamer: This sound tra...,"{'neg': 0.088, 'neu': 0.669, 'pos': 0.243, 'co..."
1,The best soundtrack ever to anything.: I'm rea...,"{'neg': 0.018, 'neu': 0.837, 'pos': 0.145, 'co..."
2,Amazing!: This soundtrack is my favorite music...,"{'neg': 0.04, 'neu': 0.692, 'pos': 0.268, 'com..."
3,Excellent Soundtrack: I truly like this soundt...,"{'neg': 0.09, 'neu': 0.615, 'pos': 0.295, 'com..."
4,"Remember, Pull Your Jaw Off The Floor After He...","{'neg': 0.0, 'neu': 0.746, 'pos': 0.254, 'comp..."
...,...,...
9995,A revelation of life in small town America in ...,"{'neg': 0.017, 'neu': 0.846, 'pos': 0.136, 'co..."
9996,Great biography of a very interesting journali...,"{'neg': 0.0, 'neu': 0.868, 'pos': 0.132, 'comp..."
9997,Interesting Subject; Poor Presentation: You'd ...,"{'neg': 0.096, 'neu': 0.768, 'pos': 0.137, 'co..."
9998,Don't buy: The box looked used and it is obvio...,"{'neg': 0.091, 'neu': 0.909, 'pos': 0.0, 'comp..."


In [None]:
# Extract the 'compound' entry of each score dict into its own 'compound' column,
# then display the frame
data['compound'] = data['scores'].map(lambda score_dict: score_dict['compound'])
data

Unnamed: 0,label,review,scores,compound
0,pos,Stuning even for the non-gamer: This sound tra...,"{'neg': 0.088, 'neu': 0.669, 'pos': 0.243, 'co...",0.9454
1,pos,The best soundtrack ever to anything.: I'm rea...,"{'neg': 0.018, 'neu': 0.837, 'pos': 0.145, 'co...",0.8957
2,pos,Amazing!: This soundtrack is my favorite music...,"{'neg': 0.04, 'neu': 0.692, 'pos': 0.268, 'com...",0.9858
3,pos,Excellent Soundtrack: I truly like this soundt...,"{'neg': 0.09, 'neu': 0.615, 'pos': 0.295, 'com...",0.9814
4,pos,"Remember, Pull Your Jaw Off The Floor After He...","{'neg': 0.0, 'neu': 0.746, 'pos': 0.254, 'comp...",0.9781
...,...,...,...,...
9995,pos,A revelation of life in small town America in ...,"{'neg': 0.017, 'neu': 0.846, 'pos': 0.136, 'co...",0.9610
9996,pos,Great biography of a very interesting journali...,"{'neg': 0.0, 'neu': 0.868, 'pos': 0.132, 'comp...",0.9544
9997,neg,Interesting Subject; Poor Presentation: You'd ...,"{'neg': 0.096, 'neu': 0.768, 'pos': 0.137, 'co...",0.7074
9998,neg,Don't buy: The box looked used and it is obvio...,"{'neg': 0.091, 'neu': 0.909, 'pos': 0.0, 'comp...",-0.3595


In [None]:
# Show each review next to its compound score
data[['review', 'compound']]

Unnamed: 0,review,compound
0,Stuning even for the non-gamer: This sound tra...,0.9454
1,The best soundtrack ever to anything.: I'm rea...,0.8957
2,Amazing!: This soundtrack is my favorite music...,0.9858
3,Excellent Soundtrack: I truly like this soundt...,0.9814
4,"Remember, Pull Your Jaw Off The Floor After He...",0.9781
...,...,...
9995,A revelation of life in small town America in ...,0.9610
9996,Great biography of a very interesting journali...,0.9544
9997,Interesting Subject; Poor Presentation: You'd ...,0.7074
9998,Don't buy: The box looked used and it is obvio...,-0.3595


In [None]:
# Turn the compound score into a label: zero or above -> 'pos', below zero -> 'neg'
data['sentiment'] = data['compound'].apply(
    lambda compound: 'pos' if compound >= 0 else 'neg'
)

In [None]:
# Show each review next to the sentiment label derived from its compound score
data[['review', 'sentiment']]

Unnamed: 0,review,sentiment
0,Stuning even for the non-gamer: This sound tra...,pos
1,The best soundtrack ever to anything.: I'm rea...,pos
2,Amazing!: This soundtrack is my favorite music...,pos
3,Excellent Soundtrack: I truly like this soundt...,pos
4,"Remember, Pull Your Jaw Off The Floor After He...",pos
...,...,...
9995,A revelation of life in small town America in ...,pos
9996,Great biography of a very interesting journali...,pos
9997,Interesting Subject; Poor Presentation: You'd ...,pos
9998,Don't buy: The box looked used and it is obvio...,neg


In [None]:
# Compare the VADER-derived sentiment with the dataset's own label for the first 10 reviews
data[['label', 'sentiment']].head(10)

Unnamed: 0,label,sentiment
0,pos,pos
1,pos,pos
2,pos,pos
3,pos,pos
4,pos,pos
5,pos,pos
6,neg,neg
7,pos,pos
8,pos,pos
9,pos,pos
