In [1]:
# Install vaderSentiment package for VADER
!pip install vaderSentiment

Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: vaderSentiment
Successfully installed vaderSentiment-3.3.2


In [2]:
# Data processing
import pandas as pd
import numpy as np


# Import VADER sentiment
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Import accuracy_score to check performance
from sklearn.metrics import accuracy_score

# Set a wider column width so long text cells are displayed in full (up to 1000 characters)
pd.set_option('display.max_colwidth', 1000) # display setting


In [3]:
# Mount Google Drive (connects this Colab session to Google Drive)
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Change into the project folder on the mounted Drive.
# An absolute path is used so this cell can be re-run safely: a relative path
# would be resolved against the already-changed working directory and fail.
import os
os.chdir("/content/drive/My Drive/NLP/")

In [6]:
# Print the current working directory to confirm the directory change took effect
!pwd

/content/drive/My Drive/NLP


In [8]:
# Load the tab-separated reviews into a DataFrame with columns 'review' and 'label'.
# header=None is what pandas already assumes when `names` is supplied; it is spelled
# out here so it is obvious the first line of the file is treated as data.
# NOTE(review): the file name contains a space before '.txt' - presumably this
# matches the file stored on Drive; confirm before renaming.
amz_review = pd.read_csv(
    'amazon_cells_labelled .txt',
    sep='\t',
    header=None,
    names=['review', 'label'],
)

In [9]:
# Preview the first 10 rows to check the columns were parsed as expected
amz_review.head(10)

Unnamed: 0,review,label
0,So there is no way for me to plug it in here in the US unless I go by a converter.,0
1,"Good case, Excellent value.",1
2,Great for the jawbone.,1
3,Tied to charger for conversations lasting more than 45 minutes.MAJOR PROBLEMS!!,0
4,The mic is great.,1
5,I have to jiggle the plug to get it to line up right to get decent volume.,0
6,"If you have several dozen or several hundred contacts, then imagine the fun of sending each of them one by one.",0
7,If you are Razr owner...you must have this!,1
8,"Needless to say, I wasted my money.",0
9,What a waste of money and time!.,0


In [10]:
# Get the dataset information (.info()) -> shows there are 1000 entries (1000 reviews)
amz_review.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   review  1000 non-null   object
 1   label   1000 non-null   int64 
dtypes: int64(1), object(1)
memory usage: 15.8+ KB


In [11]:
# Check the label distribution: how many reviews are positive vs. negative
# (taken from the 'label' column)
amz_review['label'].value_counts()

0    500
1    500
Name: label, dtype: int64

In [12]:
# Example text (an alternative example sentence is kept commented out below)
#text = 'GrabNGoInfo.com is a great machine learning tutorial website.'
text = 'The products are expensive and not good'

# VADER Sentiment
vader = SentimentIntensityAnalyzer() # initialize the VADER analyzer
vader_sentiment = vader.polarity_scores(text) # run VADER on `text` to get its polarity scores


In [13]:
vader_sentiment # display the resulting polarity scores

{'neg': 0.286, 'neu': 0.714, 'pos': 0.0, 'compound': -0.3412}

In [14]:
vader_sentiment['compound'] # display only the compound score

-0.3412

In [15]:
# Get the sentiment score for each review, keeping only VADER's compound score.
# The analyzer gets its own name so the `vader_sentiment` dict of example scores
# created earlier is not overwritten; rebinding that name to an analyzer object
# would break the earlier display cells if they were run again.
review_analyzer = SentimentIntensityAnalyzer()
amz_review['scores_VADER'] = amz_review['review'].apply(
    lambda review_text: review_analyzer.polarity_scores(review_text)['compound']
)


In [16]:
# Preview the first 10 rows again, now including the 'scores_VADER' column
amz_review.head(10)

Unnamed: 0,review,label,scores_VADER
0,So there is no way for me to plug it in here in the US unless I go by a converter.,0,-0.3535
1,"Good case, Excellent value.",1,0.8402
2,Great for the jawbone.,1,0.6249
3,Tied to charger for conversations lasting more than 45 minutes.MAJOR PROBLEMS!!,0,-0.6145
4,The mic is great.,1,0.6249
5,I have to jiggle the plug to get it to line up right to get decent volume.,0,0.0
6,"If you have several dozen or several hundred contacts, then imagine the fun of sending each of them one by one.",0,0.5106
7,If you are Razr owner...you must have this!,1,0.0
8,"Needless to say, I wasted my money.",0,-0.4939
9,What a waste of money and time!.,0,-0.4753


In [17]:
# Derive a binary prediction from the VADER compound score and store it in a new
# 'pred_VADER' column: scores of zero or above become positive (1), scores below
# zero become negative (0).
amz_review['pred_VADER'] = amz_review['scores_VADER'].ge(0).astype('int64')
amz_review.head(10)

Unnamed: 0,review,label,scores_VADER,pred_VADER
0,So there is no way for me to plug it in here in the US unless I go by a converter.,0,-0.3535,0
1,"Good case, Excellent value.",1,0.8402,1
2,Great for the jawbone.,1,0.6249,1
3,Tied to charger for conversations lasting more than 45 minutes.MAJOR PROBLEMS!!,0,-0.6145,0
4,The mic is great.,1,0.6249,1
5,I have to jiggle the plug to get it to line up right to get decent volume.,0,0.0,1
6,"If you have several dozen or several hundred contacts, then imagine the fun of sending each of them one by one.",0,0.5106,1
7,If you are Razr owner...you must have this!,1,0.0,1
8,"Needless to say, I wasted my money.",0,-0.4939,0
9,What a waste of money and time!.,0,-0.4753,0


In [18]:
# Accuracy of the VADER predictions measured against the actual labels
accuracy_score(y_true=amz_review['label'], y_pred=amz_review['pred_VADER'])

0.768