In [1]:
import pandas as pd
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/akshatsharma/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [2]:
data = pd.read_csv('./movie.csv')
print("Shape of data is : ", data.shape)

Shape of data is :  (40000, 2)


In [3]:
df = data.iloc[:1000, :]
print("Shape of data is : ", df.shape)

Shape of data is :  (1000, 2)


In [4]:
df

Unnamed: 0,text,label
0,I grew up (b. 1965) watching and loving the Th...,0
1,"When I put this movie in my DVD player, and sa...",0
2,Why do people who do not know what a particula...,0
3,Even though I have great interest in Biblical ...,0
4,Im a die hard Dads Army fan and nothing will e...,1
...,...,...
995,"Oh, this is so bad, it is funny. The only way ...",0
996,I couldn't believe this terrible movie was act...,0
997,Even though i am slightly older than the recom...,1
998,Reading web sites on Bette Davis one can find ...,1


In [5]:
sentiment_analyzer = SentimentIntensityAnalyzer()

In [6]:
def get_sentiment(review):
    sentiment_score = sentiment_analyzer.polarity_scores(review)
    return sentiment_score

In [7]:
def extract_scores(score):
    pos = score['pos']
    neg = score['neg']
    neu = score['neu']
    return pos, neg, neu

In [10]:
df.loc[:,'score'] = df['text'].apply(get_sentiment)

In [11]:
df

Unnamed: 0,text,label,score
0,I grew up (b. 1965) watching and loving the Th...,0,"{'neg': 0.084, 'neu': 0.78, 'pos': 0.136, 'com..."
1,"When I put this movie in my DVD player, and sa...",0,"{'neg': 0.098, 'neu': 0.755, 'pos': 0.147, 'co..."
2,Why do people who do not know what a particula...,0,"{'neg': 0.116, 'neu': 0.848, 'pos': 0.036, 'co..."
3,Even though I have great interest in Biblical ...,0,"{'neg': 0.204, 'neu': 0.656, 'pos': 0.14, 'com..."
4,Im a die hard Dads Army fan and nothing will e...,1,"{'neg': 0.066, 'neu': 0.839, 'pos': 0.095, 'co..."
...,...,...,...
995,"Oh, this is so bad, it is funny. The only way ...",0,"{'neg': 0.123, 'neu': 0.786, 'pos': 0.091, 'co..."
996,I couldn't believe this terrible movie was act...,0,"{'neg': 0.198, 'neu': 0.706, 'pos': 0.096, 'co..."
997,Even though i am slightly older than the recom...,1,"{'neg': 0.0, 'neu': 0.723, 'pos': 0.277, 'comp..."
998,Reading web sites on Bette Davis one can find ...,1,"{'neg': 0.102, 'neu': 0.717, 'pos': 0.18, 'com..."


In [15]:
df.loc[:,'pos_score'], df.loc[:,'neg_score'], df.loc[:,'neu_score'] = zip(*df['score'].apply(extract_scores))

In [16]:
df

Unnamed: 0,text,label,score,pos_score,neg_score,neu_score
0,I grew up (b. 1965) watching and loving the Th...,0,"{'neg': 0.084, 'neu': 0.78, 'pos': 0.136, 'com...",0.136,0.084,0.780
1,"When I put this movie in my DVD player, and sa...",0,"{'neg': 0.098, 'neu': 0.755, 'pos': 0.147, 'co...",0.147,0.098,0.755
2,Why do people who do not know what a particula...,0,"{'neg': 0.116, 'neu': 0.848, 'pos': 0.036, 'co...",0.036,0.116,0.848
3,Even though I have great interest in Biblical ...,0,"{'neg': 0.204, 'neu': 0.656, 'pos': 0.14, 'com...",0.140,0.204,0.656
4,Im a die hard Dads Army fan and nothing will e...,1,"{'neg': 0.066, 'neu': 0.839, 'pos': 0.095, 'co...",0.095,0.066,0.839
...,...,...,...,...,...,...
995,"Oh, this is so bad, it is funny. The only way ...",0,"{'neg': 0.123, 'neu': 0.786, 'pos': 0.091, 'co...",0.091,0.123,0.786
996,I couldn't believe this terrible movie was act...,0,"{'neg': 0.198, 'neu': 0.706, 'pos': 0.096, 'co...",0.096,0.198,0.706
997,Even though i am slightly older than the recom...,1,"{'neg': 0.0, 'neu': 0.723, 'pos': 0.277, 'comp...",0.277,0.000,0.723
998,Reading web sites on Bette Davis one can find ...,1,"{'neg': 0.102, 'neu': 0.717, 'pos': 0.18, 'com...",0.180,0.102,0.717


In [17]:
def get_words(review):
    words = sentiment_analyzer.lexicon.keys()
    positive_words = []
    negative_words = []
    for word in words:
        if sentiment_analyzer.polarity_scores(word)['pos'] >= 0:
            positive_words.append(word)
        elif sentiment_analyzer.polarity_scores(word)['neg'] >= 0:
            negative_words.append(word)
            
    return positive_words, negative_words

In [22]:
pos_words, neg_words = zip(*df['text'].apply(get_words))
df.loc[:,['pos_words', 'neg_words']] = pd.DataFrame({'pos_words': pos_words, 'neg_words': neg_words})

In [23]:
df

Unnamed: 0,text,label,score,pos_score,neg_score,neu_score,pos_words,neg_words
0,I grew up (b. 1965) watching and loving the Th...,0,"{'neg': 0.084, 'neu': 0.78, 'pos': 0.136, 'com...",0.136,0.084,0.780,"[$:, %), %-), &-:, &:, ( '}{' ), (%, ('-:, (':...",[]
1,"When I put this movie in my DVD player, and sa...",0,"{'neg': 0.098, 'neu': 0.755, 'pos': 0.147, 'co...",0.147,0.098,0.755,"[$:, %), %-), &-:, &:, ( '}{' ), (%, ('-:, (':...",[]
2,Why do people who do not know what a particula...,0,"{'neg': 0.116, 'neu': 0.848, 'pos': 0.036, 'co...",0.036,0.116,0.848,"[$:, %), %-), &-:, &:, ( '}{' ), (%, ('-:, (':...",[]
3,Even though I have great interest in Biblical ...,0,"{'neg': 0.204, 'neu': 0.656, 'pos': 0.14, 'com...",0.140,0.204,0.656,"[$:, %), %-), &-:, &:, ( '}{' ), (%, ('-:, (':...",[]
4,Im a die hard Dads Army fan and nothing will e...,1,"{'neg': 0.066, 'neu': 0.839, 'pos': 0.095, 'co...",0.095,0.066,0.839,"[$:, %), %-), &-:, &:, ( '}{' ), (%, ('-:, (':...",[]
...,...,...,...,...,...,...,...,...
995,"Oh, this is so bad, it is funny. The only way ...",0,"{'neg': 0.123, 'neu': 0.786, 'pos': 0.091, 'co...",0.091,0.123,0.786,"[$:, %), %-), &-:, &:, ( '}{' ), (%, ('-:, (':...",[]
996,I couldn't believe this terrible movie was act...,0,"{'neg': 0.198, 'neu': 0.706, 'pos': 0.096, 'co...",0.096,0.198,0.706,"[$:, %), %-), &-:, &:, ( '}{' ), (%, ('-:, (':...",[]
997,Even though i am slightly older than the recom...,1,"{'neg': 0.0, 'neu': 0.723, 'pos': 0.277, 'comp...",0.277,0.000,0.723,"[$:, %), %-), &-:, &:, ( '}{' ), (%, ('-:, (':...",[]
998,Reading web sites on Bette Davis one can find ...,1,"{'neg': 0.102, 'neu': 0.717, 'pos': 0.18, 'com...",0.180,0.102,0.717,"[$:, %), %-), &-:, &:, ( '}{' ), (%, ('-:, (':...",[]
