In [None]:
"""
comparing TextBlob with Vader sentiment analysis
of r/technews comments
"""

In [1]:
import pickle
import pandas as pd
import numpy as np

from textblob import TextBlob

In [45]:
# TextBlob's default analyzer and VADER are both lexicon-based sentiment tools; VADER in
# particular is tuned for social-media text, so both should work reasonably well for the subreddit comments

In [46]:
"""The sentiment property returns a named tuple of the form Sentiment(polarity, subjectivity).
The polarity score is a float within the range [-1.0, 1.0]. The subjectivity is a float 
within the range [0.0, 1.0] where 0.0 is very objective and 1.0 is very subjective."""

'The sentiment property returns a named tuple of the form Sentiment(polarity, subjectivity).\nThe polarity score is a float within the range [-1.0, 1.0]. The subjectivity is a float \nwithin the range [0.0, 1.0] where 0.0 is very objective and 1.0 is very subjective.'

In [2]:
# Load the pickled r/technews comments table.
# NOTE(review): pickle.load can execute arbitrary code while unpickling --
# only open pickle files that come from a trusted source.
with open('post_comments_top.pickle', mode='rb') as pickle_file:
    comments = pickle.load(pickle_file)

# (rows, columns) of the loaded table
comments.shape

(203847, 4)

In [48]:
# Preview the first few rows of the loaded comments table
comments.head()

Unnamed: 0,comment_id,comment_parent_id,comment_body,comment_link_id
0,fpgu5a7,t3_gdd9d8,[deleted],t3_gdd9d8
1,fpgnsbq,t3_gdd9d8,big props to him for having the huge balls to ...,t3_gdd9d8
2,fpgquur,t3_gdd9d8,Chickenshit is correct. Attempting or even see...,t3_gdd9d8
3,fpgtifa,t3_gdd9d8,When you're in a position of power but still c...,t3_gdd9d8
4,fpgtfiy,t3_gdd9d8,"Nice, I got to interview him a few years ago w...",t3_gdd9d8


In [26]:
# Score every comment with TextBlob once and reuse the result for both columns,
# so each comment is analysed a single time instead of once per score.
# .sentiment is a named tuple (polarity, subjectivity):
#   polarity     -- float in [-1.0, 1.0]
#   subjectivity -- float in [0.0, 1.0] (0.0 = very objective, 1.0 = very subjective)
textblob_scores = [TextBlob(body).sentiment for body in comments['comment_body']]
comments['sentiment_polarity'] = [score.polarity for score in textblob_scores]
comments['sentiment_subjectivity'] = [score.subjectivity for score in textblob_scores]

In [28]:
# Preview the table again, now with the sentiment_polarity and sentiment_subjectivity columns
comments.head()

Unnamed: 0,comment_id,comment_parent_id,comment_body,comment_link_id,sentiment_polarity,sentiment_subjectivity
0,fpgu5a7,t3_gdd9d8,[deleted],t3_gdd9d8,0.0,0.0
1,fpgnsbq,t3_gdd9d8,big props to him for having the huge balls to ...,t3_gdd9d8,0.2,0.5
2,fpgquur,t3_gdd9d8,Chickenshit is correct. Attempting or even see...,t3_gdd9d8,0.325,0.5
3,fpgtifa,t3_gdd9d8,When you're in a position of power but still c...,t3_gdd9d8,0.0,0.75
4,fpgtfiy,t3_gdd9d8,"Nice, I got to interview him a few years ago w...",t3_gdd9d8,0.077778,0.527778


In [29]:
# Lowest TextBlob polarity in the data set.
# Series.min() is vectorised and skips NaN, unlike the builtin min(), which
# walks the Series one Python object at a time.
comments['sentiment_polarity'].min()

-1.0

In [30]:
# First few comments that TextBlob scored at the most negative polarity (-1.0)
comments.loc[comments['sentiment_polarity'].eq(-1)].head()

Unnamed: 0,comment_id,comment_parent_id,comment_body,comment_link_id,sentiment_polarity,sentiment_subjectivity
289,fphqm5h,t1_fpgquur,Evil chickenshit? Can chickenshit be evil?,t3_gdd9d8,-1.0,1.0
684,fpk1cfr,t1_fph2fl7,Horrible take.,t3_gdd9d8,-1.0,1.0
878,fphjq9i,t1_fpha6ql,It's not *just* that they have money. \n\nIt's...,t3_gdd9d8,-1.0,1.0
885,fphk0gw,t1_fphgyj6,It's not just that they have money. \n\nIt's w...,t3_gdd9d8,-1.0,1.0
1355,fphf5gl,t1_fphesox,"""pretend you're in this insane world. your vi...",t3_gdd9d8,-1.0,1.0


In [37]:
# Full text of the comment at row position 289 (polarity -1.0);
# the column is picked by name instead of by its position (2)
comments['comment_body'].iloc[289]

'Evil chickenshit?  Can chickenshit be evil?'

In [38]:
# Full text of the comment at row position 878 (polarity -1.0);
# the column is picked by name instead of by its position (2)
comments['comment_body'].iloc[878]

"It's not *just* that they have money. \n\nIt's what and how they got it. \n\nWho's backs they stood on. \n\nAnd now that they have it, how they hoard it and how horribly they treat the people below them. \n\nIt's not *just* having it."

In [40]:
# Full text of the comment at row position 1355 (polarity -1.0);
# the column is picked by name instead of by its position (2)
comments['comment_body'].iloc[1355]

'"pretend you\'re in this insane world.  your views don\'t make sense now, do they?"\n\nwell, no.  You got me there.  I have no idea how things would work in a world where the currency was sex.'

In [31]:
# Highest TextBlob polarity in the data set.
# Series.max() is vectorised and skips NaN, unlike the builtin max(), which
# walks the Series one Python object at a time.
comments['sentiment_polarity'].max()

1.0

In [32]:
# First few comments that TextBlob scored at the most positive polarity (1.0)
comments.loc[comments['sentiment_polarity'].eq(1)].head()

Unnamed: 0,comment_id,comment_parent_id,comment_body,comment_link_id,sentiment_polarity,sentiment_subjectivity
113,fphuy0o,t3_gdd9d8,Firing someone like that is perfect for descri...,t3_gdd9d8,1.0,1.0
144,fpij6zs,t3_gdd9d8,This is awesome he should go to tesla,t3_gdd9d8,1.0,1.0
298,fpheno9,t1_fpgtfiy,Just wanna say how awesome a name is GOTO Conf,t3_gdd9d8,1.0,1.0
577,fpjq6fk,t1_fph7wjc,Happy cake day!,t3_gdd9d8,1.0,1.0
579,fphlm41,t1_fphbrvb,Stallman still has your best interests at hear...,t3_gdd9d8,1.0,0.3


In [34]:
# Full text of the comment at row position 113 (polarity 1.0);
# the column is picked by name instead of by its position (2)
comments['comment_body'].iloc[113]

'Firing someone like that is perfect for describing them'

In [35]:
# Full text of the comment at row position 144 (polarity 1.0);
# the column is picked by name instead of by its position (2)
comments['comment_body'].iloc[144]

'This is awesome he should go to tesla'

In [36]:
# Full text of the comment at row position 298 (polarity 1.0);
# the column is picked by name instead of by its position (2)
comments['comment_body'].iloc[298]

'Just wanna say how awesome a name is GOTO Conf'

In [41]:
#################

In [42]:
#compare to Vader

In [3]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer 

In [5]:
# Score every comment body with VADER; polarity_scores() yields one result per
# comment, collected in the same order as the rows of `comments`.
sid_obj = SentimentIntensityAnalyzer()
sentiment = [sid_obj.polarity_scores(body) for body in comments['comment_body']]

In [7]:
# Sanity check: number of VADER results collected (should equal the number of comments)
len(sentiment)

203847

In [8]:
# Sanity check: the row count should match len(sentiment) before joining column-wise.
# NOTE(review): the saved output shows 4 columns because this cell was executed (In [8])
# before the TextBlob columns were added (In [26]); a top-to-bottom re-run would
# include sentiment_polarity and sentiment_subjectivity as well.
comments.shape

(203847, 4)

In [9]:
# Attach the VADER scores (neg / neu / pos / compound) to the comments as new columns.
# concat(axis=1) aligns on index labels, so the score frame is built on comments.index:
# each score then stays paired with its own comment even if `comments` does not carry
# a default 0..n-1 index (otherwise rows would be misaligned and padded with NaN).
vader_results = pd.concat(
    [comments, pd.DataFrame(sentiment, index=comments.index)],
    axis=1,
)

In [10]:
# Preview the comments together with their VADER score columns
vader_results.head()

Unnamed: 0,comment_id,comment_parent_id,comment_body,comment_link_id,neg,neu,pos,compound
0,fpgu5a7,t3_gdd9d8,[deleted],t3_gdd9d8,0.0,1.0,0.0,0.0
1,fpgnsbq,t3_gdd9d8,big props to him for having the huge balls to ...,t3_gdd9d8,0.0,0.827,0.173,0.3182
2,fpgquur,t3_gdd9d8,Chickenshit is correct. Attempting or even see...,t3_gdd9d8,0.109,0.665,0.227,0.4404
3,fpgtifa,t3_gdd9d8,When you're in a position of power but still c...,t3_gdd9d8,0.0,0.847,0.153,0.7579
4,fpgtfiy,t3_gdd9d8,"Nice, I got to interview him a few years ago w...",t3_gdd9d8,0.068,0.776,0.155,0.7655


In [None]:
# The "compound" score, ranging from -1 (most negative) to +1 (most positive), provides a single measure of polarity.

In [None]:
"""
The Compound score is a metric that calculates the sum of all the 
lexicon ratings which have been normalized between -1(most extreme negative) 
and +1 (most extreme positive).

positive sentiment : (compound score >= 0.05)
neutral sentiment : (compound score > -0.05) and (compound score < 0.05)
negative sentiment : (compound score <= -0.05)"""