In [11]:
import pandas as pd
from textblob import TextBlob
from pathlib import Path
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Read in the data frame

In [5]:
# Location of the cleaned training tweets, relative to this notebook
file_path = Path("../data/cleaned_data/cleaned_training_tweets.csv")

# Load the CSV, decoding it as ISO-8859-1 (Latin-1), then preview the first rows
df = pd.read_csv(filepath_or_buffer=file_path, encoding='ISO-8859-1')
df.head()

Unnamed: 0,id,user,date,text,target
0,1467810672,scotthamilton,Mon Apr 06 22:19:49 PDT 2009,is upset that he can't update his Facebook by ...,0
1,1467810917,mattycus,Mon Apr 06 22:19:53 PDT 2009,@Kenichan I dived many times for the ball. Man...,0
2,1467811184,ElleCTF,Mon Apr 06 22:19:57 PDT 2009,my whole body feels itchy and like its on fire,0
3,1467811193,Karoli,Mon Apr 06 22:19:57 PDT 2009,"@nationwideclass no, it's not behaving at all....",0
4,1467811372,joy_wolf,Mon Apr 06 22:20:00 PDT 2009,@Kwesidei not the whole crew,0


In [4]:
# Summarise the loaded frame: columns, non-null counts, dtypes and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1599999 entries, 0 to 1599998
Data columns (total 5 columns):
 #   Column  Non-Null Count    Dtype 
---  ------  --------------    ----- 
 0   id      1599999 non-null  int64 
 1   user    1599999 non-null  object
 2   date    1599999 non-null  object
 3   text    1599999 non-null  object
 4   target  1599999 non-null  int64 
dtypes: int64(2), object(3)
memory usage: 61.0+ MB


## Copy the DataFrame to test both TextBlob and VaderSentiment

In [9]:
# Work on a separate (deep) copy of the loaded frame for the TextBlob scores
text_blob_df = df.copy(deep=True)

In [13]:
# Work on a separate (deep) copy of the loaded frame for the VADER scores
vader_sentiment_df = df.copy(deep=True)

## TextBlob Sentiment analysis

In [8]:
def analyze_sentiment(text):
    """Return the TextBlob polarity score computed for ``text``."""
    return TextBlob(text).sentiment.polarity


# Score every tweet's text and keep the polarity in its own column
text_blob_df['sentiment_blob'] = text_blob_df['text'].apply(analyze_sentiment)
text_blob_df.head()

Unnamed: 0,id,user,date,text,target,sentiment
0,1467810672,scotthamilton,Mon Apr 06 22:19:49 PDT 2009,is upset that he can't update his Facebook by ...,0,0.0
1,1467810917,mattycus,Mon Apr 06 22:19:53 PDT 2009,@Kenichan I dived many times for the ball. Man...,0,0.5
2,1467811184,ElleCTF,Mon Apr 06 22:19:57 PDT 2009,my whole body feels itchy and like its on fire,0,0.2
3,1467811193,Karoli,Mon Apr 06 22:19:57 PDT 2009,"@nationwideclass no, it's not behaving at all....",0,-0.625
4,1467811372,joy_wolf,Mon Apr 06 22:20:00 PDT 2009,@Kwesidei not the whole crew,0,0.2


In [10]:
# Frequency of each TextBlob polarity score.
# The scores are stored in text_blob_df['sentiment_blob']; the original frame
# `df` has no sentiment column, so counting on `df` fails on a fresh kernel.
text_blob_df.value_counts(subset='sentiment_blob')

sentiment
 0.000000    568723
 0.500000     59501
 0.200000     43778
 0.250000     35536
-0.500000     35322
              ...  
 0.026278         1
 0.026215         1
 0.026190         1
 0.026190         1
 0.076786         1
Length: 30889, dtype: int64

## VaderSentiment analysis

In [14]:
# A single analyzer instance, shared by every call to the scoring function below
sentiment = SentimentIntensityAnalyzer()


def analyze_sentiment_vader(text):
    """Return the ``compound`` entry of VADER's polarity scores for ``text``."""
    return sentiment.polarity_scores(text)['compound']


# Score every tweet in the VADER copy and keep the compound score in its own column
vader_sentiment_df['sentiment_vader'] = vader_sentiment_df['text'].apply(analyze_sentiment_vader)
vader_sentiment_df.head()

Unnamed: 0,id,user,date,text,target,sentiment,sentiment_vader
0,1467810672,scotthamilton,Mon Apr 06 22:19:49 PDT 2009,is upset that he can't update his Facebook by ...,0,0.0,-0.75
1,1467810917,mattycus,Mon Apr 06 22:19:53 PDT 2009,@Kenichan I dived many times for the ball. Man...,0,0.5,0.4939
2,1467811184,ElleCTF,Mon Apr 06 22:19:57 PDT 2009,my whole body feels itchy and like its on fire,0,0.2,-0.25
3,1467811193,Karoli,Mon Apr 06 22:19:57 PDT 2009,"@nationwideclass no, it's not behaving at all....",0,-0.625,-0.4939
4,1467811372,joy_wolf,Mon Apr 06 22:20:00 PDT 2009,@Kwesidei not the whole crew,0,0.2,0.0


In [15]:
# Frequency of each VADER compound score across the tweets
vader_sentiment_df.value_counts(subset='sentiment_vader')

sentiment_vader
 0.0000    417713
 0.4404     30596
 0.3612     20066
-0.2960     18511
 0.4019     17227
            ...  
 0.0977         1
 0.0981         1
 0.0984         1
 0.0988         1
 0.9987         1
Length: 18266, dtype: int64