# VADER Sentiment

In [1]:
# Initial imports
from dotenv import load_dotenv
import os
from path import Path
import pandas as pd
from newsapi import NewsApiClient
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [2]:
# Download/update the VADER lexicon through NLTK's downloader.
# (The captured output of this cell reports whether the package was
# already up to date on the machine that ran it.)
nltk.download('vader_lexicon')

# Initialize the VADER sentiment analyzer.
# This single instance is reused by the scoring cells further down.
analyzer = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/shuran/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [3]:
# Import the Twitter feed from the CSV file

# Load the Trump tweets (Sep 2019 - Sep 2020).
# Notes:
# - `Time` is intentionally left as the raw ISO-8601 string, exactly as before.
#   The former `parse_dates=True` only targets the index (a default RangeIndex
#   here), so it never converted `Time`, and `infer_datetime_format` is
#   deprecated since pandas 2.0. Both were removed because they had no effect.
#   Pass `parse_dates=["Time"]` instead if real timestamps are needed downstream.
# - "Unnamed: 0" is the index column written by a previous `to_csv`; it carries
#   no information, so it is dropped right after loading.
trump_df = (
    pd.read_csv('trump_tweets_Sep_2019_Sep_2020.csv')
    .drop(columns="Unnamed: 0")
)
trump_df.head()

Unnamed: 0,Time,Tweet
0,2020-09-23T14:03:08.000Z,"Very important that, in order to watch that AL..."
1,2020-09-23T13:58:47.000Z,Bay of Pigs Veterans & Hispanic Heritage Remar...
2,2020-09-23T13:21:10.000Z,White House News Conference today at 6:00 P.M....
3,2020-09-23T11:22:58.000Z,I hardly know Cindy McCain other than having p...
4,2020-09-22T22:53:22.000Z,"A few weeks ago, I BANNED efforts to indoctrin..."


In [4]:
# Create the sentiment scores DataFrame for the tweets.
# One record is produced per tweet that VADER can score.
sentiments = []
skipped = 0  # rows whose 'Tweet' value is not a string (e.g. NaN from an empty CSV cell)

# Walk the two columns in lockstep; this visits the rows in the same order as
# iterrows() without building a Series object for every row.
for text, date in zip(trump_df['Tweet'], trump_df['Time']):
    # VADER only works on text. The original loop relied on a silent
    # `except AttributeError: pass` to drop non-string values; make that
    # decision explicit and keep count so dropped rows are visible.
    if not isinstance(text, str):
        skipped += 1
        continue

    sentiment = analyzer.polarity_scores(text)
    sentiments.append({
        "text": text,
        "date": date,
        "compound": sentiment["compound"],
        "positive": sentiment["pos"],
        "negative": sentiment["neg"],
        "neutral": sentiment["neu"],
    })

if skipped:
    print(f"Skipped {skipped} rows without tweet text")

# Create DataFrame. Naming the columns explicitly keeps the same column order
# as before and preserves the schema even when no tweet could be scored.
trump_df2 = pd.DataFrame(
    sentiments,
    columns=["text", "date", "compound", "positive", "negative", "neutral"],
)

trump_df2.head()

Unnamed: 0,text,date,compound,positive,negative,neutral
0,"Very important that, in order to watch that AL...",2020-09-23T14:03:08.000Z,0.4532,0.099,0.0,0.901
1,Bay of Pigs Veterans & Hispanic Heritage Remar...,2020-09-23T13:58:47.000Z,0.7088,0.269,0.0,0.731
2,White House News Conference today at 6:00 P.M....,2020-09-23T13:21:10.000Z,0.8188,0.32,0.0,0.68
3,I hardly know Cindy McCain other than having p...,2020-09-23T11:22:58.000Z,-0.8816,0.064,0.235,0.701
4,"A few weeks ago, I BANNED efforts to indoctrin...",2020-09-22T22:53:22.000Z,-0.8668,0.0,0.272,0.728


In [5]:
# Sanity check: number of tweets that received a sentiment score
# (one entry is appended to `sentiments` per scored tweet).
print(len(sentiments))

3398


In [21]:
# Preview the tweet text column. A bare trailing expression with .head()
# keeps the output bounded and uses the notebook's own display instead of
# printing the whole Series.
trump_df['Tweet'].head(20)

0       Very important that, in order to watch that AL...
1       Bay of Pigs Veterans & Hispanic Heritage Remar...
2       White House News Conference today at 6:00 P.M....
3       I hardly know Cindy McCain other than having p...
4       A few weeks ago, I BANNED efforts to indoctrin...
5       I will be announcing my Supreme Court Nominee ...
6       The Democrats are only interested in BAILING O...
7       Nissan is rolling out it’s 2021 ROGUE from TEN...
8       “The Trump Century, How Our President Changed ...
9       The Democrats are only interested in BAILING O...
10      Nissan is rolling out it’s 2021 ROGUE from TEN...
11      “The Trump Century, How Our President Changed ...
12      Will be interviewed on @foxandfriends at 8:00 ...
13      Will be interviewed by @marklevinshow on @FoxN...
14                SAVE YOUR SECOND AMENDMENT, VOTE TRUMP!
15                              MAKE AMERICA GREAT AGAIN!
16                                           LAW & ORDER!
17      VIRGIN

In [7]:
# Create the sentiment scores DataFrame for the GSK news articles.
# NOTE(review): `gsk_headlines` is not defined in any cell visible above.
# It presumably comes from a NewsAPI request cell - confirm that cell exists
# and runs before this one, otherwise Restart & Run All fails here.
gsk_sentiments = []
gsk_skipped = 0  # articles whose "content" is missing / not a string

for article in gsk_headlines["articles"]:
    text = article["content"]

    # VADER only works on text. The original loop relied on a silent
    # `except AttributeError: pass` to drop articles without content; make
    # that decision explicit and keep count so dropped articles are visible.
    if not isinstance(text, str):
        gsk_skipped += 1
        continue

    # Keep only the YYYY-MM-DD part of the publication timestamp.
    date = article["publishedAt"][:10]
    sentiment = analyzer.polarity_scores(text)

    gsk_sentiments.append({
        "text": text,
        "date": date,
        "compound": sentiment["compound"],
        "positive": sentiment["pos"],
        "negative": sentiment["neg"],
        "neutral": sentiment["neu"],
    })

if gsk_skipped:
    print(f"Skipped {gsk_skipped} articles without content")

# Create DataFrame with the desired column order in one step. Passing
# `columns=` (instead of indexing with `gsk_df[cols]` afterwards) also avoids
# a KeyError when no article could be scored and the record list is empty.
cols = ["date", "text", "compound", "positive", "negative", "neutral"]
gsk_df = pd.DataFrame(gsk_sentiments, columns=cols)

gsk_df.head()

Unnamed: 0,date,text,compound,positive,negative,neutral
0,2020-09-03,Posted \r\nDrugmakers Sanofi and GlaxoSmithKli...,0.128,0.091,0.0,0.909
1,2020-09-04,Drugmaker GlaxoSmithKline has won a preliminar...,0.7964,0.261,0.072,0.667
2,2020-09-04,Expert reports filed in a sprawling multidistr...,-0.5859,0.0,0.151,0.849
3,2020-09-03,PARIS (Reuters) - French drugmaker Sanofi and ...,0.0,0.0,0.0,1.0
4,2020-09-18,BRUSSELS/PARIS (Reuters) - The European Union ...,0.5423,0.127,0.0,0.873


In [8]:
# Get descriptive stats from the DataFrame: describe() summarises the numeric
# sentiment columns (compound, positive, negative, neutral) over all scored
# articles.
gsk_df.describe()

Unnamed: 0,compound,positive,negative,neutral
count,97.0,97.0,97.0,97.0
mean,0.231789,0.071103,0.016113,0.912794
std,0.356856,0.066757,0.043868,0.077354
min,-0.8176,0.0,0.0,0.637
25%,0.0,0.0,0.0,0.864
50%,0.25,0.078,0.0,0.919
75%,0.4939,0.124,0.0,1.0
max,0.7964,0.261,0.223,1.0
