# VADER Sentiment

In [1]:
# Initial imports
from dotenv import load_dotenv
import os
from path import Path
import pandas as pd
from newsapi import NewsApiClient
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [2]:
# Download/Update the VADER Lexicon
# (nltk.download fetches the 'vader_lexicon' package into the local nltk_data
# directory; the log below shows it reports "already up-to-date" when present)
nltk.download('vader_lexicon')

# Initialize the VADER sentiment analyzer
# (this single `analyzer` instance is reused by the scoring loop further down)
analyzer = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\daphy\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [3]:
# Load environment variables from the local .env file, then read the
# News API key from the NEWS_API_KEY environment variable
load_dotenv(".env")
api_key = os.getenv("NEWS_API_KEY")

# os.getenv returns None when the variable is unset; stop here with a clear
# message instead of handing a missing key to the News API client.
if not api_key:
    raise RuntimeError(
        "NEWS_API_KEY is not set; define it in .env or in the environment"
    )

In [4]:
# Create a newsapi client
# `api_key` is the value read from the NEWS_API_KEY environment variable in the previous cell.
newsapi = NewsApiClient(api_key=api_key)

In [6]:
# Ask the News API for English-language articles matching the exact phrase "GSK":
# first page only, up to 100 articles, ordered by relevancy
gsk_headlines = newsapi.get_everything(
    q='"GSK"', language="en", sort_by="relevancy", page=1, page_size=100
)

# Report how many articles matched in total (can exceed the number fetched on this page)
print(f"Total articles about GSK: {gsk_headlines['totalResults']}")

# Display the first returned article as a sample of the response structure
gsk_headlines["articles"][0]

Total articles about GSK: 533


{'source': {'id': 'reuters', 'name': 'Reuters'},
 'author': 'Reuters Editorial',
 'title': 'Sanofi, GSK launch vaccine trials - Reuters',
 'description': 'Drugmakers Sanofi and GlaxoSmithKline say they have launched clinical trials of a protein-based vaccine against COVID-19.',
 'url': 'https://www.reuters.com/video/watch/idPS9T?now=true',
 'urlToImage': 'https://ajo.prod.reuters.tv/api/v2/img/5f5104b7e4b04d445762e64c-1599145143974?location=LANDSCAPE',
 'publishedAt': '2020-09-03T15:28:38Z',
 'content': 'Posted \r\nDrugmakers Sanofi and GlaxoSmithKline say they have launched clinical trials of a protein-based vaccine against COVID-19.'}

In [7]:
# Create the GSK sentiment scores DataFrame
# Each scored article becomes one record: publication date (YYYY-MM-DD),
# the article text, and the VADER compound/positive/negative/neutral scores.
cols = ["date", "text", "compound", "positive", "negative", "neutral"]
gsk_sentiments = []

for article in gsk_headlines["articles"]:
    text = article["content"]

    # Some articles come back without usable content. Skip those explicitly
    # rather than catching AttributeError from the analyzer, which would also
    # silently hide unrelated bugs inside the loop body.
    if not isinstance(text, str):
        continue

    # Keep only the date part of the ISO timestamp (first 10 characters)
    date = article["publishedAt"][:10]
    sentiment = analyzer.polarity_scores(text)

    gsk_sentiments.append({
        "date": date,
        "text": text,
        "compound": sentiment["compound"],
        "positive": sentiment["pos"],
        "negative": sentiment["neg"],
        "neutral": sentiment["neu"],
    })

# Create DataFrame with the columns in their final order.
# Passing `columns` also keeps this cell working when no article was scored:
# the result is an empty frame with the expected columns instead of a KeyError.
gsk_df = pd.DataFrame(gsk_sentiments, columns=cols)

gsk_df.head()

Unnamed: 0,date,text,compound,positive,negative,neutral
0,2020-09-03,Posted \r\nDrugmakers Sanofi and GlaxoSmithKli...,0.128,0.091,0.0,0.909
1,2020-09-04,Drugmaker GlaxoSmithKline has won a preliminar...,0.7964,0.261,0.072,0.667
2,2020-09-04,Expert reports filed in a sprawling multidistr...,-0.5859,0.0,0.151,0.849
3,2020-09-03,PARIS (Reuters) - French drugmaker Sanofi and ...,0.0,0.0,0.0,1.0
4,2020-09-18,BRUSSELS/PARIS (Reuters) - The European Union ...,0.5423,0.127,0.0,0.873


In [8]:
# Get descriptive stats from the DataFrame
# describe() summarizes the numeric score columns only (compound, positive,
# negative, neutral); `count` is the number of articles that were actually scored.
gsk_df.describe()

Unnamed: 0,compound,positive,negative,neutral
count,97.0,97.0,97.0,97.0
mean,0.231789,0.071103,0.016113,0.912794
std,0.356856,0.066757,0.043868,0.077354
min,-0.8176,0.0,0.0,0.637
25%,0.0,0.0,0.0,0.864
50%,0.25,0.078,0.0,0.919
75%,0.4939,0.124,0.0,1.0
max,0.7964,0.261,0.223,1.0
