# Instructor Do: Intro to VADER Sentiment

In [3]:
# Initial imports
import os
from pathlib import Path
import pandas as pd
from newsapi import NewsApiClient
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from dotenv import load_dotenv
load_dotenv("newsapi.env")


True

In [4]:
# Download/Update the VADER Lexicon
nltk.download('vader_lexicon')

# Initialize the VADER sentiment analyzer
analyzer = SentimentIntensityAnalyzer()



[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\josh\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [5]:
# Reading the News API key enviroment variable
api_key = os.getenv("news_api")



In [6]:
type(api_key)

str

In [7]:
# Create a newsapi client
newsapi = NewsApiClient(api_key=api_key)



In [27]:
news_df = pd.DataFrame(index = pd.RangeIndex(start = 0, stop = 100))
payload = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
first_table = payload[0]
second_table = payload[1]

sp500 = first_table
companies = sp500['Security'].tolist()

In [29]:
companies

['3M',
 'A. O. Smith',
 'Abbott',
 'AbbVie',
 'Abiomed',
 'Accenture',
 'Activision Blizzard',
 'ADM',
 'Adobe',
 'ADP',
 'Advance Auto Parts',
 'AES',
 'Aflac',
 'Agilent Technologies',
 'AIG',
 'Air Products',
 'Akamai',
 'Alaska Air Group',
 'Albemarle',
 'Alexandria',
 'Align',
 'Allegion',
 'Alliant Energy',
 'Allstate',
 'Alphabet (Class A)',
 'Alphabet (Class C)',
 'Altria',
 'Amazon',
 'Amcor',
 'AMD',
 'Ameren',
 'American Airlines Group',
 'American Electric Power',
 'American Express',
 'American Tower',
 'American Water',
 'Ameriprise Financial',
 'AmerisourceBergen',
 'Ametek',
 'Amgen',
 'Amphenol',
 'Analog Devices',
 'Ansys',
 'Anthem',
 'Aon',
 'APA Corporation',
 'Apple',
 'Applied Materials',
 'Aptiv',
 'Arista',
 'Assurant',
 'AT&T',
 'Atmos Energy',
 'Autodesk',
 'AutoZone',
 'AvalonBay Communities',
 'Avery Dennison',
 'Baker Hughes',
 'Ball',
 'Bank of America',
 'Bath & Body Works',
 'Baxter',
 'Becton Dickinson',
 'Berkley',
 'Berkshire Hathaway',
 'Best Buy',


In [28]:
# Fetch all the news about Facebook Libra
    
for company in companies:
    news = newsapi.get_top_headlines(
        q=company,
        language="en",
        page_size=100,
    )
    contents = []
    print('done')
    for article in news['articles']:
        contents.append(article)
    
    news_df[company] = pd.Series(contents)
# Print total articles

news_df.head()

NewsAPIException: {'status': 'error', 'code': 'rateLimited', 'message': 'You have made too many requests recently. Developer accounts are limited to 100 requests over a 24 hour period (50 requests available every 12 hours). Please upgrade to a paid plan if you need more requests.'}

In [None]:
aapl = newsapi.get_top_headlines(
        q="Apple",
        language="en",
        page_size=100,
    )

my_list = []

for article in aapl['articles']:
    my_list.append(article['content'])


In [39]:
aapl_df = pd.DataFrame()
aapl_df['Apple'] = my_list

In [40]:
aapl_df.head()

Unnamed: 0,Apple
0,A SpaceX rocket ship was due for liftoff on Fr...
1,Apple has allowed NFT app Sticky back onto its...
2,"MANILA, Philippines Any concerned citizen can..."
3,"MANILA, Philippines Jericho Rosales makes his ..."
4,Aside from allowing users to utilise Face ID w...


In [6]:
# Create the Facebook Libra sentiment scores DataFrame
libra_sentiments = []

for article in news_df:
    try:
        text = article["content"]
        date = article["publishedAt"][:10]
        sentiment = analyzer.polarity_scores(text)
        compound = sentiment["compound"]
        pos = sentiment["pos"]
        neu = sentiment["neu"]
        neg = sentiment["neg"]
        
        libra_sentiments.append({
            "text": text,
            "date": date,
            "compound": compound,
            "positive": pos,
            "negative": neg,
            "neutral": neu
            
        })
        
    except AttributeError:
        pass
    
# Create DataFrame
libra_df = pd.DataFrame(libra_sentiments)

# Reorder DataFrame columns
cols = ["date", "text", "compound", "positive", "negative", "neutral"]
libra_df = libra_df[cols]

libra_df.head()



Unnamed: 0,date,text,compound,positive,negative,neutral
0,2019-06-26,"When Facebook unveiledLibra a few days ago, th...",0.2846,0.073,0.0,0.927
1,2019-06-19,"On Tuesday Facebook announced Libra, a cryptoc...",0.7196,0.152,0.0,0.848
2,2019-07-03,The $10 million entry fee to join the Facebook...,0.296,0.052,0.0,0.948
3,2019-07-03,"""We write to request that Facebook and its par...",0.3612,0.056,0.0,0.944
4,2019-06-18,Illustration by Alex Castro / The Verge\r\nAs ...,0.0,0.0,0.0,1.0


In [7]:
# Get descriptive stats from the DataFrame
libra_df.describe()



Unnamed: 0,compound,positive,negative,neutral
count,99.0,99.0,99.0,99.0
mean,0.260642,0.067091,0.017646,0.915273
std,0.352937,0.062159,0.034538,0.07398
min,-0.7351,0.0,0.0,0.71
25%,0.0,0.0,0.0,0.8715
50%,0.2263,0.057,0.0,0.926
75%,0.5485,0.113,0.0145,1.0
max,0.8979,0.225,0.153,1.0
