# Instructor Do: Intro to VADER Sentiment

In [2]:
# Initial imports
import os
from pathlib import Path
import pandas as pd
from newsapi import NewsApiClient
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from dotenv import load_dotenv
load_dotenv()


True

In [3]:
# Fetch (or refresh) the VADER lexicon that the analyzer depends on
nltk.download("vader_lexicon")

# Build the VADER sentiment analyzer used by the scoring cells
analyzer = SentimentIntensityAnalyzer()



[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Kelsey\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [4]:
# Read the News API key from the "news_api" environment variable
# (None when the variable is not set)
api_key = os.environ.get("news_api")



In [5]:
# Create the News API client, authenticated with the key read from the environment
newsapi = NewsApiClient(api_key=api_key)



In [35]:
# Fetch English-language news articles mentioning Ethereum, most relevant first.
# To bound the date window, pass from_param= / to= (e.g. "2021-01-01", "2021-07-20")
# to get_everything.
eth_headlines = newsapi.get_everything(
    q="Ethereum OR ETH",
    language="en",
    page_size=20,
    sort_by="relevancy",
)

# Print total articles
print(f"Total articles about Ethereum: {eth_headlines['totalResults']}")

# Show a sample article; yields None instead of raising IndexError
# when the query returned no articles
next(iter(eth_headlines["articles"]), None)



Total articles about Ethereum: 2935


{'source': {'id': 'techcrunch', 'name': 'TechCrunch'},
 'author': 'Connie Loizos',
 'title': 'Crypto investors like Terraform Labs so much, they’re committing $150 million to its ‘ecosystem’',
 'description': 'There are many blockchain platforms competing for investors’ and developers’ attention right now, from the big daddy of them all, Ethereum, to so-called “Ethereum Killers” like Solana, which we wrote about in May. Often, these technologies are seen as so prom…',
 'url': 'http://techcrunch.com/2021/07/16/crypto-investors-like-terraform-labs-so-much-theyre-committing-150-million-to-its-ecosystem/',
 'urlToImage': 'https://techcrunch.com/wp-content/uploads/2020/06/GettyImages-1174590894.jpg?w=667',
 'publishedAt': '2021-07-16T16:00:55Z',
 'content': 'There are many blockchain platforms competing for investors’ and developers’ attention right now, from the big daddy of them all, Ethereum, to so-called “Ethereum Killers” like Solana, which we wrote… [+2563 chars]'}

In [37]:
# Create the Ethereum sentiment scores DataFrame:
# one row per article with its date, text and VADER polarity scores.
cols = ["date", "text", "compound", "positive", "negative", "neutral"]
eth_sentiments = []
skipped_articles = 0

for article in eth_headlines["articles"]:
    text = article["content"]

    # Articles whose content is not a string cannot be scored. They used to be
    # dropped by a silent `except AttributeError: pass`; skip them explicitly
    # and keep a count so the loss is visible.
    if not isinstance(text, str):
        skipped_articles += 1
        continue

    sentiment = analyzer.polarity_scores(text)
    eth_sentiments.append({
        "text": text,
        # Keep only the leading YYYY-MM-DD part of the timestamp string
        "date": article["publishedAt"][:10],
        "compound": sentiment["compound"],
        "positive": sentiment["pos"],
        "negative": sentiment["neg"],
        "neutral": sentiment["neu"],
    })

if skipped_articles:
    print(f"Skipped {skipped_articles} article(s) without text content")

# Passing `columns` fixes the column order and keeps the frame well-formed
# (all six columns present) even when no article could be scored.
eth_df = pd.DataFrame(eth_sentiments, columns=cols)

eth_df



Unnamed: 0,date,text,compound,positive,negative,neutral
0,2021-07-16,There are many blockchain platforms competing ...,0.3612,0.075,0.0,0.925
1,2021-07-29,Blockchain infrastructure startups are heating...,-0.2411,0.0,0.061,0.939
2,2021-07-14,While retail investors grew more comfortable b...,0.7264,0.164,0.0,0.836
3,2021-07-16,ETHEREUM: THE INFINITE GARDEN is a “feature-le...,0.6486,0.175,0.0,0.825
4,2021-07-27,Cryptocurrencies spiked Monday after Amazon li...,0.34,0.105,0.0,0.895
5,2021-07-02,Bitcoin and Ethereum\r\nYuriko Nakao\r\nEther ...,0.3612,0.11,0.041,0.849
6,2021-07-23,Indicted Ethereum Foundation developer Virgil ...,0.1027,0.043,0.0,0.957
7,2021-07-17,"""Anthony Di Iorio, a co-founder of the Ethereu...",0.6369,0.157,0.0,0.843
8,2021-07-30,Representations of cryptocurrency Ethereum are...,0.0,0.0,0.0,1.0
9,2021-07-05,"Ether holders have ""staked"" more than $13 bill...",0.7717,0.194,0.0,0.806


In [29]:
# Inspect the column dtypes of the sentiment DataFrame
# NOTE(review): the recorded output lists `date` as datetime64[ns], but the cell that
# builds eth_df stores dates as sliced strings — output looks stale; confirm by re-running.
eth_df.dtypes

date        datetime64[ns]
text                object
compound           float64
positive           float64
negative           float64
neutral            float64
dtype: object

In [13]:
# Get descriptive stats (count, mean, std, quartiles, min/max) for the numeric score columns
# NOTE(review): the recorded output reports count 100, while the fetch cell requests
# page_size=20 — output looks stale; confirm by re-running from a fresh kernel.
eth_df.describe()



Unnamed: 0,compound,positive,negative,neutral
count,100.0,100.0,100.0,100.0
mean,0.145841,0.05693,0.0226,0.92045
std,0.355997,0.063974,0.045436,0.073079
min,-0.8126,0.0,0.0,0.714
25%,0.0,0.0,0.0,0.86775
50%,0.0,0.051,0.0,0.9265
75%,0.426225,0.10125,0.03325,1.0
max,0.8402,0.237,0.249,1.0
