In [1]:
import os
import pandas as pd
import nltk
nltk.download('vader_lexicon')
%matplotlib inline

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/kellymaldonado/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [2]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Single VADER analyzer instance; the sentiment cells below call
# analyzer.polarity_scores(text) on each article's content.
analyzer = SentimentIntensityAnalyzer()

# News Headlines Sentiment

Use the News API to pull the latest news articles for Bitcoin and Ethereum, and create a DataFrame of sentiment scores for each coin.

Use descriptive statistics to answer the following questions:
1. Which coin had the highest mean positive score?
2. Which coin had the highest negative score?
3. Which coin had the highest positive score?

In [3]:
# Read your api key environment variable
from dotenv import load_dotenv

In [4]:
load_dotenv()

True

In [5]:

# Look up the News API key in the process environment (None when it is unset)
NEWS_API_KEY = os.environ.get("NEWS_API_KEY")

# Display the value's type as a quick check that the key was found
type(NEWS_API_KEY)


str

In [6]:
# Create a newsapi client
from newsapi import NewsApiClient

In [7]:
# Build the client from the key already read into NEWS_API_KEY rather than
# querying the environment a second time.
newsapi = NewsApiClient(api_key=NEWS_API_KEY)

In [8]:
# Fetch the Bitcoin news articles: English-language results for the query
# "bitcoin", page_size=100, sorted by relevancy.
# Original author's note: only allowed to get articles from May 5, 2020 to June 1, 2020.
# NOTE(review): the sample sentiment output further down includes a 2020-06-04
# article, so this date range should be confirmed.
all_articles_btc = newsapi.get_everything(
    q="bitcoin",
    language="en",
    page_size=100,
    sort_by="relevancy"
)

In [9]:
# Fetch the Ethereum news articles: English-language results for the query
# "ethereum", page_size=100, sorted by relevancy.
all_articles_eth = newsapi.get_everything(
    q="ethereum",
    language="en",
    page_size=100,
    sort_by="relevancy"
)

In [10]:
# Print the total number of Bitcoin articles the API reports for this query
# (the response's 'totalResults' field)
print(f"Total articles about Bitcoin: {all_articles_btc['totalResults']}")

Total articles about Bitcoin: 3672


In [11]:
# Print the total number of Ethereum articles the API reports for this query
# (the response's 'totalResults' field)
print(f"Total articles about Ethereum: {all_articles_eth['totalResults']}")

Total articles about Ethereum: 887


In [12]:
# Show the first Bitcoin article returned, to inspect the fields each
# article dict carries (source, author, title, publishedAt, content, ...)
all_articles_btc["articles"][0]

{'source': {'id': 'mashable', 'name': 'Mashable'},
 'author': 'Jack Morse',
 'title': 'J.K. Rowling is four cocktails in and talking trash about Bitcoin',
 'description': "J.K. Rowling has been drinking a lot and you'd better believe she has some thoughts on Bitcoin.\nNoted TERF and author of The Cuckoo’s Calling, Rowling decided Friday was the perfect time to learn about the cryptocurrency Bitcoin. And so, like any celebrity wh…",
 'url': 'https://mashable.com/article/jk-rowling-asked-twitter-about-bitcoin/',
 'urlToImage': 'https://mondrian.mashable.com/2020%252F05%252F16%252Fed%252F65127486efd14faea068927124f3b1ee.253ab.jpg%252F1200x630.jpg?signature=XjJ4aqKaX56MW5k29Zn1ygz0Evc=',
 'publishedAt': '2020-05-16T00:34:00Z',
 'content': "J.K. Rowling has been drinking a lot and you'd better believe she has some thoughts on Bitcoin.\r\nNotedTERF and author of The Cuckoos Calling, Rowlingdecided Friday was the perfect time to learn about… [+2124 chars]"}

In [13]:
# Show the first Ethereum article returned, to inspect the fields each
# article dict carries (source, author, title, publishedAt, content, ...)
all_articles_eth["articles"][0]

{'source': {'id': 'techcrunch', 'name': 'TechCrunch'},
 'author': 'Henry Pickavet',
 'title': 'Crypto Startup School: How to scale companies using crypto',
 'description': 'In week two of a16z’s Crypto Startup School, three company-builders provide real-world advice on using the qualities of crypto to create new business models and networks.',
 'url': 'http://techcrunch.com/2020/05/20/crypto-startup-school-how-to-scale-companies-using-crypto/',
 'urlToImage': 'https://techcrunch.com/wp-content/uploads/2020/05/GettyImages-1170889477.jpg?w=711',
 'publishedAt': '2020-05-20T19:00:17Z',
 'content': 'Editors note:Andreessen Horowitzs Crypto Startup School brought together 45 participants from around the U.S. and overseas in a seven-week course to learn how to build crypto companies. Andreessen Ho… [+2503 chars]'}

In [14]:
# Create the Bitcoin sentiment scores DataFrame
btc_sentiments = []

for article in all_articles_btc["articles"]:
    text = article["content"]

    # Articles whose content is missing (not a string) cannot be scored:
    # VADER raises AttributeError on non-string input. Skip them explicitly
    # instead of hiding every AttributeError behind a blanket try/except.
    if not isinstance(text, str):
        continue

    sentiment = analyzer.polarity_scores(text)

    btc_sentiments.append({
        "text": text,
        # publishedAt looks like '2020-05-16T00:34:00Z'; keep only the date part
        "date": article["publishedAt"][:10],
        "compound": sentiment["compound"],
        "positive": sentiment["pos"],
        "negative": sentiment["neg"],
        "neutral": sentiment["neu"],
    })

# Create the DataFrame with an explicit column order. Passing `columns` also
# yields an empty, correctly-shaped frame when no article had usable content,
# where reordering afterwards would raise KeyError.
cols = ["date", "text", "compound", "positive", "negative", "neutral"]
bitcoin_df = pd.DataFrame(btc_sentiments, columns=cols)

bitcoin_df.head()



Unnamed: 0,date,text,compound,positive,negative,neutral
0,2020-05-16,J.K. Rowling has been drinking a lot and you'd...,0.765,0.171,0.0,0.829
1,2020-05-08,"Yesterday, the prominent bitcoin investor Mich...",0.3182,0.067,0.0,0.933
2,2020-05-08,"Early next week, Bitcoin is about to experienc...",0.2716,0.061,0.0,0.939
3,2020-05-13,Over the last few weeks all eyes in the crypto...,0.0,0.0,0.0,1.0
4,2020-06-04,If youre planning to do some protesting and yo...,-0.3464,0.046,0.134,0.819


In [17]:
# Create the Ethereum sentiment scores DataFrame
eth_sentiments = []

for article in all_articles_eth["articles"]:
    text = article["content"]

    # Articles whose content is missing (not a string) cannot be scored:
    # VADER raises AttributeError on non-string input. Skip them explicitly
    # instead of hiding every AttributeError behind a blanket try/except.
    if not isinstance(text, str):
        continue

    sentiment = analyzer.polarity_scores(text)

    eth_sentiments.append({
        "text": text,
        # publishedAt looks like '2020-05-20T19:00:17Z'; keep only the date part
        "date": article["publishedAt"][:10],
        "compound": sentiment["compound"],
        "positive": sentiment["pos"],
        "negative": sentiment["neg"],
        "neutral": sentiment["neu"],
    })

# Create the DataFrame with an explicit column order. Passing `columns` also
# yields an empty, correctly-shaped frame when no article had usable content,
# where reordering afterwards would raise KeyError.
cols = ["date", "text", "compound", "positive", "negative", "neutral"]
ethereum_df = pd.DataFrame(eth_sentiments, columns=cols)

ethereum_df.head()



Unnamed: 0,date,text,compound,positive,negative,neutral
0,2020-05-20,Editors note:Andreessen Horowitzs Crypto Start...,0.0,0.0,0.0,1.0
1,2020-06-03,For developers looking to quickly build identi...,0.0,0.0,0.0,1.0
2,2020-05-14,Use this to send Ether from one account to ano...,0.6249,0.141,0.0,0.859
3,2020-05-24,"Some open-source groups, such as The Linux Fou...",-0.4457,0.031,0.086,0.882
4,2020-05-12,With unemployment surging and businesses strug...,0.0,0.148,0.148,0.703


In [18]:
# Describe the Bitcoin sentiment: summary statistics (count, mean, std, min,
# quartiles, max) for the compound/positive/negative/neutral columns
bitcoin_df.describe()

Unnamed: 0,compound,positive,negative,neutral
count,89.0,89.0,89.0,89.0
mean,0.168948,0.073281,0.039022,0.887719
std,0.4328,0.071972,0.059899,0.080541
min,-0.8221,0.0,0.0,0.629
25%,0.0,0.0,0.0,0.846
50%,0.2732,0.077,0.0,0.91
75%,0.507,0.09,0.068,0.932
max,0.9198,0.335,0.244,1.0


In [19]:
# Describe the Ethereum sentiment: summary statistics (count, mean, std, min,
# quartiles, max) for the compound/positive/negative/neutral columns
ethereum_df.describe()

Unnamed: 0,compound,positive,negative,neutral
count,96.0,96.0,96.0,96.0
mean,0.271948,0.092813,0.029063,0.878115
std,0.363202,0.073049,0.049379,0.087057
min,-0.8074,0.0,0.0,0.607
25%,0.0,0.03325,0.0,0.82225
50%,0.3182,0.1025,0.0,0.875
75%,0.6124,0.13,0.056,0.94
max,0.9198,0.335,0.223,1.0


### Questions:

Q: Which coin had the highest mean positive score?

A: 

Q: Which coin had the highest compound score?

A: 

Q: Which coin had the highest positive score?

A: 

---

# Tokenizer

In this section, you will use NLTK and Python to tokenize the text for each coin. Be sure to:
1. Lowercase each word
2. Remove Punctuation
3. Remove Stopwords

In [24]:
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer, PorterStemmer
from string import punctuation
import re

In [None]:
# Expand the default stopwords list if necessary


In [None]:
# Complete the tokenizer function
def tokenizer(text):
    """Tokenizes text.

    Splits `text` into words, lowercases them, strips ASCII punctuation,
    drops English stop words and lemmatizes what remains.

    Requires the NLTK tokenizer, stopwords and WordNet data to be
    downloaded; NLTK raises LookupError if a resource is missing.

    Args:
        text: The string to tokenize. Empty or None input yields [].

    Returns:
        A list of lowercase, lemmatized tokens in their original order.
    """
    # Nothing to tokenize (also covers articles whose content is None)
    if not text:
        return []

    # Create a list of the words
    words = word_tokenize(text)

    # Convert the words to lowercase
    words = [word.lower() for word in words]

    # Remove the punctuation: strip punctuation characters from each word and
    # discard anything that was punctuation only. `string.punctuation` covers
    # ASCII symbols; typographic characters such as curly quotes are kept.
    punctuation_pattern = re.compile(f"[{re.escape(punctuation)}]")
    words = [punctuation_pattern.sub("", word) for word in words]
    words = [word for word in words if word]

    # Remove the stop words (a set gives constant-time membership checks)
    stop_words = set(stopwords.words("english"))
    words = [word for word in words if word not in stop_words]

    # Lemmatize Words into root words
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(word) for word in words]

    return tokens


In [None]:
# Create a new tokens column for bitcoin

In [None]:
# Create a new tokens column for ethereum

---

# NGrams and Frequency Analysis

In this section you will look at the ngrams and word frequency for each coin. 

1. Use NLTK to produce the n-grams for N = 2. 
2. List the top 10 words for each coin. 

In [None]:
from collections import Counter
from nltk import ngrams

In [None]:
# Generate the Bitcoin N-grams where N=2

In [None]:
# Generate the Ethereum N-grams where N=2

In [None]:
# Use the token_count function to generate the top 10 words from each coin
def token_count(tokens, N=10):
    """Return the N most frequent tokens as (token, count) pairs, most common first."""
    # Tally every token, then ask the counter for its N largest entries
    frequencies = Counter()
    frequencies.update(tokens)
    top_tokens = frequencies.most_common(N)
    return top_tokens

In [None]:
# Get the top 10 words for Bitcoin

In [None]:
# Get the top 10 words for Ethereum

# Word Clouds

In this section, you will generate a word cloud for each coin to summarize its news coverage.

In [None]:
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import matplotlib as mpl

# Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*" and
# newer releases no longer accept the old names. Use whichever spelling this
# installation provides, and fall back to the default style otherwise.
whitegrid_candidates = ("seaborn-v0_8-whitegrid", "seaborn-whitegrid")
plot_style = next(
    (name for name in whitegrid_candidates if name in plt.style.available),
    "default",
)
plt.style.use(plot_style)

# Default figure size (width, height in inches) for the plots below
mpl.rcParams['figure.figsize'] = [20.0, 10.0]

In [None]:
# Generate the Bitcoin word cloud

In [None]:
# Generate the Ethereum word cloud

# Named Entity Recognition

In this section, you will build a named entity recognition model for both coins and visualize the tags using spaCy.

In [None]:
import spacy
from spacy import displacy

In [None]:
# Optional - download a language model for SpaCy
# !python -m spacy download en_core_web_sm

In [None]:
# Load the `en_core_web_sm` spaCy model; the commented-out command in the
# previous cell downloads it if it is not installed yet
nlp = spacy.load('en_core_web_sm')

## Bitcoin NER

In [None]:
# Concatenate all of the bitcoin text together

In [None]:
# Run the NER processor on all of the text

# Add a title to the document

In [None]:
# Render the visualization

In [None]:
# List all Entities

---

## Ethereum NER

In [None]:
# Concatenate all of the ethereum text together

In [None]:
# Run the NER processor on all of the text

# Add a title to the document

In [None]:
# Render the visualization

In [None]:
# List all Entities