## News Sentiment Analysis and Crypto Price Prediction of Dogecoin

In [1]:
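# Project description: score daily news headlines about Dogecoin, crypto and Bitcoin
# with VADER sentiment and correlate the daily averages with Dogecoin's daily returns.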

In [2]:
import os
import pandas as pd
from datetime import datetime, timedelta
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
import requests
import yfinance as yf
from yahoofinancials import YahooFinancials
from newsapi.newsapi_client import NewsApiClient
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [3]:
## API keys and clients (keys come from environment variables)

# Load variables from a .env file (python-dotenv) before reading the keys
load_dotenv()

# News API client; indexing os.environ fails immediately if NEWS_API_KEY is not set
newsapi = NewsApiClient(api_key=os.environ["NEWS_API_KEY"])

# Alpaca key and secret; os.getenv gives None for a variable that is not set
alpaca_api_key, alpaca_secret_key = (
    os.getenv(name) for name in ("ALPACA_API_KEY", "ALPACA_SECRET_KEY")
)

# Alpaca REST client using API version v2
api = tradeapi.REST(alpaca_api_key, alpaca_secret_key, api_version="v2")

## Get Dogecoin Crypto Price

In [4]:
# Symbol and bar size used by the Alpaca price request
ticker = "DOGE"
timeframe = "1D"

# Build the 30-day window as ISO-format strings in New York time.
# The clock is read once, directly in the target timezone: wrapping a naive
# datetime.now() with tz=... would only relabel the machine's local wall clock
# as New York time, and reading the clock twice makes the window inexact.
now_new_york = pd.Timestamp.now(tz="America/New_York")
current_date = now_new_york.isoformat()
past_date = (now_new_york - pd.Timedelta(days=30)).isoformat()

# Request daily bars for `ticker` between past_date and current_date from Alpaca.
# limit/after/until are left at their defaults instead of being passed as None.
# NOTE(review): the recorded output of this cell has column headers but no rows,
# so this request returned no data for "DOGE" - confirm the symbol/endpoint.
df = api.get_barset(ticker, timeframe, start=past_date, end=current_date).df

# Preview the first rows
df.head()

Unnamed: 0_level_0,DOGE,DOGE,DOGE,DOGE,DOGE
Unnamed: 0_level_1,open,high,low,close,volume
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2


In [5]:
# Alternative.me v2 ticker endpoint for Dogecoin; `convert=CAD` asks for a CAD
# quote (the recorded response contains both a USD and a CAD quote).
doge_url = "https://api.alternative.me/v2/ticker/dogecoin/?convert=CAD"

In [6]:
# Fetch Dogecoin Price - Step 1: use the requests library to fetch the current price.
# A timeout keeps the cell from hanging on an unresponsive server, and
# raise_for_status() surfaces HTTP errors before the body is parsed as JSON.
doge_response = requests.get(doge_url, timeout=10)
doge_response.raise_for_status()
doge_request = doge_response.json()
doge_request

{'data': {'74': {'id': 74,
   'name': 'Dogecoin',
   'symbol': 'DOGE',
   'website_slug': 'dogecoin',
   'rank': 7,
   'circulating_supply': 132670764300,
   'total_supply': 132670764300,
   'max_supply': 0,
   'quotes': {'USD': {'price': 0.122492,
     'volume_24h': 536752584,
     'market_cap': 16264009040,
     'percentage_change_1h': -0.512712181540443,
     'percentage_change_24h': -2.77913022271379,
     'percentage_change_7d': -4.2393924799438,
     'percent_change_1h': -0.512712181540443,
     'percent_change_24h': -2.77913022271379,
     'percent_change_7d': -4.2393924799438},
    'CAD': {'price': 0.1560180604,
     'volume_24h': 683661766.2408,
     'market_cap': 20715468314.248,
     'percent_change_1h': -0.512712181540443,
     'percent_change_24h': -2.77913022271379,
     'percent_change_7d': -4.2393924799438}},
   'last_updated': 1646454560}},
 'metadata': {'timestamp': 1646454560,
  'num_cryptocurrencies': 3105,
  'error': None}}

In [8]:
# Fetch current Dogecoin price - Step 2: parse the API JSON response and keep only the CAD price.
# Entries under "data" are keyed by the coin's id (the recorded response uses "74";
# 7 is its rank), so take the single returned entry rather than hard-coding a key.
doge_entry = next(iter(doge_request["data"].values()))
doge_price = doge_entry["quotes"]["CAD"]["price"]
doge_price

In [9]:
# Download DOGE-USD price history from Yahoo Finance for a fixed date window,
# with the progress bar switched off
doge_df = yf.download("DOGE-USD", start="2019-01-01", end="2022-03-04", progress=False)

# Preview the first rows
doge_df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-01-01,0.002346,0.002392,0.002322,0.002392,0.002392,17364744
2019-01-02,0.002388,0.002458,0.002372,0.002407,0.002407,18015392
2019-01-03,0.002404,0.002414,0.002356,0.00236,0.00236,17619234
2019-01-04,0.002364,0.002397,0.002274,0.002315,0.002315,19530100
2019-01-05,0.00232,0.00235,0.00228,0.002319,0.002319,17148586


In [10]:
# Keep only the closing price. Selecting the column (instead of dropping the
# others by name) does not depend on which extra columns the download returned
# and can be re-run without raising KeyError.
doge_df = doge_df[["Close"]]

# Since this is daily data, keep only the date (no time) component of the index.
# pd.to_datetime makes the step repeatable when the index already holds dates.
doge_df.index = pd.to_datetime(doge_df.index).date

# Display sample data
doge_df.head()

Unnamed: 0,Close
2019-01-01,0.002392
2019-01-02,0.002407
2019-01-03,0.00236
2019-01-04,0.002315
2019-01-05,0.002319


In [11]:
# Daily returns of Dogecoin: percentage change between consecutive rows,
# with rows containing missing values removed
doge_returns = (
    doge_df
    .pct_change()
    .dropna()
)

# Preview the first rows
doge_returns.head()

Unnamed: 0,Close
2019-01-02,0.006271
2019-01-03,-0.019526
2019-01-04,-0.019068
2019-01-05,0.001728
2019-01-06,0.003881


In [12]:
# Use the News API client to collect, for each day in the window, the first page
# of English headlines sorted by relevancy
def get_headlines(keyword, newest_date=None, oldest_date=None, client=None):
    """Collect one page of relevancy-sorted English headlines per day.

    Walks backwards one day at a time from ``newest_date`` (inclusive) down to
    ``oldest_date`` (exclusive), issuing one ``get_everything`` request per day.

    Parameters
    ----------
    keyword : str
        Search term sent as ``q``.
    newest_date, oldest_date : optional
        Any value whose ``str()`` starts with ``YYYY-MM-DD`` (ISO strings,
        dates, datetimes). They default to the notebook-level ``current_date``
        and ``past_date``.
    client : optional
        Object exposing ``get_everything(...)``; defaults to the notebook-level
        ``newsapi`` client.

    Returns
    -------
    tuple
        ``(all_headlines, all_dates)``: a list with one list of titles per day
        and the matching list of midnight ``datetime`` objects, newest first.
        Titles are passed through as returned, so an entry may be ``None``.

    Notes
    -----
    Every day costs one request, so the default 30-day window uses 30 requests
    per keyword. Errors raised by the client (for example a rate-limit error)
    propagate to the caller and the headlines collected so far are not returned.
    """
    # Fall back to the notebook globals only when the caller gave no override
    if newest_date is None:
        newest_date = current_date
    if oldest_date is None:
        oldest_date = past_date
    if client is None:
        client = newsapi

    # Only the calendar date matters; drop any time or offset suffix
    day = datetime.strptime(str(newest_date)[:10], "%Y-%m-%d")
    stop_day = datetime.strptime(str(oldest_date)[:10], "%Y-%m-%d")

    all_headlines = []
    all_dates = []
    print(f"Fetching news about '{keyword}'")
    print("*" * 30)
    while day > stop_day:
        print(f"retrieving news from: {day}")
        day_label = day.date().isoformat()
        # from_param == to restricts the query to a single calendar day
        response = client.get_everything(
            q=keyword,
            from_param=day_label,
            to=day_label,
            language="en",
            sort_by="relevancy",
            page=1,
        )
        all_headlines.append([article["title"] for article in response["articles"]])
        all_dates.append(day)
        day -= timedelta(days=1)
    return all_headlines, all_dates

In [13]:
# Get first topic: headlines about "dogecoin". The returned dates are kept as
# well; they later become the index of the sentiment DataFrame.
doge_headlines, dates = get_headlines("dogecoin")

Fetching news about 'dogecoin'
******************************
retrieving news from: 2022-03-04 00:00:00
retrieving news from: 2022-03-03 00:00:00
retrieving news from: 2022-03-02 00:00:00
retrieving news from: 2022-03-01 00:00:00
retrieving news from: 2022-02-28 00:00:00
retrieving news from: 2022-02-27 00:00:00
retrieving news from: 2022-02-26 00:00:00
retrieving news from: 2022-02-25 00:00:00
retrieving news from: 2022-02-24 00:00:00
retrieving news from: 2022-02-23 00:00:00
retrieving news from: 2022-02-22 00:00:00


NewsAPIException: {'status': 'error', 'code': 'rateLimited', 'message': 'You have made too many requests recently. Developer accounts are limited to 100 requests over a 24 hour period (50 requests available every 12 hours). Please upgrade to a paid plan if you need more requests.'}

In [None]:
# Get second topic: headlines about "crypto". The dates returned alongside are
# discarded (bound to `_`).
crypto_headlines, _ = get_headlines("crypto")

In [None]:
# Get third topic: headlines about "bitcoin". The dates returned alongside are
# discarded (bound to `_`).
bitcoin_headlines, _ = get_headlines("bitcoin")

In [None]:
# Instantiate the VADER SentimentIntensityAnalyzer used to score headlines.
# NOTE(review): presumably requires the NLTK `vader_lexicon` resource to be
# available locally - confirm it is downloaded in this environment.
sid = SentimentIntensityAnalyzer()

In [None]:
# Create function that computes average compound sentiment of headlines for each day
def headline_sentiment_summarizer_avg(headlines, analyzer=None):
    """Average the compound sentiment score of each day's headlines.

    Parameters
    ----------
    headlines : iterable of iterables
        One collection of headline strings per day; ``None`` entries are skipped.
    analyzer : optional
        Object exposing ``polarity_scores(text)`` that returns a mapping with a
        ``"compound"`` key. Defaults to the notebook-level ``sid``.

    Returns
    -------
    list of float
        One average per day, in input order. A day with no usable headline
        yields ``nan`` instead of raising ZeroDivisionError, so the result
        stays the same length as the input and such days can be dropped later.
    """
    if analyzer is None:
        analyzer = sid

    sentiment = []
    for day in headlines:
        day_scores = [
            analyzer.polarity_scores(title)["compound"]
            for title in day
            if title is not None
        ]
        if day_scores:
            sentiment.append(sum(day_scores) / len(day_scores))
        else:
            # Nothing to average for this day
            sentiment.append(float("nan"))
    return sentiment

In [None]:
# Daily average sentiment for each topic, computed in the order doge, crypto, bitcoin
doge_avg, crypto_avg, bitcoin_avg = (
    headline_sentiment_summarizer_avg(topic_headlines)
    for topic_headlines in (doge_headlines, crypto_headlines, bitcoin_headlines)
)

In [None]:
# Put the three per-day sentiment averages side by side, one column per topic
topic_sentiments = pd.DataFrame(
    dict(doge_avg=doge_avg, crypto_avg=crypto_avg, bitcoin_avg=bitcoin_avg)
)

In [None]:
# Index the sentiment averages by the dates the headlines were fetched for
topic_sentiments = topic_sentiments.set_index(pd.to_datetime(dates))

In [None]:
# Attach the sentiment averages to the Dogecoin daily returns (joined on the index)
# and keep only the rows where every column has a value
returns_with_sentiment = doge_returns.join(topic_sentiments)
topic_sentiments = returns_with_sentiment.dropna(how="any")

# Show the combined table
display(topic_sentiments)

In [None]:
# Pairwise correlation between the returns and each topic's sentiment,
# rendered with a background colour gradient
(
    topic_sentiments
    .corr()
    .style
    .background_gradient()
)