In [None]:
import os
import pandas as pd
from datetime import datetime, timedelta
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
from newsapi.newsapi_client import NewsApiClient
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [None]:
# Load .env enviroment variables
load_dotenv()

# Set News API Key
newsapi = NewsApiClient(api_key=os.environ["NEWS_API"])
newsapi = NewsApiClient(api_key=os.environ["NEWS_API_2"])

# Set Alpaca API key and secret
alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")

api = tradeapi.REST(alpaca_api_key, alpaca_secret_key, api_version='v2')

In [48]:
# Use newsapi client to get most relevant 20 headlines per day in the past month
def get_headlines(keyword,fdate,edate):
    all_headlines = []
    all_contents = []
    all_dates = []    
    # date = datetime.strptime(current_date[:10], "%Y-%m-%d")
    # end_date = datetime.strptime(past_date[:10], "%Y-%m-%d")
    print(f"Fetching news about '{keyword}'")
    print("*" * 30)
    #while date > end_date:
    print(f"retrieving news from: {fdate} - {edate}")
    articles = newsapi.get_everything(
        q=keyword,
        # from_param=str(date)[:10],
        # to=str(date)[:10],
        from_param=fdate,
        to=edate,
        language="en",
        sort_by="relevancy",
        page=1,
    )
    headlines = []
    contents = []
    for i in range(0, len(articles["articles"])):
        headlines.append(articles["articles"][i]["title"])
        contents.append(articles["articles"][i]["content"])
    all_headlines.append(headlines)
    all_contents.append(contents)
    all_dates.append(fdate+"-"+edate)
    #date = date - timedelta(weeks=1)
    return all_headlines, all_dates, all_contents

# Create function that computes average compound sentiment of headlines for each day
def headline_sentiment_summarizer_avg(data):
    df=data.copy()
    sentiment = []
    sentiment_pos = []
    sentiment_neg = []
    for day in data:
        day_score = []
        day_positive = []
        day_negative = []
        for h in day:
            
            if h == None:
                continue
            else:
                day_score.append(sid.polarity_scores(h)["compound"])
                day_positive.append(sid.polarity_scores(h)["pos"])
                day_negative.append(sid.polarity_scores(h)["neg"])
        sentiment.append(sum(day_score) / len(day_score))
        sentiment_pos.append(sum(day_positive) / len(day_positive))
        sentiment_neg.append(sum(day_negative) / len(day_negative))
    return sentiment #, sentiment_pos, sentiment_neg]

In [None]:
# Setting up Variable initial values
current_date = pd.Timestamp(datetime.now(), tz="America/New_York").isoformat()
past_date = pd.Timestamp(datetime.now()- timedelta(7), tz="America/New_York").isoformat()
date = datetime.strptime(current_date[:10], "%Y-%m-%d")
end_date = datetime.strptime(past_date[:10], "%Y-%m-%d")

# Instantiate SentimentIntensityAnalyzer
sid = SentimentIntensityAnalyzer()

In [55]:
btc_headlines, btc_dates, btc_contents = get_headlines("bitcoin","2020-06-01","2022-06-07")
# eth_headlines, eth_dates = get_headlines("ethereum")
# gold_headlines, gold_dates = get_headlines("gold")
# crude_oil_headlines, crude_oil_dates, crude_oil_contents = get_headlines("crude oil")
# sp500_headlines, sp500_dates = get_headlines("S&P 500")

Fetching news about 'bitcoin'
******************************
retrieving news from: 2020-06-01 - 2022-06-07


NewsAPIException: {'status': 'error', 'code': 'parameterInvalid', 'message': 'You are trying to request results too far in the past. Your plan permits you to request articles as far back as 2022-05-22, but you have requested 2020-06-01. You may need to upgrade to a paid plan.'}

In [53]:
#btc_avg_compound_sentiment,btc_avg_pos_sentiment,btc_neg_sentiment = headline_sentiment_summarizer_avg(btc_contents)
btc_avg_compound_sentiment = headline_sentiment_summarizer_avg(btc_contents)

In [54]:
btc_avg_compound_sentiment

[0.041544999999999985]

In [None]:
btc_headlines