In [4]:
#import libraries
import os
import pandas as pd
from datetime import datetime, timedelta
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
from newsapi.newsapi_client import NewsApiClient
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [6]:
# Read the local .env file so its entries become process environment variables
load_dotenv()

# News API client; indexing os.environ raises KeyError if news_api_key is unset
newsapi = NewsApiClient(api_key=os.environ["news_api_key"])

# Alpaca credentials (each is None when its variable is not defined)
alpaca_api_key = os.environ.get("ALPACA_API_KEY")
alpaca_secret_key = os.environ.get("ALPACA_SECRET_KEY")

# Alpaca REST client pinned to the v2 API
api = tradeapi.REST(alpaca_api_key, alpaca_secret_key, api_version="v2")

In [20]:
# Set the ticker
#ticker = "AAPL"

# Set timeframe to '1D'
#timeframe = "1D"

# Analysis window as ISO-8601 strings in New York time:
# current_date is the newest day, past_date is about one month earlier
current_date = pd.Timestamp("2021-01-11").tz_localize("America/New_York").isoformat()
past_date = pd.Timestamp("2020-12-10").tz_localize("America/New_York").isoformat()

# Get 4 weeks worth of historical data for AAPL
#df = api.get_barset(
 #   ticker,
  #  timeframe,
   # limit=None,
    #start=past_date,
    #end=current_date,
    #after=None,
    #until=None,
#).df

# Display data
#df.head()

In [21]:
def get_headlines(keyword, newest_date=None, oldest_date=None, client=None):
    """Fetch English news headlines about ``keyword``, one request per day.

    Walks backwards one day at a time from ``newest_date`` (inclusive) down to
    ``oldest_date`` (exclusive) and requests the first page of articles for
    each day, sorted by relevancy.

    Parameters
    ----------
    keyword : str
        Search term sent as the ``q`` query parameter.
    newest_date, oldest_date : str or datetime-like, optional
        Bounds of the window. Only the leading ``YYYY-MM-DD`` part of their
        string form is used. They default to the module-level
        ``current_date`` and ``past_date``.
    client : object, optional
        Object exposing ``get_everything(...)``; defaults to the module-level
        ``newsapi`` client.

    Returns
    -------
    tuple
        ``(all_headlines, all_dates)``: ``all_headlines`` holds one list of
        article titles per day (exactly as returned under ``"title"``), and
        ``all_dates`` holds the matching ``datetime`` for each day, newest
        first.
    """
    # Fall back to the notebook-level configuration when nothing is injected.
    if newest_date is None:
        newest_date = current_date
    if oldest_date is None:
        oldest_date = past_date
    if client is None:
        client = newsapi

    day = datetime.strptime(str(newest_date)[:10], "%Y-%m-%d")
    stop = datetime.strptime(str(oldest_date)[:10], "%Y-%m-%d")

    all_headlines = []
    all_dates = []
    print(f"Fetching news about '{keyword}'")
    print("*" * 30)
    while day > stop:
        print(f"retrieving news from: {day}")
        # Send a plain ISO date: str(datetime) yields "YYYY-MM-DD HH:MM:SS"
        # (space separated), which is not a valid ISO-8601 value.
        day_str = day.strftime("%Y-%m-%d")
        articles = client.get_everything(
            q=keyword,
            from_param=day_str,
            to=day_str,
            language="en",
            sort_by="relevancy",
            page=1,
        )
        all_headlines.append([article["title"] for article in articles["articles"]])
        all_dates.append(day)
        day -= timedelta(days=1)
    return all_headlines, all_dates

In [22]:
# Fetch the "vote" headlines: one list of titles per day, plus the matching list of dates
vote_headlines, dates = get_headlines("vote")

Fetching news about 'vote'
******************************
retrieving news from: 2021-01-11 00:00:00
retrieving news from: 2021-01-10 00:00:00
retrieving news from: 2021-01-09 00:00:00
retrieving news from: 2021-01-08 00:00:00
retrieving news from: 2021-01-07 00:00:00
retrieving news from: 2021-01-06 00:00:00
retrieving news from: 2021-01-05 00:00:00
retrieving news from: 2021-01-04 00:00:00
retrieving news from: 2021-01-03 00:00:00
retrieving news from: 2021-01-02 00:00:00
retrieving news from: 2021-01-01 00:00:00
retrieving news from: 2020-12-31 00:00:00
retrieving news from: 2020-12-30 00:00:00
retrieving news from: 2020-12-29 00:00:00
retrieving news from: 2020-12-28 00:00:00
retrieving news from: 2020-12-27 00:00:00
retrieving news from: 2020-12-26 00:00:00
retrieving news from: 2020-12-25 00:00:00
retrieving news from: 2020-12-24 00:00:00
retrieving news from: 2020-12-23 00:00:00
retrieving news from: 2020-12-22 00:00:00
retrieving news from: 2020-12-21 00:00:00
retrieving news fr

In [23]:
# Shared VADER sentiment analyzer; the summarizer function below reads it as a global
sid = SentimentIntensityAnalyzer()

# Create function that computes average compound sentiment of headlines for each day
def headline_sentiment_summarizer_avg(headlines, analyzer=None):
    """Average the compound sentiment score of each day's headlines.

    Parameters
    ----------
    headlines : iterable of iterables
        One inner collection of headline strings per day. ``None`` entries
        are skipped.
    analyzer : object, optional
        Object exposing ``polarity_scores(text)`` that returns a mapping with
        a ``"compound"`` key. Defaults to the module-level ``sid`` analyzer.

    Returns
    -------
    list of float
        One average per day, in input order. A day with no scorable headline
        yields ``nan`` instead of raising ``ZeroDivisionError``, so the
        result stays aligned with the list of dates.
    """
    if analyzer is None:
        analyzer = sid

    sentiment = []
    for day in headlines:
        day_scores = [
            analyzer.polarity_scores(headline)["compound"]
            for headline in day
            if headline is not None
        ]
        if day_scores:
            sentiment.append(sum(day_scores) / len(day_scores))
        else:
            # Empty day (or only missing titles): keep a placeholder for it.
            sentiment.append(float("nan"))
    return sentiment

In [24]:
# Compute the average compound sentiment of the "vote" headlines, one value per day
vote_avg = headline_sentiment_summarizer_avg(vote_headlines)


In [26]:
# Build a one-column frame of daily average sentiment, indexed by the fetch dates
topic_sentiments = pd.DataFrame(
    {"vote_avg": vote_avg},
    index=pd.to_datetime(dates),
)

display(topic_sentiments)

Unnamed: 0,vote_avg
2021-01-11,-0.042335
2021-01-10,-0.061795
2021-01-09,-0.178465
2021-01-08,-0.147315
2021-01-07,-0.33845
2021-01-06,0.058345
2021-01-05,-0.09968
2021-01-04,0.100445
2021-01-03,0.04914
2021-01-02,-0.15559


In [8]:
#Use API to pull in news data using key words 
#bloomberg, newsAPI, bing


In [3]:
#put it in a dataframe

In [4]:
#segment/cut dataframe by date of article.  Drop unnecessary data columns 

In [5]:
#combine or classify articles by date

In [6]:
#Apply data cleaning to the news content as needed: lowercase, lemmatize

In [None]:
#TF-IDF word counts by date