In [26]:
import os
import pandas as pd
from datetime import datetime, timedelta
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi

# The News API allows only a limited number of articles to be accessed each day.
from newsapi.newsapi_client import NewsApiClient
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [27]:
# Goal: build a daily sentiment index from News API headlines and correlate it with the
# daily returns of TSLA, NIO and Ford, looking for the search topic with the highest correlation.

# Load environment variables from the .env file
load_dotenv()

# Create the News API client; os.environ[...] raises KeyError if NEWS_API is not set
newsapi = NewsApiClient(api_key=os.environ["NEWS_API"])

# Read the Alpaca API key and secret; unlike os.environ[...], os.getenv returns None
# (without raising) when a variable is missing
alpaca_api_key = os.getenv("alpaca_api_key")
alpaca_secret_key = os.getenv("alpaca_api_secret_key")

# Create the Alpaca REST client used by the price-download cells
api = tradeapi.REST(alpaca_api_key, alpaca_secret_key, api_version='v2')

In [28]:
# Set the current date and the date 30 days earlier as ISO-format strings.
# The clock is read once, directly in New York time: a naive datetime.now() is in the
# machine's local zone, so labelling it "America/New_York" is wrong on any machine
# outside that zone, and reading the clock twice makes the window slightly off 30 days.
_now_ny = pd.Timestamp.now(tz="America/New_York")
current_date = _now_ny.isoformat()
# Subtracting a Timedelta moves back exactly 30 x 24 hours of elapsed time
past_date = (_now_ny - pd.Timedelta(days=30)).isoformat()

# Set timeframe to '1D' (daily bars)
timeframe = "1D"

In [37]:
# Download daily TSLA bars between past_date and current_date, drop the outer
# column level, and keep only the closing price
TSLA = (
    api.get_barset(
        'TSLA',
        timeframe,
        limit=None,
        start=past_date,
        end=current_date,
        after=None,
        until=None,
    )
    .df
    .droplevel(level=0, axis=1)
    .drop(columns=["open", "high", "low", "volume"])
)

# Daily data: keep only the calendar date of each bar (discard the time component)
TSLA.index = TSLA.index.date

# Preview the first rows
TSLA.head()

Unnamed: 0,close
2021-06-28,688.47
2021-06-29,680.76
2021-06-30,679.78
2021-07-01,677.92
2021-07-02,678.9


In [41]:
# Daily returns of TSLA: percentage change of the close between consecutive rows.
# The first row has no previous close, so its NaN return is removed by dropna().
TSLA_returns = TSLA.pct_change().dropna()

# Display sample data
TSLA_returns.head()

Unnamed: 0,close
2021-06-29,-0.011199
2021-06-30,-0.00144
2021-07-01,-0.002736
2021-07-02,0.001446
2021-07-06,-0.02859


In [39]:
# Download daily NIO bars between past_date and current_date, drop the outer
# column level, and keep only the closing price
NIO = (
    api.get_barset(
        'NIO',
        timeframe,
        limit=None,
        start=past_date,
        end=current_date,
        after=None,
        until=None,
    )
    .df
    .droplevel(level=0, axis=1)
    .drop(columns=["open", "high", "low", "volume"])
)

# Daily data: keep only the calendar date of each bar (discard the time component)
NIO.index = NIO.index.date

# Preview the first rows
NIO.head()

Unnamed: 0,close
2021-06-28,49.39
2021-06-29,50.34
2021-06-30,53.19
2021-07-01,50.91
2021-07-02,50.38


In [43]:
# Daily returns of NIO: percentage change of the close between consecutive rows.
# The first row has no previous close, so its NaN return is removed by dropna().
NIO_returns = NIO.pct_change().dropna()

# Display sample data
NIO_returns.head()

Unnamed: 0,close
2021-06-29,0.019235
2021-06-30,0.056615
2021-07-01,-0.042865
2021-07-02,-0.010411
2021-07-06,-0.001985


In [40]:
# Download daily Ford (ticker F) bars between past_date and current_date, drop the
# outer column level, and keep only the closing price
ford = (
    api.get_barset(
        'F',
        timeframe,
        limit=None,
        start=past_date,
        end=current_date,
        after=None,
        until=None,
    )
    .df
    .droplevel(level=0, axis=1)
    .drop(columns=["open", "high", "low", "volume"])
)

# Daily data: keep only the calendar date of each bar (discard the time component)
ford.index = ford.index.date

# Preview the first rows
ford.head()

Unnamed: 0,close
2021-06-28,14.96
2021-06-29,15.02
2021-06-30,14.85
2021-07-01,14.91
2021-07-02,14.935


In [42]:
# Daily returns of Ford: percentage change of the close between consecutive rows.
# The first row has no previous close, so its NaN return is removed by dropna().
ford_returns = ford.pct_change().dropna()

# Display sample data
ford_returns.head()

Unnamed: 0,close
2021-06-29,0.004011
2021-06-30,-0.011318
2021-07-01,0.00404
2021-07-02,0.001677
2021-07-06,-0.028577


In [4]:
# Use the News API client to collect each day's most relevant headlines for a keyword
def get_headlines(keyword, client=None, newest=None, oldest=None):
    """Fetch the first page of relevancy-sorted English headlines for each day.

    Walks backwards one calendar day at a time from ``newest`` down to (but not
    including) ``oldest``. One API request is issued per day, so a 30-day window
    costs about 30 requests per keyword.

    Args:
        keyword: Search phrase passed to the News API as ``q``.
        client: Object exposing ``get_everything(...)``; defaults to the
            notebook-level ``newsapi`` client.
        newest: ISO-format date string for the most recent day to fetch; only the
            first 10 characters (``YYYY-MM-DD``) are used. Defaults to the
            notebook-level ``current_date``.
        oldest: ISO-format date string marking the exclusive lower bound of the
            window. Defaults to the notebook-level ``past_date``.

    Returns:
        tuple: ``(all_headlines, all_dates)`` where ``all_headlines`` is a list
        with one list of article titles per day and ``all_dates`` holds the
        matching ``datetime`` objects, both ordered newest first. Titles are
        passed through as returned by the API and may be ``None``.
    """
    # Fall back to the notebook globals only when no explicit value was given
    client = newsapi if client is None else client
    newest = current_date if newest is None else newest
    oldest = past_date if oldest is None else oldest

    day = datetime.strptime(newest[:10], "%Y-%m-%d")
    stop = datetime.strptime(oldest[:10], "%Y-%m-%d")

    all_headlines = []
    all_dates = []
    print(f"Fetching news about '{keyword}'")
    print("*" * 30)
    while day > stop:
        print(f"retrieving news from: {day}")
        day_str = day.strftime("%Y-%m-%d")
        # Same value for from/to restricts the query to a single day
        articles = client.get_everything(
            q=keyword,
            from_param=day_str,
            to=day_str,
            language="en",
            sort_by="relevancy",
            page=1,
        )
        all_headlines.append([article["title"] for article in articles["articles"]])
        all_dates.append(day)
        day = day - timedelta(days=1)
    return all_headlines, all_dates

In [8]:
# Topic 1: fetch the daily headlines for "electric vehicles".
# `dates` is reassigned by every get_headlines call in this notebook.
electric_vehicle_headlines, dates = get_headlines("electric vehicles")

Fetching news about 'electric vehicles'
******************************
retrieving news from: 2021-07-27 00:00:00


NewsAPIException: {'status': 'error', 'code': 'rateLimited', 'message': 'You have made too many requests recently. Developer accounts are limited to 100 requests over a 24 hour period (50 requests available every 12 hours). Please upgrade to a paid plan if you need more requests.'}

In [6]:
# Topic 2: fetch the daily headlines for "EV".
# `dates` is reassigned by every get_headlines call in this notebook.
EV_headlines, dates = get_headlines("EV")

Fetching news about 'EV'
******************************
retrieving news from: 2021-07-27 00:00:00


NewsAPIException: {'status': 'error', 'code': 'rateLimited', 'message': 'You have made too many requests recently. Developer accounts are limited to 100 requests over a 24 hour period (50 requests available every 12 hours). Please upgrade to a paid plan if you need more requests.'}

In [7]:
# Topic 3: fetch the daily headlines for "electric car".
# NOTE: the variable name is misspelled (`electirc`); the averaging cell reads it
# under this same spelling, so rename both together if it is ever corrected.
electirc_car_headlines, dates = get_headlines("electric car")

Fetching news about 'electric car'
******************************
retrieving news from: 2021-07-27 00:00:00


NewsAPIException: {'status': 'error', 'code': 'rateLimited', 'message': 'You have made too many requests recently. Developer accounts are limited to 100 requests over a 24 hour period (50 requests available every 12 hours). Please upgrade to a paid plan if you need more requests.'}

In [9]:
# Instantiate NLTK's VADER SentimentIntensityAnalyzer; this module-level `sid`
# is the analyzer called inside headline_sentiment_summarizer_avg
sid = SentimentIntensityAnalyzer()

In [10]:
# Create function that computes average compound sentiment of headlines for each day
def headline_sentiment_summarizer_avg(headlines):
    """Return the mean compound sentiment score of each day's headlines.

    Args:
        headlines: Iterable with one collection of headline strings per day.
            Entries that are ``None`` are skipped.

    Returns:
        list[float]: One average per day, in input order. A day with no
        scorable headlines (empty, or only ``None`` titles) yields ``NaN``
        instead of raising ``ZeroDivisionError``.
    """
    sentiment = []
    for day_headlines in headlines:
        day_scores = [
            sid.polarity_scores(title)["compound"]
            for title in day_headlines
            if title is not None
        ]
        # Guard the division: an empty day has no meaningful average
        if day_scores:
            sentiment.append(sum(day_scores) / len(day_scores))
        else:
            sentiment.append(float("nan"))
    return sentiment

In [11]:
# Compute the per-day average sentiment for each topic's headlines.
# NOTE: `electirc_car_headlines` is spelled exactly as it was assigned in the fetch cell.
electric_vehicle_avg = headline_sentiment_summarizer_avg(electric_vehicle_headlines)
EV_avg = headline_sentiment_summarizer_avg(EV_headlines)
electric_car_avg = headline_sentiment_summarizer_avg(electirc_car_headlines)

NameError: name 'electric_vehicle_headlines' is not defined

In [None]:
# Combine the sentiment averages into a DataFrame with one column per topic.
# Each list holds one value per fetched day, so all three must have the same length.
topic_sentiments = pd.DataFrame(
    {
        "electric vehicle": electric_vehicle_avg,
        "EV": EV_avg,
        "electric car": electric_car_avg,
    }
)

In [None]:
# Index the sentiment averages by date. `dates` comes from the most recent
# get_headlines call; every call walks the same current_date/past_date window,
# so the dates apply to all three topics.
topic_sentiments.index = pd.to_datetime(dates)

In [None]:
# Join TSLA returns with the topic sentiments on the index, then drop any row with a missing value
# NOTE(review): TSLA_returns is indexed by datetime.date objects while topic_sentiments
# uses a DatetimeIndex — confirm the join aligns the rows as intended.
topic_sentiments_TSLA = TSLA_returns.join(topic_sentiments).dropna(how="any")

# Display data
display(topic_sentiments_TSLA)

In [None]:
# Pairwise correlation between TSLA returns and each topic's sentiment,
# rendered with a background color gradient
topic_sentiments_TSLA.corr().style.background_gradient()

In [None]:
# Join NIO returns with the topic sentiments on the index, then drop any row with a missing value
# NOTE(review): NIO_returns is indexed by datetime.date objects while topic_sentiments
# uses a DatetimeIndex — confirm the join aligns the rows as intended.
topic_sentiments_NIO = NIO_returns.join(topic_sentiments).dropna(how="any")

# Display data
display(topic_sentiments_NIO)

In [None]:
# Pairwise correlation between NIO returns and each topic's sentiment,
# rendered with a background color gradient
topic_sentiments_NIO.corr().style.background_gradient()

In [None]:
# Join Ford returns with the topic sentiments on the index, then drop any row with a missing value
# NOTE(review): ford_returns is indexed by datetime.date objects while topic_sentiments
# uses a DatetimeIndex — confirm the join aligns the rows as intended.
topic_sentiments_ford = ford_returns.join(topic_sentiments).dropna(how="any")

# Display data
display(topic_sentiments_ford)

In [None]:
# Pairwise correlation between Ford returns and each topic's sentiment,
# rendered with a background color gradient
topic_sentiments_ford.corr().style.background_gradient()