# Correlating Returns

In [75]:
import os
import pandas as pd
from datetime import datetime, timedelta
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
from newsapi.newsapi_client import NewsApiClient
from nltk.sentiment.vader import SentimentIntensityAnalyzer

## Load API Keys from Environment Variables

In [84]:
# Load .env enviroment variables
load_dotenv("example.env")

# Set News API Key
newsapi = NewsApiClient(api_key=os.environ["NEWS_API_KEY"])

# Set Alpaca API key and secret
alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")

api = tradeapi.REST(alpaca_api_key, alpaca_secret_key, api_version='v2')

## Get AAPL Returns for Past Month

In [85]:
# Set the ticker
ticker = "AAPL"

# Set timeframe to '1D'
timeframe = "1D"

# Set current date and the date from one month ago using the ISO format
current_date = pd.Timestamp("2020-08-25", tz="America/New_York").isoformat()
past_date = pd.Timestamp("2020-07-25", tz="America/New_York").isoformat()

# Get 4 weeks worth of historical data for AAPL
df = api.get_barset(
    ticker,
    timeframe,
    limit=None,
    start=past_date,
    end=current_date,
    after=None,
    until=None,
).df

# Display data
df.head()

Unnamed: 0_level_0,AAPL,AAPL,AAPL,AAPL,AAPL
Unnamed: 0_level_1,open,high,low,close,volume
2020-07-27 00:00:00-04:00,374.84,379.62,373.92,379.215,26920001
2020-07-28 00:00:00-04:00,377.47,378.1986,372.99,373.04,23023333
2020-07-29 00:00:00-04:00,375.0,380.92,374.85,380.27,20387893
2020-07-30 00:00:00-04:00,376.75,385.19,375.07,384.88,29577510
2020-07-31 00:00:00-04:00,411.535,425.66,403.3,425.19,86940020


In [86]:
# Drop Outer Table Level
df = df.droplevel(axis=1, level=0)

# Use the drop function to drop extra columns
df = df.drop(columns=["open", "high", "low", "volume"])

# Since this is daily data, we can keep only the date (remove the time) component of the data
df.index = df.index.date

# Display sample data
df.head()

Unnamed: 0,close
2020-07-27,379.215
2020-07-28,373.04
2020-07-29,380.27
2020-07-30,384.88
2020-07-31,425.19


In [87]:
# Use the `pct_change` function to calculate daily returns of AAPL
aapl_returns = df.pct_change().dropna()

# Display sample data
aapl_returns.head()

Unnamed: 0,close
2020-07-28,-0.016284
2020-07-29,0.019381
2020-07-30,0.012123
2020-07-31,0.104734
2020-08-03,0.024624


In [88]:
# Use newsapi client to get most relevant 20 headlines per day in the past month
def get_headlines(keyword):
    all_headlines = []
    all_dates = []    
    date = datetime.strptime(current_date[:10], "%Y-%m-%d")
    end_date = datetime.strptime(past_date[:10], "%Y-%m-%d")
    print(f"Fetching news about '{keyword}'")
    print("*" * 30)
    while date > end_date:
        print(f"retrieving news from: {date}")
        articles = newsapi.get_everything(
            q=keyword,
            from_param=datetime.strftime(date, '%Y-%m-%dT00:00:00'),
            to=datetime.strftime(date, '%Y-%m-%dT00:00:00'),
            language="en",
            sort_by="relevancy",
            page=1,
        )
        headlines = []
        for i in range(0, len(articles["articles"])):
            headlines.append(articles["articles"][i]["title"])
        all_headlines.append(headlines)
        all_dates.append(date)
        date = date - timedelta(days=1)
    return all_headlines, all_dates

In [89]:
# Get first topic
aapl_headlines, dates = get_headlines("apple")

Fetching news about 'apple'
******************************
retrieving news from: 2020-08-25 00:00:00


NewsAPIException: {'status': 'error', 'code': 'rateLimited', 'message': 'You have made too many requests recently. Developer accounts are limited to 500 requests over a 24 hour period (250 requests available every 12 hours). Please upgrade to a paid plan if you need more requests.'}

In [82]:
# Get second topic
trade_headlines, _ = get_headlines("trade")

Fetching news about 'trade'
******************************
retrieving news from: 2020-08-25 00:00:00
retrieving news from: 2020-08-24 00:00:00
retrieving news from: 2020-08-23 00:00:00
retrieving news from: 2020-08-22 00:00:00
retrieving news from: 2020-08-21 00:00:00
retrieving news from: 2020-08-20 00:00:00
retrieving news from: 2020-08-19 00:00:00
retrieving news from: 2020-08-18 00:00:00
retrieving news from: 2020-08-17 00:00:00
retrieving news from: 2020-08-16 00:00:00
retrieving news from: 2020-08-15 00:00:00
retrieving news from: 2020-08-14 00:00:00
retrieving news from: 2020-08-13 00:00:00
retrieving news from: 2020-08-12 00:00:00
retrieving news from: 2020-08-11 00:00:00
retrieving news from: 2020-08-10 00:00:00
retrieving news from: 2020-08-09 00:00:00
retrieving news from: 2020-08-08 00:00:00
retrieving news from: 2020-08-07 00:00:00
retrieving news from: 2020-08-06 00:00:00
retrieving news from: 2020-08-05 00:00:00
retrieving news from: 2020-08-04 00:00:00
retrieving news f

In [69]:
# Get third topic
economy_headlines, _ = get_headlines("economy")

Fetching news about 'economy'
******************************
retrieving news from: 2020-08-25 00:00:00
retrieving news from: 2020-08-24 00:00:00
retrieving news from: 2020-08-23 00:00:00
retrieving news from: 2020-08-22 00:00:00
retrieving news from: 2020-08-21 00:00:00
retrieving news from: 2020-08-20 00:00:00
retrieving news from: 2020-08-19 00:00:00
retrieving news from: 2020-08-18 00:00:00
retrieving news from: 2020-08-17 00:00:00
retrieving news from: 2020-08-16 00:00:00
retrieving news from: 2020-08-15 00:00:00
retrieving news from: 2020-08-14 00:00:00
retrieving news from: 2020-08-13 00:00:00
retrieving news from: 2020-08-12 00:00:00
retrieving news from: 2020-08-11 00:00:00
retrieving news from: 2020-08-10 00:00:00
retrieving news from: 2020-08-09 00:00:00
retrieving news from: 2020-08-08 00:00:00
retrieving news from: 2020-08-07 00:00:00
retrieving news from: 2020-08-06 00:00:00
retrieving news from: 2020-08-05 00:00:00
retrieving news from: 2020-08-04 00:00:00
retrieving news

In [90]:
# Get fourth topic
iphone_headlines, _ = get_headlines("iphone")

Fetching news about 'iphone'
******************************
retrieving news from: 2020-08-25 00:00:00


NewsAPIException: {'status': 'error', 'code': 'rateLimited', 'message': 'You have made too many requests recently. Developer accounts are limited to 500 requests over a 24 hour period (250 requests available every 12 hours). Please upgrade to a paid plan if you need more requests.'}

In [None]:
# Get fifth topic
gold_headlines, _ = get_headlines("gold")

In [None]:
# Instantiate SentimentIntensityAnalyzer
sid = SentimentIntensityAnalyzer()

In [36]:
# Create function that computes average compound sentiment of headlines for each day
def headline_sentiment_summarizer_avg(headlines):
    sentiment = []
    for day in headlines:
        day_score = []
        for h in day:
            if h == None:
                continue
            else:
                day_score.append(sid.polarity_scores(h)["compound"])
        sentiment.append(sum(day_score) / len(day_score))
    return sentiment

In [38]:
# Get averages of each topics sentiment
aapl_avg = headline_sentiment_summarizer_avg(aapl_headlines)
trade_avg = headline_sentiment_summarizer_avg(trade_headlines)
economy_avg = headline_sentiment_summarizer_avg(economy_headlines)
iphone_avg = headline_sentiment_summarizer_avg(iphone_headlines)
gold_avg = headline_sentiment_summarizer_avg(gold_headlines)

In [39]:
# Combine Sentiment Averages into DataFrame
topic_sentiments = pd.DataFrame(
    {
        "aapl_avg": aapl_avg,
        "trade_avg": trade_avg,
        "economy_avg": economy_avg,
        "iphone_avg": iphone_avg,
        "gold_avg": gold_avg,
    }
)

In [40]:
# Set the index value of the sentiment averages DataFrame to be the series of dates.
topic_sentiments.index = pd.to_datetime(dates)

In [41]:
# Merge with AAPL returns
topic_sentiments = aapl_returns.join(topic_sentiments).dropna(how="any")

# Display data
display(topic_sentiments)

Unnamed: 0,close,aapl_avg,trade_avg,economy_avg,iphone_avg,gold_avg
2020-07-28,-0.016284,0.063105,0.06052,0.09673,0.14326,0.109165
2020-07-29,0.019381,-0.04738,-0.018815,0.035985,0.074155,0.142205
2020-07-30,0.012123,0.1048,-0.088845,-0.09373,0.14851,0.00591
2020-07-31,0.104734,0.20625,0.063255,-0.08803,0.31957,0.2096
2020-08-03,0.024624,-0.037105,0.100315,-0.00799,-0.006315,0.021625
2020-08-04,0.00723,0.04249,-0.129925,0.038625,0.179645,0.068175
2020-08-05,0.003484,0.042465,-0.068375,-0.028325,-0.042,0.112405
2020-08-06,0.035293,0.028655,-0.01254,-0.08074,0.12358,0.075495
2020-08-07,-0.02516,0.04972,0.20817,0.056365,-0.0471,0.14594
2020-08-10,0.014694,-0.1082,0.049335,-0.040755,-0.013095,0.113015


In [42]:
# Correlate the headlines' sentiment to returns
topic_sentiments.corr().style.background_gradient()

Unnamed: 0,close,aapl_avg,trade_avg,economy_avg,iphone_avg,gold_avg
close,1.0,0.209755,-0.00079,-0.374446,0.534055,-0.141538
aapl_avg,0.209755,1.0,-0.251966,-0.012188,0.459899,0.030791
trade_avg,-0.00079,-0.251966,1.0,0.207673,-0.429606,0.183695
economy_avg,-0.374446,-0.012188,0.207673,1.0,-0.15805,0.378469
iphone_avg,0.534055,0.459899,-0.429606,-0.15805,1.0,0.021426
gold_avg,-0.141538,0.030791,0.183695,0.378469,0.021426,1.0
