# Correlating Returns

In [1]:
import os
import pandas as pd
from datetime import datetime, timedelta
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
from newsapi.newsapi_client import NewsApiClient
from nltk.sentiment.vader import SentimentIntensityAnalyzer

## Load API Keys from Environment Variables

In [2]:
# Load .env enviroment variables
load_dotenv()

# Set News API Key
newsapi = NewsApiClient(api_key=os.environ["NEWS_API_KEY"])

# Set Alpaca API key and secret
alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")

api = tradeapi.REST(alpaca_api_key, alpaca_secret_key, api_version='v2')

## Get AAPL Returns for Past Month

In [3]:
# Set the ticker
ticker = "AAPL"

# Set timeframe to '1D'
timeframe = "1D"

# Set current date and the date from one month ago using the ISO format
current_date = pd.Timestamp(datetime.now(), tz="America/New_York").isoformat()
past_date = pd.Timestamp(datetime.now()- timedelta(30), tz="America/New_York").isoformat()

# Get 4 weeks worth of historical data for AAPL
df = api.get_barset(
    ticker,
    timeframe,
    limit=None,
    start=past_date,
    end=current_date,
    after=None,
    until=None,
).df

# Display data
df.head()

Unnamed: 0_level_0,AAPL,AAPL,AAPL,AAPL,AAPL
Unnamed: 0_level_1,open,high,low,close,volume
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2020-12-02 00:00:00-05:00,122.02,123.37,120.89,123.08,75619249
2020-12-03 00:00:00-05:00,123.52,123.78,122.21,122.94,67399458
2020-12-04 00:00:00-05:00,122.6,122.8608,121.52,122.24,65287653
2020-12-07 00:00:00-05:00,122.31,124.57,122.25,123.8,72463180
2020-12-08 00:00:00-05:00,124.37,124.98,123.09,124.33,69695298


In [4]:
# Drop Outer Table Level
df = df.droplevel(axis=1, level=0)

# Use the drop function to drop extra columns
df = df.drop(columns=["open", "high", "low", "volume"])

# Since this is daily data, we can keep only the date (remove the time) component of the data
df.index = df.index.date

# Display sample data
df.head()

Unnamed: 0,close
2020-12-02,123.08
2020-12-03,122.94
2020-12-04,122.24
2020-12-07,123.8
2020-12-08,124.33


In [5]:
# Use the `pct_change` function to calculate daily returns of AAPL
aapl_returns = df.pct_change().dropna()

# Display sample data
aapl_returns.head()

Unnamed: 0,close
2020-12-03,-0.001137
2020-12-04,-0.005694
2020-12-07,0.012762
2020-12-08,0.004281
2020-12-09,-0.021395


In [6]:
# Use newsapi client to get most relevant 20 headlines per day in the past month
def get_headlines(keyword):
    all_headlines = []
    all_dates = []    
    date = datetime.strptime(current_date[:10], "%Y-%m-%d")
    end_date = datetime.strptime(past_date[:10], "%Y-%m-%d")
    print(f"Fetching news about '{keyword}'")
    print("*" * 30)
    while date > end_date:
        print(f"retrieving news from: {date}")
        articles = newsapi.get_everything(
            q=keyword,
            from_param=str(date)[:10],
            to=str(date)[:10],
            language="en",
            sort_by="relevancy",
            page=1,
        )
        headlines = []
        for i in range(0, len(articles["articles"])):
            headlines.append(articles["articles"][i]["title"])
        all_headlines.append(headlines)
        all_dates.append(date)
        date = date - timedelta(days=1)
    return all_headlines, all_dates

Note: Be aware that running the 3 requests below will only work once within a 24 hour period due to the request limits imposed by the API provider.

In [7]:
# Get first topic
# YOUR CODE HERE

Fetching news about 'apple'
******************************
retrieving news from: 2021-01-01 00:00:00
retrieving news from: 2020-12-31 00:00:00
retrieving news from: 2020-12-30 00:00:00
retrieving news from: 2020-12-29 00:00:00
retrieving news from: 2020-12-28 00:00:00
retrieving news from: 2020-12-27 00:00:00
retrieving news from: 2020-12-26 00:00:00
retrieving news from: 2020-12-25 00:00:00
retrieving news from: 2020-12-24 00:00:00
retrieving news from: 2020-12-23 00:00:00
retrieving news from: 2020-12-22 00:00:00
retrieving news from: 2020-12-21 00:00:00
retrieving news from: 2020-12-20 00:00:00
retrieving news from: 2020-12-19 00:00:00
retrieving news from: 2020-12-18 00:00:00
retrieving news from: 2020-12-17 00:00:00
retrieving news from: 2020-12-16 00:00:00
retrieving news from: 2020-12-15 00:00:00
retrieving news from: 2020-12-14 00:00:00
retrieving news from: 2020-12-13 00:00:00
retrieving news from: 2020-12-12 00:00:00
retrieving news from: 2020-12-11 00:00:00
retrieving news f

In [8]:
# Get second topic
# YOUR CODE HERE

Fetching news about 'trade'
******************************
retrieving news from: 2021-01-01 00:00:00
retrieving news from: 2020-12-31 00:00:00
retrieving news from: 2020-12-30 00:00:00
retrieving news from: 2020-12-29 00:00:00
retrieving news from: 2020-12-28 00:00:00
retrieving news from: 2020-12-27 00:00:00
retrieving news from: 2020-12-26 00:00:00
retrieving news from: 2020-12-25 00:00:00
retrieving news from: 2020-12-24 00:00:00
retrieving news from: 2020-12-23 00:00:00
retrieving news from: 2020-12-22 00:00:00
retrieving news from: 2020-12-21 00:00:00
retrieving news from: 2020-12-20 00:00:00
retrieving news from: 2020-12-19 00:00:00
retrieving news from: 2020-12-18 00:00:00
retrieving news from: 2020-12-17 00:00:00
retrieving news from: 2020-12-16 00:00:00
retrieving news from: 2020-12-15 00:00:00
retrieving news from: 2020-12-14 00:00:00
retrieving news from: 2020-12-13 00:00:00
retrieving news from: 2020-12-12 00:00:00
retrieving news from: 2020-12-11 00:00:00
retrieving news f

In [9]:
# Get third topic
# YOUR CODE HERE

Fetching news about 'economy'
******************************
retrieving news from: 2021-01-01 00:00:00
retrieving news from: 2020-12-31 00:00:00
retrieving news from: 2020-12-30 00:00:00
retrieving news from: 2020-12-29 00:00:00
retrieving news from: 2020-12-28 00:00:00
retrieving news from: 2020-12-27 00:00:00
retrieving news from: 2020-12-26 00:00:00
retrieving news from: 2020-12-25 00:00:00
retrieving news from: 2020-12-24 00:00:00
retrieving news from: 2020-12-23 00:00:00
retrieving news from: 2020-12-22 00:00:00
retrieving news from: 2020-12-21 00:00:00
retrieving news from: 2020-12-20 00:00:00
retrieving news from: 2020-12-19 00:00:00
retrieving news from: 2020-12-18 00:00:00
retrieving news from: 2020-12-17 00:00:00
retrieving news from: 2020-12-16 00:00:00
retrieving news from: 2020-12-15 00:00:00
retrieving news from: 2020-12-14 00:00:00
retrieving news from: 2020-12-13 00:00:00
retrieving news from: 2020-12-12 00:00:00
retrieving news from: 2020-12-11 00:00:00
retrieving news

In [10]:
# Instantiate SentimentIntensityAnalyzer
# YOUR CODE HERE

In [11]:
# Create function that computes average compound sentiment of headlines for each day
def headline_sentiment_summarizer_avg(headlines):
    sentiment = []
    for day in headlines:
        day_score = []
        for h in day:
            if h == None:
                continue
            else:
                day_score.append(sid.polarity_scores(h)["compound"])
        sentiment.append(sum(day_score) / len(day_score))
    return sentiment

In [12]:
# Get averages of each topics sentiment
# YOUR CODE HERE

In [13]:
# Combine Sentiment Averages into DataFrame
# YOUR CODE HERE

In [14]:
# Set the index value of the sentiment averages DataFrame to be the series of dates.
# YOUR CODE HERE

In [15]:
# Merge with AAPL returns
# YOUR CODE HERE

# Display data
# YOUR CODE HERE

Unnamed: 0,close,aapl_avg,trade_avg,economy_avg
2020-12-03,-0.001137,-0.09009,0.155805,0.19957
2020-12-04,-0.005694,0.008565,-0.03214,0.15928
2020-12-07,0.012762,0.12677,0.1322,-0.08392
2020-12-08,0.004281,0.08206,0.187675,0.12821
2020-12-09,-0.021395,0.033005,-0.01841,0.031285
2020-12-10,0.012739,0.145985,0.107165,0.179785
2020-12-11,-0.005924,0.098565,0.047545,0.048705
2020-12-14,-0.005878,0.089605,-0.005755,-0.021345
2020-12-15,0.050259,0.2406,0.18542,0.18556
2020-12-16,-0.001095,0.07235,0.09843,0.08607


In [16]:
# Correlate the headlines' sentiment to returns
# YOUR CODE HERE

Unnamed: 0,close,aapl_avg,trade_avg,economy_avg
close,1.0,0.547862,0.455142,-0.0524333
aapl_avg,0.547862,1.0,-0.0109107,-0.425666
trade_avg,0.455142,-0.0109107,1.0,0.170973
economy_avg,-0.0524333,-0.425666,0.170973,1.0
