# Correlating Returns

In [9]:
import os
import pandas as pd
from datetime import datetime, timedelta
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
from newsapi.newsapi_client import NewsApiClient
from nltk.sentiment.vader import SentimentIntensityAnalyzer

## Load API Keys from Environment Variables

In [10]:
load_dotenv()

True

In [11]:
# Load .env enviroment variables
load_dotenv(dotenv_path = '../Api_keys.env')

# Set News API Key
newsapi = NewsApiClient(api_key= os.environ['news_api'])

# Set Alpaca API key and secret
alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")

api = tradeapi.REST(alpaca_api_key, alpaca_secret_key, api_version='v2')

## Get AAPL Returns for Past Month

In [57]:
# Set the ticker
ticker = "AAPL"

# Set timeframe to '1D'
timeframe = "1D"

# Set current date and the date from one month ago using the ISO format
current_date = pd.Timestamp("2020-11-29", tz="America/New_York").isoformat()
past_date = pd.Timestamp("2020-10-31", tz="America/New_York").isoformat()

# Get 4 weeks worth of historical data for AAPL
df = api.get_barset(
    ticker,
    timeframe,
    limit=None,
    start=past_date,
    end=current_date,
    after=None,
    until=None,
).df

# Display data
df.head()

Unnamed: 0_level_0,AAPL,AAPL,AAPL,AAPL,AAPL
Unnamed: 0_level_1,open,high,low,close,volume
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2020-11-02 00:00:00-05:00,109.11,110.68,107.32,108.77,106260834
2020-11-03 00:00:00-05:00,109.66,111.47,108.73,110.375,93660131
2020-11-04 00:00:00-05:00,114.14,115.59,112.35,114.94,114505478
2020-11-05 00:00:00-05:00,117.99,119.62,116.8686,118.99,107993032
2020-11-06 00:00:00-05:00,118.32,119.2,116.13,118.685,99225280


In [50]:
# Drop Outer Table Level
df = df.droplevel(axis=1, level=0)

# Use the drop function to drop extra columns
df = df.drop(columns=["open", "high", "low", "volume"])

# Since this is daily data, we can keep only the date (remove the time) component of the data
df.index = df.index.date

# Display sample data
df.head()

Unnamed: 0,close
2020-10-29,114.52
2020-10-30,108.9
2020-11-02,108.77
2020-11-03,110.375
2020-11-04,114.94


In [58]:
# Use the `pct_change` function to calculate daily returns of AAPL
aapl_returns = df.pct_change().dropna()

# Display sample data
aapl_returns.head()

Unnamed: 0_level_0,AAPL,AAPL,AAPL,AAPL,AAPL
Unnamed: 0_level_1,open,high,low,close,volume
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2020-11-03 00:00:00-05:00,0.005041,0.007138,0.013138,0.014756,-0.118583
2020-11-04 00:00:00-05:00,0.040854,0.036961,0.033293,0.041359,0.222564
2020-11-05 00:00:00-05:00,0.033731,0.034865,0.040219,0.035236,-0.056875
2020-11-06 00:00:00-05:00,0.002797,-0.003511,-0.00632,-0.002563,-0.081188
2020-11-09 00:00:00-05:00,0.018425,0.023406,-0.000689,-0.019927,0.303629


In [59]:
# Use newsapi client to get most relevant 20 headlines per day in the past month
def get_headlines(keyword):
    all_headlines = []
    all_dates = []    
    date = datetime.strptime(current_date[:10], "%Y-%m-%d")
    end_date = datetime.strptime(past_date[:10], "%Y-%m-%d")
    print(f"Fetching news about '{keyword}'")
    print("*" * 30)
    while date > end_date:
        print(f"retrieving news from: {date}")
        articles = newsapi.get_everything(
            q=keyword,
            from_param=str(date),
            to=str(date),
            language="en",
            sort_by="relevancy",
            page=1,
        )
        headlines = []
        for i in range(0, len(articles["articles"])):
            headlines.append(articles["articles"][i]["title"])
        all_headlines.append(headlines)
        all_dates.append(date)
        date = date - timedelta(days=1)
    return all_headlines, all_dates

In [60]:
# Get first topic
aapl_headlines, dates = get_headlines("aapl")
aapl_headlines

Fetching news about 'aapl'
******************************
retrieving news from: 2020-11-29 00:00:00
retrieving news from: 2020-11-28 00:00:00
retrieving news from: 2020-11-27 00:00:00
retrieving news from: 2020-11-26 00:00:00
retrieving news from: 2020-11-25 00:00:00
retrieving news from: 2020-11-24 00:00:00
retrieving news from: 2020-11-23 00:00:00
retrieving news from: 2020-11-22 00:00:00
retrieving news from: 2020-11-21 00:00:00
retrieving news from: 2020-11-20 00:00:00
retrieving news from: 2020-11-19 00:00:00
retrieving news from: 2020-11-18 00:00:00
retrieving news from: 2020-11-17 00:00:00
retrieving news from: 2020-11-16 00:00:00
retrieving news from: 2020-11-15 00:00:00
retrieving news from: 2020-11-14 00:00:00
retrieving news from: 2020-11-13 00:00:00
retrieving news from: 2020-11-12 00:00:00
retrieving news from: 2020-11-11 00:00:00
retrieving news from: 2020-11-10 00:00:00
retrieving news from: 2020-11-09 00:00:00
retrieving news from: 2020-11-08 00:00:00
retrieving news fr

[['BTS’s ‘BE’ Opens At No. 1 With One Of The Largest Debuts Of 2020',
  'Parallels、macOS 11 Big Surに対応した「Parallels Desktop for Mac 1.6.1 App Store Edition」をリリース。',
  'Mariah Carey Continues Christmas Legacy With Star-Studded Holiday Special',
  'Amazonのサイバーマンデーセールで、AnkerのThunderbolt 3/USB-C Dockや急速充電器、Bluetoothスピーカーフォン/イヤホンなどが特別価格で販売中。',
  'Why Connected Fitness Tech Popularity Will Outlast Covid-19',
  'As The Dow Jones Industrial Average Reaches 30,000, ‘The Fed Did It’ Narrative Becomes Even More Ridiculous',
  'Media Advisory - Hidden-Camera Footage Exposes Shocking Animal Cruelty at Ontario Pig Farm',
  'Black Friday store traffic down 52% even as online retail sales hit record high',
  'Buttery Smooth: The Mac ARMed',
  'How the biggest companies are the worst investments',
  'EXD: Breaking Away From Its Peers',
  'Inside Scoop: A Giant Fund Sold Alibaba, Apple, and Intel Stock. Here’s What It Bought.'],
 ['文章作成ツール「Scrivener 3 for macOS/Windows」がブラックフライデーで25%OFFセール中。',
  'ブラックフライ

In [61]:
# Get second topic
trade_headlines, _ = get_headlines("trade")

Fetching news about 'trade'
******************************
retrieving news from: 2020-11-29 00:00:00
retrieving news from: 2020-11-28 00:00:00
retrieving news from: 2020-11-27 00:00:00
retrieving news from: 2020-11-26 00:00:00
retrieving news from: 2020-11-25 00:00:00
retrieving news from: 2020-11-24 00:00:00
retrieving news from: 2020-11-23 00:00:00
retrieving news from: 2020-11-22 00:00:00
retrieving news from: 2020-11-21 00:00:00
retrieving news from: 2020-11-20 00:00:00
retrieving news from: 2020-11-19 00:00:00
retrieving news from: 2020-11-18 00:00:00
retrieving news from: 2020-11-17 00:00:00
retrieving news from: 2020-11-16 00:00:00
retrieving news from: 2020-11-15 00:00:00
retrieving news from: 2020-11-14 00:00:00
retrieving news from: 2020-11-13 00:00:00
retrieving news from: 2020-11-12 00:00:00
retrieving news from: 2020-11-11 00:00:00
retrieving news from: 2020-11-10 00:00:00
retrieving news from: 2020-11-09 00:00:00
retrieving news from: 2020-11-08 00:00:00
retrieving news f

In [62]:
# Get third topic
economy_headlines, _ = get_headlines('economy')

Fetching news about 'economy'
******************************
retrieving news from: 2020-11-29 00:00:00
retrieving news from: 2020-11-28 00:00:00
retrieving news from: 2020-11-27 00:00:00
retrieving news from: 2020-11-26 00:00:00
retrieving news from: 2020-11-25 00:00:00
retrieving news from: 2020-11-24 00:00:00
retrieving news from: 2020-11-23 00:00:00
retrieving news from: 2020-11-22 00:00:00
retrieving news from: 2020-11-21 00:00:00
retrieving news from: 2020-11-20 00:00:00
retrieving news from: 2020-11-19 00:00:00
retrieving news from: 2020-11-18 00:00:00
retrieving news from: 2020-11-17 00:00:00
retrieving news from: 2020-11-16 00:00:00
retrieving news from: 2020-11-15 00:00:00
retrieving news from: 2020-11-14 00:00:00
retrieving news from: 2020-11-13 00:00:00
retrieving news from: 2020-11-12 00:00:00
retrieving news from: 2020-11-11 00:00:00
retrieving news from: 2020-11-10 00:00:00
retrieving news from: 2020-11-09 00:00:00


NewsAPIException: {'status': 'error', 'code': 'rateLimited', 'message': 'You have made too many requests recently. Developer accounts are limited to 100 requests over a 24 hour period (50 requests available every 12 hours). Please upgrade to a paid plan if you need more requests.'}

In [None]:
# Get fourth topic
iphone_healdines, _ = get_headlines("iphone")

In [None]:
# Get fifth topic
gold_headlines, _ = get_headlines("gold")

In [40]:
# Instantiate SentimentIntensityAnalyzer
sid = SentimentIntensityAnalyzer()

In [41]:
# Create function that computes average compound sentiment of headlines for each day
def headline_sentiment_summarizer_avg(headlines):
    sentiment = []
    for day in headlines:
        day_score = []
        for h in day:
            if h == None:
                continue
            else:
                day_score.append(sid.polarity_scores(h)["compound"])
        sentiment.append(sum(day_score) / len(day_score))
    return sentiment

In [43]:
# Get averages of each topics sentiment
aapl_avg = headline_sentiment_summarizer_avg(aapl_headlines)
trade_avg = headline_sentiment_summarizer_avg(trade_headlines)
economy_avg = headline_sentiment_summarizer_avg(economy_headlines)
iphone_avg = headline_sentiment_summarizer_avg(iphone_headlines)
gold_avg = headline_sentiment_summarizer_avg(goldl_headlines)

NameError: name 'aapl_headlines' is not defined

In [24]:
# Combine Sentiment Averages into DataFrame
topic_sentiments = pd.DataFrame({"aapl_avg": aapl_avg, "trade_avg": trade_avg, "economy_avg":economy_avg,"iphone_avg": iphone_avg,"gold_avg":gold_avg })

NameError: name 'aapl_avg' is not defined

In [25]:
# Set the index value of the sentiment average DataFrame to be the series of dates.
topic_sentiments.index = pd.to_datatime(dates)

AttributeError: module 'pandas' has no attribute 'to_datatime'

In [26]:
# Merge with AAPL returns
topic_sentiments = aapl_returns.join(topic_sentiments).dropna(how = "any")

#display Data
display(topic_sentiments)

NameError: name 'topic_sentiments' is not defined

In [27]:
# Correlate the headlines' sentiment to returns
topic_sentiments.corr().style.background_gradient()

NameError: name 'topic_sentiments' is not defined