# Correlating Returns

In [10]:
import os
import pandas as pd
from datetime import datetime, timedelta
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
from newsapi.newsapi_client import NewsApiClient
from nltk.sentiment.vader import SentimentIntensityAnalyzer

file_name = "news_df.pkl"

import os.path

## Load API Keys from Environment Variables

In [11]:
# Load .env enviroment variables
load_dotenv()

# Set News API Key
newsapi = NewsApiClient(api_key=os.environ["news_api"])

# Set Alpaca API key and secret
alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")

api = tradeapi.REST(alpaca_api_key, alpaca_secret_key, api_version='v2')

## Get AAPL Returns for Past Month

In [12]:
# Set the ticker
ticker = "AAPL"

# Set timeframe to '1D'
timeframe = "1D"

# Set current date and the date from one month ago using the ISO format
current_date = pd.Timestamp(datetime.now()- timedelta(1), tz="America/New_York").isoformat()
past_date = pd.Timestamp(datetime.now()- timedelta(31), tz="America/New_York").isoformat()

# Get 4 weeks worth of historical data for AAPL
df = api.get_bars(
    ticker,
    timeframe,
    limit=None,
    start=past_date,
    end=current_date,
).df

# Display data
df.head()

Unnamed: 0_level_0,open,high,low,close,volume,trade_count,vwap
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-05-09 04:00:00+00:00,154.83,155.83,151.49,152.06,131274823,1093220,153.100423
2022-05-10 04:00:00+00:00,155.57,156.74,152.93,154.51,114742115,981091,154.837162
2022-05-11 04:00:00+00:00,153.45,155.45,145.81,146.5,142166096,1297098,149.443367
2022-05-12 04:00:00+00:00,142.8,146.19,138.8,142.56,181631246,1663909,142.068993
2022-05-13 04:00:00+00:00,144.65,148.105,143.11,147.11,113656931,917034,146.325214


In [13]:
df.index = df.index.date
df = df[["close"]]
df.head()

Unnamed: 0,close
2022-05-09,152.06
2022-05-10,154.51
2022-05-11,146.5
2022-05-12,142.56
2022-05-13,147.11


In [14]:
# Use the `pct_change` function to calculate daily returns of AAPL
aapl_returns = df.pct_change().dropna()

# Display sample data
aapl_returns.head()

Unnamed: 0,close
2022-05-10,0.016112
2022-05-11,-0.051841
2022-05-12,-0.026894
2022-05-13,0.031916
2022-05-16,-0.010672


In [15]:
# Use newsapi client to get most relevant 20 headlines per day in the past month
def get_headlines(keyword):
    """
    Returns
    --------

    two variables, eg.
    
    [
        [
            "Elon buying twitter!!!!",
            "Elon NOT buying twitter!!!!"
        ],
        [
            "No news today..."
        ]
    ]

    ['01/01/2020', '01/02/2020']

    """
    all_headlines = []
    all_dates = []    
    date = datetime.strptime(current_date[:10], "%Y-%m-%d") # global var (current_date)
    end_date = datetime.strptime(past_date[:10], "%Y-%m-%d") # past date... 
    print(f"Fetching news about '{keyword}'")
    print("*" * 30)
    while date > end_date: # loop through each day
        print(f"retrieving news from: {date}")
        articles = newsapi.get_everything( # global newsapi....
            q=keyword,
            from_param=str(date)[:10],
            to=str(date)[:10],
            language="en",
            sort_by="relevancy",
            page=1,
        )
        headlines = [] # headlines for this date
        for i in range(0, len(articles["articles"])): # for each article on that day...
            headlines.append(articles["articles"][i]["title"])
        all_headlines.append(headlines) # add this days news to all the news
        all_dates.append(date) # 
        date = date - timedelta(days=1) # increment out day....
    return all_headlines, all_dates

Note: Be aware that running the 3 requests below will only work once within a 24 hour period due to the request limits imposed by the API provider.

In [16]:
# Get first topic
if not os.path.isfile(file_name): 
    aapl_headlines, dates = get_headlines("apple")
    trade_headlines, _ = get_headlines("trade")
    economy_headlines, _ = get_headlines("economy")

    analyzer = SentimentIntensityAnalyzer()

    new_df = pd.DataFrame(
    
            {
                "aaple_news":aapl_headlines,
                "trade_news":trade_headlines,
                "economy_news":economy_headlines,
                "date":dates
            }

    )

    new_df.to_pickle(file_name)

else:
    new_df = pd.read_pickle(file_name)

new_df.head()

Fetching news about 'apple'
******************************
retrieving news from: 2022-06-05 00:00:00
retrieving news from: 2022-06-04 00:00:00
retrieving news from: 2022-06-03 00:00:00
retrieving news from: 2022-06-02 00:00:00
retrieving news from: 2022-06-01 00:00:00
retrieving news from: 2022-05-31 00:00:00
retrieving news from: 2022-05-30 00:00:00
retrieving news from: 2022-05-29 00:00:00
retrieving news from: 2022-05-28 00:00:00
retrieving news from: 2022-05-27 00:00:00
retrieving news from: 2022-05-26 00:00:00
retrieving news from: 2022-05-25 00:00:00
retrieving news from: 2022-05-24 00:00:00
retrieving news from: 2022-05-23 00:00:00
retrieving news from: 2022-05-22 00:00:00
retrieving news from: 2022-05-21 00:00:00
retrieving news from: 2022-05-20 00:00:00
retrieving news from: 2022-05-19 00:00:00
retrieving news from: 2022-05-18 00:00:00
retrieving news from: 2022-05-17 00:00:00
retrieving news from: 2022-05-16 00:00:00
retrieving news from: 2022-05-15 00:00:00
retrieving news f

Unnamed: 0,aaple_news,trade_news,economy_news,date
0,[How to Use a Laptop as a Second Display for Y...,[Blue Jays trade Ryan Borucki to Mariners for ...,[US Unemployment drops to lowest level since 1...,2022-06-05
1,[Apple is reportedly enlisting Hollywood talen...,[Peter Navarro Vowed Revenge On Biden Before B...,[Tesla CEO Elon Musk Has 'Super Bad Feeling' A...,2022-06-04
2,[Square will support Apple's Tap to Pay on iPh...,[Factbox: What could the WTO ministerial confe...,[Biden brushes off Musk's warnings about the e...,2022-06-03
3,[How to use your smartwatch as a camera remote...,[Chipotle Lovers Can Now Buy Burritos With Bit...,"[Time is running out for Russia, German econom...",2022-06-02
4,"[How to Watch Apple's WWDC 2022, The iPhone 14...",[Facebook and Instagram’s parent company will ...,[How the UAE went from boycotting Israel to in...,2022-06-01


In [17]:
# Get second topic
def score(headlines):
    """
        This function takes all headlines for a given day, gets the average sentiment score for all of them
    """
    analyzer = SentimentIntensityAnalyzer()
    scores = [analyzer.polarity_scores(headline)["compound"]for headline in headlines]
    return sum(scores) / len(scores)


new_df["aaple_scored"] = new_df["aaple_news"].apply(score)
new_df["trade_scored"] = new_df["trade_news"].apply(score)
new_df["economy_scored"] = new_df["economy_news"].apply(score)

new_df.head()

Unnamed: 0,aaple_news,trade_news,economy_news,date,aaple_scored,trade_scored,economy_scored
0,[How to Use a Laptop as a Second Display for Y...,[Blue Jays trade Ryan Borucki to Mariners for ...,[US Unemployment drops to lowest level since 1...,2022-06-05,0.03306,0.12843,0.03112
1,[Apple is reportedly enlisting Hollywood talen...,[Peter Navarro Vowed Revenge On Biden Before B...,[Tesla CEO Elon Musk Has 'Super Bad Feeling' A...,2022-06-04,0.102585,-0.131365,-0.010195
2,[Square will support Apple's Tap to Pay on iPh...,[Factbox: What could the WTO ministerial confe...,[Biden brushes off Musk's warnings about the e...,2022-06-03,0.243345,-0.12696,-0.088225
3,[How to use your smartwatch as a camera remote...,[Chipotle Lovers Can Now Buy Burritos With Bit...,"[Time is running out for Russia, German econom...",2022-06-02,0.087085,0.03424,0.043555
4,"[How to Watch Apple's WWDC 2022, The iPhone 14...",[Facebook and Instagram’s parent company will ...,[How the UAE went from boycotting Israel to in...,2022-06-01,-0.0269,-0.0889,-0.055425


In [18]:
# Get third topic
new_df.index = new_df["date"]
new_df.head()

Unnamed: 0_level_0,aaple_news,trade_news,economy_news,date,aaple_scored,trade_scored,economy_scored
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-06-05,[How to Use a Laptop as a Second Display for Y...,[Blue Jays trade Ryan Borucki to Mariners for ...,[US Unemployment drops to lowest level since 1...,2022-06-05,0.03306,0.12843,0.03112
2022-06-04,[Apple is reportedly enlisting Hollywood talen...,[Peter Navarro Vowed Revenge On Biden Before B...,[Tesla CEO Elon Musk Has 'Super Bad Feeling' A...,2022-06-04,0.102585,-0.131365,-0.010195
2022-06-03,[Square will support Apple's Tap to Pay on iPh...,[Factbox: What could the WTO ministerial confe...,[Biden brushes off Musk's warnings about the e...,2022-06-03,0.243345,-0.12696,-0.088225
2022-06-02,[How to use your smartwatch as a camera remote...,[Chipotle Lovers Can Now Buy Burritos With Bit...,"[Time is running out for Russia, German econom...",2022-06-02,0.087085,0.03424,0.043555
2022-06-01,"[How to Watch Apple's WWDC 2022, The iPhone 14...",[Facebook and Instagram’s parent company will ...,[How the UAE went from boycotting Israel to in...,2022-06-01,-0.0269,-0.0889,-0.055425


In [10]:
# Instantiate SentimentIntensityAnalyzer
# YOUR CODE HERE

In [11]:
# Create function that computes average compound sentiment of headlines for each day
def headline_sentiment_summarizer_avg(headlines):
    sentiment = []
    for day in headlines:
        day_score = []
        for h in day:
            if h == None:
                continue
            else:
                day_score.append(sid.polarity_scores(h)["compound"])
        sentiment.append(sum(day_score) / len(day_score))
    return sentiment

In [19]:
# Merge with AAPL returns
df.index = pd.to_datetime(df.index)

all_df = pd.concat([df, new_df], join="inner", axis="columns")

# Display data
all_df = all_df[["close","aaple_scored", "trade_scored", "economy_scored"]]

all_df.head()

Unnamed: 0,close,aaple_scored,trade_scored,economy_scored
2022-05-09,152.06,0.05244,-0.026415,-0.02065
2022-05-10,154.51,0.11166,0.02404,-0.188515
2022-05-11,146.5,0.04207,-0.02703,-0.12238
2022-05-12,142.56,0.079985,0.018275,0.03644
2022-05-13,147.11,0.02363,0.112405,-0.040805


In [20]:
# Correlate the headlines' sentiment to returns
all_df.corr().style.background_gradient()

Unnamed: 0,close,aaple_scored,trade_scored,economy_scored
close,1.0,-0.164685,0.08463,-0.08295
aaple_scored,-0.164685,1.0,-0.075222,0.098846
trade_scored,0.08463,-0.075222,1.0,0.115267
economy_scored,-0.08295,0.098846,0.115267,1.0
