# Correlating Returns

In [81]:
import os
import pandas as pd
from datetime import datetime, timedelta
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
from newsapi.newsapi_client import NewsApiClient
from nltk.sentiment.vader import SentimentIntensityAnalyzer

## Load API Keys from Environment Variables

In [82]:
# Load .env enviroment variables
load_dotenv()

# Set News API Key
newsapi = NewsApiClient(api_key=os.environ["NEWS_API_KEY"])

# Set Alpaca API key and secret
alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")

api = tradeapi.REST(alpaca_api_key, alpaca_secret_key, api_version='v2')

## Get AAPL Returns for Past Month

In [83]:
# Set the ticker
ticker = "AAPL"

# Set timeframe to '1D'
timeframe = "1D"

# Set current date and the date from one month ago using the ISO format
current_date = pd.Timestamp(datetime.now()- timedelta(1), tz="America/New_York").isoformat()
past_date = pd.Timestamp(datetime.now()- timedelta(31), tz="America/New_York").isoformat()

# Get 4 weeks worth of historical data for AAPL
df = api.get_bars(
    ticker,
    timeframe,
    limit=None,
    start=past_date,
    end=current_date
).df

# Display data
df.head()

Unnamed: 0_level_0,open,high,low,close,volume,trade_count,vwap
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-05-02 04:00:00+00:00,156.65,158.23,153.27,157.96,122860515,1148543,156.032933
2022-05-03 04:00:00+00:00,158.06,160.71,156.32,159.48,88581314,702272,158.800246
2022-05-04 04:00:00+00:00,159.65,166.48,159.26,166.02,108376463,867145,162.405193
2022-05-05 04:00:00+00:00,163.66,164.08,154.95,156.77,129993551,1157695,158.146266
2022-05-06 04:00:00+00:00,155.89,159.44,154.18,157.28,115637784,1016129,157.329789


In [84]:
df.index = df.index.date
# Use the drop function to drop extra columns
df = df[['close']]
# Display sample data
df.head()

Unnamed: 0,close
2022-05-02,157.96
2022-05-03,159.48
2022-05-04,166.02
2022-05-05,156.77
2022-05-06,157.28


In [85]:
# Use the `pct_change` function to calculate daily returns of AAPL
aapl_returns = df.pct_change().dropna()

# Display sample data
aapl_returns.head()

Unnamed: 0,close
2022-05-03,0.009623
2022-05-04,0.041008
2022-05-05,-0.055716
2022-05-06,0.003253
2022-05-09,-0.033189


In [86]:
# Use newsapi client to get most relevant 20 headlines per day in the past month
def get_headlines(keyword):
    """
    
    """
    all_headlines = []
    all_dates = []    
    pd.Timestamp(datetime.now()- timedelta(1), tz="America/New_York").isoformat()
    date = datetime.strptime(current_date[:10], "%Y-%m-%d")
    end_date = datetime.strptime(past_date[:10], "%Y-%m-%d")
    print(f"Fetching news about '{keyword}'")
    print("*" * 30)
    while date > end_date:
        print(f"retrieving news from: {date}")
        articles = newsapi.get_everything(
            q=keyword,
            from_param=str(date)[:10],
            to=str(date)[:10],
            language="en",
            sort_by="relevancy",
            page=1,
        )
        headlines = []
        for i in range(0, len(articles["articles"])):
            headlines.append(articles["articles"][i]["title"])
        all_headlines.append(headlines)
        all_dates.append(date)
        date = date - timedelta(days=1)
    return all_headlines, all_dates

Note: Be aware that running the 3 requests below will only work once within a 24 hour period due to the request limits imposed by the API provider.

In [87]:
# Get first topic
aapl_headlines, dates = get_headlines("apple")
trade_headlines, _ = get_headlines("trade")
economy_headlines, _ = get_headlines("economy")

Fetching news about 'apple'
******************************
retrieving news from: 2022-05-31 00:00:00
retrieving news from: 2022-05-30 00:00:00
retrieving news from: 2022-05-29 00:00:00
retrieving news from: 2022-05-28 00:00:00
retrieving news from: 2022-05-27 00:00:00
retrieving news from: 2022-05-26 00:00:00
retrieving news from: 2022-05-25 00:00:00
retrieving news from: 2022-05-24 00:00:00
retrieving news from: 2022-05-23 00:00:00
retrieving news from: 2022-05-22 00:00:00


NewsAPIException: {'status': 'error', 'code': 'rateLimited', 'message': 'You have made too many requests recently. Developer accounts are limited to 100 requests over a 24 hour period (50 requests available every 12 hours). Please upgrade to a paid plan if you need more requests.'}

In [None]:
new_df = pd.DataFrame(
    {
        'aapl_headlines': aapl_headlines,
        'trade_headlines': trade_headlines, 
        'economy_headlines': economy_headlines,   
        'date': dates 
    }
)
new_df.head()

Unnamed: 0,aapl_headlines,trade_headlines,economy_headlines,dates
0,[Deals: Apple TV 4K Available for Lowest-Ever ...,[Shanghai lockdown: China eases Covid restrict...,[Biden and Fed chair Powell are set to meet as...,2022-05-31
1,[Pixel Tablet could include support for third-...,"[Israel, United Arab Emirates to sign free tra...","[Swedish economy slowed sharply in Q1, data sh...",2022-05-30
2,[iOS 16 will reportedly include an always-on d...,[China-Swiss trade talks stall over rights iss...,[Hitting the Books: What the 'Work from Home' ...,2022-05-29
3,[Judge rules Cydia's antitrust case against Ap...,[Gun in Texas Shooting Came From Company Known...,[China's first residential REITs to be launche...,2022-05-28
4,[The Apple Watch 7 on sale for a record-low pr...,[The Fed's plans for a CBDC raise concerns amo...,['Bankruptcies Need to Happen': Elon Musk Soun...,2022-05-27


In [None]:
def score(headlines):
    """
        This function takes headlines for a given day, gets the average sentiment score 
    """
    analyzer = SentimentIntensityAnalyzer()
    scores = [analyzer.polarity_scores(headline)['compound'] for headline in headlines]
    return(sum(scores)/len(scores))

new_df['appl_scored'] = new_df['aapl_headlines'].apply(score)
new_df['trade_scored'] = new_df['trade_headlines'].apply(score)
new_df['economy_scored'] = new_df['economy_headlines'].apply(score)

In [None]:
new_df.head()

Unnamed: 0,aapl_headlines,trade_headlines,economy_headlines,dates,appl_scored,trade_scored,economy_scored
0,[Deals: Apple TV 4K Available for Lowest-Ever ...,[Shanghai lockdown: China eases Covid restrict...,[Biden and Fed chair Powell are set to meet as...,2022-05-31,0.128685,0.03295,0.123325
1,[Pixel Tablet could include support for third-...,"[Israel, United Arab Emirates to sign free tra...","[Swedish economy slowed sharply in Q1, data sh...",2022-05-30,-0.003425,0.092355,0.122845
2,[iOS 16 will reportedly include an always-on d...,[China-Swiss trade talks stall over rights iss...,[Hitting the Books: What the 'Work from Home' ...,2022-05-29,0.212195,0.0289,-0.02995
3,[Judge rules Cydia's antitrust case against Ap...,[Gun in Texas Shooting Came From Company Known...,[China's first residential REITs to be launche...,2022-05-28,0.103855,-0.02011,-0.13294
4,[The Apple Watch 7 on sale for a record-low pr...,[The Fed's plans for a CBDC raise concerns amo...,['Bankruptcies Need to Happen': Elon Musk Soun...,2022-05-27,0.041845,-0.064385,-0.05773


In [None]:
# Instantiate SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

In [None]:
# Create function that computes average compound sentiment of headlines for each day
def headline_sentiment_summarizer_avg(headlines):
    sentiment = []
    for day in headlines:
        day_score = []
        for h in day:
            if h == None:
                continue
            else:
                day_score.append(sid.polarity_scores(h)["compound"])
        sentiment.append(sum(day_score) / len(day_score))
    return sentiment

In [None]:
# Set the index value of the sentiment averages DataFrame to be the series of dates.
new_df.index = new_df['dates']
new_df.head()

Unnamed: 0_level_0,aapl_headlines,trade_headlines,economy_headlines,dates,appl_scored,trade_scored,economy_scored
dates,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-05-31,[Deals: Apple TV 4K Available for Lowest-Ever ...,[Shanghai lockdown: China eases Covid restrict...,[Biden and Fed chair Powell are set to meet as...,2022-05-31,0.128685,0.03295,0.123325
2022-05-30,[Pixel Tablet could include support for third-...,"[Israel, United Arab Emirates to sign free tra...","[Swedish economy slowed sharply in Q1, data sh...",2022-05-30,-0.003425,0.092355,0.122845
2022-05-29,[iOS 16 will reportedly include an always-on d...,[China-Swiss trade talks stall over rights iss...,[Hitting the Books: What the 'Work from Home' ...,2022-05-29,0.212195,0.0289,-0.02995
2022-05-28,[Judge rules Cydia's antitrust case against Ap...,[Gun in Texas Shooting Came From Company Known...,[China's first residential REITs to be launche...,2022-05-28,0.103855,-0.02011,-0.13294
2022-05-27,[The Apple Watch 7 on sale for a record-low pr...,[The Fed's plans for a CBDC raise concerns amo...,['Bankruptcies Need to Happen': Elon Musk Soun...,2022-05-27,0.041845,-0.064385,-0.05773


In [None]:
# Merge with AAPL returns
all_df = pd.concat([df,new_df], join='inner', axis='columns')

# Display data
all_df.head()

  indexer = self._engine.get_indexer(target._get_engine_target())


Unnamed: 0,close,aapl_headlines,trade_headlines,economy_headlines,dates,appl_scored,trade_scored,economy_scored
2022-05-02,157.96,[Apple Music arrives on Roku streaming devices...,[EU-Mercosur trade deal to clear environmental...,[India's jobless rate rises to 7.83% in April ...,2022-05-02,-0.014265,-0.12045,-0.026265


In [None]:
# Correlate the headlines' sentiment to returns
# YOUR CODE HERE