# Correlating Tesla's Returns with Headline Sentiment for "Tesla", "Inflation", "Security", and "COVID"

In [1]:
import pandas as pd
import os
from datetime import datetime, timedelta
import alpaca_trade_api as tradeapi
from newsapi.newsapi_client import NewsApiClient
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from dotenv import load_dotenv

In [2]:
# Load key=value pairs from a .env file into the environment so the API keys below can be looked up
load_dotenv()

True

In [3]:
# Build the News API client from the 'news_api' environment variable.
# os.environ[...] raises KeyError if the variable is not set.
newsapi = NewsApiClient(api_key=os.environ['news_api'])

In [4]:
# Read the Alpaca API key and secret key from the environment.
# os.getenv returns None (instead of raising) when a variable is missing.
alpaca_api_key = os.getenv('alpaca_api_key')
alpaca_api_secret_key = os.getenv('alpaca_api_secret_key')

In [5]:


# Connect to Alpaca's paper-trading endpoint with the credentials read from the environment.
# The variables are passed here, not the quoted names 'alpaca_api_key' / 'alpaca_api_secret_key':
# quoting them would make the client authenticate with those literal placeholder strings.
api = tradeapi.REST(alpaca_api_key, alpaca_api_secret_key, 'https://paper-api.alpaca.markets')

# Uncomment to verify that the credentials are accepted:
# account = api.get_account()



## Create a DataFrame of Tesla's daily closing prices over the past 20 days

In [6]:
# Ticker symbol of the stock whose price bars are requested below (Tesla)
ticker = "TSLA"

In [7]:
# Bar size used for the price request: one bar per day
timeframe = "1D"

In [8]:
# Date range for the price and news requests, as ISO 8601 strings in New York time.
# The clock is read once, directly in America/New_York, so both bounds come from the same
# instant and the result does not depend on the time zone of the machine running the notebook.
lookback_days = 20
now_new_york = pd.Timestamp.now(tz="America/New_York")
current_date = now_new_york.isoformat()
# DateOffset keeps the wall-clock time of day, so the range spans exactly `lookback_days` calendar days.
past_date = (now_new_york - pd.DateOffset(days=lookback_days)).isoformat()


In [9]:
# Request TSLA historical bars from Alpaca for the chosen date range
tesla_bars = api.get_barset(ticker, timeframe, start=past_date, end=current_date).df

# Remove the outer column level (the ticker header), then discard every field except "close"
unused_fields = ["open", "high", "low", "volume"]
tesla_df = tesla_bars.droplevel(level=0, axis=1).drop(columns=unused_fields)

# Replace the timestamp index with plain calendar dates (YYYY-MM-DD)
tesla_df.index = tesla_df.index.date

tesla_df


Unnamed: 0,close
2021-06-15,599.2
2021-06-16,604.79
2021-06-17,616.6
2021-06-18,623.32
2021-06-21,620.83
2021-06-22,623.69
2021-06-23,656.57
2021-06-24,679.72
2021-06-25,671.6
2021-06-28,688.47


In [10]:
# Calculate Tesla's daily returns as the percent change of the close over the fetched date range.
# dropna() removes rows without a value (the first row has no previous close to compare against).
tesla_returns = tesla_df.pct_change().dropna()
tesla_returns

Unnamed: 0,close
2021-06-16,0.009329
2021-06-17,0.019527
2021-06-18,0.010898
2021-06-21,-0.003995
2021-06-22,0.004607
2021-06-23,0.052718
2021-06-24,0.035259
2021-06-25,-0.011946
2021-06-28,0.025119
2021-06-29,-0.011199


In [11]:
# Use the newsapi client to get the most relevant headlines for each day of the date range.
# One request is sent per day, so the length of the range determines how many requests count
# against the News API limits.
def get_headlines(keyword):
    """Collect the most relevant English headlines for `keyword`, one query per day.

    Walks backwards one day at a time from the day of the global `current_date`
    down to, but not including, the day of the global `past_date`, asking the
    global `newsapi` client for page 1 of that single day's results.

    Returns a tuple `(all_headlines, all_dates)`: a list holding one list of
    article titles per day, and the matching list of `datetime` objects,
    both ordered newest day first.
    """
    newest_day = datetime.strptime(current_date[:10], "%Y-%m-%d")
    oldest_day = datetime.strptime(past_date[:10], "%Y-%m-%d")

    print(f"Fetching news about '{keyword}'")
    print("*" * 20)

    all_headlines, all_dates = [], []
    day = newest_day
    while day > oldest_day:
        print(f"retrieving news from: {day}")
        # The same YYYY-MM-DD string bounds the query on both sides: exactly one day
        day_string = day.date().isoformat()
        response = newsapi.get_everything(
            q=keyword,
            from_param=day_string,
            to=day_string,
            language="en",
            sort_by="relevancy",
            page=1,
        )
        all_headlines.append([article["title"] for article in response["articles"]])
        all_dates.append(day)
        day -= timedelta(days=1)
    return all_headlines, all_dates

In [15]:
# First news topic to gather correlation: Tesla
# `dates` receives the list of queried days that get_headlines returns alongside the headlines
tsla_headlines, dates = get_headlines("tesla")

Fetching news about 'tesla'
********************
retrieving news from: 2021-07-05 00:00:00
retrieving news from: 2021-07-04 00:00:00
retrieving news from: 2021-07-03 00:00:00
retrieving news from: 2021-07-02 00:00:00
retrieving news from: 2021-07-01 00:00:00
retrieving news from: 2021-06-30 00:00:00
retrieving news from: 2021-06-29 00:00:00
retrieving news from: 2021-06-28 00:00:00
retrieving news from: 2021-06-27 00:00:00
retrieving news from: 2021-06-26 00:00:00
retrieving news from: 2021-06-25 00:00:00
retrieving news from: 2021-06-24 00:00:00
retrieving news from: 2021-06-23 00:00:00
retrieving news from: 2021-06-22 00:00:00
retrieving news from: 2021-06-21 00:00:00
retrieving news from: 2021-06-20 00:00:00
retrieving news from: 2021-06-19 00:00:00
retrieving news from: 2021-06-18 00:00:00
retrieving news from: 2021-06-17 00:00:00
retrieving news from: 2021-06-16 00:00:00


In [12]:
# Second news topic to gather correlation: Inflation
# `dates` is reassigned here; get_headlines derives it from current_date/past_date, not from the keyword
inflation_headlines, dates = get_headlines("inflation")

Fetching news about 'inflation'
********************
retrieving news from: 2021-07-05 00:00:00
retrieving news from: 2021-07-04 00:00:00
retrieving news from: 2021-07-03 00:00:00
retrieving news from: 2021-07-02 00:00:00
retrieving news from: 2021-07-01 00:00:00
retrieving news from: 2021-06-30 00:00:00
retrieving news from: 2021-06-29 00:00:00
retrieving news from: 2021-06-28 00:00:00
retrieving news from: 2021-06-27 00:00:00
retrieving news from: 2021-06-26 00:00:00
retrieving news from: 2021-06-25 00:00:00
retrieving news from: 2021-06-24 00:00:00
retrieving news from: 2021-06-23 00:00:00
retrieving news from: 2021-06-22 00:00:00
retrieving news from: 2021-06-21 00:00:00
retrieving news from: 2021-06-20 00:00:00
retrieving news from: 2021-06-19 00:00:00
retrieving news from: 2021-06-18 00:00:00
retrieving news from: 2021-06-17 00:00:00
retrieving news from: 2021-06-16 00:00:00


In [13]:
# Third news topic to gather correlation: Security
# `dates` is reassigned here; get_headlines derives it from current_date/past_date, not from the keyword
security_headlines, dates = get_headlines("security")

Fetching news about 'security'
********************
retrieving news from: 2021-07-05 00:00:00
retrieving news from: 2021-07-04 00:00:00
retrieving news from: 2021-07-03 00:00:00
retrieving news from: 2021-07-02 00:00:00
retrieving news from: 2021-07-01 00:00:00
retrieving news from: 2021-06-30 00:00:00
retrieving news from: 2021-06-29 00:00:00
retrieving news from: 2021-06-28 00:00:00
retrieving news from: 2021-06-27 00:00:00
retrieving news from: 2021-06-26 00:00:00
retrieving news from: 2021-06-25 00:00:00
retrieving news from: 2021-06-24 00:00:00
retrieving news from: 2021-06-23 00:00:00
retrieving news from: 2021-06-22 00:00:00
retrieving news from: 2021-06-21 00:00:00
retrieving news from: 2021-06-20 00:00:00
retrieving news from: 2021-06-19 00:00:00
retrieving news from: 2021-06-18 00:00:00
retrieving news from: 2021-06-17 00:00:00
retrieving news from: 2021-06-16 00:00:00


In [14]:
# Fourth news topic to gather correlation: COVID
# `dates` is reassigned here; get_headlines derives it from current_date/past_date, not from the keyword
covid_headlines, dates = get_headlines("covid")

Fetching news about 'covid'
********************
retrieving news from: 2021-07-05 00:00:00
retrieving news from: 2021-07-04 00:00:00
retrieving news from: 2021-07-03 00:00:00
retrieving news from: 2021-07-02 00:00:00
retrieving news from: 2021-07-01 00:00:00
retrieving news from: 2021-06-30 00:00:00
retrieving news from: 2021-06-29 00:00:00
retrieving news from: 2021-06-28 00:00:00
retrieving news from: 2021-06-27 00:00:00
retrieving news from: 2021-06-26 00:00:00
retrieving news from: 2021-06-25 00:00:00
retrieving news from: 2021-06-24 00:00:00
retrieving news from: 2021-06-23 00:00:00
retrieving news from: 2021-06-22 00:00:00
retrieving news from: 2021-06-21 00:00:00
retrieving news from: 2021-06-20 00:00:00
retrieving news from: 2021-06-19 00:00:00
retrieving news from: 2021-06-18 00:00:00
retrieving news from: 2021-06-17 00:00:00
retrieving news from: 2021-06-16 00:00:00


In [16]:
# Create one NLTK VADER SentimentIntensityAnalyzer; the scoring function below reads it as the global `sid`
sid = SentimentIntensityAnalyzer()

In [17]:
# Create function that computes average compound sentiment of headlines for each day
def headline_sentiment_summarizer_avg(headlines):
    """Return the mean compound sentiment score of each day's headlines.

    Each headline is scored with the global analyzer `sid` and the
    "compound" value of its polarity scores is averaged per day.

    Args:
        headlines: iterable of days, where each day is an iterable of
            headline strings. `None` entries (missing titles) are skipped.

    Returns:
        A list with one float per day, in input order. A day with no
        scorable headline (empty, or only `None` titles) yields NaN instead
        of raising ZeroDivisionError, so the result keeps one entry per day
        and a later `dropna()` can discard such days.
    """
    sentiment = []
    for day in headlines:
        day_scores = [
            sid.polarity_scores(headline)["compound"]
            for headline in day
            if headline is not None
        ]
        # Guard the division: a day without usable headlines has no average
        sentiment.append(sum(day_scores) / len(day_scores) if day_scores else float("nan"))
    return sentiment

In [18]:
# Get the average sentiment of each topic: one value per queried day, in the order the headlines were fetched
tsla_avg = headline_sentiment_summarizer_avg(tsla_headlines)
inflation_avg = headline_sentiment_summarizer_avg(inflation_headlines)
# economy_avg = headline_sentiment_summarizer_avg(economy_headlines)
security_avg = headline_sentiment_summarizer_avg(security_headlines)
covid_avg = headline_sentiment_summarizer_avg(covid_headlines)


In [19]:
# Assemble the per-topic daily sentiment averages into one DataFrame, one column per topic
sentiment_columns = {
    "tsla_avg": tsla_avg,
    "inflation_avg": inflation_avg,
    "security_avg": security_avg,
    "covid_avg": covid_avg,
}
topic_sentiments = pd.DataFrame(sentiment_columns)

In [20]:
# Set the index of the sentiment averages DataFrame to the queried dates
# (`dates` as returned by the most recent get_headlines call) so rows can be matched to returns by date.
topic_sentiments.index = pd.to_datetime(dates)

In [21]:
# Merge with TSLA returns: join the sentiment columns onto the returns by date,
# then drop every date that is missing a value in any column.
# NOTE: this reassigns `topic_sentiments`, so re-running this cell on its own would join the
# already-merged frame; re-create `topic_sentiments` (and its date index) first.
topic_sentiments = tesla_returns.join(topic_sentiments).dropna(how="any")

# Display data
display(topic_sentiments)

Unnamed: 0,close,tsla_avg,inflation_avg,security_avg,covid_avg
2021-06-16,0.009329,0.075395,0.068935,0.01864,-0.07246
2021-06-17,0.019527,0.01386,0.061725,-0.072455,0.05489
2021-06-18,0.010898,-0.075175,0.111965,-0.09828,0.01371
2021-06-21,-0.003995,-0.016815,0.031665,0.105125,-0.124335
2021-06-22,0.004607,0.144515,0.18112,0.09198,-0.02874
2021-06-23,0.052718,0.003585,0.112705,0.01278,0.04745
2021-06-24,0.035259,0.094515,0.00906,-0.154075,0.0933
2021-06-25,-0.011946,0.039325,-0.06061,-0.029625,0.01711
2021-06-28,0.025119,0.002395,-0.053415,0.030735,0.053135
2021-06-29,-0.011199,0.112485,0.037555,-0.04856,0.113005


In [22]:
# Pairwise correlation between TSLA daily returns ("close") and each topic's average sentiment, shaded by value
topic_sentiments.corr().style.background_gradient()

Unnamed: 0,close,tsla_avg,inflation_avg,security_avg,covid_avg
close,1.0,-0.068608,0.281977,-0.212551,0.265375
tsla_avg,-0.068608,1.0,0.316668,-0.176884,0.015024
inflation_avg,0.281977,0.316668,1.0,-0.122466,-0.125221
security_avg,-0.212551,-0.176884,-0.122466,1.0,-0.469997
covid_avg,0.265375,0.015024,-0.125221,-0.469997,1.0
