# Correlating Tesla's Returns with Headline Sentiment for "Tesla", "Inflation", "Security", and "COVID"

In [144]:
import pandas as pd
import os
from datetime import datetime, timedelta
import alpaca_trade_api as tradeapi
from newsapi.newsapi_client import NewsApiClient
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from dotenv import load_dotenv

In [145]:
# Load variables from a local .env file into the process environment;
# the API keys used in the following cells are read from the environment.
load_dotenv()

True

In [146]:
# Create the News API client. Indexing os.environ raises KeyError immediately
# if the 'news_api' variable is not set, so a missing key fails fast here.
newsapi = NewsApiClient(api_key=os.environ['news_api'])

In [147]:
# Read the Alpaca API key and secret key from the environment.
# os.getenv returns None (instead of raising) when a variable is missing.
alpaca_api_key = os.getenv('alpaca_api_key')
alpaca_api_secret_key = os.getenv('alpaca_api_secret_key')

In [148]:


# Connect to Alpaca's paper-trading REST endpoint.
# The credential *variables* are passed here; passing the quoted names
# ('alpaca_api_key', ...) would send those literal strings as credentials.
api = tradeapi.REST(
    alpaca_api_key,
    alpaca_api_secret_key,
    'https://paper-api.alpaca.markets',
)



## Create a DataFrame of Tesla's daily closing prices over the past five days

In [149]:
# Ticker symbol of the stock whose price bars are requested from Alpaca
ticker = "TSLA"

In [150]:
# Bar size passed to the Alpaca request: one bar per day
timeframe = "1D"

In [194]:
# Query window as ISO-8601 strings in US Eastern time.
# "Now" is sampled once, directly in America/New_York, so that:
#   * the window does not depend on the local timezone of the machine
#     running the notebook, and
#   * both ends of the window derive from the same instant.
now_new_york = pd.Timestamp.now(tz="America/New_York")
current_date = now_new_york.isoformat()
# Look back five days to stay within the News API request limits.
past_date = (now_new_york - timedelta(5)).isoformat()


In [195]:
# Get TSLA historical daily bars for the query window and keep only the close
tesla_df = (
    api.get_barset(ticker, timeframe, start=past_date, end=current_date)
    .df
    # Remove the outer column level (the ticker header)
    .droplevel(level=0, axis=1)
    # Only the closing price is used downstream
    .drop(columns=["open", "high", "low", "volume"])
)

# Replace the timestamp index with plain calendar dates (YYYY-MM-DD)
tesla_df.index = tesla_df.index.date

tesla_df


Unnamed: 0,close
2021-06-29,680.76
2021-06-30,679.78
2021-07-01,677.92
2021-07-02,678.9


In [196]:
# Calculate Tesla's daily returns as the day-over-day percentage change of the
# close. The first row has no previous day to compare with, so its NaN is dropped.
tesla_returns = tesla_df.pct_change().dropna()
tesla_returns

Unnamed: 0,close
2021-06-30,-0.00144
2021-07-01,-0.002736
2021-07-02,0.001446


In [199]:
# Fetch the most relevant headlines for each day of the query window via the
# newsapi client. The window is kept to a few days to stay under the News API limits.
def get_headlines(keyword):
    """Collect one list of article titles per day for ``keyword``.

    Walks backwards one day at a time from the date part of the module-level
    ``current_date`` down to (but not including) the date part of ``past_date``,
    querying the module-level ``newsapi`` client once per day.

    Parameters
    ----------
    keyword : str
        Search term passed as the ``q`` argument of the News API query.

    Returns
    -------
    tuple of (list, list)
        ``all_headlines`` holds one list of titles per day; ``all_dates`` holds
        the matching ``datetime`` for each day, newest first.
    """
    newest_day = datetime.strptime(current_date[:10], "%Y-%m-%d")
    oldest_day = datetime.strptime(past_date[:10], "%Y-%m-%d")

    print(f"Fetching news about '{keyword}'")
    print("*" * 5)

    all_headlines, all_dates = [], []
    # Both bounds are at midnight, so their difference is a whole number of days
    for offset in range((newest_day - oldest_day).days):
        day = newest_day - timedelta(days=offset)
        print(f"retrieving news from: {day}")
        day_string = str(day)[:10]
        response = newsapi.get_everything(
            q=keyword,
            from_param=day_string,
            to=day_string,
            language="en",
            sort_by="relevancy",
            page=1,
        )
        all_headlines.append([article["title"] for article in response["articles"]])
        all_dates.append(day)
    return all_headlines, all_dates

In [198]:
# First news topic to correlate with returns: Tesla.
# `dates` holds the per-day datetimes returned alongside the headlines.
tsla_headlines, dates = get_headlines("tesla")

Fetching news about 'tesla'
***
retrieving news from: 2021-07-04 00:00:00
retrieving news from: 2021-07-03 00:00:00
retrieving news from: 2021-07-02 00:00:00
retrieving news from: 2021-07-01 00:00:00
retrieving news from: 2021-06-30 00:00:00


In [181]:
# Second news topic to correlate with returns: Inflation.
# `dates` is rebound here to the datetimes returned by this call.
inflation_headlines, dates = get_headlines("inflation")

Fetching news about 'inflation'
*****
retrieving news from: 2021-07-04 00:00:00
retrieving news from: 2021-07-03 00:00:00
retrieving news from: 2021-07-02 00:00:00
retrieving news from: 2021-07-01 00:00:00
retrieving news from: 2021-06-30 00:00:00


In [182]:
# Third news topic to correlate with returns: Security.
# `dates` is rebound here to the datetimes returned by this call.
security_headlines, dates = get_headlines("security")

Fetching news about 'security'
*****
retrieving news from: 2021-07-04 00:00:00
retrieving news from: 2021-07-03 00:00:00
retrieving news from: 2021-07-02 00:00:00
retrieving news from: 2021-07-01 00:00:00
retrieving news from: 2021-06-30 00:00:00


In [183]:
# Fourth news topic to correlate with returns: COVID.
# `dates` is rebound here; this last value is the one used later as the index.
covid_headlines, dates = get_headlines("covid")

Fetching news about 'covid'
*****
retrieving news from: 2021-07-04 00:00:00
retrieving news from: 2021-07-03 00:00:00
retrieving news from: 2021-07-02 00:00:00
retrieving news from: 2021-07-01 00:00:00
retrieving news from: 2021-06-30 00:00:00


In [200]:
# Instantiate NLTK's VADER SentimentIntensityAnalyzer; it is shared by the
# sentiment-scoring function defined below.
# NOTE(review): presumably requires the 'vader_lexicon' NLTK resource to be
# downloaded beforehand — confirm for fresh environments.
sid = SentimentIntensityAnalyzer()

In [201]:
# Create function that computes average compound sentiment of headlines for each day
def headline_sentiment_summarizer_avg(headlines):
    """Average the compound sentiment score of each day's headlines.

    Each headline is scored with the module-level ``sid`` analyzer and the
    ``"compound"`` value of the result is averaged per day.

    Parameters
    ----------
    headlines : iterable of iterables
        One inner collection of headline strings per day. ``None`` entries
        (articles without a title) are skipped.

    Returns
    -------
    list of float
        One mean compound score per day, in input order. A day with no usable
        headline yields ``float("nan")`` rather than raising
        ``ZeroDivisionError``.
    """
    sentiment = []
    for day in headlines:
        day_scores = [
            sid.polarity_scores(headline)["compound"]
            for headline in day
            if headline is not None
        ]
        # Guard against empty days: there is nothing to average, so mark the
        # day as missing instead of dividing by zero.
        if day_scores:
            sentiment.append(sum(day_scores) / len(day_scores))
        else:
            sentiment.append(float("nan"))
    return sentiment

In [202]:
# Get the per-day average compound sentiment for each topic.
# Each result is a list with one value per day, in the order the days were fetched.
tsla_avg = headline_sentiment_summarizer_avg(tsla_headlines)
inflation_avg = headline_sentiment_summarizer_avg(inflation_headlines)
# economy_avg = headline_sentiment_summarizer_avg(economy_headlines)
security_avg = headline_sentiment_summarizer_avg(security_headlines)
covid_avg = headline_sentiment_summarizer_avg(covid_headlines)


In [203]:
# Create a DataFrame with one column per topic and one row per fetched day.
# The lists must all have the same length for the constructor to accept them.
topic_sentiments = pd.DataFrame(
    {"tsla_avg": tsla_avg, 
    "inflation_avg": inflation_avg, 
    "security_avg": security_avg, 
    "covid_avg": covid_avg
    }
)

In [204]:
# Set the index of the sentiment averages DataFrame to the series of dates
# returned by get_headlines, so that rows can be aligned with the returns by day.
# NOTE(review): tesla_df's index was converted to datetime.date objects, whereas
# this is a DatetimeIndex — confirm the later join aligns the two as intended.
topic_sentiments.index = pd.to_datetime(dates)

In [205]:
# Merge with TSLA returns on the date index; days missing either a return
# (e.g. no trading bar) or any sentiment value are dropped.
# NOTE(review): this rebinds topic_sentiments to the merged frame, so the cell
# is not idempotent — re-run the cells that build topic_sentiments first.
topic_sentiments = tesla_returns.join(topic_sentiments).dropna(how="any")

# Display data
display(topic_sentiments)

Unnamed: 0,close,tsla_avg,inflation_avg,security_avg,covid_avg
2021-06-30,-0.00144,0.069695,0.00874,-0.0897,-0.032705
2021-07-01,-0.002736,-0.062505,-0.102815,0.15791,0.0152
2021-07-02,0.001446,-0.010915,0.03532,-0.068425,0.118655


In [207]:
# Pairwise correlation between TSLA returns ('close') and each topic's average
# sentiment, rendered with a colour gradient.
# NOTE(review): the merged frame only has as many rows as trading days in the
# short query window, so these coefficients rest on very few observations.
topic_sentiments.corr().style.background_gradient()

Unnamed: 0,close,tsla_avg,inflation_avg,security_avg,covid_avg
close,1.0,0.180634,0.848756,-0.684342,0.812428
tsla_avg,0.180634,1.0,0.673401,-0.840782,-0.426719
inflation_avg,0.848756,0.673401,1.0,-0.966409,0.381238
security_avg,-0.684342,-0.840782,-0.966409,1.0,-0.130832
covid_avg,0.812428,-0.426719,0.381238,-0.130832,1.0
