# Correlating Returns

In [1]:
!pip install python-dotenv
!pip install newsapi-python
!pip install -U textblob

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting python-dotenv
  Downloading python_dotenv-0.20.0-py3-none-any.whl (17 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-0.20.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting newsapi-python
  Downloading newsapi_python-0.2.6-py2.py3-none-any.whl (7.9 kB)
Installing collected packages: newsapi-python
Successfully installed newsapi-python-0.2.6
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting textblob
  Downloading textblob-0.17.1-py2.py3-none-any.whl (636 kB)
[K     |████████████████████████████████| 636 kB 20.4 MB/s 
Installing collected packages: textblob
  Attempting uninstall: textblob
    Found existing installation: textblob 0.15.3
    Uninstalling textblob-0.15.3:
      Successfully uninstalled textblob-0.15.3
Succ

In [4]:
import os
import pandas as pd
from datetime import datetime, timedelta
from dotenv import load_dotenv
from newsapi.newsapi_client import NewsApiClient
from textblob import TextBlob
from pathlib import Path

## Load API Keys from Environment Variables

In [196]:
# Load .env environment variables into the process environment
load_dotenv()

# Set News API Key: build the client from the "christine_news_api" environment
# variable. Indexing os.environ raises KeyError here if the variable is not set.
newsapi = NewsApiClient(api_key=os.environ["christine_news_api"])


In [197]:
# Take "now" directly in New York time. Passing a naive datetime.now() to
# pd.Timestamp(..., tz=...) only relabels the host's local wall-clock time as
# New York time, which is wrong on hosts running in another zone (e.g. UTC).
# Sampling the clock once also keeps both ends of the window consistent.
_now_ny = pd.Timestamp.now(tz="America/New_York")
current_date = _now_ny.isoformat()
# DateOffset(days=...) steps back whole calendar days (wall-clock time is kept
# across DST changes), so the window is always 31 calendar dates wide.
past_date = (_now_ny - pd.DateOffset(days=31)).isoformat()

## News API
  

In [198]:
# Use newsapi client to get most relevant 10 headlines per day in the past month
def get_headlines(keyword):
    """Collect up to 10 English headlines per day for ``keyword``.

    Walks backwards one day at a time from the date in the module-level
    ``current_date`` down to, but not including, the date in ``past_date``,
    querying the module-level ``newsapi`` client once per day (first page,
    sorted by relevancy).

    Args:
        keyword: Search phrase passed as the ``q`` parameter of the query.

    Returns:
        A ``(all_headlines, all_dates)`` tuple of equal-length lists, newest
        day first. ``all_headlines[i]`` is the list of article titles returned
        for ``all_dates[i]`` (a ``datetime`` at midnight).
    """
    # Only the YYYY-MM-DD prefix of the ISO timestamps is used.
    newest_day = datetime.strptime(current_date[:10], "%Y-%m-%d")
    oldest_day = datetime.strptime(past_date[:10], "%Y-%m-%d")

    print(f"Fetching news about '{keyword}'")
    print("*" * 30)

    all_headlines, all_dates = [], []
    # Both bounds are at midnight, so the difference is a whole number of days;
    # a non-positive span yields an empty range and no requests.
    for days_back in range((newest_day - oldest_day).days):
        day = newest_day - timedelta(days=days_back)
        print(f"retrieving news from: {day}")
        day_str = day.date().isoformat()
        response = newsapi.get_everything(
            q=keyword,
            from_param=day_str,
            to=day_str,
            language="en",
            sort_by="relevancy",
            page_size=10,
            page=1,
        )
        all_headlines.append([article["title"] for article in response["articles"]])
        all_dates.append(day)
    return all_headlines, all_dates

Note: Be aware that running the 3 requests below will only work once within a 24-hour period due to the request limits imposed by the API provider.

In [199]:
# Get first topic: per-day headline lists for "Binance USD", plus the matching
# list of dates (later cells reuse `dates` as a DataFrame index)
busd_headlines, dates = get_headlines("Binance USD")


Fetching news about 'Binance USD'
******************************
retrieving news from: 2022-08-01 00:00:00
retrieving news from: 2022-07-31 00:00:00
retrieving news from: 2022-07-30 00:00:00
retrieving news from: 2022-07-29 00:00:00
retrieving news from: 2022-07-28 00:00:00
retrieving news from: 2022-07-27 00:00:00
retrieving news from: 2022-07-26 00:00:00
retrieving news from: 2022-07-25 00:00:00
retrieving news from: 2022-07-24 00:00:00
retrieving news from: 2022-07-23 00:00:00
retrieving news from: 2022-07-22 00:00:00
retrieving news from: 2022-07-21 00:00:00
retrieving news from: 2022-07-20 00:00:00
retrieving news from: 2022-07-19 00:00:00
retrieving news from: 2022-07-18 00:00:00
retrieving news from: 2022-07-17 00:00:00
retrieving news from: 2022-07-16 00:00:00
retrieving news from: 2022-07-15 00:00:00
retrieving news from: 2022-07-14 00:00:00
retrieving news from: 2022-07-13 00:00:00
retrieving news from: 2022-07-12 00:00:00
retrieving news from: 2022-07-11 00:00:00
retrieving 

In [200]:
# Build a one-column frame of per-day headline lists, indexed by the day fetched
busd_df = pd.DataFrame(
    {"busd_headlines": busd_headlines},
    index=pd.to_datetime(dates),
)

In [201]:
# Write articles to csv file
# NOTE(review): each "busd_headlines" cell holds a Python list, so the CSV will
# presumably contain its string repr — confirm that suits whoever reads it back.
busd_path = Path('busd.csv')
# Make sure the target directory exists (for a bare filename the parent is ".")
busd_path.parent.mkdir(parents=True, exist_ok=True)
busd_df.to_csv(busd_path)

In [172]:
# Create function that computes average TextBlob polarity of headlines for each day
def headline_sentiment_summarizer_avg(headlines):
    """Average the TextBlob polarity of each day's headlines.

    Args:
        headlines: Iterable of days, where each day is an iterable of headline
            strings. ``None`` entries (articles without a title) are skipped.

    Returns:
        A list with one float per day, in input order: the mean polarity of
        that day's non-``None`` headlines, or ``nan`` when the day has no
        usable headline.
    """
    sentiment = []
    for day in headlines:
        day_score = [TextBlob(h).sentiment.polarity for h in day if h is not None]
        if day_score:
            sentiment.append(sum(day_score) / len(day_score))
        else:
            # No headlines (or only None titles): the mean is undefined. Use NaN
            # rather than raising ZeroDivisionError or inventing a neutral 0.0,
            # so the day reads as missing data in pandas.
            sentiment.append(float("nan"))
    return sentiment

In [173]:
# Get averages of each topic's sentiment
# NOTE(review): `btc_headlines` is not defined in any visible cell — presumably
# left over from an earlier get_headlines(...) call in the same kernel; confirm,
# otherwise this cell fails on Restart & Run All.
btc_avg = headline_sentiment_summarizer_avg(btc_headlines)
# eth_avg = headline_sentiment_summarizer_avg(eth_headlines)
# busd_avg = headline_sentiment_summarizer_avg(busd_headlines)


In [174]:
# Inspect the per-day average polarity scores
btc_avg

[0.07366666666666669,
 0.08249999999999999,
 0.033148148148148156,
 -0.005176767676767682,
 0.024444444444444453,
 0.004999999999999999,
 0.04271335807050093,
 0.046666666666666655,
 0.11444444444444443,
 0.049444444444444444,
 -0.018888888888888893,
 0.03277777777777778,
 -0.04416666666666665,
 0.05583333333333333,
 0.039999999999999994,
 0.07944444444444443,
 0.07666666666666667,
 0.014913419913419906,
 0.023333333333333338,
 0.05319444444444445,
 -0.0375,
 0.08983766233766234,
 0.017803030303030303,
 0.10696969696969698,
 -0.019166666666666672,
 0.039444444444444435,
 -0.13462752525252525,
 -0.0019886363636363647,
 0.012045454545454548,
 0.13666666666666666,
 0.02666666666666666]

In [175]:
# Combine Sentiment Averages into DataFrame (one column per topic; only the
# BTC averages are included while the BUSD column stays commented out)
topic_sentiments = pd.DataFrame(
    {
        "btc_avg": btc_avg,
        # "busd_avg": busd_avg
    }
)

In [176]:
# Set the index value of the sentiment averages DataFrame to be the series of dates.
# NOTE(review): `dates` comes from the "Binance USD" fetch; this assumes the BTC
# headlines were collected over the same days, in the same order — confirm.
topic_sentiments.index = pd.to_datetime(dates)

In [177]:
# Display the date-indexed sentiment averages
topic_sentiments

Unnamed: 0,btc_avg
2022-08-01,0.073667
2022-07-31,0.0825
2022-07-30,0.033148
2022-07-29,-0.005177
2022-07-28,0.024444
2022-07-27,0.005
2022-07-26,0.042713
2022-07-25,0.046667
2022-07-24,0.114444
2022-07-23,0.049444


In [None]:
# # Merge with AAPL returns
# topic_sentiments = aapl_returns.join(topic_sentiments).dropna(how="any")

# # Display data
# display(topic_sentiments)

In [178]:
# Correlate the headlines' sentiment to returns
# NOTE(review): the frame currently holds only `btc_avg` (the returns join is
# commented out), so this matrix is a single self-correlation of 1.0 — confirm
# whether a returns column should be joined in before this step.
topic_sentiments.corr().style.background_gradient()

Unnamed: 0,btc_avg
btc_avg,1.0
