In [1]:
# Initial imports
import os
import pandas as pd
from dotenv import load_dotenv
import nltk as nltk
import requests
from pathlib import Path
import datetime as dt

from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Fetch the VADER lexicon (nltk reports it as up-to-date when it is already
# present locally), then build the single analyzer instance that the
# sentiment-scoring cell reuses for every article.
nltk.download("vader_lexicon")
analyzer = SentimentIntensityAnalyzer()

%matplotlib inline

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [2]:
# Read the News API key from the environment (after loading any .env file).
load_dotenv()
api_key = os.getenv("NEWS_API_KEY")

# os.getenv returns None when the variable is unset; fail here with a clear
# message instead of handing an empty key to the API client.
if not api_key:
    raise RuntimeError(
        "NEWS_API_KEY is not set; define it in the environment or in a .env file."
    )

In [3]:
# Create a newsapi client
from newsapi import NewsApiClient

# The key is deliberately NOT printed: cell output is saved inside the
# notebook file, so echoing it would leak the credential to every reader.
newsapi = NewsApiClient(api_key)

[redacted: API key removed from saved output]


In [4]:
# Query the News API for English-language articles matching "Bitcoin",
# asking for 100 results per page sorted by relevancy.
btc_query_params = {
    "q": "Bitcoin",
    "language": "en",
    "page_size": 100,
    "sort_by": "relevancy",
}
btc_headlines = newsapi.get_everything(**btc_query_params)
btc_headlines

{'status': 'ok',
 'totalResults': 8916,
 'articles': [{'source': {'id': 'the-verge', 'name': 'The Verge'},
   'author': 'Jay Peters',
   'title': 'Block and Blockstream are partnering with Tesla on an off-grid, solar-powered Bitcoin mine in Texas',
   'description': 'Block and Blockstream are partnering with Tesla on an open-source, solar-powered Bitcoin mine, the companies announced Friday. Tesla’s 3.8-megawatt Solar PV array and its 12 megawatt-hour Megapack will power the facility, and construction has started on the p…',
   'url': 'https://www.theverge.com/2022/4/8/23016553/block-blockstream-tesla-solar-bitcoin-mine-texas',
   'urlToImage': 'https://cdn.vox-cdn.com/thumbor/OYrvaaOHBuEpdTeRO55nZnZdexs=/0x215:3000x1786/fit-in/1200x630/cdn.vox-cdn.com/uploads/chorus_asset/file/8937281/acastro_170726_1777_0007_v2.jpg',
   'publishedAt': '2022-04-08T16:02:52Z',
   'content': 'Its set to open later this year\r\nIf you buy something from a Verge link, Vox Media may earn a commission. See 

In [5]:
# Create the BTC sentiment scores DataFrame: one row per article, holding the
# publication timestamp and the VADER polarity scores of the article content.
SENTIMENT_COLUMNS = ["Date", "compound", "positive", "negative", "neutral"]

btc_sentiments = []
for article in btc_headlines["articles"]:
    text = article["content"]
    # Skip articles whose content is not text (e.g. None) with an explicit
    # check, rather than swallowing AttributeError around the whole loop body,
    # which would also hide unrelated bugs.
    if not isinstance(text, str):
        continue

    sentiment = analyzer.polarity_scores(text)
    btc_sentiments.append({
        "Date": article["publishedAt"],
        "compound": sentiment["compound"],
        "positive": sentiment["pos"],
        "negative": sentiment["neg"],
        "neutral": sentiment["neu"],
    })

# Passing the column list keeps the frame well-formed (and the "Date" lookup
# valid) even when no article was scored.
btc_df = (
    pd.DataFrame(btc_sentiments, columns=SENTIMENT_COLUMNS)
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"]))
    .set_index("Date")
    .sort_index()
)
btc_df


Unnamed: 0_level_0,compound,positive,negative,neutral
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-03-13 10:00:00+00:00,-0.4939,0.000,0.094,0.906
2022-03-14 07:10:29+00:00,0.4019,0.096,0.000,0.904
2022-03-14 07:10:29+00:00,0.4019,0.096,0.000,0.904
2022-03-14 13:00:00+00:00,0.3612,0.070,0.000,0.930
2022-03-14 15:15:03+00:00,0.5719,0.153,0.000,0.847
...,...,...,...,...
2022-04-12 16:35:41+00:00,0.0000,0.000,0.000,1.000
2022-04-12 17:18:42+00:00,0.3400,0.072,0.000,0.928
2022-04-13 16:35:03+00:00,0.1027,0.084,0.071,0.844
2022-04-13 18:21:00+00:00,0.7003,0.142,0.000,0.858


In [14]:
# Snap every article timestamp to the nearest 15-minute mark and drop the
# timezone so the index holds naive datetimes.
# NOTE(review): presumably this is meant to line up with the timestamps of the
# price data merged later — confirm both use the same clock (UTC?).
btc_df.index = btc_df.index.round("15min").tz_localize(None)
btc_df

Unnamed: 0_level_0,compound,positive,negative,neutral,comp_score
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-03-13 10:00:00,-0.4939,0.000,0.094,0.906,0
2022-03-14 07:15:00,0.4019,0.096,0.000,0.904,1
2022-03-14 07:15:00,0.4019,0.096,0.000,0.904,1
2022-03-14 13:00:00,0.3612,0.070,0.000,0.930,1
2022-03-14 15:15:00,0.5719,0.153,0.000,0.847,1
...,...,...,...,...,...
2022-04-12 16:30:00,0.0000,0.000,0.000,1.000,1
2022-04-12 17:15:00,0.3400,0.072,0.000,0.928,1
2022-04-13 16:30:00,0.1027,0.084,0.071,0.844,1
2022-04-13 18:15:00,0.7003,0.142,0.000,0.858,1


In [15]:
# Create a binary sentiment label for BTC: 1 when the compound score is
# non-negative, 0 otherwise. The label is stored as an integer (previously the
# strings '1'/'0') so it can be used directly as a numeric target, and the
# vectorized comparison replaces the per-row lambda.
btc_df["comp_score"] = (btc_df["compound"] >= 0).astype(int)

btc_df.tail()

Unnamed: 0_level_0,compound,positive,negative,neutral,comp_score
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-04-12 16:30:00,0.0,0.0,0.0,1.0,1
2022-04-12 17:15:00,0.34,0.072,0.0,0.928,1
2022-04-13 16:30:00,0.1027,0.084,0.071,0.844,1
2022-04-13 18:15:00,0.7003,0.142,0.0,0.858,1
2022-04-13 20:45:00,0.3182,0.138,0.067,0.795,1


In [16]:
# Load the BTC price data, using the "Date" column as a parsed datetime index.
# `infer_datetime_format` is no longer passed: it is deprecated as of
# pandas 2.0 (it only triggers a warning there) and never affected the
# parsed values.
btc_trading_data = pd.read_csv(
    Path("btc_data_5m.csv"),
    index_col="Date",
    parse_dates=True,
)

# Review the DataFrame
btc_trading_data.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume_base,Volume_quote
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-04-14 09:45:00,55206.63,55379.13,55150.03,55321.83,2.15009,118860.885099
2022-04-14 10:00:00,55334.09,55422.99,55246.47,55391.24,2.63174,145654.926789
2022-04-14 10:15:00,55369.95,55432.97,55337.8,55412.13,1.14038,63171.589193
2022-04-14 10:30:00,55417.54,55426.63,55320.49,55391.7,2.00093,110804.222743
2022-04-14 10:45:00,55384.15,55384.15,55347.46,55350.18,0.08381,4639.948764


In [23]:
# Inner-join the price rows and the sentiment rows on their datetime indexes,
# keeping only timestamps that appear in both frames.
result_df = btc_trading_data.merge(
    btc_df,
    how="inner",
    left_index=True,
    right_index=True,
)
result_df

Unnamed: 0_level_0,Open,High,Low,Close,Volume_base,Volume_quote,compound,positive,negative,neutral,comp_score
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-04-04 13:30:00,61377.43,61399.5,61262.46,61283.03,2.22698,136598.404258,0.0,0.0,0.0,1.0,1
2022-04-04 16:45:00,60706.41,60741.97,60573.47,60664.72,1.81363,110009.523467,0.6486,0.142,0.0,0.858,1
2022-04-05 10:15:00,61100.01,61228.41,61039.56,61223.53,5.00989,306103.719278,0.34,0.175,0.111,0.714,1
2022-04-05 11:00:00,61322.79,61332.81,61260.96,61264.37,3.24426,198862.279675,-0.2732,0.0,0.057,0.943,0
2022-04-05 19:30:00,60279.43,60842.1,60072.8,60799.6,2.90905,175389.116666,0.0,0.0,0.0,1.0,1
2022-04-05 19:30:00,60279.43,60842.1,60072.8,60799.6,2.90905,175389.116666,0.2263,0.058,0.0,0.942,1
2022-04-05 20:15:00,60821.34,60898.27,60646.68,60782.19,1.57148,95473.252457,0.2023,0.052,0.0,0.948,1
2022-04-05 20:45:00,60589.03,60600.0,60307.63,60400.13,3.037,183336.655366,-0.5423,0.0,0.104,0.896,0
2022-04-05 23:30:00,60580.7,60641.97,60430.17,60490.15,0.91011,55060.287698,0.0,0.0,0.0,1.0,1
2022-04-06 01:00:00,59525.23,59692.29,59470.51,59551.74,7.47204,445224.258511,-0.128,0.068,0.082,0.85,0


In [25]:
# Write the merged price/sentiment frame to a CSV file, including its index.
result_df.to_csv(path_or_buf="btc_sentiment.csv", index=True)