In [1]:
# Initial imports
import os
import pandas as pd
from dotenv import load_dotenv
import nltk as nltk
import requests
from pathlib import Path
import datetime as dt

# Fetch the VADER lexicon before the analyzer is imported and instantiated
# (presumably the analyzer loads this lexicon when constructed - verify).
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Single shared analyzer instance, reused by the sentiment-scoring cell below.
analyzer = SentimentIntensityAnalyzer()

%matplotlib inline

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [2]:
# Read the NewsAPI key from the NEWS_API_KEY environment variable.
# load_dotenv() runs first so that a .env file can supply the variable.
load_dotenv()
api_key = os.getenv("NEWS_API_KEY")

# Fail fast with a clear message instead of carrying a missing key forward
# and only discovering the problem when the first API request is made.
if not api_key:
    raise RuntimeError(
        "NEWS_API_KEY is not set; define it in the environment or in a .env file."
    )

In [3]:
# Create a newsapi client authenticated with the key loaded above
from newsapi import NewsApiClient
newsapi = NewsApiClient(api_key)
# Deliberately do NOT print or display `api_key`: cell output is stored inside
# the saved notebook, so echoing the key would leak the credential to anyone
# who can read the file.

[REDACTED - API key removed from saved output; rotate the exposed key]


In [None]:
# Ask interactively for the search term; `q` is the query sent to the news API
headline_prompt = "What headline?"
q = input(headline_prompt)

In [4]:
# Fetch news articles matching the search term `q`: English only,
# page size of 100, sorted by relevancy
search_params = {
    "q": q,
    "language": "en",
    "page_size": 100,
    "sort_by": "relevancy",
}
btc_headlines = newsapi.get_everything(**search_params)
btc_headlines

{'status': 'ok',
 'totalResults': 719,
 'articles': [{'source': {'id': 'wired', 'name': 'Wired'},
   'author': 'Morgan Meaker',
   'title': 'Europe’s Biggest Lithium Mine Is Caught in a Political Maelstrom',
   'description': 'Europe wants to source EV materials within its own borders. But fierce opposition ahead of the elections in Serbia shows locals don’t trust mining companies.',
   'url': 'https://www.wired.com/story/serbia-europe-lithium-mining-electric-cars/',
   'urlToImage': 'https://media.wired.com/photos/62475d33edc71717de2ded87/191:100/w_1280,c_limit/Serbia-Rio-Tinto-EV-Mining-Business-1237164181.jpg',
   'publishedAt': '2022-04-02T11:00:00Z',
   'content': 'Arriving soon after a year marked by protests, this weekends election was supposed to be the breakthrough movement for Serbias environmentalists, says Engjellushe Morina, senior policy fellow at the … [+3122 chars]'},
  {'source': {'id': None, 'name': 'New York Times'},
   'author': 'Ana Swanson',
   'title': 'Biden Inv

In [24]:
# Create the sentiment scores DataFrame for the fetched articles:
# one row per article, indexed by publication time, holding the VADER
# compound / positive / negative / neutral scores of the article content.
SENTIMENT_COLUMNS = ["Date", "compound", "positive", "negative", "neutral"]

btc_sentiments = []

for article in btc_headlines["articles"]:
    text = article["content"]
    # Articles without textual content cannot be scored. The previous version
    # relied on the analyzer raising AttributeError for them and swallowed the
    # exception; skip them explicitly so unrelated errors are no longer hidden.
    if not isinstance(text, str):
        continue

    sentiment = analyzer.polarity_scores(text)
    btc_sentiments.append({
        "Date": article["publishedAt"],
        "compound": sentiment["compound"],
        "positive": sentiment["pos"],
        "negative": sentiment["neg"],
        "neutral": sentiment["neu"],
    })

# Passing `columns` keeps the expected schema even when no article was scored,
# so the "Date" lookup below cannot raise KeyError on an empty result.
btc_df = pd.DataFrame(btc_sentiments, columns=SENTIMENT_COLUMNS)
btc_df["Date"] = pd.to_datetime(btc_df["Date"])
btc_df = btc_df.set_index("Date")
btc_df = btc_df.sort_index()
btc_df


Unnamed: 0_level_0,compound,positive,negative,neutral
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-03-18 14:47:59+00:00,0.7430,0.156,0.000,0.844
2022-03-18 15:04:00+00:00,0.7184,0.182,0.000,0.818
2022-03-18 16:38:55+00:00,0.8360,0.235,0.000,0.765
2022-03-18 17:25:10+00:00,0.0000,0.000,0.000,1.000
2022-03-20 00:04:32+00:00,0.1531,0.054,0.000,0.946
...,...,...,...,...
2022-04-18 21:40:00+00:00,0.0000,0.000,0.000,1.000
2022-04-18 21:50:59+00:00,0.0000,0.000,0.000,1.000
2022-04-18 23:50:00+00:00,-0.2960,0.000,0.064,0.936
2022-04-19 02:41:00+00:00,0.5267,0.121,0.000,0.879


In [25]:
# Snap every timestamp to the nearest 15-minute mark, then drop the timezone
# so the index is timezone-naive (presumably to line up with the price data
# index used in the merge further down - verify).
rounded_index = btc_df.index.round('15min')
btc_df.index = rounded_index.tz_localize(None)
btc_df

Unnamed: 0_level_0,compound,positive,negative,neutral
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-03-18 14:45:00,0.7430,0.156,0.000,0.844
2022-03-18 15:00:00,0.7184,0.182,0.000,0.818
2022-03-18 16:45:00,0.8360,0.235,0.000,0.765
2022-03-18 17:30:00,0.0000,0.000,0.000,1.000
2022-03-20 00:00:00,0.1531,0.054,0.000,0.946
...,...,...,...,...
2022-04-18 21:45:00,0.0000,0.000,0.000,1.000
2022-04-18 21:45:00,0.0000,0.000,0.000,1.000
2022-04-18 23:45:00,-0.2960,0.000,0.064,0.936
2022-04-19 02:45:00,0.5267,0.121,0.000,0.879


In [26]:
# Create a binary sentiment label from the compound score:
# '1' when the score is non-negative, '0' when it is negative
is_non_negative = btc_df['compound'].ge(0)
btc_df['comp_score'] = is_non_negative.map({True: '1', False: '0'})

btc_df.tail()

Unnamed: 0_level_0,compound,positive,negative,neutral,comp_score
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-04-18 21:45:00,0.0,0.0,0.0,1.0,1
2022-04-18 21:45:00,0.0,0.0,0.0,1.0,1
2022-04-18 23:45:00,-0.296,0.0,0.064,0.936,0
2022-04-19 02:45:00,0.5267,0.121,0.0,0.879,1
2022-04-19 05:00:00,0.0,0.0,0.0,1.0,1


In [28]:
# Load the BTC price data, using the "Date" column as a parsed datetime index.
# `infer_datetime_format` is intentionally not passed: it is deprecated since
# pandas 2.0 (it only emits a warning there) and parsing is unaffected.
btc_trading_data = pd.read_csv(
    Path("Sentiment_data/btc_data_5m.csv"),
    index_col="Date",
    parse_dates=True,
)

# Review the DataFrame
btc_trading_data.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume_base,Volume_quote
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-04-19 08:15:00,55155.87,55200.31,55121.01,55121.01,3.11221,171690.325162
2022-04-19 08:30:00,55121.01,55265.87,55110.97,55255.67,1.75836,97031.437113
2022-04-19 08:45:00,55254.41,55324.55,55221.07,55221.07,2.12392,117354.63636
2022-04-19 09:00:00,55208.59,55225.92,55150.33,55178.9,3.01601,166483.479149
2022-04-19 09:15:00,55165.19,55199.89,55165.19,55195.68,0.77454,42743.37523


In [29]:
# Combine price rows and sentiment rows on their datetime indexes; with the
# default inner join only timestamps present in both frames are kept
result_df = btc_trading_data.merge(
    btc_df,
    left_index=True,
    right_index=True,
)
result_df

Unnamed: 0_level_0,Open,High,Low,Close,Volume_base,Volume_quote,compound,positive,negative,neutral,comp_score
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-04-09 15:45:00,56931.28,56942.18,56800.0,56870.1,1.1412,64894.330694,0.7964,0.202,0.0,0.798,1
2022-04-10 19:00:00,57981.4,57981.4,57893.23,57894.91,0.22748,13178.550566,0.128,0.108,0.096,0.795,1
2022-04-11 11:00:00,55815.22,55944.14,55799.74,55944.14,4.40993,246473.125716,0.2732,0.068,0.0,0.932,1
2022-04-11 13:15:00,55423.29,55428.12,55199.0,55322.47,1.84872,102289.617215,0.0,0.0,0.0,1.0,1
2022-04-11 14:45:00,55123.6,55238.05,55039.54,55051.26,2.42391,133614.550951,-0.6249,0.0,0.121,0.879,0
2022-04-11 23:45:00,53562.29,53672.74,53300.01,53348.14,4.65824,249197.837169,0.0,0.0,0.0,1.0,1
2022-04-12 17:15:00,53671.95,53819.82,53559.28,53804.44,1.05405,56528.685926,0.7783,0.201,0.0,0.799,1
2022-04-12 18:15:00,53517.09,53613.19,53420.0,53422.67,1.00029,53498.70363,0.6369,0.148,0.0,0.852,1
2022-04-13 00:30:00,53930.67,53930.67,53841.89,53869.34,1.48446,80000.739811,-0.2732,0.062,0.09,0.848,0
2022-04-13 10:00:00,54054.55,54227.73,54032.48,54076.87,5.84132,316071.645408,0.4767,0.105,0.0,0.895,1


In [31]:
# Save the merged price + sentiment frame as a CSV file, index included
output_path = "Sentiment_data/lithium_sentiment.csv"
result_df.to_csv(output_path, index=True)