In [125]:
import os
import requests
import pandas as pd
from dotenv import load_dotenv
import datetime as dt
from datetime import date
import matplotlib.pyplot as plt
import yfinance as yf
import numpy as np

In [126]:
# Load environment variables from .env file
load_dotenv()

# API URL
# fetch_news_headlines() sends NewsAPI-style query parameters (apiKey,
# pageSize, page) and reads the NewsAPI response shape (articles -> title),
# and the key below is read from NEWS_API_KEY, so the endpoint has to be
# NewsAPI's. The Bing News Search URL that was configured here
# ("https://api.bing.microsoft.com/v7.0/news/search") uses a different
# authentication scheme and response schema and cannot work with that function.
base_url = 'https://newsapi.org/v2/everything'

# Fetch API key from environment variables (None when NEWS_API_KEY is unset)
API_KEY = os.getenv('NEWS_API_KEY')

In [127]:
# Load data function
def load_data(ticker):
    """Download the price history for `ticker` through yfinance.

    The date window is taken from the module-level START and TODAY constants.
    They are looked up when the function is called, so both must be defined
    before the first call.

    Args:
        ticker: Symbol passed straight to `yf.download` (e.g. 'BTC-USD').

    Returns:
        The downloaded frame with its index moved into a regular column and
        replaced by a default integer index.
    """
    history = yf.download(ticker, START, TODAY)
    return history.reset_index()

Sentiment Analysis Integration

Headline sentiment is scored with a BERT model loaded through Hugging Face’s `transformers` library.

In [128]:
from transformers import pipeline

In [129]:
def fetch_news_headlines(query='Cryptocurrency', pages=5):
    """Collect article titles for `query` from the news API at `base_url`.

    Requests up to `pages` result pages (100 results requested per page).
    Fetching is best-effort: it stops at the first failed request, unreadable
    response, or empty page, and returns whatever was collected so far.

    Args:
        query: Search term sent as the `q` parameter.
        pages: Maximum number of result pages to request.

    Returns:
        list: Non-empty article titles in the order the API returned them;
        an empty list if nothing could be fetched.
    """
    all_headlines = []

    for page in range(1, pages + 1):
        params = {
            'q': query,             # Query term
            'sortBy': 'publishedAt',# Sort by publication date
            'apiKey': API_KEY,      # Your API key
            'pageSize': 100,        # Number of results per page (max 100)
            'page': page            # Page number
        }

        try:
            # Without a timeout a stalled connection would hang the notebook.
            response = requests.get(base_url, params=params, timeout=30)
        except requests.RequestException as exc:
            print(f"Error fetching news: {exc}")
            break

        if response.status_code != 200:
            print(f"Error fetching news: {response.status_code} - {response.reason}")
            break

        try:
            news_data = response.json()
        except ValueError as exc:
            # A 200 response whose body is not JSON; nothing usable to parse.
            print(f"Error fetching news: invalid JSON response ({exc})")
            break

        articles = news_data.get('articles') or []
        if not articles:
            # Ran past the last page of results; further requests are pointless.
            break

        # Skip entries with a missing or empty title: there is no text to score.
        all_headlines.extend(
            article['title'] for article in articles if article.get('title')
        )

    return all_headlines

In [130]:
def analyze_sentiment_for_headlines(headlines):
    """Run a BERT-based sentiment classifier over each headline.

    The Hugging Face pipeline is built inside the function, so the model is
    loaded on every call.

    Args:
        headlines: Iterable of headline strings.

    Returns:
        list: For each headline, in input order, the first prediction the
        pipeline returns for it.
    """
    classifier = pipeline(
        'sentiment-analysis',
        model='nlptown/bert-base-multilingual-uncased-sentiment',
    )
    # The pipeline returns a list of predictions per call; keep only the first.
    return [classifier(text)[0] for text in headlines]


# Constants: date window read by load_data()
START = "2020-01-01"
TODAY = date.today().isoformat()  # same "YYYY-MM-DD" text as strftime("%Y-%m-%d")

# Load the BTC-USD price history and keep only the numeric price/volume columns
data = load_data('BTC-USD')
df = data.drop(columns=['Date', 'Adj Close'])

# Fetch cryptocurrency-related news headlines (requesting up to 10 pages of 100 headlines each)
headlines = fetch_news_headlines(query='Cryptocurrency', pages=10)

# Only run the sentiment model when at least one headline was fetched
sentiments = analyze_sentiment_for_headlines(headlines) if headlines else []

[*********************100%%**********************]  1 of 1 completed


Error fetching news: 426 - Upgrade Required


In [131]:
# Combine headlines with their sentiment scores
#news_data = pd.DataFrame({'Headline': headlines, 'Sentiment_Label': [sentiment['label'] for sentiment in sentiments], 'Sentiment_Score': [sentiment['score'] for sentiment in sentiments]})
#news_data

In [132]:
# Ensure sentiments align with the dataframe length.
# These names are computed unconditionally: when no headlines were fetched,
# `sentiments` is an empty list and the cell that adds the sentiment columns
# still needs `sentiment_labels`, `sentiment_scores` and `num_sentiments` to
# exist (previously they were only bound inside an `if`, giving a NameError).
sentiment_labels = [sentiment['label'] for sentiment in sentiments]
sentiment_scores = [sentiment['score'] for sentiment in sentiments]

# If there are more sentiments than rows in df, truncate to the row count
num_sentiments = min(len(sentiments), len(df))
sentiment_labels = sentiment_labels[:num_sentiments]
sentiment_scores = sentiment_scores[:num_sentiments]


In [133]:
# Add sentiment labels and scores to the dataframe.
# Only the first `num_sentiments` rows receive values; pandas aligns on the
# index, so every remaining row is left as NaN.
# NOTE(review): headlines are paired with price rows purely by position, so
# the newest-sorted headlines land on the earliest rows (from START). Confirm
# this is intended rather than a join on publication date.
sentiment_rows = df.index[:num_sentiments]
df['Sentiment_Label'] = pd.Series(sentiment_labels, index=sentiment_rows)
df['Sentiment_Score'] = pd.Series(sentiment_scores, index=sentiment_rows)


In [134]:
# Display the price frame with the two sentiment columns added; rows without
# an assigned headline show NaN in those columns.
df

Unnamed: 0,Open,High,Low,Close,Volume,Sentiment_Label,Sentiment_Score
0,7194.892090,7254.330566,7174.944336,7200.174316,18565664997,1 star,0.774917
1,7202.551270,7212.155273,6935.270020,6985.470215,20802083465,5 stars,0.379669
2,6984.428711,7413.715332,6914.996094,7344.884277,28111481032,4 stars,0.440640
3,7345.375488,7427.385742,7309.514160,7410.656738,18444271275,1 star,0.602686
4,7410.451660,7544.497070,7400.535645,7411.317383,19725074095,2 stars,0.375282
...,...,...,...,...,...,...,...
1633,64837.988281,65007.546875,63378.894531,64096.199219,26188171739,,
1634,64113.863281,64475.468750,63929.757812,64252.578125,9858198793,,
1635,64248.964844,64491.703125,63180.796875,63180.796875,11170471802,,
1636,63173.351562,63292.527344,58601.699219,60277.414062,43152133651,,


In [135]:
# Count the rows where both the sentiment label and the sentiment score are present
has_label = df['Sentiment_Label'].notna()
has_score = df['Sentiment_Score'].notna()
count_valid_sentiments = (has_label & has_score).sum()
count_valid_sentiments

100