In [29]:
import yfinance
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import finhub_scraper as scraper
from finhub_scraper import fetch_company_news_finhub
import transformers
import os
import requests
import pandas as pd
from datetime import datetime, timedelta
from typing import Optional
import finnhub

recreate this paper: https://pmc.ncbi.nlm.nih.gov/articles/PMC9955765/

In [30]:
os.environ["FINNHUB_API_KEY"] = "d3c9651r01qu125a70cgd3c9651r01qu125a70d0"

In [35]:
class dataFetcher:


    def __init__(self, ticker: str):
        self.ticker = ticker
        self.data = None
    def fetch_financial_data(self, period: str, interval: str = "1d") -> pd.DataFrame:
        """Fetch historical financial data for a given ticker."""
        try:
            stock = yfinance.Ticker(self.ticker)
            hist = stock.history(period=period, interval=interval)
            return hist
        except Exception as e:
            print(f"Error fetching data for {self.ticker}: {e}")
            return pd.DataFrame()
        
    def fetch_financial_data(self, start_date,end_date)->pd.DataFrame:
        """Fetch historical financial data for a given ticker."""
        try:
            stock = yfinance.Ticker(self.ticker)
            hist = stock.history(start=start_date,end=end_date)
            self.data = hist
            return hist
        except Exception as e:
            print(f"Error fetching data for {self.ticker}: {e}")
            return pd.DataFrame()
    def plot_financial_data(data: pd.DataFrame, ticker: str):
        """Plot closing prices of the financial data."""
        if data.empty:
            print("No data to plot.")
            return
        
        plt.figure(figsize=(10, 5))
        sns.lineplot(data=data, x=data.index, y='Close')
        plt.title(f"{ticker} Closing Prices")
        plt.xlabel("Date")
        plt.ylabel("Closing Price (USD)")
        plt.grid(True)
        plt.show()

    def fetch_company_news_finhub(self, _from_date: str, _end_date:str ) -> pd.DataFrame:
        """Fetch company news from Finnhub for the given ticker and start/end dates

        Args:
            ticker: Stock ticker (e.g., 'AAPL').
            _from_date: Start date in 'YYYY-MM-DD' format.
            _end_date: End date in 'YYYY-MM-DD' format.

        Returns:
            pandas.DataFrame with columns: ['published_date', 'headline', 'url', 'summary', 'source', 'related', 'image']

        Raises:
            ValueError: if API key is not provided.
            requests.RequestException: if the HTTP request fails.
        """

        api_key =  os.getenv("FINNHUB_API_KEY")
        

        symbol = self.ticker.upper()
        today = datetime.today()
        
        start_date=_from_date
        end_date=_end_date
    

        client=finnhub.Client(api_key)
        rows={"published_date": [],
                    "headline":[],
                    "url":[],
                    "summary": [],
                    "source":[],
                    "related": [],
        }
        
        try:
            result=client.company_news(symbol, _from=start_date, to=end_date)
            for item in result:
                published = datetime.fromtimestamp(item.get("datetime")).strftime("%Y-%m-%d %H:%M:%S") if item.get("datetime") else None
                rows["published_date"].append(published)
                rows["headline"].append(item["headline"] or "")
                rows["url"].append(item["url"] or "")
                rows["summary"].append(item["summary"] or "")
                rows["source"].append(item["source"] or "")
                rows["related"].append(item["related"] or "")
                #rows["image"].append(item["image"] or "")

            
        except Exception as e:
            print(f"Error fetching Finnhub news for {symbol}: {e}")
            raise

        df = pd.DataFrame(rows)
        # Sort newest first and return
        if "published_date" in df.columns:
            df = df.sort_values(by="published_date", ascending=False).reset_index(drop=True)

        return df






In [3]:
def calculate_rsi(data: pd.DataFrame, rsi_period: int =14)-> pd.Series:
    """Calculate the RSI for each day in the data"""
    delta = data['Close'].diff()
    gain = (delta.where(delta > 0, 0)).fillna(0)
    loss = (-delta.where(delta < 0, 0)).fillna(0)

    avg_gain = gain.rolling(window=rsi_period, min_periods=1).mean()
    avg_loss = loss.rolling(window=rsi_period, min_periods=1).mean()

    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    return rsi


In [36]:
SAMPLE_TICKERS = ["AAPL", "MSFT", "GOOGL", "AMZN", "TSLA"]
fetcher=dataFetcher("AAPL")
sample_data=fetcher.fetch_financial_data(start_date="10-01-2024",end_date="10-10-2024")

#sample_data['RSI'] = calculate_rsi(sample_data)


#sample_article_data=scraper.scrape_yahoo_finance_headlines("AAPL", days_back=30)

article_data=fetcher.fetch_company_news_finhub("2024-10-01","2024-10-10")
#goal is to merge two datasets so stock price at each date has sentiment score for that date or whichever is closest
article_data.head()

Error fetching data for AAPL: time data '10-10-2024' does not match format '%Y-%m-%d'


Unnamed: 0,published_date,headline,url,summary,source,related
0,2024-10-10 16:39:41,Is Apple Inc. (NASDAQ:AAPL) The Most Popular S...,https://finnhub.io/api/news?id=830a28e313278d7...,We recently made a list of Goldman Sachs’ List...,Yahoo,AAPL
1,2024-10-10 16:25:17,Jeff Bezos-Led Amazon Almost Ran Out Of Cash 2...,https://finnhub.io/api/news?id=c799c529be36965...,Jeff Bezos’ Amazon.com Inc. (NASDAQ:AMZN) once...,Yahoo,AAPL
2,2024-10-10 14:03:39,Apple: High Valuation Does Not Account For The...,https://finnhub.io/api/news?id=415af33a105481f...,We warn of potential Apple troubles with China...,SeekingAlpha,AAPL
3,2024-10-10 13:51:00,Apple CEO Tim Cook Just Sold $50 Million of St...,https://finnhub.io/api/news?id=f686835886ead4d...,"Cook acquired 477,301 Apple shares on Oct. 1 t...",Yahoo,AAPL
4,2024-10-10 13:34:26,"Apple Stock Downgraded, Needs Time To 'Ripen'",https://finnhub.io/api/news?id=96bd1a324aa15f1...,Apple stock fell Thursday after brokerage firm...,Yahoo,AAPL
