In [5]:
import yfinance as yf

tickers = yf.Tickers('msft aapl goog')

# access each ticker using (example)
tickers.tickers['MSFT'].info["longName"]

'Microsoft Corporation'

#### Fetching Microsoft Stock Data (official code example from yfinance)

In [3]:
import yfinance as yf

msft = yf.Ticker("MSFT")

# get all stock info
msft.info

# get historical market data
hist = msft.history(period="1mo")

# show meta information about the history (requires history() to be called first)
msft.history_metadata

# show actions (dividends, splits, capital gains)
msft.actions
msft.dividends
msft.splits
msft.capital_gains  # only for mutual funds & etfs

# show share count
msft.get_shares_full(start="2022-01-01", end=None)

# show financials:
# - income statement
msft.income_stmt
msft.quarterly_income_stmt
# - balance sheet
msft.balance_sheet
msft.quarterly_balance_sheet
# - cash flow statement
msft.cashflow
msft.quarterly_cashflow
# see `Ticker.get_income_stmt()` for more options

# show holders
msft.major_holders
msft.institutional_holders
msft.mutualfund_holders
msft.insider_transactions
msft.insider_purchases
msft.insider_roster_holders

# show recommendations
msft.recommendations
msft.recommendations_summary
msft.upgrades_downgrades

# Show future and historic earnings dates, returns at most next 4 quarters and last 8 quarters by default.
# Note: If more are needed use msft.get_earnings_dates(limit=XX) with increased limit argument.
msft.earnings_dates

# show ISIN code - *experimental*
# ISIN = International Securities Identification Number
msft.isin

# show options expirations
msft.options

# show news
msft.news

# get option chain for specific expiration
opt = msft.option_chain('2024-06-21')
# data available via: opt.calls, opt.puts

#### Example: Data for Google Stock 

In [1]:
import yfinance as yf
from src.utils import display_df

# Get stock data for Apple (AAPL)
ticker = yf.Ticker("GOOGL")

# Get historical market data
# hist = ticker.history(period="max")  # Get all available data
# or
hist = ticker.history(period="6mo")  # Get data for the past year

# Display the data
hist.tail()


Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-06-21 00:00:00-04:00,177.0,180.850006,176.610001,179.630005,58582700,0.0,0.0
2024-06-24 00:00:00-04:00,180.160004,180.889999,178.669998,179.220001,18298000,0.0,0.0
2024-06-25 00:00:00-04:00,179.619995,184.289993,179.419998,184.029999,23235600,0.0,0.0
2024-06-26 00:00:00-04:00,182.630005,184.509995,182.479996,183.880005,19839000,0.0,0.0
2024-06-27 00:00:00-04:00,184.179993,186.050003,184.020004,185.410004,18830500,0.0,0.0


In [31]:
start_date = "2024-06-22"
end_date = "2024-06-23"
asset = "GOOGL"
interval="2m"
data = yf.download(tickers=asset,start=start_date,end=end_date, interval=interval)
data.head()

[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['GOOGL']: YFPricesMissingError('$%ticker%: possibly delisted; No price data found  (2m 2024-06-22 -> 2024-06-23)')


$GOOGL: possibly delisted; No price data found  (2m 2024-06-22 -> 2024-06-23)


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1


In [15]:
import yfinance as yf

def download_yfinance_data(asset, start_date, end_date, interval):
    """Downloads historical data from Yahoo Finance for a given asset.

    Args:
        asset (str): Ticker symbol of the asset (e.g., "GOOGL").
        start_date (str): Start date in the format "YYYY-MM-DD".
        end_date (str): End date in the format "YYYY-MM-DD".
        interval (str): Data interval (e.g., "1m", "2m", "1h", "1d").

    Returns:
        pandas.DataFrame: The downloaded historical data.
    """
    try:
        data = yf.download(asset, start=start_date, end=end_date, interval=interval)
        return data
    except yf.YFinanceError as e:
        print(f"Error downloading data for {asset}: {e}")
        return None

data = download_yfinance_data("GOOGL", "2022-12-30", "2022-12-31", "5m")

[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['GOOGL']: YFChartError('%ticker%: 5m data not available for startTime=1672376400 and endTime=1672462800. The requested range must be within the last 60 days.')


In [1]:
import pandas as pd
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import BDay
from datetime import timedelta
import yfinance as yf

def download_yfinance_data_chunked(asset, start_date, end_date, interval, chunk_size=25):
    all_data = pd.DataFrame()
    current_date = pd.to_datetime(start_date)
    cal = USFederalHolidayCalendar()

    max_days_allowed = {
        "1m": 7, 
        "2m": 7,
        "5m": 60,
        "15m": 60,
        "30m": 60,
        "60m": 730,  
        "90m": 60,  
        "1h": 730,   
    }.get(interval, 730)  # Default to 730 for other intervals (1d, 5d, etc.)

    while current_date < pd.to_datetime(end_date):
        days_remaining = (pd.to_datetime(end_date) - current_date).days
        chunk_size = min(chunk_size, max_days_allowed, days_remaining) 
        
        next_date = current_date + timedelta(days=chunk_size)

        # Handle weekends and holidays
        while next_date.weekday() > 4 or next_date in cal.holidays():
            next_date -= timedelta(days=1)

        # Ensure we're fetching valid business days
        current_date = current_date + BDay(0)
        next_date = next_date + BDay(0)

        try:
            chunk = yf.download(asset, start=current_date, end=next_date, interval=interval)
            if not chunk.empty:
                all_data = pd.concat([all_data, chunk])
        except yf.YFinanceError as e:
            print(f"Error downloading data for {asset}: {e}")

        current_date = next_date

    return all_data


start_date = "2024-06-20"
end_date = "2024-06-28"
asset = "GOOGL"
interval="5m"
data = download_yfinance_data_chunked(asset,start_date,end_date, interval=interval)
data.head()



[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-06-20 09:30:00-04:00,175.399994,177.289993,175.100006,176.699997,176.699997,1594228
2024-06-20 09:35:00-04:00,176.729507,176.970001,176.300003,176.639999,176.639999,480710
2024-06-20 09:40:00-04:00,176.639999,176.660004,175.699997,175.800003,175.800003,327361
2024-06-20 09:45:00-04:00,175.800003,175.849304,175.229996,175.430099,175.430099,444800
2024-06-20 09:50:00-04:00,175.440002,175.529999,174.990005,175.264999,175.264999,556749


In [44]:
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2574 entries, 2024-05-10 09:30:00-04:00 to 2024-06-27 15:55:00-04:00
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       2574 non-null   float64
 1   High       2574 non-null   float64
 2   Low        2574 non-null   float64
 3   Close      2574 non-null   float64
 4   Adj Close  2574 non-null   float64
 5   Volume     2574 non-null   int64  
dtypes: float64(5), int64(1)
memory usage: 140.8 KB


In [4]:
import pandas as pd
from prophet import Prophet
from matplotlib import pyplot as plt
from io import BytesIO
import base64

def forecast_and_plot_time_series(data, image_generation_usecase=None):
    """Performs time series forecasting with Prophet, plots the results, and returns a base64 image string.

    Args:
        data (pd.DataFrame): DataFrame with a 'ds' column (dates) and a 'y' column (values).
        image_generation_usecase (str, optional): Placeholder for future use case customization.

    Returns:
        str: Base64 encoded string of the generated plot image.
    """

    model = Prophet()
    model.fit(data)

    future = model.make_future_dataframe(periods=30)
    forecast = model.predict(future)

    # Create the plot
    fig = model.plot(forecast)
    plt.xlabel("Date")
    plt.ylabel("Value")
    plt.title(f"Forecast for {data['y'].name}")  # Use the column name as the title

    # Save plot to a BytesIO object
    buf = BytesIO()
    fig.savefig(buf, format="png")
    buf.seek(0)

    # Encode the image in base64
    image_base64 = base64.b64encode(buf.read()).decode()
    
    plt.close(fig) # Close the figure to free up memory

    return image_base64


df = download_yfinance_data_chunked("GOOGL", "2024-06-20", "2024-06-28", "2m")

# Rename columns to match Prophet's expected format
df.rename(columns={'Datetime':'ds', 'Close':'y'}, inplace=True)

image_data = forecast_and_plot_time_series(df)

# Display the images
image_data


AttributeError: `np.float_` was removed in the NumPy 2.0 release. Use `np.float64` instead.

In [17]:
import yfinance as yf
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import io
from base64 import b64encode

def forecast_and_plot_stock_arima(asset, start_date, end_date, interval='1d', order=(5, 1, 0), forecast_periods=5, filename=None):
    """Fetches stock data, forecasts using ARIMA, plots, and returns a base64 image.

    Args:
        asset (str): Ticker symbol (e.g., 'GOOGL').
        start_date (str): Start date in "YYYY-MM-DD" format.
        end_date (str): End date in "YYYY-MM-DD" format.
        interval (str, optional): Data interval (default: '1d').
        order (tuple, optional): ARIMA order (p, d, q) (default: (5, 1, 0)).
        forecast_periods (int, optional): Number of periods to forecast (default: 5).

    Returns:
        tuple: (base64 image string, result dictionary)
    """

    try:
        # Fetch data (using our custom download function)
        data = download_yfinance_data_chunked(asset, start_date, end_date, interval)

        if data is None or data.empty:
            return None, {"error": "Failed to download data."}

        data.rename(columns={'Datetime': 'ds', 'Close': 'y'}, inplace=True)

        # Fit ARIMA model
        model = ARIMA(data['y'], order=order)
        model_fit = model.fit()

        # Forecast
        forecast = model_fit.forecast(steps=forecast_periods)
        forecast_dates = pd.date_range(start=data['ds'].iloc[-1], periods=forecast_periods + 1, freq='B')[1:] 

        # Result data for frontend
        result = {
            'labels': data['ds'].dt.strftime('%Y-%m-%d').tolist() + forecast_dates.strftime('%Y-%m-%d').tolist(),
            'actual': data['y'].tolist(),
            'forecast': list(data['y'][-1:]) + forecast.tolist()
        }

        # Plotting
        plt.figure(figsize=(10, 6))
        plt.plot(data['ds'], data['y'], label='Actual', color='blue', marker='o')  # Added markers for clarity
        plt.plot(forecast_dates, forecast, label='Forecast', color='red', linestyle='dashed')
        plt.xlabel('Date')
        plt.ylabel('Price')
        plt.title(f'{asset} Stock Price Forecast')
        plt.legend()
        #Display the plot
        #plt.show()

        # Save plot to buffer and optionally to file
        img_buffer = io.BytesIO()
        plt.savefig(img_buffer, format='png')
        img_buffer.seek(0)
        img_base64 = b64encode(img_buffer.read()).decode()

        if filename:
            plt.savefig(filename)

        plt.close()  # Close the figure

        # Display in Jupyter notebook
        display(Image(data=img_base64))
        
        #Display the results
        display(HTML(f'<h3>Forecast Data:</h3>'))
        display(result)

        return img_base64, result

    except Exception as e:
        print(f"Error in forecasting and plotting: {e}")
        return None, {"error": str(e)}


# Example usage
# This will display the plot in the notebook and save it as "GOOGL_forecast.png"
image_data, result = forecast_and_plot_stock_arima("GOOGL", "2023-01-01", "2023-06-28", filename="GOOGL_forecast.png")

# if image_data:
    # In your frontend (e.g., JavaScript):
    # <img src="data:image/png;base64,{{ image_data }}" alt="ARIMA Forecast Plot"> 


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Error in forecasting and plotting: 'ds'


  return get_prediction_index(
  return get_prediction_index(


In [19]:
print(result)

{'error': "'ds'"}


In [14]:
from IPython.display import Image
from IPython.core.display import HTML 

# ... (Your existing code for fetching data and forecasting with ARIMA) ...

# This will display the plot in the notebook and save it as "GOOGL_forecast.png"
image_data, result = forecast_and_plot_stock_arima("GOOGL", "2023-01-01", "2023-06-28", filename="GOOGL_forecast.png")

if image_data:
    display(Image(data=image_data))

    #Display the results
    display(HTML(f'<h3>Forecast Data:</h3>'))
    display(result)



TypeError: forecast_and_plot_stock_arima() got an unexpected keyword argument 'filename'

In [10]:
image_data, result = forecast_and_plot_stock_arima("GOOGL", "2023-01-01", "2023-06-28", interval="1d")

if image_data:
    img_tag = f'<img src="data:image/png;base64,{image_data}" alt="ARIMA Forecast Plot">'
    display(HTML(img_tag))
    
    #Display the results
    display(HTML(f'<h3>Forecast Data:</h3>'))
    display(result)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Error in forecasting and plotting: 'ds'


  return get_prediction_index(
  return get_prediction_index(


#### Preparation for saving to Postgres

In [9]:
import yfinance as yf

symbol = "AAPL"
period = "1y"

# Get stock data for Apple (AAPL)
ticker = yf.Ticker(symbol)
hist = ticker.history(period=period)


In [7]:
from confluent_kafka import Producer
import yfinance as yf
import json
import time

# Callback function to handle delivery reports
def delivery_report(err, msg):
    if err is not None:
        print(f"Message delivery failed: {err}")
    else:
        print(f"Message delivered to {msg.topic()} [{msg.partition()}]")

def fetch_and_send_stock_data(symbol, period="1d", max_retries=2):
    # Fetch stock data
    stock = yf.Ticker(symbol)
    hist = stock.history(period=period)  # Fetch data for the specified period

    # Kafka configuration
    conf = {'bootstrap.servers': "localhost:9094"}
    producer = Producer(**conf)
    topic = 'stock_data'

    # Iterate over each row in the DataFrame
    for index, row in hist.iterrows():
        # Prepare data for sending to Kafka
        data = row.to_dict()
        data['symbol'] = symbol  # Add the symbol to the data
        data['date'] = index.strftime('%Y-%m-%d')  # Add the date to the data

        # Attempt to send data to Kafka with retries
        for attempt in range(max_retries):
            try:
                print(str(data))
                producer.produce(topic, json.dumps(data).encode('utf-8'), callback=delivery_report)
                producer.flush()
                break  # Exit the retry loop on success
            except Exception as e:
                print(f"Attempt {attempt + 1} failed: {e}")
                if attempt < max_retries - 1:
                    time.sleep(2 ** attempt)  # Exponential backoff
                else:
                    print("Max retries reached. Failed to send data to Kafka.")

# Example usage for a longer period, e.g., 5 days
fetch_and_send_stock_data("GOOGL", "5d")

{'Open': 178.25, 'High': 180.41000366210938, 'Low': 176.11000061035156, 'Close': 177.7899932861328, 'Volume': 27864700.0, 'Dividends': 0.0, 'Stock Splits': 0.0, 'symbol': 'GOOGL', 'date': '2024-06-12'}
Message delivered to stock_data [0]
{'Open': 176.11000061035156, 'High': 176.74000549316406, 'Low': 174.8800048828125, 'Close': 175.16000366210938, 'Volume': 20913300.0, 'Dividends': 0.0, 'Stock Splits': 0.0, 'symbol': 'GOOGL', 'date': '2024-06-13'}
Message delivered to stock_data [0]
{'Open': 174.22000122070312, 'High': 177.05999755859375, 'Low': 174.14999389648438, 'Close': 176.7899932861328, 'Volume': 18063600.0, 'Dividends': 0.0, 'Stock Splits': 0.0, 'symbol': 'GOOGL', 'date': '2024-06-14'}
Message delivered to stock_data [0]
{'Open': 175.4600067138672, 'High': 178.36000061035156, 'Low': 174.80999755859375, 'Close': 177.24000549316406, 'Volume': 19618500.0, 'Dividends': 0.0, 'Stock Splits': 0.0, 'symbol': 'GOOGL', 'date': '2024-06-17'}
Message delivered to stock_data [0]
{'Open': 177