In [1]:
!pip install confluent-kafka yfinance minio pandas

Collecting confluent-kafka
  Downloading confluent_kafka-2.5.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (2.3 kB)
Collecting yfinance
  Downloading yfinance-0.2.43-py2.py3-none-any.whl.metadata (11 kB)
Collecting minio
  Downloading minio-7.2.8-py3-none-any.whl.metadata (6.5 kB)
Collecting multitasking>=0.0.7 (from yfinance)
  Downloading multitasking-0.0.11-py3-none-any.whl.metadata (5.5 kB)
Collecting lxml>=4.9.1 (from yfinance)
  Downloading lxml-5.3.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (3.8 kB)
Collecting frozendict>=2.3.4 (from yfinance)
  Downloading frozendict-2.4.4-py311-none-any.whl.metadata (23 kB)
Collecting peewee>=3.16.2 (from yfinance)
  Downloading peewee-3.17.6.tar.gz (3.0 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.0/3.0 MB 4.4 MB/s eta 0:00:00
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyp

In [6]:
import yfinance as yf
import pandas as pd

def fetch_stock_data(ticker, start_date, end_date):
    """Fetch the price history of one ticker from yfinance.

    Parameters
    ----------
    ticker : str
        Symbol handed to ``yf.Ticker`` (e.g. ``'AAPL'``).
    start_date, end_date
        Passed through unchanged as the ``start`` and ``end`` arguments of
        ``Ticker.history``.

    Returns
    -------
    Whatever ``Ticker.history`` returns for that window.
    """
    return yf.Ticker(ticker).history(start=start_date, end=end_date)

In [7]:
from confluent_kafka import Producer
import json

def send_data_to_kafka(topic, data, bootstrap_servers='kafka:9092', producer=None):
    """Publish every row of a DataFrame to a Kafka topic as one JSON message.

    Parameters
    ----------
    topic : str
        Kafka topic to produce to.
    data : pandas.DataFrame
        Frame to publish; each row becomes a JSON object keyed by column
        name. ``pd.Timestamp`` values are sent as ISO-8601 strings.
    bootstrap_servers : str, optional
        Broker address list used when a new producer has to be created.
        Defaults to ``'kafka:9092'``.
    producer : optional
        An existing producer exposing ``produce``, ``poll`` and ``flush``.
        When omitted, a ``confluent_kafka.Producer`` is created from
        ``bootstrap_servers``.

    Notes
    -----
    The outcome of each delivery is printed by a callback. The function
    returns nothing and blocks in ``flush()`` until all queued messages have
    been handled.
    """
    if producer is None:
        producer = Producer({'bootstrap.servers': bootstrap_servers})

    def delivery_report(err, msg):
        # Invoked by the client from poll()/flush() once per produced message.
        if err is not None:
            print(f"Delivery failed: {err}")
        else:
            print(f"Message delivered to {msg.topic()} [{msg.partition()}]")

    for _, row in data.iterrows():
        # Timestamps are not JSON-serializable; every other value is passed through.
        message = {
            key: value.isoformat() if isinstance(value, pd.Timestamp) else value
            for key, value in row.to_dict().items()
        }
        payload = json.dumps(message)

        try:
            producer.produce(topic, payload, callback=delivery_report)
        except BufferError:
            # Local queue is full: wait for some deliveries to drain, then retry once.
            producer.poll(1)
            producer.produce(topic, payload, callback=delivery_report)

        # Serve pending delivery callbacks without blocking. A non-zero timeout
        # here can stall for that long on every row when no report is ready yet.
        producer.poll(0)

    # Block until every queued message has been delivered or has failed.
    producer.flush()


In [8]:
# Symbol and date window for the download; `ticker` is reused later to build
# the Kafka topic name.
ticker = 'AAPL'
start_date = '2023-01-01'
end_date = '2023-12-31'

data = fetch_stock_data(ticker=ticker, start_date=start_date, end_date=end_date)

In [9]:
# Move the date index into a regular 'Date' column. The guard keeps this cell
# idempotent: re-running it unguarded would add a spurious integer 'index'
# column and then rename it to a second, duplicate 'Date' column.
if 'Date' not in data.columns:
    # An unnamed index comes out of reset_index() as 'index'; normalise it to 'Date'.
    data = data.reset_index().rename(columns={'index': 'Date'})
data

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2023-01-03 00:00:00-05:00,129.066078,129.680297,123.013009,123.904625,112117500,0.0,0.0
1,2023-01-04 00:00:00-05:00,125.707670,127.461182,123.914537,125.182610,89113600,0.0,0.0
2,2023-01-05 00:00:00-05:00,125.945435,126.579471,123.597523,123.855095,80962700,0.0,0.0
3,2023-01-06 00:00:00-05:00,124.835861,129.075971,123.726294,128.412216,87754700,0.0,0.0
4,2023-01-09 00:00:00-05:00,129.254312,132.166920,128.679714,128.937286,70790800,0.0,0.0
...,...,...,...,...,...,...,...,...
245,2023-12-22 00:00:00-05:00,194.442163,194.671304,192.240525,192.868149,37122800,0.0,0.0
246,2023-12-26 00:00:00-05:00,192.878102,193.157042,192.101051,192.320221,28919300,0.0,0.0
247,2023-12-27 00:00:00-05:00,191.762337,192.768513,190.367620,192.419830,48087700,0.0,0.0
248,2023-12-28 00:00:00-05:00,193.406086,193.924125,192.439752,192.848206,34049900,0.0,0.0


In [10]:
# Inspect the index after the reset above (the recorded output is a plain RangeIndex).
data.index

RangeIndex(start=0, stop=250, step=1)

In [12]:
# Publish every row of the frame to a per-ticker topic named '<ticker>_stock'.
send_data_to_kafka(topic=f'{ticker}_stock', data=data)

Message delivered to AAPL_stock [0]
Message delivered to AAPL_stock [0]
Message delivered to AAPL_stock [0]
Message delivered to AAPL_stock [0]
Message delivered to AAPL_stock [0]
Message delivered to AAPL_stock [0]
Message delivered to AAPL_stock [0]
Message delivered to AAPL_stock [0]
Message delivered to AAPL_stock [0]
Message delivered to AAPL_stock [0]
Message delivered to AAPL_stock [0]
Message delivered to AAPL_stock [0]
Message delivered to AAPL_stock [0]
Message delivered to AAPL_stock [0]
Message delivered to AAPL_stock [0]
Message delivered to AAPL_stock [0]
Message delivered to AAPL_stock [0]
Message delivered to AAPL_stock [0]
Message delivered to AAPL_stock [0]
Message delivered to AAPL_stock [0]
Message delivered to AAPL_stock [0]
Message delivered to AAPL_stock [0]
Message delivered to AAPL_stock [0]
Message delivered to AAPL_stock [0]
Message delivered to AAPL_stock [0]
Message delivered to AAPL_stock [0]
Message delivered to AAPL_stock [0]
Message delivered to AAPL_st