In [3]:
pip install --upgrade kafka-python

Collecting kafka-python
  Downloading kafka_python-2.0.2-py2.py3-none-any.whl.metadata (7.8 kB)
Downloading kafka_python-2.0.2-py2.py3-none-any.whl (246 kB)
Installing collected packages: kafka-python
Successfully installed kafka-python-2.0.2
Note: you may need to restart the kernel to use updated packages.


In [1]:
pip show kafka-python

Name: kafka-python
Version: 2.0.2
Summary: Pure Python client for Apache Kafka
Home-page: https://github.com/dpkp/kafka-python
Author: Dana Powers
Author-email: dana.powers@gmail.com
License: Apache License 2.0
Location: c:\users\admin\anaconda3\lib\site-packages
Requires: 
Required-by: 
Note: you may need to restart the kernel to use updated packages.


In [None]:
# 01_Data_Ingestion.ipynb

# Import necessary libraries
import requests
import json
import time
from kafka import KafkaProducer

# Alpha Vantage API configuration
API_KEY = "your_alpha_vantage_api_key"  # Replace with your API key
STOCK_SYMBOL = "AAPL"  # Replace with your desired stock symbol
URL = f"https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY&symbol={STOCK_SYMBOL}&interval=1min&apikey={API_KEY}"

# Kafka configuration
KAFKA_BROKER = "localhost:9092"  # Replace with your Kafka broker's address
TOPIC_NAME = "stock_prices"

# Initialize Kafka Producer
producer = KafkaProducer(
    bootstrap_servers=KAFKA_BROKER,
    value_serializer=lambda x: json.dumps(x).encode('utf-8')
)

# Function to fetch stock prices from Alpha Vantage
def fetch_stock_data():
    try:
        response = requests.get(URL)
        data = response.json()
        
        if "Time Series (1min)" in data:
            # Extract the latest data point
            time_series = data["Time Series (1min)"]
            latest_timestamp = sorted(time_series.keys())[0]
            stock_info = time_series[latest_timestamp]
            
            # Format the data
            formatted_data = {
                "symbol": STOCK_SYMBOL,
                "timestamp": latest_timestamp,
                "open": stock_info["1. open"],
                "high": stock_info["2. high"],
                "low": stock_info["3. low"],
                "close": stock_info["4. close"],
                "volume": stock_info["5. volume"]
            }
            return formatted_data
        else:
            print("Error fetching data:", data)
            return None
    except Exception as e:
        print(f"Error: {e}")
        return None

# Main function to fetch and stream data to Kafka
# Wait for the Alpha Vantage API to update data (approximately 60 seconds)
# Reduce unnecessary API calls by fetching every minute
def stream_to_kafka():
    print(f"Starting data ingestion for stock: {STOCK_SYMBOL}...")
    
    while True:
        stock_data = fetch_stock_data()
        
        if stock_data:
            # Send data to Kafka
            producer.send(TOPIC_NAME, value=stock_data)
            print(f"Sent to Kafka: {stock_data}")
        else:
            print("No new data to send.")
        
        # Wait for the Alpha Vantage API to update the data
        time.sleep(60)


# Run the streaming function
if __name__ == "__main__":
    stream_to_kafka()


Starting data ingestion for stock: AAPL...
Sent to Kafka: {'symbol': 'AAPL', 'timestamp': '2024-12-11 18:20:00', 'open': '246.7300', 'high': '246.8200', 'low': '246.7000', 'close': '246.7200', 'volume': '1631'}
Sent to Kafka: {'symbol': 'AAPL', 'timestamp': '2024-12-11 18:20:00', 'open': '246.7300', 'high': '246.8200', 'low': '246.7000', 'close': '246.7200', 'volume': '1631'}
Sent to Kafka: {'symbol': 'AAPL', 'timestamp': '2024-12-11 18:20:00', 'open': '246.7300', 'high': '246.8200', 'low': '246.7000', 'close': '246.7200', 'volume': '1631'}
Sent to Kafka: {'symbol': 'AAPL', 'timestamp': '2024-12-11 18:20:00', 'open': '246.7300', 'high': '246.8200', 'low': '246.7000', 'close': '246.7200', 'volume': '1631'}
Sent to Kafka: {'symbol': 'AAPL', 'timestamp': '2024-12-11 18:20:00', 'open': '246.7300', 'high': '246.8200', 'low': '246.7000', 'close': '246.7200', 'volume': '1631'}
