In [50]:
from confluent_kafka import Consumer
import pandas as pd
import json
import os
from minio import Minio
from minio.error import S3Error

# MinIO configuration.
# Each setting is read from an environment variable of the same name so that
# credentials do not have to live in the notebook. The literal fallbacks are
# the original local-development values and keep existing runs working.
MINIO_ENDPOINT = os.environ.get('MINIO_ENDPOINT', 'myminio:9000')
MINIO_ACCESS_KEY = os.environ.get('MINIO_ACCESS_KEY', 'minio')
MINIO_SECRET_KEY = os.environ.get('MINIO_SECRET_KEY', 'minio123')
MINIO_BUCKET_NAME = os.environ.get('MINIO_BUCKET_NAME', 'kafka')
# TLS is off unless MINIO_SECURE is set to a truthy string (1/true/yes).
MINIO_SECURE = os.environ.get('MINIO_SECURE', 'false').strip().lower() in ('1', 'true', 'yes')

# Initialize the MinIO client shared by the functions in this notebook.
minio_client = Minio(
    MINIO_ENDPOINT,
    access_key=MINIO_ACCESS_KEY,
    secret_key=MINIO_SECRET_KEY,
    secure=MINIO_SECURE
)

def consume_data_from_kafka(topic):
    """Consume JSON messages from a Kafka topic and persist each one via save_to_minio.

    Subscribes to ``topic`` on the ``kafka:9092`` broker as consumer group
    ``my-group`` (starting from the earliest offset when the group has none)
    and polls in an endless loop. Every successfully decoded message is handed
    to ``save_to_minio(topic, [record])``.

    Args:
        topic: Name of the Kafka topic to subscribe to; also passed to
            ``save_to_minio`` as the file/object base name.

    Returns:
        None. The call blocks until interrupted (``KeyboardInterrupt``), after
        which the consumer is closed.
    """
    conf = {
        'bootstrap.servers': 'kafka:9092',
        'group.id': 'my-group',
        'auto.offset.reset': 'earliest'
    }
    consumer = Consumer(conf)
    consumer.subscribe([topic])

    try:
        while True:
            msg = consumer.poll(timeout=1.0)
            if msg is None:
                # Poll timed out with nothing to read.
                continue
            if msg.error():
                print(f"Consumer error: {msg.error()}")
                continue

            payload = msg.value()
            if payload is None:
                # Message without a value (e.g. a tombstone): nothing to store.
                continue

            # Assuming messages are UTF-8 encoded JSON documents.
            try:
                record = json.loads(payload.decode('utf-8'))
            except (UnicodeDecodeError, json.JSONDecodeError) as e:
                print(f"Failed to decode JSON: {e}")
                continue

            # save_to_minio appends to the existing CSV, so pass only the new
            # record; re-sending previously saved rows would duplicate them.
            save_to_minio(topic, [record])

    except KeyboardInterrupt:
        print("Consuming interrupted.")
    finally:
        # Always leave the consumer group cleanly.
        consumer.close()

def save_to_minio(stock_name, data):
    """Append ``data`` to a local CSV for ``stock_name`` and upload the file to MinIO.

    Rows are merged into ``/tmp/<stock_name>.csv`` (created on first use) and
    the whole file is then uploaded to ``MINIO_BUCKET_NAME`` under the object
    name ``<stock_name>.csv``.

    Args:
        stock_name: Base name used for both the local file and the object key.
        data: Records accepted by ``pd.DataFrame`` (e.g. a list of dicts).

    Returns:
        None. Upload failures raised as ``S3Error`` are printed, not re-raised.
    """
    # Convert data to DataFrame
    df = pd.DataFrame(data)

    # Nothing to add: return before touching the file. Writing an empty frame
    # would produce a CSV with no columns, which pd.read_csv cannot parse on
    # the next call.
    if df.empty:
        return

    # Define the file path and name
    file_path = f'/tmp/{stock_name}.csv'

    # Append to existing CSV file or create a new one
    if os.path.exists(file_path):
        try:
            df_existing = pd.read_csv(file_path)
        except pd.errors.EmptyDataError:
            # An empty file was left behind earlier; treat it as absent.
            df_existing = None
        if df_existing is not None:
            df = pd.concat([df_existing, df], ignore_index=True)

    # Save DataFrame to CSV file
    df.to_csv(file_path, index=False)

    # Upload the CSV file to MinIO
    try:
        minio_client.fput_object(
            MINIO_BUCKET_NAME,
            f'{stock_name}.csv',
            file_path
        )
        print(f"Uploaded {file_path} to MinIO bucket {MINIO_BUCKET_NAME}")
    except S3Error as e:
        print(f"Error uploading to MinIO: {e}")


In [51]:
# Stream the Kafka topic for a single ticker. The call below blocks and keeps
# consuming until the kernel is interrupted.
ticker = 'AAPL'
consume_data_from_kafka(topic=f'{ticker}_stock')

Uploaded /tmp/AAPL_stock.csv to MinIO bucket kafka
Uploaded /tmp/AAPL_stock.csv to MinIO bucket kafka
Uploaded /tmp/AAPL_stock.csv to MinIO bucket kafka
Uploaded /tmp/AAPL_stock.csv to MinIO bucket kafka
Uploaded /tmp/AAPL_stock.csv to MinIO bucket kafka
Uploaded /tmp/AAPL_stock.csv to MinIO bucket kafka
Uploaded /tmp/AAPL_stock.csv to MinIO bucket kafka
Uploaded /tmp/AAPL_stock.csv to MinIO bucket kafka
Uploaded /tmp/AAPL_stock.csv to MinIO bucket kafka
Uploaded /tmp/AAPL_stock.csv to MinIO bucket kafka
Uploaded /tmp/AAPL_stock.csv to MinIO bucket kafka
Uploaded /tmp/AAPL_stock.csv to MinIO bucket kafka
Uploaded /tmp/AAPL_stock.csv to MinIO bucket kafka
Uploaded /tmp/AAPL_stock.csv to MinIO bucket kafka
Uploaded /tmp/AAPL_stock.csv to MinIO bucket kafka
Uploaded /tmp/AAPL_stock.csv to MinIO bucket kafka
Uploaded /tmp/AAPL_stock.csv to MinIO bucket kafka
Uploaded /tmp/AAPL_stock.csv to MinIO bucket kafka
Uploaded /tmp/AAPL_stock.csv to MinIO bucket kafka
Uploaded /tmp/AAPL_stock.csv to