In [1]:
import csv
import json
import os
import random
import time
from datetime import datetime

from kafka import KafkaProducer

In [2]:
# Kafka connection settings and the location of the source data
bootstrap_servers = "kafka_v2:9092"  # broker address (host:port)
topic_name = "credit_card_trans"  # topic the rows are published to
csv_path = "/opt/airflow/data/fraudTest.csv"  # CSV file that is replayed row by row

In [3]:
# Build the Kafka producer; every message value is sent as UTF-8 encoded JSON
def _to_json_bytes(payload):
    """Serialize a message value to UTF-8 encoded JSON bytes."""
    return json.dumps(payload).encode("utf-8")


producer = KafkaProducer(
    value_serializer=_to_json_bytes,
    bootstrap_servers=bootstrap_servers,
)

In [4]:
# Save the last processed row number to a file
def save_status(row_num, state_path="/opt/airflow/data/state.json"):
    """Persist the checkpoint (number of rows already processed) as JSON.

    The state is first written to a temporary sibling file and then moved over
    ``state_path`` with ``os.replace``. Opening the state file directly in "w"
    mode would truncate it before the new content is written, so interrupting
    the process mid-write could leave an empty or partial file behind.

    Args:
        row_num: Number of rows processed so far; stored under the "last_id" key.
        state_path: Location of the JSON state file.
    """
    tmp_path = f"{state_path}.tmp"
    with open(tmp_path, "w") as f:
        json.dump({"last_id": row_num}, f)
    # Swap the finished file into place; readers see either the old or the
    # new state, never a half-written one.
    os.replace(tmp_path, state_path)

# Load the last processed row number from a file
def load_status(state_path="/opt/airflow/data/state.json"):
    """Return the saved checkpoint, or 0 when no usable checkpoint exists.

    A missing state file means nothing has been processed yet. An unreadable
    state file (empty, truncated, not a JSON object, or with a "last_id" that
    is not a non-negative integer) is reported with a warning and also treated
    as "start from the beginning" instead of raising.

    Args:
        state_path: Location of the JSON state file.

    Returns:
        The number of rows already processed (0 if unknown).
    """
    try:
        with open(state_path, "r") as f:
            state = json.load(f)
    except FileNotFoundError:
        return 0
    except json.JSONDecodeError:
        print(f"Warning: state file {state_path} is not valid JSON; starting from row 0")
        return 0

    last_id = state.get("last_id", 0) if isinstance(state, dict) else None
    if not isinstance(last_id, int) or last_id < 0:
        print(f"Warning: state file {state_path} has no usable 'last_id'; starting from row 0")
        return 0
    return last_id
    
# Stream CSV rows with status tracking
def stream_reader(path):
    """Yield CSV rows one at a time, resuming after the last checkpointed row.

    Each yielded row is a dict from ``csv.DictReader`` with an added
    "event_time" field holding the current local time. Rows whose index is
    below the checkpoint returned by ``load_status()`` are skipped.

    The checkpoint for a row is written only after the consumer asks for the
    next row, i.e. after it has finished handling the current one. A row whose
    handling fails or is interrupted is therefore yielded again on the next
    run instead of being silently skipped.

    Args:
        path: Path of the CSV file to read; the first line is the header.

    Yields:
        dict: One CSV row with the extra "event_time" key.
    """
    row_num = load_status()

    # newline="" lets the csv module handle line endings itself, so quoted
    # fields that contain newlines are parsed correctly.
    with open(path, "r", newline="") as f:
        reader = csv.DictReader(f)
        for i, row in enumerate(reader):
            if i < row_num:
                continue  # Skip already processed rows

            row['event_time'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            yield row

            # Execution resumes here once the consumer is done with the row,
            # so it is now safe to mark it as processed.
            save_status(i + 1)

            # Random pause between rows to imitate a live event stream.
            delay = random.randint(1, 3)
            time.sleep(delay)
    

In [None]:
# Main loop to produce messages
# `i` counts the messages sent in this run. It is initialised up front so it is
# defined even when the stream is empty or interrupted before the first row.
i = 0
try:
    for i, row in enumerate(stream_reader(csv_path), 1):
        producer.send(topic_name, value=row)
        # Push the buffered record out before asking the reader for the next row.
        producer.flush()
        # Log each message as it is produced, not just the final one.
        print(f"Produced message {i}: {row}")
except KeyboardInterrupt:
    print("Stopping producer ...\nProducer stopped")
finally:
    # Always drain pending records and release the connection, whether the
    # loop finished, was interrupted, or failed.
    producer.flush()
    producer.close()

In [None]:
# To restart from the first CSV row, uncomment and run the next line to reset the saved checkpoint:
# save_status(0)
# 