In [None]:
import csv
import json
import os
import random
import time
from datetime import datetime

from kafka import KafkaProducer

In [None]:
# Kafka producer configuration
topic_name = "credit_card_trans"  # topic passed to producer.send() in the main loop
bootstrap_servers = "kafka_v2:9092"  # broker address handed to KafkaProducer
csv_path = "/opt/airflow/data/fraudTest.csv"  # CSV file read by stream_reader()

In [None]:
# Create a Kafka producer instance
def _to_json_bytes(payload):
    """Serialize a message value to UTF-8 encoded JSON bytes."""
    return json.dumps(payload).encode("utf-8")


producer = KafkaProducer(
    bootstrap_servers=bootstrap_servers,
    value_serializer=_to_json_bytes,
)

In [None]:
# def stream_csv_rows(path):
#     with open(path, newline='', encoding='utf-8') as csv_file:
#             reader = csv.DictReader(csv_file)
#             for row in reader:
#                 row['event_time'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
#                 yield row
#                 delay = random.randint(5, 10)
#                 time.sleep(delay)

# Save the last processed row number to a file
def save_status(row_num, path="/opt/airflow/data/state.json"):
    """Persist the resume checkpoint as ``{"last_id": row_num}``.

    The JSON is written to a sibling temporary file first and then swapped
    into place, so a reader never observes an empty or half-written
    checkpoint if the process is interrupted mid-write.

    Args:
        row_num: Number of CSV data rows that have been processed so far.
        path: Location of the checkpoint file. Defaults to the shared
            Airflow data directory used by this notebook.
    """
    tmp_path = f"{path}.tmp"
    with open(tmp_path, "w") as f:
        json.dump({"last_id": row_num}, f)
    # os.replace overwrites the destination in a single rename step.
    os.replace(tmp_path, path)

# Load the last processed row number from a file
def load_status(path="/opt/airflow/data/state.json"):
    """Return the number of CSV rows already processed, or 0 if unknown.

    A missing checkpoint means nothing has been processed yet. A checkpoint
    that cannot be parsed, or that does not hold a non-negative integer under
    ``"last_id"``, is treated the same way (with a printed notice) instead of
    raising, so one damaged state file does not block the producer.

    Args:
        path: Location of the checkpoint file. Defaults to the shared
            Airflow data directory used by this notebook.

    Returns:
        int: The stored ``last_id`` value, or 0 when no usable value exists.
    """
    try:
        with open(path, "r") as f:
            state = json.load(f)
    except FileNotFoundError:
        return 0
    except json.JSONDecodeError:
        # Typically an empty or truncated file left by an interrupted write.
        print(f"Ignoring unreadable state file {path}; starting from row 0")
        return 0

    last_id = state.get("last_id", 0) if isinstance(state, dict) else None
    # bool is a subclass of int, so it has to be rejected explicitly.
    if isinstance(last_id, bool) or not isinstance(last_id, int) or last_id < 0:
        print(f"Ignoring invalid state in {path}; starting from row 0")
        return 0
    return last_id
    
# Stream CSV rows with status tracking
def stream_reader(path):
    """Yield CSV rows as dicts, resuming after the last checkpointed row.

    Rows whose zero-based index is below the value returned by
    ``load_status()`` are skipped. Each yielded row gets an ``event_time``
    field holding the current local time as ``YYYY-MM-DD HH:MM:SS``.

    The checkpoint is advanced only after the consumer comes back for the
    next row, i.e. after it has finished handling the row it was given. If
    the consumer stops or fails while handling a row, that row is yielded
    again on the next run rather than being skipped.

    Args:
        path: Path of the CSV file to stream; the first line is the header.

    Yields:
        dict: One CSV row keyed by column name, plus ``event_time``.
    """
    row_num = load_status()

    # The csv module expects files opened with newline="" so that newlines
    # embedded in quoted fields are parsed correctly; the encoding is pinned
    # so decoding does not depend on the container's locale.
    with open(path, "r", newline="", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for i, row in enumerate(reader):
            if i < row_num:
                continue  # Skip already processed rows

            row['event_time'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            yield row

            # Execution resumes here only when the consumer requests the
            # next row, so row i can now be recorded as done.
            save_status(i + 1)

            # Throttle the stream with a random 1-3 second pause.
            delay = random.randint(1, 3)
            time.sleep(delay)
    

In [None]:
# Main loop: publish each streamed CSV row to Kafka until the file is
# exhausted or the cell is interrupted.
try:
    for msg_no, record in enumerate(stream_reader(csv_path), start=1):
        producer.send(topic_name, value=record)
        # Flush after every record so it is pushed out before the next row
        # is requested from the reader.
        producer.flush()
        # Debug aid: print(f"Produced message {msg_no}: {record}")
except KeyboardInterrupt:
    print("Stopping producer ...\nProducer stopped")
finally:
    # Push out anything still buffered, then release the producer.
    producer.flush()
    producer.close()

Produced message 1: {'': '0', 'trans_date_trans_time': '2020-06-21 12:14:25', 'cc_num': '2291163933867244', 'merchant': 'fraud_Kirlin and Sons', 'category': 'personal_care', 'amt': '2.86', 'first': 'Jeff', 'last': 'Elliott', 'gender': 'M', 'street': '351 Darlene Green', 'city': 'Columbia', 'state': 'South Carolina', 'zip': '29209', 'lat': '33.9659', 'long': '-80.9355', 'city_pop': '333497', 'job': 'Mechanical engineer', 'dob': '1968-03-19', 'trans_num': '2da90c7d74bd46a0caf3777415b3ebd3', 'unix_time': '1371816865', 'merch_lat': '33.986391', 'merch_long': '-81.200714', 'is_fraud': '0', 'event_time': '2025-07-28 18:59:31'}


In [None]:
# To restart streaming from the first row, uncomment and run the line below:
# save_status(0)