In [11]:
from kafka import KafkaProducer
from kafka.admin import KafkaAdminClient, NewTopic
import csv
import json
import time
from datetime import datetime
import random
import os 


# Notebook configuration. Each value can be overridden through an environment
# variable so the notebook can be pointed at a different broker or data file
# without editing code; the defaults are the values previously hard-coded here.
topic_name = os.environ.get("CC_PRODUCER_TOPIC", "credit_card_trans")  # Kafka topic the rows are sent to
bootstrap_servers = os.environ.get("CC_PRODUCER_BOOTSTRAP_SERVERS", "kafka_v2:9092")  # host:port of the Kafka broker
csv_path = os.environ.get("CC_PRODUCER_CSV_PATH", "/opt/airflow/data/fraudTrain.csv")  # CSV file streamed row by row

In [12]:
def _to_json_bytes(payload):
    """Serialize a message value to UTF-8 encoded JSON bytes."""
    return json.dumps(payload).encode("utf-8")


# Single producer shared by the cells below; every value handed to send()
# is passed through _to_json_bytes before it goes on the wire.
producer = KafkaProducer(
    value_serializer=_to_json_bytes,
    bootstrap_servers=bootstrap_servers,
)

In [13]:
# def stream_csv_rows(path):
#     with open(path, newline='', encoding='utf-8') as csv_file:
#             reader = csv.DictReader(csv_file)
#             for row in reader:
#                 row['event_time'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
#                 yield row
#                 delay = random.randint(5, 10)
#                 time.sleep(delay)


def save_status(row_num, state_path="/opt/airflow/data/state.json"):
    """Persist the resume checkpoint as ``{"last_id": row_num}`` in a JSON file.

    The data is written to ``<state_path>.tmp`` first and then moved over
    ``state_path`` with ``os.replace``. Opening the real file with mode "w"
    would truncate it immediately, so an interruption in the middle of the
    write (e.g. Ctrl-C while the producer is running) would leave an empty or
    partial file behind; with the temp-file swap the previous checkpoint stays
    intact until the new one is completely written.

    Args:
        row_num: Value stored under the ``"last_id"`` key.
        state_path: Location of the state file. Defaults to the path this
            notebook has always used.
    """
    tmp_path = f"{state_path}.tmp"
    with open(tmp_path, "w") as f:
        json.dump({"last_id": row_num}, f)
    # Atomic swap: readers see either the old file or the new one, never a partial write.
    os.replace(tmp_path, state_path)

def load_status(state_path="/opt/airflow/data/state.json"):
    """Return the checkpoint stored under ``"last_id"`` in the state file.

    Returns 0 when there is no usable checkpoint: the file does not exist,
    it is empty or not valid JSON (for example after an interrupted write),
    its top-level value is not a JSON object, or it has no ``"last_id"`` key.
    For an unreadable or malformed file a warning is printed so the restart
    from the beginning is visible rather than silent.

    Args:
        state_path: Location of the state file. Defaults to the path this
            notebook has always used.
    """
    try:
        with open(state_path, "r") as f:
            state = json.load(f)
    except FileNotFoundError:
        # First run: nothing has been checkpointed yet.
        return 0
    except json.JSONDecodeError:
        print(f"State file {state_path} is not valid JSON; starting from row 0")
        return 0

    if not isinstance(state, dict):
        print(f"State file {state_path} has an unexpected layout; starting from row 0")
        return 0
    return state.get("last_id", 0)
    

def stream_reader(path, min_delay=5, max_delay=10):
    """Yield the rows of a CSV file one at a time, resuming from the saved checkpoint.

    Rows whose zero-based index is below the value returned by ``load_status()``
    are skipped. Every yielded row is the ``csv.DictReader`` dict with an added
    ``event_time`` key holding the current local time as ``YYYY-MM-DD HH:MM:SS``.

    The checkpoint for a row is written only when the consumer asks for the
    next row, i.e. after the consumer has finished handling the current one.
    If the consumer fails or is interrupted while handling a row, that row is
    not marked as done and will be yielded again on the next run, instead of
    being silently skipped.

    Args:
        path: Path of the CSV file to read.
        min_delay: Lower bound, in whole seconds, of the random pause between rows.
        max_delay: Upper bound, in whole seconds, of the random pause between rows.

    Yields:
        dict: One CSV row per iteration, including the ``event_time`` key.
    """
    resume_from = load_status()

    # newline="" lets the csv module handle line endings itself, as its
    # documentation requires; the encoding is pinned so reading does not
    # depend on the locale of the machine running the notebook.
    with open(path, "r", newline="", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for i, row in enumerate(reader):
            if i < resume_from:
                continue  # Skip already processed rows

            row['event_time'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            yield row

            # Execution resumes here only once the consumer requests the next
            # row, so this row can now be recorded as processed.
            save_status(i + 1)

            # Random pause between consecutive rows.
            time.sleep(random.randint(min_delay, max_delay))
    

In [14]:
# Send every remaining CSV row to the Kafka topic, one message per row.
# The producer is closed in `finally`, so the cell that creates `producer`
# has to be run again before this cell can be re-run.
delivery_timeout_s = 60  # how long to wait for the broker to acknowledge one record

try:
    for i, row in enumerate(stream_reader(csv_path), 1):
        # send() is asynchronous and only returns a future. Waiting on the
        # future blocks until the record is acknowledged and raises if the
        # delivery failed or timed out, so a message is reported as produced
        # only when it really was.
        producer.send(topic_name, value=row).get(timeout=delivery_timeout_s)
        # NOTE(review): this prints the whole row, including the cc_num, first,
        # last and dob fields; consider printing only trans_num if the data is
        # not synthetic.
        print(f"Produced message {i}: {row}")
except KeyboardInterrupt:
    print("Stopping producer ...\nProducer stopped")
finally:
    producer.flush()
    producer.close()

Produced message 1: {'': '4', 'trans_date_trans_time': '2019-01-01 00:03:06', 'cc_num': '375534208663984', 'merchant': 'fraud_Keeling-Crist', 'category': 'misc_pos', 'amt': '41.96', 'first': 'Tyler', 'last': 'Garcia', 'gender': 'M', 'street': '408 Bradley Rest', 'city': 'Doe Hill', 'state': 'VA', 'zip': '24433', 'lat': '38.4207', 'long': '-79.4629', 'city_pop': '99', 'job': 'Dance movement psychotherapist', 'dob': '1986-03-28', 'trans_num': 'a41d7549acf90789359a9aa5346dcb46', 'unix_time': '1325376186', 'merch_lat': '38.674999', 'merch_long': '-78.632459', 'is_fraud': '0', 'event_time': '2025-07-26 20:56:02'}
Produced message 2: {'': '5', 'trans_date_trans_time': '2019-01-01 00:04:08', 'cc_num': '4767265376804500', 'merchant': 'fraud_Stroman, Hudson and Erdman', 'category': 'gas_transport', 'amt': '94.63', 'first': 'Jennifer', 'last': 'Conner', 'gender': 'F', 'street': '4655 David Island', 'city': 'Dublin', 'state': 'PA', 'zip': '18917', 'lat': '40.375', 'long': '-75.2045', 'city_pop': 