In [9]:
from kafka import KafkaProducer
import csv
import json
import time
from datetime import datetime
import random



# Kafka topic that the send loop publishes every row to.
topic_name = "credit_card_trans"
# Broker address (host:port) handed to KafkaProducer as bootstrap_servers.
bootstrap_servers = "kafka_v2:9092"
# CSV file whose rows are replayed by stream_reader.
csv_path = "/opt/airflow/data/fraudTest.csv" 

In [10]:
def _encode_value(value):
    """Serialize a message value to UTF-8 encoded JSON bytes."""
    return json.dumps(value).encode("utf-8")


# Producer used by the send loop; every value passes through _encode_value.
producer = KafkaProducer(
    bootstrap_servers=bootstrap_servers,
    value_serializer=_encode_value,
)

In [11]:
# def stream_csv_rows(path):
#     with open(path, newline='', encoding='utf-8') as csv_file:
#             reader = csv.DictReader(csv_file)
#             for row in reader:
#                 row['event_time'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
#                 yield row
#                 delay = random.randint(5, 10)
#                 time.sleep(delay)


def save_status(row_num, state_path="/opt/airflow/data/state.json"):
    """Persist the resume checkpoint for the CSV stream.

    Stores ``{"last_id": row_num}`` as JSON in ``state_path``. The payload is
    written to a sibling ``.tmp`` file first and then moved over the real file
    with ``os.replace``, so an interruption in the middle of the write cannot
    leave an empty or truncated state file behind.

    Args:
        row_num: Value stored under ``"last_id"`` (stream_reader passes the
            number of rows it has already handed out).
        state_path: Location of the JSON state file.
    """
    import os  # local import so this definition does not rely on another cell

    tmp_path = f"{state_path}.tmp"
    with open(tmp_path, "w") as f:
        json.dump({"last_id": row_num}, f)
    # Atomic swap: readers see either the old checkpoint or the new one.
    os.replace(tmp_path, state_path)

def load_status(state_path="/opt/airflow/data/state.json"):
    """Return the saved resume checkpoint, or 0 when none is usable.

    Reads the JSON state file and returns its ``"last_id"`` value. A missing
    file, an empty or truncated file, a payload that is not a JSON object, or
    a ``"last_id"`` that is not a non-negative integer all count as "no
    checkpoint" and yield 0, so the stream starts from the first row instead
    of crashing.

    Args:
        state_path: Location of the JSON state file.

    Returns:
        The stored ``"last_id"`` as an int, or 0 if no valid checkpoint exists.
    """
    try:
        with open(state_path, "r") as f:
            state = json.load(f)
    except FileNotFoundError:
        return 0
    except json.JSONDecodeError:
        # Empty or partially written file, e.g. after an interrupted save.
        print(f"Ignoring unreadable state file {state_path}; starting from 0")
        return 0

    if not isinstance(state, dict):
        return 0

    last_id = state.get("last_id", 0)
    # bool is a subclass of int, so it is rejected explicitly.
    if isinstance(last_id, bool) or not isinstance(last_id, int) or last_id < 0:
        print(f"Ignoring invalid last_id {last_id!r} in {state_path}; starting from 0")
        return 0
    return last_id
    

def stream_reader(path):
    """Yield CSV rows one at a time, resuming after the saved checkpoint.

    Rows whose zero-based index is below the value returned by
    ``load_status()`` are skipped. Each remaining row is a dict from
    ``csv.DictReader`` with an added ``'event_time'`` key holding the current
    local time formatted as ``'%Y-%m-%d %H:%M:%S'``.

    The checkpoint is advanced only after the consumer comes back for the
    next row. If the consumer fails while handling a row, that row is not
    marked as done and will be yielded again on the next run (at-least-once
    rather than at-most-once).

    After each row the generator sleeps a random 5-10 seconds before reading
    the next one.

    Args:
        path: Path of the CSV file to read.

    Yields:
        dict: One CSV row with the extra ``'event_time'`` field.
    """
    row_num = load_status()

    # newline="" lets the csv module handle line endings itself, which keeps
    # quoted fields containing newlines intact.
    with open(path, "r", newline="", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for i, row in enumerate(reader):
            if i < row_num:
                continue  # Skip already processed rows

            row['event_time'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            yield row

            # Reached only when the consumer requests the next row, i.e. after
            # it finished with this one without raising.
            save_status(i + 1)

            delay = random.randint(5, 10)
            time.sleep(delay)
    

In [12]:
# Replay the CSV into Kafka until the file is exhausted or the user interrupts.
try:
    for i, row in enumerate(stream_reader(csv_path), 1):
        # send() is asynchronous and only returns a future; flush() waits for
        # completion but does not raise on delivery failure. Blocking on the
        # future surfaces broker errors here, so a row is never reported as
        # produced unless the broker acknowledged it. 30 s bounds the wait.
        producer.send(topic_name, value=row).get(timeout=30)
        print(f"Produced message {i}: {row}")
except KeyboardInterrupt:
    print("Stopping producer ...\nProducer stopped")
finally:
    # Drain anything still buffered, then release the producer's connections.
    producer.flush()
    producer.close()

Produced message 1: {'': '128', 'trans_date_trans_time': '2020-06-21 12:54:52', 'cc_num': '378278619832195', 'merchant': 'fraud_Abbott-Steuber', 'category': 'personal_care', 'amt': '8.32', 'first': 'Mary', 'last': 'Mcintyre', 'gender': 'F', 'street': '77921 Costa Villages', 'city': 'Eugene', 'state': 'OR', 'zip': '97403', 'lat': '44.0385', 'long': '-123.0614', 'city_pop': '191096', 'job': 'Scientist, physiological', 'dob': '1964-04-06', 'trans_num': '5608b93eb3a58c087fc11a809718d9e1', 'unix_time': '1371819292', 'merch_lat': '43.046296000000005', 'merch_long': '-122.689361', 'is_fraud': '0', 'event_time': '2025-07-28 13:48:19'}
Produced message 2: {'': '129', 'trans_date_trans_time': '2020-06-21 12:54:55', 'cc_num': '3513618443244549', 'merchant': 'fraud_Sporer-Keebler', 'category': 'personal_care', 'amt': '37.0', 'first': 'Amber', 'last': 'Perez', 'gender': 'F', 'street': '954 Reyes Ways', 'city': 'Fulton', 'state': 'SD', 'zip': '57340', 'lat': '43.7588', 'long': '-97.8712', 'city_pop'

In [None]:
# # Uncomment the next line to reset the checkpoint and restart streaming from the first row
# save_status(0)
