In [None]:
import asyncio
import random
import time
import json
from collections import deque
import matplotlib.pyplot as plt

# Simulated distributed log generator
async def generate_logs(num_logs=50000):
    """Asynchronously yield ``num_logs`` synthetic log records as JSON strings.

    Each record is a JSON object with the keys ``timestamp`` (the current
    ``time.time()`` value), ``service``, ``level`` and ``message``; the last
    three are picked at random from small fixed vocabularies.

    Args:
        num_logs: How many records to produce before the generator finishes.

    Yields:
        str: One JSON-encoded log record per iteration.
    """
    services = ["auth", "payment", "search", "recommendation"]
    levels = ["INFO", "WARN", "ERROR"]
    messages = ["OK", "Timeout", "DB Failure", "Cache Miss"]

    for _ in range(num_logs):
        # Field values are drawn in the same order the keys are emitted.
        stamp = time.time()
        service = random.choice(services)
        level = random.choice(levels)
        message = random.choice(messages)

        yield json.dumps(
            {
                "timestamp": stamp,
                "service": service,
                "level": level,
                "message": message,
            }
        )

        # Short pause after each record hands control back to the event loop.
        await asyncio.sleep(0.0001)

# Ingestion queue (simulating Kafka/Redpanda)
# In-process stand-in for a message broker: ingest_logs() appends every JSON
# log string it receives to the right end of this deque. The deque is created
# without a maxlen, so it grows by one entry per ingested log; nothing in the
# visible code removes entries from it.
queue = deque()

async def ingest_logs(num_logs=None, sample_every=5000):
    """Push the simulated log stream into ``queue`` and sample throughput.

    Every record produced by ``generate_logs`` is appended to the module-level
    ``queue``. Each time ``sample_every`` further records have been ingested,
    the elapsed time and the cumulative average rate (records ingested so far
    divided by seconds since the start) are recorded.

    Args:
        num_logs: Number of records to request from ``generate_logs``.
            ``None`` (the default) uses the generator's own default.
        sample_every: Take a throughput sample after every this many
            records. Must be at least 1.

    Returns:
        tuple[list[float], list[float]]: ``(timestamps, rates)`` as parallel
        lists, where ``timestamps[i]`` is the number of seconds since
        ingestion began at the i-th sample and ``rates[i]`` is the cumulative
        records-per-second figure at that moment.

    Raises:
        ValueError: If ``sample_every`` is smaller than 1.
    """
    if sample_every < 1:
        raise ValueError("sample_every must be a positive integer")

    source = generate_logs() if num_logs is None else generate_logs(num_logs)

    # perf_counter() is monotonic and high-resolution; time.time() can jump
    # when the system clock is adjusted and may be too coarse for intervals.
    start = time.perf_counter()
    count = 0
    timestamps = []
    rates = []

    async for log in source:
        queue.append(log)
        count += 1

        if count % sample_every == 0:
            elapsed = time.perf_counter() - start
            # Skip a sample whose interval is not measurable rather than
            # dividing by zero.
            if elapsed > 0:
                timestamps.append(elapsed)
                rates.append(count / elapsed)

    print("Total logs ingested:", count)
    return timestamps, rates

# Run ingestion benchmark
def _run_coroutine(coro):
    """Run ``coro`` to completion and return its result.

    ``asyncio.run`` raises ``RuntimeError`` when the calling thread already
    has a running event loop, which is the case inside a notebook kernel.
    In that situation the coroutine is executed with ``asyncio.run`` on a
    short-lived worker thread (which has no loop of its own) and this call
    blocks until it finishes. In a plain script ``asyncio.run`` is used
    directly.

    Args:
        coro: The coroutine object to execute.

    Returns:
        Whatever ``coro`` returns.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread: the ordinary script case.
        return asyncio.run(coro)

    # Imported locally so the cell's top-level import list stays unchanged.
    from concurrent.futures import ThreadPoolExecutor

    with ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(asyncio.run, coro).result()


timestamps, rates = _run_coroutine(ingest_logs())

# Plot the sampled cumulative throughput against elapsed time.
plt.figure(figsize=(10, 5))
plt.plot(timestamps, rates)
plt.xlabel("Time (sec)")
plt.ylabel("Throughput (logs/sec)")
plt.title("Log Ingestion Throughput")
plt.grid(True)
plt.show()
