In [5]:
# ╔════════════════════════════════════════════════════════════════╗
# ║           Darooghe – quick dashboard for three metrics         ║
# ╚════════════════════════════════════════════════════════════════╝
import json
import time
from datetime import datetime

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
from kafka import KafkaConsumer

# Kafka connection settings and the three topics the dashboard reads.
BOOTSTRAP = "localhost:9092"          # handed to KafkaConsumer as bootstrap_servers
TOPIC_A   = "darooghe.comm_total_by_type"      # plotted as "Total commission by type"
TOPIC_B   = "darooghe.comm_ratio_by_cat"       # plotted as "Commission / amount ratio by merchant category"
TOPIC_C   = "darooghe.highest_comm_merchant"   # plotted as "Highest-commission merchant"
MSG_LIMIT = 500                     # dev: stop after this many per topic

def fetch_topic(topic, limit=MSG_LIMIT):
    """Return a DataFrame with the <limit> newest messages from a topic.

    The consumer is assigned to every partition of *topic* and positioned
    ``limit`` records before each partition's current end, so messages that
    already exist in the log are returned.  (Subscribing with
    ``auto_offset_reset="latest"`` and no committed offsets starts at the end
    of the log and only sees records produced while the consumer is waiting,
    which leaves an idle topic looking empty.)

    Parameters
    ----------
    topic : str
        Kafka topic to read.
    limit : int
        Maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        One row per decoded JSON message, sorted by ``start_time`` (converted
        to pandas datetimes).  When nothing could be read, an empty frame with
        the columns ``start_time, end_time, metric, dimension, value``.
    """
    # Local import: only this function needs TopicPartition.
    from kafka import TopicPartition

    empty = pd.DataFrame([], columns=["start_time", "end_time",
                                      "metric", "dimension", "value"])
    if limit <= 0:
        return empty

    cons = KafkaConsumer(
        bootstrap_servers=BOOTSTRAP,
        # Only consulted if a seeked offset has been deleted in the meantime.
        auto_offset_reset="earliest",
        enable_auto_commit=False,
        consumer_timeout_ms=2000       # stop if no msg for 2 s
    )
    rows = []
    try:
        partition_ids = cons.partitions_for_topic(topic) or ()
        tps = [TopicPartition(topic, p) for p in sorted(partition_ids)]
        end_offsets = {}
        pending = 0                    # records between our start and the end snapshot
        if tps:
            cons.assign(tps)
            first_offsets = cons.beginning_offsets(tps)
            end_offsets = cons.end_offsets(tps)
            for tp in tps:
                start = max(first_offsets[tp], end_offsets[tp] - limit)
                cons.seek(tp, start)
                pending += end_offsets[tp] - start

        if pending > 0:
            for msg in cons:
                tp = TopicPartition(msg.topic, msg.partition)
                if msg.offset >= end_offsets[tp]:
                    # Produced after the snapshot; not part of this read.
                    continue
                pending -= 1
                if msg.value is not None:      # skip null-valued (tombstone) records
                    rows.append(json.loads(msg.value.decode()))
                if pending <= 0:
                    break
    finally:
        # Release the connection even when decoding a message fails.
        cons.close()

    if not rows:
        return empty
    df = pd.DataFrame(rows)
    # to pandas datetime & sort so lines draw left→right
    df["start_time"] = pd.to_datetime(df["start_time"])
    df = df.sort_values("start_time")
    # Up to <limit> records were read per partition; keep the newest <limit> overall.
    return df.tail(limit)

# ───────────────────────────────────────────────────────── plot helpers ──
def make_time_formatter(ax):
    """Format the x-axis ticks of *ax* as UTC date over time, rotated 45°."""
    utc_formatter = mdates.DateFormatter("%Y-%m-%d\n%H:%M", tz=mdates.UTC)
    ax.xaxis.set_major_formatter(utc_formatter)
    # Tilt the labels and right-align them so they stay readable.
    for tick_label in ax.get_xticklabels():
        tick_label.set_rotation(45)
        tick_label.set_ha("right")

def plot_line(df, title, ylabel):
    """Draw one line per distinct ``dimension`` value of *df* and show the figure.

    ``start_time`` is used for the x-axis and ``value`` for the y-axis; each
    line is labelled with its dimension in the legend.
    """
    figure, axes = plt.subplots(figsize=(10,6))
    for dimension, series in df.groupby("dimension"):
        xs = series["start_time"].to_numpy()
        ys = series["value"].to_numpy()
        axes.plot(xs, ys, label=str(dimension))
    make_time_formatter(axes)
    axes.set(title=title,
             xlabel="Event start time (UTC)",
             ylabel=ylabel)
    axes.legend()
    figure.tight_layout()
    plt.show()

def plot_bar(df, title, ylabel, window_minutes=5.0):
    """Draw ``value`` as purple bars over ``start_time`` and show the figure.

    On a matplotlib date axis a numeric bar width is measured in days, so the
    default width of 0.8 would make every bar 0.8 days wide and minute-scale
    windows would overlap into a single slab.  When ``start_time`` holds
    datetimes, the width is set to 80 % of the smallest gap between distinct
    timestamps instead.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``start_time`` and ``value`` columns.
    title, ylabel : str
        Figure title and y-axis label.
    window_minutes : float
        Bar spacing to assume when *df* has fewer than two distinct
        timestamps and no gap can be measured.
    """
    times = df["start_time"]
    bar_kwargs = {"color": "tab:purple"}
    if pd.api.types.is_datetime64_any_dtype(times):
        gaps = times.drop_duplicates().sort_values().diff().dropna()
        if gaps.empty:
            spacing_days = window_minutes / (24 * 60)
        else:
            spacing_days = gaps.min().total_seconds() / 86400
        bar_kwargs["width"] = 0.8 * spacing_days

    fig, ax = plt.subplots(figsize=(10,6))
    ax.bar(times.to_numpy(),
           df["value"].to_numpy(),
           **bar_kwargs)
    make_time_formatter(ax)
    ax.set_title(title)
    ax.set_xlabel("Event start time (UTC)")
    ax.set_ylabel(ylabel)
    fig.tight_layout()
    plt.show()

# ───────────────────────────────────────────────────────── dashboard ────
def dashboard():
    """Fetch the three metric topics and plot every one that returned rows.

    All topics are fetched before anything is drawn; if none of them yielded
    data a hint is printed and no figure is produced.
    """
    print("⏳ fetching data …")
    frames = [fetch_topic(name) for name in (TOPIC_A, TOPIC_B, TOPIC_C)]

    if all(frame.empty for frame in frames):
        print("no messages found – is the streaming job running?")
        return

    # One (plotter, title, y-label) entry per topic, in fetch order.
    panels = (
        (plot_line,
         "Total commission by type (1-min window, slide 20 s)",
         "Total commission (IRR)"),
        (plot_line,
         "Commission / amount ratio by merchant category",
         "Commission ratio"),
        (plot_bar,
         "Highest-commission merchant (5-min window)",
         "Total commission (IRR)"),
    )
    for frame, (draw, title, ylabel) in zip(frames, panels):
        if frame.empty:
            continue
        draw(frame, title, ylabel)

# Cell entry point: fetch the three topics once and draw whichever have data.
dashboard()


⏳ fetching data …
no messages found – is the streaming job running?
