In [6]:
import json
import pandas as pd
from kafka import KafkaConsumer
import plotly.express as px

# Read-only consumer for the per-customer activity aggregates used by the plot below.
#
# The cell is meant to be re-runnable: with a fixed consumer group and
# auto-commit enabled, the offsets of everything read would be committed, so the
# next run would resume *after* those messages (auto_offset_reset only applies
# when the group has no committed offset) and could find little or nothing to
# plot. Reading without a group and without commits replays the topic from the
# earliest retained offset on every run.
consumer = KafkaConsumer(
    'darooghe.user_activity',
    bootstrap_servers='localhost:9092',
    auto_offset_reset='earliest',
    enable_auto_commit=False,   # never move offsets from a visualization cell
    group_id=None,              # no consumer group -> no stored position between runs
    consumer_timeout_ms=15000,  # stop iterating after 15 s without new messages
    value_deserializer=lambda m: json.loads(m.decode('utf-8'))
)

# Drain the Kafka consumer into a list of flat records for the DataFrame below.
MAX_MESSAGES = 10_000   # hard cap on messages consumed in one run
PREVIEW_MESSAGES = 5    # echo only the first few raw messages as a sanity check

data = []
print("Collecting user activity data...")

try:
    for i, msg in enumerate(consumer):
        value = msg.value
        # Printing every message would bury the figure under thousands of lines.
        if i < PREVIEW_MESSAGES:
            print(f"[{i}] Raw Message:", value)

        try:
            record = {
                "window_start": value["window_start"],
                "window_end": value["window_end"],
                "customer_id": value["customer_id"],
                "transaction_count": int(value["transaction_count"]),
                "first_transaction_time": value["first_transaction_time"],
                "last_transaction_time": value["last_transaction_time"],
            }
        except (KeyError, TypeError, ValueError) as e:
            # Missing field, non-dict payload, or non-numeric count: skip the
            # message but keep collecting.
            print(f"Error parsing message #{i}: {e}")
        else:
            data.append(record)

        # Checked for every message (including malformed ones) so the cap
        # cannot be bypassed; i is zero-based, hence the +1.
        if i + 1 >= MAX_MESSAGES:
            break
finally:
    # Release the connection even if the cell is interrupted or iteration fails.
    consumer.close()

print(f"Collected {len(data)} records.")

# Build a DataFrame from the collected records and plot the transaction counts
# of the most active customers over time.
TOP_N_USERS = 5         # how many of the most active customers to plot
MAX_PLOT_ROWS = 10000   # upper bound on the number of rows handed to Plotly

if not data:
    print("No data received from Kafka topic.")
else:
    df = pd.DataFrame(data)
    df["window_start"] = pd.to_datetime(df["window_start"])
    df["window_end"] = pd.to_datetime(df["window_end"])

    # Rank customers by their summed transaction_count over all collected rows.
    top_5_users = (
        df.groupby('customer_id')['transaction_count']
        .sum()
        .sort_values(ascending=False)
        .head(TOP_N_USERS)
        .index
    )

    # Messages arrive in non-chronological order and px.line joins points in
    # row order, so the rows must be sorted by time or the lines zig-zag
    # backwards. A stable sort keeps arrival order for equal timestamps.
    df_top_5 = (
        df[df['customer_id'].isin(top_5_users)]
        .head(MAX_PLOT_ROWS)
        .sort_values("window_start", kind="mergesort")
    )

    fig = px.line(
        df_top_5,
        x="window_start",
        y="transaction_count",
        color="customer_id",
        title="Top 5 User Activity: Transaction Count Over Time",
        labels={"window_start": "Time", "transaction_count": "Transaction Count"},
        markers=True
    )

    fig.update_layout(
        xaxis_title='Time',
        yaxis_title='Number of Transactions',
        template='plotly_dark'
    )

    fig.show()


Collecting user activity data...
[0] Raw Message: {'window_start': '2025-05-02T22:17:00.000+03:30', 'window_end': '2025-05-02T22:22:00.000+03:30', 'customer_id': 'cust_174', 'transaction_count': 1, 'first_transaction_time': '2025-05-02T22:18:43.061+03:30', 'last_transaction_time': '2025-05-02T22:18:43.061+03:30'}
[1] Raw Message: {'window_start': '2025-05-02T22:14:00.000+03:30', 'window_end': '2025-05-02T22:19:00.000+03:30', 'customer_id': 'cust_379', 'transaction_count': 1, 'first_transaction_time': '2025-05-02T22:18:46.679+03:30', 'last_transaction_time': '2025-05-02T22:18:46.679+03:30'}
[2] Raw Message: {'window_start': '2025-05-02T22:16:00.000+03:30', 'window_end': '2025-05-02T22:21:00.000+03:30', 'customer_id': 'cust_287', 'transaction_count': 1, 'first_transaction_time': '2025-05-02T22:18:46.824+03:30', 'last_transaction_time': '2025-05-02T22:18:46.824+03:30'}
[3] Raw Message: {'window_start': '2025-05-02T22:15:00.000+03:30', 'window_end': '2025-05-02T22:20:00.000+03:30', 'custom