In [1]:
import pandas as pd

# Running state tables. Each one starts as a zero-row frame with a fixed column
# order; the update_* functions in this notebook rebind the first three as
# chunks are processed.


def _empty_state(*column_names):
    """Return a zero-row DataFrame whose columns are ``column_names``, in order."""
    return pd.DataFrame(columns=list(column_names))


# 1. Merchant-level transaction state: one row per merchant.
merchant_txn_summary_df = _empty_state(
    "merchant", "total_txn_count", "total_txn_amount"
)

# 2. Customer-Merchant-level state: one row per (merchant, customer) pair.
customer_merchant_summary_df = _empty_state(
    "merchant", "customer", "txn_count", "total_amount", "avg_amount"
)

# 3. Merchant-gender state: female / male customer counts per merchant.
merchant_gender_counts_df = _empty_state(
    "merchant", "female_count", "male_count"
)

# 4. Detection accumulator. Nothing in the cells shown here writes to it;
#    presumably the pattern detectors append rows - confirm against their code.
detections_df = _empty_state(
    "YStartTime", "detectionTime", "patternId", "ActionType", "customerName", "merchantId"
)


In [2]:
from datetime import datetime
import pytz

# Every timestamp produced in this cell is rendered in the Asia/Kolkata zone.
IST = pytz.timezone('Asia/Kolkata')


def get_detection_time():
    """Return the current time in IST formatted as ``YYYY-MM-DD HH:MM:SS``."""
    current_ist = datetime.now(IST)
    return current_ist.strftime("%Y-%m-%d %H:%M:%S")


# Captured once, at the moment this cell runs; re-running the cell refreshes it.
YStartTime = get_detection_time()


In [3]:
def update_merchant_txn_summary(chunk_df):
    """Fold one chunk of transactions into the running per-merchant totals.

    The chunk is grouped by ``merchant``; the non-null ``amount`` values are
    counted and summed, and those figures are merged into the module-level
    ``merchant_txn_summary_df`` so that it keeps exactly one row per merchant.

    Parameters
    ----------
    chunk_df : pandas.DataFrame
        Must provide ``merchant`` and ``amount`` columns.

    Returns
    -------
    None
        The result is stored by rebinding the global ``merchant_txn_summary_df``.

    Raises
    ------
    KeyError
        If ``chunk_df`` lacks the ``merchant`` or ``amount`` column.
    """
    global merchant_txn_summary_df

    chunk_totals = (
        chunk_df.groupby("merchant")["amount"]
        .agg(total_txn_count="count", total_txn_amount="sum")
        .reset_index()
    )

    # Keep empty frames out of the concat. The initial state frame is empty with
    # object-dtype columns; concatenating it with real data relies on behaviour
    # pandas has deprecated and can leave the totals as object dtype.
    frames = [
        frame
        for frame in (merchant_txn_summary_df, chunk_totals)
        if not frame.empty
    ]
    if not frames:
        # No prior state and nothing new in this chunk: leave the state as is.
        return

    # Re-aggregate so merchants present in both the old state and this chunk
    # collapse into a single row.
    merchant_txn_summary_df = (
        pd.concat(frames, ignore_index=True)
        .groupby("merchant", as_index=False)
        .agg({"total_txn_count": "sum", "total_txn_amount": "sum"})
    )

In [4]:
def update_customer_merchant_summary(chunk_df):
    """Fold one chunk of transactions into the per-(merchant, customer) totals.

    The chunk is grouped by ``merchant`` and ``customer``; the non-null
    ``amount`` values are counted and summed, the figures are merged into the
    module-level ``customer_merchant_summary_df`` (one row per pair), and
    ``avg_amount`` is recomputed from the merged totals.

    Parameters
    ----------
    chunk_df : pandas.DataFrame
        Must provide ``merchant``, ``customer`` and ``amount`` columns.

    Returns
    -------
    None
        The result is stored by rebinding the global
        ``customer_merchant_summary_df``.

    Raises
    ------
    KeyError
        If ``chunk_df`` lacks one of the required columns.
    """
    global customer_merchant_summary_df

    pair_keys = ["merchant", "customer"]

    # No per-chunk average is computed here: an average cannot be merged by
    # summing, so it is derived once from the merged totals below.
    chunk_totals = (
        chunk_df.groupby(pair_keys)["amount"]
        .agg(txn_count="count", total_amount="sum")
        .reset_index()
    )

    # Keep empty frames out of the concat. The initial state frame is empty with
    # object-dtype columns; concatenating it with real data relies on behaviour
    # pandas has deprecated and can leave the totals as object dtype.
    frames = [
        frame
        for frame in (customer_merchant_summary_df, chunk_totals)
        if not frame.empty
    ]
    if not frames:
        # No prior state and nothing new in this chunk: leave the state as is.
        return

    merged = (
        pd.concat(frames, ignore_index=True)
        .groupby(pair_keys, as_index=False)
        .agg({"txn_count": "sum", "total_amount": "sum"})
    )
    # A pair whose amounts were all null has txn_count 0 and total 0, which
    # yields NaN here rather than raising.
    merged["avg_amount"] = merged["total_amount"] / merged["txn_count"]

    customer_merchant_summary_df = merged


In [5]:
def update_merchant_gender_counts(chunk_df):
    """Add one chunk's per-merchant female/male customer counts to the state.

    Rows whose ``gender`` is neither ``"F"`` nor ``"M"`` are ignored. For the
    remaining rows the number of distinct ``customer`` values is counted per
    merchant and gender, and those counts are added to the module-level
    ``merchant_gender_counts_df`` (one row per merchant).

    NOTE(review): customers are de-duplicated within a chunk only. The per-chunk
    distinct counts are then summed, so a customer who shows up in several
    chunks is counted once per chunk. Confirm whether that is intended; a true
    distinct count across chunks would need to remember which customers were
    already seen.

    Parameters
    ----------
    chunk_df : pandas.DataFrame
        Must provide ``merchant``, ``customer`` and ``gender`` columns.

    Returns
    -------
    None
        The result is stored by rebinding the global
        ``merchant_gender_counts_df``.

    Raises
    ------
    KeyError
        If ``chunk_df`` lacks the ``gender`` column, or lacks ``merchant`` or
        ``customer`` while it has at least one ``"F"``/``"M"`` row.
    """
    global merchant_gender_counts_df

    count_columns = ["female_count", "male_count"]

    known_gender_rows = chunk_df[chunk_df["gender"].isin(["F", "M"])]
    if known_gender_rows.empty:
        # Nothing countable in this chunk: leave the state as is.
        return

    chunk_counts = (
        known_gender_rows
        .groupby(["merchant", "gender"])["customer"]
        .nunique()
        .unstack(fill_value=0)
        .rename(columns={"F": "female_count", "M": "male_count"})
        # A chunk may contain only one gender; guarantee both count columns
        # exist (as integer zeros) instead of relying on the concat to invent
        # a NaN column.
        .reindex(columns=count_columns, fill_value=0)
        .rename_axis(columns=None)
        .reset_index()
    )

    # Keep empty frames out of the concat. The initial state frame is empty with
    # object-dtype columns; concatenating it with real data relies on behaviour
    # pandas has deprecated and can leave the counts as object dtype.
    frames = [
        frame
        for frame in (merchant_gender_counts_df, chunk_counts)
        if not frame.empty
    ]
    if not frames:
        return

    merchant_gender_counts_df = (
        pd.concat(frames, ignore_index=True)
        .groupby("merchant", as_index=False)
        .agg({"female_count": "sum", "male_count": "sum"})
    )


In [6]:
def process_chunk(chunk_df):
    """Run one chunk through the three state updaters, then the pattern detectors.

    The same ``chunk_df`` object is passed, unchanged, to every step. Nothing is
    returned; each step is called for its side effects.

    ``detect_patterns`` is not defined in the cells shown here and must exist
    before this function is called.
    """
    # Stage 1: refresh every intermediate state table, in this fixed order.
    state_updaters = (
        update_merchant_txn_summary,
        update_customer_merchant_summary,
        update_merchant_gender_counts,
    )
    for refresh_state in state_updaters:
        refresh_state(chunk_df)

    # Stage 2: hand the chunk to the pattern detectors (to be plugged in next).
    detect_patterns(chunk_df)
