In [197]:
from datetime import datetime

import pandas as pd

In [195]:
# Running per-merchant transaction totals, accumulated across chunks.
# Columns are created with explicit dtypes so the running total is numeric
# from the start instead of defaulting to object dtype.
merchant_transaction_count_df = pd.DataFrame({
    "merchant": pd.Series(dtype="object"),
    "total_txn": pd.Series(dtype="int64"),
})

In [196]:
# Running per-(customer, merchant) transaction count and mean weight,
# accumulated across chunks. Stat columns get explicit numeric dtypes so
# later arithmetic does not operate on object-dtype columns.
customer_merchant_stats_df = pd.DataFrame({
    "customer": pd.Series(dtype="object"),
    "merchant": pd.Series(dtype="object"),
    "txn_count": pd.Series(dtype="int64"),
    "avg_weight": pd.Series(dtype="float64"),
})

In [198]:
def process_chunk(chunk_df, customer_df):
    """Fold one chunk of transactions into the module-level running aggregates.

    The chunk is inner-joined to ``customer_df`` on
    (customer, merchant, category, amount) == (Source, Target, typeTrans, Weight);
    only matched rows are counted.

    Side effects: rebinds the globals ``merchant_transaction_count_df``
    (columns ``merchant``, ``total_txn``) and ``customer_merchant_stats_df``
    (columns ``customer``, ``merchant``, ``txn_count``, ``avg_weight``).

    Parameters
    ----------
    chunk_df : pandas.DataFrame
        Must contain ``customer``, ``merchant``, ``category`` and ``amount``.
    customer_df : pandas.DataFrame
        Must contain ``Source``, ``Target``, ``typeTrans`` and ``Weight``.

    Returns
    -------
    None
    """
    global merchant_transaction_count_df
    global customer_merchant_stats_df

    # Step 0: join with the customer table to attach Weight.
    merged_df = chunk_df.merge(
        customer_df,
        how='inner',
        left_on=['customer', 'merchant', 'category', 'amount'],
        right_on=['Source', 'Target', 'typeTrans', 'Weight'],
    )

    # No matched rows means there is nothing to accumulate.
    if merged_df.empty:
        return

    # Step 1: merchant-level running transaction count.
    merchant_txn = merged_df.groupby('merchant').size().reset_index(name='new_txn_count')
    merchant_totals = merchant_transaction_count_df.merge(
        merchant_txn,
        on='merchant',
        how='outer',
    )
    # Coerce explicitly rather than relying on a blanket fillna(0) to turn
    # object-dtype columns (from an empty accumulator) into numbers.
    prior_total = pd.to_numeric(merchant_totals['total_txn']).fillna(0)
    chunk_total = merchant_totals['new_txn_count'].fillna(0)
    merchant_totals['total_txn'] = (prior_total + chunk_total).astype('int64')
    merchant_transaction_count_df = merchant_totals[['merchant', 'total_txn']]

    # Step 2: customer-merchant running count and count-weighted mean weight.
    # Rows are counted with 'size' (same as Step 1) so every group has
    # txn_count >= 1 and the division below can never be 0/0.
    chunk_stats = merged_df.groupby(['customer', 'merchant']).agg(
        txn_count=('Weight', 'size'),
        avg_weight=('Weight', 'mean'),
    ).reset_index()

    combined = customer_merchant_stats_df.merge(
        chunk_stats,
        on=['customer', 'merchant'],
        how='outer',
        suffixes=('_prev', '_new'),
    )
    for col in ('txn_count_prev', 'avg_weight_prev', 'txn_count_new', 'avg_weight_new'):
        # A pair missing on one side contributes zero count and zero weight.
        combined[col] = pd.to_numeric(combined[col]).fillna(0)

    total_count = combined['txn_count_prev'] + combined['txn_count_new']
    weight_sum = (
        combined['avg_weight_prev'] * combined['txn_count_prev']
        + combined['avg_weight_new'] * combined['txn_count_new']
    )

    customer_merchant_stats_df = pd.DataFrame({
        'customer': combined['customer'],
        'merchant': combined['merchant'],
        'txn_count': total_count.astype('int64'),
        'avg_weight': weight_sum / total_count,
    })


In [199]:
def detect_patid1(min_merchant_txn=50000, txn_quantile=0.90, weight_quantile=0.10):
    """Build "PatId1" / "UPGRADE" detections from the running aggregates.

    Reads the module-level ``merchant_transaction_count_df`` and
    ``customer_merchant_stats_df``. Merchants whose ``total_txn`` is below
    ``min_merchant_txn`` are skipped. For each remaining merchant, a customer
    is reported when their ``txn_count`` is at or above the merchant's
    ``txn_quantile`` quantile AND their ``avg_weight`` is at or below the
    merchant's ``weight_quantile`` quantile.

    Parameters
    ----------
    min_merchant_txn : int, default 50000
        Minimum total transactions a merchant needs before it is evaluated.
    txn_quantile : float, default 0.90
        Quantile of per-customer transaction counts used as the lower bound.
    weight_quantile : float, default 0.10
        Quantile of per-customer average weights used as the upper bound.

    Returns
    -------
    list[dict]
        One dict per detection with keys ``YStartTime``, ``detectionTime``,
        ``patternId``, ``ActionType``, ``customerName`` and ``MerchantId``.
    """
    detections = []
    # One timestamp per call, so every detection from this run shares it.
    detection_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    for merchant in merchant_transaction_count_df.itertuples():
        if merchant.total_txn < min_merchant_txn:
            continue

        cust_subset = customer_merchant_stats_df[
            customer_merchant_stats_df['merchant'] == merchant.merchant
        ]
        if cust_subset.empty:
            continue

        # Coerce so quantiles are computed on numeric data even if the
        # accumulator columns are object dtype.
        txn_counts = pd.to_numeric(cust_subset['txn_count'])
        avg_weights = pd.to_numeric(cust_subset['avg_weight'])

        txn_threshold = txn_counts.quantile(txn_quantile)
        weight_threshold = avg_weights.quantile(weight_quantile)

        eligible = cust_subset[
            (txn_counts >= txn_threshold) & (avg_weights <= weight_threshold)
        ]

        detections.extend(
            {
                "YStartTime": "",  # to be filled in if known
                "detectionTime": detection_time,
                "patternId": "PatId1",
                "ActionType": "UPGRADE",
                "customerName": customer,
                "MerchantId": merchant_id,
            }
            for customer, merchant_id in zip(eligible['customer'], eligible['merchant'])
        )

    return detections


In [None]:
import pandas as pd
from datetime import datetime
import os

# Root folder of the exported blob files. Every path below is derived from
# it, so pointing the notebook at another copy of the data is a one-line change.
BLOB_DIR = r"C:\Users\kusha\OneDrive\Desktop\Projects\DevDolphins\Blob files"
CHUNKS_DIR = os.path.join(BLOB_DIR, "Chunks")
CUSTOMER_CSV = os.path.join(BLOB_DIR, "customer data", "customer data.csv")

# To load every chunk instead of a single file, uncomment:
# csv_files = [os.path.join(CHUNKS_DIR, file) for file in os.listdir(CHUNKS_DIR) if file.endswith('.csv')]
# transactions_df = pd.concat([pd.read_csv(file) for file in csv_files], ignore_index=True)

transactions_df = pd.read_csv(os.path.join(CHUNKS_DIR, "chunk_1_20250624_202535.csv"))
customer_df = pd.read_csv(CUSTOMER_CSV)

# Preview only the first rows rather than rendering the whole frame.
customer_df.head()
