In [16]:
import pandas as pd
import os
from datetime import datetime

# -----------------------------
# 🧠 Simulated Postgres-like Table
# -----------------------------
already_detected_pat3 = set()

# -----------------------------
# 🔍 Pattern 3 Detection Logic
# -----------------------------
def detect_patid3(transactions_df):
    detections = []

    # Clean gender values
    filtered_df = transactions_df.dropna(subset=['merchant', 'gender'])
    filtered_df['gender'] = (
        filtered_df['gender']
        .astype(str)
        .str.replace("'", "")  # remove single quotes
        .str.strip()
        .str.upper()
    )

    # Group by merchant & gender
    gender_counts = filtered_df.groupby(['merchant', 'gender'])['customer'].nunique().reset_index(name='count')

    # Pivot into merchant × [F, M]
    pivot_df = gender_counts.pivot(index='merchant', columns='gender', values='count').fillna(0).reset_index()
    pivot_df.columns.name = None

    # Ensure 'F' and 'M' columns exist
    if 'F' not in pivot_df.columns:
        pivot_df['F'] = 0
    if 'M' not in pivot_df.columns:
        pivot_df['M'] = 0

    # 🧾 Print gender stats
    print("\n🧾 Gender distribution per merchant (top 10):")
    print(pivot_df[['merchant', 'F', 'M']].sort_values(by='F', ascending=False).head(10))

    # Apply Pattern 3 logic
    eligible = pivot_df[(pivot_df['F'] > 100) & (pivot_df['F'] < pivot_df['M'])]

    # 🎯 Print eligible merchants
    print("\n🎯 Eligible DEI-NEEDED merchants:")
    print(eligible[['merchant', 'F', 'M']])

    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    for _, row in eligible.iterrows():
        merchant_id = row['merchant']
        if merchant_id in already_detected_pat3:
            continue

        detections.append({
            "YStartTime": now,
            "detectionTime": now,
            "patternId": "PatId3",
            "ActionType": "DEI-NEEDED",
            "customerName": "",
            "MerchantId": merchant_id
        })
        already_detected_pat3.add(merchant_id)

    return pd.DataFrame(detections)

# -----------------------------
# 💾 Write Detections (50 per file)
# -----------------------------
def write_detections(detections, output_dir="outputs/", file_prefix="detections_pat3"):
    if detections.empty:
        return

    os.makedirs(output_dir, exist_ok=True)

    for i in range(0, len(detections), 50):
        batch = detections.iloc[i:i+50]
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S_%f')
        output_path = f"{output_dir}/{file_prefix}_{timestamp}.csv"
        batch.to_csv(output_path, index=False)
        print(f"✅ Wrote {len(batch)} detections to {output_path}")

# -----------------------------
# 🚀 Main Function
# -----------------------------
def main():
    chunk_folder = r"C:\Users\kusha\OneDrive\Desktop\Projects\DevDolphins\Blob files\Chunks"
    all_files = sorted(os.listdir(chunk_folder))

    for file in all_files:
        print(f"\n📦 Processing: {file}")
        chunk_df = pd.read_csv(os.path.join(chunk_folder, file))

        # ✅ Clean gender quotes and format
        chunk_df['gender'] = (
            chunk_df['gender']
            .astype(str)
            .str.replace("'", "")
            .str.strip()
            .str.upper()
        )

        detections3 = detect_patid3(chunk_df)
        write_detections(detections3)

# -----------------------------
# 🔁 Entry Point
# -----------------------------
if __name__ == "__main__":
    main()



📦 Processing: chunk_10_20250624_202551.csv

🧾 Gender distribution per merchant (top 10):
         merchant      F      M
25   'M348934600'  996.0  854.0
15  'M1823072687'  709.0  592.0
40    'M85975013'  135.0  118.0
7    'M151143676'   86.0   71.0
38   'M855959430'   81.0   83.0
0   'M1053599405'   53.0   43.0
22   'M209847108'   50.0   40.0
29   'M480139044'   40.0   30.0
20  'M1946091778'   33.0   32.0
8   'M1535107174'   27.0   13.0

🎯 Eligible DEI-NEEDED merchants:
Empty DataFrame
Columns: [merchant, F, M]
Index: []

📦 Processing: chunk_11_20250624_202553.csv

🧾 Gender distribution per merchant (top 10):
         merchant      F      M
27   'M348934600'  985.0  831.0
15  'M1823072687'  743.0  632.0
41    'M85975013'  152.0  141.0
7    'M151143676'   89.0   60.0
39   'M855959430'   77.0   69.0
0   'M1053599405'   58.0   36.0
31   'M480139044'   51.0   26.0
24   'M209847108'   49.0   38.0
21  'M1946091778'   35.0   36.0
8   'M1535107174'   34.0   17.0

🎯 Eligible DEI-NEEDED merchan