# In [0]:
from pyspark.sql import functions as F
from pyspark.sql import SparkSession
from datetime import datetime
# Load the raw transactions CSV from ADLS into `transactions_df`.
# Only the `header` option is set (no schema, no inferSchema), so the column
# names come from the first row and the values are not type-inferred.
# getOrCreate() hands back the session that is already running in a notebook
# and builds one when this is executed as a standalone script, where no
# `spark` global is predefined.
spark = SparkSession.builder.getOrCreate()
transactions_df = (
    spark.read.format("csv")
    .option("header", "true")
    .load("abfss://gdrive-ingest@devdolphinstorage.dfs.core.windows.net/transactions.csv")
)
# Step 1: Aggregate per (customer, merchant) pair: number of transactions
# and mean transaction amount.
# The CSV is loaded without a schema, so `amount` is not a numeric column;
# cast it to double explicitly rather than depending on Spark's implicit
# string-to-number coercion inside avg().
grouped_df = transactions_df.groupBy("customer", "merchant").agg(
    F.count("*").alias("txn_count"),
    F.avg(F.col("amount").cast("double")).alias("avg_amount"),
)

# Step 2: Keep only the pairs that match the pattern: at least 80
# transactions AND an average amount strictly below 23.
has_enough_txns = F.col("txn_count") >= 80
has_low_average = F.col("avg_amount") < 23
filtered_df = grouped_df.where(has_enough_txns & has_low_average)

# Step 3: Shape the matches into detection records.
# A single wall-clock reading is taken on the driver and formatted as a
# "YYYY-MM-DD HH:MM:SS" string, so every row carries the same value.
# NOTE(review): datetime.now() is naive (no timezone attached) — confirm the
# driver's local time is the intended reference.
current_time = f"{datetime.now():%Y-%m-%d %H:%M:%S}"

# Rename the grouping keys to the output names first ...
detections_df = filtered_df.withColumnRenamed(
    "customer", "customerName"
).withColumnRenamed("merchant", "MerchantId")

# ... then append the constant metadata columns, in output order.
for column_name, constant in (
    ("YStartTime", current_time),
    ("detectionTime", current_time),
    ("patternId", "PatId2"),
    ("ActionType", "CHILD"),
):
    detections_df = detections_df.withColumn(column_name, F.lit(constant))

# Render the detections for inspection; nothing is persisted here.
# NOTE(review): DataFrame.display() is not part of open-source PySpark —
# presumably this runs in a Databricks notebook; confirm before running elsewhere.
detections_df.display()
