In [0]:
# ======================================
# Fabric Notebook 3: Association Rules (Fixed for Power BI)
# ======================================

from pyspark.ml.fpm import FPGrowth
import pyspark.sql.functions as F
import mlflow
import mlflow.spark
from mlflow import MlflowClient
from pyspark.sql.functions import concat_ws

# Setup MLflow: select the experiment and make sure the registered model exists.
mlflow.set_experiment("Retail_ML_Experiments")
client = MlflowClient()
registry_name = "retail_ML_basket"
try:
    client.create_registered_model(registry_name)
except Exception as exc:
    # Best-effort: an already-registered model is the expected case on re-runs.
    # Any other failure is reported with its cause instead of being mislabelled
    # as "already exists"; the script still continues either way.
    # `except Exception` (not a bare `except`) lets KeyboardInterrupt/SystemExit
    # propagate so the cell can still be cancelled.
    if getattr(exc, "error_code", None) == "RESOURCE_ALREADY_EXISTS":
        print(f"ℹ️ Registry {registry_name} already exists")
    else:
        print(f"⚠️ Could not create registry {registry_name}: {exc}")

# Load the sales fact table and build one basket per transaction:
# `items` holds the distinct Product_ID values collected for each Transaction_ID.
df = (
    spark.table("Fact_Sales_Products")
    .groupBy("Transaction_ID")
    .agg(F.collect_set("Product_ID").alias("items"))
)

# Train FP-Growth
# The hyperparameters are defined once so that the values logged to MLflow are
# always the ones the model was actually trained with.
min_support = 0.005
min_confidence = 0.05

with mlflow.start_run(run_name="FPGrowth") as run:
    # Only baskets with at least two items are used for training and prediction.
    df_filtered = df.filter(F.size("items") >= 2)
    fpGrowth = FPGrowth(
        itemsCol="items",
        minSupport=min_support,
        minConfidence=min_confidence,
    )
    model = fpGrowth.fit(df_filtered)

    mlflow.log_param("minSupport", min_support)
    mlflow.log_param("minConfidence", min_confidence)
    mlflow.spark.log_model(model, "model")

    # Registering a model version is best-effort: a failure is reported with its
    # cause but does not abort the run. `except Exception` (not a bare `except`)
    # lets KeyboardInterrupt/SystemExit propagate.
    try:
        mv = client.create_model_version(name=registry_name,
                                         source=f"runs:/{run.info.run_id}/model",
                                         run_id=run.info.run_id)
        print(f"Model registered as version {mv.version}")
    except Exception as exc:
        print(f"Registry skipped: {exc}")

    # ==========================
    # Convert arrays to strings
    # ==========================
    # The array columns are replaced by comma-joined string columns before the
    # tables are saved for Power BI.
    rules_df = model.associationRules \
        .withColumn("antecedent_str", concat_ws(",", "antecedent")) \
        .withColumn("consequent_str", concat_ws(",", "consequent")) \
        .drop("antecedent", "consequent")

    items_df = model.freqItemsets \
        .withColumn("items_str", concat_ws(",", "items")) \
        .drop("items")

    # Save results for Power BI (each run overwrites the table and its schema)
    rules_df.write.format("delta") \
        .mode("overwrite") \
        .option("overwriteSchema", "true") \
        .saveAsTable("PowerBI_Basket_Analysis")
    print("✅ Saved association rules to PowerBI_Basket_Analysis")

    items_df.write.format("delta") \
        .mode("overwrite") \
        .option("overwriteSchema", "true") \
        .saveAsTable("PowerBI_Frequent_Itemsets")
    print("✅ Saved frequent itemsets to PowerBI_Frequent_Itemsets")

    # Show frequent itemsets
    print("📊 Frequent Itemsets:")
    items_df.show(10, truncate=False)

    # Show association rules
    print("📈 Association Rules:")
    rules_df.show(10, truncate=False)

    # Show predictions (transactions + recommended items as string)
    predictions = model.transform(df_filtered) \
        .withColumn("predicted_items", concat_ws(",", "prediction"))
    print("🛒 Predictions with recommendations:")
    predictions.show(10, truncate=False)

    # Count of rules and itemsets
    print(f"✅ Total frequent itemsets: {items_df.count()}")
    print(f"✅ Total association rules: {rules_df.count()}")

