In [None]:
import logging
from pyspark.sql import functions as F
from pyspark.sql.types import IntegerType

# Module-level logger for this notebook cell; the root logger is configured
# to emit INFO and above so the pipeline's progress messages are visible.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Build the customer_360 table: load the five source tables, join policy and
# claims data onto demographics, derive per-customer claim metrics, attach
# AI/ML insights and scores, and overwrite the Delta target table.
# Any failure is logged with its traceback and re-raised so the run is
# reported as failed instead of silently completing.
try:
    # Load data from Unity Catalog tables
    policy_df = spark.table("genai_demo.jnj.policy")
    claims_df = spark.table("genai_demo.jnj.claims")
    demographics_df = spark.table("genai_demo.jnj.demographics")
    scores_df = spark.table("genai_demo.jnj.scores")
    aiml_insights_df = spark.table("genai_demo.jnj.aiml_insights")
    logger.info("Data loaded from Unity Catalog tables")

    # Select relevant fields
    demographics_df = demographics_df.select(
        "Customer_ID", "Customer_Name", "Email", "Phone_Number", "Address", "City", "State", "Postal_Code",
        "Date_of_Birth", "Gender", "Marital_Status", "Occupation", "Income_Level", "Customer_Segment"
    )
    policy_df = policy_df.select(
        "Policy_ID", "Customer_ID", "Policy_Type", "Policy_Status", "Policy_Start_Date", "Policy_End_Date",
        "Policy_Term", "Policy_Premium", "Total_Premium_Paid", "Renewal_Status", "Policy_Addons"
    )
    claims_df = claims_df.select(
        "Claim_ID", "Policy_ID", "Claim_Date", "Claim_Type", "Claim_Status", "Claim_Amount", "Claim_Payout"
    )
    scores_df = scores_df.select("Customer_ID", "Credit_Score", "Fraud_Score", "Customer_Risk_Score")
    aiml_insights_df = aiml_insights_df.select(
        "Customer_ID", "Churn_Probability", "Next_Best_Offer", "Claims_Fraud_Probability", "Revenue_Potential"
    )
    logger.info("Relevant fields selected")

    # Data Integration
    # NOTE(review): both joins are inner, so customers without a policy and
    # policies without any claim are dropped here -- confirm this is intended.
    demographics_policy_df = demographics_df.join(policy_df, on="Customer_ID", how="inner")
    claims_policy_df = claims_df.join(policy_df, on="Policy_ID", how="inner")
    logger.info("Data integrated")

    # Data Aggregation (one row per customer)
    summarized_df = claims_policy_df.groupBy("Customer_ID").agg(
        F.count("Claim_ID").alias("Total_Claims"),
        # Each row of claims_policy_df is a claim, so a plain count of
        # Policy_ID would just repeat the claim count; count distinct
        # policies instead. Only policies that have at least one claim
        # survive the inner join above, so this is the number of policies
        # with claims.
        F.countDistinct("Policy_ID").alias("Policy_Count"),
        F.max("Claim_Date").alias("Recent_Claim_Date"),
        F.avg("Claim_Amount").alias("Average_Claim_Amount"),
        F.sum("Claim_Amount").alias("Total_Claim_Amount")
    )
    logger.info("Data aggregated")

    # Join summarized data with combined data
    combined_df = demographics_policy_df.join(summarized_df, on="Customer_ID", how="inner")
    logger.info("Summarized data joined with combined data")

    # Custom Calculations
    # Age in completed years: whole months since the birth date divided by 12.
    # Dividing a day count by 365 ignores leap days and can report a birthday
    # a few days early.
    age_col = F.floor(
        F.months_between(F.current_date(), F.to_date("Date_of_Birth")) / 12
    ).cast(IntegerType())

    # NOTE(review): Total_Claim_Amount is aggregated per customer while
    # Total_Premium_Paid comes from the individual policy row -- verify the
    # ratio is meant to mix these two levels.
    claim_to_premium_col = F.when(
        F.col("Total_Premium_Paid") != 0,
        F.col("Total_Claim_Amount") / F.col("Total_Premium_Paid"),
    ).otherwise(0)

    # Guard against division by zero; a null or zero denominator yields 0.
    claims_per_policy_col = F.when(
        F.col("Policy_Count") != 0,
        F.col("Total_Claims") / F.col("Policy_Count"),
    ).otherwise(0)

    combined_df = combined_df.withColumn(
        "Age", age_col
    ).withColumn(
        "Claim_To_Premium_Ratio", claim_to_premium_col
    ).withColumn(
        "Claims_Per_Policy", claims_per_policy_col
    ).withColumn(
        # The next three columns are constant placeholders, not computed values.
        "Retention_Rate", F.lit(0.85)
    ).withColumn(
        "Cross_Sell_Opportunities", F.lit("Multi-Policy Discount, Home Coverage Add-on")
    ).withColumn(
        "Upsell_Potential", F.lit("Premium Vehicle Coverage")
    )
    logger.info("Custom calculations applied")

    # Integrate AI/ML insights
    # NOTE(review): inner joins drop customers missing from either table.
    customer_360_df = combined_df.join(aiml_insights_df, on="Customer_ID", how="inner").join(scores_df, on="Customer_ID", how="inner")
    logger.info("AI/ML insights integrated")

    # Ensure schema exists before creating table
    spark.sql("CREATE SCHEMA IF NOT EXISTS genai_demo.guardian")
    logger.info("Schema ensured")

    # Write to Unity Catalog target table (replaces any existing contents)
    customer_360_df.write.format("delta").mode("overwrite").saveAsTable("genai_demo.guardian.customer_360")
    logger.info("Data written to Unity Catalog target table")

except Exception as e:
    # Top-level boundary: record the full traceback, then re-raise so the
    # notebook/job run is marked as failed rather than appearing successful.
    logger.exception("Error occurred: %s", e)
    raise
