In [None]:
import logging
from pyspark.sql.functions import col, when, datediff, current_date, avg, max as spark_max, count
from pyspark.sql import functions as F

# Module-scoped logger for this ETL script; the root logger is configured
# to emit INFO-level records and above.
logger = logging.getLogger(name=__name__)
logging.basicConfig(level=logging.INFO)

try:
    # Build the customer-360 table: one output row per
    # (customer, policy, claim), enriched with per-customer claim summaries,
    # derived ratios, and the AI/ML insight and score columns.

    # Load the source tables from Unity Catalog, keeping only the columns
    # that feed the output.
    demographics_df = spark.table("catalog.db.demographics").select(
        "Customer_ID", "Customer_Name", "Email", "Phone_Number", "Address",
        "City", "State", "Postal_Code", "Date_of_Birth", "Gender",
        "Marital_Status", "Occupation", "Income_Level", "Customer_Segment",
    )

    policy_df = spark.table("catalog.db.policy").select(
        "policy_id", "customer_id", "policy_type", "policy_status",
        "policy_start_date", "policy_end_date", "policy_term",
        "policy_premium", "total_premium_paid", "renewal_status",
        "policy_addons",
    )

    claims_df = spark.table("catalog.db.claims").select(
        "Claim_ID", "Policy_ID", "Claim_Date", "Claim_Type", "Claim_Status",
        "Claim_Amount", "Claim_Payout",
    )

    scores_df = spark.table("catalog.db.scores").select(
        "Customer_ID", "Credit_Score", "Fraud_Score", "Customer_Risk_Score",
    )

    aiml_insights_df = spark.table("catalog.db.aiml_insights").select(
        "Customer_ID", "Churn_Probability", "Next_Best_Offer",
        "Claims_Fraud_Probability", "Revenue_Potential",
    )

    # Customer -> policy -> claim. The duplicate join keys from the right-hand
    # side are dropped by column reference so only one copy survives.
    # NOTE(review): both joins are inner, so customers without a policy or
    # without any claim are excluded from the output — confirm this is intended.
    joined_df = demographics_df.join(
        policy_df, demographics_df.Customer_ID == policy_df.customer_id, "inner"
    ).drop(policy_df.customer_id)

    joined_df = joined_df.join(
        claims_df, joined_df.policy_id == claims_df.Policy_ID, "inner"
    ).drop(claims_df.Policy_ID)

    # Per-customer claim summary. joined_df has one row per claim, so policies
    # must be counted distinctly; a plain count() would equal Total_Claims and
    # make Claims_Per_Policy always 1.
    summarized_df = joined_df.groupBy("Customer_ID").agg(
        count("Claim_ID").alias("Total_Claims"),
        F.countDistinct("policy_id").alias("Policy_Count"),
        spark_max("Claim_Date").alias("Recent_Claim_Date"),
        avg("Claim_Amount").alias("Average_Claim_Amount"),
    )

    # Attach the per-customer summary back onto every detail row.
    final_df = joined_df.join(summarized_df, "Customer_ID", "inner")

    # Derived columns.
    # NOTE(review): Age is a fractional value that treats a year as 365 days;
    # kept as-is to preserve the output schema.
    age_expr = datediff(current_date(), col("Date_of_Birth")) / 365
    # Guard both ratios against division by zero (a NULL denominator also
    # falls through to 0).
    claim_to_premium_expr = when(
        col("total_premium_paid") != 0,
        col("Claim_Amount") / col("total_premium_paid"),
    ).otherwise(0)
    claims_per_policy_expr = when(
        col("Policy_Count") != 0,
        col("Total_Claims") / col("Policy_Count"),
    ).otherwise(0)

    final_df = (
        final_df
        .withColumn("Age", age_expr)
        .withColumn("Claim_To_Premium_Ratio", claim_to_premium_expr)
        .withColumn("Claims_Per_Policy", claims_per_policy_expr)
        # The three columns below are constant placeholder values.
        .withColumn("Retention_Rate", F.lit(0.85))
        .withColumn("Cross_Sell_Opportunities", F.lit("Multi-Policy Discount, Home Coverage Add-on"))
        .withColumn("Upsell_Potential", F.lit("Premium Vehicle Coverage"))
    )

    # Enrich with AI/ML insights and scores; left joins keep rows for
    # customers that have no insight or score record.
    final_df = final_df.join(aiml_insights_df, "Customer_ID", "left") \
                       .join(scores_df, "Customer_ID", "left")

    # Replace the target table in a single overwrite. Dropping the table first
    # would leave no table at all if the write failed, and would discard the
    # table's Delta history; overwriteSchema lets the overwrite also replace a
    # changed schema.
    final_df.write.format("delta") \
        .mode("overwrite") \
        .option("overwriteSchema", "true") \
        .saveAsTable("catalog.db.customer_360")

    logger.info("ETL process completed successfully and data written to catalog.db.customer_360")

except Exception:
    # Log with traceback, then re-raise so the notebook/job run is marked as
    # failed instead of silently appearing successful.
    logger.exception("Error occurred during ETL process")
    raise
