In [None]:
# Databricks notebook source
# COMMAND ----------
# MAGIC %md
# MAGIC # ETL Process for Customer 360 Data
# MAGIC This notebook performs an ETL process to integrate and analyze customer data from various sources, including policy, claims, demographics, scores, and AI/ML insights.

# COMMAND ----------
# MAGIC
# Import necessary libraries
import logging

from pyspark.sql import functions as F
from pyspark.sql.functions import col, when, datediff, current_date, count, avg, max, lit

# Configure logging
# basicConfig() is a no-op when the root logger already has handlers (likely on
# a pre-configured notebook runtime -- TODO confirm for this cluster). In that
# case the root level stays unchanged and INFO records would be dropped, so the
# level is also set explicitly on this notebook's own logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# COMMAND ----------
# MAGIC %md
# MAGIC ## Load Data
# MAGIC Load data from CSV files into DataFrames.

# COMMAND ----------
# MAGIC
# Load data from CSV files into DataFrames
try:
    logger.info("Loading data from CSV files...")
    # All five extracts live in the same upload folder and are read with the
    # same options: first row is the header, column types are inferred.
    source_dir = "tfs://dataeconomy-9k42/62457/uploads/62457/29aabe9d-c354-4176-8f98-2dd7a5fd7216"
    csv_options = {"header": True, "inferSchema": True}

    policy_df = spark.read.csv(f"{source_dir}/policy.csv", **csv_options)
    claims_df = spark.read.csv(f"{source_dir}/claims.csv", **csv_options)
    demographics_df = spark.read.csv(f"{source_dir}/demographics.csv", **csv_options)
    scores_df = spark.read.csv(f"{source_dir}/scores.csv", **csv_options)
    aiml_insights_df = spark.read.csv(f"{source_dir}/aiml_insights.csv", **csv_options)
except Exception as e:
    # Log for the notebook output, then let the failure stop the run.
    logger.error(f"Error loading data: {e}")
    raise

# COMMAND ----------
# MAGIC %md
# MAGIC ## Data Selection
# MAGIC Select necessary fields from each DataFrame.

# COMMAND ----------
# MAGIC
# Data Selection
try:
    logger.info("Selecting necessary fields from each DataFrame...")

    # Columns kept from each source, listed up front so the projection of
    # every dataset can be read at a glance.
    demographics_columns = [
        "Customer_ID", "Customer_Name", "Email", "Phone_Number", "Address",
        "City", "State", "Postal_Code", "Date_of_Birth", "Gender",
        "Marital_Status", "Occupation", "Income_Level", "Customer_Segment",
    ]
    policy_columns = [
        "policy_id", "customer_id", "policy_type", "policy_status",
        "policy_start_date", "policy_end_date", "policy_term",
        "policy_premium", "total_premium_paid", "renewal_status",
        "policy_addons",
    ]
    claims_columns = [
        "Claim_ID", "Policy_ID", "Claim_Date", "Claim_Type", "Claim_Status",
        "Claim_Amount", "Claim_Payout",
    ]
    scores_columns = [
        "Customer_ID", "Credit_Score", "Fraud_Score", "Customer_Risk_Score",
    ]
    aiml_insights_columns = [
        "Customer_ID", "Churn_Probability", "Next_Best_Offer",
        "Claims_Fraud_Probability", "Revenue_Potential",
    ]

    selected_demographics_df = demographics_df.select(*demographics_columns)
    selected_policy_df = policy_df.select(*policy_columns)
    selected_claims_df = claims_df.select(*claims_columns)
    selected_scores_df = scores_df.select(*scores_columns)
    selected_aiml_insights_df = aiml_insights_df.select(*aiml_insights_columns)
except Exception as e:
    # Log for the notebook output, then let the failure stop the run.
    logger.error(f"Error selecting fields: {e}")
    raise

# COMMAND ----------
# MAGIC %md
# MAGIC ## Data Integration
# MAGIC Perform data integration through joins.

# COMMAND ----------
# MAGIC
# Data Integration
try:
    logger.info("Performing data integration through joins...")
    # The join keys differ only by letter case ("Customer_ID" vs "customer_id",
    # "policy_id" vs "Policy_ID"). Spark resolves column names
    # case-insensitively by default, so a bare col(...) on either name is
    # ambiguous once both sides are in scope. Each key is therefore taken from
    # the DataFrame that owns it.
    customer_policy_df = selected_demographics_df.join(
        selected_policy_df,
        selected_demographics_df["Customer_ID"] == selected_policy_df["customer_id"],
        "inner",
    ).drop(selected_policy_df["customer_id"])

    # The right-hand copy of each key is dropped after its join so that later
    # steps can refer to "Customer_ID" and "policy_id" by plain name.
    # NOTE(review): both joins are inner, so customers without a policy or
    # without any claim do not appear in joined_df -- confirm this is intended.
    joined_df = customer_policy_df.join(
        selected_claims_df,
        selected_policy_df["policy_id"] == selected_claims_df["Policy_ID"],
        "inner",
    ).drop(selected_claims_df["Policy_ID"])
except Exception as e:
    # Log for the notebook output, then let the failure stop the run.
    logger.error(f"Error during data integration: {e}")
    raise

# COMMAND ----------
# MAGIC %md
# MAGIC ## Aggregation and Summarization
# MAGIC Aggregate and summarize data.

# COMMAND ----------
# MAGIC
# Aggregation and Summarization
try:
    logger.info("Aggregating and summarizing data...")
    # joined_df holds one row per claim, so claim metrics aggregate directly.
    # Policies repeat once per claim and must be counted distinctly; a plain
    # count would always equal Total_Claims.
    # Date_of_Birth and the claim total are carried through the aggregation
    # because the derived metrics below need them.
    claim_summary_df = joined_df.groupBy("Customer_ID").agg(
        count("Claim_ID").alias("Total_Claims"),
        F.countDistinct("policy_id").alias("Policy_Count"),
        max("Claim_Date").alias("Recent_Claim_Date"),
        avg("Claim_Amount").alias("Average_Claim_Amount"),
        F.sum("Claim_Amount").alias("Total_Claim_Amount"),
        # presumably one demographics row per customer, so any non-null value
        # is the customer's date of birth -- verify Customer_ID uniqueness.
        F.first("Date_of_Birth", ignorenulls=True).alias("Date_of_Birth"),
    )

    # Premiums are a per-policy figure: de-duplicate to one row per policy
    # before summing so a policy with several claims is not counted repeatedly.
    premium_summary_df = (
        joined_df.select("Customer_ID", "policy_id", "total_premium_paid")
        .dropDuplicates(["Customer_ID", "policy_id"])
        .groupBy("Customer_ID")
        .agg(F.sum("total_premium_paid").alias("Total_Premium_Paid"))
    )

    summarized_df = claim_summary_df.join(premium_summary_df, "Customer_ID", "left")
except Exception as e:
    # Log for the notebook output, then let the failure stop the run.
    logger.error(f"Error during aggregation: {e}")
    raise

# COMMAND ----------
# MAGIC %md
# MAGIC ## Custom Calculations
# MAGIC Perform custom calculations.

# COMMAND ----------
# MAGIC
# Custom Calculations
try:
    logger.info("Performing custom calculations...")
    # Age in years, approximated as elapsed days / 365 (fractional result).
    age_expr = datediff(current_date(), col("Date_of_Birth")) / 365
    # Per-customer ratio of total claimed amount to total premium paid;
    # falls back to 0 when the premium total is zero or missing.
    claim_to_premium_ratio_expr = when(
        col("Total_Premium_Paid") != 0,
        col("Total_Claim_Amount") / col("Total_Premium_Paid"),
    ).otherwise(0)
    claims_per_policy_expr = when(
        col("Policy_Count") != 0,
        col("Total_Claims") / col("Policy_Count"),
    ).otherwise(0)

    # NOTE(review): Retention_Rate, Cross_Sell_Opportunities and
    # Upsell_Potential are literal constants applied to every customer.
    final_df = (
        summarized_df.withColumn("Age", age_expr)
        .withColumn("Claim_To_Premium_Ratio", claim_to_premium_ratio_expr)
        .withColumn("Claims_Per_Policy", claims_per_policy_expr)
        .withColumn("Retention_Rate", lit(0.85))
        .withColumn("Cross_Sell_Opportunities", lit("Multi-Policy Discount, Home Coverage Add-on"))
        .withColumn("Upsell_Potential", lit("Premium Vehicle Coverage"))
        # Helper inputs are removed so the output keeps only the summary
        # metrics and the derived columns.
        .drop("Date_of_Birth", "Total_Claim_Amount", "Total_Premium_Paid")
    )
except Exception as e:
    # Log for the notebook output, then let the failure stop the run.
    logger.error(f"Error during custom calculations: {e}")
    raise

# COMMAND ----------
# MAGIC %md
# MAGIC ## Advanced Insights Integration
# MAGIC Integrate AI/ML insights and scores data.

# COMMAND ----------
# MAGIC
# Advanced Insights Integration
try:
    logger.info("Integrating AI/ML insights and scores data...")
    # Attach the two customer-level datasets one after the other. Joining on
    # the shared column name keeps a single Customer_ID column in the result,
    # and the inner joins keep only customers present in every dataset.
    with_insights_df = final_df.join(
        selected_aiml_insights_df, on="Customer_ID", how="inner"
    )
    enriched_df = with_insights_df.join(
        selected_scores_df, on="Customer_ID", how="inner"
    )
except Exception as e:
    # Log for the notebook output, then let the failure stop the run.
    logger.error(f"Error during insights integration: {e}")
    raise

# COMMAND ----------
# MAGIC %md
# MAGIC ## Output Generation
# MAGIC Write the final DataFrame to a Unity Catalog table.

# COMMAND ----------
# MAGIC
# Output Generation
try:
    logger.info("Writing the final DataFrame to a Unity Catalog table...")
    # The table is replaced with a single Delta overwrite instead of
    # DROP TABLE followed by a write. Dropping first leaves no table at all if
    # the write fails, and discards the table's history and grants.
    # overwriteSchema lets the overwrite also replace the schema, which is
    # what the drop-and-recreate achieved.
    (
        enriched_df.write.format("delta")
        .mode("overwrite")
        .option("overwriteSchema", "true")
        .saveAsTable("catalog.target_db.customer_360")
    )
except Exception as e:
    # Log for the notebook output, then let the failure stop the run.
    logger.error(f"Error writing output: {e}")
    raise

logger.info("ETL process completed successfully.")
