In [None]:
# Databricks notebook source
# COMMAND ----------
# MAGIC %md
# MAGIC # Customer 360 ETL Process
# MAGIC This notebook performs an ETL process to create a comprehensive Customer 360 view by integrating multiple datasets.

# COMMAND ----------
# MAGIC
# Import necessary libraries
import logging
from pyspark.sql import functions as F
from pyspark.sql.functions import count, max, avg, datediff, current_date, when, lit

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Assume Spark session is already initialized
# spark = SparkSession.builder.appName("Customer360Migration").getOrCreate()

# COMMAND ----------
# MAGIC %md
# MAGIC ## Step 1: Data Ingestion
# MAGIC Load data from CSV files into DataFrames.

# COMMAND ----------
# MAGIC
# Step 1: Data Ingestion
try:
    logger.info("Loading data from CSV files into DataFrames.")
    # Every source file is read the same way: the first row supplies the
    # column names and Spark infers each column's type from the data.
    policy_df = spark.read.options(header=True, inferSchema=True).csv(
        "tfs://dataeconomy-9k42/62457/uploads/62457/29aabe9d-c354-4176-8f98-2dd7a5fd7216/policy.csv"
    )
    claims_df = spark.read.options(header=True, inferSchema=True).csv(
        "tfs://dataeconomy-9k42/62457/uploads/62457/29aabe9d-c354-4176-8f98-2dd7a5fd7216/claims.csv"
    )
    demographics_df = spark.read.options(header=True, inferSchema=True).csv(
        "tfs://dataeconomy-9k42/62457/uploads/62457/29aabe9d-c354-4176-8f98-2dd7a5fd7216/demographics.csv"
    )
    scores_df = spark.read.options(header=True, inferSchema=True).csv(
        "tfs://dataeconomy-9k42/62457/uploads/62457/29aabe9d-c354-4176-8f98-2dd7a5fd7216/scores.csv"
    )
    aiml_insights_df = spark.read.options(header=True, inferSchema=True).csv(
        "tfs://dataeconomy-9k42/62457/uploads/62457/29aabe9d-c354-4176-8f98-2dd7a5fd7216/aiml_insights.csv"
    )
except Exception as load_error:
    # Log for the notebook run output, then let the failure stop the job.
    logger.error(f"Error loading data: {load_error}")
    raise

# COMMAND ----------
# MAGIC %md
# MAGIC ## Step 2: Data Selection and Filtering
# MAGIC Select relevant fields from each dataset.

# COMMAND ----------
# MAGIC
# Step 2: Data Selection and Filtering
try:
    logger.info("Selecting relevant fields from each dataset.")
    # Customer profile attributes, keyed by Customer_ID.
    selected_demographics_df = demographics_df.select(
        [
            "Customer_ID",
            "Customer_Name",
            "Email",
            "Phone_Number",
            "Address",
            "City",
            "State",
            "Postal_Code",
            "Date_of_Birth",
            "Gender",
            "Marital_Status",
            "Occupation",
            "Income_Level",
            "Customer_Segment",
        ]
    )
    # Claim records, linked to policies through Policy_ID.
    selected_claims_df = claims_df.select(
        [
            "Claim_ID",
            "Policy_ID",
            "Claim_Date",
            "Claim_Type",
            "Claim_Status",
            "Claim_Amount",
            "Claim_Payout",
        ]
    )
    # Policy records, linked to customers through customer_id.
    selected_policy_df = policy_df.select(
        [
            "policy_id",
            "customer_id",
            "policy_type",
            "policy_status",
            "policy_start_date",
            "policy_end_date",
            "policy_term",
            "policy_premium",
            "total_premium_paid",
            "renewal_status",
            "policy_addons",
        ]
    )
except Exception as selection_error:
    # Log for the notebook run output, then let the failure stop the job.
    logger.error(f"Error selecting fields: {selection_error}")
    raise

# COMMAND ----------
# MAGIC %md
# MAGIC ## Step 3: Data Integration
# MAGIC Join datasets based on common identifiers.

# COMMAND ----------
# MAGIC
# Step 3: Data Integration
try:
    logger.info("Joining datasets based on common identifiers.")
    # Spark resolves column names case-insensitively by default, so the bare
    # names 'Customer_ID'/'customer_id' (and 'policy_id'/'Policy_ID') each
    # match a column on BOTH sides of the join and are rejected as ambiguous.
    # Reference every key through its owning DataFrame, then drop the
    # right-hand copy so later name-based lookups stay unambiguous.
    joined_df = (
        selected_demographics_df
        .join(
            selected_policy_df,
            selected_demographics_df["Customer_ID"] == selected_policy_df["customer_id"],
            "inner",
        )
        .drop(selected_policy_df["customer_id"])
        .join(
            selected_claims_df,
            selected_policy_df["policy_id"] == selected_claims_df["Policy_ID"],
            "inner",
        )
        .drop(selected_claims_df["Policy_ID"])
    )
except Exception as e:
    logger.error(f"Error joining datasets: {e}")
    raise

# COMMAND ----------
# MAGIC %md
# MAGIC ## Step 4: Data Aggregation and Summarization
# MAGIC Compute aggregate metrics.

# COMMAND ----------
# MAGIC
# Step 4: Data Aggregation and Summarization
try:
    logger.info("Computing aggregate metrics.")
    # joined_df holds one row per claim, so policy-level values repeat for
    # every claim on the same policy. Collapse to one row per policy before
    # summing premiums, otherwise multi-claim policies are counted repeatedly.
    premium_by_customer_df = (
        joined_df
        .select("Customer_ID", "policy_id", "total_premium_paid")
        .dropDuplicates(["Customer_ID", "policy_id"])
        .groupBy("Customer_ID")
        .agg(F.sum("total_premium_paid").alias("Total_Premium_Paid"))
    )

    claims_by_customer_df = joined_df.groupBy("Customer_ID").agg(
        count("Claim_ID").alias("Total_Claims"),
        # Distinct policies, not rows: a plain count would equal Total_Claims.
        F.countDistinct("policy_id").alias("Policy_Count"),
        max("Claim_Date").alias("Recent_Claim_Date"),
        avg("Claim_Amount").alias("Average_Claim_Amount"),
        # Helper columns consumed by Step 5 and dropped from the final output.
        F.sum("Claim_Amount").alias("Total_Claim_Amount"),
        # Assumes one date of birth per customer in the demographics data.
        F.first("Date_of_Birth", ignorenulls=True).alias("Date_of_Birth"),
    )

    # Both frames derive from joined_df, so every customer appears in each;
    # the left join simply guarantees no aggregated customer is lost.
    aggregated_df = claims_by_customer_df.join(premium_by_customer_df, "Customer_ID", "left")
except Exception as e:
    logger.error(f"Error aggregating data: {e}")
    raise

# COMMAND ----------
# MAGIC %md
# MAGIC ## Step 5: Custom Calculations
# MAGIC Implement custom calculations.

# COMMAND ----------
# MAGIC
# Step 5: Custom Calculations
try:
    logger.info("Implementing custom calculations.")
    # Approximate age in years (ignores leap days).
    age_expr = datediff(current_date(), F.col("Date_of_Birth")) / 365
    # Total claimed amount relative to total premium paid across the
    # customer's policies; 0 when no premium was paid or it is unknown.
    claim_to_premium_ratio_expr = when(
        F.col("Total_Premium_Paid") != 0,
        F.col("Total_Claim_Amount") / F.col("Total_Premium_Paid"),
    ).otherwise(0)
    claims_per_policy_expr = when(
        F.col("Policy_Count") != 0,
        F.col("Total_Claims") / F.col("Policy_Count"),
    ).otherwise(0)

    final_df = (
        aggregated_df
        .withColumn("Age", age_expr)
        .withColumn("Claim_To_Premium_Ratio", claim_to_premium_ratio_expr)
        .withColumn("Claims_Per_Policy", claims_per_policy_expr)
        # Fixed placeholder values applied to every customer.
        .withColumn("Retention_Rate", lit(0.85))
        .withColumn("Cross_Sell_Opportunities", lit("Multi-Policy Discount, Home Coverage Add-on"))
        .withColumn("Upsell_Potential", lit("Premium Vehicle Coverage"))
        # Remove the helper inputs so the output keeps its original column set.
        .drop("Date_of_Birth", "Total_Claim_Amount", "Total_Premium_Paid")
    )
except Exception as e:
    logger.error(f"Error in custom calculations: {e}")
    raise

# COMMAND ----------
# MAGIC %md
# MAGIC ## Step 6: Final Data Integration
# MAGIC Combine all data sources into a comprehensive dataset.

# COMMAND ----------
# MAGIC
# Step 6: Final Data Integration
try:
    logger.info("Combining all data sources into a comprehensive dataset.")
    # Attach the scores and then the AI/ML insights by Customer_ID. Both are
    # inner joins, so only customers present in all three inputs are kept.
    final_customer_360_df = (
        final_df
        .join(scores_df, on="Customer_ID", how="inner")
        .join(aiml_insights_df, on="Customer_ID", how="inner")
    )
except Exception as integration_error:
    # Log for the notebook run output, then let the failure stop the job.
    logger.error(f"Error in final data integration: {integration_error}")
    raise

# COMMAND ----------
# MAGIC %md
# MAGIC ## Step 7: Output Generation
# MAGIC Save the consolidated dataset to a Unity Catalog table.

# COMMAND ----------
# MAGIC
# Step 7: Output Generation
try:
    logger.info("Saving the consolidated dataset to Unity Catalog table.")
    # Replace the table contents with a single Delta overwrite instead of
    # dropping the table first. Drop-then-write is not atomic: a failed write
    # would leave no table at all, and the drop discards the table's history.
    # overwriteSchema lets the overwrite also replace a changed column layout,
    # which is what the earlier drop was achieving.
    # NOTE(review): assumes any existing target table is already Delta — confirm.
    (
        final_customer_360_df.write.format("delta")
        .mode("overwrite")
        .option("overwriteSchema", "true")
        .saveAsTable("catalog.target_db.customer_360")
    )
except Exception as e:
    logger.error(f"Error saving output: {e}")
    raise

logger.info("ETL process completed successfully.")
