# In [None]:
import logging
import requests
import psycopg2
from pyspark.sql.functions import col, expr, datediff, current_date, when, count, avg, max

# Configure logging: request INFO-and-above output from the root logger and
# create the logger that every step of this script reports through.
# NOTE(review): logging.basicConfig does nothing when the root logger already
# has handlers installed (some hosted notebook runtimes pre-configure logging);
# confirm INFO records actually appear in the target environment.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Function to fetch data from REST API
def fetch_api_data(api_url, customer_id, timeout=10):
    """Fetch the JSON payload for one customer from a REST endpoint.

    This is a best-effort call: any failure is logged and reported to the
    caller as ``None`` instead of an exception.

    Args:
        api_url: URL template containing a ``{Customer_ID}`` placeholder.
        customer_id: Value substituted for ``{Customer_ID}`` in the template.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``; without it a stalled endpoint
            would block the caller indefinitely.

    Returns:
        The decoded JSON body of the response, or ``None`` if the request
        failed, returned an error status, or the body was not valid JSON.
    """
    try:
        response = requests.get(
            api_url.format(Customer_ID=customer_id),
            timeout=timeout,
        )
        response.raise_for_status()
        return response.json()
    # ValueError covers JSON decoding failures on requests versions whose
    # decode error is not a RequestException subclass.
    except (requests.exceptions.RequestException, ValueError) as e:
        logger.error(f"Error fetching data from API: {e}")
        return None

# Read the three source tables by their fully qualified catalog names.
# Any failure is logged and re-raised, so the script stops here.
try:
    policy_df, claims_df, demographics_df = (
        spark.table(table_name)
        for table_name in (
            "postgresql_catalog.demo.policydb",
            "mysql_catalog.vsco.claimsdb",
            "sqlserver_catalog.dbo.demographicsdb",
        )
    )
except Exception as load_error:
    logger.error(f"Error loading data from Unity Catalog: {load_error}")
    raise

# Attach demographics to every policy row. This is an inner join on
# Customer_ID, so policy rows without a demographics match are dropped.
try:
    policy_demo_df = policy_df.join(demographics_df, on="Customer_ID", how="inner")
except Exception as join_error:
    logger.error(f"Error joining policy and demographics data: {join_error}")
    raise

# Attach claims to the combined rows. This is an inner join on Policy_ID,
# so rows for policies without any claim are dropped.
try:
    policy_claims_df = policy_demo_df.join(claims_df, on="Policy_ID", how="inner")
except Exception as join_error:
    logger.error(f"Error joining policy-demo and claims data: {join_error}")
    raise

# Aggregate data at the customer level.
# The aggregate must carry every column the derived-field step reads
# (Date_of_Birth, Total_Premium_Paid and a claim amount total); a groupBy
# keeps only the grouping key and the aggregated expressions, so anything
# not listed here is unavailable afterwards.
try:
    claim_stats_df = policy_claims_df.groupBy("Customer_ID").agg(
        count("Claim_ID").alias("Total_Claims"),
        avg("Claim_Amount").alias("Average_Claim_Amount"),
        expr("sum(Claim_Amount)").alias("Total_Claim_Amount"),
        max("Claim_Date").alias("Recent_Claim_Date"),
        # The input has one row per claim, so a plain count of Policy_ID
        # would count claims; count the distinct policies instead.
        expr("count(DISTINCT Policy_ID)").alias("Policy_Count"),
        # assumes Date_of_Birth has a single value per customer - TODO confirm
        max("Date_of_Birth").alias("Date_of_Birth"),
    )

    # Premium is summed over one row per policy so that a policy with
    # several claims is not counted several times.
    # assumes Total_Premium_Paid is a per-policy column - TODO confirm
    premium_df = (
        policy_claims_df.select("Customer_ID", "Policy_ID", "Total_Premium_Paid")
        .dropDuplicates(["Customer_ID", "Policy_ID"])
        .groupBy("Customer_ID")
        .agg(expr("sum(Total_Premium_Paid)").alias("Total_Premium_Paid"))
    )

    agg_df = claim_stats_df.join(premium_df, "Customer_ID", "left")
except Exception as e:
    logger.error(f"Error aggregating data: {e}")
    raise

# Calculate derived fields.
# Age is the day difference from today divided by 365 (fractional years).
# Both ratios fall back to 0 when their denominator is 0 or null.
# Retention_Rate, Cross_Sell_Opportunities and Upsell_Potential are fixed
# literals applied to every row.
try:
    derived_df = (
        agg_df.withColumn("Age", datediff(current_date(), col("Date_of_Birth")) / 365)
        .withColumn(
            "Claim_To_Premium_Ratio",
            when(
                col("Total_Premium_Paid") != 0,
                col("Total_Claim_Amount") / col("Total_Premium_Paid"),
            ).otherwise(0),
        )
        .withColumn(
            "Claims_Per_Policy",
            when(
                col("Policy_Count") != 0,
                col("Total_Claims") / col("Policy_Count"),
            ).otherwise(0),
        )
        .withColumn("Retention_Rate", expr("0.85"))
        .withColumn("Cross_Sell_Opportunities", expr("'Multi-Policy Discount, Home Coverage Add-on'"))
        .withColumn("Upsell_Potential", expr("'Premium Vehicle Coverage'"))
    )
except Exception as e:
    logger.error(f"Error calculating derived fields: {e}")
    raise

# Fetch fraud and credit scores from APIs and convert the results to DataFrames.
# rdd.map only records the transformation; nothing is fetched until an action
# runs. toDF without an explicit schema has to sample the RDD to infer column
# types, so it is the first point where fetch_api_data is actually called.
# It therefore belongs inside the same try block as the map calls.
# NOTE(review): fetch_api_data returns None on failure; schema inference may
# fail if the sampled score values are all None - consider an explicit schema
# once the API response shape is confirmed.
try:
    fraud_scores = derived_df.rdd.map(lambda row: (row.Customer_ID, fetch_api_data("http://18.189.118.116:9010/fraudscore?Customer_ID={Customer_ID}", row.Customer_ID)))
    credit_scores = derived_df.rdd.map(lambda row: (row.Customer_ID, fetch_api_data("http://18.189.118.116:9010/creditscore?Customer_ID={Customer_ID}", row.Customer_ID)))

    # Convert RDDs to DataFrames
    fraud_scores_df = fraud_scores.toDF(["Customer_ID", "Fraud_Score"])
    credit_scores_df = credit_scores.toDF(["Customer_ID", "Credit_Score"])
except Exception as e:
    logger.error(f"Error fetching fraud and credit scores: {e}")
    raise

# Left-join both score tables onto the derived rows by Customer_ID, so every
# derived row is kept even when no score row matches.
try:
    with_fraud_df = derived_df.join(fraud_scores_df, on="Customer_ID", how="left")
    final_df = with_fraud_df.join(credit_scores_df, on="Customer_ID", how="left")
except Exception as join_error:
    logger.error(f"Error joining derived data with scores: {join_error}")
    raise

# Add the "AI-driven insight" columns. Each one is a fixed SQL literal that is
# applied unchanged to every row; the columns are added in the order listed.
try:
    insight_literals = (
        ("Churn_Probability", "0.25"),
        ("Next_Best_Offer", "'Additional Life Coverage'"),
        ("Claims_Fraud_Probability", "0.10"),
        ("Revenue_Potential", "12000.00"),
    )
    for insight_name, sql_literal in insight_literals:
        final_df = final_df.withColumn(insight_name, expr(sql_literal))
except Exception as insight_error:
    logger.error(f"Error adding AI-driven insights: {insight_error}")
    raise

# Write the final DataFrame to Unity Catalog target table.
# The table is replaced with a single overwrite rather than DROP + write:
# dropping first leaves no table at all if the write then fails.
# overwriteSchema lets the overwrite also replace the existing schema, which
# the earlier drop-and-recreate did implicitly.
# NOTE(review): "catalog_name.schema_name" looks like a placeholder - confirm
# the real target, and that any existing table there is a Delta table.
try:
    (
        final_df.write.format("delta")
        .mode("overwrite")
        .option("overwriteSchema", "true")
        .saveAsTable("catalog_name.schema_name.customer_360")
    )
except Exception as e:
    logger.error(f"Error writing data to Unity Catalog: {e}")
    raise

logger.info("ETL process completed successfully.")
