In [None]:
import logging
from pyspark.sql import functions as F

# Configure logging.
# basicConfig() does nothing if the root logger already has handlers.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Target sales report ETL
# -----------------------
# Reads the source tables listed below, derives per-target-year projections
# (revenue, growth rate, investments) for the target years 2024-2026 and
# overwrites catalog.db.target_sales_report with the result.
#
# `spark` is not defined in this cell; it is expected to be supplied by the
# runtime (presumably the notebook's SparkSession -- confirm).
#
# Any failure is logged with its traceback and then re-raised so that the
# cell / job is reported as failed instead of silently "succeeding".
try:
    # Load data from Unity Catalog tables
    hospital_stats_df = spark.table("catalog.db.hospital_stats_north_america")
    sales_associates_df = spark.table("catalog.db.sales_associates_employment_details")
    compensation_guidelines_df = spark.table("catalog.db.compensation_guidelines")
    hospital_sales_assignments_df = spark.table("catalog.db.hospital_sales_assignments")
    logistics_channels_df = spark.table("catalog.db.logistics_channels")
    growth_opportunities_df = spark.table("catalog.db.growth_opportunities")
    third_party_sales_trends_df = spark.table("catalog.db.third_party_sales_trends")
    historical_sales_df = spark.table("catalog.db.historical_sales")
    # NOTE(review): company_goals_df is loaded but never used below.
    company_goals_df = spark.table("catalog.db.company_goals")

    # NOTE(review): Steps 1-6 build enriched_df, but nothing after Step 6
    # reads it -- the written output is derived only from Steps 7-13.
    # Either a join between enriched_df and the projections is missing, or
    # these steps are dead code. Kept as-is until that is confirmed.

    # Step 1: Join Sales Associates with Compensation Guidelines and derive
    # total compensation = base + commission share of base + bonus.
    sales_compensation_df = sales_associates_df.join(
        compensation_guidelines_df, "Associate_ID", "inner"
    ).withColumn(
        "Compensation",
        F.col("Base_Salary")
        + (F.col("Commission_Percentage") * F.col("Base_Salary"))
        + F.col("Bonus"),
    )

    # Step 2: Join Hospital Stats with Hospital Sales Assignments
    hospital_sales_df = hospital_stats_df.join(
        hospital_sales_assignments_df, ["Hospital_ID", "Hospital_Name"], "inner"
    )

    # Step 3: Join the above two results on Associate_ID and Associate_Name
    combined_df = sales_compensation_df.join(
        hospital_sales_df, ["Associate_ID", "Associate_Name"], "inner"
    )

    # Step 4: Join Logistics Channels with Growth Opportunities
    logistics_growth_df = logistics_channels_df.join(
        growth_opportunities_df, ["Channel_ID", "Channel_Type"], "inner"
    )

    # Step 5: Unique Tool -- keep one row per (channel, channel type, hospital)
    unique_logistics_growth_df = logistics_growth_df.dropDuplicates(
        ["Channel_ID", "Channel_Type", "Hospital_ID"]
    )

    # Step 6: Join the result with the previous join result on Hospital_ID
    enriched_df = combined_df.join(
        unique_logistics_growth_df, "Hospital_ID", "inner"
    )

    # Step 7: Join Historical Sales with Third Party Sales Trends
    sales_trends_df = historical_sales_df.join(
        third_party_sales_trends_df, "Channel_Type", "inner"
    )

    # Step 8: Unique Tool -- keep one row per (year, channel type, revenue)
    unique_sales_trends_df = sales_trends_df.dropDuplicates(
        ["Year", "Channel_Type", "Sales_Revenue"]
    )

    # Step 9: Generate Rows for Target Year (range end is exclusive:
    # 2024, 2025, 2026)
    target_years_df = spark.range(2024, 2027).withColumnRenamed("id", "Target Year")

    # Column expressions shared by Steps 10-12. They are resolved by name
    # when applied, so in Step 12 the growth-rate fraction picks up the
    # value recomputed in Step 11.
    target_year = F.col("Target Year")
    is_2024 = target_year == 2024
    is_2025_or_2026 = target_year.isin(2025, 2026)
    sales_growth_fraction = F.col("Projected_Sales_Growth_Rate") / 100

    # Step 10: Calculate Projected Revenue
    # Uses Projected_Sales_Growth_Rate as it comes from the joined source
    # tables (Step 11 only overwrites it afterwards).
    # NOTE(review): the 2024 branch multiplies by rate/100 only, whereas
    # 2025/2026 multiply by (1 + rate/100) -- confirm this is intentional.
    projected_revenue_df = unique_sales_trends_df.crossJoin(target_years_df).withColumn(
        "Projected Revenue",
        F.when(is_2024, F.col("Sales_Revenue") * sales_growth_fraction)
        .when(is_2025_or_2026, F.col("Sales_Revenue") * (1 + sales_growth_fraction))
        .otherwise(F.col("Sales_Revenue")),
    )

    # Step 11: Calculate Projected Sales Growth Rate
    # Each target year adds one more rate/100 increment on top of the base
    # rate: 2024 -> +1 step, 2025 -> +2 steps, 2026 -> +3 steps.
    # NOTE(review): Projected_Growth_Rate is not created by any step above,
    # so it must already exist in historical_sales or
    # third_party_sales_trends -- verify against the table schemas.
    base_growth_rate = F.col("Projected_Growth_Rate")
    growth_step = base_growth_rate / 100
    growth_2024 = base_growth_rate + growth_step
    growth_2025 = growth_2024 + growth_step
    growth_2026 = growth_2025 + growth_step
    projected_growth_rate_df = projected_revenue_df.withColumn(
        "Projected_Sales_Growth_Rate",
        F.when(is_2024, growth_2024)
        .when(target_year == 2025, growth_2025)
        .when(target_year == 2026, growth_2026)
        .otherwise(base_growth_rate),
    )

    # Step 12: Calculate Projected Investments
    # NOTE(review): Investment_Planned is likewise not produced above; it
    # has to come from the two sales-trend source tables -- verify. The
    # 2024 branch has the same "no 1 +" asymmetry as Step 10.
    projected_investments_df = projected_growth_rate_df.withColumn(
        "projected_investments",
        F.when(is_2024, F.col("Investment_Planned") * sales_growth_fraction)
        .when(is_2025_or_2026, F.col("Investment_Planned") * (1 + sales_growth_fraction))
        .otherwise(F.col("Investment_Planned")),
    )

    # Step 13: Sort by Target Year
    final_df = projected_investments_df.orderBy("Target Year")

    # Write the final output to Unity Catalog table.
    # A single overwrite (with overwriteSchema so a changed schema is
    # accepted) replaces the table in one commit. The previous
    # DROP TABLE + write sequence left no table at all if the write failed
    # and discarded the existing table's history.
    # NOTE(review): "Target Year" / "Projected Revenue" contain spaces; Delta
    # may reject such column names unless column mapping is enabled -- confirm.
    (
        final_df.write.format("delta")
        .mode("overwrite")
        .option("overwriteSchema", "true")
        .saveAsTable("catalog.db.target_sales_report")
    )

    logger.info("ETL process completed successfully and data written to target_sales_report table.")

except Exception as e:
    # Log with traceback, then propagate so the run is marked as failed.
    logger.exception("An error occurred during the ETL process: %s", e)
    raise
