# In [None]:
import logging
from pyspark.sql import functions as F

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _project_by_year(base_column):
    """Return the year-dependent projection of ``base_column`` as a Column.

    Reads the ``Year`` and ``Projected_Sales_Growth_Rate`` columns of the
    DataFrame the expression is applied to:

    * 2024: ``base * rate / 100``
    * 2025 and 2026: ``base * (1 + rate / 100)``
    * any other year: ``base`` unchanged

    NOTE(review): the 2024 branch has no ``1 +`` term, unlike 2025/2026.
    This is kept exactly as the original formulas were written; confirm
    against the business rule.
    """
    base = F.col(base_column)
    year = F.col("Year")
    growth_fraction = F.col("Projected_Sales_Growth_Rate") / 100
    return (
        F.when(year == 2024, base * growth_fraction)
        .when((year == 2025) | (year == 2026), base * (1 + growth_fraction))
        .otherwise(base)
    )


# Build the target sales report: load the source tables, join them, derive the
# projected growth rate / revenue / investments, and overwrite the target
# Delta table. Relies on the ambient `spark` session.
try:
    # Load data from Unity Catalog tables
    hospital_stats_df = spark.table("catalog.db.hospital_stats_north_america")
    sales_associates_df = spark.table("catalog.db.sales_associates_employment_details")
    compensation_guidelines_df = spark.table("catalog.db.compensation_guidelines")
    hospital_sales_assignments_df = spark.table("catalog.db.hospital_sales_assignments")
    logistics_channels_df = spark.table("catalog.db.logistics_channels")
    growth_opportunities_df = spark.table("catalog.db.growth_opportunities")
    third_party_sales_trends_df = spark.table("catalog.db.third_party_sales_trends")
    historical_sales_df = spark.table("catalog.db.historical_sales")
    company_goals_df = spark.table("catalog.db.company_goals")

    # Join Sales Associates Employment Details with Compensation Guidelines.
    # NOTE(review): compensation_df is not consumed by the report written
    # below; it is kept in case later code in this notebook uses it.
    compensation_df = sales_associates_df.join(
        compensation_guidelines_df,
        sales_associates_df.Associate_ID == compensation_guidelines_df.Associate_ID,
        "inner"
    ).select(
        sales_associates_df.Associate_ID,
        sales_associates_df.Associate_Name,
        compensation_guidelines_df.Base_Salary,
        compensation_guidelines_df.Commission_Percentage,
        compensation_guidelines_df.Bonus
    ).withColumn(
        "Compensation",
        F.expr("Base_Salary + (Commission_Percentage * Base_Salary) + Bonus")
    )

    # Join Hospital Stats North America with Hospital Sales Assignments.
    # NOTE(review): hospital_sales_df is likewise not part of the report output.
    hospital_sales_df = hospital_stats_df.join(
        hospital_sales_assignments_df,
        (hospital_stats_df.Hospital_ID == hospital_sales_assignments_df.Hospital_ID) &
        (hospital_stats_df.Hospital_Name == hospital_sales_assignments_df.Hospital_Name),
        "inner"
    ).select(
        hospital_stats_df.Hospital_ID,
        hospital_stats_df.Hospital_Name,
        hospital_sales_assignments_df.Director_Name,
        hospital_sales_assignments_df.Manager_Name,
        hospital_sales_assignments_df.Associate_ID,
        hospital_sales_assignments_df.Associate_Name
    )

    # Join Logistics Channels with Growth Opportunities
    logistics_growth_df = logistics_channels_df.join(
        growth_opportunities_df,
        (logistics_channels_df.Channel_ID == growth_opportunities_df.Channel_ID) &
        (logistics_channels_df.Channel_Type == growth_opportunities_df.Channel_Type),
        "inner"
    ).select(
        logistics_channels_df.Channel_ID,
        logistics_channels_df.Channel_Type,
        logistics_channels_df.Hospital_ID,
        logistics_channels_df.Growth_Opportunities,
        growth_opportunities_df.Projected_Growth_Rate
    )

    # Join Historical Sales with Third Party Sales Trends.
    # NOTE(review): the join key is Channel_Type only; if the trends table
    # holds more than one row per channel type this fans out sales rows.
    sales_trends_df = historical_sales_df.join(
        third_party_sales_trends_df,
        historical_sales_df.Channel_Type == third_party_sales_trends_df.Channel_Type,
        "inner"
    ).select(
        historical_sales_df.Year,
        historical_sales_df.Channel_ID,
        historical_sales_df.Channel_Type,
        historical_sales_df.Sales_Revenue,
        historical_sales_df.Hospital_ID,
        third_party_sales_trends_df.Market_Trend,
        third_party_sales_trends_df.Political_Impact,
        third_party_sales_trends_df.Economic_Impact
    )

    # The calculations below read Projected_Growth_Rate, which only exists on
    # logistics_growth_df, so it has to be joined onto the sales rows first.
    # NOTE(review): the key is the set of columns both frames select above
    # (Channel_ID, Channel_Type, Hospital_ID) - confirm this is the intended
    # grain and that logistics_growth_df is unique on it.
    growth_keys = ["Channel_ID", "Channel_Type", "Hospital_ID"]
    report_input_df = sales_trends_df.join(
        logistics_growth_df.select(*growth_keys, "Projected_Growth_Rate"),
        on=growth_keys,
        how="inner",
    )

    # Investment_Planned is not selected from any of the joined tables.
    # NOTE(review): presumably it comes from company_goals (the only loaded
    # table that was never used) - verify. Its schema is not visible here, so
    # the join uses whatever columns it shares with the report rows and fails
    # loudly when that cannot work; replace with an explicit key list once
    # the intended key is confirmed.
    if "Investment_Planned" not in company_goals_df.columns:
        raise ValueError(
            "catalog.db.company_goals has no Investment_Planned column; "
            "cannot compute projected_investments"
        )
    goal_keys = [
        name for name in company_goals_df.columns
        if name in report_input_df.columns
    ]
    if not goal_keys:
        raise ValueError(
            "catalog.db.company_goals shares no columns with the sales rows; "
            "cannot determine how to join Investment_Planned"
        )
    logger.info("Joining company goals on %s", goal_keys)
    report_input_df = report_input_df.join(
        company_goals_df.select(*goal_keys, "Investment_Planned"),
        on=goal_keys,
        how="inner",
    )

    # Implement custom calculations
    # Projected Sales Growth Rate Calculation. This must run before the
    # revenue and investment projections because both read its result.
    # The base rate gains one extra (rate / 100) step per year from 2024 on.
    base_growth = F.col("Projected_Growth_Rate")
    growth_step = base_growth / 100
    rate_2024 = base_growth + growth_step
    rate_2025 = rate_2024 + growth_step
    rate_2026 = rate_2025 + growth_step
    projected_growth_rate_df = report_input_df.withColumn(
        "Projected_Sales_Growth_Rate",
        F.when(F.col("Year") == 2024, rate_2024)
        .when(F.col("Year") == 2025, rate_2025)
        .when(F.col("Year") == 2026, rate_2026)
        .otherwise(base_growth)
    )

    # Projected Revenue Calculation.
    # The column is named with an underscore: Delta rejects column names
    # containing spaces unless column mapping is enabled on the table.
    projected_revenue_df = projected_growth_rate_df.withColumn(
        "Projected_Revenue",
        _project_by_year("Sales_Revenue")
    )

    # Projected Investments Calculation
    projected_investments_df = projected_revenue_df.withColumn(
        "projected_investments",
        _project_by_year("Investment_Planned")
    )

    # Filter and sort data
    filtered_sorted_df = projected_investments_df.filter(F.col("Year") > 2023).orderBy(F.col("Year").asc())

    # Write the final output to Unity Catalog table.
    # A single overwrite (with overwriteSchema) replaces data and schema in
    # one Delta commit; the previous DROP TABLE left a window with no table
    # and discarded the table's history.
    (
        filtered_sorted_df.write.format("delta")
        .mode("overwrite")
        .option("overwriteSchema", "true")
        .saveAsTable("catalog.db.target_sales_report")
    )

    logger.info("ETL process completed successfully.")

except Exception as e:
    # Log with traceback, then re-raise so the job run is reported as failed
    # instead of silently finishing after an error.
    logger.exception("Error during ETL process: %s", e)
    raise
