# Databricks notebook source
# COMMAND ----------
# MAGIC %md
# MAGIC # ETL Process for Currency Data
# MAGIC This notebook performs an ETL process on currency data, including loading data from Unity Catalog, performing transformations, and writing the transformed data back to Unity Catalog.

# COMMAND ----------
# MAGIC
# Import necessary libraries
import logging
from pyspark.sql import DataFrame
from pyspark.sql.functions import col

# Notebook-wide logger; basicConfig asks the root logger to emit INFO and above
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# COMMAND ----------
# MAGIC
# Define function to load data from Unity Catalog table
def load_unity_catalog_table(table_name: str) -> DataFrame:
    """Read a table through ``spark.table`` and log the outcome.

    Args:
        table_name: Identifier handed unchanged to ``spark.table``.

    Returns:
        The DataFrame produced by ``spark.table(table_name)``.

    Raises:
        Exception: Whatever the read raises; it is logged at ERROR level
            and then re-raised unchanged.
    """
    try:
        logger.info(f"Loading data from Unity Catalog table: {table_name}")
        table_df = spark.table(table_name)
        logger.info(f"Successfully loaded data from {table_name}")
    except Exception as err:
        logger.error(f"Error loading data from {table_name}: {str(err)}")
        # Bare raise keeps the original exception and traceback for the caller
        raise
    else:
        return table_df

# COMMAND ----------
# MAGIC
# Read the currency flat file as CSV; the first row supplies the column names
flat_file_path = "/Volumes/genai_demo/ssis/ssis/SampleCurrencyData.txt"
flat_file_df = (
    spark.read
    .option("header", "true")
    .csv(flat_file_path)
)

# Schema contract: keep exactly these four columns (fails if any is missing)
flat_file_df = flat_file_df.select(
    ['AverageRate', 'CurrencyID', 'CurrencyDate', 'EndOfDayRate']
)

# COMMAND ----------
# MAGIC
# Dimension tables used by the lookups below, both read via load_unity_catalog_table.
# NOTE(review): these identifiers have four dot-separated parts, while Unity Catalog
# table names are three-level (catalog.schema.table) -- confirm the intended names.

# Currency dimension
currency_df = load_unity_catalog_table(table_name="catalog.vsco.dbo.DimCurrency1")

# Date dimension
date_df = load_unity_catalog_table(table_name="catalog.vsco.dbo.DimDate1")

# COMMAND ----------
# MAGIC
# Perform lookup operations and transformations.
#
# Both lookups are left joins, so every flat-file row is kept even when no
# dimension row matches. NOTE(review): nothing here verifies that the lookup
# keys are unique in the dimension tables; duplicate keys would multiply rows.
try:
    logger.info("Performing lookup operations and transformations")

    # Currency lookup: match the file's currency code to the dimension's
    # alternate key. Columns are qualified by DataFrame so the condition stays
    # unambiguous regardless of which other columns the two sides share.
    transformed_df = flat_file_df.join(
        currency_df,
        flat_file_df['CurrencyID'] == currency_df['CurrencyAlternateKey'],
        "left",
    )

    # Date lookup: match the rate's date to the date dimension. The previous
    # condition compared CurrencyID with EnglishMonthName, which can never
    # match and left every date column NULL.
    # NOTE(review): CurrencyDate comes from a CSV read without a schema, so it
    # is a string; confirm its format compares correctly with
    # FullDateAlternateKey (add an explicit to_date/cast if it does not).
    final_df = transformed_df.join(
        date_df,
        transformed_df['CurrencyDate'] == date_df['FullDateAlternateKey'],
        "left",
    )

    # Schema contract for the final DataFrame
    final_df = final_df.select('AverageRate', 'EndOfDayRate', 'CurrencyID', 'CurrencyDate',
                               'CurrencyAlternateKey', 'FullDateAlternateKey', 'EnglishDayNameOfWeek',
                               'EnglishMonthName', 'DateKey')
    logger.info("Successfully performed transformations")
except Exception as e:
    logger.error(f"Error during transformations: {str(e)}")
    raise

# COMMAND ----------
# MAGIC
# Remove any previous copy of the target table so the write below starts clean.
# NOTE(review): the identifier has four dot-separated parts, while Unity Catalog
# table names are three-level (catalog.schema.table) -- confirm the intended name.
try:
    logger.info("Dropping existing table in Unity Catalog if exists")
    spark.sql(
        "DROP TABLE IF EXISTS catalog.vsco.dbo.TransformedCurrencyData"
    )
except Exception as drop_error:
    # Log for the notebook output, then let the failure stop the run
    logger.error(f"Error dropping existing table: {str(drop_error)}")
    raise

# COMMAND ----------
# MAGIC
# Persist final_df as a Delta table, replacing any existing contents.
# NOTE(review): the identifier has four dot-separated parts, while Unity Catalog
# table names are three-level (catalog.schema.table) -- confirm the intended name.
try:
    logger.info("Writing transformed data to Unity Catalog table")
    (
        final_df.write
        .format("delta")
        .mode("overwrite")
        .saveAsTable("catalog.vsco.dbo.TransformedCurrencyData")
    )
    logger.info("Successfully wrote transformed data to Unity Catalog table")
except Exception as write_error:
    # Log for the notebook output, then let the failure stop the run
    logger.error(f"Error writing data to Unity Catalog table: {str(write_error)}")
    raise

# COMMAND ----------
# MAGIC %md
# MAGIC ## ETL Process Completed
# MAGIC The ETL process for currency data has been completed successfully.
