In [None]:
# Databricks notebook source
# COMMAND ----------

# MAGIC %md
# MAGIC # ETL Migration Process
# MAGIC This notebook performs an ETL migration process using PySpark. It loads data from a flat file and a SQL Server database, performs transformations, and writes the transformed data to a target database.

# COMMAND ----------

# MAGIC
# Import necessary libraries
import logging
from pyspark.sql import SparkSession
from pyspark.sql.functions import col
from pyspark.sql.utils import AnalysisException

# Initialize logging.
# logging.basicConfig() does nothing when the root logger already has handlers
# (e.g. when the hosting runtime pre-configures logging), which would leave the
# effective level above INFO and hide this notebook's progress messages.
# Setting the level on the module logger itself makes INFO records pass
# regardless of how the root logger was configured.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# COMMAND ----------

# MAGIC
# Define JDBC connection properties
# SQL Server endpoint; the database is selected through the databaseName property.
jdbc_url = "jdbc:sqlserver://vsco-sqlserver.ctiklfxgg9js.us-east-2.rds.amazonaws.com;databaseName=vsco"

# Credentials are fetched from the "SSIS" secret scope instead of being hard-coded;
# the driver entry names the JDBC driver class handed to the reader.
connection_properties = dict(
    user=dbutils.secrets.get(scope="SSIS", key="username"),
    password=dbutils.secrets.get(scope="SSIS", key="password"),
    driver="com.microsoft.sqlserver.jdbc.SQLServerDriver",
)

# COMMAND ----------

# MAGIC
# Function to load data from flat file
def load_flat_file_data(spark):
    """Read the currency flat file and keep only the columns used downstream.

    Args:
        spark: Active SparkSession used to read the file.

    Returns:
        DataFrame restricted to AverageRate, CurrencyID, CurrencyDate and
        EndOfDayRate. No schema is passed to the reader, so column types are
        left to the CSV reader's defaults.
    """
    logger.info("Loading data from flat file source with explicit schema contract.")

    source_path = "/Volumes/genai_demo/ssis/ssis/SampleCurrencyData.txt"
    wanted_columns = ['AverageRate', 'CurrencyID', 'CurrencyDate', 'EndOfDayRate']

    # The first line of the file is treated as the header row.
    csv_reader = spark.read.format("csv").option("header", "true")
    raw_df = csv_reader.load(source_path)
    return raw_df.select(*wanted_columns)

# COMMAND ----------

# MAGIC
# Function to load data from AdventureWorksDW2012 database
def load_adventureworks_data(spark, jdbc_url, connection_properties):
    """Read the currency and date lookup tables over JDBC.

    Args:
        spark: Active SparkSession.
        jdbc_url: JDBC URL of the source database.
        connection_properties: Properties dict forwarded to the JDBC reader.

    Returns:
        Tuple of two DataFrames: ``dbo.DimCurrency1`` first, ``dbo.DimDate1``
        second.
    """
    logger.info("Loading data from AdventureWorksDW2012 database.")

    def read_table(table_name):
        # Both lookups share the same URL and connection properties.
        return spark.read.jdbc(url=jdbc_url, table=table_name, properties=connection_properties)

    currency_df = read_table("dbo.DimCurrency1")
    calendar_df = read_table("dbo.DimDate1")
    return currency_df, calendar_df

# COMMAND ----------

# MAGIC
# Function to perform lookup transformations
def perform_lookup_transformations(flat_file_df, adventureworks_df, date_df):
    """Enrich flat-file rows with the currency and date lookup tables.

    Both lookups are inner joins, so a flat-file row with no match in either
    lookup table is dropped from the result.

    Join keys are referenced through the DataFrame that owns them rather than
    through bare ``col(name)``. A bare name is resolved against the combined
    join output and raises an ambiguous-reference AnalysisException as soon as
    a lookup table carries a column with the same name as the other side;
    DataFrame-qualified references do not have that failure mode.

    Args:
        flat_file_df: Rows providing ``CurrencyID`` and ``CurrencyDate``.
        adventureworks_df: Currency lookup providing ``CurrencyAlternateKey``.
        date_df: Date lookup providing ``FullDateAlternateKey``.

    Returns:
        DataFrame holding the columns of all three inputs for matched rows.
    """
    logger.info("Performing lookup transformations using explicit column selection.")

    currency_match = flat_file_df["CurrencyID"] == adventureworks_df["CurrencyAlternateKey"]
    enriched_df = flat_file_df.join(adventureworks_df, currency_match, "inner")

    # CurrencyDate originates from the flat file, so it is qualified by that
    # DataFrame even though the join runs against the enriched result.
    date_match = flat_file_df["CurrencyDate"] == date_df["FullDateAlternateKey"]
    final_df = enriched_df.join(date_df, date_match, "inner")
    return final_df

# COMMAND ----------

# MAGIC
# Function to write transformed data to target database
def write_transformed_data(spark, final_df):
    """Replace catalog.target_db.TransformedData with the contents of final_df.

    The table is replaced with a single Delta overwrite instead of
    DROP TABLE followed by a write. Dropping first is not atomic: readers see
    no table between the two steps, a failed write leaves the target missing,
    and the drop discards the table's history. ``overwriteSchema`` keeps the
    one capability the drop provided, namely replacing a table whose schema
    differs from ``final_df``.

    Args:
        spark: Active SparkSession. Unused now that no SQL statement is
            issued; kept so existing callers keep working.
        final_df: DataFrame to persist.
    """
    logger.info("Writing transformed data to the target database.")
    (
        final_df.write.format("delta")
        .mode("overwrite")
        .option("overwriteSchema", "true")
        .saveAsTable("catalog.target_db.TransformedData")
    )

# COMMAND ----------

# MAGIC
# Main ETL process
try:
    # Load data
    flat_file_df = load_flat_file_data(spark)
    adventureworks_df, date_df = load_adventureworks_data(spark, jdbc_url, connection_properties)

    # Transform data
    final_df = perform_lookup_transformations(flat_file_df, adventureworks_df, date_df)

    # Write data
    write_transformed_data(spark, final_df)
except AnalysisException as e:
    # Log with traceback, then re-raise so the run is reported as failed
    # instead of silently finishing after a broken load/transform/write.
    logger.exception("AnalysisException occurred: %s", e)
    raise
except Exception as e:
    logger.exception("An error occurred: %s", e)
    raise
else:
    # Only reached when every step above finished without raising.
    logger.info("ETL process completed successfully.")
