In [None]:
import logging
import psycopg2
from pyspark.sql import functions as F

# Configure the root logger: INFO level and above, with every record rendered
# as "<timestamp> - <level name> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Source and destination locations used by the ETL functions in this file.
# Flat file that is read as comma-delimited CSV with a header row.
FLAT_FILE_PATH = "/Volumes/genai_demo/ssis/ssis/SampleCurrencyData.txt"
# Lookup table matched on CurrencyAlternateKey to obtain CurrencyKey.
DIM_CURRENCY_TABLE = "sqlserver_catalog.dbo.dimcurrency1"
# Lookup table matched on FullDateAlternateKey to obtain DateKey.
DIM_DATE_TABLE = "sqlserver_catalog.dbo.dimdate1"
# Output table; it is dropped (if present) and rewritten on each run.
TARGET_TABLE = "sqlserver_catalog.dbo.ssisresult"

# Function to load data from flat file
def load_flat_file_data(file_path):
    """Read the flat file at ``file_path`` into a Spark DataFrame.

    The file is parsed as comma-delimited CSV whose first row is a header.
    Uses the ambient ``spark`` session.

    Args:
        file_path: Location of the delimited text file to read.

    Returns:
        The DataFrame produced by the CSV reader.

    Raises:
        Exception: Any error from the read is logged and re-raised unchanged.
    """
    try:
        logging.info("Loading data from flat file: %s", file_path)
        csv_reader = spark.read.option("header", "true").option("delimiter", ",")
        return csv_reader.csv(file_path)
    except Exception as load_error:
        logging.error("Error loading flat file data: %s", str(load_error))
        raise

# Function to perform lookup transformations
def perform_lookups(flat_file_df):
    """Enrich the flat-file rows with CurrencyKey and DateKey.

    Two inner joins are applied in sequence, so a row that has no match in
    either dimension table is dropped from the result:

    1. ``CurrencyID`` against ``CurrencyAlternateKey`` of the currency
       dimension, appending ``CurrencyKey``.
    2. ``CurrencyDate`` against ``FullDateAlternateKey`` of the date
       dimension, appending ``DateKey``.

    Args:
        flat_file_df: DataFrame holding the flat-file rows; it must expose
            ``CurrencyID`` and ``CurrencyDate`` columns.

    Returns:
        A DataFrame with all input columns plus ``CurrencyKey`` and
        ``DateKey``.

    Raises:
        Exception: Any error is logged and re-raised unchanged.
    """
    try:
        logging.info("Performing lookup transformations")

        # Currency lookup: keep the source columns and add the surrogate key.
        currency_dim = spark.table(DIM_CURRENCY_TABLE)
        currency_match = flat_file_df.CurrencyID == currency_dim.CurrencyAlternateKey
        with_currency = flat_file_df.join(currency_dim, currency_match, "inner")
        with_currency = with_currency.select(
            flat_file_df["*"],
            currency_dim["CurrencyKey"],
        )

        # Date lookup: runs on the currency-enriched rows and adds DateKey.
        date_dim = spark.table(DIM_DATE_TABLE)
        date_match = with_currency.CurrencyDate == date_dim.FullDateAlternateKey
        with_dates = with_currency.join(date_dim, date_match, "inner")
        return with_dates.select(
            with_currency["*"],
            date_dim["DateKey"],
        )
    except Exception as lookup_error:
        logging.error("Error performing lookups: %s", str(lookup_error))
        raise

# Function to write data to target table
def write_to_target_table(transformed_df):
    """Replace the contents of ``TARGET_TABLE`` with ``transformed_df``.

    The existing table is dropped first (if it exists), then the DataFrame
    is saved as a table in ``delta`` format using ``overwrite`` mode.

    Args:
        transformed_df: DataFrame to persist.

    Raises:
        Exception: Any error from the drop or the write is logged and
            re-raised unchanged.
    """
    try:
        logging.info("Writing data to target table: %s", TARGET_TABLE)

        drop_statement = f"DROP TABLE IF EXISTS {TARGET_TABLE}"
        spark.sql(drop_statement)

        delta_writer = transformed_df.write.format("delta").mode("overwrite")
        delta_writer.saveAsTable(TARGET_TABLE)
    except Exception as write_error:
        logging.error("Error writing to target table: %s", str(write_error))
        raise

# Main ETL process
def main():
    """Run the ETL pipeline: load the flat file, apply lookups, write the result.

    The three steps run in order; the first failure aborts the run.

    Raises:
        Exception: Any error raised by a step is logged with its traceback
            and then re-raised, so the caller (or a job scheduler) sees the
            failure instead of a run that silently looks successful.
    """
    try:
        # Step 1: Load data from flat file
        flat_file_df = load_flat_file_data(FLAT_FILE_PATH)

        # Step 2: Perform lookup transformations
        transformed_df = perform_lookups(flat_file_df)

        # Step 3: Write transformed data to target table
        write_to_target_table(transformed_df)

        logging.info("ETL process completed successfully")
    except Exception as e:
        # Log at ERROR level with the traceback, then propagate: swallowing
        # the exception here would make a failed run indistinguishable from
        # a successful one.
        logging.exception("ETL process failed: %s", e)
        raise
# Execute the main ETL process immediately when this code runs; there is no
# `if __name__ == "__main__"` guard, so importing this file also triggers it.
main()
