## Initializing the Spark application

In [None]:
# Import the necessary libraries
from pyspark.sql import SparkSession
import configparser
import os

# __file__ is assigned by hand here (a notebook kernel normally does not
# define it). abspath() resolves this relative name against the current
# working directory, so pipeline.conf is looked up in the directory the
# kernel is running from.
__file__ = "01.create_external_tables.ipynb"
script_dir = os.path.dirname(os.path.abspath(__file__))
conf_path = os.path.join(script_dir, "pipeline.conf")

parser = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed; an empty list means the config was not loaded.
# Fail here with the path in the message instead of a later NoSectionError.
if not parser.read(conf_path):
    raise FileNotFoundError(f"Configuration file not found or unreadable: {conf_path}")

# AWS credentials used for S3A access; kept in the config file, not in code.
access_key = parser.get("aws_boto_credentials", "access_key")
secret_key = parser.get("aws_boto_credentials", "secret_key")


# Build (or reuse) the SparkSession, routing s3a:// paths through the S3A
# filesystem and passing the AWS keys loaded from the pipeline config.
spark = (
    SparkSession.builder
    .appName("GeneratingExternalTables")
    .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
    .config("spark.hadoop.fs.s3a.access.key", access_key)
    .config("spark.hadoop.fs.s3a.secret.key", secret_key)
    .getOrCreate()
)

# Schema (database) name that prefixes every table created in this notebook.
schema_name = "_bronze"

## External Table for Orders

In [None]:


from pipeline.extract.bronze_schema import orders_external_table_schema,external_order_s3_bucket_path 

table_name = 'orders_table'

# Register the CSV data at external_order_s3_bucket_path as a table in the
# bronze schema, using the explicit column list (schema inference is off).
# The EXTERNAL keyword is intentionally omitted: Spark's data source syntax
# (CREATE TABLE ... USING) does not accept it, and a table created with an
# explicit PATH is already unmanaged, so the underlying files are not owned
# by the metastore.
spark.sql(f"""
    CREATE TABLE IF NOT EXISTS {schema_name}.{table_name} ({orders_external_table_schema})
    USING csv
    OPTIONS (
        PATH '{external_order_s3_bucket_path}',
        HEADER 'true',
        INFERSCHEMA 'false',
        DELIMITER ','
    )
""")

## External Table for Products

In [None]:
from pipeline.extract.bronze_schema import product_external_table_schema,external_product_s3_bucket_path

table_name = 'product_table'

# Register the CSV data at external_product_s3_bucket_path as a table in the
# bronze schema, using the explicit column list (schema inference is off).
# The EXTERNAL keyword is intentionally omitted: Spark's data source syntax
# (CREATE TABLE ... USING) does not accept it, and a table created with an
# explicit PATH is already unmanaged, so the underlying files are not owned
# by the metastore.
spark.sql(f"""
    CREATE TABLE IF NOT EXISTS {schema_name}.{table_name} ({product_external_table_schema})
    USING csv
    OPTIONS (
        PATH '{external_product_s3_bucket_path}',
        HEADER 'true',
        INFERSCHEMA 'false',
        DELIMITER ','
    )
""")

## External Table for Customers

In [None]:
from pipeline.extract.bronze_schema import customer_external_table_schema,external_customer_s3_bucket_path

table_name = 'customer_table'

# Register the CSV data at external_customer_s3_bucket_path as a table in the
# bronze schema, using the explicit column list (schema inference is off).
# The EXTERNAL keyword is intentionally omitted: Spark's data source syntax
# (CREATE TABLE ... USING) does not accept it, and a table created with an
# explicit PATH is already unmanaged, so the underlying files are not owned
# by the metastore.
spark.sql(f"""
    CREATE TABLE IF NOT EXISTS {schema_name}.{table_name} ({customer_external_table_schema})
    USING csv
    OPTIONS (
        PATH '{external_customer_s3_bucket_path}',
        HEADER 'true',
        INFERSCHEMA 'false',
        DELIMITER ','
    )
""")