In [None]:
# Load Raw Data from S3 to Snowflake
#
# This notebook loads data from S3 stages to raw tables in Snowflake.


In [None]:

import re

from snowflake.snowpark.context import get_active_session

# Environment variables - will be replaced by Jinja templating
ENV = "{{env}}"

# Fail fast when the placeholder was not rendered, or was rendered to a value
# that cannot prefix an unquoted SQL identifier, rather than sending a
# malformed CREATE SCHEMA statement to Snowflake.
if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_$]*", ENV):
    raise ValueError(f"Invalid environment name for schema prefix: {ENV!r}")

# Name of the per-environment schema that receives the raw tables.
RAW_SCHEMA = f"{ENV}_RAW_SCHEMA"

# Define stages
STAGES = ["DAILYCURRENCY_RAW_STAGE", "MONTHLYCURRENCYEXCHANGE_RAW_STAGE"]

# Use the current Snowflake session. `snowpark.session` is a module and cannot
# be called; the notebook's active session is obtained via get_active_session().
session = get_active_session()

# Create schema if it doesn't exist
session.sql(f"CREATE SCHEMA IF NOT EXISTS FRED_DB.{RAW_SCHEMA}").collect()
print(f"✅ Schema created/verified for {ENV} environment")

import posixpath
import re

# Database that holds the raw schema; matches the database named in the
# CREATE SCHEMA statement so tables do not depend on the session's current
# database.
TARGET_DATABASE = "FRED_DB"

# Iterate over stages: every CSV file found in a stage becomes one RAW_<NAME>
# table whose columns are inferred from the file, then the file is loaded.
for stage in STAGES:
    print(f"🔍 Checking files in stage: {stage}")

    # LIST returns one row per staged file; the first column is the file path.
    files = session.sql(f"LIST @{stage};").collect()

    for file in files:
        file_path = file[0]  # Full path
        # Stage paths always use "/" separators, so posixpath is the right tool.
        # NOTE(review): only the base name is kept, which assumes files sit at
        # the stage root - confirm if stages contain sub-folders.
        staged_name = posixpath.basename(file_path)
        file_name, extension = posixpath.splitext(staged_name)

        # LIST is not filtered, so ignore anything that is not a .csv file
        # instead of pointing INFER_SCHEMA/COPY at a path that does not exist.
        if extension.lower() != ".csv":
            print(f"⏭️ Skipping non-CSV file: {staged_name}")
            continue

        # Keep only characters valid in an unquoted identifier so file names
        # containing dashes, spaces or dots still yield a usable table name.
        table_name = "RAW_" + re.sub(r"[^A-Za-z0-9_]", "_", file_name).upper()
        qualified_table = f"{TARGET_DATABASE}.{RAW_SCHEMA}.{table_name}"

        # Reference the file by its real staged name and escape it for use
        # inside a single-quoted SQL string literal.
        stage_location = (
            f"@{stage}/{staged_name}".replace("\\", "\\\\").replace("'", "''")
        )

        print(f"📂 Processing file: {file_name}, creating table: {table_name}")

        # Create table dynamically with inferred schema. ORDER BY ORDER_ID
        # keeps the table's columns in the same order as in the file.
        create_table_sql = f"""
        CREATE OR REPLACE TABLE {qualified_table}
        USING TEMPLATE (
            SELECT ARRAY_AGG(OBJECT_CONSTRUCT(*)) WITHIN GROUP (ORDER BY ORDER_ID)
            FROM TABLE(INFER_SCHEMA(
                LOCATION => '{stage_location}',
                FILE_FORMAT => 'CSV_FORMAT'
            ))
        );
        """
        session.sql(create_table_sql).collect()
        print(f"✅ Created table: {table_name}")

        # Copy data into the table, matching file columns to table columns by
        # name rather than by position.
        copy_into_sql = f"""
        COPY INTO {qualified_table}
        FROM '{stage_location}'
        FILE_FORMAT = CSV_FORMAT
        MATCH_BY_COLUMN_NAME = CASE_INSENSITIVE;
        """
        session.sql(copy_into_sql).collect()
        print(f"✅ Data loaded into {table_name}")

print("🔄 Raw data loading completed.")