In [1]:
import snowflake.snowpark as snowpark
from snowflake.snowpark import Session

In [10]:
# Connection settings used by the cells below.
# They are read from environment variables so that no secret is stored in the
# notebook; the non-secret settings fall back to this project's usual values.
# NOTE(review): a password was previously committed in this cell - rotate it.
import os

user = os.environ.get('SNOWFLAKE_USER', 'DOLPHIN')
# Deliberately no default: export SNOWFLAKE_PASSWORD before starting the
# kernel. When it is unset this is None and the connection attempt will fail.
password = os.environ.get('SNOWFLAKE_PASSWORD')
account = os.environ.get('SNOWFLAKE_ACCOUNT', 'URB63596')
database = os.environ.get('SNOWFLAKE_DATABASE', 'mimic_iv_medi_assist')
schema = os.environ.get('SNOWFLAKE_SCHEMA', 'raw')
warehouse = os.environ.get('SNOWFLAKE_WAREHOUSE', 'my_warehouse')


In [3]:
# Name of the Snowflake internal stage that the staged CSV files are read from.
STAGE_NAME = "my_internal_stage"

In [4]:
# Gzipped CSV files to load. The part of each file name before the first dot
# is used as the table name by the loading code.
_TABLE_BASE_NAMES = (
    "admissions",
    "d_icd_diagnoses",
    "d_icd_procedures",
    "discharge",
    "drgcodes",
    "pharmacy",
)
TABLES_TO_LOAD = [f"{base_name}.csv.gz" for base_name in _TABLE_BASE_NAMES]


In [5]:
# Function to establish Snowflake session
def create_session(user, password, account, database, schema, warehouse=None):
    """Open a Snowpark session from the given connection settings.

    Args:
        user: Snowflake login name.
        password: Password for ``user``.
        account: Snowflake account identifier.
        database: Database the session should use.
        schema: Schema the session should use.
        warehouse: Optional warehouse name. When given it is added to the
            connection parameters so the session starts with a warehouse
            selected; when omitted the parameters are exactly the five
            required settings, as before.

    Returns:
        The object returned by ``Session.builder.configs(...).create()``.
    """
    connection_parameters = {
        "user": user,
        "password": password,
        "account": account,
        "database": database,
        "schema": schema,
    }
    # Only send the key when a warehouse was requested, so existing callers
    # produce the same parameter dict as they always did.
    if warehouse is not None:
        connection_parameters["warehouse"] = warehouse
    return Session.builder.configs(connection_parameters).create()

In [6]:
# Open the Snowpark session with the connection settings defined earlier.
session = create_session(
    user=user,
    password=password,
    account=account,
    database=database,
    schema=schema,
)

In [14]:
# Select the warehouse for this session before issuing any COPY statement.
print("Ensuring warehouse is active...")
use_warehouse_sql = f"USE WAREHOUSE {warehouse};"
session.sql(use_warehouse_sql).collect()

_BANNER = "=================================================================================\n"

for staged_file in TABLES_TO_LOAD:
    # Everything before the first dot is the table name,
    # e.g. 'admissions.csv.gz' -> 'admissions'.
    table_name = staged_file.partition('.')[0]
    print(f"Creating table: {table_name} from file: {staged_file}")

    # Path of the file inside the internal stage.
    location = f'@{STAGE_NAME}/{staged_file}'
    print(_BANNER)
    print(f"Loading data into '{table_name}' from {location}...")

    # COPY INTO targets an existing table; it is issued here without any
    # FILE_FORMAT or ON_ERROR option.
    copy_sql = f"""
            COPY INTO {table_name}
            FROM {location};
        """
    try:
        session.sql(copy_sql).collect()
    except Exception as e:
        # Report the failure and carry on with the next file.
        print(f"Error loading data into '{table_name}': {e}")
    else:
        print(f"Data loaded into table '{table_name}' successfully.")
    print(_BANNER)

Ensuring warehouse is active...
Creating table: admissions from file: admissions.csv.gz

Loading data into 'admissions' from @my_internal_stage/admissions.csv.gz...
Error loading data into 'admissions': (1304): 01b793b4-0003-a868-0000-13e7001151ba: 001757 (42601): SQL compilation error:
Table 'ADMISSIONS' does not exist

Creating table: d_icd_diagnoses from file: d_icd_diagnoses.csv.gz

Loading data into 'd_icd_diagnoses' from @my_internal_stage/d_icd_diagnoses.csv.gz...
Error loading data into 'd_icd_diagnoses': (1304): 01b793b4-0003-a5ad-0000-13e700114666: 001757 (42601): SQL compilation error:
Table 'D_ICD_DIAGNOSES' does not exist

Creating table: d_icd_procedures from file: d_icd_procedures.csv.gz

Loading data into 'd_icd_procedures' from @my_internal_stage/d_icd_procedures.csv.gz...
Error loading data into 'd_icd_procedures': (1304): 01b793b5-0003-a868-0000-13e7001151be: 001757 (42601): SQL compilation error:
Table 'D_ICD_PROCEDURES' does not exist

Creating table: discharge fro

In [None]:
# Create file format
def create_file_format(session):
    """Create (or replace) the named CSV file format ``file_format_csv``.

    Args:
        session: Object exposing ``sql(text).collect()``; the statement is
            executed through it.

    Returns:
        None. Progress messages are printed to stdout.
    """
    file_format_sql = """
        CREATE OR REPLACE FILE FORMAT file_format_csv
        TYPE = 'CSV'
        COMPRESSION = 'GZIP'
        FIELD_DELIMITER = ','
        PARSE_HEADER = TRUE
        FIELD_OPTIONALLY_ENCLOSED_BY = '"';
    """
    print("Creating file format...")
    session.sql(file_format_sql).collect()
    for message in (
        "File format 'file_format_csv' created successfully.",
        "===========",
    ):
        print(message)

In [None]:
# Create table from inferred schema
def create_table(session, table_name, file_path):
    """Create (or replace) a table whose columns are inferred from a staged file.

    Runs ``CREATE OR REPLACE TABLE ... USING TEMPLATE`` over the result of
    ``INFER_SCHEMA`` for the file ``@{STAGE_NAME}/{file_path}``, using the
    named file format ``file_format_csv``.

    Args:
        session: Object exposing ``sql(text).collect()``.
        table_name: Name of the table to create; interpolated into the SQL
            as written.
        file_path: Path of the file relative to the stage ``STAGE_NAME``.

    Returns:
        None. Any exception raised while executing the statement is caught
        and reported on stdout instead of propagating.
    """
    location = f'@{STAGE_NAME}/{file_path}'  # Reference to internal stage
    print(f"Creating table '{table_name}' with inferred schema from {location}...")

    try:
        # The INFER_SCHEMA argument list must not contain a statement
        # terminator; a ';' after the FILE_FORMAT argument made the whole
        # statement a syntax error.
        session.sql(f"""
            CREATE OR REPLACE TABLE {table_name}
            USING TEMPLATE (
                SELECT ARRAY_AGG(OBJECT_CONSTRUCT(*))
                FROM TABLE(
                    INFER_SCHEMA(
                        LOCATION => '{location}',
                        FILE_FORMAT => 'file_format_csv'
                    )
                )
            );
        """).collect()

        print(f"Table '{table_name}' created successfully with inferred schema.")
    except Exception as e:
        # Best effort: report the failure so the caller can continue with
        # the remaining files.
        print(f"Error creating table '{table_name}': {e}")
    print("===========\n\n")


In [None]:
# Load all raw tables
def load_all_raw_tables(session, warehouse="my_warehouse"):
    """Create one table per entry of ``TABLES_TO_LOAD``.

    Args:
        session: Object exposing ``sql(text).collect()``.
        warehouse: Name of the warehouse to activate before creating the
            tables. Defaults to ``my_warehouse``.

    Returns:
        None. Progress is printed to stdout.
    """
    print("Ensuring warehouse is active...")
    # The warehouse name is an identifier, so it must not be wrapped in
    # single quotes (that would make it a string literal).
    session.sql(f"USE WAREHOUSE {warehouse}").collect()

    for file in TABLES_TO_LOAD:
        # Everything before the first dot is the table name,
        # e.g. 'admissions.csv.gz' -> 'admissions'.
        table_name = file.split('.')[0]
        print(f"Creating table: {table_name} from file: {file}")
        create_table(session, table_name, file)

    # create_table reports failures itself instead of raising, so this line
    # cannot claim that every table was created.
    print("Finished processing all tables.")

In [None]:
# Create file format and load raw tables.
# The file format is created first because create_table refers to the
# 'file_format_csv' format by name when inferring each table's schema.
create_file_format(session)
load_all_raw_tables(session)

---
AUTOMATED TEST SCRIPT

---

In [1]:
from snowflake.snowpark import Session
import os
from datetime import datetime

In [2]:
# Connection settings for this section of the notebook.
# They are read from environment variables so that no secret is stored in the
# notebook; the non-secret settings fall back to this project's usual values.
# NOTE(review): a password was previously committed in this cell - rotate it.
import os

user = os.environ.get('SNOWFLAKE_USER', 'DOLPHIN')
# Deliberately no default: export SNOWFLAKE_PASSWORD before starting the
# kernel. When it is unset this is None.
password = os.environ.get('SNOWFLAKE_PASSWORD')
account = os.environ.get('SNOWFLAKE_ACCOUNT', 'URB63596')
database = os.environ.get('SNOWFLAKE_DATABASE', 'mimic_iv_medi_assist')
schema = os.environ.get('SNOWFLAKE_SCHEMA', 'raw')
warehouse = os.environ.get('SNOWFLAKE_WAREHOUSE', 'my_warehouse')

In [3]:
def snowpark_basic_auth() -> "Session":
    """Open a Snowpark session using account, user and password only.

    Credentials come from the environment instead of being written into the
    notebook:

    * ``SNOWFLAKE_ACCOUNT`` (default ``URB63596``)
    * ``SNOWFLAKE_USER`` (default ``DOLPHIN``)
    * ``SNOWFLAKE_PASSWORD`` (no default; prompted for when unset)

    No database, schema or warehouse is passed, so the caller has to select
    them on the returned session.

    Returns:
        The object returned by ``Session.builder.configs(...).create()``.
    """
    import getpass
    import os

    # Never fall back to a literal password; ask interactively instead.
    password = os.environ.get("SNOWFLAKE_PASSWORD") or getpass.getpass("Snowflake password: ")
    connection_parameters = {
        "ACCOUNT": os.environ.get("SNOWFLAKE_ACCOUNT", "URB63596"),
        "USER": os.environ.get("SNOWFLAKE_USER", "DOLPHIN"),
        "PASSWORD": password,
    }
    return Session.builder.configs(connection_parameters).create()


In [4]:
def generate_ddl_statement(column_names, data_types, table_name):
    """Build a ``CREATE TABLE IF NOT EXISTS`` statement as a string.

    Args:
        column_names: Iterable of column names.
        data_types: Iterable of column types, paired positionally with
            ``column_names``; pairing stops at the shorter of the two.
        table_name: Name placed after ``CREATE TABLE IF NOT EXISTS``.

    Returns:
        The DDL text, one ``   <name> <type>`` line per column, separated by
        commas. No statement terminator is appended.
    """
    column_block = ",\n".join(
        "   {} {}".format(col_name, col_type)
        for col_name, col_type in zip(column_names, data_types)
    )
    return f"CREATE TABLE IF NOT EXISTS {table_name} (\n{column_block})"


In [5]:
def generate_copy_statement(table_name,stage_name,csv_file_path,file_format):
    """Build a ``COPY INTO`` statement that loads one staged file.

    Args:
        table_name: Target table of the COPY.
        stage_name: Stage name, without the leading ``@``.
        csv_file_path: Path of the file inside the stage.
        file_format: Name of the file format referenced via ``FORMAT_NAME``.

    Returns:
        The statement text. It starts with a newline and every line is
        indented by four spaces.
    """
    return (
        "\n"
        f"    COPY INTO {table_name}\n"
        f"    FROM @{stage_name}/{csv_file_path}\n"
        f"    FILE_FORMAT = (FORMAT_NAME = '{file_format}')\n"
        "    ;\n"
        "    "
    )


In [6]:
def create_file_format(session):
    """Create (or replace) the named CSV file format ``file_format_csv``.

    Args:
        session: Object exposing ``sql(text).collect()``; the statement is
            executed through it.

    Returns:
        None. Progress messages are printed to stdout.
    """
    file_format_sql = """
        CREATE OR REPLACE FILE FORMAT file_format_csv
        TYPE = 'CSV'
        COMPRESSION = 'GZIP'                   -- Specify GZIP compression for .gz files
        FIELD_DELIMITER = ','                   -- Specify the field delimiter
        PARSE_HEADER = TRUE                      -- Parse the header row for column names
        FIELD_OPTIONALLY_ENCLOSED_BY = '"';     -- Optional field enclosure
    """
    print("Creating file format...")
    session.sql(file_format_sql).collect()

    for message in (
        "File format 'file_format_csv' created successfully.",
        "===========",
    ):
        print(message)

In [7]:
def create_file_format(session):
    """Create (or replace) the ``file_format_ddl`` and ``file_format_load`` formats.

    ``file_format_ddl`` is declared with PARSE_HEADER = TRUE and
    ``file_format_load`` with SKIP_HEADER = 1. The statements run in that
    order through ``session.sql(...).collect()``.

    Args:
        session: Object exposing ``sql(text).collect()``.

    Returns:
        None. Progress messages are printed to stdout.
    """
    # The strings are not raw: '\042' reaches Snowflake as a double quote
    # and '\n' as a real newline character.
    format_statements = (
        ("file_format_ddl", """
        CREATE OR REPLACE FILE FORMAT file_format_ddl
        TYPE = 'CSV'
        COMPRESSION = 'auto'                   -- Specify GZIP compression for .gz files
        FIELD_DELIMITER = ','                   -- Specify the field delimiter
        PARSE_HEADER = TRUE                    -- Parse the header row for column names
        FIELD_OPTIONALLY_ENCLOSED_BY = '\042'
        ESCAPE_UNENCLOSED_FIELD = NONE 
        TRIM_SPACE = TRUE 
        ERROR_ON_COLUMN_COUNT_MISMATCH = FALSE;
    """),
        ("file_format_load", """
        CREATE OR REPLACE FILE FORMAT file_format_load
        TYPE = 'CSV'
        COMPRESSION = 'auto'                   -- Specify GZIP compression for .gz files
        FIELD_DELIMITER = ','                   -- Specify the field delimiter
        RECORD_DELIMITER = '\n' 
        SKIP_HEADER = 1                      -- Parse the header row for column names
        FIELD_OPTIONALLY_ENCLOSED_BY = '\042'     -- Optional field enclosure
        ;
    """),
    )

    print("Creating file format for DDL...")
    for format_name, statement in format_statements:
        session.sql(statement).collect()
        print(f"File format '{format_name}' created successfully.")
        print("===========")

In [8]:
# Start of the wall-clock measurement reported after the load loop.
utc_start_time = datetime.utcnow()
session_wih_pwd = snowpark_basic_auth()

# snowpark_basic_auth() connects without a database, schema or warehouse, so
# select them here. The names come from the configuration cell rather than
# being repeated as literals.
for use_statement in (
    f"USE DATABASE {database}",
    f"USE SCHEMA {schema}",
    f"USE WAREHOUSE {warehouse}",
):
    session_wih_pwd.sql(use_statement).collect()

create_file_format(session_wih_pwd)

Creating file format for DDL...
File format 'file_format_ddl' created successfully.
File format 'file_format_load' created successfully.


In [9]:
# List every file currently in the internal stage; each returned row is
# consumed by the processing loop via row.as_dict()['name'].
list_stage_sql = "list @my_internal_stage"
stg_files = session_wih_pwd.sql(list_stage_sql).collect()
print(stg_files)

[Row(name='my_internal_stage/admissions.csv.gz', size=19652448, md5='e030d760b3e15ce1c6aa9e8e8637ebcd', last_modified='Wed, 9 Oct 2024 06:45:27 GMT'), Row(name='my_internal_stage/d_icd_diagnoses.csv.gz', size=849392, md5='d77fe5f8249b16ed3692e52bd781dc06', last_modified='Wed, 9 Oct 2024 06:45:17 GMT'), Row(name='my_internal_stage/d_icd_procedures.csv.gz', size=549936, md5='d4a9b85d7ee6a75d32f6e1d61ac173cf', last_modified='Wed, 9 Oct 2024 06:44:37 GMT'), Row(name='my_internal_stage/discharge_two.csv.gz', size=1138715888, md5='b85ea55aef83c331f7c821e2e425f0de-136', last_modified='Sat, 12 Oct 2024 04:05:15 GMT'), Row(name='my_internal_stage/drgcodes.csv.gz', size=9509520, md5='89a92ba0a394557ceaa16cfcf7c93bce', last_modified='Wed, 9 Oct 2024 06:45:16 GMT'), Row(name='my_internal_stage/pharmacy.csv.gz', size=28197696, md5='880636d6f06cccdcb30a3b541320f35b', last_modified='Tue, 5 Nov 2024 20:32:14 GMT')]


In [40]:
# For every staged file (except the skipped ones): infer its schema, build a
# CREATE TABLE and a COPY INTO statement, save both to '<table>.sql', then
# execute them.
SKIPPED_FILES = {'discharge.csv.gz', 'pharmacy.csv.gz'}

for row in stg_files:
    print("======================================================\n")
    print(row)
    row_value = row.as_dict()
    print(row_value)
    stg_file_path_value = row_value.get('name')
    print(stg_file_path_value)

    # 'my_internal_stage/admissions.csv.gz' -> ('my_internal_stage', 'admissions.csv.gz')
    file_path, file_name = os.path.split(stg_file_path_value)
    print(file_path)
    print(file_name)
    if file_name in SKIPPED_FILES:
        continue

    stg_location = "@" + file_path
    print(stg_location)

    infer_schema_sql = """\
        SELECT * 
        FROM TABLE(
            INFER_SCHEMA(
            LOCATION=>'{}/',
            files => '{}',
            FILE_FORMAT => 'file_format_ddl'
        )    
    )
    """.format(stg_location, file_name)

    print("\n=========== INFER SCHEMA SQL =============")
    print(f" {file_name}")
    print("\n=========== INFER SCHEMA SQL =============")
    print(infer_schema_sql)

    inferred_schema_rows = session_wih_pwd.sql(infer_schema_sql).collect()
    print(inferred_schema_rows)
    col_name_lst = []
    col_data_type_lst = []

    # Separate names for the inner loop so the outer `row` / `row_value`
    # are not overwritten while the schema rows are walked.
    for schema_row in inferred_schema_rows:
        schema_row_value = schema_row.as_dict()
        print(schema_row_value)
        col_name_lst.append(schema_row_value.get('COLUMN_NAME'))
        col_data_type_lst.append(schema_row_value.get('TYPE'))

    table_name = file_name.split('.')[0]+"_raw"
    create_ddl_stmt = generate_ddl_statement(col_name_lst,col_data_type_lst, table_name.upper())
    print("=================== DDL STATEMENT =====================")
    print(create_ddl_stmt)

    copy_stmt = generate_copy_statement(table_name, 'my_internal_stage', file_name, 'file_format_load')
    print("=================== copy_stmt =====================")
    print(copy_stmt)

    sql_file_path = table_name+".sql"
    print("=========================== sql_file_path =============================")
    print(f"{sql_file_path}")
    with open(sql_file_path, "w") as sql_file:
        sql_file.write("---- Following statement is creating table\n\n")
        sql_file.write(create_ddl_stmt)
        # copy_stmt itself begins with a newline, so the comment line below
        # is terminated before the COPY text starts.
        sql_file.write("\n-- Following statement is executing copy command")
        sql_file.write(copy_stmt)

    session_wih_pwd.sql(create_ddl_stmt).collect()
    session_wih_pwd.sql(copy_stmt).collect()

# Taken after the loop so it is always defined, even when every file was
# skipped or the stage listing was empty.
utc_end_time = datetime.utcnow()

print(utc_end_time-utc_start_time)









Row(name='my_internal_stage/admissions.csv.gz', size=19652448, md5='e030d760b3e15ce1c6aa9e8e8637ebcd', last_modified='Wed, 9 Oct 2024 06:45:27 GMT')
{'name': 'my_internal_stage/admissions.csv.gz', 'size': 19652448, 'md5': 'e030d760b3e15ce1c6aa9e8e8637ebcd', 'last_modified': 'Wed, 9 Oct 2024 06:45:27 GMT'}
my_internal_stage/admissions.csv.gz
my_internal_stage
admissions.csv.gz
@my_internal_stage

 admissions.csv.gz

        SELECT * 
        FROM TABLE(
            INFER_SCHEMA(
            LOCATION=>'@my_internal_stage/',
            files => 'admissions.csv.gz',
            FILE_FORMAT => 'file_format_ddl'
        )    
    )
    


KeyboardInterrupt: 

---
discharge load

---

In [10]:
def create_file_format(session):
    """Create (or replace) three named CSV file formats.

    In order: ``file_format_ddl`` (PARSE_HEADER = TRUE),
    ``file_format_load`` (SKIP_HEADER = 1) and ``file_format_generic``
    (GZIP, PARSE_HEADER = TRUE). Each statement is run through
    ``session.sql(...).collect()``.

    Args:
        session: Object exposing ``sql(text).collect()``.

    Returns:
        None. Progress messages are printed to stdout.
    """
    # The strings are not raw: '\042' reaches Snowflake as a double quote
    # and '\n' as a real newline character.
    format_statements = (
        ("file_format_ddl", """
        CREATE OR REPLACE FILE FORMAT file_format_ddl
        TYPE = 'CSV'
        COMPRESSION = 'auto'                   -- Specify GZIP compression for .gz files
        FIELD_DELIMITER = ','                   -- Specify the field delimiter
        PARSE_HEADER = TRUE                    -- Parse the header row for column names
        FIELD_OPTIONALLY_ENCLOSED_BY = '\042'
        ESCAPE_UNENCLOSED_FIELD = NONE 
        TRIM_SPACE = TRUE 
        ERROR_ON_COLUMN_COUNT_MISMATCH = FALSE;
    """),
        ("file_format_load", """
        CREATE OR REPLACE FILE FORMAT file_format_load
        TYPE = 'CSV'
        COMPRESSION = 'auto'                   -- Specify GZIP compression for .gz files
        FIELD_DELIMITER = ','                   -- Specify the field delimiter
        RECORD_DELIMITER = '\n' 
        SKIP_HEADER = 1                      -- Parse the header row for column names
        FIELD_OPTIONALLY_ENCLOSED_BY = '"'    -- Optional field enclosure
         ;
    """),
        ("file_format_generic", """
            CREATE OR REPLACE FILE FORMAT file_format_generic
            TYPE = 'CSV'
            COMPRESSION = 'GZIP'                   -- Specify GZIP compression for .gz files
            FIELD_DELIMITER = ','                   -- Specify the field delimiter
            PARSE_HEADER = TRUE                      -- Parse the header row for column names
            FIELD_OPTIONALLY_ENCLOSED_BY = '"'     -- Optional field enclosure
            ESCAPE_UNENCLOSED_FIELD = None;
        """),
    )

    print("Creating file format for DDL...")
    for format_name, statement in format_statements:
        session.sql(statement).collect()
        print(f"File format '{format_name}' created successfully.")
        print("===========")

In [11]:
# Create or replace the file formats on the password-authenticated session.
create_file_format(session_wih_pwd)

Creating file format for DDL...
File format 'file_format_ddl' created successfully.
File format 'file_format_load' created successfully.
File format 'file_format_generic' created successfully.


In [None]:
import os
from datetime import datetime

utc_start_time = datetime.utcnow()
print("Process started at:", utc_start_time)

# Only these staged files are processed. The trailing comma matters: without
# it the parentheses hold a plain string and `in` becomes a substring test.
TARGET_FILES = ('pharmacy_two.csv.gz',)

for row in stg_files:
    print("======================================================\n")
    print("Processing row:", row)

    # Convert row to dictionary
    row_value = row.as_dict()
    print("Row as dictionary:", row_value)

    # Extract the staged file path value
    stg_file_path_value = row_value.get('name')
    print("Staged file path value:", stg_file_path_value)

    # Split file path and name
    file_path, file_name = os.path.split(stg_file_path_value)
    print("File path:", file_path)
    print("File name:", file_name)

    # Create staged location variable
    stg_location = "@" + file_path
    print("Staged location:", stg_location)

    # Filter for specific file
    if file_name not in TARGET_FILES:
        print(f"Skipping file {file_name} as it doesn't match the target file.")
        continue

    print(f"Processing target file: {file_name}")

    # Generate SQL for inferring schema
    infer_schema_sql = """\
        SELECT * 
        FROM TABLE(
            INFER_SCHEMA(
            LOCATION=>'{}/',
            files => '{}',
            FILE_FORMAT => 'file_format_generic'
        )    
    )
    """.format(stg_location, file_name)

    print("\n=========== INFER SCHEMA SQL =============")
    print(f"File: {file_name}")
    print(infer_schema_sql)

    # Execute schema inference
    inferred_schema_rows = session_wih_pwd.sql(infer_schema_sql).collect()
    print("\nSchema inference completed. Inferred schema rows:")
    print(inferred_schema_rows)

    # Prepare lists for column names and types
    col_name_lst = []
    col_data_type_lst = []

    # Process each row in inferred schema. Separate loop names keep the
    # outer `row` / `row_value` from being overwritten.
    for schema_row in inferred_schema_rows:
        schema_row_value = schema_row.as_dict()
        print("Inferred schema row:", schema_row_value)

        col_name_lst.append(schema_row_value.get('COLUMN_NAME'))
        col_data_type_lst.append(schema_row_value.get('TYPE'))

    print("Column names list:", col_name_lst)
    print("Column data types list:", col_data_type_lst)

    # Generate table name and DDL statement
    table_name = file_name.split('.')[0] + "_raw"
    create_ddl_stmt = generate_ddl_statement(col_name_lst, col_data_type_lst, table_name.upper())
    print("=================== DDL STATEMENT =====================")
    print(create_ddl_stmt)

    # Generate copy statement for loading data
    copy_stmt = generate_copy_statement(table_name, 'my_internal_stage', file_name, 'file_format_load')
    print("=================== COPY STATEMENT =====================")
    print(copy_stmt)

    # Define SQL file path and save DDL and copy statements to file
    sql_file_path = table_name + ".sql"
    print("=================== SQL FILE PATH =====================")
    print("File path for saving SQL:", sql_file_path)
    with open(sql_file_path, "w") as sql_file:
        sql_file.write("---- Following statement is creating table\n\n")
        sql_file.write(create_ddl_stmt)
        sql_file.write("\n-- Following statement is executing copy command\n")
        sql_file.write(copy_stmt)
    print("SQL statements written to file:", sql_file_path)

    # Execute DDL to create the table
    session_wih_pwd.sql(create_ddl_stmt).collect()
    print("Table created successfully with DDL statement.")

    # Execute copy command to load data into the table
    session_wih_pwd.sql(copy_stmt).collect()
    print("Data loaded into the table with COPY statement.")

# End of processing and time calculation
utc_end_time = datetime.utcnow()
print("Process completed at:", utc_end_time)
print("Total processing time:", utc_end_time - utc_start_time)


Process started at: 2024-11-05 21:27:10.315793

Processing row: Row(name='my_internal_stage/admissions.csv.gz', size=19652448, md5='e030d760b3e15ce1c6aa9e8e8637ebcd', last_modified='Wed, 9 Oct 2024 06:45:27 GMT')
Row as dictionary: {'name': 'my_internal_stage/admissions.csv.gz', 'size': 19652448, 'md5': 'e030d760b3e15ce1c6aa9e8e8637ebcd', 'last_modified': 'Wed, 9 Oct 2024 06:45:27 GMT'}
Staged file path value: my_internal_stage/admissions.csv.gz
File path: my_internal_stage
File name: admissions.csv.gz
Staged location: @my_internal_stage
Skipping file admissions.csv.gz as it doesn't match the target file.

Processing row: Row(name='my_internal_stage/d_icd_diagnoses.csv.gz', size=849392, md5='d77fe5f8249b16ed3692e52bd781dc06', last_modified='Wed, 9 Oct 2024 06:45:17 GMT')
Row as dictionary: {'name': 'my_internal_stage/d_icd_diagnoses.csv.gz', 'size': 849392, 'md5': 'd77fe5f8249b16ed3692e52bd781dc06', 'last_modified': 'Wed, 9 Oct 2024 06:45:17 GMT'}
Staged file path value: my_internal_st

SnowparkSQLException: (1304): 01b82d07-0004-20d3-0000-13e7001a3e6e: 100332 (22000): Error with CSV header: header defined 27 columns while data contains 10 columns. 

  File 'pharmacy.csv.gz'
  Row 0 starts at line 0, column 

DOSES_PER_24_HRS


In [14]:
import os
from datetime import datetime


In [15]:
def create_file_format(session):
    """Create (or replace) three named CSV file formats.

    In order: ``file_format_ddl`` (PARSE_HEADER = TRUE),
    ``file_format_load`` (SKIP_HEADER = 1) and ``file_format_generic``
    (GZIP, PARSE_HEADER = TRUE). Each statement is run through
    ``session.sql(...).collect()``.

    Args:
        session: Object exposing ``sql(text).collect()``.

    Returns:
        None. Progress messages are printed to stdout.
    """
    # The strings are not raw: '\042' reaches Snowflake as a double quote
    # and '\n' as a real newline character.
    format_statements = (
        # File format for DDL and loading the CSVs
        ("file_format_ddl", """
        CREATE OR REPLACE FILE FORMAT file_format_ddl
        TYPE = 'CSV'
        COMPRESSION = 'auto'                   -- Specify GZIP compression for .gz files
        FIELD_DELIMITER = ','                   -- Specify the field delimiter
        PARSE_HEADER = TRUE                    -- Parse the header row for column names
        FIELD_OPTIONALLY_ENCLOSED_BY = '\042'  -- Handle fields enclosed in double quotes
        ESCAPE_UNENCLOSED_FIELD = NONE 
        TRIM_SPACE = TRUE 
        ERROR_ON_COLUMN_COUNT_MISMATCH = FALSE;
    """),
        # File format for loading CSV
        ("file_format_load", """
        CREATE OR REPLACE FILE FORMAT file_format_load
        TYPE = 'CSV'
        COMPRESSION = 'auto'                   -- Specify GZIP compression for .gz files
        FIELD_DELIMITER = ','                   -- Specify the field delimiter
        RECORD_DELIMITER = '\n' 
        SKIP_HEADER = 1                        -- Parse the header row for column names
        FIELD_OPTIONALLY_ENCLOSED_BY = '"'     -- Handle quoted fields
        ESCAPE_UNENCLOSED_FIELD = NONE;
    """),
        # Generic file format with GZIP compression and quoted fields handling
        ("file_format_generic", """
        CREATE OR REPLACE FILE FORMAT file_format_generic
        TYPE = 'CSV'
        COMPRESSION = 'GZIP'                   -- Specify GZIP compression for .gz files
        FIELD_DELIMITER = ','                   -- Specify the field delimiter
        PARSE_HEADER = TRUE                    -- Parse the header row for column names
        FIELD_OPTIONALLY_ENCLOSED_BY = '"'     -- Handle quoted fields
        ESCAPE_UNENCLOSED_FIELD = NONE;
    """),
    )

    print("Creating file format for DDL...")
    for format_name, statement in format_statements:
        session.sql(statement).collect()
        print(f"File format '{format_name}' created successfully.")
        print("===========")

In [16]:
# Create or replace the file formats on the password-authenticated session.
create_file_format(session_wih_pwd)

Creating file format for DDL...
File format 'file_format_ddl' created successfully.
File format 'file_format_load' created successfully.
File format 'file_format_generic' created successfully.


In [18]:
# Start the timer in this cell so the reported duration covers this run only,
# not the time since an earlier cell last set utc_start_time.
utc_start_time = datetime.utcnow()

# Iterating over the staged files
for row in stg_files:
    print("======================================================\n")
    print(row)
    row_value = row.as_dict()
    print(row_value)

    # Get the file path and name
    stg_file_path_value = row_value.get('name')
    print(stg_file_path_value)

    file_path, file_name = os.path.split(stg_file_path_value)
    print(file_path)
    print(file_name)

    # Define the storage location in Snowflake stage
    stg_location = "@"+file_path
    print(stg_location)

    # Only process 'pharmacy_two.csv.gz' file
    if file_name != 'pharmacy_two.csv.gz':
        continue

    # Prepare SQL to infer the schema of the CSV
    infer_schema_sql = f"""
        SELECT * 
        FROM TABLE(
            INFER_SCHEMA(
            LOCATION=>'{stg_location}/',
            files => '{file_name}',
            FILE_FORMAT => 'file_format_generic'
        )    
    )
    """
    print("\n=========== INFER SCHEMA SQL =============")
    print(f" {file_name}")
    print("\n=========== INFER SCHEMA SQL =============")
    print(infer_schema_sql)

    # Execute the schema inference
    inferred_schema_rows = session_wih_pwd.sql(infer_schema_sql).collect()
    print("\n============== print(inferred_schema_rows) ================== \n")
    print(inferred_schema_rows)

    # Extract column names and types. Separate loop names keep the outer
    # `row` / `row_value` from being overwritten.
    col_name_lst = []
    col_data_type_lst = []

    for schema_row in inferred_schema_rows:
        schema_row_value = schema_row.as_dict()
        print(schema_row_value)
        col_name_lst.append(schema_row_value.get('COLUMN_NAME'))
        col_data_type_lst.append(schema_row_value.get('TYPE'))

    # Generate the DDL statement for creating the table
    table_name = file_name.split('.')[0] + "_raw"
    create_ddl_stmt = generate_ddl_statement(col_name_lst, col_data_type_lst, table_name.upper())
    print("=================== DDL STATEMENT =====================")
    print(create_ddl_stmt)

    # Generate the copy statement for loading data
    copy_stmt = generate_copy_statement(table_name, 'my_internal_stage', file_name, 'file_format_load')
    print("=================== copy_stmt =====================")
    print(copy_stmt)

    # Write the SQL statements to a file
    sql_file_path = table_name + ".sql"
    print("=========================== sql_file_path =============================")
    print(f"{sql_file_path}")
    with open(sql_file_path, "w") as sql_file:
        sql_file.write("---- Following statement is creating table\n\n")
        sql_file.write(create_ddl_stmt)
        # copy_stmt itself begins with a newline, so the comment line below
        # is terminated before the COPY text starts.
        sql_file.write("\n-- Following statement is executing copy command")
        sql_file.write(copy_stmt)

    # Execute the DDL and copy commands
    session_wih_pwd.sql(create_ddl_stmt).collect()
    session_wih_pwd.sql(copy_stmt).collect()

# Track the end time of the operations
utc_end_time = datetime.utcnow()

# Print the total time taken for the operations
print(utc_end_time - utc_start_time)



Row(name='my_internal_stage/admissions.csv.gz', size=19652448, md5='e030d760b3e15ce1c6aa9e8e8637ebcd', last_modified='Wed, 9 Oct 2024 06:45:27 GMT')
{'name': 'my_internal_stage/admissions.csv.gz', 'size': 19652448, 'md5': 'e030d760b3e15ce1c6aa9e8e8637ebcd', 'last_modified': 'Wed, 9 Oct 2024 06:45:27 GMT'}
my_internal_stage/admissions.csv.gz
my_internal_stage
admissions.csv.gz
@my_internal_stage

Row(name='my_internal_stage/d_icd_diagnoses.csv.gz', size=849392, md5='d77fe5f8249b16ed3692e52bd781dc06', last_modified='Wed, 9 Oct 2024 06:45:17 GMT')
{'name': 'my_internal_stage/d_icd_diagnoses.csv.gz', 'size': 849392, 'md5': 'd77fe5f8249b16ed3692e52bd781dc06', 'last_modified': 'Wed, 9 Oct 2024 06:45:17 GMT'}
my_internal_stage/d_icd_diagnoses.csv.gz
my_internal_stage
d_icd_diagnoses.csv.gz
@my_internal_stage

Row(name='my_internal_stage/d_icd_procedures.csv.gz', size=549936, md5='d4a9b85d7ee6a75d32f6e1d61ac173cf', last_modified='Wed, 9 Oct 2024 06:44:37 GMT')
{'name': 'my_internal_stage/d_ic