# 06 Load Excel Files

* Author: Jeremiah Hansen
* Last Updated: 2/2/2026

This notebook will load data into the `LOCATION` and `ORDER_DETAIL` tables from Excel files.

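The files cannot yet be read directly from their original stage, because Snowpark File Access with scoped URLs doesn't yet work in Notebooks. So for now we first copy each file to a temporary internal stage and read it from there.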

In [None]:
# Import python packages
import sys
import logging

# Set up the logger
logger_name = 'demo_logger'
logger = logging.getLogger(logger_name)
logger.setLevel(logging.INFO)

# Set default values for debugging
notebook_name = '06_load_excel_files.ipynb'
database_name = 'DEMO_DB'
schema_name = 'DEV_SCHEMA'
role_name = 'DEMO_ROLE'

# Override values with passed notebook arguments.
# Arguments are only parsed when argv[0] looks like a notebook file name;
# otherwise the debugging defaults above are kept.
if sys.argv[0].endswith('.ipynb'):
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--database-name', type=str)
    parser.add_argument('--schema-name', type=str)
    parser.add_argument('--role-name', type=str)
    # parse_known_args() tolerates arguments this notebook does not declare
    args, args_unknown = parser.parse_known_args()

    notebook_name = parser.prog  # argparse defaults this to the basename of argv[0]
    # An omitted (or empty) argument falls back to the default set above
    database_name = args.database_name or database_name
    schema_name = args.schema_name or schema_name
    role_name = args.role_name or role_name

# Get a Snowpark session
from snowflake.snowpark.context import get_active_session
session = get_active_session()

# Set the role first (required with initial GA which uses the user's default role),
# so that the schema below is selected under the intended role rather than
# under whatever role the session started with.
session.use_role(role_name)

# Set the default database and schema for the following cells
session.use_schema(f"{database_name}.{schema_name}")

# Get details about the current state (a single row holding a JSON string)
current_state_df = session.sql("""
        SELECT OBJECT_CONSTRUCT(
            'current_user', CURRENT_USER(),
            'current_role', CURRENT_ROLE(),
            'current_secondary_roles', PARSE_JSON(CURRENT_SECONDARY_ROLES()),
            'current_database', CURRENT_DATABASE(),
            'current_schema', CURRENT_SCHEMA()
        )::STRING AS session_context;
    """).collect()

logger.info(f"Begin executing notebook {notebook_name}", extra = {'logger_name': logger_name})
logger.info(f"Using parameters database: {database_name}, schema: {schema_name}, role: {role_name}", extra = {'logger_name': logger_name})
logger.info(f"Using session context {current_state_df[0]['SESSION_CONTEXT']}", extra = {'logger_name': logger_name})

In [None]:
!pip install openpyxl

In [None]:
%%sql -r dataframe_1
-- Temporary solution to load in the metadata; this should be replaced with a direct query to a directory table (or a metadata table).
-- Each row names one staged Excel file (STAGE_FILE_PATH), the worksheet to read from it (WORKSHEET_NAME),
-- and the table to load it into (TARGET_TABLE).
SELECT '@INTEGRATIONS.FROSTBYTE_RAW_STAGE/intro/order_detail.xlsx' AS STAGE_FILE_PATH, 'order_detail' AS WORKSHEET_NAME, 'ORDER_DETAIL' AS TARGET_TABLE
UNION
SELECT '@INTEGRATIONS.FROSTBYTE_RAW_STAGE/intro/location.xlsx', 'location', 'LOCATION';

## Create a function to load Excel worksheet to table

Create a reusable function to load an Excel worksheet to a table in Snowflake.

Note: Until we can use scoped URLs in Notebooks, via the `BUILD_SCOPED_FILE_URL()` function, we need to temporarily copy the file to a temp stage and then process from there.

In [None]:
from snowflake.snowpark.files import SnowflakeFile
from openpyxl import load_workbook
import pandas as pd

# 1. Make sure the temporary internal stage exists (done once, before any file is loaded)
create_stage_sql = "CREATE TEMP STAGE IF NOT EXISTS temp_excel_stage"
session.sql(create_stage_sql).collect()

def load_excel_worksheet_to_table(session, external_path, worksheet_name, target_table):
    """Load one worksheet of a staged Excel workbook into a Snowflake table.

    The workbook is copied into the `temp_excel_stage` internal stage (which
    must already exist) and opened from there with `SnowflakeFile`. The first
    row of the worksheet supplies the column names and the remaining rows
    supply the data. The target table is written in "overwrite" mode.

    Args:
        session: Snowpark session used to run the copy and to write the table.
        external_path: Stage path of the workbook, e.g.
            '@INTEGRATIONS.FROSTBYTE_RAW_STAGE/intro/location.xlsx'. Its last
            '/'-separated component is used as the file name on the temp stage.
        worksheet_name: Name of the worksheet to read from the workbook.
        target_table: Name of the table to write the worksheet data to.

    Raises:
        ValueError: If the worksheet contains no rows at all, so there is no
            header row to take column names from.
    """

    # Extract filename from path
    filename = external_path.split('/')[-1]

    # Copy file from external to internal stage
    session.sql(f"""
        COPY FILES INTO @temp_excel_stage
        FROM {external_path}
    """).collect()

    # SnowflakeFile.open() works on the internal stage copy
    with SnowflakeFile.open(f'@temp_excel_stage/{filename}', 'rb') as f:
        workbook = load_workbook(f)
        sheet = workbook[worksheet_name]

        # Convert to DataFrame while the staged file is still open.
        # The first row is the header; an empty worksheet yields no rows, so
        # report that explicitly instead of leaking a bare StopIteration.
        data = sheet.values
        columns = next(data, None)
        if columns is None:
            raise ValueError(
                f"Worksheet '{worksheet_name}' in {external_path} is empty; expected a header row"
            )
        df = pd.DataFrame(data, columns=columns)

    # Write to Snowflake table, replacing any existing contents
    snowpark_df = session.create_dataframe(df)
    snowpark_df.write.mode("overwrite").save_as_table(target_table)

    logger.info(f"Loaded {len(df)} rows from '{worksheet_name}' to {target_table}", extra = {'logger_name': logger_name})

## Process all Excel worksheets

Loop through each Excel worksheet to process and call our `load_excel_worksheet_to_table()` function.

In [None]:
# Walk the spreadsheet metadata returned by the SQL cell above (dataframe_1),
# loading one worksheet per row
files_to_load = dataframe_1
for _, spreadsheet in files_to_load.iterrows():
    stage_file_path = spreadsheet['STAGE_FILE_PATH']
    print(f"Processing Excel file {stage_file_path}")
    load_excel_worksheet_to_table(
        session=session,
        external_path=stage_file_path,
        worksheet_name=spreadsheet['WORKSHEET_NAME'],
        target_table=spreadsheet['TARGET_TABLE'],
    )

logger.info(f"Finish executing notebook {notebook_name}", extra = {'logger_name': logger_name})

### Debugging

In [None]:
%%sql -r dataframe_2
-- Debugging aid: list the tables visible in the session's current context.
-- Uncomment one of the statements below to inspect the LOCATION table instead.
--DESCRIBE TABLE LOCATION;
--SELECT * FROM LOCATION;
SHOW TABLES;