In [41]:

import os
import shutil
from snowflake.snowpark import Session


In [102]:

account='' # YOUR_ACCOUNT
user='' # YOUR_USER
password='' # YOUR_PASSWORD
role='' #YOUR_ROLE

database='' #YOUR_DB
schema='' #YOUR_SCHEMA
stage='' #YOUR_STAGE
stage_data_path='audio2text/tests' #data path 

warehouse='' #YOUR WH
image_registry='project_repo' #name of the image registry that will be created
image_name='audio2text:01' # name of the image

external_access_integration='ALL_EAI' # EAI that is used to retrieve the model
num_replicas = 2 # service number of replicas
job_name='audio2text_v1' # job name
output_table='audio2text_output_table' #table to write results

compute_pool_name='AUDIO2TEXT_TEST02' 
compute_pool_instance_family='CPU_X64_M'
compute_pool_instances=2


connection_parameters = {
    "account": account,
    "user": user,
    "password": password,
    "warehouse": warehouse,
    "database": database,
    "schema": schema,
    "role": role,
    "client_session_keep_alive": True,
}

session = Session.builder.configs(connection_parameters).create()


In [46]:

def upload_dummy_data_to_stage(
    session: Session, local_directory: str, stage_path: str, num_files: int = 1000, overwrite: bool = True
):    
    if not stage_path.startswith("@"):
        stage_path = f"@{stage_path}"

    overwrite_option = "TRUE" if overwrite else "FALSE"

    local_files = []
    for root, _, files in os.walk(local_directory):
        for file in files:
            local_file_path = os.path.join(root, file)
            local_files.append(local_file_path)

    for file_num in range(num_files):
        local_file_to_upload = local_files[file_num%len(local_files)]
        file_path, file_ext = os.path.splitext(os.path.basename(local_file_to_upload))
        local_target_file_path = f"{file_path}_{file_num}{file_ext}"
        shutil.copy(local_file_to_upload, local_target_file_path)

        put_command = f"PUT 'file://{local_target_file_path}' '{stage_path}' OVERWRITE={overwrite_option} AUTO_COMPRESS=False"
        print(f"Uploading: {local_target_file_path} → {stage_path}")
        
        try:
            session.sql(put_command).collect()
        except Exception as e:
            print(f"Error uploading {local_target_file_path}: {e}")
        os.remove(local_target_file_path)


session.sql(f"USE {database}.{schema}").collect()

stage_sql = f"CREATE STAGE IF NOT EXISTS {stage}"

session.sql(stage_sql).collect()


upload_dummy_data_to_stage(session, "./data", f"{stage}/{stage_data_path}")



Uploading: Sample_ATT_Inbound_Call-MONO_47sec_0.mp3 → @data_stage/tests1
Uploading: harvard_1.wav → @data_stage/tests1
Uploading: common_voice_de_37942822_2.mp3 → @data_stage/tests1
Uploading: Health-Insurance-1_3.mp3 → @data_stage/tests1
Uploading: jackhammer_4.wav → @data_stage/tests1
Uploading: common_voice_de_37888599_5.mp3 → @data_stage/tests1
Uploading: Sample_ATT_Inbound_Call-MONO_47sec_6.mp3 → @data_stage/tests1
Uploading: harvard_7.wav → @data_stage/tests1
Uploading: common_voice_de_37942822_8.mp3 → @data_stage/tests1
Uploading: Health-Insurance-1_9.mp3 → @data_stage/tests1
Uploading: jackhammer_10.wav → @data_stage/tests1
Uploading: common_voice_de_37888599_11.mp3 → @data_stage/tests1
Uploading: Sample_ATT_Inbound_Call-MONO_47sec_12.mp3 → @data_stage/tests1
Uploading: harvard_13.wav → @data_stage/tests1
Uploading: common_voice_de_37942822_14.mp3 → @data_stage/tests1
Uploading: Health-Insurance-1_15.mp3 → @data_stage/tests1
Uploading: jackhammer_16.wav → @data_stage/tests1
Upl

In [22]:

create_image_repo_sql = f"CREATE IMAGE REPOSITORY IF NOT EXISTS {image_registry}"
session.sql(create_image_repo_sql).collect()

get_image_repo_sql = f"show image repositories like '{image_registry}';"
repository_url = session.sql(get_image_repo_sql).collect()[0]['repository_url']
print(repository_url)


preprod8-aivanoutest03.awsuswest2preprod8.registry-dev.snowflakecomputing.com/aivanoudb/public/project_repo


In [98]:

os.environ['SPCS_USERNAME']=user
os.environ['SPCS_PASSWORD']=password
os.environ['SPCS_IMAGE_REPO']=repository_url
os.environ['SPCS_IMAGE_NAME']=image_name

!docker login $SPCS_IMAGE_REPO -u $SPCS_USERNAME -p $SPCS_PASSWORD

!docker build --platform linux/amd64 -t $SPCS_IMAGE_REPO/$SPCS_IMAGE_NAME -f ./Dockerfile ./

!docker push $SPCS_IMAGE_REPO/$SPCS_IMAGE_NAME


Login Succeeded
[1A[1B[0G[?25l[+] Building 0.0s (0/0)  docker:desktop-linux
[?25h[1A[0G[?25l[+] Building 0.0s (0/1)                                    docker:desktop-linux
[?25h[1A[0G[?25l[+] Building 0.2s (1/2)                                    docker:desktop-linux
[34m => [internal] load build definition from Dockerfile                       0.0s
[0m[34m => => transferring dockerfile: 402B                                       0.0s
[0m => [internal] load metadata for nvcr.io/nvidia/pytorch:25.02-py3          0.2s
[?25h[1A[1A[1A[1A[0G[?25l[+] Building 0.3s (2/2)                                    docker:desktop-linux
[34m => [internal] load build definition from Dockerfile                       0.0s
[0m[34m => => transferring dockerfile: 402B                                       0.0s
[0m[34m => [internal] load metadata for nvcr.io/nvidia/pytorch:25.02-py3          0.3s
[0m[?25h[1A[1A[1A[1A[0G[?25l[+] Building 0.3s (11/11) FINISHED                 

In [103]:

create_compute_pool_sql = f"""
create compute pool if not exists {compute_pool_name}
  min_nodes={compute_pool_instances}
  max_nodes={compute_pool_instances}
  instance_family={compute_pool_instance_family};
"""

print(session.sql(create_compute_pool_sql).collect())



[Row(status="Compute pool AIVANOU_TEST02 successfully created. Please run 'DESCRIBE COMPUTE POOL AIVANOU_TEST02' to check the compute pool state. NOTE the compute pool is not ready to deploy a service or job before reaching ACTIVE or IDLE state.")]


In [104]:

print(session.sql(f'DROP SERVICE IF EXISTS {job_name}').collect())

create_async_job_sql = f"""
EXECUTE JOB SERVICE
IN COMPUTE POOL {compute_pool_name} 
NAME = {job_name}
ASYNC = True
REPLICAS = {num_replicas}
QUERY_WAREHOUSE = {warehouse}
EXTERNAL_ACCESS_INTEGRATIONS = ({external_access_integration})
FROM SPECIFICATION $$
    spec:
      container:
      - name: main
        image: /{database}/{schema}/{image_registry}/{image_name}
        command: ['python', '-u', './main.py', '--config-file=./configs/openai-whisper-tiny.toml', '--stage-name={stage}', '--stage-data-path={stage_path}']
        resources:
            requests:
              memory: "10Gi"
            limits:
              memory: "10Gi"

$$
"""

print(session.sql(create_async_job_sql).collect())



[Row(status='AUDIO2TEXT_V1 successfully dropped.')]
[Row(status="Started Snowpark Container Services Job 'AUDIO2TEXT_V1'.")]


In [105]:

print(session.sql(f'DESC SERVICE {job_name}').collect())



[Row(name='AUDIO2TEXT_V1', status='PENDING', database_name='AIVANOUDB', schema_name='PUBLIC', owner='SYSADMIN', compute_pool='AIVANOU_TEST02', spec='---\nspec:\n  containers:\n  - name: "main"\n    image: "preprod8-aivanoutest03.awsuswest2preprod8.registry-dev.snowflakecomputing.com/aivanoudb/public/project_repo/audio2text:01"\n    sha256: "@sha256:ae96ad0531f04eb2fb32e878e1d63cfd315af3769a9d664088f45017348479df"\n    command:\n    - "python"\n    - "-u"\n    - "./main.py"\n    - "--config-file=./configs/openai-whisper-tiny.toml"\n    - "--stage-name=data_stage"\n    - "--stage-data-path=audio2text/tests"\n    resources:\n      limits:\n        memory: "10Gi"\n        cpu: "6"\n      requests:\n        memory: "10Gi"\n        cpu: "0.5"\n', dns_name='audio2text-v1.dadm.svc.spcs.internal', current_instances=0, target_instances=2, min_ready_instances=1, min_instances=1, max_instances=2, auto_resume='true', external_access_integrations='["ALL_EAI"]', created_on=datetime.datetime(2025, 3, 

In [106]:

logs = session.sql(f"CALL SYSTEM$GET_SERVICE_LOGS('{job_name}', 1, 'main')").collect()
for line in logs[0][0].split('\n'):
    print(line)





In [92]:

create_output_table_sql= f"""
CREATE OR REPLACE TABLE {output_table} (
    data VARIANT
);
"""

print(session.sql(create_output_table_sql).collect())

copy_data_sql = f"""
COPY INTO {output_table}
FROM @{stage}/{job_name.upper()}
FILE_FORMAT = (TYPE = PARQUET);
"""

print(session.sql(copy_data_sql).collect())



[Row(status='Table AUDIO2TEXT_OUTPUT_TABLE successfully created.')]
[Row(file='data_stage/AUDIO2TEXT_V1/0/output/output_file_8.parquet', status='LOADED', rows_parsed=244, rows_loaded=244, error_limit=1, errors_seen=0, first_error=None, first_error_line=None, first_error_character=None, first_error_column_name=None), Row(file='data_stage/AUDIO2TEXT_V1/0/output/output_file_15.parquet', status='LOADED', rows_parsed=148, rows_loaded=148, error_limit=1, errors_seen=0, first_error=None, first_error_line=None, first_error_character=None, first_error_column_name=None), Row(file='data_stage/AUDIO2TEXT_V1/0/output/output_file_10.parquet', status='LOADED', rows_parsed=320, rows_loaded=320, error_limit=1, errors_seen=0, first_error=None, first_error_line=None, first_error_character=None, first_error_column_name=None), Row(file='data_stage/AUDIO2TEXT_V1/1/output/output_file_7.parquet', status='LOADED', rows_parsed=224, rows_loaded=224, error_limit=1, errors_seen=0, first_error=None, first_error_lin

In [94]:

records = session.sql(f"select * from {output_table}").collect()
for record in records[0:10]:
    print(record)


Row(DATA='{\n  "files": "data_stage/audio2text/tests/harvard_0.wav",\n  "transcriptions": " The stale smell of old beer lingers. It takes heat to bring out the odor. A cold dip restores health and zest. A salt pickle tastes fine with ham. Tacos al pastor are my favorite. A zestful food is the hot cross bun."\n}')
Row(DATA='{\n  "files": "data_stage/audio2text/tests/harvard_1.wav",\n  "transcriptions": " The stale smell of old beer lingers. It takes heat to bring out the odor. A cold dip restores health and zest. A salt pickle tastes fine with ham. Tacos al pastor are my favorite. A zestful food is the hot cross bun."\n}')
Row(DATA='{\n  "files": "data_stage/audio2text/tests/harvard_10.wav",\n  "transcriptions": " The stale smell of old beer lingers. It takes heat to bring out the odor. A cold dip restores health and zest. A salt pickle tastes fine with ham. Tacos al pastor are my favorite. A zestful food is the hot cross bun."\n}')
Row(DATA='{\n  "files": "data_stage/audio2text/tests/h