In [41]:

import os
import shutil
from snowflake.snowpark import Session


In [102]:

# --- Snowflake connection settings -------------------------------------------
# Each value is read from an environment variable so credentials do not have to
# be typed into (and saved with) the notebook. The fallback is the original
# empty placeholder, which can still be edited in place for a quick local run.
account=os.environ.get('SNOWFLAKE_ACCOUNT', '') # YOUR_ACCOUNT
user=os.environ.get('SNOWFLAKE_USER', '') # YOUR_USER
password=os.environ.get('SNOWFLAKE_PASSWORD', '') # YOUR_PASSWORD
role=os.environ.get('SNOWFLAKE_ROLE', '') #YOUR_ROLE

database=os.environ.get('SNOWFLAKE_DATABASE', '') #YOUR_DB
schema=os.environ.get('SNOWFLAKE_SCHEMA', '') #YOUR_SCHEMA
stage=os.environ.get('SNOWFLAKE_STAGE', '') #YOUR_STAGE
stage_data_path='audio2text/tests' #data path

warehouse=os.environ.get('SNOWFLAKE_WAREHOUSE', '') #YOUR WH
image_registry='project_repo' #name of the image registry that will be created
image_name='audio2text:01' # name of the image

# --- Job settings --------------------------------------------------------------
external_access_integration='ALL_EAI' # EAI that is used to retrieve the model
num_replicas = 2 # service number of replicas
job_name='audio2text_v1' # job name
output_table='audio2text_output_table' #table to write results

# --- Compute pool settings -----------------------------------------------------
compute_pool_name='AUDIO2TEXT_TEST02'
compute_pool_instance_family='CPU_X64_M'
compute_pool_instances=2

# Fail early with a readable message instead of letting an empty placeholder
# surface later as a connection error or a malformed SQL statement.
_required_settings = {
    'account': account,
    'user': user,
    'password': password,
    'database': database,
    'schema': schema,
    'stage': stage,
    'warehouse': warehouse,
}
_missing_settings = [name for name, value in _required_settings.items() if not value]
if _missing_settings:
    raise ValueError(
        "Missing Snowflake settings: " + ", ".join(_missing_settings)
        + ". Set the matching SNOWFLAKE_<NAME> environment variables or fill in the values in this cell."
    )

connection_parameters = {
    "account": account,
    "user": user,
    "password": password,
    "warehouse": warehouse,
    "database": database,
    "schema": schema,
    "role": role,
    "client_session_keep_alive": True,
}

# One session is created here and reused by every cell below.
session = Session.builder.configs(connection_parameters).create()


In [None]:

def upload_dummy_data_to_stage(
    session: Session, local_directory: str, stage_path: str, num_files: int = 1000, overwrite: bool = True
):
    """Upload ``num_files`` renamed copies of local sample files to a stage.

    Every file found (recursively) under ``local_directory`` is used as a
    template. The templates are cycled through in sorted order, and copy number
    ``i`` is uploaded under the name ``<stem>_<i><ext>`` so that each upload
    lands as a distinct file in the stage.

    Args:
        session: Object whose ``sql(command).collect()`` executes the PUT
            statement (a Snowpark ``Session`` in this notebook).
        local_directory: Directory that is walked for template files.
        stage_path: Target stage location; a leading ``@`` is added if missing.
        num_files: Number of copies to upload. Values <= 0 upload nothing.
        overwrite: Value used for the PUT ``OVERWRITE`` option.

    Raises:
        ValueError: If ``num_files`` is positive but ``local_directory``
            contains no files.

    A failed upload is reported with ``print`` and the loop continues with the
    next file (best effort).
    """
    import tempfile

    if not stage_path.startswith("@"):
        stage_path = f"@{stage_path}"

    overwrite_option = "TRUE" if overwrite else "FALSE"

    # Sorted so the template -> index mapping is the same on every run;
    # os.walk alone gives no ordering guarantee.
    local_files = sorted(
        os.path.join(root, name)
        for root, _, names in os.walk(local_directory)
        for name in names
    )

    if num_files <= 0:
        return
    if not local_files:
        raise ValueError(f"No files found under {local_directory!r}; nothing to upload.")

    # Renamed copies live in a private scratch directory so they can never
    # overwrite (and then delete) a same-named file in the working directory.
    with tempfile.TemporaryDirectory() as scratch_dir:
        for file_num in range(num_files):
            local_file_to_upload = local_files[file_num % len(local_files)]
            file_stem, file_ext = os.path.splitext(os.path.basename(local_file_to_upload))
            target_name = f"{file_stem}_{file_num}{file_ext}"
            local_target_file_path = os.path.join(scratch_dir, target_name)
            shutil.copy(local_file_to_upload, local_target_file_path)

            # Forward slashes keep the quoted file URI valid on every platform.
            file_uri = "file://" + local_target_file_path.replace(os.sep, "/")
            put_command = f"PUT '{file_uri}' '{stage_path}' OVERWRITE={overwrite_option} AUTO_COMPRESS=False"
            print(f"Uploading: {target_name} → {stage_path}")

            try:
                session.sql(put_command).collect()
            except Exception as e:
                print(f"Error uploading {target_name}: {e}")
            finally:
                # Drop each copy right away so the scratch directory never
                # holds more than one file at a time.
                os.remove(local_target_file_path)


# Point the session at the working database and schema.
use_schema_sql = f"USE {database}.{schema}"
session.sql(use_schema_sql).collect()

# Create the stage if it is not there yet; an existing stage is left as is.
stage_sql = f"CREATE STAGE IF NOT EXISTS {stage}"
session.sql(stage_sql).collect()

# Upload the sample files from ./data into the data folder of the stage.
stage_upload_path = f"{stage}/{stage_data_path}"
upload_dummy_data_to_stage(session, "./data", stage_upload_path)



In [None]:

# Create the image repository if it does not exist yet.
create_image_repo_sql = f"CREATE IMAGE REPOSITORY IF NOT EXISTS {image_registry}"
session.sql(create_image_repo_sql).collect()

# Read the repository URL from the first row of the SHOW output; the docker
# cell uses it as the login / push target.
get_image_repo_sql = f"show image repositories like '{image_registry}';"
image_repo_rows = session.sql(get_image_repo_sql).collect()
repository_url = image_repo_rows[0]['repository_url']
print(repository_url)


In [None]:

os.environ['SPCS_USERNAME']=user
os.environ['SPCS_PASSWORD']=password
os.environ['SPCS_IMAGE_REPO']=repository_url
os.environ['SPCS_IMAGE_NAME']=image_name

!docker login $SPCS_IMAGE_REPO -u $SPCS_USERNAME -p $SPCS_PASSWORD

!docker build --platform linux/amd64 -t $SPCS_IMAGE_REPO/$SPCS_IMAGE_NAME -f ./Dockerfile ./

!docker push $SPCS_IMAGE_REPO/$SPCS_IMAGE_NAME


In [None]:

# Fixed-size pool: min_nodes and max_nodes are both set to compute_pool_instances.
# "if not exists" makes re-running this cell harmless.
create_compute_pool_sql = f"""
create compute pool if not exists {compute_pool_name}
  min_nodes={compute_pool_instances}
  max_nodes={compute_pool_instances}
  instance_family={compute_pool_instance_family};
"""

compute_pool_result = session.sql(create_compute_pool_sql).collect()
print(compute_pool_result)



In [None]:

# Remove a previous run with the same name so the job can be launched again.
print(session.sql(f'DROP SERVICE IF EXISTS {job_name}').collect())

# Launch the transcription job asynchronously on the compute pool.
# The container is told which stage and which folder inside it to read via
# --stage-name / --stage-data-path. The folder comes from `stage_data_path`,
# the same value used when the sample files were uploaded. (The previous
# `{stage_path}` placeholder referred to a name that does not exist at cell
# level and raised NameError.)
create_async_job_sql = f"""
EXECUTE JOB SERVICE
IN COMPUTE POOL {compute_pool_name}
NAME = {job_name}
ASYNC = True
REPLICAS = {num_replicas}
QUERY_WAREHOUSE = {warehouse}
EXTERNAL_ACCESS_INTEGRATIONS = ({external_access_integration})
FROM SPECIFICATION $$
    spec:
      container:
      - name: main
        image: /{database}/{schema}/{image_registry}/{image_name}
        command: ['python', '-u', './main.py', '--config-file=./configs/openai-whisper-tiny.toml', '--stage-name={stage}', '--stage-data-path={stage_data_path}']
        resources:
            requests:
              memory: "10Gi"
            limits:
              memory: "10Gi"

$$
"""

print(session.sql(create_async_job_sql).collect())



In [None]:

# Show the current description of the job service.
describe_job_sql = f'DESC SERVICE {job_name}'
job_description = session.sql(describe_job_sql).collect()
print(job_description)



In [106]:

# Fetch the log text of container 'main' for the instance passed as 1.
service_logs_sql = f"CALL SYSTEM$GET_SERVICE_LOGS('{job_name}', 1, 'main')"
logs = session.sql(service_logs_sql).collect()

# The log comes back as a single string in the first column of the first row;
# print it one line at a time.
service_log_text = logs[0][0]
for line in service_log_text.split('\n'):
    print(line)





In [None]:

# (Re)create the results table with a single VARIANT column; any existing
# table of the same name is replaced.
create_output_table_sql= f"""
CREATE OR REPLACE TABLE {output_table} (
    data VARIANT
);
"""

create_table_result = session.sql(create_output_table_sql).collect()
print(create_table_result)

# Load the Parquet files found under the stage folder named after the
# upper-cased job name into the results table.
copy_data_sql = f"""
COPY INTO {output_table}
FROM @{stage}/{job_name.upper()}
FILE_FORMAT = (TYPE = PARQUET);
"""

copy_result = session.sql(copy_data_sql).collect()
print(copy_result)



In [None]:

# Preview a few rows of the output table. The limit is part of the query so
# that only the previewed rows are fetched, rather than collecting the whole
# table to the client and slicing it locally.
preview_row_count = 10
records = session.sql(f"select * from {output_table} limit {preview_row_count}").collect()
for record in records:
    print(record)
