In [6]:

import json
from snowflake.snowpark.session import Session
import snowflake.snowpark.functions as F




### 1. Setup - Create Database/Security/Compute Pool objects 

- Ensure you have followed the steps in the doc https://docs.snowflake.com/en/developer-guide/snowpark-container-services/tutorials/common-setup#create-snowflake-objects to create the roles required for SPCS services. 

If you haven't already done so, run the commands below in Snowsight as <b>ACCOUNTADMIN</b>.

You only need to execute the commands below once for all three containers you will create as part of the solution. You will create the other compute pools while creating the other SPCS services.

``` sql
USE ROLE ACCOUNTADMIN;

CREATE ROLE SPCS_PSE_ROLE;

CREATE DATABASE IF NOT EXISTS LLMDemo;
GRANT OWNERSHIP ON DATABASE LLMDemo TO ROLE SPCS_PSE_ROLE COPY CURRENT GRANTS;
GRANT OWNERSHIP ON ALL SCHEMAS IN DATABASE LLMDemo  TO ROLE SPCS_PSE_ROLE COPY CURRENT GRANTS;

CREATE OR REPLACE WAREHOUSE small_warehouse WITH
  WAREHOUSE_SIZE='X-SMALL';
GRANT USAGE ON WAREHOUSE small_warehouse TO ROLE SPCS_PSE_ROLE;

CREATE SECURITY INTEGRATION IF NOT EXISTS snowservices_ingress_oauth
  TYPE=oauth
  OAUTH_CLIENT=snowservices_ingress
  ENABLED=true;

GRANT BIND SERVICE ENDPOINT ON ACCOUNT TO ROLE SPCS_PSE_ROLE;

CREATE COMPUTE POOL PR_GPU_S
MIN_NODES = 1 
MAX_NODES = 1 
INSTANCE_FAMILY = GPU_NV_S 
AUTO_RESUME = FALSE
COMMENT = 'For Audio2text' ;

-- The network rule and external access integration below are used to download the Whisper model.

-- You need to execute the two commands below only once for all the SPCS services that you will be creating.

 CREATE NETWORK RULE allow_all_rule
    TYPE = 'HOST_PORT'
    MODE= 'EGRESS'
    VALUE_LIST = ('0.0.0.0:443','0.0.0.0:80');

CREATE EXTERNAL ACCESS INTEGRATION allow_all_eai
  ALLOWED_NETWORK_RULES = (allow_all_rule)
  ENABLED = true;

GRANT USAGE ON INTEGRATION allow_all_eai TO ROLE SPCS_PSE_ROLE;

GRANT USAGE, MONITOR ON COMPUTE POOL PR_GPU_S TO ROLE SPCS_PSE_ROLE;

GRANT USAGE, MONITOR ON COMPUTE POOL PR_GPU_S TO ROLE SPCS_PSE_ROLE;

GRANT DATABASE ROLE SNOWFLAKE.CORTEX_USER TO ROLE SPCS_PSE_ROLE;

GRANT ROLE SPCS_PSE_ROLE TO USER <user_name>;

USE ROLE SPCS_PSE_ROLE;
USE DATABASE LLMDemo;
USE WAREHOUSE small_warehouse;
USE SCHEMA PUBLIC;

CREATE IMAGE REPOSITORY IF NOT EXISTS IMAGES;

-- CHECK THE IMAGE REGISTRY URL

SHOW IMAGE REPOSITORIES;

-- Example output for the above query:
--  <orgname>-<acctname>.registry.snowflakecomputing.com/LLMDEMO/public/images


```


### 2. Build docker image and push the image to image registry

> Run the commands below from a terminal. Set your working directory to "audio2text". Ensure Docker is running on your machine. Replace ORGNAME-ACCTNAME with your Snowflake account info and also update the username.

``` bash

cd audio2text

# Refer to audio2text/Dockerfile for image details

docker build --no-cache --platform linux/amd64 -t ORGNAME-ACCTNAME.registry.snowflakecomputing.com/llmdemo/public/images/whisper-audio2text:latest . 

# The username and password are the same as your Snowflake credentials

docker login ORGNAME-ACCTNAME.registry.snowflakecomputing.com -u <username> -p <password>

docker push ORGNAME-ACCTNAME.registry.snowflakecomputing.com/llmdemo/public/images/whisper-audio2text:latest

# It will take quite some time for the image to be pushed to the Snowflake image repository.
```

### 3. Creating Internal Stages 

In [None]:
# connection.json must use the SPCS_PSE_ROLE which you have created earlier.
# Read it inside a `with` block so the file handle is closed as soon as the
# parameters are parsed, instead of being left open for the garbage collector.
with open('../connection.json') as connection_file:
    connection_parameters = json.load(connection_file)

session = Session.builder.configs(connection_parameters).create()

# Add a query tag to the session.
session.query_tag = {"origin":"sf_sit-is", 
                     "name":"spcs_call_center", 
                     "version":{"major":1, "minor":0},
                     "attributes":{"is_quickstart":1, "source":"notebook"}}


In [None]:
# Create each internal stage used by the solution. Every stage is created with
# SNOWFLAKE_SSE encryption and a directory table; IF NOT EXISTS leaves
# already-created stages untouched.
stages = ['WHISPER_APP', 'AUDIO_FILES', 'SPECS', 'CSV_FILES', 'LLM_WORKSPACE']
for stg in stages:
    create_stage_sql = f'''
                CREATE STAGE IF NOT EXISTS {stg} ENCRYPTION = (TYPE = 'SNOWFLAKE_SSE') 
                DIRECTORY = (ENABLE = TRUE);
                '''
    session.sql(create_stage_sql).collect()


### 4. Create SPCS Service
Update the YAML details in [whisper_spec.yml](./whisper_spec.yml) to change the image URL before executing the put command below.

image: ORGNAME-ACCTNAME.registry.snowflakecomputing.com/llmdemo/public/images/whisper-audio2text:latest

PS: <b>Run all the commands below using the SPCS role (or any custom role you have created), not ACCOUNTADMIN.</b>


In [4]:

# Upload the service specification to the SPECS stage without compressing it.
# overwrite=True makes sure an edited whisper_spec.yml replaces any copy that is
# already on the stage (the audio-file upload later in this notebook uses the same flag).
session.file.put("./whisper_spec.yml", "@specs", auto_compress=False, overwrite=True)

[PutResult(source='llm-text2sql.yaml', target='llm-text2sql.yaml.gz', source_size=652, target_size=358, source_compression='NONE', target_compression='GZIP', status='UPLOADED', message='')]

In [None]:
# Create the service from the spec file uploaded to @specs.
# IF NOT EXISTS keeps this cell re-runnable: a plain CREATE SERVICE raises an
# error once the service exists (e.g. on Restart Kernel -> Run All).
session.sql('''
CREATE SERVICE IF NOT EXISTS Whisper_Audio_text_SVC
  IN COMPUTE POOL PR_GPU_S
  FROM @specs
  SPEC='whisper_spec.yml'
  EXTERNAL_ACCESS_INTEGRATIONS = (ALLOW_ALL_EAI)
  MIN_INSTANCES=1
  MAX_INSTANCES=1;
            ''').collect()

> Wait for the service to be in Ready State before moving ahead. Run the below command to confirm it

In [None]:
# Check the status of the service. This should be READY.
# Wait a few minutes after creation before the service becomes active.
#
# SYSTEM$GET_SERVICE_STATUS returns a JSON document, so it is parsed with
# json.loads (imported at the top of the notebook). ast.literal_eval only
# understands Python literals and fails on JSON true/false/null.
res = session.sql(''' 
SELECT SYSTEM$GET_SERVICE_STATUS('Whisper_Audio_text_SVC',1)
''').collect()[0][0]

# Display the first entry of the parsed status list.
json.loads(res)[0]

``` {'status': 'READY',
 'message': 'Running',
 'containerName': 'audio-whisper-app',
 'instanceId': '0',
 'serviceName': 'WHISPER_AUDIO_TEXT_SVC',
 'image': 'sfseeurope-us-west-ccarrero-452.registry.snowflakecomputing.com/llmdemo/public/images/whisper-audio2text:latest',
 'restartCount': 0,
 'startTime': '2024-04-24T22:54:06Z'}
 ```

In [None]:
# Fetch the logs of the 'audio-whisper-app' container (instance 0) and split
# them into one row per log line so errors are easy to spot.
service_log_query = '''SELECT value AS log_line
FROM TABLE(
 SPLIT_TO_TABLE(SYSTEM$GET_SERVICE_LOGS('Whisper_Audio_text_SVC', 0, 'audio-whisper-app'), '\n')
  )'''
service_log_lines = session.sql(service_log_query)
service_log_lines.to_pandas()


### 5. Creating the service function

In [None]:
# Service function DURATION: forwards an audio file reference to the
# '/audio-duration' path of the service's API endpoint and returns a VARIANT.
duration_function_ddl = '''CREATE OR REPLACE FUNCTION DURATION(AUDIO_FILE TEXT)
RETURNS VARIANT
SERVICE=Whisper_Audio_text_SVC
ENDPOINT=API
AS '/audio-duration'
            '''
session.sql(duration_function_ddl).collect()


In [None]:
# Service function TRANSCRIBE: forwards the task, language, audio file reference
# and encode flag to the '/asr' path of the service's API endpoint.
transcribe_function_ddl = '''CREATE OR REPLACE FUNCTION TRANSCRIBE(TASK TEXT, LANGUAGE TEXT, AUDIO_FILE TEXT, ENCODE BOOLEAN)
RETURNS VARIANT
SERVICE=Whisper_Audio_text_SVC
ENDPOINT=API
AS '/asr'
            '''
session.sql(transcribe_function_ddl).collect()

In [None]:
# Service function DETECT_LANGUAGE: forwards an audio file reference and encode
# flag to the '/detect-language' path of the service's API endpoint.
detect_language_function_ddl = '''CREATE OR REPLACE FUNCTION DETECT_LANGUAGE(AUDIO_FILE TEXT, ENCODE BOOLEAN)
RETURNS VARIANT
SERVICE=Whisper_Audio_text_SVC
ENDPOINT=API
AS '/detect-language'
            '''
session.sql(detect_language_function_ddl).collect()

In [19]:
# (Re)create the raw table that holds one row per audio file: the transcribed
# conversation text, its presigned URL and the call duration.
# DURATION is stored in seconds.
create_claims_raw_ddl = '''
    CREATE or REPLACE TABLE ALL_CLAIMS_RAW (
	DATETIME DATE,
	AUDIOFILE VARCHAR(16777216),
	CONVERSATION VARCHAR(16777216),
	PRESIGNED_URL_PATH VARCHAR(16777216),
	DURATION FLOAT NOT NULL
)'''
session.sql(create_claims_raw_ddl).collect()


[Row(status='Table DEMO_ALL_CLAIMS_RAW successfully created.')]

In [None]:
# Upload every local audio file into a dated folder of the AUDIO_FILES internal
# stage, uncompressed and overwriting files already there. The PUT result is
# assigned to `_` so it is not echoed as cell output.
_ = session.file.put(
    "./audiofiles/*.*",
    "@AUDIO_FILES/2024-01-26/",
    auto_compress=False,
    overwrite=True,
)

# Refresh the stage after the upload.
refresh_stage_sql = 'ALTER STAGE AUDIO_FILES REFRESH'
session.sql(refresh_stage_sql).collect()


In [26]:
# List the files that landed in the dated folder of the AUDIO_FILES stage.
list_audio_files_sql = 'ls @AUDIO_FILES/2024-01-26'
session.sql(list_audio_files_sql).collect()

[Row(name='audio_files/2024-01-26/Health-Insurance-1 (2).mp3', size=587565, md5='889b7aa9c9c09f78b376e04ca53585c3', last_modified='Wed, 31 Jan 2024 04:36:27 GMT'),
 Row(name='audio_files/2024-01-26/Health-Insurance-1.mp3', size=587565, md5='889b7aa9c9c09f78b376e04ca53585c3', last_modified='Wed, 31 Jan 2024 04:37:36 GMT'),
 Row(name='audio_files/2024-01-26/Sample_ATT_Inbound_Call-MONO_47sec (5).mp3', size=335727, md5='273123d02e1082fba024ba4429b3f03f', last_modified='Wed, 31 Jan 2024 04:36:24 GMT'),
 Row(name='audio_files/2024-01-26/Sample_ATT_Inbound_Call-MONO_47sec.mp3', size=335727, md5='273123d02e1082fba024ba4429b3f03f', last_modified='Wed, 31 Jan 2024 04:37:34 GMT'),
 Row(name='audio_files/2024-01-26/common_voice_de_37888599.mp3', size=63333, md5='cdc9ef276ed8a5ae9378795bd5ada737', last_modified='Wed, 31 Jan 2024 04:37:32 GMT'),
 Row(name='audio_files/2024-01-26/common_voice_de_37942822.mp3', size=63333, md5='68b5c52ffa45c560f851f77bb237372f', last_modified='Wed, 31 Jan 2024 04:37:

In [32]:
# Insert one record per staged audio file into the RAW table.
# To get different DATETIME values, store your audio files in sub folders named
# in yyyy-mm-dd format, e.g. 2024-01-10. Files that are not inside a sub folder
# get the current date instead.
# The query builds a presigned URL for each file in the stage directory and
# passes it to the TRANSCRIBE and DURATION service functions.
insert_claims_raw_sql = '''
INSERT INTO ALL_CLAIMS_RAW
(
DATETIME,
AUDIOFILE,
PRESIGNED_URL_PATH,
CONVERSATION,
DURATION
)
SELECT CAST(CASE WHEN split(RELATIVE_PATH,'/')[1]::string IS NULL THEN GETDATE() 
            ELSE split(RELATIVE_PATH,'/')[0]::string END AS DATE) as Datetime, 
        CASE WHEN split(RELATIVE_PATH,'/')[1]::string is null then split(RELATIVE_PATH,'/')[0]::string 
            ELSE split(RELATIVE_PATH,'/')[1]::string END as RELATIVE_PATH,
       GET_PRESIGNED_URL('@AUDIO_FILES', RELATIVE_PATH) AS PRESIGNED_URL
       -- ,DETECT_LANGUAGE(PRESIGNED_URL,TRUE) as DETECT_LANGUAGE
       ,TRANSCRIBE('transcribe','',PRESIGNED_URL,True)['text']::string AS EXTRACTED_TEXT
       ,DURATION(PRESIGNED_URL):call_duration_seconds::DOUBLE as CALL_DURATION_SECONDS
FROM DIRECTORY('@AUDIO_FILES')
            
            '''
session.sql(insert_claims_raw_sql).collect()

[Row(DATETIME=datetime.date(2024, 1, 26), RELATIVE_PATH='Health-Insurance-1 (2).mp3', PRESIGNED_URL='https://sfc-prod3-ds1-16-customer-stage.s3.us-west-2.amazonaws.com/f8ab0000-s/stages/6f27f761-d7c8-4454-81cf-8024ebb94d8b/2024-01-26/Health-Insurance-1%20%282%29.mp3?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAU6V5ZHIX6ZNBIVXD%2F20240131%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20240131T044317Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=62d66af071794494c34392a06d90b56b154e4d56ce4de6ead5745adb26be31d9'),
 Row(DATETIME=datetime.date(2024, 1, 26), RELATIVE_PATH='Health-Insurance-1.mp3', PRESIGNED_URL='https://sfc-prod3-ds1-16-customer-stage.s3.us-west-2.amazonaws.com/f8ab0000-s/stages/6f27f761-d7c8-4454-81cf-8024ebb94d8b/2024-01-26/Health-Insurance-1.mp3?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAU6V5ZHIX6ZNBIVXD%2F20240131%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20240131T044317Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=4cfce07a

In [33]:
# Display the contents of ALL_CLAIMS_RAW as a pandas DataFrame.
claims_raw_table = session.table('ALL_CLAIMS_RAW')
claims_raw_table.to_pandas()

Unnamed: 0,DATETIME,AUDIOFILE,CONVERSATION,PRESIGNED_URL_PATH,DURATION
0,2023-11-11,audiofile1.mp3,"Hello, this is Emily from AutoAssure Insurance...",https://sfc-prod3-ds1-16-customer-stage.s3.us-...,218.254271
1,2023-11-11,audiofile2.mp3,"Hello, this is James from AutoAssure Insurance...",https://sfc-prod3-ds1-16-customer-stage.s3.us-...,197.705486
2,2023-11-11,audiofile3.mp3,"Hello, this is Sarah from AutoAssure Insurance...",https://sfc-prod3-ds1-16-customer-stage.s3.us-...,75.172382
3,2023-11-11,audiofile4.mp3,"Hello, this is Kevin from AutoAssure Insurance...",https://sfc-prod3-ds1-16-customer-stage.s3.us-...,224.291618
4,2023-11-15,audiofile5.mp3,"Hello, this is Olivia from AutoAssure Insuranc...",https://sfc-prod3-ds1-16-customer-stage.s3.us-...,174.649442
...,...,...,...,...,...
91,2024-01-05,audiofile92.mp3,"Good morning, this is Sara at AutoAssure Insur...",https://sfc-prod3-ds1-16-customer-stage.s3.us-...,102.048162
92,2024-01-05,audiofile93.mp3,"Hello, I'm Michael with AutoAssure Insurance, ...",https://sfc-prod3-ds1-16-customer-stage.s3.us-...,71.842008
93,2024-01-10,audiofile95.wav,"Welcome to AutoAssure Insurance, this is Josh....",https://sfc-prod3-ds1-16-customer-stage.s3.us-...,84.340859
94,2024-01-10,audiofile97.wav,"Hi, this is Ethan from AutoAssure Insurance. W...",https://sfc-prod3-ds1-16-customer-stage.s3.us-...,195.237454


### 6. Loading Data into the ALL_CLAIMS_RAW Table from CSV

Since we don't have a lot of audio files from the insurance industry, we will load sample data containing raw insurance-industry conversations into the raw table. This data will be the source for this solution.

In [17]:
# Upload the sample CSV to the CSV_FILES stage uncompressed; the PUT result is
# assigned to `_` so it is not echoed as cell output.
_ = session.file.put(
    "./Sample_Audio_Text.csv",
    "@CSV_FILES",
    auto_compress=False,
)

# Read the staged CSV: infer the schema, take column names from the header row,
# and treat double quotes as the optional field enclosure.
csv_read_options = {
    "INFER_SCHEMA": True,
    "PARSE_HEADER": True,
    "FIELD_OPTIONALLY_ENCLOSED_BY": '"',
}
sp_df = session.read.options(csv_read_options).csv('@CSV_FILES/Sample_Audio_Text.csv')

In [20]:
# Replace the contents of ALL_CLAIMS_RAW with the sample data read from the CSV.
claims_raw_writer = sp_df.write.mode("overwrite")
claims_raw_writer.save_as_table("ALL_CLAIMS_RAW")

In [21]:
# Display ALL_CLAIMS_RAW after the CSV load as a pandas DataFrame.
claims_raw_loaded = session.table('ALL_CLAIMS_RAW')
claims_raw_loaded.to_pandas()

Unnamed: 0,DATETIME,AUDIOFILE,CONVERSATION,PRESIGNED_URL_PATH,DURATION
0,2023-11-11,audiofile1.mp3,"Hello, this is Emily from AutoAssure Insurance...",https://sfc-prod3-ds1-16-customer-stage.s3.us-...,218.254271
1,2023-11-11,audiofile2.mp3,"Hello, this is James from AutoAssure Insurance...",https://sfc-prod3-ds1-16-customer-stage.s3.us-...,197.705486
2,2023-11-11,audiofile3.mp3,"Hello, this is Sarah from AutoAssure Insurance...",https://sfc-prod3-ds1-16-customer-stage.s3.us-...,75.172382
3,2023-11-11,audiofile4.mp3,"Hello, this is Kevin from AutoAssure Insurance...",https://sfc-prod3-ds1-16-customer-stage.s3.us-...,224.291618
4,2023-11-15,audiofile5.mp3,"Hello, this is Olivia from AutoAssure Insuranc...",https://sfc-prod3-ds1-16-customer-stage.s3.us-...,174.649442
...,...,...,...,...,...
91,2024-01-05,audiofile92.mp3,"Good morning, this is Sara at AutoAssure Insur...",https://sfc-prod3-ds1-16-customer-stage.s3.us-...,102.048162
92,2024-01-05,audiofile93.mp3,"Hello, I'm Michael with AutoAssure Insurance, ...",https://sfc-prod3-ds1-16-customer-stage.s3.us-...,71.842008
93,2024-01-10,audiofile95.wav,"Welcome to AutoAssure Insurance, this is Josh....",https://sfc-prod3-ds1-16-customer-stage.s3.us-...,84.340859
94,2024-01-10,audiofile97.wav,"Hi, this is Ethan from AutoAssure Insurance. W...",https://sfc-prod3-ds1-16-customer-stage.s3.us-...,195.237454
