# Call Center Analytics with Snowflake Cortex AI

## Enable Cortex AI for cross-region access

In [None]:
%sql
-- Account-level switch: sets CORTEX_ENABLED_CROSS_REGION to 'ANY_REGION' so
-- Cortex AI calls are not restricted to the account's own region.
-- NOTE(review): changing an account parameter presumably needs an
-- administrative role (e.g. ACCOUNTADMIN) -- confirm before running.
ALTER ACCOUNT
    SET CORTEX_ENABLED_CROSS_REGION = 'ANY_REGION';

## Create database & schema

In [None]:
%sql
-- Create the database, schema and internal stage used by the rest of the notebook.
-- IF NOT EXISTS is used for all three objects so that re-running this cell is
-- harmless: CREATE OR REPLACE on the schema or stage would drop every table and
-- every uploaded audio file that already lives there.
CREATE DATABASE IF NOT EXISTS call_center_db;
CREATE SCHEMA IF NOT EXISTS call_center_db.analytics;

-- The directory table is enabled because later cells enumerate the staged files
-- through DIRECTORY('@audio_files').
CREATE STAGE IF NOT EXISTS call_center_db.analytics.audio_files
    DIRECTORY = (ENABLE = TRUE)
    ENCRYPTION = (TYPE = 'SNOWFLAKE_SSE')
    COMMENT = 'Stage for call center audio files';

## Setting up session

In [None]:
import pandas as pd
import json
import numpy as np
import time
from datetime import datetime, timedelta
import streamlit as st

from snowflake.snowpark import Session, DataFrame
from snowflake.snowpark import functions as F
from snowflake.snowpark import types as T
from snowflake.snowpark.version import VERSION

# get_active_session is not brought into scope by any of the imports above,
# so import it here; without this the call below raises NameError.
from snowflake.snowpark.context import get_active_session

# Reuse the notebook's existing Snowpark session and point it at the database
# and schema that hold the stage and tables used in the cells below.
session = get_active_session()
session.use_database("call_center_db")
session.use_schema("analytics")

## Uploading audio files
If you are working in Snowsight, you can upload the call recordings directly into the stage. Otherwise, if you are working in the SnowSQL client or Visual Studio Code, you can use the `PUT file://` command as shown below.

In [None]:
%sql
-- Upload every local file whose name ends in .wav (* is a wildcard) from
-- /recordings into the audio_files stage.
-- AUTO_COMPRESS is turned off on purpose: the cells that follow do not work
-- with compressed files.
PUT file:///recordings/*.wav @audio_files
    AUTO_COMPRESS = FALSE;

## Verify your recordings are in the stage

In [None]:
%sql
-- List the contents of the audio_files stage to confirm the upload succeeded.
LIST @audio_files/;

## Preview audio files with Streamlit

In [None]:
# Preview a single staged recording inline to confirm it uploaded intact.
# Replace the placeholder segment with the name of one of your staged files.
# NOTE(review): the extension here is upper-case while the upload cell uses
# *.wav -- make sure the path matches the staged file name exactly.
stage_path = "@call_center_db.analytics.audio_files/<'yourcallrecordings'>.WAV"

# Read the whole file into memory; the stream is closed when the block exits.
with session.file.get_stream(stage_path) as f:
    audio_bytes = f.read()

# The recordings are WAV files, so declare the matching MIME type
# (the previous "audio/mpeg" describes MP3 data, not WAV).
st.audio(audio_bytes, format="audio/wav", start_time=0)

## Create table with FILE objects for batch transcription

In [None]:
%sql
-- Build the work list for batch transcription: one row per .wav file found in
-- the stage's directory table, with a FILE reference plus ids parsed from the
-- file name.
--
-- File-name parsing, as implemented:
--   call_id    = the first two underscore-separated tokens of the path
--   handler_id = the third underscore-separated token of the path
-- The source data set's names were described as
-- '<call_id>_<handler_id>_<domain>.wav'.
-- NOTE(review): that description does not obviously match the expressions
-- below (it would put '<domain>.wav' into handler_id unless call_id itself
-- contains an underscore) -- verify against real file names and adjust
-- REGEXP_SUBSTR / SPLIT_PART if your naming differs.
--
-- For other audio formats (.mp3, .flac, ...) change the ILIKE pattern.
CREATE OR REPLACE TABLE audio_files_for_transcription AS
WITH staged_wav AS (
    SELECT
        RELATIVE_PATH,
        SIZE,
        LAST_MODIFIED
    FROM DIRECTORY('@audio_files')
    WHERE RELATIVE_PATH ILIKE '%.wav'
)
SELECT
    RELATIVE_PATH                                AS file_path,
    TO_FILE('@audio_files', RELATIVE_PATH)       AS audio_file,
    SIZE                                         AS file_size_bytes,
    LAST_MODIFIED                                AS upload_time,
    REGEXP_SUBSTR(RELATIVE_PATH, '^[^_]+_[^_]+') AS call_id,
    SPLIT_PART(RELATIVE_PATH, '_', 3)            AS handler_id
FROM staged_wav;

In [None]:
%sql
-- Inspect the work list before starting transcription.
SELECT * FROM audio_files_for_transcription;

## Transcribing audio files

In [None]:
%sql
-- Transcribe every staged recording and store the text with basic quality
-- indicators (length, word count, status).
--
-- AI_TRANSCRIBE is written exactly once, inside the CTE, and every derived
-- column reuses its result. The earlier form repeated the call five times per
-- row, which risks repeated (billed) transcription of the same file.
CREATE OR REPLACE TABLE ai_transcribed_calls AS
WITH transcribed AS (
    SELECT
        call_id,
        handler_id,
        file_path,
        file_size_bytes,
        AI_TRANSCRIBE(audio_file):text::STRING AS transcript
    FROM audio_files_for_transcription
)
SELECT
    call_id,
    handler_id,
    transcript,
    LENGTH(transcript)                 AS transcript_length,
    ARRAY_SIZE(SPLIT(transcript, ' ')) AS word_count,
    -- Status is derived from the cast string, so a missing or JSON-null text
    -- field is reported as FAILED instead of falling through to SUCCESS.
    CASE
        WHEN transcript IS NULL THEN 'FAILED'
        WHEN LENGTH(transcript) < 10 THEN 'SHORT'
        ELSE 'SUCCESS'
    END                                AS status,
    file_path
FROM transcribed
-- Sorts the rows written to the table by file size; it does not control the
-- order in which files are transcribed.
ORDER BY file_size_bytes ASC;

In [None]:
%sql
-- Inspect the transcripts and their status flags.
SELECT *
FROM ai_transcribed_calls;

## Using Snowflake Cortex AI to analyse transcripts

In [None]:
%sql
-- Create table to store transcript analysis: one row per usable transcript,
-- enriched with sentiment, a summary, structured call attributes, an agent
-- performance score and suggested improvements.
--
-- Sentiment is scored once per transcript in the CTE and reused for both the
-- numeric column and the label. The earlier form invoked
-- SNOWFLAKE.CORTEX.SENTIMENT three times per row.
CREATE OR REPLACE TABLE call_analysis AS
WITH scored_calls AS (
    SELECT
        call_id,
        handler_id,
        transcript,
        SNOWFLAKE.CORTEX.SENTIMENT(transcript) AS sentiment_score
    FROM ai_transcribed_calls
    WHERE status = 'SUCCESS'
      AND transcript IS NOT NULL
      AND LENGTH(transcript) > 50  -- Filter out very short transcripts (eg. number unavailable, sent to voicemail)
)
SELECT
    call_id,
    handler_id,
    transcript,

    -- Sentiment analysis: scores within [-0.1, 0.1] are labelled NEUTRAL
    sentiment_score,
    CASE
        WHEN sentiment_score > 0.1 THEN 'POSITIVE'
        WHEN sentiment_score < -0.1 THEN 'NEGATIVE'
        ELSE 'NEUTRAL'
    END AS sentiment,

    -- Call summary
    SNOWFLAKE.CORTEX.SUMMARIZE(transcript) AS summary,

    -- Structured extraction: response_format constrains the model output to
    -- the JSON schema below; only the five fields in 'required' are mandatory.
    AI_COMPLETE(
        model => 'claude-4-sonnet',
        prompt => 'Analyze this call center conversation and extract structured information. Call transcript: ' || transcript,
        model_parameters => {'temperature': 0.1, 'max_tokens': 2048},
        response_format => {
            'type': 'json',
            'schema': {
                'type': 'object',
                'properties': {
                    'primary_intent': {'type': 'string', 'enum': ['product_enquiry', 'sales', 'delivery_status', 'complaint', 'cancellation', 'other']},
                    'urgency_level': {'type': 'string', 'enum': ['low', 'medium', 'high', 'critical']},
                    'issue_resolved': {'type': 'string', 'enum': ['yes', 'no', 'partial']},
                    'escalation_required': {'type': 'string', 'enum': ['yes', 'no']},
                    'customer_satisfaction': {'type': 'string', 'enum': ['satisfied', 'neutral', 'dissatisfied']},
                    'call_duration_estimate': {'type': 'string', 'enum': ['short', 'medium', 'long']},
                    'key_issues': {'type': 'array', 'items': {'type': 'string'}},
                    'action_items': {'type': 'array', 'items': {'type': 'string'}},
                    'invoice_number': {'type': 'array', 'items': {'type': 'string'}},
                    'monetary_amounts': {'type': 'array', 'items': {'type': 'string'}},
                    'callback_requested': {'type': 'string', 'enum': ['yes', 'no']}
                },
                'required': ['primary_intent', 'urgency_level', 'issue_resolved', 'escalation_required', 'customer_satisfaction']
            }
        }
    ) AS call_info,

    -- Quality score: TRY_CAST yields NULL instead of an error whenever the
    -- model replies with anything that is not a number (including 'null').
    TRY_CAST(
        AI_COMPLETE(
            model => 'claude-4-sonnet',
            prompt => 'Rate this call center conversation on a scale of 1-10 for agent performance considering: professionalism, problem-solving, communication clarity, and customer service. Provide only the numeric score (no text). If you cannot determine a score, return null and nothing else: ' || transcript,
            model_parameters => {'temperature': 0, 'max_tokens': 10}
        )::VARCHAR AS NUMBER(3,1)
    ) AS agent_performance_score,

    -- Improvement areas (free text)
    AI_COMPLETE(
        model => 'claude-4-sonnet',
        prompt => 'List 3 specific improvement opportunities for this call center conversation in bullet points: ' || transcript,
        model_parameters => {'temperature': 0.3, 'max_tokens': 500}
    ) AS improvement_areas,

    -- Time at which this analysis was produced
    CURRENT_TIMESTAMP() AS timestamp

FROM scored_calls;

In [None]:
%sql
-- Inspect the analysis output, including the raw call_info JSON.
SELECT * FROM call_analysis;

## Extracting JSON fields from the AI response

In [None]:
%sql
-- Promote the five required call_info fields to top-level STRING columns so
-- they can be filtered and grouped without JSON path syntax.
-- IF NOT EXISTS keeps the cell re-runnable: a plain ADD COLUMN fails on the
-- second run because the columns already exist.
ALTER TABLE call_analysis ADD COLUMN IF NOT EXISTS primary_intent STRING;
ALTER TABLE call_analysis ADD COLUMN IF NOT EXISTS urgency_level STRING;
ALTER TABLE call_analysis ADD COLUMN IF NOT EXISTS issue_resolved STRING;
ALTER TABLE call_analysis ADD COLUMN IF NOT EXISTS escalation_required STRING;
ALTER TABLE call_analysis ADD COLUMN IF NOT EXISTS customer_satisfaction STRING;

-- Intentionally no WHERE clause: every row is (re)populated from its own
-- call_info value.
UPDATE call_analysis
SET
    primary_intent        = call_info:primary_intent::STRING,
    urgency_level         = call_info:urgency_level::STRING,
    issue_resolved        = call_info:issue_resolved::STRING,
    escalation_required   = call_info:escalation_required::STRING,
    customer_satisfaction = call_info:customer_satisfaction::STRING;

In [None]:
%sql
-- Inspect the final table, now including the extracted call_info columns.
SELECT *
FROM call_analysis;