In [None]:
# Import python packages
import json

import pandas as pd
import streamlit as st

from snowflake.snowpark.functions import *
from snowflake.snowpark.types import *

# Get active Snowflake session
from snowflake.snowpark.context import get_active_session
session = get_active_session()

# Point the session at the database, schema and warehouse used by this notebook.
# Each statement is executed eagerly via .collect(), in the order listed.
for context_statement in (
    "USE DATABASE CALL_CENTER_ANALYTICS",
    "USE SCHEMA AUDIO_PROCESSING",
    "USE WAREHOUSE AUDIO_CORTEX_WH",
):
    session.sql(context_statement).collect()

st.write("✅ Session setup complete - Ready for audio transcription!")


In [None]:
-- Inventory of the .mp3 files on the CALL_CENTER_AUDIO_FILES stage:
-- one row per file with its size in bytes, its size divided by 1024*1024
-- (rounded to 2 decimals), and its last-modified timestamp, sorted by path.
SELECT
    RELATIVE_PATH            AS filename,
    SIZE                     AS file_size_bytes,
    ROUND(SIZE/1024/1024, 2) AS file_size_mb,
    LAST_MODIFIED
FROM DIRECTORY('@CALL_CENTER_AUDIO_FILES')
WHERE RELATIVE_PATH LIKE '%.mp3'   -- case-sensitive suffix match
ORDER BY filename;


In [None]:
# Interactive player: fetch every staged .mp3 together with a pre-signed URL,
# let the user pick one, and stream the chosen recording in the notebook.
files_df = session.sql("""
    SELECT 
        RELATIVE_PATH,
        GET_PRESIGNED_URL('@CALL_CENTER_AUDIO_FILES', RELATIVE_PATH) AS URL
    FROM DIRECTORY('@CALL_CENTER_AUDIO_FILES')
    WHERE RELATIVE_PATH LIKE '%.mp3'
    ORDER BY RELATIVE_PATH
""").to_pandas()

if files_df.empty:
    # Nothing matched on the stage - report it instead of rendering an empty widget
    st.error("No MP3 files found. Please check the setup.")
else:
    selected_file = st.selectbox(
        '🎧 Select Call Recording to Listen:',
        files_df['RELATIVE_PATH'],
    )

    if selected_file:
        # The first row whose path equals the selection carries the URL to play
        is_selected = files_df['RELATIVE_PATH'] == selected_file
        url = files_df.loc[is_selected, 'URL'].iloc[0]
        st.audio(url, format="audio/mpeg")
        st.write(f"**Playing**: {selected_file}")


In [None]:
-- (Re)build AUDIO_FILES_FOR_TRANSCRIPTION: one row per staged .mp3, pairing the
-- file's relative path with a FILE reference produced by TO_FILE.
CREATE OR REPLACE TABLE AUDIO_FILES_FOR_TRANSCRIPTION AS
WITH staged_mp3 AS (
    -- Paths of the .mp3 files currently listed in the stage's directory table
    SELECT RELATIVE_PATH
    FROM DIRECTORY('@CALL_CENTER_AUDIO_FILES')
    WHERE RELATIVE_PATH LIKE '%.mp3'
)
SELECT
    RELATIVE_PATH                                      AS filename,
    TO_FILE('@CALL_CENTER_AUDIO_FILES', RELATIVE_PATH) AS audio_file
FROM staged_mp3
ORDER BY RELATIVE_PATH;

-- Preview the rows that were just written
SELECT *
FROM AUDIO_FILES_FOR_TRANSCRIPTION;


In [None]:
-- (Re)build CALL_TRANSCRIPTS by running AI_TRANSCRIBE over every FILE reference
-- in AUDIO_FILES_FOR_TRANSCRIPTION, stamping each row with the current timestamp.
-- NOTE(review): Snowflake documents AI_TRANSCRIBE as returning an OBJECT
-- (audio_duration, text, ...), so transcript_text presumably stores that object
-- rather than plain text - confirm whether consumers expect `:text::STRING`.
CREATE OR REPLACE TABLE CALL_TRANSCRIPTS AS
SELECT
    src.filename                  AS audio_file_name,
    AI_TRANSCRIBE(src.audio_file) AS transcript_text,
    CURRENT_TIMESTAMP()           AS processing_timestamp
FROM AUDIO_FILES_FOR_TRANSCRIPTION AS src;

-- Inspect the transcription output
SELECT *
FROM CALL_TRANSCRIPTS;


In [None]:
# Display transcription results with analysis
def _transcript_plain_text(raw_value):
    """Return the plain transcript text for one TRANSCRIPT_TEXT value.

    CALL_TRANSCRIPTS.TRANSCRIPT_TEXT is filled directly from AI_TRANSCRIBE,
    which Snowflake documents as returning an object such as
    {"audio_duration": ..., "text": "..."}; after to_pandas() that object is
    expected to arrive as a JSON string. Measuring or displaying the raw value
    would therefore count JSON syntax instead of spoken words.

    Accepted inputs:
      * dict or JSON-object string with a "text" key -> that text
      * any other string (e.g. the column already holds plain text) -> unchanged
      * None / NaN (NULL transcript) -> "" so len()/split() never fail
    """
    if isinstance(raw_value, dict):
        payload = raw_value
    elif isinstance(raw_value, str):
        if not raw_value.lstrip().startswith("{"):
            return raw_value
        try:
            payload = json.loads(raw_value)
        except ValueError:
            # Looks like JSON but is not - treat it as ordinary transcript text
            return raw_value
    else:
        return ""

    if isinstance(payload, dict) and "text" in payload:
        text_value = payload["text"]
        return "" if text_value is None else str(text_value)
    # Object without a "text" field: show what is there rather than hiding it
    return raw_value if isinstance(raw_value, str) else json.dumps(raw_value)


transcripts_df = session.table('CALL_TRANSCRIPTS').to_pandas()

st.markdown("### 🎯 Transcription Results")

if not transcripts_df.empty:
    # Normalised plain-text view of the transcripts; transcripts_df is left untouched
    transcript_texts = transcripts_df['TRANSCRIPT_TEXT'].map(_transcript_plain_text)

    # Display metrics
    col1, col2, col3 = st.columns(3)

    with col1:
        st.metric("Files Processed", len(transcripts_df))

    with col2:
        avg_length = transcript_texts.str.len().mean()
        st.metric("Avg Transcript Length", f"{avg_length:.0f} chars")

    with col3:
        # int() keeps the thousands-separated label free of a trailing ".0"
        total_words = int(transcript_texts.str.split().str.len().sum())
        st.metric("Total Words", f"{total_words:,}")

    # Display individual transcripts
    st.markdown("### 📝 Individual Call Transcripts")

    for idx, row in transcripts_df.iterrows():
        transcript_text = _transcript_plain_text(row['TRANSCRIPT_TEXT'])
        with st.expander(f"📞 {row['AUDIO_FILE_NAME']}", expanded=False):
            st.write(f"**Processing Time**: {row['PROCESSING_TIMESTAMP']}")
            st.write(f"**Transcript Length**: {len(transcript_text)} characters")
            st.write(f"**Word Count**: {len(transcript_text.split())} words")
            # key must be unique per widget; the DataFrame index provides that
            st.text_area("Full Transcript:", transcript_text, height=200, key=f"transcript_{idx}")
else:
    st.error("No transcripts found. Please check the transcription process.")
