# Text Analytics with Cortex AISQL

This notebook demonstrates text-focused AISQL functions:
- **AI_COMPLETE**: Generate completions for text prompts
- **AI_CLASSIFY**: Classify text into categories
- **AI_SENTIMENT**: Classify text sentiment (positive, negative, neutral, or mixed)
- **AI_EXTRACT**: Extract structured information
- **SUMMARIZE**: Summarize text content


In [None]:
import streamlit as st
import pandas as pd
import altair as alt
from snowflake.snowpark.context import get_active_session

# Obtain the Snowpark session via get_active_session(); every cell below
# issues its SQL through this `session` object.
session = get_active_session()


In [None]:
# Point the session at the database, schema, and warehouse this notebook uses.
# Statements run in order: database first, then schema, then warehouse.
for context_statement in (
    "USE DATABASE AISQL_DB",
    "USE SCHEMA AISQL_SCHEMA",
    "USE WAREHOUSE AISQL_WH",
):
    session.sql(context_statement).collect()


## 1. AI_COMPLETE: Text Generation

Generate professional customer service responses to customer emails


In [None]:
# Draft a customer-service reply for a handful of emails with AI_COMPLETE.
# The email body is appended to a fixed instruction to form the prompt; only
# the first 150 characters of the email are shown alongside the reply.
sql = """
SELECT 
    ticket_id,
    SUBSTR(content, 1, 150) as customer_message,
    AI_COMPLETE('claude-3-7-sonnet', 
        'Generate a professional customer service response to this inquiry: ' || content) as ai_response
FROM emails
LIMIT 5
"""

# Run the query, then pull the result set into pandas for display.
response_rows = session.sql(sql)
df = response_rows.to_pandas()

st.subheader("AI-Generated Customer Responses")
st.dataframe(df)


## 2. AI_CLASSIFY: Text Classification

Classify customer emails into categories


In [None]:
# Classify emails by issue type.
# AI_CLASSIFY returns an object whose 'labels' array holds the chosen
# categories. Indexing into it yields a VARIANT, which arrives in pandas as a
# JSON-encoded string (e.g. '"Billing"' with literal quotes), so the label is
# cast to STRING to get clean category names for the table and the chart.
sql = """
SELECT 
    ticket_id,
    SUBSTR(content, 1, 100) as content_preview,
    AI_CLASSIFY(
        'Classify this support ticket into one of these categories: ' || content,
        ARRAY_CONSTRUCT('Billing', 'Technical Issue', 'Event Inquiry', 'Refund Request', 'General Question')
    )['labels'][0]::STRING as category
FROM emails
LIMIT 50
"""

df_classify = session.sql(sql).to_pandas()
st.subheader("Email Classification")
# Only preview the first rows; the chart below uses the full result set.
st.dataframe(df_classify.head(10))

# Visualize category distribution.
# Snowflake upper-cases unquoted aliases, hence 'CATEGORY'. value_counts()
# ignores rows where no label was returned (NULL/NaN).
category_counts = df_classify['CATEGORY'].value_counts().reset_index()
category_counts.columns = ['Category', 'Count']

# Horizontal bars, largest category on top (sort='-x' orders by bar length).
chart = alt.Chart(category_counts).mark_bar().encode(
    x=alt.X('Count:Q'),
    y=alt.Y('Category:N', sort='-x'),
    color=alt.Color('Category:N', legend=None),
    tooltip=['Category', 'Count']
).properties(height=300)

st.subheader("Category Distribution")
st.altair_chart(chart, use_container_width=True)


## 3. AI_SENTIMENT: Sentiment Analysis

Analyze sentiment of customer emails


In [None]:
# Analyze sentiment.
# Per the Snowflake reference, AI_SENTIMENT does not return a numeric score;
# it returns an object of the form
#   {"categories": [{"name": "overall", "sentiment": "<label>"}]}
# where <label> is a category such as positive / negative / neutral / mixed.
# Comparing that object against numeric thresholds is therefore invalid, so
# the overall label is read directly instead. The function is evaluated once
# per row inside the CTE (the LIMIT is applied there as well) rather than
# three times per row, which avoids redundant model calls.
# NOTE: there is no numeric `sentiment_score` column any more because
# AI_SENTIMENT provides none; SENTIMENT_CATEGORY keeps its name and its
# capitalised labels ('Positive', 'Negative', 'Neutral', ...).
sql = """
WITH analyzed AS (
    SELECT
        ticket_id,
        user_id,
        SUBSTR(content, 1, 150) AS content_preview,
        AI_SENTIMENT(content) AS sentiment_result
    FROM emails
    LIMIT 100
)
SELECT
    ticket_id,
    user_id,
    content_preview,
    INITCAP(sentiment_result['categories'][0]['sentiment']::STRING) AS sentiment_category
FROM analyzed
"""

df_sentiment = session.sql(sql).to_pandas()
st.subheader("Sentiment Analysis")
# Only preview the first rows; the chart below uses the full result set.
st.dataframe(df_sentiment.head(10))

# Sentiment distribution: count emails per sentiment label.
sentiment_dist = df_sentiment['SENTIMENT_CATEGORY'].value_counts().reset_index()
sentiment_dist.columns = ['Sentiment', 'Count']

# Pie chart: one arc per sentiment label, sized by its count.
pie_chart = alt.Chart(sentiment_dist).mark_arc().encode(
    theta=alt.Theta(field='Count', type='quantitative'),
    color=alt.Color(field='Sentiment', type='nominal'),
    tooltip=['Sentiment', 'Count']
).properties(width=400, height=400)

st.subheader("Sentiment Distribution")
st.altair_chart(pie_chart, use_container_width=True)


## 4. AI_EXTRACT: Information Extraction

Extract structured information from unstructured text


In [None]:
# Extract the main issue and the requested action from each email.
# AI_EXTRACT takes a responseFormat that maps output field names to the
# questions to answer, so both questions are asked in a single call per row
# (instead of one call per question). The answers come back nested under the
# 'response' key of the result; they are cast to STRING so pandas receives
# plain text rather than JSON-quoted values.
sql = """
WITH extracted AS (
    SELECT
        ticket_id,
        SUBSTR(content, 1, 150) AS content_preview,
        AI_EXTRACT(
            text => content,
            responseFormat => {
                'main_issue': 'What is the main issue or problem? Be concise.',
                'requested_action': 'What action does the customer want? Be specific.'
            }
        ) AS extraction
    FROM emails
    LIMIT 20
)
SELECT
    ticket_id,
    content_preview,
    extraction['response']['main_issue']::STRING AS main_issue,
    extraction['response']['requested_action']::STRING AS requested_action
FROM extracted
"""

df_extract = session.sql(sql).to_pandas()
st.subheader("Extracted Information")
st.dataframe(df_extract)


## 5. SUMMARIZE: Text Summarization

Generate concise summaries of customer emails


In [None]:
# Summarize the five longest emails (only those over 200 characters).
# The candidate rows are chosen first, then SUMMARIZE runs exactly once per
# selected email. Calling it twice per row (once for the text, once for its
# length) would double the model cost, and because generation is not
# guaranteed to be repeatable the reported length could belong to a different
# summary than the one displayed.
sql = """
WITH longest_emails AS (
    SELECT
        ticket_id,
        content,
        LENGTH(content) AS original_length
    FROM emails
    WHERE LENGTH(content) > 200
    ORDER BY original_length DESC
    LIMIT 5
),
summarized AS (
    SELECT
        ticket_id,
        content AS original_content,
        SNOWFLAKE.CORTEX.SUMMARIZE(content) AS summary,
        original_length
    FROM longest_emails
)
SELECT
    ticket_id,
    original_content,
    summary,
    original_length,
    LENGTH(summary) AS summary_length
FROM summarized
ORDER BY original_length DESC
"""

df_summary = session.sql(sql).to_pandas()
st.subheader("Email Summaries")

# One collapsible panel per ticket showing the original text and its summary.
# Column names are upper-case because Snowflake upper-cases unquoted aliases.
for _, row in df_summary.iterrows():
    with st.expander(f"Ticket {row['TICKET_ID']}"):
        st.write("**Original:**")
        st.write(row['ORIGINAL_CONTENT'])
        st.write("**Summary:**")
        st.write(row['SUMMARY'])
