# Aggregation and Translation with Cortex AISQL

This notebook demonstrates:
- **AI_AGG**: Aggregate insights across rows
- **AI_SUMMARIZE_AGG**: Aggregate summaries
- **AI_TRANSLATE**: Translate text
- **AI_FILTER**: Semantic filtering and joins


In [None]:
import streamlit as st
import pandas as pd
import altair as alt
from snowflake.snowpark.context import get_active_session

# Grab the notebook's active Snowpark session, then select the database,
# schema, and warehouse that every query below runs against.
session = get_active_session()
for context_stmt in (
    "USE DATABASE AISQL_DB",
    "USE SCHEMA AISQL_SCHEMA",
    "USE WAREHOUSE AISQL_WH",
):
    session.sql(context_stmt).collect()


## 1. AI_AGG: Aggregate Insights

Use `AI_AGG` to aggregate insights across multiple tickets, grouped by the day each ticket was created.


In [None]:
# Per-day rollup: count the emails created on each day and have AI_AGG
# describe the main issues; only the seven most recent days are returned.
# `sql` and `df_agg` are notebook globals, so their names are kept as-is.
sql = """
SELECT 
    DATE_TRUNC('day', created_at) as date,
    COUNT(*) as ticket_count,
    AI_AGG(content, 'Summarize the main issues reported today') as daily_insights
FROM emails
GROUP BY DATE_TRUNC('day', created_at)
ORDER BY date DESC
LIMIT 7
"""

# Run the query in Snowflake and pull the result into pandas for display.
daily_rollup = session.sql(sql)
df_agg = daily_rollup.to_pandas()

st.subheader("Daily Aggregated Insights")
st.dataframe(df_agg)


## 2. AI_TRANSLATE: Multilingual Support

Use `AI_TRANSLATE` to translate customer emails from English into Spanish, French, and German.


In [None]:
# Translate a five-row sample of emails from English into Spanish, French,
# and German, then show a short preview of each version per ticket.
sql = """
SELECT 
    ticket_id,
    SUBSTR(content, 1, 100) as original_english,
    AI_TRANSLATE(content, 'en', 'es') as spanish,
    AI_TRANSLATE(content, 'en', 'fr') as french,
    AI_TRANSLATE(content, 'en', 'de') as german
FROM emails
LIMIT 5
"""

df_translate = session.sql(sql).to_pandas()
st.subheader("Multilingual Translations")

# Same length as the SUBSTR used for the English preview in the query above.
PREVIEW_CHARS = 100


def _preview(text, limit=PREVIEW_CHARS):
    """Return the first `limit` characters of `text`, or "N/A" if it has no text.

    Only a non-empty string is sliced. Any other value (None, NaN, "") yields
    "N/A". pandas may represent a missing value as NaN, which is a truthy
    float, so a plain truthiness check would let it reach the slice and raise
    TypeError.
    """
    if isinstance(text, str) and text:
        return text[:limit]
    return "N/A"


# (display label, DataFrame column) for each translated language, in display order.
TRANSLATION_COLUMNS = (
    ("Spanish", "SPANISH"),
    ("French", "FRENCH"),
    ("German", "GERMAN"),
)

for _, row in df_translate.iterrows():
    with st.expander(f"Ticket {row['TICKET_ID']}"):
        # The English text is already truncated in SQL, so it is shown as-is.
        st.write("**English:**", row['ORIGINAL_ENGLISH'])
        for label, column in TRANSLATION_COLUMNS:
            st.write(f"**{label}:**", _preview(row[column]))


## 3. AI_SUMMARIZE_AGG: Aggregate Summaries

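Use `AI_SUMMARIZE_AGG` to create one coherent summary per user across all of that user's tickets (only users with at least two tickets are included).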


In [None]:
# Per-user rollup: for every user with at least two emails, count them and
# have AI_SUMMARIZE_AGG condense their content into a single summary.
# Only the ten users with the most emails are returned.
# `sql` and `df_sum_agg` are notebook globals, so their names are kept as-is.
sql = """
SELECT 
    user_id,
    COUNT(*) as ticket_count,
    AI_SUMMARIZE_AGG(content) as user_summary
FROM emails
GROUP BY user_id
HAVING COUNT(*) >= 2
ORDER BY ticket_count DESC
LIMIT 10
"""

# Run the query in Snowflake and pull the result into pandas for display.
user_rollup = session.sql(sql)
df_sum_agg = user_rollup.to_pandas()

st.subheader("User Issue Summaries")
st.dataframe(df_sum_agg)
