# In [None]:
# --- analysis_notebook.ipynb ---
# Notebook for analyzing call center output data using DuckDB and Altair
# Use this for charting sentiment, call topics, agent behavior, churn risk, etc.

# --- Load libraries ---
import duckdb
import pandas as pd
import altair as alt

# --- Connect to DuckDB ---
# The path is relative, so it resolves against the kernel's working directory.
db_path = '../data/call_summary.db'
# Open read-only: this notebook only queries the database. In the default
# read-write mode DuckDB creates a new, empty database file when the path is
# wrong, and the first SELECT below then fails with a misleading
# "table does not exist" error. Read-only mode fails at connect time instead
# and cannot modify the pipeline's output.
con = duckdb.connect(db_path, read_only=True)

# --- Load tables ---
# Each table is pulled in full into a pandas DataFrame, so the charts further
# down work on in-memory copies rather than on the live connection.
gpt = con.execute("SELECT * FROM gpt_output").df()
# NOTE(review): `redacted` is not referenced anywhere in the visible part of
# the notebook; kept because later cells may depend on it.
redacted = con.execute("SELECT * FROM pii_output").df()
transcripts = con.execute("SELECT * FROM transcript").df()

# --- Basic counts ---
# These are row counts; "Total calls" presumes one gpt_output row per call
# (TODO confirm against the pipeline that writes the table).
print("Total calls:", len(gpt))
print("Total transcripts:", len(transcripts))

# --- Shared chart recipe ---
def _count_bar_chart(
    data: pd.DataFrame,
    field: str,
    *,
    color: str,
    x_title: str,
    y_title: str,
    chart_title: str,
) -> alt.Chart:
    """Build a bar chart counting rows per distinct value of ``field``.

    All three charts below share this recipe and differ only in the column,
    bar colour and titles, so the recipe lives in one place.

    Args:
        data: DataFrame handed directly to ``alt.Chart``.
        field: Column name; encoded on the x axis as a nominal (``:N``) field.
        color: Fill colour of the bars.
        x_title: Title of the x axis.
        y_title: Title of the y axis (the row count).
        chart_title: Title shown above the chart.

    Returns:
        The configured chart. It is not displayed here; the caller decides
        when to render it.
    """
    return (
        alt.Chart(data)
        .mark_bar(color=color)
        .encode(
            x=alt.X(f'{field}:N', title=x_title),
            y=alt.Y('count():Q', title=y_title),
            # Hovering a bar shows the same count that sets its height.
            tooltip='count()',
        )
        .properties(title=chart_title)
    )


# --- Sentiment distribution ---
sentiment_chart = _count_bar_chart(
    gpt,
    'customer_satisfaction',
    color='#087ea4',
    x_title='Customer Satisfaction',
    y_title='Number of Calls',
    chart_title='Customer Satisfaction Distribution',
)
sentiment_chart.display()

# --- Issue classification breakdown ---
issue_chart = _count_bar_chart(
    gpt,
    'classification',
    color='#5e60ce',
    x_title='Issue Type',
    y_title='Count',
    chart_title='Top Issue Classifications',
)
issue_chart.display()

# --- Emotional progression trends ---
emotion_trend = _count_bar_chart(
    gpt,
    'emotional_tone_progression',
    color='#ff6b6b',
    x_title='Emotional Tone Progression',
    y_title='Call Volume',
    chart_title='Emotional Arc Trends',
)
emotion_trend.display()

# --- Optional: Join transcripts for keywords or language analysis ---
# merged = pd.merge(gpt, transcripts, on="call_id")
# Perform NLP-based keyword clustering or volume tracking here