# Sentiment Function Tool Validation
This notebook reuses the production `SentimentService` and mirrors the ADK `FunctionTool` logic so we can verify sentiment outcomes offline before wiring them into the multi-agent workflow.

## 1. Configure Environment Paths
Set the working directory to the project root so imports behave the same way they do inside the packaged app, and double-check that the `docs/` and `cache/` folders referenced in the runtime logs are available.

In [None]:
from pathlib import Path
import os
import sys

# Resolve the project root only once per kernel session. '..' is interpreted
# relative to the current working directory, and this cell changes that
# directory just below, so recomputing it on a re-run would climb one level
# further up the tree every time the cell is executed.
if 'PROJECT_ROOT' not in globals():
    PROJECT_ROOT = Path('..').resolve()

# Work from the project root so relative paths resolve against it.
os.chdir(PROJECT_ROOT)

# Put the root at the front of the import path, without adding duplicates.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

# Folders whose presence is reported below; a missing folder is only printed
# as False here, it does not abort the cell.
DOCS_DIR = PROJECT_ROOT / 'docs'
CACHE_DIR = PROJECT_ROOT / 'cache'
print(f"Project root set to: {PROJECT_ROOT}")
print(f"Docs directory exists: {DOCS_DIR.exists()} -> {DOCS_DIR}")
print(f"Cache directory exists: {CACHE_DIR.exists()} -> {CACHE_DIR}")

## 2. Import Sentiment Tooling
Load environment variables, configure logging, fail fast if any required OpenRouter setting is missing, and record the identifiers of the sentiment models used in production (`google/gemini-embedding-001` and `mistralai/ministral-8b`) for later checks.

In [None]:
import os
from dotenv import load_dotenv

from vaxtalk.config.logging_config import setup_logging
from vaxtalk.model import SentimentOutput, Intensity
from vaxtalk.sentiment.sentiment_service import SentimentService

# Configure the project logger (log directory: <project root>/logs, level INFO).
logger = setup_logging(log_level='INFO', log_dir=PROJECT_ROOT / 'logs')

# Read the project's .env file and report whether load_dotenv found anything.
env_loaded = load_dotenv(PROJECT_ROOT / '.env')
print(f".env loaded: {env_loaded}")

# Settings that must be present (and non-empty) before the notebook continues.
required_envs = [
    'OPENROUTER_API_KEY',
    'OPENROUTER_EMBED_URL',
    'OPENROUTER_CHAT_URL',
]
missing_envs = [name for name in required_envs if not os.environ.get(name)]
if missing_envs:
    raise RuntimeError(f"Missing required environment variables: {missing_envs}")

# Model identifiers kept as constants for later cells; nothing is contacted here.
EMBEDDING_MODEL_ID = 'google/gemini-embedding-001'
LLM_MODEL_ID = 'mistralai/ministral-8b'
print(f"Embedding model identifier ready: {EMBEDDING_MODEL_ID}")
print(f"Sentiment LLM identifier ready: {LLM_MODEL_ID}")

## 3. Instantiate SentimentService
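Create the hybrid sentiment analyzer with the production fusion weights, load the `sentiment_phrases.json` prototypes (from cache when available), and verify that at least 15 prototype embeddings are loaded before running tests. This cell also defines the notebook-local tool context and the `run_sentiment_tool` wrapper used in later sections.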

In [None]:
from dataclasses import dataclass
from typing import Any

# Build the service with its default configuration and load the phrase
# embeddings, allowing the cached copy to be used.
sentiment_service = SentimentService()
sentiment_service.build_sentiment_phrases_embeddings(use_cache=True)

# Report what was loaded and which fusion weights the service exposes.
proto_stats = sentiment_service.get_stats()
print("Sentiment prototypes loaded:", proto_stats.get('total'), proto_stats.get('by_emotion'))
print(
    f"Fusion weights -> LLM: {sentiment_service.w_llm:.2f}, Embeddings: {sentiment_service.w_emb:.2f}"
)

# Stop early when fewer than 15 prototypes are available; a missing 'total'
# entry counts as zero.
if proto_stats.get('total', 0) < 15:
    raise RuntimeError(
        "Expected at least 15 sentiment prototypes; rebuild cache with load-corpus."
    )


def _neutral_sentiment_output() -> SentimentOutput:
    """Return a ``SentimentOutput`` with satisfaction, frustration and confusion all at ``Intensity.LOW``."""
    baseline = dict.fromkeys(
        ('satisfaction', 'frustration', 'confusion'),
        Intensity.LOW,
    )
    return SentimentOutput(**baseline)


@dataclass
class NotebookToolContext:
    """Minimal tool-context stand-in that carries only a mutable ``state`` mapping.

    ``run_sentiment_tool`` reads ``state['user:input']`` from it and writes its
    result back under ``state['sentiment_output']``.
    """

    # Key/value store shared between the caller and the tool function.
    state: dict[str, Any]


def run_sentiment_tool(tool_context: NotebookToolContext) -> dict[str, Any]:
    """Analyse the user input held in ``tool_context.state`` and store the result.

    Reads ``state['user:input']``, runs it through the module-level
    ``sentiment_service``, and writes the resulting ``SentimentOutput`` to
    ``state['sentiment_output']``.

    Args:
        tool_context: Context whose ``state`` mapping supplies the input and
            receives the output.

    Returns:
        A dict with ``'status'`` (always ``'success'``), ``'reason'`` and
        ``'sentiment'`` (the output's ``model_dump()``). ``'reason'`` is one of:

        * ``'empty_input'`` - input missing, ``None`` or blank; neutral output.
        * ``'ok'`` - analysis succeeded on the first attempt.
        * ``'rebuilt'`` - first attempt raised ``RuntimeError``; embeddings were
          rebuilt without the cache and the retry succeeded.
        * ``'fallback'`` - analysis (or the rebuild/retry) failed; neutral output.
    """
    raw_input = tool_context.state.get('user:input')
    # Treat an explicit None like a missing entry: str(None) would otherwise be
    # analysed as the literal text 'None'.
    user_input = '' if raw_input is None else str(raw_input).strip()

    if not user_input:
        result = _neutral_sentiment_output()
        reason = 'empty_input'
    else:
        # The outer try also covers the rebuild-and-retry path: an exception
        # raised inside an ``except`` clause is not seen by sibling handlers, so
        # without this nesting a failed retry would bypass the neutral fallback.
        try:
            try:
                result = sentiment_service.analyze_emotion(user_input)
                reason = 'ok'
            except RuntimeError as exc:
                logger.warning(
                    "Sentiment analysis raised RuntimeError (%s); "
                    "rebuilding phrase embeddings without cache and retrying.",
                    exc,
                )
                sentiment_service.build_sentiment_phrases_embeddings(use_cache=False)
                result = sentiment_service.analyze_emotion(user_input)
                reason = 'rebuilt'
        except Exception as exc:  # noqa: BLE001
            logger.error("Sentiment tool fallback triggered: %s", exc)
            result = _neutral_sentiment_output()
            reason = 'fallback'

    tool_context.state['sentiment_output'] = result
    return {
        'status': 'success',
        'reason': reason,
        'sentiment': result.model_dump(),
    }

## 4. Define Test Utterances
Craft a balanced set of positive, negative, mixed, and neutral utterances plus their expected tone, then store everything inside a pandas `DataFrame` for downstream analysis.

In [None]:
import pandas as pd

# Column order for every utterance record (and for the resulting DataFrame).
_UTTERANCE_FIELDS = ('label', 'tone_hint', 'text')

# One (label, tone_hint, text) tuple per probe utterance.
_utterance_rows = [
    (
        'gratitude_positive',
        'High satisfaction, negligible frustration',
        'Thank you for clarifying the booster timing—this finally makes sense.',
    ),
    (
        'anxious_negative',
        'High frustration and confusion',
        'I am terrified because every clinic gives me a different answer and I still have no appointment.',
    ),
    (
        'curious_neutral',
        'Low across the board, fact-seeking',
        'Could you remind me which vaccines are recommended for teachers this fall?',
    ),
    (
        'mixed_relief',
        'Moderate satisfaction with residual confusion',
        'I feel relieved after reading the brochure, but I am still unsure about the side effects timeline.',
    ),
    (
        'impatient_negative',
        'Low satisfaction, high frustration',
        'This waiting list is ridiculous and I am losing patience with the whole process.',
    ),
    (
        'optimistic_positive',
        'High satisfaction, low frustration',
        'Great news about the updated pediatric schedule—it gives me confidence to book right away.',
    ),
]

# Expand the tuples into the list-of-dicts form used by the rest of the notebook.
test_utterances = [dict(zip(_UTTERANCE_FIELDS, entry)) for entry in _utterance_rows]

utterance_df = pd.DataFrame(test_utterances)
utterance_df

## 5. Run Sentiment Inference
Invoke the autonomous sentiment tool for every utterance and capture latency, tool metadata, and the fused intensity outputs so we can compare the results side by side.

In [None]:
import time

def _elapsed_ms(started: float) -> float:
    """Return milliseconds since *started* (a ``time.perf_counter`` reading), rounded to 0.1 ms."""
    return round((time.perf_counter() - started) * 1000, 1)


# Successful runs and failed runs are collected separately.
results: list[dict[str, Any]] = []
tool_errors: list[dict[str, Any]] = []

for row in utterance_df.itertuples(index=False):
    ctx = NotebookToolContext(state={'user:input': row.text})
    start = time.perf_counter()
    try:
        response = run_sentiment_tool(ctx)
        # Latency is taken right after the tool returns, before reading state.
        latency = _elapsed_ms(start)
        output: SentimentOutput = ctx.state['sentiment_output']
        record = {
            'label': row.label,
            'tone_hint': row.tone_hint,
            'text': row.text,
            'reason': response['reason'],
            'satisfaction': output.satisfaction,
            'frustration': output.frustration,
            'confusion': output.confusion,
            'latency_ms': latency,
        }
    except Exception as exc:  # noqa: BLE001
        tool_errors.append(
            {
                'label': row.label,
                'error': str(exc),
                'latency_ms': _elapsed_ms(start),
            }
        )
    else:
        results.append(record)

results_df = pd.DataFrame(results)
results_df

## 6. Validate Outputs and Diagnostics
Ensure every invocation produced sentiment labels, report latency stats, surface any tool errors, and visualize the distribution of intensity levels across the corpus.

In [None]:
import math
import matplotlib.pyplot as plt

# Nothing to validate if every tool invocation failed.
if results_df.empty:
    raise AssertionError('No successful sentiment runs were recorded.')

# Every successful run must carry a value for each emotion column.
for column in ['satisfaction', 'frustration', 'confusion']:
    if results_df[column].isna().any():
        raise AssertionError(f"Column {column} contains missing values.")

# pandas reductions return NumPy scalars; convert to built-in floats so the
# printed dict shows plain numbers rather than ``np.float64(...)`` wrappers
# (the scalar repr used by NumPy 2 and later).
latency_stats = {
    'min_ms': float(results_df['latency_ms'].min()),
    'max_ms': float(results_df['latency_ms'].max()),
    'avg_ms': round(float(results_df['latency_ms'].mean()), 1),
}
print('Latency summary (ms):', latency_stats)

# Surface failures collected by the inference cell, if any.
if tool_errors:
    print('Tool errors detected:')
    display(pd.DataFrame(tool_errors))
else:
    print('All tool invocations completed without exceptions.')

# Reshape to one row per (utterance, emotion) so intensities can be counted.
tall_df = (
    results_df
    .melt(id_vars=['label'], value_vars=['satisfaction', 'frustration', 'confusion'],
          var_name='emotion', value_name='intensity')
)
# Rows: emotion; columns: intensity level; cells: number of utterances.
counts = tall_df.value_counts(['emotion', 'intensity']).unstack(fill_value=0)
print('\nIntensity counts by emotion:')
display(counts)

# Grouped bar chart of the counts table above.
fig, ax = plt.subplots(figsize=(6, 4))
counts.plot(kind='bar', ax=ax)
ax.set_ylabel('Count')
ax.set_title('Sentiment Tool Intensity Distribution')
ax.grid(axis='y', linestyle='--', alpha=0.4)
plt.tight_layout()
plt.show()

results_df

### Helper: Probe Custom Utterances
Use this utility to feed any arbitrary text through the same autonomous tool flow without editing earlier cells.

In [None]:
def probe_sentiment(text: str) -> dict[str, Any]:
    """Run one piece of text through ``run_sentiment_tool`` and summarise the outcome.

    Args:
        text: The utterance to analyse.

    Returns:
        A dict holding the input text, the tool's ``status`` and ``reason``,
        and the satisfaction / frustration / confusion values read from the
        ``SentimentOutput`` the tool stored in the context state.
    """
    probe_ctx = NotebookToolContext(state={'user:input': text})
    tool_reply = run_sentiment_tool(probe_ctx)
    fused: SentimentOutput = probe_ctx.state['sentiment_output']

    summary: dict[str, Any] = {
        'input': text,
        'status': tool_reply['status'],
        'reason': tool_reply['reason'],
    }
    # Copy the three emotion readings across in a fixed order.
    for emotion in ('satisfaction', 'frustration', 'confusion'):
        summary[emotion] = getattr(fused, emotion)
    return summary

# Example probe with a single appreciative sentence; as the cell's final
# expression, the returned summary dict becomes the cell output.
probe_sentiment('I appreciate how clearly you explained the booster requirements.')