### <font color='#4285f4'>Overview</font>

Overview: Creates and runs Conversational Analytics Agents.  


Cost:
* Approximate cost: \$1

Author:
* Adam Paternostro

### <font color='#4285f4'>Video Walkthrough</font>

[Video](https://storage.googleapis.com/data-analytics-golden-demo/colab-videos/Demo-Conversational-Analytics.mp4)


In [None]:
# Embed the recorded walkthrough video in the notebook output.
from IPython.display import HTML

# The HTML object is the last expression in the cell, so the notebook
# renders it as the cell's output.
HTML("""
<video width="800" height="600" controls>
  <source src="https://storage.googleapis.com/data-analytics-golden-demo/colab-videos/Demo-Conversational-Analytics.mp4" type="video/mp4">
  Your browser does not support the video tag.
</video>
""")

### <font color='#4285f4'>License</font>

```
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
```

### <font color='#4285f4'>Pip installs</font>

In [None]:
# PIP Installs (if necessary)
import sys

# Run pip through the interpreter that backs this kernel (sys.executable)
# so the packages are installed into the environment the notebook uses.
!{sys.executable} -m pip install json_stream
!{sys.executable} -m pip install vega

### <font color='#4285f4'>Initialize</font>

In [None]:
from PIL import Image
from IPython.display import HTML
import IPython.display
import google.auth
import requests
import json
import uuid
import base64
import os
import cv2
import random
import time
import datetime
import base64
import random

import logging
from tenacity import retry, wait_exponential, stop_after_attempt, before_sleep_log, retry_if_exception

In [None]:
# Set these (run this cell to verify the output)

dataset_name = "agentic_beans_curated"
global_location = "global"
data_agent_id = f"da-coffee-demo-{datetime.datetime.now().strftime('%Y-%m-%d-%H-%M')}"
taxi_data_agent_id = f"da-taxi-demo-{datetime.datetime.now().strftime('%Y-%m-%d-%H-%M')}"

conversation_id = f"{data_agent_id}-01"
agentic_beans_conversation_id = f"{data_agent_id}-02"

# Get the current date and time
now = datetime.datetime.now()

# Format the date and time as desired
formatted_date = now.strftime("%Y-%m-%d-%H-%M")

# Get some values using gcloud
project_id = os.environ["GOOGLE_CLOUD_PROJECT"]
user = !(gcloud auth list --filter=status:ACTIVE --format="value(account)")

if len(user) != 1:
  raise RuntimeError(f"user is not set: {user}")
user = user[0]

print(f"project_id = {project_id}")
print(f"user = {user}")

### <font color='#4285f4'>Helper Methods</font>

#### restAPIHelper
Calls the Google Cloud REST API using the current user's credentials.

In [None]:
def restAPIHelper(url: str, http_verb: str, request_body: dict | None) -> dict:
  """Calls the Google Cloud REST API passing in the current users credentials.

  Args:
    url: Fully qualified REST endpoint to call.
    http_verb: One of "GET", "POST", "PUT", "PATCH" or "DELETE".
    request_body: JSON-serializable payload sent with POST/PUT/PATCH requests.
      It is not sent for GET and DELETE.

  Returns:
    The decoded JSON response body, or an empty dict when a successful
    response carries no body.

  Raises:
    RuntimeError: If the verb is unknown or the response status is not 2xx.
  """

  import google.auth.transport.requests
  import requests
  import google.auth
  import json

  # Verbs that send no body vs. verbs that send the JSON payload.
  bodyless_senders = {"GET": requests.get, "DELETE": requests.delete}
  payload_senders = {"POST": requests.post, "PUT": requests.put, "PATCH": requests.patch}

  # Fail fast on a bad verb before spending a round trip on a token refresh.
  if http_verb not in bodyless_senders and http_verb not in payload_senders:
    raise RuntimeError(f"Unknown HTTP verb: {http_verb}")

  # Get an access token based upon the current user
  creds, _ = google.auth.default()
  creds.refresh(google.auth.transport.requests.Request())

  headers = {
    "Content-Type" : "application/json",
    "Authorization" : "Bearer " + creds.token
  }

  if http_verb in bodyless_senders:
    response = bodyless_senders[http_verb](url, headers=headers)
  else:
    response = payload_senders[http_verb](url, json=request_body, headers=headers)

  # Any 2xx status is a success (not only 200).
  if 200 <= response.status_code < 300:
    # A successful response may legitimately have no body (e.g. 204).
    if not response.content:
      return {}
    return json.loads(response.content)

  error = f"Error restAPIHelper -> ' Status: '{response.status_code}' Text: '{response.text}'"
  raise RuntimeError(error)

### <font color='#4285f4'>MAIN CODE - Conversational Analytics - REST API Methods</font>

#### Display Helpers

##### conversational_analytics_display_html

In [None]:
def conversational_analytics_display_html(json_object):
  """Builds an HTML table from the first result set found in a chat response.

  Args:
    json_object: Iterable of message objects. The rows are read from
      item['systemMessage']['data']['result']['data'] of the first item where
      that path exists and holds a non-empty value.

  Returns:
    A string containing a <style> block followed by either the rendered table
    or a "no result data" paragraph. The string is returned, not displayed.
  """
  import html

  # Find the first message that carries a non-empty list of result rows.
  result_rows = None
  for item in json_object:
    try:
      data_rows = item['systemMessage']['data']['result']['data']
    except (KeyError, TypeError):
      # Entries without the 'result' structure (or non-dict entries) are skipped.
      continue
    if data_rows:
      result_rows = data_rows
      break

  style_block = """
  <style>
      table {
          border-collapse: collapse;
          width: 60%;
          font-family: Arial, sans-serif;
          margin-top: 20px;
      }
      th, td {
          border: 1px solid #dddddd;
          text-align: left;
          padding: 12px;
      }
      th {
          background-color: #f2f2f2;
          text-transform: capitalize;
      }
      tr:nth-child(even) {
          background-color: #f9f9f9;
      }
  </style>
  """

  if not result_rows:
    return style_block + "<p>No result data found in the provided JSON.</p>"

  # The keys of the first row define the columns and their order.
  headers = list(result_rows[0].keys())

  # Collect fragments and join once instead of repeated string concatenation.
  fragments = [style_block, "<table><thead><tr>"]

  for header in headers:
    # A little formatting to make headers look nice (e.g., "user_id" -> "User Id")
    formatted_header = header.replace('_', ' ').title()
    # Escape so that column names cannot inject markup.
    fragments.append(f"<th>{html.escape(formatted_header)}</th>")

  fragments.append("</tr></thead><tbody>")

  for row in result_rows:
    fragments.append("<tr>")
    for header in headers:
      # Values come from query results; escape them so characters such as
      # '<' or '&' are shown literally instead of being parsed as HTML.
      cell_text = html.escape(str(row.get(header, 'N/A')))
      fragments.append(f"<td>{cell_text}</td>")
    fragments.append("</tr>")

  fragments.append("</tbody></table>")
  return "".join(fragments)

##### conversational_analytics_display_chart (Vega Chart)

In [None]:
def conversational_analytics_display_chart(json_object):
    """Builds an Altair chart from the first 'vegaConfig' found in a chat response.

    Args:
        json_object: List of message objects. The chart spec is read from
            entry['systemMessage']['chart']['result']['vegaConfig'].

    Returns:
        An Altair chart built from the spec with width 700 and height 400, or
        None when the input is not a list, no spec is found, or Altair fails
        to build the chart. A message is printed in each of those cases.
    """
    if not isinstance(json_object, list):
        print("Error: The provided JSON does not represent a list of entries.")
        return None

    vega_spec = None
    for entry in json_object:
        if not isinstance(entry, dict):
            continue
        try:
            vega_spec = entry.get('systemMessage', {}).get('chart', {}).get('result', {}).get('vegaConfig')
        except AttributeError:
            # An intermediate value was not a dict (e.g. None); try the next entry.
            continue
        if vega_spec:
            break

    if not vega_spec:
        print("No chart configuration ('vegaConfig') found in the provided JSON data.")
        return None

    # Imported only once a spec has been found, so the early-exit paths above
    # do not require Altair.
    import altair as alt

    try:
        # Apply the display size to a copy so the caller's response data is
        # left untouched.
        sized_spec = {**vega_spec, 'width': 700, 'height': 400}
        return alt.Chart.from_dict(sized_spec)
    except Exception as e:
        print(f"An error occurred while creating the chart with Altair: {e}")
        return None

##### conversational_analytics_display_sql

In [None]:
def conversational_analytics_display_sql(json_object) -> str | None:
    """
    Parses a JSON string OR a Python list to find the 'generatedSql' value.

    Returns:
        The SQL query string if found, otherwise None.
    """
    import json
    # Now, proceed with the original logic on the parsed list
    for item in json_object:
        if isinstance(item, dict):
            sql_query = item.get('systemMessage', {}).get('data', {}).get('generatedSql')
            if sql_query:
                return sql_query

    return None

#### Data Agents: List, Exists, Create, Get, Delete

In [None]:
def data_agent_list(project_id, global_location):
  """Returns the REST response for a GET on the dataAgents collection."""

  # https://cloud.google.com/gemini/docs/conversational-analytics-api/reference/rest/v1alpha/projects.locations.dataAgents/list
  parent = f"projects/{project_id}/locations/{global_location}"
  return restAPIHelper(
    f"https://geminidataanalytics.googleapis.com/v1alpha/{parent}/dataAgents",
    "GET",
    None,
  )

In [None]:
def data_agent_exists(project_id, global_location, data_agent_id):
  """Returns True when the listed data agents include the given id, else False."""

  listing = data_agent_list(project_id, global_location)

  # Full resource name that a matching entry must carry.
  wanted_name = f"projects/{project_id}/locations/{global_location}/dataAgents/{data_agent_id}"

  # The response only has a "dataAgents" key when there is something to list.
  agents = listing["dataAgents"] if "dataAgents" in listing else []
  for agent in agents:
    if agent["name"] == wanted_name:
      print(f"data_agent_exists: {data_agent_id} already exists")
      return True

  return False

In [None]:
def data_agent_get(project_id, global_location, data_agent_id):
  """Fetches a single data agent by id (the agent is assumed to exist)."""

  # https://cloud.google.com/gemini/docs/conversational-analytics-api/reference/rest/v1alpha/projects.locations.dataAgents/get
  agent_name = f"projects/{project_id}/locations/{global_location}/dataAgents/{data_agent_id}"
  return restAPIHelper(
    f"https://geminidataanalytics.googleapis.com/v1alpha/{agent_name}",
    "GET",
    request_body = None,
  )

In [None]:
def data_agent_create(project_id, global_location, data_agent_id, system_instruction, bigquery_data_source, enable_python = False):
  """Creates a data agent, or returns the existing one when the id is already taken."""

  # Do not create a duplicate: hand back the existing agent instead.
  if data_agent_exists(project_id, global_location, data_agent_id):
    print("data_agent_create: Not created since it exists")
    return data_agent_get(project_id, global_location, data_agent_id)

  parent = f"projects/{project_id}/locations/{global_location}"

  # Context the agent is published with: data sources, instructions and
  # whether Python analysis is enabled.
  published_context = {
    "datasource_references": bigquery_data_source,
    "system_instruction": system_instruction,
    "options": {"analysis": {"python": {"enabled": enable_python}}},
  }

  payload = {
    "name": f"{parent}/dataAgents/{data_agent_id}",
    "description": f"This is the description of {data_agent_id}.",
    "data_analytics_agent": {"published_context": published_context},
  }

  # https://cloud.google.com/gemini/docs/conversational-analytics-api/reference/rest/v1alpha/projects.locations.dataAgents/create
  create_url = f"https://geminidataanalytics.googleapis.com/v1alpha/{parent}/dataAgents?data_agent_id={data_agent_id}"
  return restAPIHelper(create_url, "POST", payload)

In [None]:
def data_agent_delete(project_id, global_location, data_agent_id):
  """Issues a DELETE for the given data agent (the agent is assumed to exist)."""

  # https://cloud.google.com/gemini/docs/conversational-analytics-api/reference/rest/v1alpha/projects.locations.dataAgents/delete
  agent_name = f"projects/{project_id}/locations/{global_location}/dataAgents/{data_agent_id}"
  return restAPIHelper(
    f"https://geminidataanalytics.googleapis.com/v1alpha/{agent_name}",
    "DELETE",
    request_body = None,
  )

#### Data Agent Conversation: List, Exists, Create, Get, Delete

In [None]:
def data_agent_conversations_list(project_id, global_location):
  """Returns the REST response for a GET on the conversations collection
  of the given project and location."""

  # https://cloud.google.com/gemini/docs/conversational-analytics-api/reference/rest/v1alpha/projects.locations.conversations/list
  parent = f"projects/{project_id}/locations/{global_location}"
  return restAPIHelper(
    f"https://geminidataanalytics.googleapis.com/v1alpha/{parent}/conversations",
    "GET",
    None,
  )

In [None]:
def data_agent_conversations_get(project_id, global_location, conversation_id):
  """Fetches a single conversation by id."""

  # REST resource: projects.locations.conversations (GET on one conversation)
  conversation_name = f"projects/{project_id}/locations/{global_location}/conversations/{conversation_id}"
  return restAPIHelper(
    f"https://geminidataanalytics.googleapis.com/v1alpha/{conversation_name}",
    "GET",
    request_body = None,
  )

In [None]:
def data_agent_conversations_exists(project_id, global_location, conversation_id):
  """Returns True when the listed conversations include the given id, else False."""

  listing = data_agent_conversations_list(project_id, global_location)

  # Full resource name that a matching entry must carry.
  wanted_name = f"projects/{project_id}/locations/{global_location}/conversations/{conversation_id}"

  # The response only has a "conversations" key when there is something to list.
  conversations = listing["conversations"] if "conversations" in listing else []
  for conversation in conversations:
    if conversation["name"] == wanted_name:
      print(f"data_agent_conversations_exists: {conversation_id} already exists")
      return True

  return False

In [None]:
def data_agent_conversations_create(project_id, global_location, data_agent_id, conversation_id):
  """Creates a conversation bound to a data agent, or returns the existing
  conversation when the id is already taken.

  Note: Conversation ids must be unique across data agents.
  """

  # Do not create a duplicate: hand back the existing conversation instead.
  if data_agent_conversations_exists(project_id, global_location, conversation_id):
    print("data_agent_conversations_create: Not created since it exists")
    return data_agent_conversations_get(project_id, global_location, conversation_id)

  parent = f"projects/{project_id}/locations/{global_location}"

  payload = {
    "agents": [f"{parent}/dataAgents/{data_agent_id}"],
    "name": f"{parent}/conversations/{conversation_id}",
  }

  create_url = f"https://geminidataanalytics.googleapis.com/v1alpha/{parent}/conversations?conversation_id={conversation_id}"
  return restAPIHelper(create_url, "POST", payload)

##### Data Agent Conversation: Chat

In [None]:
def data_agent_chat(project_id, global_location, data_agent_id, chat_message, conversation_id = None):
  """Sends one chat message to a data agent and returns the REST response.

  When a conversation id is supplied the request references that conversation
  (stateful chat); otherwise the agent is referenced directly (stateless chat)."""

  location_path = f"projects/{project_id}/locations/{global_location}"
  agent_context = {
    "data_agent": f"{location_path}/dataAgents/{data_agent_id}",
  }

  # Fields shared by the stateful and stateless request shapes.
  payload = {
    "parent": location_path,
    "messages": [{"userMessage": {"text": chat_message}}],
  }

  if conversation_id is None:
    # Stateless chat
    payload["data_agent_context"] = agent_context
  else:
    # Stateful conversation
    payload["conversation_reference"] = {
      "conversation": f"{location_path}/conversations/{conversation_id}",
      "data_agent_context": agent_context,
    }

  # https://cloud.google.com/gemini/docs/conversational-analytics-api/reference/rest/v1alpha/projects.locations/chat
  chat_url = f"https://geminidataanalytics.googleapis.com/v1alpha/{location_path}:chat"
  return restAPIHelper(chat_url, "POST", payload)

In [None]:
def process_message(message_dict):
    """
    Prints a one-off progress line for a single chat message.

    Only messages with a non-empty "systemMessage" are handled. The first
    matching item below is printed and the function returns:
      1. the user question   (schema.query.question)
      2. the generated SQL   (data.generatedSql)
      3. the charting task   (chart.query.instructions)
      4. the final summary   (text.parts joined with spaces)
    Nothing is printed when none of them is present.
    """
    payload = message_dict.get("systemMessage")
    if not payload:
        # e.g. a userMessage: nothing to report
        return

    # 1. Initial user question
    asked = payload.get("schema", {}).get("query", {}).get("question")
    if asked:
        print(f"-> User Question: {asked}")
        return

    # 2. SQL produced by the agent
    data_section = payload.get("data")
    sql_text = data_section.get("generatedSql") if data_section else None
    if sql_text:
        print(f"\n-> Generated SQL:\n{sql_text.strip()}")
        return

    # 3. Charting instructions
    chart_section = payload.get("chart")
    chart_task = chart_section.get("query", {}).get("instructions") if chart_section else None
    if chart_task:
        print(f"\n-> Charting Task: {chart_task}")
        return

    # 4. Final text summary
    text_section = payload.get("text")
    summary_parts = text_section.get("parts") if text_section else None
    if summary_parts:
        print(f"\n-> Final Summary: {' '.join(summary_parts)}")
        return

def convert_to_native(obj):
    """
    Recursively converts mapping-like and sequence-like objects (such as
    json-stream's custom objects) into native Python dictionaries and lists.

    Args:
        obj: Any value. Mappings become dicts, non-string sequences become
            lists, everything else is returned unchanged.

    Returns:
        The converted structure built from plain dict/list containers.
    """
    import collections.abc

    # Anything that behaves like a dictionary (a "Mapping")
    if isinstance(obj, collections.abc.Mapping):
        return {key: convert_to_native(value) for key, value in obj.items()}

    # Anything that behaves like a list/tuple (a "Sequence"). str, bytes and
    # bytearray are also Sequences but are scalar values here, so they must
    # not be broken up into their individual elements.
    if isinstance(obj, collections.abc.Sequence) and not isinstance(obj, (str, bytes, bytearray)):
        return [convert_to_native(item) for item in obj]

    # Otherwise, it's a primitive value, so return it directly
    return obj


def data_agent_chat_streaming(project_id, global_location, data_agent_id, chat_message, conversation_id = None):
  """Chats with a data agent, printing progress as the response streams in.

  If a conversation id is supplied then we will use our conversation (memory)
  for the chat; otherwise the chat is stateless.

  Args:
    project_id: Google Cloud project id.
    global_location: Location segment of the resource path.
    data_agent_id: Id of the data agent to chat with.
    chat_message: The user's message text.
    conversation_id: Optional id of an existing conversation.

  Returns:
    A list of the streamed messages converted to native dicts/lists, or None
    when the request or the stream processing fails (the error is printed).
  """
  import traceback

  import google.auth
  import google.auth.transport.requests
  import json_stream # You'll need to pip install json-stream
  import requests

  # https://cloud.google.com/gemini/docs/conversational-analytics-api/reference/rest/v1alpha/projects.locations/chat
  location_path = f"projects/{project_id}/locations/{global_location}"
  url = f"https://geminidataanalytics.googleapis.com/v1alpha/{location_path}:chat"

  agent_context = {
    "data_agent": f"{location_path}/dataAgents/{data_agent_id}",
  }

  # Fields shared by the stateful and stateless request shapes.
  request_body = {
    "parent": location_path,
    "messages": [{"userMessage": {"text": chat_message}}],
  }

  if conversation_id is not None:
    print("Stateful conversation")
    request_body["conversation_reference"] = {
      "conversation": f"{location_path}/conversations/{conversation_id}",
      "data_agent_context": agent_context,
    }
  else:
    print("Stateless conversation")
    request_body["data_agent_context"] = agent_context

  # Get an access token based upon the current user
  creds, _ = google.auth.default()
  creds.refresh(google.auth.transport.requests.Request())

  headers = {
    "Content-Type" : "application/json",
    "Authorization" : "Bearer " + creds.token
  }

  try:
    streamed_messages = []
    with requests.post(url, json=request_body, headers=headers, stream=True) as response:
      # Raise an exception for bad status codes (4xx or 5xx)
      response.raise_for_status()
      print("Connection successful. Receiving and decompressing stream...\n")

      # persistent=True keeps parsed values available after they are consumed,
      # so the collected messages can still be converted once the stream ends.
      message_stream = json_stream.load(response.iter_content(), persistent=True)

      for message_obj in message_stream:
        process_message(message_obj)
        streamed_messages.append(message_obj)

    print("\n\nStream finished.")

    print("Converting custom objects to native Python types...")
    return convert_to_native(streamed_messages)

  except requests.exceptions.RequestException as e:
    print(f"An API error occurred: {e}")
    return None
  except Exception as e:
    # Best-effort notebook helper: report the failure instead of raising.
    print(f"An unexpected error occurred: {e}")
    traceback.print_exc()
    return None

### <font color='#4285f4'>MAIN CODE - Conversational Analytics - Chat</font>

#### Conversational Analytics Class

In [None]:
import os
import json
import collections.abc
import html
import requests
import google.auth
import google.auth.transport.requests
import json_stream
import altair as alt
from IPython.display import display, HTML
import pandas as pd  # This is NEW

# _ConversationalAnalyticsStyler class remains unchanged from the previous version.
class _ConversationalAnalyticsStyler:
    ICONS = {'Agent':'https://storage.googleapis.com/data-analytics-golden-demo/images/BigQuery-32-color.svg','Error':'https://storage.googleapis.com/data-analytics-golden-demo/images/error.svg'}
    @staticmethod
    def _get_full_html_style_and_script()->str:
        script='<script>function renderVegaChart(spec){if(typeof vegaEmbed==="function"){vegaEmbed("#vega-chart-container",spec,{"actions":true}).catch(console.error)}else{console.error("vega-embed.js is not loaded.")}}</script><script src="https://cdn.jsdelivr.net/npm/vega@5"></script><script src="https://cdn.jsdelivr.net/npm/vega-lite@5"></script><script src="https://cdn.jsdelivr.net/npm/vega-embed@6"></script>';style='<style>body{background-color:#f8f9fa;font-family:"Roboto",Arial,sans-serif;color:#202124}.conversation-container{width:95%;max-width:1200px;margin:auto}.message-container{display:flex;align-items:flex-start;margin-bottom:12px}.avatar{width:32px;height:32px;border-radius:50%;margin-right:15px;flex-shrink:0;margin-top:5px}.message-content{display:flex;flex-direction:column;align-items:flex-start;width:100%}.thought-bubble{background-color:#3c4043;color:#e8eaed;border:none;border-radius:18px;padding:10px 15px;max-width:95%;box-shadow:0 1px 2px 0 rgba(60,64,67,.3),0 1px 3px 1px rgba(60,64,67,.15);margin-bottom:8px;box-sizing:border-box}.thought-bubble.error-bubble{background-color:#f2dede;color:#a94442}.thought-bubble p,.thought-bubble pre{margin:5px 0;font-size:.9em;white-space:pre-wrap;word-break:break-word}.thought-bubble.error-bubble p,.thought-bubble.error-bubble pre{color:#a94442}.thought-chip{display:inline-block;padding:3px 10px;font-size:.8em;font-weight:500;border-radius:12px;margin-bottom:8px}.thought-chip.thought-chip-main{background-color:#8ab4f8;color:#202124}.thought-chip.error-chip{background-color:#d9534f;color:#fff}.formatted-result-container{margin-top:20px;border:1px solid #dfe1e5;border-radius:8px;background-color:#fff;box-shadow:0 1px 2px 0 rgba(60,64,67,.3),0 1px 3px 1px rgba(60,64,67,.15)}.formatted-result-header{padding:12px 16px;font-size:1.1em;font-weight:500;color:#202124;border-bottom:1px solid #dfe1e5;background-color:#f8f9fa;border-radius:8px 8px 0 
0}.formatted-result-content{padding:16px}.sql-query-display{background-color:#f1f3f4;padding:15px;border-radius:8px;margin-bottom:20px}.sql-query-display pre code{white-space:pre-wrap;word-break:break-all}.google-table{border-collapse:collapse;width:100%;border:1px solid #dfe1e5;box-shadow:0 2px 2px rgba(0,0,0,.14);margin-bottom:20px}.google-table thead tr{background-color:#4285f4;color:#fff}.google-table th,.google-table td{padding:12px 15px;text-align:left;border:1px solid #dfe1e5}.google-table tbody tr:nth-of-type(even){background-color:#f8f9fa}.google-table tbody tr:hover{background-color:#e8f0fe}#vega-chart-container{width:100%}</style>'
        return style+script
    def create_thought_bubble_html(self,chip_text:str,content_text:str)->str:
        return f'<div class="conversation-container"><div class="message-container agent"><img src="{self.ICONS["Agent"]}" alt="Agent icon" class="avatar"><div class="message-content"><div class="thought-bubble"><div class="thought-chip thought-chip-main">{chip_text}</div><pre>{html.escape(content_text)}</pre></div></div></div></div>'
    def create_error_bubble_html(self,error_text:str)->str:
        return f'<div class="conversation-container"><div class="message-container agent"><img src="{self.ICONS["Error"]}" alt="Error icon" class="avatar"><div class="message-content"><div class="thought-bubble error-bubble"><div class="thought-chip error-chip">Error</div><pre>{html.escape(error_text)}</pre></div></div></div></div>'
    def create_formatted_result_html(self,sql:str|None,table_html:str|None,chart_json:str|None)->str:
        content_parts=[]
        if sql:content_parts.append(f'<div class="sql-query-display"><pre><code>{html.escape(sql)}</code></pre></div>')
        if table_html:content_parts.append(table_html.replace("<table",'<table class="google-table"',1))
        if chart_json:content_parts.append('<div id="vega-chart-container"></div><script>renderVegaChart('+chart_json+");</script>")
        if not content_parts:return""
        return f'<div class="formatted-result-container"><div class="formatted-result-header">Formatted Result</div><div class="formatted-result-content">{"".join(content_parts)}</div></div>'



class ConversationalAnalytics:
    """
    A class to interact with the Google Cloud Conversational Analytics API,
    providing a rich, styled, and streaming notebook experience with conversation memory.

    After each :meth:`chat` call the parsed results are available through the
    ``sql``, ``html``, ``chart`` and ``errors`` properties, the raw streamed
    messages through ``response_json`` and the result rows through ``dataframe``.
    """

    # Base URL shared by every REST call this client makes.
    _API_ROOT = "https://geminidataanalytics.googleapis.com/v1alpha"

    def __init__(self, project_id: str | None = None, location: str = "global"):
        """Create a client bound to one project and location.

        Args:
            project_id: Google Cloud project id. Falls back to the
                ``GOOGLE_CLOUD_PROJECT`` environment variable when omitted.
            location: Location segment used in every resource path.

        Raises:
            ValueError: If no project id is given and the environment variable
                is unset or empty.
        """
        self.project_id = project_id or os.environ.get("GOOGLE_CLOUD_PROJECT")
        if not self.project_id:
            raise ValueError("Project ID not found. Provide it or set GOOGLE_CLOUD_PROJECT.")
        self.location = location
        self._styler = _ConversationalAnalyticsStyler()
        # (conversation_id, agent_id) pairs already verified/created in this session.
        self._conversation_cache = set()
        # Initializes response_json, dataframe and the values behind the properties.
        self._reset_results()

    # --- Public Properties ---
    @property
    def sql(self) -> str | None:
        """Generated SQL extracted from the last chat response, or None."""
        return self._sql_query

    @property
    def html(self) -> str | None:
        """HTML table built from the last chat response's result rows, or None."""
        return self._html_table

    @property
    def chart(self) -> alt.Chart | None:
        """Altair chart built from the last chat response's Vega config, or None."""
        return self._chart_obj

    @property
    def errors(self) -> list[str]:
        """Error messages collected during the last chat call."""
        return self._errors

    # --- Main Chat Method ---
    def chat(self, agent_id: str, prompt: str, conversation_id: str | None = None) -> None:
        """Send ``prompt`` to a data agent and render the streamed answer.

        Each streamed message is displayed as it arrives. Once the stream ends,
        the SQL, result table and chart are parsed and a formatted result panel
        is displayed when a table or chart is present. Request and client-side
        failures are displayed as error bubbles and recorded in ``errors``
        rather than raised.

        Args:
            agent_id: Id of the data agent to talk to.
            prompt: The user's question.
            conversation_id: When given, the conversation is looked up (and
                created if missing) and referenced in the request; otherwise
                only the agent context is sent.
        """
        self._reset_results()
        display(HTML(self._styler._get_full_html_style_and_script()))

        if conversation_id:
            self._ensure_conversation_exists(agent_id, conversation_id)

        parent = self._parent_path()
        api_url = f"{self._API_ROOT}/{parent}:chat"
        base_payload = {"messages": [{"userMessage": {"text": prompt}}]}
        agent_context = {"data_agent": f"{parent}/dataAgents/{agent_id}"}

        if conversation_id:
            base_payload["conversation_reference"] = {
                "conversation": f"{parent}/conversations/{conversation_id}",
                "data_agent_context": agent_context,
            }
        else:
            base_payload["data_agent_context"] = agent_context

        headers = self._auth_headers()

        try:
            all_messages = []
            with requests.post(api_url, json=base_payload, headers=headers, stream=True) as response:
                response.raise_for_status()
                for message_obj in json_stream.load(response.iter_content(), persistent=True):
                    self._stream_and_display_message(message_obj)
                    all_messages.append(message_obj)
            self.response_json = self._convert_to_native(all_messages)
            self._parse_final_response()
            if self.html or self.chart:
                chart_json = self.chart.to_json(indent=2) if self.chart else None
                final_html = self._styler.create_formatted_result_html(self.sql, self.html, chart_json)
                display(HTML(final_html))
        except requests.exceptions.RequestException as e:
            # A requests.Response is falsy for 4xx/5xx statuses, so test against
            # None; a plain truthiness check would always discard the error body.
            detail = e.response.text if e.response is not None else ""
            error_msg = f"API Request Error: {detail or e}"
            self._errors.append(error_msg)
            display(HTML(self._styler.create_error_bubble_html(error_msg)))
        except Exception:
            import traceback
            error_msg = f"Client-side Error: {traceback.format_exc()}"
            self._errors.append(error_msg)
            display(HTML(self._styler.create_error_bubble_html(error_msg)))

    # --- Conversation Management Methods ---
    def _ensure_conversation_exists(self, agent_id: str, conversation_id: str):
        """Make sure the conversation exists on the server, creating it if needed.

        Successful checks are remembered per (conversation_id, agent_id) so the
        lookup happens at most once per pair for this client instance.
        """
        cache_key = (conversation_id, agent_id)
        if cache_key in self._conversation_cache:
            return

        print(f"Checking for conversation '{conversation_id}'...")
        if not self._conversation_exists_api(conversation_id):
            print(f"Conversation not found. Creating '{conversation_id}' for agent '{agent_id}'...")
            self._create_conversation_api(agent_id, conversation_id)
            print("Conversation created successfully.")
        else:
            print("Conversation found on server.")

        self._conversation_cache.add(cache_key)

    def _conversation_exists_api(self, conversation_id: str) -> bool:
        """Return True if a GET on the conversation succeeds, False on HTTP 404.

        Raises:
            RuntimeError: For any other HTTP error reported by the REST helper.
        """
        url = f"{self._API_ROOT}/{self._parent_path()}/conversations/{conversation_id}"
        try:
            self._rest_api_helper(url, "GET", None)
        except RuntimeError as err:
            # The helper chains the original HTTPError, so read the real status
            # code instead of searching the message (the body may contain "404").
            cause_response = getattr(err.__cause__, "response", None)
            status = getattr(cause_response, "status_code", None)
            if status == 404 or (status is None and "404" in str(err)):
                return False
            raise
        return True

    def _create_conversation_api(self, agent_id: str, conversation_id: str) -> dict:
        """Create the conversation for ``agent_id`` and return the decoded response."""
        parent = self._parent_path()
        url = f"{self._API_ROOT}/{parent}/conversations?conversation_id={conversation_id}"
        request_body = {
            "agents": [f"{parent}/dataAgents/{agent_id}"],
            "name": f"{parent}/conversations/{conversation_id}",
        }
        return self._rest_api_helper(url, "POST", request_body)

    # --- Internal Helper and Parsing Methods ---
    def _parent_path(self) -> str:
        """Return the ``projects/<project>/locations/<location>`` resource prefix."""
        return f"projects/{self.project_id}/locations/{self.location}"

    @staticmethod
    def _auth_headers() -> dict:
        """Return JSON request headers carrying a freshly refreshed bearer token."""
        creds, _ = google.auth.default()
        creds.refresh(google.auth.transport.requests.Request())
        return {"Content-Type": "application/json", "Authorization": "Bearer " + creds.token}

    @staticmethod
    def _rest_api_helper(url: str, http_verb: str, request_body: dict | None = None) -> dict:
        """Issue an authenticated GET or POST and return the decoded JSON body.

        Args:
            url: Full request URL.
            http_verb: ``"GET"`` or ``"POST"``.
            request_body: JSON payload for POST requests; ignored for GET.

        Returns:
            The decoded JSON response, or ``{}`` when the response has no body.

        Raises:
            ValueError: If ``http_verb`` is neither GET nor POST.
            RuntimeError: If the server answers with an HTTP error status; the
                original ``HTTPError`` is chained as ``__cause__``.
        """
        headers = ConversationalAnalytics._auth_headers()
        try:
            if http_verb == "GET":
                response = requests.get(url, headers=headers)
            elif http_verb == "POST":
                response = requests.post(url, json=request_body, headers=headers)
            else:
                raise ValueError(f"Unsupported HTTP verb: {http_verb}")
            response.raise_for_status()
            return response.json() if response.content else {}
        except requests.exceptions.HTTPError as e:
            raise RuntimeError(f"Error restAPIHelper -> '{e.response.status_code}': {e.response.text}") from e

    def _reset_results(self):
        """Clear everything produced by a previous chat call."""
        self.response_json = None
        self._sql_query = None
        self._html_table = None
        self._chart_obj = None
        self._errors = []
        # Result rows of the last response; filled by _create_html_table.
        self.dataframe = pd.DataFrame()

    def _stream_and_display_message(self, message_dict: dict) -> None:
        """Display one streamed message as a thought or error bubble.

        Only the first matching part of a message is shown: a top-level error,
        then within ``systemMessage`` an error, the analyzed question, the
        generated SQL, the chart instructions, or the text summary. Messages
        with none of these produce no output. Error text is also appended to
        ``errors``.
        """
        chip, content, is_err = None, None, False
        if err := message_dict.get("error"):
            is_err, content = True, err.get('message', 'Unspecified err.')
        elif sm := message_dict.get("systemMessage"):
            if err := sm.get("error"):
                is_err, content = True, err.get("text")
            elif q := sm.get("schema", {}).get("query", {}).get("question"):
                chip, content = "Analyzing Query", q
            elif d := sm.get("data"):
                if sql := d.get("generatedSql"):
                    chip, content = "Generating SQL", sql.strip()
            elif c := sm.get("chart"):
                if i := c.get("query", {}).get("instructions"):
                    chip, content = "Preparing Chart", i
            elif t := sm.get("text"):
                if p := t.get("parts"):
                    chip, content = "Summary", ' '.join(p)
        if content:
            if is_err:
                self._errors.append(content)
                bubble = self._styler.create_error_bubble_html(content)
            else:
                bubble = self._styler.create_thought_bubble_html(chip, content)
            display(HTML(bubble))

    def _parse_final_response(self):
        """Populate the SQL, HTML table and chart from ``response_json``."""
        if not self.response_json:
            return
        self._sql_query = self._extract_sql(self.response_json)
        self._html_table = self._create_html_table(self.response_json)
        self._chart_obj = self._create_chart(self.response_json)

    @staticmethod
    def _convert_to_native(obj):
        """Recursively copy mappings to ``dict`` and non-string sequences to ``list``."""
        if isinstance(obj, collections.abc.Mapping):
            return {k: ConversationalAnalytics._convert_to_native(v) for k, v in obj.items()}
        if isinstance(obj, collections.abc.Sequence) and not isinstance(obj, str):
            return [ConversationalAnalytics._convert_to_native(i) for i in obj]
        return obj

    def _extract_sql(self, json_object: list) -> str | None:
        """Return the last non-empty ``systemMessage.data.generatedSql``, or None."""
        for item in reversed(json_object):
            if not isinstance(item, dict):
                continue
            generated = item.get("systemMessage", {}).get("data", {}).get("generatedSql")
            if generated:
                return generated
        return None

    def _create_html_table(self, json_object: list) -> str | None:
        """Build an HTML table from the last non-empty result set in the response.

        Also stores the rows in ``self.dataframe``. Columns are taken from the
        keys of the first row; a row missing a column shows ``N/A``. Column
        names and cell values are HTML-escaped.

        Returns:
            The ``<table>`` markup, or None when no message carries result rows.
        """
        # Imported locally so this method does not depend on a module-level import.
        from html import escape

        result_rows = None
        for item in reversed(json_object):
            try:
                if data_rows := item['systemMessage']['data']['result']['data']:
                    result_rows = data_rows
                    break
            except (KeyError, TypeError):
                continue

        if not result_rows:
            return None

        self.dataframe = pd.DataFrame(result_rows)

        headers = list(result_rows[0].keys())
        parts = ["<table><thead><tr>"]
        parts.extend(f"<th>{escape(header.replace('_', ' ').title())}</th>" for header in headers)
        parts.append("</tr></thead><tbody>")
        for row in result_rows:
            parts.append("<tr>")
            # Values come from query results, so escape them before embedding.
            parts.extend(f"<td>{escape(str(row.get(header, 'N/A')))}</td>" for header in headers)
            parts.append("</tr>")
        parts.append("</tbody></table>")
        return "".join(parts)

    def _create_chart(self, json_object: list) -> alt.Chart | None:
        """Build an Altair chart from the first Vega config found in the response.

        The spec is sized to the container width and a height of 400. Entries
        without a usable chart are skipped; any other failure while building
        the chart is recorded in ``errors`` and None is returned.

        NOTE(review): this takes the first chart in the response while the SQL
        and table helpers take the last match — confirm that is intended.
        """
        for entry in json_object:
            try:
                spec = entry.get('systemMessage', {}).get('chart', {}).get('result', {}).get('vegaConfig')
                if not spec:
                    continue
                # Size a copy so the raw response kept in response_json is not modified.
                sized_spec = {**spec, 'width': 'container', 'height': 400}
                return alt.Chart.from_dict(sized_spec)
            except (AttributeError, TypeError, KeyError):
                continue
            except Exception as exc:
                self._errors.append(f"Altair Chart Error: {exc}")
                return None
        return None

### <font color='#4285f4'>MAIN CODE - Conversational Analytics - Demo - REST APIs</font>

#### **Create Conversation Agent**

##### System Instruction - for tables to query

In [None]:
# System instruction for the data agent: a YAML-style description of the tables,
# example ("golden") queries and action plans, table relationships and glossary terms.
# Table references are built from project_id and dataset_name so they stay in sync
# with the BigQuery data source configuration, which uses the same two variables.
system_instruction = f"""
- system_description: "You are an expert data analyst for a coffee truck business and can answer questions about customer, order, product, and truck data."
- tables:
    - table:
        - name: {project_id}.{dataset_name}.customer
        - description: Customer information for Agentic Beans coffee truck customers, including their demographics and purchase behavior.
        - synonyms: customers, coffee_lovers
        - tags: 'personal_data, demographics, sales_performance'
        - fields:
            - field:
                - name: customer_id
                - description: The unique identifier and primary key for each customer across all transactions.
            - field:
                - name: customer_name
                - description: The full name of the customer, used for personalized interactions.
                - tag: person
                - sample_values: 'Alice Wonderland, Bob The Builder, Charlie Chaplin'
            - field:
                - name: customer_yob
                - description: The customer's year of birth, useful for age-based demographic segmentation.
            - field:
                - name: customer_email
                - description: The unique email address of the customer, primarily used for direct marketing and sending receipts.
                - tag: contact
                - sample_values: 'alice.w@example.com, bob.b@mail.com, c.chaplin@email.net'
            - field:
                - name: customer_inception_date
                - description: The date when the customer made their very first transaction, indicating their tenure.
            - field:
                - name: country_code
                - description: The two-letter ISO 3166-1 alpha-2 country code where the customer is located (e.g., 'US' for United States, 'CA' for Canada).
                - sample_values: 'US, CA, GB, AU'
            - field:
                - name: customer_total_spend
                - description: The cumulative amount of money the customer has spent across all their orders.
                - aggregations: 'sum, avg'
            - field:
                - name: customer_average_transaction_value
                - description: The average value of a single order placed by this customer.
                - aggregations: 'sum, avg'
            - field:
                - name: customer_number_of_transactions
                - description: The total count of orders placed by the customer.
                - aggregations: 'sum, avg'
            - field:
                - name: customer_favorite_product_id
                - description: The identifier of the product that this customer has purchased most frequently.
            - field:
                - name: customer_favorite_product_category_id
                - description: The identifier of the product category that this customer has purchased most frequently.
    - table:
        - name: {project_id}.{dataset_name}.order_detail
        - description: Detailed line items for each order, specifying products, quantities, and individual prices.
        - synonyms: order_items, line_items, purchased_items
        - fields:
            - field:
                - name: order_detail_id
                - description: Unique identifier for each individual item entry in an order.
            - field:
                - name: order_header_id
                - description: Foreign key linking to the order_header table, indicating which main order this detail belongs to.
            - field:
                - name: truck_menu_id
                - description: Identifier for the specific menu item as it appeared on the truck's menu at the time of order.
            - field:
                - name: order_quantity
                - description: The number of units of this specific menu item purchased in the order.
                - aggregations: 'sum, avg'
            - field:
                - name: product_id
                - description: Foreign key referencing the 'product_id' from the 'product' table, identifying the core product offering.
            - field:
                - name: product_category_id
                - description: Foreign key referencing the 'product_category_id' from the 'product_category' table, identifying the category of the purchased product.
            - field:
                - name: size
                - description: The specific size of the product offering, e.g., 'S', 'M', 'L', or 'N/A' for items without a size option.
                - sample_values: 'S, M, L, N/A'
            - field:
                - name: price
                - description: The price of this specific product item as listed on the truck's menu for this order.
                - aggregations: 'sum, avg'
            - field:
                - name: order_detail_total
                - description: The calculated total cost for this line item (order_quantity * price).
                - aggregations: 'sum, avg'
    - table:
        - name: {project_id}.{dataset_name}.order_header
        - description: Main information for each customer order, including timestamp, customer, truck, and total cost.
        - synonyms: orders, transactions, sales
        - tags: 'sales, order_processing, revenue'
        - fields:
            - field:
                - name: order_header_id
                - description: Unique identifier for each order placed at a coffee truck.
            - field:
                - name: order_header_timestamp
                - description: The exact date and time when the order was successfully completed.
            - field:
                - name: truck_id
                - description: The identifier of the coffee truck where this order was placed.
            - field:
                - name: customer_id
                - description: The identifier of the customer who placed this order.
            - field:
                - name: order_neighborhood
                - description: The geographical area or neighborhood where the order was fulfilled, important for location-based analysis.
                - sample_values: 'Downtown, Financial District, East Village, West Loop'
            - field:
                - name: order_header_total
                - description: The grand total amount for the entire order, including all items.
                - aggregations: 'sum, avg'
            - field:
                - name: payment_method
                - description: The method used by the customer to pay for the order, e.g., 'Credit Card', 'Cash', 'Mobile App'.
                - sample_values: 'Credit Card, Cash, Mobile App, Digital Wallet'
    - table:
        - name: {project_id}.{dataset_name}.product
        - description: Definitions and details for all individual products offered by Agentic Beans, regardless of specific truck menus.
        - synonyms: products, coffee_items, food_items
        - fields:
            - field:
                - name: product_id
                - description: The unique identifier and primary key for each distinct product.
            - field:
                - name: product_category_id
                - description: A foreign key that links this product to its broader category in the product_category table.
            - field:
                - name: product_name
                - description: The public-facing name of the individual product, as it appears on general menus (e.g., 'Latte', 'Croissant', 'Espresso').
                - tag: item_name
                - sample_values: 'Latte, Cold Brew, Cappuccino, Blueberry Muffin'
            - field:
                - name: product_description
                - description: A detailed, customer-facing description of the product, useful for marketing materials and menus.
            - field:
                - name: product_image_prompt
                - description: The specific textual prompt used to generate the visual representation of the product via AI.
            - field:
                - name: product_image_uri
                - description: The URI (Uniform Resource Identifier) indicating the storage location of the product's image in Google Cloud Storage.
            - field:
                - name: product_image_obj_ref
                - description: Contains detailed BigLake Object Reference Data for the product image, if applicable.
    - table:
        - name: {project_id}.{dataset_name}.product_category
        - description: Categories used to group and organize various products offered by Agentic Beans.
        - synonyms: categories, product_groups
        - fields:
            - field:
                - name: product_category_id
                - description: The unique identifier and primary key for each product category.
            - field:
                - name: product_category_name
                - description: The public-facing name of the product category, such as 'Espresso & Milk Drinks' or 'Pastries'.
                - sample_values: 'Espresso & Milk, Brewed Coffee, Pastries, Teas'
            - field:
                - name: product_category_description
                - description: A detailed description of the product category, providing context for products within it.
            - field:
                - name: product_category_image_prompt
                - description: A GenAI prompt used to generate the image associated with this product category.
            - field:
                - name: product_category_image_uri
                - description: The URI location of the product category's image in Google Cloud Storage.
            - field:
                - name: product_category_image_obj_ref
                - description: Contains detailed BigLake Object Reference Data for the product category image, if applicable.
    - table:
        - name: {project_id}.{dataset_name}.truck
        - description: Comprehensive details about each individual coffee truck in the Agentic Beans fleet.
        - synonyms: trucks, coffee_trucks, fleet_vehicles
        - tags: 'operations, assets, logistics'
        - fields:
            - field:
                - name: truck_id
                - description: The unique identifier and primary key for each operational coffee truck.
            - field:
                - name: truck_name
                - description: A unique, memorable name assigned to each truck, e.g., 'Bean Machine', 'The Daily Grind'.
                - sample_values: 'Bean Machine, Roaming Roast, Espresso Express, Grind House'
            - field:
                - name: truck_license_plate
                - description: The official vehicle license plate number for identification and compliance.
            - field:
                - name: truck_vin
                - description: The Vehicle Identification Number, a unique serial number for the truck's manufacturing and registration.
            - field:
                - name: truck_acquisition_timestamp
                - description: The date and time when the truck was integrated into the Agentic Beans fleet.
    - table:
        - name: {project_id}.{dataset_name}.truck_menu
        - description: A mapping of which products are available on which trucks, including truck-specific pricing and sizes.
        - synonyms: menu, truck_offerings, daily_specials
        - fields:
            - field:
                - name: truck_menu_id
                - description: The unique identifier and primary key for each distinct menu entry on a specific truck.
            - field:
                - name: truck_id
                - description: Foreign key referencing the 'truck' table, indicating which truck offers this menu item.
            - field:
                - name: product_id
                - description: Foreign key referencing the 'product' table, identifying the core product of this menu item.
            - field:
                - name: size
                - description: The specific size available for this product on this truck's menu, e.g., 'S', 'M', 'L', or 'N/A'.
                - sample_values: 'S, M, L, N/A'
            - field:
                - name: price
                - description: The price at which this specific product and size is sold on this particular truck's menu.
                - aggregations: 'sum, avg'
- golden_queries:
    - golden_query:
        - natural_language_query: How many unique customers have placed an order?
        - sql_query: SELECT COUNT(DISTINCT customer_id) FROM {project_id}.{dataset_name}.order_header
    - golden_query:
        - natural_language_query: What is the total revenue generated from all orders?
        - sql_query: SELECT SUM(order_header_total) FROM {project_id}.{dataset_name}.order_header
    - golden_query:
        - natural_language_query: Which product category has the highest number of distinct products?
        - sql_query: >-
            SELECT
            pc.product_category_name,
            COUNT(p.product_id)
            FROM {project_id}.{dataset_name}.product_category pc
            JOIN {project_id}.{dataset_name}.product p
            ON pc.product_category_id = p.product_category_id
            GROUP BY pc.product_category_name
            ORDER BY COUNT(p.product_id) DESC
            LIMIT 1
    - golden_query:
        - natural_language_query: How many orders were placed through the 'Bean Machine' truck?
        - sql_query: >-
            SELECT
            COUNT(oh.order_header_id)
            FROM {project_id}.{dataset_name}.order_header oh
            JOIN {project_id}.{dataset_name}.truck t
            ON oh.truck_id = t.truck_id
            WHERE t.truck_name = 'Bean Machine'
    - golden_query:
        - natural_language_query: What is the average price of a 'Latte' across all trucks and sizes?
        - sql_query: >-
            SELECT AVG(tm.price)
            FROM {project_id}.{dataset_name}.truck_menu tm
            JOIN {project_id}.{dataset_name}.product p
            ON tm.product_id = p.product_id
            WHERE p.product_name = 'Latte'
- golden_action_plans:
    - golden_action_plan:
        - natural_language_query: Show me the total number of orders broken down by payment method.
        - action_plan:
            - step: >-
                Run a SQL query on the table {project_id}.{dataset_name}.order_header
                to get the count of orders grouped by their 'payment_method'.
            - step: >-
                Create a bar chart or pie chart using the retrieved data,
                where each slice/bar represents a payment method and its corresponding order count.
    - golden_action_plan:
        - natural_language_query: Display the average order value for customers from each country.
        - action_plan:
            - step: >-
                Run a SQL query on the {project_id}.{dataset_name}.customer table
                to calculate the average of 'customer_average_transaction_value' for each 'country_code'.
            - step: >-
                Create a bar chart or geo map showing the average transaction value for each country code.
    - golden_action_plan:
        - natural_language_query: Analyze which trucks are selling the most of 'Espresso' products.
        - action_plan:
            - step: >-
                Run a SQL query that joins 'truck', 'truck_menu', 'product', and 'order_detail' tables.
                Filter for 'Espresso' products, sum 'order_quantity', and group by 'truck_name'.
            - step: >-
                Visualize the results using a bar chart showing each truck's total 'Espresso' quantity sold.
- relationships:
    - relationship:
        - name: customer_places_order
        - description: Each customer can place multiple orders, and each order is associated with one customer.
        - relationship_type: one-to-many
        - join_type: left
        - left_table: {project_id}.{dataset_name}.customer
        - right_table: {project_id}.{dataset_name}.order_header
        - relationship_columns: "// Join columns - left_column: 'customer_id' - right_column: 'customer_id'"
    - relationship:
        - name: order_header_has_details
        - description: Each order header can have multiple detailed line items, and each detail belongs to one order header.
        - relationship_type: one-to-many
        - join_type: left
        - left_table: {project_id}.{dataset_name}.order_header
        - right_table: {project_id}.{dataset_name}.order_detail
        - relationship_columns: "// Join columns - left_column: 'order_header_id' - right_column: 'order_header_id'"
    - relationship:
        - name: product_in_order_detail
        - description: A product can appear as a line item in multiple order details.
        - relationship_type: one-to-many
        - join_type: left
        - left_table: {project_id}.{dataset_name}.product
        - right_table: {project_id}.{dataset_name}.order_detail
        - relationship_columns: "// Join columns - left_column: 'product_id' - right_column: 'product_id'"
    - relationship:
        - name: product_belongs_to_category
        - description: Each product belongs to a single product category, and a category can contain many products.
        - relationship_type: one-to-many
        - join_type: left
        - left_table: {project_id}.{dataset_name}.product_category
        - right_table: {project_id}.{dataset_name}.product
        - relationship_columns: "// Join columns - left_column: 'product_category_id' - right_column: 'product_category_id'"
    - relationship:
        - name: truck_has_menu
        - description: Each truck has its own menu with multiple offerings.
        - relationship_type: one-to-many
        - join_type: left
        - left_table: {project_id}.{dataset_name}.truck
        - right_table: {project_id}.{dataset_name}.truck_menu
        - relationship_columns: "// Join columns - left_column: 'truck_id' - right_column: 'truck_id'"
    - relationship:
        - name: truck_menu_item_in_order_detail
        - description: A specific truck menu item can be chosen in many order details.
        - relationship_type: one-to-many
        - join_type: left
        - left_table: {project_id}.{dataset_name}.truck_menu
        - right_table: {project_id}.{dataset_name}.order_detail
        - relationship_columns: "// Join columns - left_column: 'truck_menu_id' - right_column: 'truck_menu_id'"
    - relationship:
        - name: order_from_truck
        - description: Each order is placed at a specific coffee truck.
        - relationship_type: one-to-many
        - join_type: left
        - left_table: {project_id}.{dataset_name}.truck
        - right_table: {project_id}.{dataset_name}.order_header
        - relationship_columns: "// Join columns - left_column: 'truck_id' - right_column: 'truck_id'"
- glossaries:
    - glossary:
        - term: customer
        - description: A patron who purchases products or services from Agentic Beans coffee trucks.
    - glossary:
        - term: product
        - description: Any item, such as a coffee beverage, pastry, or merchandise, offered for sale by Agentic Beans.
    - glossary:
        - term: truck
        - description: A mobile retail unit operated by Agentic Beans for selling coffee and related products.
    - glossary:
        - term: order
        - description: A completed transaction initiated by a customer at an Agentic Beans coffee truck.
    - glossary:
        - term: menu item
        - description: A specific product configuration (product, size, price) as listed on an individual truck's offerings.
    - glossary:
        - term: revenue
        - description: The total income generated from the sale of goods and services before expenses.
- additional_instructions:
    - text: All inquiries and analyses should be grounded in the context of Agentic Beans coffee truck operations.
    - text: Focus on providing insights that help understand sales performance, customer preferences, and truck efficiency.
"""

##### Data Source - for tables to query

In [None]:
# BigQuery tables the agent is allowed to query. Every table described in the
# system instruction is listed here, including truck_menu, which the golden
# queries and relationships rely on.
bigquery_data_source = {
    "bq": {
        "tableReferences": [
            {
                "projectId": project_id,
                "datasetId": dataset_name,
                "tableId": table_id,
            }
            for table_id in (
                "customer",
                "order_detail",
                "order_header",
                "product",
                "product_category",
                "truck",
                "truck_menu",
            )
        ]
    }
}

##### **Create the Conversational Analytics Agent**

In [None]:
# Create the data agent from the system instruction and data source defined above.
# NOTE(review): the meaning of the trailing False flag is defined by
# data_agent_create, which lives elsewhere in this notebook — confirm.
response_data_agent_create = data_agent_create(
    project_id,
    global_location,
    data_agent_id,
    system_instruction,
    bigquery_data_source,
    False,
)

In [None]:
# Pretty-print the create-agent response as indented JSON.
print(f"formatted_json: {json.dumps(response_data_agent_create, indent=2)}")

#### **Create Conversation**

In [None]:
# Print the conversation id that the stateful chat cells will reuse.
print(f"conversation_id = {conversation_id}")

# Create the conversation for the data agent under that id.
response_data_agent_conversations_create = data_agent_conversations_create(
    project_id,
    global_location,
    data_agent_id,
    conversation_id,
)

In [None]:
# Pretty-print the conversation creation response as indented JSON
print(f"formatted_json: {json.dumps(response_data_agent_conversations_create, indent=2)}")

#### **Stateful Chat**

- Create a stateful chat so that we can ask follow-up questions.
- We will use the conversation that we created.

##### Stateful Chat: Ask initial question

In [None]:
# First question of the stateful chat; conversation_id is passed so the follow-up can reuse the same conversation
chat_message = "How many orders are there by product?"
stateful_chat_response_01 = data_agent_chat(project_id, global_location, data_agent_id, chat_message, conversation_id)

In [None]:
# You get back a python dictionary which you can display
# (uncomment the print below to inspect the raw response as JSON)
# print(json.dumps(stateful_chat_response_01, indent=2))

# Render the response as HTML in the cell output
display(HTML(conversational_analytics_display_html(stateful_chat_response_01)))

##### Stateful Chat: Ask a follow-up question

In [None]:
# Ask a follow-up question in the same conversation (same conversation_id as the initial question)
chat_message = "Now just show me the top 3."
stateful_chat_response_02 = data_agent_chat(project_id, global_location, data_agent_id, chat_message, conversation_id)

In [None]:
# You get back a python dictionary which you can display
# (uncomment the print below to inspect the raw response as JSON)
# print(json.dumps(stateful_chat_response_02, indent=2))
# Render the response as HTML in the cell output
display(HTML(conversational_analytics_display_html(stateful_chat_response_02)))

#### **Stateless Chat**

##### Stateless Chat: Join tables

In [None]:
# Do a stateless chat (you will need to manually manage history if you are not doing one-shot prompting)

# Cause the agent to join the tables
chat_message = "Count the order by user first name and show the top 10 highest order count."

# None is passed where the stateful cells pass a conversation id
stateless_chat_response_01 = data_agent_chat(project_id, global_location, data_agent_id, chat_message, None)

In [None]:
# You get back a python dictionary which you can display
# (uncomment the print below to inspect the raw response as JSON)
# print(json.dumps(stateless_chat_response_01, indent=2))

# Render the response as HTML in the cell output
display(HTML(conversational_analytics_display_html(stateless_chat_response_01)))

##### Stateless Chat: Draw a bar chart from the JSON (Vega Config)

In [None]:
# Ask for a chart; the response is displayed as SQL, HTML and a chart in the next cell
chat_message = "Create a bar chart of orders by product category"

# Stateless call: None is passed where the stateful cells pass a conversation id
stateless_chat_response_02 = data_agent_chat(project_id, global_location, data_agent_id, chat_message, None)

In [None]:
# You get back a python dictionary which you can display
# (uncomment the print below to inspect the raw response as JSON)
# print(json.dumps(stateless_chat_response_02, indent=2))

# Show the output of the SQL display helper for this response
display(conversational_analytics_display_sql(stateless_chat_response_02))
print()

# Render the response as HTML
display(HTML(conversational_analytics_display_html(stateless_chat_response_02)))
print()

# Show the output of the chart display helper for this response
display(conversational_analytics_display_chart(stateless_chat_response_02))

### <font color='#4285f4'>MAIN CODE - Conversational Analytics - Demo - Streaming and Displaying Results</font>

In [None]:
# Helper instance used by the chat cells in this section
ca = ConversationalAnalytics()

In [None]:
# Chat with the coffee agent; no conversation_id is passed here
ca.chat(
    agent_id=data_agent_id,
    prompt="What are my sales by product and product category"
)

In [None]:
# We can also access the results as a dataframe so we can use them in our code

ca.dataframe

In [None]:
# Another chat without a conversation_id
ca.chat(
    agent_id=data_agent_id,
    prompt="What are my top 10 sales by user first and last name"
)

In [None]:
# Stateful chat: a conversation_id is supplied, and the conversation is
# created automatically if it does not exist yet.
ca.chat(
    agent_id=data_agent_id,
    prompt="What are my sales by product and product category.",
    conversation_id=agentic_beans_conversation_id,
)

In [None]:
# Ask a follow-up question; the agent has memory because the same
# conversation_id is reused.
ca.chat(
    agent_id=data_agent_id,
    prompt="Now show me the bottom 5",
    conversation_id=agentic_beans_conversation_id,
)

### <font color='#4285f4'>MAIN CODE - Conversational Analytics - Taxi Data</font>

##### System Instruction - for tables to query

In [None]:
# System instruction (YAML-style text) for the NYC taxi agent. It describes the
# tables and their fields, golden queries, golden action plans, relationships,
# glossary terms and additional instructions. project_id is interpolated into
# every fully qualified table name.
# Each taxi_trips lookup relationship is declared as many-to-one.
taxi_system_instruction = f"""
- system_description: You are an expert in New York City taxi trip data, capable of
    analyzing and answering questions related to taxi ride statistics, payment methods,
    vendor performance, and location-based insights for yellow and green taxis.
- tables:
  - table:
    - name: {project_id}.nyc_taxi_curated.taxi_trips
    - description: Recorded taxi trip details in New York City, including fare amounts,
        distances, and timestamps.
    - synonyms: rides, cabs, journeys
    - tags: transportation, fares, trips
    - fields:
      - field:
        - name: Vendor_Id
        - description: A unique identifier for the taxi vendor.
      - field:
        - name: Pickup_DateTime
        - description: The date and time when the taxi ride began.
      - field:
        - name: Dropoff_DateTime
        - description: The date and time when the taxi ride ended.
      - field:
        - name: Passenger_Count
        - description: The number of passengers in the taxi during the trip.
        - aggregations: sum, avg, count
      - field:
        - name: Trip_Distance
        - description: The distance traveled during the taxi ride in miles.
        - aggregations: sum, avg, max
      - field:
        - name: Rate_Code_Id
        - description: The rate code ID applicable to the taxi ride.
      - field:
        - name: Store_And_Forward
        - description: A flag indicating whether the trip record was stored and forwarded
            to the vendor.
        - sample_values: Y, N
      - field:
        - name: PULocationID
        - description: The ID of the location where the passenger was picked up.
      - field:
        - name: DOLocationID
        - description: The ID of the location where the passenger was dropped off.
      - field:
        - name: Payment_Type_Id
        - description: The ID representing the payment type used for the taxi ride.
      - field:
        - name: Fare_Amount
        - description: The amount of the fare for the taxi ride.
        - aggregations: sum, avg, max
      - field:
        - name: Surcharge
        - description: Any additional surcharge applied to the fare.
        - aggregations: sum, avg
      - field:
        - name: MTA_Tax
        - description: The Metropolitan Transportation Authority tax amount.
        - aggregations: sum, avg
      - field:
        - name: Tip_Amount
        - description: The amount of tip given by the passenger.
        - aggregations: sum, avg
      - field:
        - name: Tolls_Amount
        - description: The amount of tolls paid during the taxi ride.
        - aggregations: sum, avg
      - field:
        - name: Improvement_Surcharge
        - description: The improvement surcharge amount.
        - aggregations: sum, avg
      - field:
        - name: Total_Amount
        - description: The total amount charged for the taxi ride.
        - aggregations: sum, avg
      - field:
        - name: Congestion_Surcharge
        - description: The congestion surcharge amount.
        - aggregations: sum, avg
  - table:
    - name: {project_id}.nyc_taxi_curated.payment_type
    - description: Lookup table for payment types used in taxi rides.
    - synonyms: payment_methods
    - tags: payment
    - fields:
      - field:
        - name: Payment_Type_Id
        - description: Unique identifier for the payment type used for a taxi ride.
      - field:
        - name: Payment_Type_Description
        - description: Description of the payment type used (e.g., credit card, cash).
        - sample_values:
          - Credit card
          - Cash
          - No charge
          - Dispute
          - Unknown
          - Voided trip
  - table:
    - name: {project_id}.nyc_taxi_curated.rate_code
    - description: Lookup table for various rate codes applicable to taxi rides.
    - synonyms: fares, pricing
    - tags: rate
    - fields:
      - field:
        - name: Rate_Code_Id
        - description: The Rate Code ID associated with a taxi ride.
      - field:
        - name: Rate_Code_Description
        - description: A textual description of the Rate Code ID.
        - sample_values:
          - Standard rate
          - JFK
          - Newark
          - Nassau or Westchester
          - Negotiated fare
          - Group ride
  - table:
    - name: {project_id}.nyc_taxi_curated.trip_type
    - description: Defines various categories or types for taxi trips.
    - synonyms: ride_categories
    - tags: trip_classification
    - fields:
      - field:
        - name: Trip_Type_Id
        - description: A unique identifier for the trip type.
      - field:
        - name: Trip_Type_Description
        - description: A textual description of the trip type.
        - sample_values:
          - Street-hail
          - Dispatch
  - table:
    - name: {project_id}.nyc_taxi_curated.vendor
    - description: Information about different taxi vendors.
    - synonyms: taxi_companies, operators
    - tags: vendor
    - fields:
      - field:
        - name: Vendor_Id
        - description: The ID of the taxi vendor.
      - field:
        - name: Vendor_Description
        - description: A description of the taxi vendor.
        - sample_values:
          - Creative Mobile Technologies, LLC
          - VeriFone Inc.
  - table:
    - name: {project_id}.nyc_taxi_curated.location
    - description: Geographical location data for taxi pickup and dropoff points in
        NYC.
    - synonyms: zones, areas, districts
    - tags: geography, location
    - fields:
      - field:
        - name: location_id
        - description: The unique identifier for the location.
      - field:
        - name: borough
        - description: The borough in New York City where the location is situated.
        - sample_values:
          - Manhattan
          - Queens
          - Bronx
          - Brooklyn
          - EWR
          - Staten Island
      - field:
        - name: zone
        - description: The specific zone within the borough.
      - field:
        - name: service_zone
        - description: Categorization of the zone based on the service provided.
      - field:
        - name: latitude
        - description: The latitude coordinate of the location.
      - field:
        - name: longitude
        - description: The longitude coordinate of the location.
- golden_queries:
  - golden_query:
    - natural_language_query: How many taxi trips were recorded?
    - sql_query: SELECT COUNT(*) FROM {project_id}.nyc_taxi_curated.taxi_trips
  - golden_query:
    - natural_language_query: What was the total fare amount for all trips?
    - sql_query: SELECT SUM(Fare_Amount) FROM {project_id}.nyc_taxi_curated.taxi_trips
  - golden_query:
    - natural_language_query: How many trips were paid by credit card?
    - sql_query: SELECT COUNT(t.Payment_Type_Id) FROM {project_id}.nyc_taxi_curated.taxi_trips
        AS t JOIN {project_id}.nyc_taxi_curated.payment_type AS pt ON t.Payment_Type_Id
        = pt.Payment_Type_Id WHERE pt.Payment_Type_Description = 'Credit card'
  - golden_query:
    - natural_language_query: What is the average trip distance?
    - sql_query: SELECT AVG(Trip_Distance) FROM {project_id}.nyc_taxi_curated.taxi_trips
  - golden_query:
    - natural_language_query: What is the most common payment type description?
    - sql_query: SELECT pt.Payment_Type_Description, COUNT(*) FROM {project_id}.nyc_taxi_curated.taxi_trips
        AS t JOIN {project_id}.nyc_taxi_curated.payment_type AS pt ON t.Payment_Type_Id
        = pt.Payment_Type_Id GROUP BY pt.Payment_Type_Description ORDER BY COUNT(*)
        DESC LIMIT 1
  - golden_query:
    - natural_language_query: What were the top 5 pickup locations by count of trips?
    - sql_query: SELECT l.zone, COUNT(t.PULocationID) FROM {project_id}.nyc_taxi_curated.taxi_trips
        AS t JOIN {project_id}.nyc_taxi_curated.location AS l ON t.PULocationID
        = l.location_id GROUP BY l.zone ORDER BY COUNT(t.PULocationID) DESC LIMIT
        5
  - golden_query:
    - natural_language_query: Show total amount by vendor description.
    - sql_query: SELECT v.Vendor_Description, SUM(t.Total_Amount) FROM {project_id}.nyc_taxi_curated.taxi_trips
        AS t JOIN {project_id}.nyc_taxi_curated.vendor AS v ON t.Vendor_Id
        = v.Vendor_Id GROUP BY v.Vendor_Description
- golden_action_plans:
  - golden_action_plan:
    - natural_language_query: Show me the number of trips by payment type.
    - action_plan:
      - step: Run a SQL query joining {project_id}.nyc_taxi_curated.taxi_trips
          and {project_id}.nyc_taxi_curated.payment_type tables to count trips
          per payment type description.
      - step: Create a bar chart showing trip counts for each payment type.
  - golden_action_plan:
    - natural_language_query: What is the average fare amount per rate code?
    - action_plan:
      - step: Run a SQL query joining {project_id}.nyc_taxi_curated.taxi_trips
          and {project_id}.nyc_taxi_curated.rate_code tables to calculate
          the average Fare_Amount for each Rate_Code_Description.
      - step: Present the results as a table or bar chart.
- relationships:
  - relationship:
    - name: taxi_trips_to_payment_type
    - description: Maps taxi trips to their payment method details.
    - relationship_type: many-to-one
    - join_type: left
    - left_table: {project_id}.nyc_taxi_curated.taxi_trips
    - right_table: {project_id}.nyc_taxi_curated.payment_type
    - relationship_columns:
      - left_column: Payment_Type_Id
      - right_column: Payment_Type_Id
  - relationship:
    - name: taxi_trips_to_rate_code
    - description: Connects taxi trips to the rate code under which they were charged.
    - relationship_type: many-to-one
    - join_type: left
    - left_table: {project_id}.nyc_taxi_curated.taxi_trips
    - right_table: {project_id}.nyc_taxi_curated.rate_code
    - relationship_columns:
      - left_column: Rate_Code_Id
      - right_column: Rate_Code_Id
  - relationship:
    - name: taxi_trips_to_vendor
    - description: Links taxi trips to the specific vendor providing the service.
    - relationship_type: many-to-one
    - join_type: left
    - left_table: {project_id}.nyc_taxi_curated.taxi_trips
    - right_table: {project_id}.nyc_taxi_curated.vendor
    - relationship_columns:
      - left_column: Vendor_Id
      - right_column: Vendor_Id
  - relationship:
    - name: taxi_trips_to_pickup_location
    - description: Associates taxi trips with their pickup location details.
    - relationship_type: many-to-one
    - join_type: left
    - left_table: {project_id}.nyc_taxi_curated.taxi_trips
    - right_table: {project_id}.nyc_taxi_curated.location
    - relationship_columns:
      - left_column: PULocationID
      - right_column: location_id
  - relationship:
    - name: taxi_trips_to_dropoff_location
    - description: Associates taxi trips with their dropoff location details.
    - relationship_type: many-to-one
    - join_type: left
    - left_table: {project_id}.nyc_taxi_curated.taxi_trips
    - right_table: {project_id}.nyc_taxi_curated.location
    - relationship_columns:
      - left_column: DOLocationID
      - right_column: location_id
- glossaries:
  - glossary:
    - term: PULocationID
    - description: Pick-up Location ID
  - glossary:
    - term: DOLocationID
    - description: Drop-off Location ID
  - glossary:
    - term: MTA
    - description: Metropolitan Transportation Authority
- additional_instructions:
  - text: All taxi trip data pertains to yellow and green taxi trips in New York City.
  - text: Times are recorded in Eastern Time (ET).
"""

##### Data Source - for tables to query

In [None]:
# BigQuery tables the taxi agent is allowed to query. All of them live in the
# nyc_taxi_curated dataset, so the references are generated from the ids.
taxi_bigquery_data_source = {
    "bq": {
        "tableReferences": [
            {
                "projectId": project_id,
                "datasetId": "nyc_taxi_curated",
                "tableId": table_id,
            }
            for table_id in (
                "taxi_trips",
                "payment_type",
                "rate_code",
                "trip_type",
                "vendor",
                "location",
            )
        ]
    }
}

##### **Demo**

In [None]:
# Create the taxi agent from the taxi system instruction and the taxi
# BigQuery data source.
# NOTE(review): the last positional argument (False) is a flag defined by
# data_agent_create, which is outside this section - confirm its meaning.
response_taxi_data_agent_create = data_agent_create(
    project_id,
    global_location,
    taxi_data_agent_id,
    taxi_system_instruction,
    taxi_bigquery_data_source,
    False,
)

In [None]:
# Separate helper instance used for the taxi agent chats below
ca_taxi = ConversationalAnalytics()

In [None]:
# Chat with the taxi agent; no conversation_id is passed here
ca_taxi.chat(
    agent_id=taxi_data_agent_id,
    prompt="Count the taxi trips for by year and borough. Show the year field as an int."
)

In [None]:
# Show the dataframe attribute of the helper (presumably the result of the most recent chat - confirm against ConversationalAnalytics)
ca_taxi.dataframe

In [None]:
# Trip counts grouped by borough only
ca_taxi.chat(
    agent_id=taxi_data_agent_id,
    prompt="Show me the number of taxi trips by borough."
)

In [None]:
# Trip counts grouped by borough and year, explicitly asking for the year as an integer
ca_taxi.chat(
    agent_id=taxi_data_agent_id,
    prompt="Show me the number of taxi trips by borough and year. Extract the Year as an Integer."
)

In [None]:
# Ask for a small sample of rows filtered by borough and month
ca_taxi.chat(
    agent_id=taxi_data_agent_id,
    prompt="Show me 5 taxi trips for Manhattan for January 2025"
)

### <font color='#4285f4'>MAIN CODE - Test CA Agents created by the DA Agent</font>

In [None]:
# This is created by the DA Agent (this is another notebook)
# NOTE(review): the agent id is hard-coded here; presumably it must match the id produced by that other notebook - confirm
created_ca_agent_by_agent_id = "ca-agent-100"
# Fetch the agent; as the last expression of the cell, the return value is shown as the cell output
data_agent_get(project_id, global_location, created_ca_agent_by_agent_id)

In [None]:
# Same prompt as used with the taxi agent above, now sent to the agent created by the DA Agent
ca_taxi.chat(
    agent_id=created_ca_agent_by_agent_id,
    prompt="Count the taxi trips for by year and borough. Show the year field as an int."
)

In [None]:
# Filter the chat result down to Manhattan trips in 2025.
# The agent picks the column alias, so the year column might be named
# differently than "trip_year".
# The prompt asks for the year as an int, so the column may be numeric or text
# depending on how the result was materialised; comparing on the string form
# matches "2025" in either case.
ca_taxi.dataframe.loc[
    lambda df: (df["borough"] == "Manhattan")
    & (df["trip_year"].astype(str) == "2025")
]

In [None]:
# Ask the agent created by the DA Agent for a single filtered count
ca_taxi.chat(
    agent_id=created_ca_agent_by_agent_id,
    prompt="Count the taxi trips for Manhattan for January 2025."
)

In [None]:
# Steer the agent towards a lookup of the borough's location codes before the
# main count. The borough is spelled exactly as it appears in the data
# ("Manhattan") so the spelling cannot leak into a generated filter literal.
ca_taxi.chat(
    agent_id=created_ca_agent_by_agent_id,
    prompt="How many trips in 2025 in borough Manhattan.  First get all the Manhattan location codes, then do the main query."
)

In [None]:
# Show the response_json attribute of the helper (presumably the raw response of the most recent chat - confirm against ConversationalAnalytics)
ca_taxi.response_json


In [None]:
# Fresh helper instance for the two-step variant of the prompt in the next cell
ca_taxi_2 = ConversationalAnalytics()

In [None]:
# Two-step variant of the location-code prompt: explicitly ask the agent to
# run the lookup and the main query as separate steps. The borough is spelled
# exactly as it appears in the data ("Manhattan").
ca_taxi_2.chat(
    agent_id=created_ca_agent_by_agent_id,
    prompt="How many trips in 2025 in borough Manhattan.  First get all the Manhattan location codes, then do the main query. Do this in two steps."
)

### <font color='#4285f4'>Clean Up</font>

In [None]:
# Delete the CA API Agent

# Ask for confirmation before deleting both agents created by this notebook.
# Conversations are not deleted here; they get deleted after a TTL.
user_input = input(f"Do you want to delete your agents ({data_agent_id}, {taxi_data_agent_id}) (Y/n)?")

# Accept "Y" or "y" with optional surrounding whitespace. Any other answer,
# including just pressing Enter, leaves the agents in place.
if user_input.strip().upper() == "Y":
    data_agent_delete(project_id, global_location, data_agent_id)
    data_agent_delete(project_id, global_location, taxi_data_agent_id)