

---



# **Multi-Functional Tool Integration Using Multimodal Live API and Websockets**

---



In [27]:
# Install the notebook's third-party dependencies: `websockets` (any release
# compatible with 14.0, i.e. the 14.x series) and Altair for charting.
# `-q` keeps pip's output quiet.
%pip install -q 'websockets~=14.0' altair

In [28]:
# Configure your API key.
import os
import altair as alt
from google.api_core import retry
from google.colab import userdata
# Read both keys through `google.colab.userdata` so that no secret is
# hard-coded in the notebook itself.
MAPS_API_KEY = userdata.get('MAPS_API_KEY')  # sent with the Static Maps requests
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')  # embedded in the websocket URI

In [34]:
# Websocket endpoint of the BidiGenerateContent service. The API key travels as
# a query parameter, so `uri` itself is a secret: avoid printing or logging it.
uri = f"wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent?key={GOOGLE_API_KEY}"
# Model name sent in the setup handshake.
model = "models/gemini-2.0-flash-exp"

In [35]:
# Prepare some supporting tools.
import contextlib
import wave

@contextlib.contextmanager
def wave_file(filename, channels=1, rate=24000, sample_width=2):
  """Yield a WAV writer for `filename`, configured with the given audio format.

  Args:
    filename: Path of the WAV file to create (opened in "wb" mode).
    channels: Number of audio channels.
    rate: Frame rate in Hz.
    sample_width: Sample width in bytes.

  Yields:
    The open `wave.Wave_write` object; it is closed when the context exits.
  """
  writer = wave.open(filename, "wb")
  try:
    writer.setframerate(rate)
    writer.setsampwidth(sample_width)
    writer.setnchannels(channels)
    yield writer
  finally:
    # Closing finalises the header, whether or not the body raised.
    writer.close()

In [36]:
import logging
# Logger shared by the helper functions in this notebook.
logger = logging.getLogger("Live")
# Switch to "DEBUG" to see the in-flight requests & responses
logger.setLevel("INFO")

In [37]:
# Helper functions for connecting to the Live API and exchanging messages.
import asyncio
import base64
import json
import time

from websockets.asyncio.client import connect
from IPython import display


async def setup(ws, modality, tools):
  """Send the session configuration and wait for the server's reply.

  Args:
    ws: Open websocket connection offering awaitable `send` and `recv`.
    modality: Placed as the only entry of `response_modalities`.
    tools: Tool declarations, forwarded unchanged under the `tools` key.
  """
  generation_config = {"response_modalities": [modality]}
  handshake = {
      "setup": {
          "model": model,
          "tools": tools,
          "generation_config": generation_config,
      }
  }
  payload = json.dumps(handshake)
  logger.debug(f">>> {payload}")
  await ws.send(payload)

  # The reply is parsed only so it can be logged; nothing is returned.
  reply = json.loads(await ws.recv())
  logger.debug(f"<<< {json.dumps(reply)}")

async def send(ws, prompt):
  """Send `prompt` as a single, completed user turn (text only).

  Args:
    ws: Open websocket connection offering an awaitable `send`.
    prompt: Text placed in the only part of the user turn.
  """
  user_turn = {"role": "user", "parts": [{"text": prompt}]}
  envelope = {
    "client_content": {
      "turns": [user_turn],
      "turn_complete": True,
    }
  }
  wire_text = json.dumps(envelope)
  logger.debug(f">>> {wire_text}")
  await ws.send(wire_text)


def handle_server_content(wf, server_content):
  """Handle a server content message, e.g. incoming audio or text.

  Every part of the model turn is processed in order: text is printed without
  a trailing newline, and inline data is base64-decoded and appended to `wf`
  (one '.' is printed per audio chunk as a progress marker). The message
  itself is only read, not modified.

  Args:
    wf: Object with a `writeframes(bytes)` method, e.g. an open WAV writer.
    server_content: The `serverContent` dict of a server message.

  Returns:
    A `(turn_complete, audio)` tuple: the message's `turnComplete` value (or
    None when absent) and whether any audio was written.
  """
  audio = False
  model_turn = server_content.get("modelTurn") or {}
  # A turn may carry several parts, or none at all; handle each of them rather
  # than assuming exactly one.
  for part in model_turn.get("parts") or ():
    text = part.get("text")
    if text:
      print(text, end='')

    inline_data = part.get("inlineData")
    if inline_data:
      print('.', end='')
      wf.writeframes(base64.b64decode(inline_data['data']))
      audio = True

  return server_content.get('turnComplete'), audio


async def handle_tool_call(ws, tool_call, responses):
  """Answer every function call in `tool_call` with a tool response message.

  For each call, the result is chosen as follows:
    * the name is in `responses` and the entry is callable: the entry is
      called with the model-supplied arguments as keyword arguments;
    * the name is in `responses` and the entry is not callable: the entry
      itself is used as a canned result;
    * otherwise: a stub `{'string_value': 'ok'}` result is used.

  One `tool_response` message is sent over `ws` per function call.

  Args:
    ws: Open websocket connection offering an awaitable `send`.
    tool_call: The `toolCall` dict of a server message; its `functionCalls`
      entries must provide `id` and `name`, and may provide `args`.
    responses: Container of handlers keyed by function name (an empty tuple
      works when there are none).
  """
  logger.debug("<<< " + json.dumps(tool_call))
  for fc in tool_call['functionCalls']:

    if fc['name'] in responses:
      # Use a response from `responses` if provided.
      result_entry = responses[fc['name']]
      if callable(result_entry):
        # If it's a function, actually call it. A call without arguments may
        # omit `args`, so fall back to no keyword arguments.
        result = result_entry(**(fc.get('args') or {}))
      else:
        # A plain value is a canned response; previously this path left
        # `result` unassigned (or stale from the previous call).
        result = result_entry
    else:
      # Otherwise it's a stub, just say "OK"
      result = {'string_value': 'ok'}

    msg = {
      'tool_response': {
          'function_responses': [{
              'id': fc['id'],
              'name': fc['name'],
              'response': {'result': result}
          }]
        }
    }
    json_msg = json.dumps(msg)
    logger.debug(">>> " + json_msg)
    await ws.send(json_msg)


@contextlib.asynccontextmanager
async def quick_connect(modality='TEXT', tools=()):
  """Open a configured session and keep it alive for the `async with` body.

  Args:
    modality: Response modality passed on to the setup handshake.
    tools: Tool declarations passed on to the setup handshake.

  Yields:
    The connected websocket, after the setup handshake has completed.
  """
  json_headers = {"Content-Type": "application/json"}
  async with connect(uri, additional_headers=json_headers) as socket:
    await setup(socket, modality, tools)
    yield socket


# Wall-clock time (as returned by time.time()) at which the most recently
# displayed audio clip is expected to have finished; run() reads and updates it.
audio_lock = time.time()

async def run(ws, prompt, responses=()):
  """Send the provided prompt and handle the streamed response.

  Messages are read from `ws` until a server-content message reports that the
  turn is complete. Text is printed as it arrives, audio is collected into
  'audio.wav', and tool calls are answered in between. If any audio arrived,
  the clip is displayed with autoplay once the turn is over.

  Args:
    ws: Connected websocket, usable both for sending and as an async iterator.
    prompt: User text to send.
    responses: Tool-call handlers keyed by function name, passed through to
      `handle_tool_call`.
  """
  print('>', prompt)
  await send(ws, prompt)

  audio = False
  filename = 'audio.wav'
  with wave_file(filename) as wf:
    async for raw_response in ws:
      # json.loads accepts str as well as bytes, so both text and binary
      # frames are handled (calling .decode() would fail on a text frame).
      response = json.loads(raw_response)
      logger.debug("<<< " + str(response)[:150])

      server_content = response.pop("serverContent", None)
      if server_content:
        turn_complete, a = handle_server_content(wf, server_content)
        audio = audio or a

        if turn_complete:
          print()
          print('')
          break

      tool_call = response.pop('toolCall', None)
      if tool_call:
        await handle_tool_call(ws, tool_call, responses)

  if audio:
    global audio_lock
    # Sleep before playing audio to make sure it doesn't play over an existing clip.
    if (delta := audio_lock - time.time()) > 0:
      print('Pausing for audio to complete...')
      await asyncio.sleep(delta + 1.0)  # include a buffer so there's a breather

    display.display(display.Audio(filename, autoplay=True))
    # Clip duration in seconds = frames written / frames per second; remember
    # when this clip should be done so the next one can wait for it.
    audio_lock = time.time() + (wf.getnframes() / wf.getframerate())



---

# **The code uses the `quick_connect` context manager to open a WebSocket connection (`ws`) to the API. While the connection is active, the `run` function sends prompts and processes the responses. The modality can be set to either AUDIO or TEXT for each connection.**

---



In [38]:
tools = [
    {'google_search': {}},
    {'code_execution': {}},
]

async def go():
  async with quick_connect(tools=tools, modality="AUDIO") as ws:
    await run(ws, "Enumerate the last five films featuring Kamal Haasan, along with their runtimes and release years.")

logger.setLevel('INFO')
await go()

> Enumerate the last five films featuring Kamal Haasan, along with their runtimes and release years.
............................................................................................................................................................





---

# **Define a charting tool using Vega-Altair by creating a schema (`altair_fns`), an execution function (`render_altair`), and linking them through `tool_calls`. Altair uses JSON for chart persistence, enabling chart generation via the tool.**

---



In [39]:
def apply_altair_theme(altair_json: str, theme: str) -> str:
  """Re-serialise a chart's JSON while the named Altair theme is enabled.

  Args:
    altair_json: JSON string describing an Altair chart.
    theme: Name of the theme to enable during serialisation.

  Returns:
    The chart's JSON as produced by `to_json()` inside the theme context
    (presumably so that the theme's settings end up in the output).
  """
  source_chart = alt.Chart.from_json(altair_json)
  with alt.themes.enable(theme):
    return source_chart.to_json()


@retry.Retry()
def render_altair(altair_json: str, theme: str = "default"):
  """Display the chart described by `altair_json` using the given theme.

  Wrapped in `google.api_core.retry.Retry` with its default settings.

  Args:
    altair_json: JSON string describing an Altair chart.
    theme: Altair theme name applied before rendering.

  Returns:
    `{'string_value': 'ok'}`, used as the tool-call result.
  """
  alt.Chart.from_json(apply_altair_theme(altair_json, theme)).display()
  return {'string_value': 'ok'}


# Function declaration advertised to the model for the `render_altair` tool:
# both parameters are strings, and neither is marked as required.
altair_fns = [
  dict(
    name='render_altair',
    description='Displays an Altair chart in JSON format.',
    parameters=dict(
      type='OBJECT',
      properties=dict(
        altair_json=dict(
          type='STRING',
          description='JSON STRING representation of the Altair chart to render. Must be a string, not a json object',
        ),
        theme=dict(
          type='STRING',
          description='Altair theme. Choose from one of "dark", "ggplot2", "default", "opaque".',
        ),
      ),
    ),
  ),
]

# Maps a declared function name to the Python callable that implements it;
# passed as `responses=` to `run` so matching tool calls are executed locally.
tool_calls = {
    'render_altair': render_altair,
}



---

# **The code uses `quick_connect` to open a streaming session, sending prompts and handling responses in real-time. Multiple `run` calls enable a multi-turn conversation, which ends when the `quick_connect` block finishes.**

---



In [40]:
tools = [
    {'google_search': {}},
    {'code_execution': {}},
    {'function_declarations': altair_fns},
]

async def go():
  async with quick_connect(tools=tools, modality="AUDIO") as ws:

    # Google Search
    await run(ws, "Please find the last 5 Denis Villeneuve movies and look up their runtimes and the year published.")
    # Code execution
    await run(ws, "Can you write some code to work out which has the longest and shortest runtimes?")
    # Tool use
    await run(ws, "Now can you plot them in a line chart showing the year on the x-axis and runtime on the y-axis?", responses=tool_calls)
    # Tool use - this step takes user input, so you can ask the model to tweak the chart to your liking.
    # Try changing to dark mode, or lay out the data differently.
    await run(ws, input('Any requests? > '), responses=tool_calls)


logger.setLevel('INFO')
await go()

> Please find the last 5 Denis Villeneuve movies and look up their runtimes and the year published.
.......................................................................................................................................................................



> Can you write some code to work out which has the longest and shortest runtimes?
....................................................

Pausing for audio to complete...


> Now can you plot them in a line chart showing the year on the x-axis and runtime on the y-axis?


..........................................................



Any requests? > No
> No
...................

Pausing for audio to complete...




---

# **Use the Google Maps Static API to draw a map during the conversation. Ensure the API key has the Static Maps API enabled. The cells below declare and define the `draw_map` function, which requires a center point and a zoom level, and optionally accepts a path and a list of marker styles and locations.**

---



In [41]:
# Function declaration advertised to the model for the `draw_map` tool.
# The long descriptions are sent to the model verbatim as parameter
# documentation, so they are part of the tool's behaviour, not just notes.
map_fns = [
  {
    'name': 'draw_map',
    'description': 'Render a Google Maps static map using the specified parameters. No information is returned.',
    'parameters': {
      'type': 'OBJECT',
      'properties': {
        'center': {
            'type': 'STRING',
            'description': 'Location to center the map. It can be a lat,lng pair (e.g. 40.714728,-73.998672), or a string address of a location (e.g. Berkeley,CA).',
        },
        # NOTE(review): declared as NUMBER although the description asks for an
        # integer; confirm whether an integer type would be more appropriate.
        'zoom': {
            'type': 'NUMBER',
            'description': 'Google Maps zoom level. 1 is the world, 20 is zoomed in to building level. Integer only. Level 11 shows about a 15km radius. Level 9 is about 30km radius.'
        },
        # A single string: one path descriptor (styles first, then locations).
        'path': {
            "type": "STRING",
            'description': """The path parameter defines a set of one or more locations connected by a path to overlay on the map image. The path parameter takes set of value assignments (path descriptors) of the following format:

path=pathStyles|pathLocation1|pathLocation2|... etc.

Note that both path points are separated from each other using the pipe character (|). Because both style information and point information is delimited via the pipe character, style information must appear first in any path descriptor. Once the Maps Static API server encounters a location in the path descriptor, all other path parameters are assumed to be locations as well.

Path styles
The set of path style descriptors is a series of value assignments separated by the pipe (|) character. This style descriptor defines the visual attributes to use when displaying the path. These style descriptors contain the following key/value assignments:

weight: (optional) specifies the thickness of the path in pixels. If no weight parameter is set, the path will appear in its default thickness (5 pixels).
color: (optional) specifies a color either as a 24-bit (example: color=0xFFFFCC) or 32-bit hexadecimal value (example: color=0xFFFFCCFF), or from the set {black, brown, green, purple, yellow, blue, gray, orange, red, white}.

When a 32-bit hex value is specified, the last two characters specify the 8-bit alpha transparency value. This value varies between 00 (completely transparent) and FF (completely opaque). Note that transparencies are supported in paths, though they are not supported for markers.

fillcolor: (optional) indicates both that the path marks off a polygonal area and specifies the fill color to use as an overlay within that area. The set of locations following need not be a "closed" loop; the Maps Static API server will automatically join the first and last points. Note, however, that any stroke on the exterior of the filled area will not be closed unless you specifically provide the same beginning and end location.
geodesic: (optional) indicates that the requested path should be interpreted as a geodesic line that follows the curvature of the earth. When false, the path is rendered as a straight line in screen space. Defaults to false.
Some example path definitions:

Thin blue line, 50% opacity: path=color:0x0000ff80|weight:1
Solid red line: path=color:0xff0000ff|weight:5
Solid thick white line: path=color:0xffffffff|weight:10
These path styles are optional. If default attributes are desired, you may skip defining the path attributes; in that case, the path descriptor's first "argument" will consist instead of the first declared point (location).

Path points
In order to draw a path, the path parameter must also be passed two or more points. The Maps Static API will then connect the path along those points, in the specified order. Each pathPoint is denoted in the pathDescriptor separated by the | (pipe) character.
""",
        },
        # An array of strings: each item is one marker descriptor.
        'markers': {
            "type": "ARRAY",
            "items": {
                "type": "STRING"
            },
            # Copied from https://developers.google.com/maps/documentation/maps-static/start#Markers
            'description': """The markers parameter defines a set of one or more markers (map pins) at a set of locations. Each marker defined within a single markers declaration must exhibit the same visual style; if you wish to display markers with different styles, you will need to supply multiple markers parameters with separate style information.

The markers parameter takes set of value assignments (marker descriptors) of the following format:

markers=markerStyles|markerLocation1| markerLocation2|... etc.

The set of markerStyles is declared at the beginning of the markers declaration and consists of zero or more style descriptors separated by the pipe character (|), followed by a set of one or more locations also separated by the pipe character (|).

Because both style information and location information is delimited via the pipe character, style information must appear first in any marker descriptor. Once the Maps Static API server encounters a location in the marker descriptor, all other marker parameters are assumed to be locations as well.

Marker styles
The set of marker style descriptors is a series of value assignments separated by the pipe (|) character. This style descriptor defines the visual attributes to use when displaying the markers within this marker descriptor. These style descriptors contain the following key/value assignments:

size: (optional) specifies the size of marker from the set {tiny, mid, small}. If no size parameter is set, the marker will appear in its default (normal) size.
color: (optional) specifies a 24-bit color (example: color=0xFFFFCC) or a predefined color from the set {black, brown, green, purple, yellow, blue, gray, orange, red, white}.

Note that transparencies (specified using 32-bit hex color values) are not supported in markers, though they are supported for paths.

label: (optional) specifies a single uppercase alphanumeric character from the set {A-Z, 0-9}. (The requirement for uppercase characters is new to this version of the API.) Note that default and mid sized markers are the only markers capable of displaying an alphanumeric-character parameter. tiny and small markers are not capable of displaying an alphanumeric-character.
""",
        }
      },
      # Only the view itself is mandatory; `path` and `markers` are optional.
      "required": [
        "center",
        "zoom",
      ]

    },
  },
]

In [42]:
# Define the `draw_map` function and add `google_search` as a tool to find popular restaurants.
from urllib.parse import urlencode

import altair as alt
from google.api_core import retry
import requests


def draw_map(center, zoom, path: str = "", markers: list[str] = ()):
  """Display a Google Maps static map for the given view, path and markers.

  Args:
    center: Map center, sent as the `center` query parameter.
    zoom: Zoom level, sent as the `zoom` query parameter.
    path: Optional path descriptor; only sent when non-empty.
    markers: Optional marker descriptors; each one becomes its own `markers`
      query parameter. None is treated like an empty list.

  Returns:
    `{'string_value': 'ok'}`, used as the tool-call result.
  """
  logger.debug(f'MAPS: {center=} {zoom=} {path=} {markers=}')
  # Everything except the API key, in the order it appears in the URL.
  params = [
      ('size', '512x512'),
      ('center', center),
      ('zoom', zoom),
  ]

  if path:
    params.append(('path', path))

  params.extend(('markers', marker) for marker in markers or ())

  base_url = 'https://maps.googleapis.com/maps/api/staticmap'
  query = urlencode([('key', MAPS_API_KEY)] + params)
  url = f'{base_url}?{query}'
  # NOTE(review): the image is displayed by URL, so the key-bearing URL
  # presumably ends up in the cell output; consider fetching the image bytes
  # instead before sharing the notebook.
  display.display(display.Image(url=url))

  # Log the URL with the key masked so DEBUG logs never contain the secret.
  redacted_query = urlencode([('key', 'REDACTED')] + params)
  logger.debug(f"Map URL: {base_url}?{redacted_query}")

  return {'string_value': 'ok'}


# Route `draw_map` tool calls from the model to the local implementation.
tool_calls = {
    'draw_map': draw_map,
}

# Tools offered to the model: built-in Google Search plus the custom map function.
tools = [
    {'google_search': {}},
    {'function_declarations': map_fns},
]

In [43]:
# Run a two-turn TEXT conversation: mark trending restaurants on a map, then pick one and zoom in on it.
async def go():
  async with quick_connect(tools=tools, modality="TEXT") as ws:

    # Google Search + Tools (Maps)
    await run(ws, "Please look up and mark 3 Chennai restaurants that are currently trending on a map.", responses=tool_calls)
    # Code execution + Tools
    await run(ws, "Now write some code to randomly pick one to eat at tonight and zoom in to that one on the map.", responses=tool_calls)


logger.setLevel('INFO')
await go()

> Please look up and mark 3 Chennai restaurants that are currently trending on a map.
Based on the search results, here are three trending restaurants in Chennai that I will mark on a map:

1. **601, The Park Chennai:**  Located at Anna Salai, this all-day diner has been revamped and is known for its international and local flavors, as well as their tiramisu.
2.  **Hundreds Bistro:** Located on Harrington Road, this restaurant is noted for its modern approach to Indian cuisine.
3. **Ciclo Cafe:** Located in Kotturpuram, this cafe appears to be popular and has good ratings.

I will use these locations to generate a map.




> Now write some code to randomly pick one to eat at tonight and zoom in to that one on the map.



Okay, I have randomly chosen "601, The Park Chennai" for tonight's dinner. I've also generated a map zoomed into that location.






---

# **Use Google Maps to plot India's capital cities with markers, applying a circular color gradient around the country.**

---



In [44]:
from urllib.parse import urlencode

import altair as alt
from google.api_core import retry


tool_calls = {
    'draw_map': draw_map,
}

tools = [
    {'code_execution': {}},
    {'function_declarations': map_fns},
]

async def go():
  async with quick_connect(tools=tools, modality="TEXT") as ws:

    # Code exec and tool use. No search.
    await run(ws, "Plot markers on every capital city in India using a gradient between "
                  "Saffron and Green. Plan out your steps first, then follow the plan.", responses=tool_calls)

    await run(ws, "Awesome! Can you ensure the gradient is applied smoothly in a circular direction "
                  "around the country?", responses=tool_calls)


logger.setLevel('INFO')
await go()

> Plot markers on every capital city in India using a gradient between Saffron and Green. Plan out your steps first, then follow the plan.
Okay, I understand. Here's my plan to plot markers on a map of India's capital cities with a color gradient from Saffron to Green:

**Plan:**

1.  **Identify Capital Cities:**  I need a list of capital cities in India. I will use a python dictionary for this.
2.  **Determine Marker Colors:** I'll need to determine a sequence of colors progressing from Saffron to Green. Since I can only use pre-defined colors, I'll select colors that approximate a gradient.
3. **Prepare marker strings:** I'll use the color information along with the city locations to generate the marker strings for the API call.
4. **Call the API:** Finally, I will call the `default_api.draw_map` function to generate the map with the markers.
5. **Return Result:** I'll return the map data.

**Execution:**

Now, let's execute the plan step-by-step.

**Step 1: Identify Capital Cities**

The map has been generated successfully. The result indicates "ok".


> Awesome! Can you ensure the gradient is applied smoothly in a circular direction around the country?
Okay, I understand. To achieve a smoother, circular gradient, I need to re-arrange the cities and colors to create a circular pattern. Here's the updated plan:

**Updated Plan:**

1.  **Reorder Cities:**  Instead of the current order, I'll sort the cities roughly based on their geographical positions in a circular manner around India (e.g., starting from North, going East, South, West, then back to North).
2.  **Define Color Gradient:**  I'll keep the color gradient but perhaps use more steps to make the transition smoother.
3.  **Prepare marker strings:** I'll use the re-ordered cities and the new color information to generate marker strings.
4.  **Call the API:**  Call `default_api.draw_map` with the new marker strings.

**Execution:**

Let's execute the updated plan step-by-step.

**Step 1: Reorder Cities (Geogra

The map with the circular color gradient has been generated successfully.






---

# **Performance improves with your feedback for optimal results.**

**Next Steps:**  
- Explore multimodal streaming via Live API in Google AI Studio (no code needed).  
- Use the Live API starter with Python SDK or websockets.  
- Explore grounding examples and the Live API tutorial for Gemini 2.0's tool use features.

---
