# ElevenLabs Text-to-Speech Instrumentation Example

This notebook demonstrates how to use OpenInference instrumentation with the ElevenLabs text-to-speech (TTS) API and send the resulting traces to Arize.

## Setup

Install required packages:

In [None]:
%pip install elevenlabs arize-otel python-dotenv

## Load Environment Variables

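Copy `.env.example` to `.env` and fill in your keys. The notebook reads `ARIZE_SPACE_ID`, `ARIZE_API_KEY`, and `ELEVEN_API_KEY` (all required), plus the optional `ARIZE_PROJECT`, which defaults to `elevenlabs-demo`.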

In [None]:
from dotenv import load_dotenv

# Load the variables from `.env` so the `os.environ[...]` lookups in the
# following cells can find the Arize and ElevenLabs keys.
load_dotenv()

## Configure Arize OTel

In [None]:
import os
from arize.otel import register

# The project name is optional and falls back to a demo default; the two Arize
# credentials are mandatory and raise KeyError when they are not set.
project_name = os.environ.get("ARIZE_PROJECT", "elevenlabs-demo")

tracer_provider = register(
    space_id=os.environ["ARIZE_SPACE_ID"],
    api_key=os.environ["ARIZE_API_KEY"],
    project_name=project_name,
)

## Instrument ElevenLabs

In [None]:
from openinference.instrumentation.elevenlabs import ElevenLabsInstrumentor

# Attach the instrumentor to the tracer provider configured in the previous cell.
instrumentor = ElevenLabsInstrumentor()
instrumentor.instrument(tracer_provider=tracer_provider)

## Text-to-Speech: Convert (Sync)

In [None]:
import os
from elevenlabs import ElevenLabs

# Synchronous client; the later sync cells reuse this same `client`.
client = ElevenLabs(api_key=os.environ["ELEVEN_API_KEY"])

# Parameters for a single text-to-speech conversion.
convert_kwargs = {
    "voice_id": "JBFqnCBsd6RMkjVDRZzb",  # George voice
    "text": "Hello! This is a test of ElevenLabs text to speech.",
    "model_id": "eleven_multilingual_v2",
}

# convert() hands the audio back as an iterable of byte chunks...
audio = client.text_to_speech.convert(**convert_kwargs)

# ...which are concatenated here into one bytes object.
audio_bytes = b"".join(audio)
print(f"Generated {len(audio_bytes)} bytes of audio")

## Text-to-Speech: Stream (Sync)

In [None]:
# Request the audio through the streaming endpoint
audio_stream = client.text_to_speech.stream(
    voice_id="JBFqnCBsd6RMkjVDRZzb",
    text="This demonstrates streaming audio generation.",
    model_id="eleven_multilingual_v2",
)

# Drain the stream, keeping each chunk in arrival order.
chunks = list(audio_stream)

print(f"Received {len(chunks)} audio chunks")

## Text-to-Speech: Async Convert

In [None]:
from elevenlabs import AsyncElevenLabs

async_client = AsyncElevenLabs(api_key=os.environ["ELEVEN_API_KEY"])

async def async_tts_example():
    # Note: convert() returns an async generator directly, no await needed
    audio = async_client.text_to_speech.convert(
        voice_id="JBFqnCBsd6RMkjVDRZzb",
        text="This is an async text to speech call.",
        model_id="eleven_multilingual_v2",
    )
    audio_bytes = b"".join([chunk async for chunk in audio])
    print(f"Async generated {len(audio_bytes)} bytes of audio")

await async_tts_example()

## Text-to-Speech with Timestamps

In [None]:
# Get audio with character-level timestamps
response = client.text_to_speech.convert_with_timestamps(
    voice_id="JBFqnCBsd6RMkjVDRZzb",
    text="Hello world!",
    model_id="eleven_multilingual_v2",
)

# The ElevenLabs SDK documents convert_with_timestamps() as returning ONE
# response object (stream_with_timestamps() is the chunked variant). Iterating
# such an object does not yield chunk objects, so the attribute checks below
# would silently never match. A response that itself carries `alignment` is
# therefore treated as a single item; anything else is iterated as before.
# NOTE(review): confirm the return type against the installed SDK version.
items = [response] if hasattr(response, "alignment") else response

for item in items:
    # The Python SDK exposes the audio as `audio_base_64`, while the JSON field
    # is `audio_base64`; accept either spelling.
    audio_b64 = getattr(item, "audio_base_64", None) or getattr(item, "audio_base64", None)
    if audio_b64:
        print("Audio chunk received")

    alignment = getattr(item, "alignment", None)
    if alignment:
        print(f"Alignment: {alignment}")

## Cleanup

Uninstrument when done (optional):

In [None]:
# Remove the ElevenLabs instrumentation that was applied earlier in the notebook.
cleanup_instrumentor = ElevenLabsInstrumentor()
cleanup_instrumentor.uninstrument()