### Demo to instrument LiteLLM calls and send their spans to our Phoenix collector

Import Phoenix, OpenTelemetry (OTel), and the other dependencies.
Load your OpenAI API key from a `.env` file.
Launch the Phoenix app and set the span exporter's endpoint to the Phoenix collector.

In [None]:
import os
import warnings

import litellm
import phoenix as px
from dotenv import load_dotenv
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor

# Load environment variables from .env file
load_dotenv()

# Get the secret key from environment variables.
# The value is not passed to LiteLLM explicitly anywhere in this notebook, so it only
# serves as an early sanity check: warn now rather than fail later with a provider error.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    warnings.warn(
        "OPENAI_API_KEY is not set; the OpenAI-backed LiteLLM calls in this notebook "
        "will likely fail. Add it to your .env file or export it before continuing."
    )

# Start the Phoenix app; `session` is kept so the UI can be reached from later cells.
session = px.launch_app()

# Send spans over OTLP/HTTP to the collector endpoint.
# NOTE(review): the address is hard-coded to Phoenix's default local host/port —
# adjust it if your Phoenix instance listens elsewhere.
endpoint = "http://127.0.0.1:6006/v1/traces"
tracer_provider = TracerProvider()
tracer_provider.add_span_processor(SimpleSpanProcessor(OTLPSpanExporter(endpoint)))

You can skip the cell below if you have already run `pip install openinference-instrumentation-litellm`.

In [None]:
# Make the local 'src' directory importable (alternative to pip-installing the package)
import os
import sys

# 'src' lives one level above the current working directory; abspath() resolves the
# relative path against the working directory and normalizes away the '..' component
package_path = os.path.abspath(os.path.join(os.pardir, "src"))

# Register the directory only once so re-running this cell does not add duplicates
if package_path not in sys.path:
    sys.path.append(package_path)

Set up our LiteLLM instrumentor with just 2 lines!

In [None]:
# Import the OpenInference instrumentor for LiteLLM
from openinference.instrumentation.litellm import LiteLLMInstrumentor

# Instrument LiteLLM, handing it the tracer provider configured in the setup cell
LiteLLMInstrumentor().instrument(tracer_provider=tracer_provider)

Make calls to LiteLLM functions as usual. You will see their spans in the Phoenix UI.

In [None]:
# Simple single message completion call
# The call is the cell's last expression, so its return value is shown as the cell output.
litellm.completion(
    model="gpt-3.5-turbo", messages=[{"content": "What's the capital of China?", "role": "user"}]
)

In [None]:
# Multiple message conversation completion call with added param
# The history alternates user / assistant / user turns; `temperature` is the extra parameter.
litellm.completion(
    model="gpt-3.5-turbo",
    messages=[
        {"content": "Hello, I want to bake a cake", "role": "user"},
        {"content": "Hello, I can pull up some recipes for cakes.", "role": "assistant"},
        {"content": "No actually I want to make a pie", "role": "user"},
    ],
    temperature=0.7,
)

In [None]:
# Multiple message conversation acompletion call with added params
# Async variant of the previous cell: awaited directly at the top level of the cell,
# with `temperature` and `max_tokens` as the extra parameters.
await litellm.acompletion(
    model="gpt-3.5-turbo",
    messages=[
        {"content": "Hello, I want to bake a cake", "role": "user"},
        {"content": "Hello, I can pull up some recipes for cakes.", "role": "assistant"},
        {"content": "No actually I want to make a pie", "role": "user"},
    ],
    temperature=0.7,
    max_tokens=20,
)

In [None]:
# Perform image analysis by providing a url to the image and querying the LLM
# The single user message carries a list of content parts: one "text" part with the
# question and one "image_url" part pointing at the image to analyse.
litellm.completion(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What’s in this image?"},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
                    },
                },
            ],
        }
    ],
)

In [None]:
# Completion call through LiteLLM's retrying wrapper
# First run pip install tenacity
litellm.completion_with_retries(
    model="gpt-3.5-turbo",
    messages=[{"content": "What's the highest grossing film ever", "role": "user"}],
)

In [None]:
# Synchronous embedding call for a single input string
litellm.embedding(model="text-embedding-ada-002", input=["good morning from litellm"])

In [None]:
# Async embedding call with the same model and input, awaited at the top level of the cell
await litellm.aembedding(model="text-embedding-ada-002", input=["good morning from litellm"])

In [None]:
# Image generation using OpenAI
# Synchronous call; the returned response is displayed as the cell output.
litellm.image_generation(model="dall-e-2", prompt="cute baby otter")

In [None]:
# Async image generation with the same model and prompt, awaited at the top level of the cell
await litellm.aimage_generation(model="dall-e-2", prompt="cute baby otter")

In [None]:
# Image generation using Bedrock
# pip install boto3 first before importing

# Check the AWS settings up front so a missing value is reported clearly here instead of
# surfacing as a provider error from the call below. This only prints a notice (it does
# not raise) because credentials may also be supplied through other mechanisms.
# AWS_SESSION_TOKEN is looked up separately: it is presumably only needed for temporary
# credentials, so its absence is not reported — TODO confirm for your setup.
_required_aws_vars = ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_REGION")
_missing_aws_vars = [name for name in _required_aws_vars if not os.getenv(name)]
if _missing_aws_vars:
    print(
        "Warning: the following AWS environment variables are not set: "
        + ", ".join(_missing_aws_vars)
        + ". The Bedrock call below may fail."
    )
if not os.getenv("AWS_SESSION_TOKEN"):
    print("Note: AWS_SESSION_TOKEN is not set (only required for temporary credentials).")

litellm.image_generation(
    model="bedrock/stability.stable-diffusion-xl-v1",
    prompt="blue sky with fluffy white clouds and green hills",
)

Now uninstrument LiteLLM. The calls in the following cells will no longer produce traces until LiteLLM is instrumented again.

In [None]:
# Remove the LiteLLM instrumentation that was applied earlier.
# NOTE(review): this constructs a fresh LiteLLMInstrumentor() rather than reusing the
# instance that called instrument() — presumably the instrumentor shares state across
# instances; confirm against the instrumentor's implementation.
LiteLLMInstrumentor().uninstrument(tracer_provider=tracer_provider)

In [None]:
# Three calls in one cell: a sync embedding, an async completion and a sync completion.
# Only the final expression (the last completion) is displayed as the cell output; the
# results of the first two calls are discarded.
litellm.embedding(model="text-embedding-ada-002", input=["good morning from litellm"])
await litellm.acompletion(
    model="gpt-3.5-turbo",
    messages=[
        {"content": "Hello, I want to bake a cake", "role": "user"},
        {"content": "Hello, I can pull up some recipes for cakes.", "role": "assistant"},
        {"content": "No actually I want to make a pie", "role": "user"},
    ],
    temperature=0.7,
    max_tokens=20,
)
litellm.completion(
    model="gpt-3.5-turbo",
    messages=[
        {"content": "Hello, I want to bake a cake", "role": "user"},
        {"content": "Hello, I can pull up some recipes for cakes.", "role": "assistant"},
        {"content": "No actually I want to make a pie", "role": "user"},
    ],
    temperature=0.7,
)

Now instrument again, and you will see traces in the Phoenix UI.

In [None]:
# Instrument LiteLLM again, using the same tracer provider as before
LiteLLMInstrumentor().instrument(tracer_provider=tracer_provider)

In [None]:
# Same three calls as in the uninstrumented cell: a sync embedding, an async completion
# and a sync completion. Only the final expression (the last completion) is displayed as
# the cell output; the results of the first two calls are discarded.
litellm.embedding(model="text-embedding-ada-002", input=["good morning from litellm"])
await litellm.acompletion(
    model="gpt-3.5-turbo",
    messages=[
        {"content": "Hello, I want to bake a cake", "role": "user"},
        {"content": "Hello, I can pull up some recipes for cakes.", "role": "assistant"},
        {"content": "No actually I want to make a pie", "role": "user"},
    ],
    temperature=0.7,
    max_tokens=20,
)
litellm.completion(
    model="gpt-3.5-turbo",
    messages=[
        {"content": "Hello, I want to bake a cake", "role": "user"},
        {"content": "Hello, I can pull up some recipes for cakes.", "role": "assistant"},
        {"content": "No actually I want to make a pie", "role": "user"},
    ],
    temperature=0.7,
)