In [1]:
import phoenix as px
import litellm

from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()

# Get the secret key from environment variables
openai_api_key = os.getenv('OPENAI_API_KEY')


session = px.launch_app()

from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor

endpoint = "http://127.0.0.1:6006/v1/traces"
tracer_provider = TracerProvider()
tracer_provider.add_span_processor(SimpleSpanProcessor(OTLPSpanExporter(endpoint)))

  from .autonotebook import tqdm as notebook_tqdm
INFO:phoenix.config:📋 Ensuring phoenix working directory: /Users/shreyasridhar/.phoenix


🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


I0000 00:00:1722972431.101137  313290 config.cc:230] gRPC experiments enabled: call_status_override_on_cancellation, event_engine_dns, event_engine_listener, http2_stats_fix, monitoring_experiment, pick_first_new, trace_record_callops, work_serializer_clears_time_cache


In [2]:
# Add the package path to the system path
import sys
import os

# Determine the absolute path to the 'src' directory
package_path = os.path.abspath(os.path.join(os.getcwd(), '..', 'src'))

# Add the package path to the system path if it's not already included
if package_path not in sys.path:
    sys.path.append(package_path)


In [3]:
from openinference.instrumentation.litellm import LiteLLMInstrumentor
LiteLLMInstrumentor().instrument(tracer_provider=tracer_provider)

In [4]:
# Simple single message completion call
litellm.completion(model="gpt-3.5-turbo", 
                   messages=[{"content": "What's the capital of China?", "role": "user"}])

I0000 00:00:1722972442.379688  313199 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1722972442.379869  313199 fork_posix.cc:77] Other threads are currently calling into gRPC, skipping fork() handlers


ModelResponse(id='chatcmpl-9tK82fex466IyLwCbEI6ES8cM9hoy', choices=[Choices(finish_reason='stop', index=0, message=Message(content='Beijing', role='assistant', tool_calls=None, function_call=None))], created=1722972442, model='gpt-3.5-turbo-0125', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=2, prompt_tokens=14, total_tokens=16))

In [5]:
# Multiple message conversation completion call with added param
litellm.completion(
            model="gpt-3.5-turbo",
            messages=[{ "content": "Hello, I want to bake a cake","role": "user"},
                      { "content": "Hello, I can pull up some recipes for cakes.","role": "assistant"},
                      { "content": "No actually I want to make a pie","role": "user"},],
            temperature=0.7
        )


ModelResponse(id='chatcmpl-9tK8ATxcNSROS4xiYYl8xZUkbOa2k', choices=[Choices(finish_reason='stop', index=0, message=Message(content='Great! What kind of pie are you looking to make? Apple, cherry, pumpkin, pecan, or something else? Let me know and I can help you find a recipe.', role='assistant', tool_calls=None, function_call=None))], created=1722972450, model='gpt-3.5-turbo-0125', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=37, prompt_tokens=42, total_tokens=79))

In [6]:
# Multiple message conversation acompletion call with added params
await litellm.acompletion(
            model="gpt-3.5-turbo",
            messages=[{ "content": "Hello, I want to bake a cake","role": "user"},
                      { "content": "Hello, I can pull up some recipes for cakes.","role": "assistant"},
                      { "content": "No actually I want to make a pie","role": "user"},],
            temperature=0.7,
            max_tokens=20
        )


I0000 00:00:1722971124.005816  300873 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1722971124.005994  300873 fork_posix.cc:77] Other threads are currently calling into gRPC, skipping fork() handlers


ModelResponse(id='chatcmpl-9tJmmuD3LwF0qBbeW6Gtj6IL4wpD1', choices=[Choices(finish_reason='length', index=0, message=Message(content='Great! What kind of pie are you thinking of making? I can help you find a recipe for', role='assistant', tool_calls=None, function_call=None))], created=1722971124, model='gpt-3.5-turbo-0125', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=20, prompt_tokens=42, total_tokens=62))

In [None]:
litellm.completion_with_retries(
            model="gpt-3.5-turbo",
            messages=[{ "content": "What's the highest grossing film ever","role": "user"}]
        )

In [None]:
litellm.embedding(model='text-embedding-ada-002', input=["good morning from litellm"])

In [None]:
await litellm.aembedding(model='text-embedding-ada-002', input=["good morning from litellm"])

In [6]:
litellm.image_generation(model='dall-e-2', prompt="cute baby otter")

ImageResponse(created=1722972470, data=[{'b64_json': None, 'revised_prompt': None, 'url': 'https://oaidalleapiprodscus.blob.core.windows.net/private/org-BbTPJCaZn15AHWabDLEhr6Zd/user-yOf3YhCIEb08AQU8TwPBEMcf/img-FUr1aFnu9Z1SZhhrlFBEkBui.png?st=2024-08-06T18%3A27%3A50Z&se=2024-08-06T20%3A27%3A50Z&sp=r&sv=2023-11-03&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2024-08-06T05%3A42%3A07Z&ske=2024-08-07T05%3A42%3A07Z&sks=b&skv=2023-11-03&sig=HmmRW1MuYTGQ9VILpJ%2BKlX/PhyfzZCjXM/KbcL74ju4%3D'}], usage={'prompt_tokens': 0, 'completion_tokens': 0, 'total_tokens': 0})

In [None]:
await litellm.aimage_generation(model='dall-e-2', prompt="cute baby otter")

In [None]:
LiteLLMInstrumentor().uninstrument(tracer_provider=tracer_provider)

In [None]:
litellm.embedding(model='text-embedding-ada-002', input=["good morning from litellm"])
await litellm.acompletion(
            model="gpt-3.5-turbo",
            messages=[{ "content": "Hello, I want to bake a cake","role": "user"},
                      { "content": "Hello, I can pull up some recipes for cakes.","role": "assistant"},
                      { "content": "No actually I want to make a pie","role": "user"},],
            temperature=0.7,
            max_tokens=20
        )
litellm.completion(
            model="gpt-3.5-turbo",
            messages=[{ "content": "Hello, I want to bake a cake","role": "user"},
                      { "content": "Hello, I can pull up some recipes for cakes.","role": "assistant"},
                      { "content": "No actually I want to make a pie","role": "user"},],
            temperature=0.7
        )

In [None]:
LiteLLMInstrumentor().instrument(tracer_provider=tracer_provider)

In [None]:
litellm.embedding(model='text-embedding-ada-002', input=["good morning from litellm"])
await litellm.acompletion(
            model="gpt-3.5-turbo",
            messages=[{ "content": "Hello, I want to bake a cake","role": "user"},
                      { "content": "Hello, I can pull up some recipes for cakes.","role": "assistant"},
                      { "content": "No actually I want to make a pie","role": "user"},],
            temperature=0.7,
            max_tokens=20
        )
litellm.completion(
            model="gpt-3.5-turbo",
            messages=[{ "content": "Hello, I want to bake a cake","role": "user"},
                      { "content": "Hello, I can pull up some recipes for cakes.","role": "assistant"},
                      { "content": "No actually I want to make a pie","role": "user"},],
            temperature=0.7
        )