In [1]:
# --- Imports (grouped at the top of the cell: stdlib first, then third-party) ---
import os
import warnings

import phoenix as px
import litellm
from dotenv import load_dotenv
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor

# Load environment variables from .env file
load_dotenv()

# Read the OpenAI key from the environment. It is not passed anywhere explicitly
# in this notebook, so it is only checked here to surface a missing key early
# instead of as a provider error on the first completion call.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    warnings.warn(
        "OPENAI_API_KEY is not set; the OpenAI-backed litellm calls in this "
        "notebook are expected to fail until it is provided (e.g. via a .env file)."
    )

# Start the local Phoenix app that will display the collected traces.
session = px.launch_app()

# Build a tracer provider whose spans are handed to an OTLP/HTTP exporter pointed
# at the Phoenix trace endpoint on port 6006 of this machine.
endpoint = "http://127.0.0.1:6006/v1/traces"
tracer_provider = TracerProvider()
tracer_provider.add_span_processor(SimpleSpanProcessor(OTLPSpanExporter(endpoint)))

  from .autonotebook import tqdm as notebook_tqdm


🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


I0000 00:00:1722220930.637031  626003 config.cc:230] gRPC experiments enabled: call_status_override_on_cancellation, event_engine_dns, event_engine_listener, http2_stats_fix, monitoring_experiment, pick_first_new, trace_record_callops, work_serializer_clears_time_cache


In [2]:
# Make the local 'src' package importable from this notebook
import os
import sys

# Absolute location of the 'src' directory that sits next to the current working directory
package_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir, "src"))

# Register the directory on sys.path only once, so re-running the cell is harmless
if sys.path.count(package_path) == 0:
    sys.path.append(package_path)


In [3]:
from openinference.instrumentation.litellm import LiteLLMInstrumentor

# Turn on LiteLLM instrumentation, bound to the tracer provider configured earlier
LiteLLMInstrumentor().instrument(
    tracer_provider=tracer_provider,
)

In [4]:
# Synchronous chat completion with a single user message
litellm.completion(
    model="gpt-3.5-turbo",
    messages=[
        {"content": "What's the capital of China?", "role": "user"},
    ],
)

I0000 00:00:1722220937.284050  625893 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1722220937.284228  625893 fork_posix.cc:77] Other threads are currently calling into gRPC, skipping fork() handlers


ModelResponse(id='chatcmpl-9qAczdxnOMvkHnvoPJ1npTxIknZvg', choices=[Choices(finish_reason='stop', index=0, message=Message(content='Beijing', role='assistant', tool_calls=None, function_call=None))], created=1722220937, model='gpt-3.5-turbo-0125', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=2, prompt_tokens=14, total_tokens=16))

In [5]:
# Synchronous chat completion over a three-turn conversation,
# with one extra sampling parameter (temperature)
litellm.completion(
    model="gpt-3.5-turbo",
    messages=[
        {"content": "Hello, I want to bake a cake", "role": "user"},
        {"content": "Hello, I can pull up some recipes for cakes.", "role": "assistant"},
        {"content": "No actually I want to make a pie", "role": "user"},
    ],
    temperature=0.7,
)


ModelResponse(id='chatcmpl-9qAd1WRS2KNBMIb1u7rWtv2or7CC7', choices=[Choices(finish_reason='stop', index=0, message=Message(content='Oh, I see! What type of pie are you thinking of making? I can help you find a recipe for that as well.', role='assistant', tool_calls=None, function_call=None))], created=1722220939, model='gpt-3.5-turbo-0125', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=27, prompt_tokens=42, total_tokens=69))

In [6]:
# Multiple message conversation acompletion call with added params
await litellm.acompletion(
            model="gpt-3.5-turbo",
            messages=[{ "content": "Hello, I want to bake a cake","role": "user"},
                      { "content": "Hello, I can pull up some recipes for cakes.","role": "assistant"},
                      { "content": "No actually I want to make a pie","role": "user"},],
            temperature=0.7,
            max_tokens=20
        )


I0000 00:00:1722220939.762190  625893 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1722220939.762316  625893 fork_posix.cc:77] Other threads are currently calling into gRPC, skipping fork() handlers


ModelResponse(id='chatcmpl-9qAd20H69IKm30K0otGgP8ZEaXyyw', choices=[Choices(finish_reason='length', index=0, message=Message(content='Got it! What type of pie are you looking to make? Apple, pumpkin, pecan,', role='assistant', tool_calls=None, function_call=None))], created=1722220940, model='gpt-3.5-turbo-0125', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=20, prompt_tokens=42, total_tokens=62))

In [7]:
# Chat completion issued through litellm's completion_with_retries entry point
litellm.completion_with_retries(
    model="gpt-3.5-turbo",
    messages=[
        {"content": "What's the highest grossing film ever", "role": "user"},
    ],
)

ModelResponse(id='chatcmpl-9qAd2tlNEKRzOagNu7uOb5SygQVLU', choices=[Choices(finish_reason='stop', index=0, message=Message(content='As of now, the highest grossing film ever is Avengers: Endgame, which grossed over $2.798 billion worldwide.', role='assistant', tool_calls=None, function_call=None))], created=1722220940, model='gpt-3.5-turbo-0125', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=27, prompt_tokens=15, total_tokens=42))

In [8]:
# Synchronous embedding request for a single input string
litellm.embedding(
    model="text-embedding-ada-002",
    input=["good morning from litellm"],
)

EmbeddingResponse(model='text-embedding-ada-002', data=[{'embedding': [-0.002272780751809478, 0.010863995179533958, -0.026400675997138023, -0.022937390953302383, -0.011056399904191494, 0.0492556057870388, -0.028888193890452385, 0.016038307920098305, 0.00217657838948071, -0.009414087980985641, 0.00022762165463063866, 0.017948610708117485, -0.027362698689103127, -0.02388567104935646, 0.009558391757309437, -0.001791768940165639, 0.010960197076201439, 0.0019206113647669554, 0.010087504051625729, 0.0011698893504217267, -0.005730912089347839, 0.008713184855878353, 0.010046274401247501, -0.010128733702003956, 0.01189473457634449, -0.04067985340952873, 0.012080268003046513, -0.029630325734615326, -0.00924229808151722, 0.006473044399172068, -0.009111737832427025, 0.006390585098415613, -0.02612581104040146, -0.004816989414393902, -0.019020579755306244, -0.032543882727622986, 0.00521897803992033, -0.012650609947741032, 0.0033808257430791855, -0.013406485319137573, 0.012258929200470448, 5.01411850

In [9]:
await litellm.aembedding(model='text-embedding-ada-002', input=["good morning from litellm"])

EmbeddingResponse(model='text-embedding-ada-002', data=[{'embedding': [-0.002272780751809478, 0.010863995179533958, -0.026400675997138023, -0.022937390953302383, -0.011056399904191494, 0.0492556057870388, -0.028888193890452385, 0.016038307920098305, 0.00217657838948071, -0.009414087980985641, 0.00022762165463063866, 0.017948610708117485, -0.027362698689103127, -0.02388567104935646, 0.009558391757309437, -0.001791768940165639, 0.010960197076201439, 0.0019206113647669554, 0.010087504051625729, 0.0011698893504217267, -0.005730912089347839, 0.008713184855878353, 0.010046274401247501, -0.010128733702003956, 0.01189473457634449, -0.04067985340952873, 0.012080268003046513, -0.029630325734615326, -0.00924229808151722, 0.006473044399172068, -0.009111737832427025, 0.006390585098415613, -0.02612581104040146, -0.004816989414393902, -0.019020579755306244, -0.032543882727622986, 0.00521897803992033, -0.012650609947741032, 0.0033808257430791855, -0.013406485319137573, 0.012258929200470448, 5.01411850

In [10]:
# Synchronous image generation request
# NOTE(review): the displayed response contains a signed image URL; consider
# clearing this cell's output before sharing the notebook.
litellm.image_generation(
    model="dall-e-2",
    prompt="cute baby otter",
)

ImageResponse(created=1722220953, data=[{'b64_json': None, 'revised_prompt': None, 'url': 'https://oaidalleapiprodscus.blob.core.windows.net/private/org-BbTPJCaZn15AHWabDLEhr6Zd/user-yOf3YhCIEb08AQU8TwPBEMcf/img-WvytRwkS8ucvOF1xuOiyPlRD.png?st=2024-07-29T01%3A42%3A33Z&se=2024-07-29T03%3A42%3A33Z&sp=r&sv=2023-11-03&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2024-07-29T00%3A07%3A29Z&ske=2024-07-30T00%3A07%3A29Z&sks=b&skv=2023-11-03&sig=ZEJXVR%2BWbKK9n%2BUPCe4XbZAFKUnedc5%2BCnoaD0u9o8E%3D'}], usage={'prompt_tokens': 0, 'completion_tokens': 0, 'total_tokens': 0})

In [11]:
await litellm.aimage_generation(model='dall-e-2', prompt="cute baby otter")

ImageResponse(created=1722220962, data=[{'b64_json': None, 'revised_prompt': None, 'url': 'https://oaidalleapiprodscus.blob.core.windows.net/private/org-BbTPJCaZn15AHWabDLEhr6Zd/user-yOf3YhCIEb08AQU8TwPBEMcf/img-e8x6ETAkHrroKuy5yI7DFBFt.png?st=2024-07-29T01%3A42%3A42Z&se=2024-07-29T03%3A42%3A42Z&sp=r&sv=2023-11-03&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2024-07-28T23%3A26%3A42Z&ske=2024-07-29T23%3A26%3A42Z&sks=b&skv=2023-11-03&sig=8T3nCkpf2zoz08S8r%2BVCb%2BVFMH1JVBmaH7dCi%2BL6Ntg%3D'}], usage={'prompt_tokens': 0, 'completion_tokens': 0, 'total_tokens': 0})

In [12]:
# Switch the LiteLLM instrumentation off again
# (presumably so the following cell can confirm that no new spans appear — verify in Phoenix)
LiteLLMInstrumentor().uninstrument(
    tracer_provider=tracer_provider,
)

In [13]:
litellm.embedding(model='text-embedding-ada-002', input=["good morning from litellm"])
await litellm.acompletion(
            model="gpt-3.5-turbo",
            messages=[{ "content": "Hello, I want to bake a cake","role": "user"},
                      { "content": "Hello, I can pull up some recipes for cakes.","role": "assistant"},
                      { "content": "No actually I want to make a pie","role": "user"},],
            temperature=0.7,
            max_tokens=20
        )
litellm.completion(
            model="gpt-3.5-turbo",
            messages=[{ "content": "Hello, I want to bake a cake","role": "user"},
                      { "content": "Hello, I can pull up some recipes for cakes.","role": "assistant"},
                      { "content": "No actually I want to make a pie","role": "user"},],
            temperature=0.7
        )

ModelResponse(id='chatcmpl-9qAdxGggsr3dM5TlTht5UUegkDkI6', choices=[Choices(finish_reason='stop', index=0, message=Message(content='Great! What type of pie are you thinking of making? I can help you find a recipe for that as well.', role='assistant', tool_calls=None, function_call=None))], created=1722220997, model='gpt-3.5-turbo-0125', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=24, prompt_tokens=42, total_tokens=66))

In [14]:
# Re-enable the LiteLLM instrumentation with the same tracer provider.
# In the recorded run this logged an OpenTelemetry warning,
# "Overriding of current TracerProvider is not allowed".
LiteLLMInstrumentor().instrument(
    tracer_provider=tracer_provider,
)

WARNI [opentelemetry.trace] Overriding of current TracerProvider is not allowed


In [15]:
litellm.embedding(model='text-embedding-ada-002', input=["good morning from litellm"])
await litellm.acompletion(
            model="gpt-3.5-turbo",
            messages=[{ "content": "Hello, I want to bake a cake","role": "user"},
                      { "content": "Hello, I can pull up some recipes for cakes.","role": "assistant"},
                      { "content": "No actually I want to make a pie","role": "user"},],
            temperature=0.7,
            max_tokens=20
        )
litellm.completion(
            model="gpt-3.5-turbo",
            messages=[{ "content": "Hello, I want to bake a cake","role": "user"},
                      { "content": "Hello, I can pull up some recipes for cakes.","role": "assistant"},
                      { "content": "No actually I want to make a pie","role": "user"},],
            temperature=0.7
        )

ModelResponse(id='chatcmpl-9qAe5Nki3QnFNTbSBaXSQwMhOwYra', choices=[Choices(finish_reason='stop', index=0, message=Message(content='Great! What kind of pie would you like to make? Apple, cherry, pumpkin, or something else?', role='assistant', tool_calls=None, function_call=None))], created=1722221005, model='gpt-3.5-turbo-0125', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=22, prompt_tokens=42, total_tokens=64))