# test-api.ipynb

Test API and helper functions

In [1]:
import os, sys
from typing import Optional, List, Dict
sys.path.append(os.path.join(".."))
import hjson
import docstring_parser
import inspect
from llms_wrapper.llms import LLMS, KNOWN_LLM_CONFIG_FIELDS
from llms_wrapper.config import update_llm_config
from llms_wrapper.utils import dict_except


In [2]:
# Test configuration: the models to exercise (as "provider/model" names)
# plus per-provider settings shared by all models of that provider.
config = {
    "llms": [
        # OpenAI
        # https://platform.openai.com/docs/models
        {"llm": "openai/gpt-4o"},
        {"llm": "openai/gpt-4o-mini"},
        {"llm": "openai/o1"},                # restricted
        {"llm": "openai/o1-mini"},           # restricted
        {"llm": "openai/gpt-4.5-preview"},   # restricted?
        # Google Gemini
        # https://ai.google.dev/gemini-api/docs/models/gemini
        {"llm": "gemini/gemini-2.0-flash-exp"},
        {"llm": "gemini/gemini-1.5-flash"},
        {"llm": "gemini/gemini-1.5-pro"},
        # Anthropic
        # https://docs.anthropic.com/en/docs/about-claude/models
        {"llm": "anthropic/claude-3-5-sonnet-20240620"},
        {"llm": "anthropic/claude-3-opus-20240229"},
        # Mistral
        # https://docs.mistral.ai/getting-started/models/models_overview/
        {"llm": "mistral/mistral-large-latest"},
        # XAI
        # {"llm": "xai/grok-2"},     # not mapped by litellm yet?
        {"llm": "xai/grok-beta"},
        # Groq
        # https://console.groq.com/docs/models
        {"llm": "groq/llama3-70b-8192"},
        {"llm": "groq/llama-3.3-70b-versatile"},
        # Deepseek
        # https://api-docs.deepseek.com/quick_start/pricing
        {"llm": "deepseek/deepseek-chat"},
        # Made-up model name carrying explicit limits/cost/temperature, used by
        # the cells that query max_input_tokens / cost_per_token for it.
        {
            "llm": "gemini/somemodel",
            "max_input_tokens": 100000,
            "cost_per_prompt_token": 0.0002,
            "temperature": 0,
        },
    ],
    # Per-provider settings; presumably api_key_env names the environment
    # variable that holds the provider's API key (confirm in llms_wrapper docs).
    "providers": {
        "openai": {"api_key_env": "MY_OPENAI_API_KEY"},
        "gemini": {"api_key_env": "MY_GEMINI_API_KEY"},
        "anthropic": {"api_key_env": "MY_ANTHROPIC_API_KEY"},
        "mistral": {"api_key_env": "MY_MISTRAL_API_KEY"},
        "xai": {"api_key_env": "MY_XAI_API_KEY"},
        "groq": {"api_key_env": "MY_GROQ_API_KEY"},
        "deepseek": {"api_key_env": "MY_DEEPSEEK_API_KEY"},
    },
}
# Bind the return value to `_` so the cell does not display it.
_ = update_llm_config(config)

In [3]:
llms = LLMS(
    config,  
    # use_phoenix=("http://0.0.0.0:6006/v1/traces", "llms_wrapper_test")
)

In [4]:
llms.list_aliases()

['openai/gpt-4o',
 'openai/gpt-4o-mini',
 'openai/o1',
 'openai/o1-mini',
 'openai/gpt-4.5-preview',
 'gemini/gemini-2.0-flash-exp',
 'gemini/gemini-1.5-flash',
 'gemini/gemini-1.5-pro',
 'anthropic/claude-3-5-sonnet-20240620',
 'anthropic/claude-3-opus-20240229',
 'mistral/mistral-large-latest',
 'xai/grok-beta',
 'groq/llama3-70b-8192',
 'groq/llama-3.3-70b-versatile',
 'deepseek/deepseek-chat',
 'gemini/somemodel']

In [5]:
# llms.known_models()

In [6]:
llms.cost_per_token("openai/gpt-4o")

(1e-05, 2.5e-06)

In [7]:
llms.max_input_tokens("openai/gpt-4o")

128000

In [8]:
llms.max_output_tokens("openai/gpt-4o")

16384

In [9]:
llms["gemini/somemodel"].config

{'llm': 'gemini/somemodel',
 'max_input_tokens': 100000,
 'cost_per_prompt_token': 0.0002,
 'temperature': 0,
 'api_key_env': 'MY_GEMINI_API_KEY',
 'alias': 'gemini/somemodel',
 '_cost': 0,
 '_last_request_time': 0,
 '_elapsed_time': 0}

In [10]:
llms.cost_per_token("gemini/gemini-1.5-flash")

(3e-07, 7.5e-08)

In [11]:
llms.cost_per_token("gemini/somemodel")

(0.0002, None)

In [12]:
llms.max_input_tokens("gemini/somemodel")

100000

In [13]:
llms.max_output_tokens("gemini/somemodel")

In [14]:
msg1 = llms.make_messages("What is a monoid?")

In [15]:
llms.count_tokens("openai/gpt-4.5-preview", msg1)

13

In [16]:
llm = llms["openai/gpt-4o"]

In [17]:
llm.count_tokens(msg1)

13

In [18]:
llm.count_tokens("What is a monoid?")

13

In [15]:
ret1 = llms.query(llmalias="openai/gpt-4.5-preview", return_cost=True, return_response=True, messages=msg1)
ret1

{'elapsed_time': 39.25307822227478,
 'response': ModelResponse(id='chatcmpl-BFI6FbX7BtguunCLPpOR1KqsexGuk', created=1742984195, model='gpt-4.5-preview-2025-02-27', object='chat.completion', system_fingerprint=None, choices=[Choices(finish_reason='stop', index=0, message=Message(content='A **monoid** is a fundamental algebraic structure in mathematics. Formally, it\'s defined as a set equipped with an associative binary operation and an identity element.\n\nSpecifically, a monoid is a triple \\((M, \\cdot, e)\\), where:\n\n1. **\\(M\\)** is a set.\n2. **\\(\\cdot\\)** is a binary operation: \n   \\[\n   \\cdot : M \\times M \\rightarrow M\n   \\]\n   satisfying the associativity property:\n   \\[\n   (a \\cdot b) \\cdot c = a \\cdot (b \\cdot c) \\quad \\text{for every } a, b, c \\in M.\n   \\]\n3. **\\(e\\)** is a special element in \\(M\\), called the identity element, such that for every element \\(a\\) in \\(M\\):\n   \\[\n   e \\cdot a = a \\cdot e = a.\n   \\]\n\n**Examples of mon

In [16]:
msg2 = llms.make_messages("What is a monoid? Return a JSON dict that has the single key 'answer' that contains your answer.")

In [17]:
# Same question as msg1, but request a JSON-object response and keep both the
# raw response object and the cost information in the result.
ret2 = llms.query(
    llmalias="openai/gpt-4o",
    messages=msg2,
    response_format={"type": "json_object"},
    return_response=True,
    return_cost=True,
)
ret2

{'elapsed_time': 3.4803478717803955,
 'response': ModelResponse(id='chatcmpl-B84wrhOs6ttfi69R2Aw67GVoGWY2O', created=1741265345, model='gpt-4o-2024-08-06', object='chat.completion', system_fingerprint='fp_eb9dce56a8', choices=[Choices(finish_reason='stop', index=0, message=Message(content='{\n  "answer": "In mathematics, a monoid is an algebraic structure with a single associative binary operation and an identity element. A set M equipped with a binary operation ⋅ : M × M → M forms a monoid if it satisfies the following conditions: 1) Associativity: For all a, b, c in M, (a ⋅ b) ⋅ c = a ⋅ (b ⋅ c). 2) Identity element: There exists an element e in M such that for every element a in M, e ⋅ a = a ⋅ e = a. Monoids are a central concept in abstract algebra and are used in various fields, including computer science, for modeling computations and processes."\n}', role='assistant', tool_calls=None, function_call=None, provider_specific_fields={'refusal': None}))], usage=Usage(completion_tokens

In [18]:
# ret2["response"].response_ms

In [19]:
# Fixture for the tooling cells below: llms.make_tooling(func1) and
# docstring_parser.parse(func1.__doc__) both consume this function's signature
# and reST-style docstring, so the docstring text is test data -- keep it as is.
# Note that `a` and `b` declare a type in the docstring while `c` and `d` do not.
def func1(a: str, b: int, c: int = 1, d: Optional[List[Dict]] = None) -> str: 
    """
    This is the short description.

    Here we may have a longer description. This one can go over many lines

    :param str a: this is parameter a
    :param b: this is parameter b
    :type b: int
    :param c: some parameter c    
    :param d: some parameter d
    :return: what it returns
    :rtype: str
    """
    # The return value is a placeholder; only the metadata above is inspected.
    return "x"


In [20]:
llms.make_tooling(func1)

[{'type': 'function',
  'function': {'name': 'func1',
   'description': 'This is the short description.\n\nHere we may have a longer description. This one can go over many lines',
   'parameters': {'type': 'object',
    'properties': {'a': {'type': 'str', 'description': 'this is parameter a'},
     'b': {'type': 'int', 'description': 'this is parameter b'},
     'c': {'type': None, 'description': 'some parameter c'},
     'd': {'type': None, 'description': 'some parameter d'}},
    'required': ['a', 'b']}}}]

In [21]:
doc = docstring_parser.parse(func1.__doc__)

In [22]:
doc.params[0]

<docstring_parser.common.DocstringParam at 0x77da8cf82190>

In [23]:
len(doc.params)

4

## Test Retries

In [24]:
# Minimal single-model configuration for the retry test: a local Ollama
# endpoint with num_retries set on the model entry itself.
config2 = {
    "llms": [
        {
            "llm": "ollama/llama3",
            "api_url": "http://localhost:11434",
            "num_retries": 3,
        },
    ],
}
config2

{'llms': [{'llm': 'ollama/llama3',
   'api_url': 'http://localhost:11434',
   'num_retries': 3}]}

In [25]:
llms2 = LLMS(config2, use_phoenix=("http://0.0.0.0:6006/v1/traces", "llms_wrapper_test"))
# llms2 = LLMS(config2)
llms2["ollama/llama3"].config

Overriding of current TracerProvider is not allowed
Attempting to instrument while already instrumented


🔭 OpenTelemetry Tracing Details 🔭
|  Phoenix Project: llms_wrapper_test
|  Span Processor: SimpleSpanProcessor
|  Collector Endpoint: http://0.0.0.0:6006/v1/traces
|  Transport: HTTP + protobuf
|  Transport Headers: {}
|  
|  Using a default SpanProcessor. `add_span_processor` will overwrite this default.
|  
|  `register` has set this TracerProvider as the global OpenTelemetry default.
|  To disable this behavior, call `register` with `set_global_tracer_provider=False`.



{'llm': 'ollama/llama3',
 'api_url': 'http://localhost:11434',
 'num_retries': 3,
 '_cost': 0,
 '_elapsed_time': 0}

In [26]:
messages = llms2.make_messages(query="What is a monoid")

In [27]:
llms["openai/gpt-4o"].config

{'llm': 'openai/gpt-4o',
 'api_key_env': 'MY_OPENAI_API_KEY',
 'alias': 'openai/gpt-4o',
 '_cost': 0.001595,
 '_elapsed_time': 3.4803478717803955}

In [28]:
# NOTE(review): this cell sits under "Test Retries" but sends the request through
# `llms` (not `llms2`) with num_retries=0, while `messages` was built by
# llms2.make_messages -- confirm this is the intended no-retry baseline.
ret = llms.query(
    "openai/gpt-4o", 
    messages=messages, 
    # return_cost=True,
    debug=True,
    num_retries=0,
)
ret

{'elapsed_time': 8.568790674209595,
 'finish_reason': 'stop',
 'answer': 'In abstract algebra, a monoid is a basic algebraic structure that consists of a set equipped with an associative binary operation and an identity element. More formally, a monoid is defined by the following properties:\n\n1. **Set and Operation**: There is a set \\( M \\) and a binary operation \\( \\cdot : M \\times M \\to M \\) (often denoted by juxtaposition, e.g., \\( a \\cdot b \\) is written as \\( ab \\)).\n\n2. **Associativity**: The binary operation is associative. That is, for all \\( a, b, c \\in M \\), the equation \\((ab)c = a(bc)\\) holds.\n\n3. **Identity Element**: There exists an identity element \\( e \\in M \\) such that for every element \\( a \\in M \\), the equations \\( ea = a \\) and \\( ae = a \\) hold. This element is sometimes called the "unit" or "neutral element."\n\nMonoids are used in various areas of mathematics and computer science, often in contexts where a notion of composition 

### Test rate limiting

See https://github.com/OFAI/python-llms-wrapper/issues/16

UPDATE: it turns out that the `rpm` setting has no effect, so the `min_delay` parameter was implemented instead.

In [6]:
from datetime import datetime
import time

# Minimum spacing between consecutive requests, in seconds; passed to
# llms.query() as min_delay.
MIN_DELAY_SECONDS = 60
# Number of leading characters of each answer to print.
PREVIEW_CHARS = 100

qs = [f"What is a {term}" for term in ["field", "complex number", "sinus function", "monoid"]]
# Only aliases that appear in llms.list_aliases(); an empty alias is not one of them.
llmnames = ["openai/gpt-4o", "gemini/gemini-1.5-flash", "mistral/mistral-large-latest"]

# A monotonic clock is used for the measured gap so wall-clock adjustments
# cannot distort it; the printed timestamp still uses local wall-clock time.
last_req = time.monotonic()
for llmname in llmnames:
    print("Running for", llmname)
    for q in qs:
        messages = llms.make_messages(query=q)
        current_time = datetime.now().strftime("%H:%M:%S.%f")[:-4]  # keep 2 fractional digits
        now = time.monotonic()
        print("Query", q)
        print("Sending at", current_time, "delay", now - last_req)
        last_req = now
        ret = llms.query(
            llmname,
            messages=messages,
            return_cost=True,
            debug=False,
            min_delay=MIN_DELAY_SECONDS,
        )
        # Use .get() so a result without an "error" key is treated as success
        # instead of raising KeyError.
        error = ret.get("error")
        if error:
            print("ERROR:", error)
        else:
            # Fall back to the placeholder for a missing or empty/None answer.
            ans = ret.get("answer") or "NO ANSWER!"
            print("RESPONSE:", ans[:PREVIEW_CHARS])
            


Running for openai/gpt-4o
Query What is a field
Sending at 14:58:25.64 delay 0.0006449222564697266
RESPONSE: The term "field" can refer to different concepts depending on the context. Here are a few common mea
Query What is a complex number
Sending at 14:58:32.48 delay 6.8354573249816895
RESPONSE: A complex number is a number that can be expressed in the form \( a + bi \), where \( a \) and \( b 
Query What is a sinus function
Sending at 14:59:28.26 delay 55.77737355232239


KeyboardInterrupt: 

In [7]:
from datetime import datetime

# %f renders six microsecond digits; dropping the last four leaves hundredths
# of a second, i.e. HH:MM:SS.ff.
current_time = f"{datetime.now():%H:%M:%S.%f}"[:-4]
print(current_time)

14:20:52.11


In [13]:
llms["xai/grok-beta"].config

{'llm': 'xai/grok-beta',
 'api_key_env': 'MY_XAI_API_KEY',
 'alias': 'xai/grok-beta',
 '_cost': 0,
 '_elapsed_time': 0}