# test-models.ipynb

Just test model usage

In [1]:
import os, sys
sys.path.append(os.path.join(".."))
import hjson
from llms_wrapper.llms import LLMS
from llms_wrapper.config import update_llm_config

In [2]:
config = dict(
    llms=[
        # OpenAI
        # https://platform.openai.com/docs/models
        dict(llm="openai/gpt-4o"),
        dict(llm="openai/gpt-4o-mini"),
        # dict(llm="openai/o1"),        # restricted
        # dict(llm="openai/o1-mini"),   # restricted
        # Google Gemini
        # https://ai.google.dev/gemini-api/docs/models/gemini
        dict(llm="gemini/gemini-2.0-flash-exp"),
        dict(llm="gemini/gemini-1.5-flash"),
        dict(llm="gemini/gemini-1.5-pro"),
        # Anthropic
        # https://docs.anthropic.com/en/docs/about-claude/models
        dict(llm="anthropic/claude-3-5-sonnet-20240620"),
        dict(llm="anthropic/claude-3-opus-20240229"),
        # Mistral
        # https://docs.mistral.ai/getting-started/models/models_overview/
        dict(llm="mistral/mistral-large-latest"),
        # XAI
        # dict(llm="xai/grok-2"),     # not mapped by litellm yet?
        dict(llm="xai/grok-beta"),
        # Groq
        # https://console.groq.com/docs/models
        dict(llm="groq/llama3-70b-8192"),
        dict(llm="groq/llama-3.3-70b-versatile"),
        # Deepseek
        # https://api-docs.deepseek.com/quick_start/pricing
        dict(llm="deepseek/deepseek-chat"),
    ],
    providers = dict(
        openai = dict(api_key_env="MY_OPENAI_API_KEY"),
        gemini = dict(api_key_env="MY_GEMINI_API_KEY"),
        anthropic = dict(api_key_env="MY_ANTHROPIC_API_KEY"),
        mistral = dict(api_key_env="MY_MISTRAL_API_KEY"),
        xai = dict(api_key_env="MY_XAI_API_KEY"),    
        groq = dict(api_key_env="MY_GROQ_API_KEY"),
        deepseek = dict(api_key_env="MY_DEEPSEEK_API_KEY"),
    )
)


In [3]:
with open("example-config2.hjson", "wt") as outfp:
    hjson.dump(config, outfp, indent=2)

In [3]:
update_llm_config(config)
llms = LLMS(config, use_phoenix=("http://0.0.0.0:6006/v1/traces", "llms_wrapper_test"))
config

Attempting to instrument while already instrumented


🔭 OpenTelemetry Tracing Details 🔭
|  Phoenix Project: llms_wrapper_test
|  Span Processor: SimpleSpanProcessor
|  Collector Endpoint: http://0.0.0.0:6006/v1/traces
|  Transport: HTTP + protobuf
|  Transport Headers: {}
|  
|  Using a default SpanProcessor. `add_span_processor` will overwrite this default.
|  
|  `register` has set this TracerProvider as the global OpenTelemetry default.
|  To disable this behavior, call `register` with `set_global_tracer_provider=False`.



{'llms': [{'llm': 'openai/gpt-4o',
   'api_key_env': 'MY_OPENAI_API_KEY',
   'alias': 'openai/gpt-4o'},
  {'llm': 'openai/gpt-4o-mini',
   'api_key_env': 'MY_OPENAI_API_KEY',
   'alias': 'openai/gpt-4o-mini'},
  {'llm': 'gemini/gemini-2.0-flash-exp',
   'api_key_env': 'MY_GEMINI_API_KEY',
   'alias': 'gemini/gemini-2.0-flash-exp'},
  {'llm': 'gemini/gemini-1.5-flash',
   'api_key_env': 'MY_GEMINI_API_KEY',
   'alias': 'gemini/gemini-1.5-flash'},
  {'llm': 'gemini/gemini-1.5-pro',
   'api_key_env': 'MY_GEMINI_API_KEY',
   'alias': 'gemini/gemini-1.5-pro'},
  {'llm': 'anthropic/claude-3-5-sonnet-20240620',
   'api_key_env': 'MY_ANTHROPIC_API_KEY',
   'alias': 'anthropic/claude-3-5-sonnet-20240620'},
  {'llm': 'anthropic/claude-3-opus-20240229',
   'api_key_env': 'MY_ANTHROPIC_API_KEY',
   'alias': 'anthropic/claude-3-opus-20240229'},
  {'llm': 'mistral/mistral-large-latest',
   'api_key_env': 'MY_MISTRAL_API_KEY',
   'alias': 'mistral/mistral-large-latest'},
  {'llm': 'xai/grok-beta',
  

In [5]:
llms.list_aliases()

['openai/gpt-4o',
 'openai/gpt-4o-mini',
 'gemini/gemini-2.0-flash-exp',
 'gemini/gemini-1.5-flash',
 'gemini/gemini-1.5-pro',
 'anthropic/claude-3-5-sonnet-20240620',
 'anthropic/claude-3-opus-20240229',
 'mistral/mistral-large-latest',
 'xai/grok-beta',
 'groq/llama3-70b-8192',
 'groq/llama-3.3-70b-versatile',
 'deepseek/deepseek-chat']

In [6]:
for llmalias in llms.list_aliases():
    print(f"{llmalias}:")
    try:
        c1, c2 = llms.cost_per_token(llmalias)
        print(f"  Cost per token, input={c1}, output={c2}")
        mt = llms.max_prompt_tokens(llmalias)
        print(f"  Maximum tokens: {mt}")  
    except:
        print(f"  No information")

openai/gpt-4o:
  Cost per token, input=1e-05, output=2.5e-06
  No information
openai/gpt-4o-mini:
  Cost per token, input=6e-07, output=1.5e-07
  No information
gemini/gemini-2.0-flash-exp:
  Cost per token, input=0, output=0.0
  No information
gemini/gemini-1.5-flash:
  Cost per token, input=3e-07, output=7.5e-08
  No information
gemini/gemini-1.5-pro:
  Cost per token, input=1.05e-05, output=3.5e-06
  No information
anthropic/claude-3-5-sonnet-20240620:
  Cost per token, input=1.5e-05, output=3e-06
  No information
anthropic/claude-3-opus-20240229:
  Cost per token, input=7.5e-05, output=1.5e-05
  No information
mistral/mistral-large-latest:
  Cost per token, input=6e-06, output=2e-06
  No information
xai/grok-beta:
  Cost per token, input=1.5e-05, output=5e-06
  No information
groq/llama3-70b-8192:
  Cost per token, input=7.9e-07, output=5.9e-07
  No information
groq/llama-3.3-70b-versatile:
  Cost per token, input=7.9e-07, output=5.9e-07
  No information
deepseek/deepseek-chat:
  C

In [4]:
messages = llms.make_messages(query="What is the first name of the famous physicist who came up with the theory of relativity? Answer with the first name only.")


In [7]:


for llmalias in llms.list_aliases():
    print("Model:", llmalias)
    cinfo = {}
    ret = llms.query(
        llmalias, 
        messages=messages, 
        return_cost=True,
        debug=False,
    )
    answer = ret["answer"]
    error = ret["error"]
    if error:
        print("  ERROR:", error)
    else:
        print("  Answer:", answer)


Model: openai/gpt-4o
  Answer: Albert
Model: openai/gpt-4o-mini
  Answer: Albert
Model: gemini/gemini-2.0-flash-exp
  Answer: Albert

Model: gemini/gemini-1.5-flash
  Answer: Albert

Model: gemini/gemini-1.5-pro
  Answer: Albert

Model: anthropic/claude-3-5-sonnet-20240620
  Answer: Albert
Model: anthropic/claude-3-opus-20240229
  Answer: Albert
Model: mistral/mistral-large-latest
  Answer: Albert
Model: xai/grok-beta
  Answer: Albert
Model: groq/llama3-70b-8192
  Answer: Albert
Model: groq/llama-3.3-70b-versatile
  Answer: Albert
Model: deepseek/deepseek-chat
  Answer: Albert


In [5]:
# test using the LLM object
llm1 = llms["openai/gpt-4o"]
llm1

LLM(openai/gpt-4o)

In [6]:
ret = llm1.query(messages=messages, 
        return_cost=True,
        debug=False,)
ret

{'elapsed_time': 0.7788379192352295,
 'cost': 0.000105,
 'n_completion_tokens': 2,
 'n_prompt_tokens': 34,
 'n_total_tokens': 36,
 'answer': 'Albert',
 'error': '',
 'ok': True}