Smart LLM routing with TIBET provenance. Route queries to the right model, track everything.
pip install oomllama

With TIBET provenance:
pip install oomllama[tibet]

from oomllama import OomLlama
# Simple generation
llm = OomLlama()
response = llm.generate("Hello!")
# With specific model
response = llm.generate("Complex question", model="qwen2.5:32b")
# Auto-routing (picks best model for the query)
llm = OomLlama(auto_route=True)
response = llm.generate("Write a Python function")  # Routes to code model

OomLlama automatically selects the best model based on your query:
from oomllama import OomLlama, ModelRouter
llm = OomLlama(auto_route=True)
# Code query → routes to code-capable model
llm.generate("Write a binary search function")
# Simple query → routes to fast model
llm.generate("What is 2+2?")
# Complex query → routes to reasoning model
llm.generate("Explain quantum entanglement in detail...")

Track every LLM call with cryptographic provenance:
from oomllama import OomLlama
from tibet_core import Provider
# Enable TIBET tracking
tibet = Provider(actor="jis:company:my_app")
llm = OomLlama(tibet=tibet)
# All calls now create provenance tokens
response = llm.generate("Summarize this document")
# Audit trail
for token in tibet.find(action="llm_generate"):
    print(f"{token.timestamp}: {token.erin['model']}")
    print(f" Reason: {token.erachter}")

# Generate text
oomllama gen "Hello, how are you?"
# Auto-route
oomllama gen --auto "Write a Python web scraper"
# Interactive chat
oomllama chat -m qwen2.5:7b
# List models
oomllama list
# Check status
oomllama status

from oomllama import OomLlama
llm = OomLlama(
    model="qwen2.5:7b",                   # Default model
    ollama_url="http://localhost:11434",  # Ollama API
    auto_route=True,                      # Enable smart routing
    system_prompt="You are helpful."      # Default system prompt
)
# Set defaults
llm.set_defaults(
    temperature=0.8,
    max_tokens=1024
)

from oomllama import OomLlama, ModelRouter, ModelConfig, ModelCapability
# Define your models
router = ModelRouter([
    ModelConfig(
        name="my-model:7b",
        size="7b",
        capabilities=[ModelCapability.CODE, ModelCapability.FAST],
        priority=30
    ),
])
llm = OomLlama(router=router, auto_route=True)

# Connect to remote GPU server
llm = OomLlama(ollama_url="http://192.168.4.85:11434")

- Python 3.10+
- Ollama running locally or remotely
MIT - Humotica