Skip to content
This repository was archived by the owner on Dec 11, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/all-sdk-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ jobs:
REPLICATE_API_KEY: ${{ secrets.REPLICATE_API_KEY }}
PPLX_API_KEY: ${{ secrets.PPLX_API_KEY }}
OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
NOTDIAMOND_API_URL: ${{ secrets.NOTDIAMOND_API_URL }}
run: |
PYTEST_ADDOPTS="--color=yes" poetry run coverage run --branch -m pytest -v tests/
poetry run coverage report -m
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ __pycache__/
.env
.envrc
.python-version
dist/
!dist/.gitkeep
37 changes: 33 additions & 4 deletions notdiamond/llms/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,12 @@
MissingLLMConfigs,
)
from notdiamond.llms.config import LLMConfig
from notdiamond.llms.request import amodel_select, model_select, report_latency, create_preference_id
from notdiamond.llms.request import (
amodel_select,
create_preference_id,
model_select,
report_latency,
)
from notdiamond.metrics.metric import Metric
from notdiamond.prompts import _curly_escape, inject_system_prompt
from notdiamond.types import NDApiKeyValidator
Expand Down Expand Up @@ -69,6 +74,7 @@ class _NDRouterClient(BaseModel):
preference_id: Optional[str]
tools: Optional[Sequence[Union[Dict[str, Any], Callable]]]
callbacks: Optional[List]
nd_api_url: Optional[str]

class Config:
arbitrary_types_allowed = True
Expand All @@ -85,6 +91,7 @@ def __init__(
preference_id: Optional[str] = None,
callbacks: Optional[List] = None,
tools: Optional[Sequence[Union[Dict[str, Any], Callable]]] = None,
nd_api_url: Optional[str] = settings.NOTDIAMOND_API_URL,
**kwargs,
):
if api_key is None:
Expand Down Expand Up @@ -120,6 +127,7 @@ def __init__(
preference_id=preference_id,
tools=tools,
callbacks=callbacks,
nd_api_url=nd_api_url,
**kwargs,
)

Expand All @@ -132,7 +140,7 @@ def completions(self):
return self

def create_preference_id(self, name: Optional[str] = None) -> str:
return create_preference_id(self.api_key, name)
return create_preference_id(self.api_key, name, self.nd_api_url)

async def amodel_select(
self,
Expand Down Expand Up @@ -173,8 +181,8 @@ async def amodel_select(
Defaults to Metric("accuracy").
timeout (int): The number of seconds to wait before terminating the API call to Not Diamond backend.
Default to 5 seconds.
nd_api_url (Optional[str]): The URL of the NotDiamond API. Defaults to settings.NOTDIAMOND_API_URL.
**kwargs: Any other arguments that are supported by Langchain's invoke method, will be passed through.

Returns:
tuple[str, Optional[LLMConfig]]: returns the session_id and the chosen LLM
"""
Expand Down Expand Up @@ -205,6 +213,7 @@ async def amodel_select(
preference_id=self.preference_id,
tools=self.tools,
timeout=timeout,
nd_api_url=self.nd_api_url,
)

if not best_llm:
Expand Down Expand Up @@ -286,6 +295,7 @@ def model_select(
preference_id=self.preference_id,
tools=self.tools,
timeout=timeout,
nd_api_url=self.nd_api_url,
)

if not best_llm:
Expand Down Expand Up @@ -508,6 +518,7 @@ class _NDInvokerClient(_NDRouterClient, LLM):
preference_id: Optional[str]
tools: Optional[Sequence[Union[Dict[str, Any], Callable]]]
callbacks: Optional[List]
nd_api_url: Optional[str]

def __init__(
self,
Expand All @@ -521,6 +532,7 @@ def __init__(
preference_id: Optional[str] = None,
tools: Optional[Sequence[Union[Dict[str, Any], Callable]]] = None,
callbacks: Optional[List] = None,
nd_api_url: Optional[str] = settings.NOTDIAMOND_API_URL,
**kwargs,
) -> None:
super().__init__(
Expand All @@ -534,6 +546,7 @@ def __init__(
preference_id=preference_id,
tools=tools,
callbacks=callbacks,
nd_api_url=nd_api_url,
**kwargs,
)

Expand Down Expand Up @@ -607,6 +620,7 @@ def create(
dict.
timeout (int): The number of seconds to wait before terminating the API call to Not Diamond backend.
Default to 5 seconds.
nd_api_url (Optional[str]): The URL of the NotDiamond API. Defaults to settings.NOTDIAMOND_API_URL.
**kwargs: Any other arguments that are supported by Langchain's invoke method, will be passed through.

Raises:
Expand Down Expand Up @@ -799,6 +813,7 @@ def invoke(
preference_id=self.preference_id,
tools=self.tools,
timeout=timeout,
nd_api_url=self.nd_api_url,
)

is_default = False
Expand Down Expand Up @@ -994,6 +1009,7 @@ async def ainvoke(
preference_id=self.preference_id,
tools=self.tools,
timeout=timeout,
nd_api_url=self.nd_api_url,
)

is_default = False
Expand Down Expand Up @@ -1180,6 +1196,7 @@ def stream(
preference_id=self.preference_id,
tools=self.tools,
timeout=timeout,
nd_api_url=self.nd_api_url,
)

if not best_llm:
Expand Down Expand Up @@ -1294,6 +1311,7 @@ async def astream(
preference_id=self.preference_id,
tools=self.tools,
timeout=timeout,
nd_api_url=self.nd_api_url,
)

if not best_llm:
Expand Down Expand Up @@ -1363,6 +1381,7 @@ async def _async_invoke_with_latency_tracking(
llm_config=llm_config,
tokens_per_second=tokens_per_second,
notdiamond_api_key=self.api_key,
nd_api_url=self.nd_api_url,
)
self.call_callbacks(
"on_latency_tracking",
Expand Down Expand Up @@ -1406,6 +1425,7 @@ def _invoke_with_latency_tracking(
llm_config=llm_config,
tokens_per_second=tokens_per_second,
notdiamond_api_key=self.api_key,
nd_api_url=self.nd_api_url,
)
self.call_callbacks(
"on_latency_tracking",
Expand Down Expand Up @@ -1625,11 +1645,20 @@ class NotDiamond(_NDClient):
tools: Optional[Sequence[Union[Dict[str, Any], Callable]]]
"""Bind tools to the LLM object. The tools will be passed to the LLM object when invoking it."""

nd_api_url: Optional[str]
"""The URL of the NotDiamond API. Defaults to settings.NOTDIAMOND_API_URL."""

class Config:
arbitrary_types_allowed = True

def __init__(self, *args, **kwargs):
def __init__(
    self,
    *args,
    nd_api_url: Optional[str] = settings.NOTDIAMOND_API_URL,
    **kwargs,
):
    """
    Build a NotDiamond client.

    Parameters:
        *args: Positional arguments, passed through unchanged to the parent constructor.
        nd_api_url (Optional[str]): The URL of the NotDiamond API. Keyword-only.
            Defaults to settings.NOTDIAMOND_API_URL.
        **kwargs: Keyword arguments, passed through unchanged to the parent constructor.
    """
    # nd_api_url is keyword-only on purpose: declared before *args it would
    # capture the first positional argument callers intend for the parent
    # constructor.
    # Forward it so the parent is initialised with the requested URL instead
    # of being built with the default and patched afterwards.
    # NOTE(review): assumes the parent __init__ accepts `nd_api_url`, as both
    # _NDRouterClient and _NDInvokerClient do — confirm for _NDClient.
    super().__init__(*args, nd_api_url=nd_api_url, **kwargs)


def _get_accepted_invoke_errors(provider: str) -> Tuple:
Expand Down
35 changes: 21 additions & 14 deletions notdiamond/llms/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def model_select_prepare(
tradeoff: Optional[str] = None,
preference_id: Optional[str] = None,
tools: Optional[Sequence[Union[Dict[str, Any], Callable]]] = [],
nd_api_url: Optional[str] = settings.NOTDIAMOND_API_URL,
):
"""
This is the core method for the model_select endpoint.
Expand All @@ -42,12 +43,12 @@ def model_select_prepare(
preference_id (Optional[str], optional): The ID of the router preference that was configured via the Dashboard.
Defaults to None.
async_mode (bool, optional): whether to run the request in async mode. Defaults to False.
nd_api_url (Optional[str], optional): The URL of the NotDiamond API. Defaults to settings.NOTDIAMOND_API_URL.

Returns:
tuple(url, payload, headers): returns data to be used for the API call of modelSelect
"""

url = f"{settings.ND_BASE_URL}/v2/modelRouter/modelSelect"
url = f"{nd_api_url}/v2/modelRouter/modelSelect"
tools_dict = get_tools_in_openai_format(tools)

payload: ModelSelectRequestPayload = {
Expand Down Expand Up @@ -129,6 +130,7 @@ def model_select(
preference_id: Optional[str] = None,
tools: Optional[Sequence[Union[Dict[str, Any], Callable]]] = [],
timeout: Optional[int] = 5,
nd_api_url: Optional[str] = settings.NOTDIAMOND_API_URL,
):
"""
This endpoint receives the prompt and routing settings, and makes a call to the NotDiamond API.
Expand All @@ -146,7 +148,7 @@ def model_select(
preference_id (Optional[str], optional): The ID of the router preference that was configured via the Dashboard.
Defaults to None.
timeout (int, optional): timeout for the request. Defaults to 5.

nd_api_url (Optional[str], optional): The URL of the NotDiamond API. Defaults to settings.NOTDIAMOND_API_URL.
Returns:
tuple(LLMConfig, string): returns a tuple of the chosen LLMConfig to call and a session ID string.
In case of an error the LLM defaults to None and the session ID defaults
Expand All @@ -162,6 +164,7 @@ def model_select(
tradeoff=tradeoff,
preference_id=preference_id,
tools=tools,
nd_api_url=nd_api_url,
)

try:
Expand Down Expand Up @@ -192,6 +195,7 @@ async def amodel_select(
preference_id: Optional[str] = None,
tools: Optional[Sequence[Union[Dict[str, Any], Callable]]] = [],
timeout: Optional[int] = 5,
nd_api_url: Optional[str] = settings.NOTDIAMOND_API_URL,
):
"""
This endpoint receives the prompt and routing settings, and makes a call to the NotDiamond API.
Expand All @@ -209,7 +213,7 @@ async def amodel_select(
preference_id (Optional[str], optional): The ID of the router preference that was configured via the Dashboard.
Defaults to None.
timeout (int, optional): timeout for the request. Defaults to 5.

nd_api_url (Optional[str], optional): The URL of the NotDiamond API. Defaults to settings.NOTDIAMOND_API_URL.
Returns:
tuple(LLMConfig, string): returns a tuple of the chosen LLMConfig to call and a session ID string.
In case of an error the LLM defaults to None and the session ID defaults
Expand Down Expand Up @@ -250,6 +254,7 @@ def report_latency(
llm_config: LLMConfig,
tokens_per_second: float,
notdiamond_api_key: str,
nd_api_url: Optional[str] = settings.NOTDIAMOND_API_URL,
):
"""
This method makes an API call to the NotDiamond server to report the latency of an LLM call.
Expand All @@ -263,14 +268,14 @@ def report_latency(
llm_provider (LLMConfig): specifying the LLM provider for which the latency is reported
tokens_per_second (float): latency of the model call calculated based on time elapsed, input tokens, and output tokens
notdiamond_api_key (str): NotDiamond API call used for authentication

nd_api_url (Optional[str], optional): The URL of the NotDiamond API. Defaults to settings.NOTDIAMOND_API_URL.
Returns:
int: status code of the API call, 200 if it's success

Raises:
ApiError: if the API call to the NotDiamond backend fails, this error is raised
"""
url = f"{settings.ND_BASE_URL}/v2/report/metrics/latency"
url = f"{nd_api_url}/v2/report/metrics/latency"

payload = {
"session_id": session_id,
Expand All @@ -290,28 +295,30 @@ def report_latency(

return response.status_code

def create_preference_id(
    notdiamond_api_key: str,
    name: Optional[str] = None,
    nd_api_url: Optional[str] = settings.NOTDIAMOND_API_URL,
) -> str:
    """
    Create a preference id with an optional name. The preference name will appear in your
    dashboard on Not Diamond.

    Parameters:
        notdiamond_api_key (str): NotDiamond API key used for authentication.
        name (Optional[str], optional): Name to give the preference. Defaults to None.
        nd_api_url (Optional[str], optional): The URL of the NotDiamond API.
            Defaults to settings.NOTDIAMOND_API_URL; an explicit None falls back
            to the same default.

    Returns:
        str: the "preference_id" field of the API response.

    Raises:
        Exception: if the API responds with a status code other than 200.
    """
    # An explicit None would otherwise be formatted into the URL as the
    # literal text "None"; a trailing slash would produce "//v2/...".
    base_url = (nd_api_url or settings.NOTDIAMOND_API_URL).rstrip("/")
    url = f"{base_url}/v2/preferences/userPreferenceCreate"
    headers = _default_headers(notdiamond_api_key)

    res = requests.post(url=url, headers=headers, json={"name": name})
    if res.status_code != 200:
        raise Exception(f"Error creating preference ID: {res.text}")

    return res.json()["preference_id"]


def _default_headers(notdiamond_api_key: str) -> Dict[str, str]:
    """
    Return the default HTTP headers for a NotDiamond API request.

    The headers declare a JSON content type, carry the API key as a bearer
    token, and identify the SDK version in the user agent.
    """
    headers: Dict[str, str] = {}
    headers["content-type"] = "application/json"
    headers["Authorization"] = f"Bearer {notdiamond_api_key}"
    headers["User-Agent"] = f"Python-SDK/{settings.VERSION}"
    return headers
}
3 changes: 2 additions & 1 deletion notdiamond/metrics/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@ def feedback_request(
llm_config: LLMConfig,
feedback_payload: Dict[str, int],
notdiamond_api_key: str,
nd_api_url: str = settings.NOTDIAMOND_API_URL,
) -> bool:
url = f"{settings.ND_BASE_URL}/v2/report/metrics/feedback"
url = f"{nd_api_url}/v2/report/metrics/feedback"

payload: FeedbackRequestPayload = {
"session_id": session_id,
Expand Down
4 changes: 3 additions & 1 deletion notdiamond/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
REPLICATE_API_KEY = os.getenv("REPLICATE_API_KEY", default="")


ND_BASE_URL = "https://not-diamond-server.onrender.com"
NOTDIAMOND_API_URL = os.getenv(
"NOTDIAMOND_API_URL", "https://not-diamond-server.onrender.com"
)

PROVIDERS = {
"openai": {
Expand Down
13 changes: 10 additions & 3 deletions notdiamond/toolkit/custom_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from notdiamond.exceptions import ApiError
from notdiamond.llms.client import NotDiamond
from notdiamond.llms.config import LLMConfig
from notdiamond.settings import ND_BASE_URL, NOTDIAMOND_API_KEY, VERSION
from notdiamond.settings import NOTDIAMOND_API_KEY, NOTDIAMOND_API_URL, VERSION
from notdiamond.types import NDApiKeyValidator


Expand Down Expand Up @@ -46,8 +46,9 @@ def _request_train_router(
dataset_file: str,
llm_configs: List[LLMConfig],
preference_id: Optional[str],
nd_api_url: str,
) -> str:
url = f"{ND_BASE_URL}/v2/pzn/trainCustomRouter"
url = f"{nd_api_url}/v2/pzn/trainCustomRouter"

files = {"dataset_file": open(dataset_file, "rb")}

Expand Down Expand Up @@ -123,6 +124,7 @@ def fit(
response_column: str,
score_column: str,
preference_id: Optional[str] = None,
nd_api_url: Optional[str] = NOTDIAMOND_API_URL,
) -> str:
"""
Method to train a custom router using provided dataset.
Expand All @@ -138,6 +140,7 @@ def fit(
to the score given to the response from the LLM.
preference_id (Optional[str], optional): If specified, the custom router
associated with the preference_id will be updated with the provided dataset.
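nd_api_url (Optional[str], optional): The URL of the NotDiamond API. Defaults to NOTDIAMOND_API_URL from settings (the production API unless overridden by the NOTDIAMOND_API_URL environment variable).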

Raises:
ApiError: When the NotDiamond API fails
Expand All @@ -157,7 +160,11 @@ def fit(
with tempfile.NamedTemporaryFile(suffix=".csv") as joint_csv:
joint_df.to_csv(joint_csv.name, index=False)
preference_id = self._request_train_router(
prompt_column, joint_csv.name, llm_configs, preference_id
prompt_column,
joint_csv.name,
llm_configs,
preference_id,
nd_api_url,
)

return preference_id
Expand Down
2 changes: 1 addition & 1 deletion tests/test_llm_calls/test_perplexity.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

@pytest.mark.longrun
class Test_Perplexity_LLMs:
def test_llama_3_sonar_large_32k_online(self):
def test_llama_3_1_sonar_large_128k_online(self):
provider = NDLLMProviders.LLAMA_3_1_SONAR_LARGE_128K_ONLINE
provider.kwargs = {"max_tokens": 10}
nd_llm = NotDiamond(
Expand Down