Not-Diamond · acompa · Aug 21, 2024 · Aug 21, 2024 · Aug 21, 2024 · Aug 21, 2024
diff --git a/.github/workflows/all-sdk-tests.yml b/.github/workflows/all-sdk-tests.yml
@@ -58,6 +58,7 @@ jobs:
           REPLICATE_API_KEY: ${{ secrets.REPLICATE_API_KEY }}
           PPLX_API_KEY: ${{ secrets.PPLX_API_KEY }}
           OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
+          NOTDIAMOND_API_URL: ${{ secrets.NOTDIAMOND_API_URL }}
         run: |
           PYTEST_ADDOPTS="--color=yes" poetry run coverage run --branch -m pytest -v tests/
           poetry run coverage report -m

diff --git a/.gitignore b/.gitignore
@@ -3,4 +3,8 @@ __pycache__/
 .env
 .envrc
 .python-version
+# Ignore build artifacts inside dist/ while keeping dist/.gitkeep tracked.
+# The pattern must be `dist/*` rather than `dist/`: git cannot re-include a
+# file whose parent directory is itself excluded.
+dist/*
 !dist/.gitkeep
diff --git a/notdiamond/llms/client.py b/notdiamond/llms/client.py
@@ -32,7 +32,12 @@
     MissingLLMConfigs,
 )
 from notdiamond.llms.config import LLMConfig
-from notdiamond.llms.request import amodel_select, model_select, report_latency, create_preference_id
+from notdiamond.llms.request import (
+    amodel_select,
+    create_preference_id,
+    model_select,
+    report_latency,
+)
 from notdiamond.metrics.metric import Metric
 from notdiamond.prompts import _curly_escape, inject_system_prompt
 from notdiamond.types import NDApiKeyValidator
@@ -69,6 +74,7 @@ class _NDRouterClient(BaseModel):
         preference_id: Optional[str]
         tools: Optional[Sequence[Union[Dict[str, Any], Callable]]]
         callbacks: Optional[List]
+        nd_api_url: Optional[str]
 
         class Config:
             arbitrary_types_allowed = True
@@ -85,6 +91,7 @@ def __init__(
             preference_id: Optional[str] = None,
             callbacks: Optional[List] = None,
             tools: Optional[Sequence[Union[Dict[str, Any], Callable]]] = None,
+            nd_api_url: Optional[str] = settings.NOTDIAMOND_API_URL,
             **kwargs,
         ):
             if api_key is None:
@@ -120,6 +127,7 @@ def __init__(
                 preference_id=preference_id,
                 tools=tools,
                 callbacks=callbacks,
+                nd_api_url=nd_api_url,
                 **kwargs,
             )
 
@@ -132,7 +140,7 @@ def completions(self):
             return self
 
         def create_preference_id(self, name: Optional[str] = None) -> str:
-            return create_preference_id(self.api_key, name)
+            return create_preference_id(self.api_key, name, self.nd_api_url)
 
         async def amodel_select(
             self,
@@ -173,8 +181,8 @@ async def amodel_select(
                                                 Defaults to Metric("accuracy").
                 timeout (int): The number of seconds to wait before terminating the API call to Not Diamond backend.
                                 Default to 5 seconds.
+                nd_api_url (Optional[str]): The URL of the NotDiamond API. Defaults to settings.NOTDIAMOND_API_URL.
                 **kwargs: Any other arguments that are supported by Langchain's invoke method, will be passed through.
-
             Returns:
                 tuple[str, Optional[LLMConfig]]: returns the session_id and the chosen LLM
             """
@@ -205,6 +213,7 @@ async def amodel_select(
                 preference_id=self.preference_id,
                 tools=self.tools,
                 timeout=timeout,
+                nd_api_url=self.nd_api_url,
             )
 
             if not best_llm:
@@ -286,6 +295,7 @@ def model_select(
                 preference_id=self.preference_id,
                 tools=self.tools,
                 timeout=timeout,
+                nd_api_url=self.nd_api_url,
             )
 
             if not best_llm:
@@ -508,6 +518,7 @@ class _NDInvokerClient(_NDRouterClient, LLM):
         preference_id: Optional[str]
         tools: Optional[Sequence[Union[Dict[str, Any], Callable]]]
         callbacks: Optional[List]
+        nd_api_url: Optional[str]
 
         def __init__(
             self,
@@ -521,6 +532,7 @@ def __init__(
             preference_id: Optional[str] = None,
             tools: Optional[Sequence[Union[Dict[str, Any], Callable]]] = None,
             callbacks: Optional[List] = None,
+            nd_api_url: Optional[str] = settings.NOTDIAMOND_API_URL,
             **kwargs,
         ) -> None:
             super().__init__(
@@ -534,6 +546,7 @@ def __init__(
                 preference_id=preference_id,
                 tools=tools,
                 callbacks=callbacks,
+                nd_api_url=nd_api_url,
                 **kwargs,
             )
 
@@ -607,6 +620,7 @@ def create(
                                                                 dict.
                 timeout (int): The number of seconds to wait before terminating the API call to Not Diamond backend.
                                 Default to 5 seconds.
+                nd_api_url (Optional[str]): The URL of the NotDiamond API. Defaults to settings.NOTDIAMOND_API_URL.
                 **kwargs: Any other arguments that are supported by Langchain's invoke method, will be passed through.
 
             Raises:
@@ -799,6 +813,7 @@ def invoke(
                 preference_id=self.preference_id,
                 tools=self.tools,
                 timeout=timeout,
+                nd_api_url=self.nd_api_url,
             )
 
             is_default = False
@@ -994,6 +1009,7 @@ async def ainvoke(
                 preference_id=self.preference_id,
                 tools=self.tools,
                 timeout=timeout,
+                nd_api_url=self.nd_api_url,
             )
 
             is_default = False
@@ -1180,6 +1196,7 @@ def stream(
                 preference_id=self.preference_id,
                 tools=self.tools,
                 timeout=timeout,
+                nd_api_url=self.nd_api_url,
             )
 
             if not best_llm:
@@ -1294,6 +1311,7 @@ async def astream(
                 preference_id=self.preference_id,
                 tools=self.tools,
                 timeout=timeout,
+                nd_api_url=self.nd_api_url,
             )
 
             if not best_llm:
@@ -1363,6 +1381,7 @@ async def _async_invoke_with_latency_tracking(
                 llm_config=llm_config,
                 tokens_per_second=tokens_per_second,
                 notdiamond_api_key=self.api_key,
+                nd_api_url=self.nd_api_url,
             )
             self.call_callbacks(
                 "on_latency_tracking",
@@ -1406,6 +1425,7 @@ def _invoke_with_latency_tracking(
                 llm_config=llm_config,
                 tokens_per_second=tokens_per_second,
                 notdiamond_api_key=self.api_key,
+                nd_api_url=self.nd_api_url,
             )
             self.call_callbacks(
                 "on_latency_tracking",
@@ -1625,11 +1645,23 @@ class NotDiamond(_NDClient):
     tools: Optional[Sequence[Union[Dict[str, Any], Callable]]]
     """Bind tools to the LLM object. The tools will be passed to the LLM object when invoking it."""
 
+    nd_api_url: Optional[str]
+    """The URL of the NotDiamond API. Defaults to settings.NOTDIAMOND_API_URL."""
+
     class Config:
         arbitrary_types_allowed = True
 
-    def __init__(self, *args, **kwargs):
+    def __init__(
+        self,
+        *args,
+        nd_api_url: Optional[str] = settings.NOTDIAMOND_API_URL,
+        **kwargs,
+    ):
+        # nd_api_url is declared after *args so it is keyword-only: existing
+        # positional calls keep forwarding their arguments to the parent
+        # initializer instead of having the first one captured as the URL.
         super().__init__(*args, **kwargs)
+        self.nd_api_url = nd_api_url
 
 
 def _get_accepted_invoke_errors(provider: str) -> Tuple:

diff --git a/notdiamond/llms/request.py b/notdiamond/llms/request.py
@@ -25,6 +25,7 @@ def model_select_prepare(
     tradeoff: Optional[str] = None,
     preference_id: Optional[str] = None,
     tools: Optional[Sequence[Union[Dict[str, Any], Callable]]] = [],
+    nd_api_url: Optional[str] = settings.NOTDIAMOND_API_URL,
 ):
     """
     This is the core method for the model_select endpoint.
@@ -42,12 +43,12 @@ def model_select_prepare(
         preference_id (Optional[str], optional): The ID of the router preference that was configured via the Dashboard.
                                                     Defaults to None.
         async_mode (bool, optional): whether to run the request in async mode. Defaults to False.
+        nd_api_url (Optional[str], optional): The URL of the NotDiamond API. Defaults to settings.NOTDIAMOND_API_URL.
 
     Returns:
         tuple(url, payload, headers): returns data to be used for the API call of modelSelect
     """
-
-    url = f"{settings.ND_BASE_URL}/v2/modelRouter/modelSelect"
+    url = f"{nd_api_url}/v2/modelRouter/modelSelect"
     tools_dict = get_tools_in_openai_format(tools)
 
     payload: ModelSelectRequestPayload = {
@@ -129,6 +130,7 @@ def model_select(
     preference_id: Optional[str] = None,
     tools: Optional[Sequence[Union[Dict[str, Any], Callable]]] = [],
     timeout: Optional[int] = 5,
+    nd_api_url: Optional[str] = settings.NOTDIAMOND_API_URL,
 ):
     """
     This endpoint receives the prompt and routing settings, and makes a call to the NotDiamond API.
@@ -146,7 +148,7 @@ def model_select(
         preference_id (Optional[str], optional): The ID of the router preference that was configured via the Dashboard.
                                                     Defaults to None.
         timeout (int, optional): timeout for the request. Defaults to 5.
-
+        nd_api_url (Optional[str], optional): The URL of the NotDiamond API. Defaults to settings.NOTDIAMOND_API_URL.
     Returns:
         tuple(LLMConfig, string): returns a tuple of the chosen LLMConfig to call and a session ID string.
                                         In case of an error the LLM defaults to None and the session ID defaults
@@ -162,6 +164,7 @@ def model_select(
         tradeoff=tradeoff,
         preference_id=preference_id,
         tools=tools,
+        nd_api_url=nd_api_url,
     )
 
     try:
@@ -192,6 +195,7 @@ async def amodel_select(
     preference_id: Optional[str] = None,
     tools: Optional[Sequence[Union[Dict[str, Any], Callable]]] = [],
     timeout: Optional[int] = 5,
+    nd_api_url: Optional[str] = settings.NOTDIAMOND_API_URL,
 ):
     """
     This endpoint receives the prompt and routing settings, and makes a call to the NotDiamond API.
@@ -209,7 +213,7 @@ async def amodel_select(
         preference_id (Optional[str], optional): The ID of the router preference that was configured via the Dashboard.
                                                     Defaults to None.
         timeout (int, optional): timeout for the request. Defaults to 5.
-
+        nd_api_url (Optional[str], optional): The URL of the NotDiamond API. Defaults to settings.NOTDIAMOND_API_URL.
     Returns:
         tuple(LLMConfig, string): returns a tuple of the chosen LLMConfig to call and a session ID string.
                                         In case of an error the LLM defaults to None and the session ID defaults
@@ -250,6 +254,7 @@ def report_latency(
     llm_config: LLMConfig,
     tokens_per_second: float,
     notdiamond_api_key: str,
+    nd_api_url: Optional[str] = settings.NOTDIAMOND_API_URL,
 ):
     """
     This method makes an API call to the NotDiamond server to report the latency of an LLM call.
@@ -263,14 +268,14 @@ def report_latency(
         llm_provider (LLMConfig): specifying the LLM provider for which the latency is reported
         tokens_per_second (float): latency of the model call calculated based on time elapsed, input tokens, and output tokens
         notdiamond_api_key (str): NotDiamond API call used for authentication
-
+        nd_api_url (Optional[str], optional): The URL of the NotDiamond API. Defaults to settings.NOTDIAMOND_API_URL.
     Returns:
         int: status code of the API call, 200 if it's success
 
     Raises:
         ApiError: if the API call to the NotDiamond backend fails, this error is raised
     """
-    url = f"{settings.ND_BASE_URL}/v2/report/metrics/latency"
+    url = f"{nd_api_url}/v2/report/metrics/latency"
 
     payload = {
         "session_id": session_id,
@@ -290,28 +295,30 @@ def report_latency(
 
     return response.status_code
 
-def create_preference_id(notdiamond_api_key: str, name: Optional[str] = None) -> str:
+
+def create_preference_id(
+    notdiamond_api_key: str,
+    name: Optional[str] = None,
+    nd_api_url: Optional[str] = settings.NOTDIAMOND_API_URL,
+) -> str:
     """
     Create a preference id with an optional name. The preference name will appear in your
     dashboard on Not Diamond.
     """
-    url = f"{settings.ND_BASE_URL}/v2/preferences/userPreferenceCreate"
+    url = f"{nd_api_url}/v2/preferences/userPreferenceCreate"
     headers = _default_headers(notdiamond_api_key)
-    res = requests.post(
-        url=url,
-        headers=headers,
-        json={"name": name}
-    )
+    res = requests.post(url=url, headers=headers, json={"name": name})
     if res.status_code == 200:
         preference_id = res.json()["preference_id"]
     else:
         raise Exception(f"Error creating preference ID: {res.text}")
 
     return preference_id
 
+
 def _default_headers(notdiamond_api_key: str) -> Dict[str, str]:
     return {
         "content-type": "application/json",
         "Authorization": f"Bearer {notdiamond_api_key}",
         "User-Agent": f"Python-SDK/{settings.VERSION}",
-    }
+    }
diff --git a/notdiamond/metrics/request.py b/notdiamond/metrics/request.py
@@ -17,8 +17,9 @@ def feedback_request(
     llm_config: LLMConfig,
     feedback_payload: Dict[str, int],
     notdiamond_api_key: str,
+    nd_api_url: str = settings.NOTDIAMOND_API_URL,
 ) -> bool:
-    url = f"{settings.ND_BASE_URL}/v2/report/metrics/feedback"
+    url = f"{nd_api_url}/v2/report/metrics/feedback"
 
     payload: FeedbackRequestPayload = {
         "session_id": session_id,

diff --git a/notdiamond/settings.py b/notdiamond/settings.py
@@ -18,7 +18,9 @@
 REPLICATE_API_KEY = os.getenv("REPLICATE_API_KEY", default="")
 
 
-ND_BASE_URL = "https://not-diamond-server.onrender.com"
+NOTDIAMOND_API_URL = os.getenv(
+    "NOTDIAMOND_API_URL", "https://not-diamond-server.onrender.com"
+)
 
 PROVIDERS = {
     "openai": {

diff --git a/notdiamond/toolkit/custom_router.py b/notdiamond/toolkit/custom_router.py
@@ -11,7 +11,7 @@
 from notdiamond.exceptions import ApiError
 from notdiamond.llms.client import NotDiamond
 from notdiamond.llms.config import LLMConfig
-from notdiamond.settings import ND_BASE_URL, NOTDIAMOND_API_KEY, VERSION
+from notdiamond.settings import NOTDIAMOND_API_KEY, NOTDIAMOND_API_URL, VERSION
 from notdiamond.types import NDApiKeyValidator
 
 
@@ -46,8 +46,9 @@ def _request_train_router(
         dataset_file: str,
         llm_configs: List[LLMConfig],
         preference_id: Optional[str],
+        nd_api_url: str,
     ) -> str:
-        url = f"{ND_BASE_URL}/v2/pzn/trainCustomRouter"
+        url = f"{nd_api_url}/v2/pzn/trainCustomRouter"
 
         files = {"dataset_file": open(dataset_file, "rb")}
 
@@ -123,6 +124,7 @@ def fit(
         response_column: str,
         score_column: str,
         preference_id: Optional[str] = None,
+        nd_api_url: Optional[str] = NOTDIAMOND_API_URL,
     ) -> str:
         """
         Method to train a custom router using provided dataset.
@@ -138,6 +140,7 @@ def fit(
                 to the score given to the response from the LLM.
             preference_id (Optional[str], optional): If specified, the custom router
                 associated with the preference_id will be updated with the provided dataset.
+            nd_api_url (Optional[str], optional): The URL of the NotDiamond API. Defaults to NOTDIAMOND_API_URL from notdiamond.settings.
 
         Raises:
             ApiError: When the NotDiamond API fails
@@ -157,7 +160,11 @@ def fit(
         with tempfile.NamedTemporaryFile(suffix=".csv") as joint_csv:
             joint_df.to_csv(joint_csv.name, index=False)
             preference_id = self._request_train_router(
-                prompt_column, joint_csv.name, llm_configs, preference_id
+                prompt_column,
+                joint_csv.name,
+                llm_configs,
+                preference_id,
+                nd_api_url,
             )
 
         return preference_id

diff --git a/tests/test_llm_calls/test_perplexity.py b/tests/test_llm_calls/test_perplexity.py
@@ -7,7 +7,7 @@
 
 @pytest.mark.longrun
 class Test_Perplexity_LLMs:
-    def test_llama_3_sonar_large_32k_online(self):
+    def test_llama_3_1_sonar_large_128k_online(self):
         provider = NDLLMProviders.LLAMA_3_1_SONAR_LARGE_128K_ONLINE
         provider.kwargs = {"max_tokens": 10}
         nd_llm = NotDiamond(
-Original file line number
+Diff line change
@@ Expand Up / @@ -3,4 +3,5 @@ __pycache__/ @@
     .env
     .envrc
     .python-version
+    dist/
     !dist/.gitkeep