From 82f668578f34ad6c85b701005682bb434a882e2a Mon Sep 17 00:00:00 2001 From: Alejandro Companioni Date: Wed, 21 Aug 2024 14:51:48 -0400 Subject: [PATCH 1/4] Adding endpoint config to `NotDiamond` [ENG-965]. --- .github/workflows/all-sdk-tests.yml | 1 + .gitignore | 1 + notdiamond/llms/client.py | 24 +++++++++++++++++--- notdiamond/llms/request.py | 34 +++++++++++++++++------------ notdiamond/metrics/request.py | 3 ++- notdiamond/settings.py | 4 +++- notdiamond/toolkit/custom_router.py | 13 ++++++++--- 7 files changed, 58 insertions(+), 22 deletions(-) diff --git a/.github/workflows/all-sdk-tests.yml b/.github/workflows/all-sdk-tests.yml index 6e0a3bca..4c105de9 100644 --- a/.github/workflows/all-sdk-tests.yml +++ b/.github/workflows/all-sdk-tests.yml @@ -58,6 +58,7 @@ jobs: REPLICATE_API_KEY: ${{ secrets.REPLICATE_API_KEY }} PPLX_API_KEY: ${{ secrets.PPLX_API_KEY }} OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} + NOTDIAMOND_API_URL: ${{ secrets.NOTDIAMOND_API_URL }} run: | PYTEST_ADDOPTS="--color=yes" poetry run coverage run --branch -m pytest -v tests/ poetry run coverage report -m diff --git a/.gitignore b/.gitignore index 9fff93b7..ed14d79b 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,5 @@ __pycache__/ .env .envrc .python-version +dist/ !dist/.gitkeep diff --git a/notdiamond/llms/client.py b/notdiamond/llms/client.py index a98adafc..9f24b3e9 100644 --- a/notdiamond/llms/client.py +++ b/notdiamond/llms/client.py @@ -32,7 +32,12 @@ MissingLLMConfigs, ) from notdiamond.llms.config import LLMConfig -from notdiamond.llms.request import amodel_select, model_select, report_latency, create_preference_id +from notdiamond.llms.request import ( + amodel_select, + create_preference_id, + model_select, + report_latency, +) from notdiamond.metrics.metric import Metric from notdiamond.prompts import _curly_escape, inject_system_prompt from notdiamond.types import NDApiKeyValidator @@ -132,7 +137,7 @@ def completions(self): return self def create_preference_id(self, name: Optional[str] = None) -> str: - return create_preference_id(self.api_key, name) + return create_preference_id(self.api_key, name, self.nd_api_url) async def amodel_select( self, @@ -205,6 +210,7 @@ async def amodel_select( preference_id=self.preference_id, tools=self.tools, timeout=timeout, + nd_api_url=self.nd_api_url, ) if not best_llm: @@ -286,6 +292,7 @@ def model_select( preference_id=self.preference_id, tools=self.tools, timeout=timeout, + nd_api_url=self.nd_api_url, ) if not best_llm: @@ -1363,6 +1370,7 @@ async def _async_invoke_with_latency_tracking( llm_config=llm_config, tokens_per_second=tokens_per_second, notdiamond_api_key=self.api_key, + nd_api_url=self.nd_api_url, ) self.call_callbacks( "on_latency_tracking", @@ -1406,6 +1414,7 @@ def _invoke_with_latency_tracking( llm_config=llm_config, tokens_per_second=tokens_per_second, notdiamond_api_key=self.api_key, + nd_api_url=self.nd_api_url, ) self.call_callbacks( "on_latency_tracking", @@ -1625,11 +1634,20 @@ class NotDiamond(_NDClient): tools: Optional[Sequence[Union[Dict[str, Any], Callable]]] """Bind tools to the LLM object. The tools will be passed to the LLM object when invoking it.""" + nd_api_url: Optional[str] + """The URL of the NotDiamond API. Defaults to None.""" + class Config: arbitrary_types_allowed = True - def __init__(self, *args, **kwargs): + def __init__( + self, + nd_api_url: Optional[str] = settings.NOTDIAMOND_API_URL, + *args, + **kwargs, + ): super().__init__(*args, **kwargs) + self.nd_api_url = nd_api_url def _get_accepted_invoke_errors(provider: str) -> Tuple: diff --git a/notdiamond/llms/request.py b/notdiamond/llms/request.py index 29799396..a0cb1b6d 100644 --- a/notdiamond/llms/request.py +++ b/notdiamond/llms/request.py @@ -25,6 +25,7 @@ def model_select_prepare( tradeoff: Optional[str] = None, preference_id: Optional[str] = None, tools: Optional[Sequence[Union[Dict[str, Any], Callable]]] = [], + nd_api_url: Optional[str] = settings.NOTDIAMOND_API_URL, ): """ This is the core method for the model_select endpoint. @@ -42,12 +43,12 @@ def model_select_prepare( preference_id (Optional[str], optional): The ID of the router preference that was configured via the Dashboard. Defaults to None. async_mode (bool, optional): whether to run the request in async mode. Defaults to False. + nd_api_url (Optional[str], optional): The URL of the NotDiamond API. Defaults to None. Returns: tuple(url, payload, headers): returns data to be used for the API call of modelSelect """ - - url = f"{settings.ND_BASE_URL}/v2/modelRouter/modelSelect" + url = f"{nd_api_url}/v2/modelRouter/modelSelect" tools_dict = get_tools_in_openai_format(tools) payload: ModelSelectRequestPayload = { @@ -129,6 +130,7 @@ def model_select( preference_id: Optional[str] = None, tools: Optional[Sequence[Union[Dict[str, Any], Callable]]] = [], timeout: Optional[int] = 5, + nd_api_url: Optional[str] = settings.NOTDIAMOND_API_URL, ): """ This endpoint receives the prompt and routing settings, and makes a call to the NotDiamond API. @@ -146,7 +148,7 @@ def model_select( preference_id (Optional[str], optional): The ID of the router preference that was configured via the Dashboard. Defaults to None. timeout (int, optional): timeout for the request. Defaults to 5. - + nd_api_url (Optional[str], optional): The URL of the NotDiamond API. Defaults to None. Returns: tuple(LLMConfig, string): returns a tuple of the chosen LLMConfig to call and a session ID string. In case of an error the LLM defaults to None and the session ID defaults @@ -192,6 +194,7 @@ async def amodel_select( preference_id: Optional[str] = None, tools: Optional[Sequence[Union[Dict[str, Any], Callable]]] = [], timeout: Optional[int] = 5, + nd_api_url: Optional[str] = settings.NOTDIAMOND_API_URL, ): """ This endpoint receives the prompt and routing settings, and makes a call to the NotDiamond API. @@ -209,7 +212,7 @@ async def amodel_select( preference_id (Optional[str], optional): The ID of the router preference that was configured via the Dashboard. Defaults to None. timeout (int, optional): timeout for the request. Defaults to 5. - + nd_api_url (Optional[str], optional): The URL of the NotDiamond API. Defaults to None. Returns: tuple(LLMConfig, string): returns a tuple of the chosen LLMConfig to call and a session ID string. In case of an error the LLM defaults to None and the session ID defaults @@ -250,6 +253,7 @@ def report_latency( llm_config: LLMConfig, tokens_per_second: float, notdiamond_api_key: str, + nd_api_url: Optional[str] = settings.NOTDIAMOND_API_URL, ): """ This method makes an API call to the NotDiamond server to report the latency of an LLM call. @@ -263,14 +267,14 @@ def report_latency( llm_provider (LLMConfig): specifying the LLM provider for which the latency is reported tokens_per_second (float): latency of the model call calculated based on time elapsed, input tokens, and output tokens notdiamond_api_key (str): NotDiamond API call used for authentication - + nd_api_url (Optional[str], optional): The URL of the NotDiamond API. Defaults to None. Returns: int: status code of the API call, 200 if it's success Raises: ApiError: if the API call to the NotDiamond backend fails, this error is raised """ - url = f"{settings.ND_BASE_URL}/v2/report/metrics/latency" + url = f"{nd_api_url}/v2/report/metrics/latency" payload = { "session_id": session_id, @@ -290,18 +294,19 @@ def report_latency( return response.status_code -def create_preference_id(notdiamond_api_key: str, name: Optional[str] = None) -> str: + +def create_preference_id( + notdiamond_api_key: str, + name: Optional[str] = None, + nd_api_url: Optional[str] = settings.NOTDIAMOND_API_URL, +) -> str: """ Create a preference id with an optional name. The preference name will appear in your dashboard on Not Diamond. """ - url = f"{settings.ND_BASE_URL}/v2/preferences/userPreferenceCreate" + url = f"{nd_api_url}/v2/preferences/userPreferenceCreate" headers = _default_headers(notdiamond_api_key) - res = requests.post( - url=url, - headers=headers, - json={"name": name} - ) + res = requests.post(url=url, headers=headers, json={"name": name}) if res.status_code == 200: preference_id = res.json()["preference_id"] else: @@ -309,9 +314,10 @@ def create_preference_id(notdiamond_api_key: str, name: Optional[str] = None) -> return preference_id + def _default_headers(notdiamond_api_key: str) -> Dict[str, str]: return { "content-type": "application/json", "Authorization": f"Bearer {notdiamond_api_key}", "User-Agent": f"Python-SDK/{settings.VERSION}", - } \ No newline at end of file + } diff --git a/notdiamond/metrics/request.py b/notdiamond/metrics/request.py index 64e95261..a928f606 100644 --- a/notdiamond/metrics/request.py +++ b/notdiamond/metrics/request.py @@ -17,8 +17,9 @@ def feedback_request( llm_config: LLMConfig, feedback_payload: Dict[str, int], notdiamond_api_key: str, + nd_api_url: str = settings.NOTDIAMOND_API_URL, ) -> bool: - url = f"{settings.ND_BASE_URL}/v2/report/metrics/feedback" + url = f"{nd_api_url}/v2/report/metrics/feedback" payload: FeedbackRequestPayload = { "session_id": session_id, diff --git a/notdiamond/settings.py b/notdiamond/settings.py index 7a9264f5..62b6c3bb 100644 --- a/notdiamond/settings.py +++ b/notdiamond/settings.py @@ -18,7 +18,9 @@ REPLICATE_API_KEY = os.getenv("REPLICATE_API_KEY", default="") -ND_BASE_URL = "https://not-diamond-server.onrender.com" +NOTDIAMOND_API_URL = os.getenv( + "ND_BASE_URL", "https://not-diamond-server.onrender.com" +) PROVIDERS = { "openai": { diff --git a/notdiamond/toolkit/custom_router.py b/notdiamond/toolkit/custom_router.py index eadd0e25..07e56071 100644 --- a/notdiamond/toolkit/custom_router.py +++ b/notdiamond/toolkit/custom_router.py @@ -11,7 +11,7 @@ from notdiamond.exceptions import ApiError from notdiamond.llms.client import NotDiamond from notdiamond.llms.config import LLMConfig -from notdiamond.settings import ND_BASE_URL, NOTDIAMOND_API_KEY, VERSION +from notdiamond.settings import NOTDIAMOND_API_KEY, NOTDIAMOND_API_URL, VERSION from notdiamond.types import NDApiKeyValidator @@ -46,8 +46,9 @@ def _request_train_router( dataset_file: str, llm_configs: List[LLMConfig], preference_id: Optional[str], + nd_api_url: str, ) -> str: - url = f"{ND_BASE_URL}/v2/pzn/trainCustomRouter" + url = f"{nd_api_url}/v2/pzn/trainCustomRouter" files = {"dataset_file": open(dataset_file, "rb")} @@ -123,6 +124,7 @@ def fit( response_column: str, score_column: str, preference_id: Optional[str] = None, + nd_api_url: Optional[str] = NOTDIAMOND_API_URL, ) -> str: """ Method to train a custom router using provided dataset. @@ -138,6 +140,7 @@ def fit( to the score given to the response from the LLM. preference_id (Optional[str], optional): If specified, the custom router associated with the preference_id will be updated with the provided dataset. + nd_api_url (Optional[str], optional): The URL of the NotDiamond API. Defaults to prod. Raises: ApiError: When the NotDiamond API fails @@ -157,7 +160,11 @@ def fit( with tempfile.NamedTemporaryFile(suffix=".csv") as joint_csv: joint_df.to_csv(joint_csv.name, index=False) preference_id = self._request_train_router( - prompt_column, joint_csv.name, llm_configs, preference_id + prompt_column, + joint_csv.name, + llm_configs, + preference_id, + nd_api_url, ) return preference_id From e6d887a72966ed9ede9e8d93ddf07b90855984ff Mon Sep 17 00:00:00 2001 From: Alejandro Companioni Date: Wed, 21 Aug 2024 15:22:54 -0400 Subject: [PATCH 2/4] missed methods --- notdiamond/llms/client.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/notdiamond/llms/client.py b/notdiamond/llms/client.py index 9f24b3e9..02328c64 100644 --- a/notdiamond/llms/client.py +++ b/notdiamond/llms/client.py @@ -74,6 +74,7 @@ class _NDRouterClient(BaseModel): preference_id: Optional[str] tools: Optional[Sequence[Union[Dict[str, Any], Callable]]] callbacks: Optional[List] + nd_api_url: Optional[str] class Config: arbitrary_types_allowed = True @@ -515,6 +516,7 @@ class _NDInvokerClient(_NDRouterClient, LLM): preference_id: Optional[str] tools: Optional[Sequence[Union[Dict[str, Any], Callable]]] callbacks: Optional[List] + nd_api_url: Optional[str] def __init__( self, @@ -806,6 +808,7 @@ def invoke( preference_id=self.preference_id, tools=self.tools, timeout=timeout, + nd_api_url=self.nd_api_url, ) is_default = False @@ -1001,6 +1004,7 @@ async def ainvoke( preference_id=self.preference_id, tools=self.tools, timeout=timeout, + nd_api_url=self.nd_api_url, ) is_default = False @@ -1187,6 +1191,7 @@ def stream( preference_id=self.preference_id, tools=self.tools, timeout=timeout, + nd_api_url=self.nd_api_url, ) if not best_llm: @@ -1301,6 +1306,7 @@ async def astream( preference_id=self.preference_id, tools=self.tools, timeout=timeout, + nd_api_url=self.nd_api_url, ) if not best_llm: From 338ab0aafd5103a694b2d0c00a30673d6b5f63ab Mon Sep 17 00:00:00 2001 From: Alejandro Companioni Date: Wed, 21 Aug 2024 16:23:26 -0400 Subject: [PATCH 3/4] bug in default env var name --- notdiamond/llms/client.py | 9 +++++++-- notdiamond/settings.py | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/notdiamond/llms/client.py b/notdiamond/llms/client.py index 02328c64..cba6407d 100644 --- a/notdiamond/llms/client.py +++ b/notdiamond/llms/client.py @@ -91,6 +91,7 @@ def __init__( preference_id: Optional[str] = None, callbacks: Optional[List] = None, tools: Optional[Sequence[Union[Dict[str, Any], Callable]]] = None, + nd_api_url: Optional[str] = settings.NOTDIAMOND_API_URL, **kwargs, ): if api_key is None: @@ -126,6 +127,7 @@ def __init__( preference_id=preference_id, tools=tools, callbacks=callbacks, + nd_api_url=nd_api_url, **kwargs, ) @@ -179,8 +181,8 @@ async def amodel_select( Defaults to Metric("accuracy"). timeout (int): The number of seconds to wait before terminating the API call to Not Diamond backend. Default to 5 seconds. + nd_api_url (Optional[str]): The URL of the NotDiamond API. Defaults to settings.NOTDIAMOND_API_URL. **kwargs: Any other arguments that are supported by Langchain's invoke method, will be passed through. - Returns: tuple[str, Optional[LLMConfig]]: returns the session_id and the chosen LLM """ @@ -530,6 +532,7 @@ def __init__( preference_id: Optional[str] = None, tools: Optional[Sequence[Union[Dict[str, Any], Callable]]] = None, callbacks: Optional[List] = None, + nd_api_url: Optional[str] = settings.NOTDIAMOND_API_URL, **kwargs, ) -> None: super().__init__( @@ -543,6 +546,7 @@ def __init__( preference_id=preference_id, tools=tools, callbacks=callbacks, + nd_api_url=nd_api_url, **kwargs, ) @@ -616,6 +620,7 @@ def create( dict. timeout (int): The number of seconds to wait before terminating the API call to Not Diamond backend. Default to 5 seconds. + nd_api_url (Optional[str]): The URL of the NotDiamond API. Defaults to settings.NOTDIAMOND_API_URL. **kwargs: Any other arguments that are supported by Langchain's invoke method, will be passed through. Raises: @@ -1641,7 +1646,7 @@ class NotDiamond(_NDClient): """Bind tools to the LLM object. The tools will be passed to the LLM object when invoking it.""" nd_api_url: Optional[str] - """The URL of the NotDiamond API. Defaults to None.""" + """The URL of the NotDiamond API. Defaults to settings.NOTDIAMOND_API_URL.""" class Config: arbitrary_types_allowed = True diff --git a/notdiamond/settings.py b/notdiamond/settings.py index 62b6c3bb..9b65ff85 100644 --- a/notdiamond/settings.py +++ b/notdiamond/settings.py @@ -19,7 +19,7 @@ NOTDIAMOND_API_URL = os.getenv( - "ND_BASE_URL", "https://not-diamond-server.onrender.com" + "NOTDIAMOND_API_URL", "https://not-diamond-server.onrender.com" ) PROVIDERS = { From 1176109cd5d5e14c2b1c11e4505620854bad8843 Mon Sep 17 00:00:00 2001 From: Alejandro Companioni Date: Wed, 21 Aug 2024 16:58:33 -0400 Subject: [PATCH 4/4] pplx fix --- notdiamond/llms/request.py | 1 + tests/test_llm_calls/test_perplexity.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/notdiamond/llms/request.py b/notdiamond/llms/request.py index a0cb1b6d..5a882e9a 100644 --- a/notdiamond/llms/request.py +++ b/notdiamond/llms/request.py @@ -164,6 +164,7 @@ def model_select( tradeoff=tradeoff, preference_id=preference_id, tools=tools, + nd_api_url=nd_api_url, ) try: diff --git a/tests/test_llm_calls/test_perplexity.py b/tests/test_llm_calls/test_perplexity.py index b68f27a1..ed5157d6 100644 --- a/tests/test_llm_calls/test_perplexity.py +++ b/tests/test_llm_calls/test_perplexity.py @@ -7,7 +7,7 @@ @pytest.mark.longrun class Test_Perplexity_LLMs: - def test_llama_3_sonar_large_32k_online(self): + def test_llama_3_1_sonar_large_128k_online(self): provider = NDLLMProviders.LLAMA_3_1_SONAR_LARGE_128K_ONLINE provider.kwargs = {"max_tokens": 10} nd_llm = NotDiamond(