fix(openai.py): create MistralConfig with response_format mapping for mistral api

krrishdholakia committed May 13, 2024
1 parent 20fe4ff commit 2045696
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 46 deletions.
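For context, a minimal usage sketch (not part of the diff) of what this change enables: passing an OpenAI-style `response_format` (and `seed`) to a Mistral model through `litellm.completion`. The model name is illustrative, and per the Mistral docs the prompt must still ask for JSON when JSON mode is on.

# Illustrative sketch; assumes MISTRAL_API_KEY is set in the environment.
import litellm

response = litellm.completion(
    model="mistral/mistral-large-latest",  # example model name
    messages=[
        # Mistral's JSON mode requires asking for JSON in the prompt itself.
        {"role": "system", "content": "Reply in JSON."},
        {"role": "user", "content": "Name three EU capitals."},
    ],
    response_format={"type": "json_object"},
    seed=10,  # forwarded to Mistral as random_seed (see MistralConfig below)
)
print(response.choices[0].message.content)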
2 changes: 1 addition & 1 deletion litellm/__init__.py
@@ -755,7 +755,7 @@ def identify(event_details):
AmazonMistralConfig,
AmazonBedrockGlobalConfig,
)
-from .llms.openai import OpenAIConfig, OpenAITextCompletionConfig
+from .llms.openai import OpenAIConfig, OpenAITextCompletionConfig, MistralConfig
from .llms.azure import AzureOpenAIConfig, AzureOpenAIError
from .llms.watsonx import IBMWatsonXAIConfig
from .main import * # type: ignore
113 changes: 110 additions & 3 deletions litellm/llms/openai.py
@@ -53,6 +53,113 @@ def __init__(
) # Call the base class constructor with the parameters it needs


class MistralConfig:
"""
Reference: https://docs.mistral.ai/api/
The class `MistralConfig` provides configuration for Mistral's Chat API interface. Below are the parameters:
- `temperature` (number or null): Defines the sampling temperature to use, varying between 0 and 2. API Default - 0.7.
- `top_p` (number or null): An alternative to sampling with temperature, used for nucleus sampling. API Default - 1.
- `max_tokens` (integer or null): This optional parameter helps to set the maximum number of tokens to generate in the chat completion. API Default - null.
- `tools` (list or null): A list of available tools for the model. Use this to specify functions for which the model can generate JSON inputs.
- `tool_choice` (string - 'auto'/'any'/'none' or null): Specifies if/how functions are called. If set to none the model won't call a function and will generate a message instead. If set to auto the model can choose to either generate a message or call a function. If set to any the model is forced to call a function. Default - 'auto'.
- `random_seed` (integer or null): The seed to use for random sampling. If set, different calls will generate deterministic results.
- `safe_prompt` (boolean): Whether to inject a safety prompt before all conversations. API Default - 'false'.
- `response_format` (object or null): An object specifying the format that the model must output. Setting to { "type": "json_object" } enables JSON mode, which guarantees the message the model generates is in JSON. When using JSON mode you MUST also instruct the model to produce JSON yourself with a system or a user message.
"""

temperature: Optional[int] = None
top_p: Optional[int] = None
max_tokens: Optional[int] = None
tools: Optional[list] = None
tool_choice: Optional[Literal["auto", "any", "none"]] = None
random_seed: Optional[int] = None
safe_prompt: Optional[bool] = None
response_format: Optional[dict] = None

def __init__(
self,
temperature: Optional[int] = None,
top_p: Optional[int] = None,
max_tokens: Optional[int] = None,
tools: Optional[list] = None,
tool_choice: Optional[Literal["auto", "any", "none"]] = None,
random_seed: Optional[int] = None,
safe_prompt: Optional[bool] = None,
response_format: Optional[dict] = None,
) -> None:
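        # non-None constructor args are stored as *class* attributes, the
        # pattern shared by the other provider Config classes in this module,
        # so that get_config() can read them back from cls.__dict__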
locals_ = locals()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)

@classmethod
def get_config(cls):
return {
k: v
for k, v in cls.__dict__.items()
if not k.startswith("__")
and not isinstance(
v,
(
types.FunctionType,
types.BuiltinFunctionType,
classmethod,
staticmethod,
),
)
and v is not None
}

def get_supported_openai_params(self):
return [
"stream",
"temperature",
"top_p",
"max_tokens",
"tools",
"tool_choice",
"seed",
"response_format",
]

def _map_tool_choice(self, tool_choice: str) -> str:
if tool_choice == "auto" or tool_choice == "none":
return tool_choice
elif tool_choice == "required":
return "any"
else: # openai 'tool_choice' object param not supported by Mistral API
return "any"

def map_openai_params(self, non_default_params: dict, optional_params: dict):
for param, value in non_default_params.items():
if param == "max_tokens":
optional_params["max_tokens"] = value
if param == "tools":
optional_params["tools"] = value
if param == "stream" and value == True:
optional_params["stream"] = value
if param == "temperature":
optional_params["temperature"] = value
if param == "top_p":
optional_params["top_p"] = value
if param == "tool_choice" and isinstance(value, str):
optional_params["tool_choice"] = self._map_tool_choice(
tool_choice=value
)
if param == "seed":
optional_params["extra_body"] = {"random_seed": value}
return optional_params
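A quick sketch (not part of the commit) of how the mapping above behaves, with illustrative values:

# OpenAI-style params in, Mistral-ready params out.
import litellm

mapped = litellm.MistralConfig().map_openai_params(
    non_default_params={
        "seed": 10,                  # becomes extra_body={"random_seed": 10}
        "tool_choice": "required",   # OpenAI-only value, translated to "any"
        "response_format": {"type": "json_object"},  # passed through unchanged
    },
    optional_params={},
)
print(mapped)
# {'extra_body': {'random_seed': 10}, 'tool_choice': 'any', 'response_format': {'type': 'json_object'}}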


class OpenAIConfig:
"""
Reference: https://platform.openai.com/docs/api-reference/chat/create
@@ -1327,8 +1434,8 @@ def add_message(
client=client,
)

-        thread_message: OpenAIMessage = openai_client.beta.threads.messages.create(
-            thread_id, **message_data
+        thread_message: OpenAIMessage = openai_client.beta.threads.messages.create(  # type: ignore
+            thread_id, **message_data  # type: ignore
)

response_obj: Optional[OpenAIMessage] = None
@@ -1458,7 +1565,7 @@ def run_thread(
client=client,
)

-        response = openai_client.beta.threads.runs.create_and_poll(
+        response = openai_client.beta.threads.runs.create_and_poll(  # type: ignore
thread_id=thread_id,
assistant_id=assistant_id,
additional_instructions=additional_instructions,
1 change: 1 addition & 0 deletions litellm/tests/test_completion.py
@@ -665,6 +665,7 @@ def test_completion_mistral_api():
"content": "Hey, how's it going?",
}
],
+        seed=10,
)
# Add any assertions here to check the response
print(response)
19 changes: 12 additions & 7 deletions litellm/tests/test_function_calling.py
@@ -37,14 +37,19 @@ def get_current_weather(location, unit="fahrenheit"):


# Example dummy function hard coded to return the same weather


# In production, this could be your backend API or an external API
-def test_parallel_function_call():
+@pytest.mark.parametrize(
+    "model", ["gpt-3.5-turbo-1106", "mistral/mistral-large-latest"]
+)
+def test_parallel_function_call(model):
try:
# Step 1: send the conversation and available functions to the model
messages = [
{
"role": "user",
"content": "What's the weather like in San Francisco, Tokyo, and Paris?",
"content": "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses",
}
]
tools = [
@@ -58,7 +63,7 @@ def test_parallel_function_call():
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA",
"description": "The city and state",
},
"unit": {
"type": "string",
@@ -71,7 +76,7 @@
}
]
response = litellm.completion(
model="gpt-3.5-turbo-1106",
model=model,
messages=messages,
tools=tools,
tool_choice="auto", # auto is default, but we'll be explicit
@@ -83,8 +88,8 @@ def test_parallel_function_call():
print("length of tool calls", len(tool_calls))
print("Expecting there to be 3 tool calls")
assert (
-            len(tool_calls) > 1
-        ) # this has to call the function for SF, Tokyo and parise
+            len(tool_calls) > 0
+        ) # this has to call the function for SF, Tokyo and paris

# Step 2: check if the model wanted to call a function
if tool_calls:
@@ -116,7 +121,7 @@ def test_parallel_function_call():
) # extend conversation with function response
print(f"messages: {messages}")
second_response = litellm.completion(
model="gpt-3.5-turbo-1106", messages=messages, temperature=0.2, seed=22
model=model, messages=messages, temperature=0.2, seed=22
) # get a new response from the model where it can see the function response
print("second response\n", second_response)
return second_response
40 changes: 5 additions & 35 deletions litellm/utils.py
@@ -5617,32 +5617,9 @@ def _map_and_modify_arg(supported_params: dict, provider: str, model: str):
model=model, custom_llm_provider=custom_llm_provider
)
_check_valid_arg(supported_params=supported_params)
-        if temperature is not None:
-            optional_params["temperature"] = temperature
-        if top_p is not None:
-            optional_params["top_p"] = top_p
-        if stream is not None:
-            optional_params["stream"] = stream
-        if max_tokens is not None:
-            optional_params["max_tokens"] = max_tokens
-        if tools is not None:
-            optional_params["tools"] = tools
-        if tool_choice is not None:
-            optional_params["tool_choice"] = tool_choice
-        if response_format is not None:
-            optional_params["response_format"] = response_format
-        # check safe_mode, random_seed: https://docs.mistral.ai/api/#operation/createChatCompletion
-        safe_mode = passed_params.pop("safe_mode", None)
-        random_seed = passed_params.pop("random_seed", None)
-        extra_body = {}
-        if safe_mode is not None:
-            extra_body["safe_mode"] = safe_mode
-        if random_seed is not None:
-            extra_body["random_seed"] = random_seed
-        optional_params["extra_body"] = (
-            extra_body  # openai client supports `extra_body` param
+        optional_params = litellm.MistralConfig().map_openai_params(
+            non_default_params=non_default_params, optional_params=optional_params
         )

elif custom_llm_provider == "groq":
supported_params = get_supported_openai_params(
model=model, custom_llm_provider=custom_llm_provider
@@ -5843,7 +5820,8 @@ def _map_and_modify_arg(supported_params: dict, provider: str, model: str):
for k in passed_params.keys():
if k not in default_params.keys():
extra_body[k] = passed_params[k]
optional_params["extra_body"] = extra_body
optional_params.setdefault("extra_body", {})
optional_params["extra_body"] = {**optional_params["extra_body"], **extra_body}
else:
# if user passed in non-default kwargs for specific providers/models, pass them along
for k in passed_params.keys():
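A small illustration (hypothetical values) of the merge introduced above: an `extra_body` set earlier, e.g. by `MistralConfig.map_openai_params`, is no longer clobbered by provider-specific extras.

optional_params = {"extra_body": {"random_seed": 10}}  # set earlier in this function
extra_body = {"safe_prompt": True}                     # collected from passed_params
optional_params.setdefault("extra_body", {})
optional_params["extra_body"] = {**optional_params["extra_body"], **extra_body}
print(optional_params)  # {'extra_body': {'random_seed': 10, 'safe_prompt': True}}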
@@ -6212,15 +6190,7 @@ def get_supported_openai_params(model: str, custom_llm_provider: str):
"max_retries",
]
elif custom_llm_provider == "mistral":
-        return [
-            "temperature",
-            "top_p",
-            "stream",
-            "max_tokens",
-            "tools",
-            "tool_choice",
-            "response_format",
-        ]
+        return litellm.MistralConfig().get_supported_openai_params()
elif custom_llm_provider == "replicate":
return [
"stream",
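As a final sanity check (a sketch, not part of the commit), the mistral branch of `get_supported_openai_params` now defers to `MistralConfig`:

from litellm.utils import get_supported_openai_params

params = get_supported_openai_params(
    model="mistral-large-latest", custom_llm_provider="mistral"
)
assert "response_format" in params and "seed" in params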
