From 2e537b61166ebab4dd8d91d4b6af89f350f549d1 Mon Sep 17 00:00:00 2001
From: brunoalho
Date: Thu, 14 Nov 2024 18:02:10 +0000
Subject: [PATCH 1/6] [feat] code refactor

---
 .../core/llmstudio_core/providers/vertexai.py | 413 +++++----------
 1 file changed, 70 insertions(+), 343 deletions(-)

diff --git a/libs/core/llmstudio_core/providers/vertexai.py b/libs/core/llmstudio_core/providers/vertexai.py
index 90e3a399..8b523d53 100644
--- a/libs/core/llmstudio_core/providers/vertexai.py
+++ b/libs/core/llmstudio_core/providers/vertexai.py
@@ -1,4 +1,3 @@
-import asyncio
 import json
 import os
 import time
@@ -17,6 +16,7 @@
 import requests
 from llmstudio_core.exceptions import ProviderError
 from llmstudio_core.providers.provider import ChatRequest, ProviderCore, provider
+from llmstudio_core.utils import OpenAITool
 from openai.types.chat import ChatCompletionChunk
 from openai.types.chat.chat_completion_chunk import (
     Choice,
@@ -24,50 +24,7 @@
     ChoiceDeltaToolCall,
     ChoiceDeltaToolCallFunction,
 )
-from pydantic import BaseModel, ValidationError
-
-
-class OpenAIToolParameter(BaseModel):
-    type: str
-    description: Optional[str] = None
-
-
-class OpenAIToolParameters(BaseModel):
-    type: str
-    properties: Dict[str, OpenAIToolParameter]
-    required: List[str]
-
-
-class OpenAIToolFunction(BaseModel):
-    name: str
-    description: str
-    parameters: OpenAIToolParameters
-
-
-class OpenAITool(BaseModel):
-    type: str
-    function: OpenAIToolFunction
-
-
-class VertexAIToolParameter(BaseModel):
-    type: str
-    description: str
-
-
-class VertexAIToolParameters(BaseModel):
-    type: str
-    properties: Dict[str, VertexAIToolParameter]
-    required: List[str]
-
-
-class VertexAIFunctionDeclaration(BaseModel):
-    name: str
-    description: str
-    parameters: VertexAIToolParameters
-
-
-class VertexAI(BaseModel):
-    function_declarations: List[VertexAIFunctionDeclaration]
+from pydantic import ValidationError
 
 
 @provider
@@ -87,26 +44,7 @@ async def agenerate_client(
         self, request: ChatRequest
     ) -> Coroutine[Any, Any, Generator]:
         """Initialize Vertex AI"""
-
-        try:
-            # Init genai
-            url = f"https://generativelanguage.googleapis.com/v1beta/models/{request.model}:streamGenerateContent?alt=sse"
-            headers = {
-                "Content-Type": "application/json",
-                "x-goog-api-key": self.API_KEY,
-            }
-
-            # Convert the chat input into VertexAI format
-            tool_payload = self.process_tools(request.parameters.get("tools"))
-            message = self.convert_input_to_vertexai(request.chat_input, tool_payload)
-
-            # Generate content
-            return await asyncio.to_thread(
-                requests.post, url, headers=headers, json=message, stream=True
-            )
-
-        except Exception as e:
-            raise ProviderError(str(e))
+        return self.generate_client(request=request)
 
     def generate_client(self, request: ChatRequest) -> Coroutine[Any, Any, Generator]:
         """Initialize Vertex AI"""
@@ -119,12 +57,10 @@
             "x-goog-api-key": self.API_KEY,
         }
 
-            # Convert the chat input into VertexAI format
-            tool_payload = self.process_tools(request.parameters.get("tools"))
-            message = self.convert_input_to_vertexai(request.chat_input, tool_payload)
+            tool_payload = self._process_tools(request.parameters.get("tools"))
+            payload = self._create_request_payload(request.chat_input, tool_payload)
 
-            # Generate content
-            return requests.post(url, headers=headers, json=message, stream=True)
+            return requests.post(url, headers=headers, json=payload, stream=True)
 
         except Exception as e:
             raise ProviderError(str(e))
@@ -139,7 +75,6 @@ def parse_response(self, response: AsyncGenerator[Any, None], **kwargs) -> Any:
             if not chunk:
                 continue
 
-            # Check if it is a function call
             if (
                 "functionCall" in chunk["parts"][0]
                 and chunk["parts"][0]["functionCall"] is not None
@@ -177,9 +112,9 @@ def parse_response(self, response: AsyncGenerator[Any, None], **kwargs) -> Any:
                                         index=index,
                                         id="call_" + str(uuid.uuid4())[:29],
                                         function=ChoiceDeltaToolCallFunction(
-                                            name=functioncall["functionCall"][
+                                            name=functioncall["functionCall"].get(
                                                 "name"
-                                            ],
+                                            ),
                                             arguments="",
                                             type="function",
                                         ),
@@ -236,9 +171,9 @@ def parse_response(self, response: AsyncGenerator[Any, None], **kwargs) -> Any:
                     object="chat.completion.chunk",
                 )
                 yield final_chunk.model_dump()
-            # Check if it is a normal call
+
            elif chunk.get("parts")[0].get("text"):
-                # Parse google chunk response into ChatCompletionChunk
+
                 yield ChatCompletionChunk(
                     id=str(uuid.uuid4()),
                     choices=[
                         Choice(
                             delta=ChoiceDelta(
                                 content=chunk.get("parts")[0].get("text"),
                                 role="assistant",
                             ),
                             finish_reason=None,
                             index=0,
                         )
                     ],
                     created=int(time.time()),
                     model=kwargs.get("request").model,
                     object="chat.completion.chunk",
                 ).model_dump()
@@ -270,303 +205,95 @@ def parse_response(self, response: AsyncGenerator[Any, None], **kwargs) -> Any:
     async def aparse_response(
         self, response: AsyncGenerator, **kwargs
     ) -> AsyncGenerator[str, None]:
-        for chunk in response.iter_content(chunk_size=None):
-
-            chunk = json.loads(chunk.decode("utf-8").lstrip("data: "))
-            chunk = chunk.get("candidates")[0].get("content")
-
-            # Check if it is a function call
-            if (
-                "functionCall" in chunk["parts"][0]
-                and chunk["parts"][0]["functionCall"] is not None
-            ):
-                first_chunk = ChatCompletionChunk(
-                    id="chatcmpl-9woLM1b1qGErhTbXA3UBQf2FhUAho",
-                    choices=[
-                        Choice(
-                            delta=ChoiceDelta(
-                                content=None,
-                                function_call=None,
-                                role="assistant",
-                                tool_calls=None,
-                            ),
-                            index=0,
-                        )
-                    ],
-                    created=int(time.time()),
-                    model=kwargs.get("request").model,
-                    object="chat.completion.chunk",
-                    usage=None,
-                )
-                yield first_chunk.model_dump()
+        result = self.parse_response(response, kwargs)
+        for chunk in result:
+            yield chunk
 
-            for index, functioncall in enumerate(chunk["parts"]):
-
-                name_chunk = ChatCompletionChunk(
-                    id=str(uuid.uuid4()),
-                    choices=[
-                        Choice(
-                            delta=ChoiceDelta(
-                                role="assistant",
-                                tool_calls=[
-                                    ChoiceDeltaToolCall(
-                                        index=index,
-                                        id="call_" + str(uuid.uuid4())[:29],
-                                        function=ChoiceDeltaToolCallFunction(
-                                            name=functioncall["functionCall"][
-                                                "name"
-                                            ],
-                                            arguments="",
-                                            type="function",
-                                        ),
-                                    )
-                                ],
-                            ),
-                            finish_reason=None,
-                            index=index,
-                        )
-                    ],
-                    created=int(time.time()),
-                    model=kwargs.get("request").model,
-                    object="chat.completion.chunk",
-                )
-                yield name_chunk.model_dump()
-
-                args_chunk = ChatCompletionChunk(
-                    id=str(uuid.uuid4()),
-                    choices=[
-                        Choice(
-                            delta=ChoiceDelta(
-                                tool_calls=[
-                                    ChoiceDeltaToolCall(
-                                        index=index,
-                                        function=ChoiceDeltaToolCallFunction(
-                                            arguments=json.dumps(
-                                                functioncall["functionCall"]["args"]
-                                            ),
-                                        ),
-                                    )
-                                ],
-                            ),
-                            finish_reason=None,
-                            index=index,
-                        )
-                    ],
-                    created=int(time.time()),
-                    model=kwargs.get("request").model,
-                    object="chat.completion.chunk",
-                )
-                yield args_chunk.model_dump()
-
-                final_chunk = ChatCompletionChunk(
-                    id=str(uuid.uuid4()),
-                    choices=[
-                        Choice(
-                            delta=ChoiceDelta(),
-                            finish_reason="tool_calls",
-                            index=0,
-                        )
-                    ],
-                    created=int(time.time()),
-                    model=kwargs.get("request").model,
-                    object="chat.completion.chunk",
-                )
-                yield final_chunk.model_dump()
-            # Check if it is a normal call
-            elif chunk.get("parts")[0].get("text"):
-                # Parse google chunk response into ChatCompletionChunk
-                yield ChatCompletionChunk(
-                    id=str(uuid.uuid4()),
-                    choices=[
-                        Choice(
-                            delta=ChoiceDelta(
-                                content=chunk.get("parts")[0].get("text"),
-                                role="assistant",
-                            ),
-                            finish_reason=None,
-                            index=0,
-                        )
-                    ],
-                    created=int(time.time()),
-                    model=kwargs.get("request").model,
-                    object="chat.completion.chunk",
-                ).model_dump()
-
-        # Create the closing chunk
-        yield ChatCompletionChunk(
-            id=str(uuid.uuid4()),
-            choices=[
-                Choice(delta=ChoiceDelta(), finish_reason="stop", index=0)
-            ],
-            created=int(time.time()),
-            model=kwargs.get("request").model,
-            object="chat.completion.chunk",
-        ).model_dump()
-
-    def convert_input_to_vertexai(
+    def _create_request_payload(
         self, input_data: Union[Dict, str, List[Dict]], tool_payload: Optional[Any]
     ) -> Dict:
-        """
-        Converts OpenAI formatted input to VertexAI format.
-
-        Args:
-            input_data (Union[Dict, str, List[Dict]]): The input data in OpenAI format.
-            tool_payload (Optional[Any]): The tool payload for the request.
-
-        Returns:
-            Dict: The converted input in VertexAI format.
-        """
-        if isinstance(input_data, dict) and "input" in input_data:
-            return self._handle_simple_string_input(input_data["input"], tool_payload)
 
         if isinstance(input_data, str):
-            return self._handle_simple_string_input(input_data, tool_payload)
-
-        if isinstance(input_data, list):
-            return self._convert_list_input_to_vertexai(input_data, tool_payload)
-
-        raise ValueError("Invalid input type. Expected dict, str, or list.")
-
-    def _handle_simple_string_input(
-        self, input_data: str, tool_payload: Optional[Any]
-    ) -> Dict:
-        """
-        Handles simple string input and converts it to VertexAI format.
-
-        Args:
-            input_data (str): The input data as a simple string.
-            tool_payload (Optional[Any]): The tool payload for the request.
-
-        Returns:
-            Dict: The converted input in VertexAI format.
-        """
-        return self._initialize_vertexai_message(
-            user_message=input_data, tool_payload=tool_payload
-        )
+            return self._create_vertexai_payload(
+                user_payload=input_data, tool_payload=tool_payload
+            )
 
-    def _convert_list_input_to_vertexai(
-        self, input_data: List[Dict], tool_payload: Optional[Any]
-    ) -> Dict:
-        """
-        Converts a list of messages in OpenAI format to VertexAI format.
-
-        Args:
-            input_data (List[Dict]): The input data as a list of messages.
-            tool_payload (Optional[Any]): The tool payload for the request.
-
-        Returns:
-            Dict: The converted input in VertexAI format.
-        """
-        vertexai_format = self._initialize_vertexai_message(tool_payload=tool_payload)
-        for message in input_data:
-            role = message.get("role")
-            if role == "system":
-                vertexai_format["system_instruction"]["parts"]["text"] = (
-                    message["content"] or "You are a helpful assistant"
-                )
-            elif role == "user":
-                vertexai_format["contents"].append(
-                    {"role": "user", "parts": [{"text": message["content"]}]}
-                )
-            elif role == "assistant":
-                if message["content"] is None and "tool_calls" in message:
-                    tool_call = message["tool_calls"][0]
-                    vertexai_format["contents"].append(
-                        {
-                            "role": "model",
-                            "parts": [
-                                {
-                                    "functionCall": {
-                                        "name": tool_call["function"]["name"],
-                                        "args": json.loads(
-                                            tool_call["function"]["arguments"]
-                                        ),
+        elif isinstance(input_data, list):
+            payload = self._create_vertexai_payload(tool_payload=tool_payload)
+            for message in input_data:
+                if message.get("role") == "system":
+                    payload["system_instruction"]["parts"]["text"] = message["content"]
+
+                if message.get("role") in ["user", "assistant"]:
+                    if message.get("tool_calls"):
+                        tool_call = message["tool_calls"][0]
+                        payload["contents"].append(
+                            {
+                                "role": "model",
+                                "parts": [
+                                    {
+                                        "functionCall": {
+                                            "name": tool_call["function"]["name"],
+                                            "args": json.loads(
+                                                tool_call["function"]["arguments"]
+                                            ),
+                                        }
                                     }
-                                }
-                            ],
-                        }
-                    )
-                else:
-                    vertexai_format["contents"].append(
-                        {"role": "model", "parts": [{"text": message["content"]}]}
-                    )
-            elif role == "tool":
-                function_name = message["name"]
-                response = message["content"]
-                vertexai_format["system_instruction"]["parts"][
-                    "text"
-                ] += f"\nYou have called {function_name} and got the following response: {response}."
-            else:
-                raise ValueError(
-                    f"Invalid role: {role}. Expected 'system', 'user', 'assistant', or 'tool'."
-                )
-        return vertexai_format
+                                ],
+                            }
+                        )
+                    else:
+                        payload["contents"].append(
+                            {
+                                "role": message.get("role"),
+                                "parts": [{"text": message["content"]}],
+                            }
+                        )
+                elif message.get("role") == "tool":
+                    function_name = message["name"]
+                    response = message["content"]
+                    payload["system_instruction"]["parts"][
+                        "text"
+                    ] += f"\nYou have called {function_name} and got the following response: {response}."
+
+        return payload
 
-    def _initialize_vertexai_message(
-        self, user_message: Optional[str] = None, tool_payload: Optional[Any] = None
+    @staticmethod
+    def _create_vertexai_payload(
+        user_payload: Optional[str] = None, tool_payload: Optional[Any] = None
     ) -> Dict:
         """
-        Initializes the basic structure of a VertexAI message.
+        Initializes the basic structure of a VertexAI payload.
 
         Args:
-            user_message (Optional[str]): The user's message to include in the request.
+            user_payload (Optional[str]): The user's payload to include in the request.
             tool_payload (Optional[Any]): The tool payload for the request.
 
         Returns:
-            Dict: The initialized VertexAI message structure.
+            Dict: The initialized VertexAI payload structure.
         """
-        message_format = {
+        return {
             "system_instruction": {"parts": {"text": "You are a helpful assistant"}},
-            "contents": [],
+            "contents": [{"role": "user", "parts": [{"text": user_payload}]}]
+            if user_payload
+            else [],
             "tools": tool_payload,
             "tool_config": {"function_calling_config": {"mode": "AUTO"}},
         }
-        if user_message:
-            message_format["contents"].append(
-                {"role": "user", "parts": [{"text": user_message}]}
-            )
-        return message_format
 
-    def process_tools(
-        self, tools: Optional[Union[List[Dict], Dict]]
-    ) -> Optional[VertexAI]:
+    @staticmethod
+    def _process_tools(tools: Optional[Union[List[Dict], Dict]]) -> dict:
         if tools is None:
             return None
-
         try:
-            # Try to parse as OpenAI format
             parsed_tools = (
                 [OpenAITool(**tool) for tool in tools]
                 if isinstance(tools, list)
                 else [OpenAITool(**tools)]
            )
-            # Convert to VertexAI format
             function_declarations = []
             for tool in parsed_tools:
-                function = tool.function
-                properties = {
-                    name: VertexAIToolParameter(
-                        type=param.type, description=param.description or ""
-                    )
-                    for name, param in function.parameters.properties.items()
-                }
-                function_decl = VertexAIFunctionDeclaration(
-                    name=function.name,
-                    description=function.description,
-                    parameters=VertexAIToolParameters(
-                        type=function.parameters.type,
-                        properties=properties,
-                        required=function.parameters.required,
-                    ),
-                )
-                function_declarations.append(function_decl)
-            return VertexAI(function_declarations=function_declarations).model_dump()
+                function_declarations.append(tool.function.model_dump())
+            return {"function_declarations": function_declarations}
         except ValidationError:
-            # If the format is not OpenAI, attempt to validate as VertexAI format
-            try:
-                return VertexAI(**tools).model_dump()
-            except ValidationError:
-                # If it fails to validate as VertexAI, throw an error
-                raise ValueError(
-                    "Invalid tool format. Tool data must be in OpenAI or VertexAI format."
-                )
+            return tools
From 1e73f1ddc62b47b9977a0b6e31a8858aea965b9a Mon Sep 17 00:00:00 2001
From: brunoalho
Date: Sat, 16 Nov 2024 11:49:36 +0000
Subject: [PATCH 2/6] [feat] function calling

---
 .../core/llmstudio_core/providers/vertexai.py | 35 ++++++++++++-------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/libs/core/llmstudio_core/providers/vertexai.py b/libs/core/llmstudio_core/providers/vertexai.py
index 8b523d53..b56da67a 100644
--- a/libs/core/llmstudio_core/providers/vertexai.py
+++ b/libs/core/llmstudio_core/providers/vertexai.py
@@ -16,7 +16,7 @@
 import requests
 from llmstudio_core.exceptions import ProviderError
 from llmstudio_core.providers.provider import ChatRequest, ProviderCore, provider
-from llmstudio_core.utils import OpenAITool
+from llmstudio_core.utils import OpenAIToolFunction
 from openai.types.chat import ChatCompletionChunk
 from openai.types.chat.chat_completion_chunk import (
     Choice,
@@ -57,7 +57,7 @@ def generate_client(self, request: ChatRequest) -> Coroutine[Any, Any, Generator
             "x-goog-api-key": self.API_KEY,
         }
 
-            tool_payload = self._process_tools(request.parameters.get("tools"))
+            tool_payload = self._process_tools(request.parameters)
             payload = self._create_request_payload(request.chat_input, tool_payload)
 
             return requests.post(url, headers=headers, json=payload, stream=True)
@@ -205,7 +205,7 @@ def parse_response(self, response: AsyncGenerator[Any, None], **kwargs) -> Any:
     async def aparse_response(
         self, response: AsyncGenerator, **kwargs
     ) -> AsyncGenerator[str, None]:
-        result = self.parse_response(response, kwargs)
+        result = self.parse_response(response=response, **kwargs)
         for chunk in result:
             yield chunk
 
@@ -282,18 +282,29 @@ def _create_vertexai_payload(
         }
 
     @staticmethod
-    def _process_tools(tools: Optional[Union[List[Dict], Dict]]) -> dict:
-        if tools is None:
+    def _process_tools(parameters: dict) -> dict:
+
+        if parameters.get("tools") is None and parameters.get("functions") is None:
             return None
         try:
-            parsed_tools = (
-                [OpenAITool(**tool) for tool in tools]
-                if isinstance(tools, list)
-                else [OpenAITool(**tools)]
-            )
+            if parameters.get("tools"):
+                parsed_tools = [
+                    OpenAIToolFunction(**tool["function"])
+                    for tool in parameters["tools"]
+                ]
+
+            if parameters.get("functions"):
+                parsed_tools = [
+                    OpenAIToolFunction(**tool) for tool in parameters["functions"]
+                ]
+
             function_declarations = []
             for tool in parsed_tools:
-                function_declarations.append(tool.function.model_dump())
+                function_declarations.append(tool.model_dump())
             return {"function_declarations": function_declarations}
         except ValidationError:
-            return tools
+            return (
+                parameters.get("tools")
+                if parameters.get("tools")
+                else parameters.get("functions")
+            )
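The conversion above depends on `OpenAIToolFunction` from `llmstudio_core.utils`, which is not part of this series. A runnable approximation of the whole tools/functions normalization, with an assumed pydantic model standing in for the real one:

    # Approximation of _process_tools after this patch. OpenAIToolFunction is
    # assumed: llmstudio_core.utils is not shown in the series, so the model
    # below is a plausible stand-in, not the library's definition.
    from typing import Any, Dict, Optional

    from pydantic import BaseModel, ValidationError


    class OpenAIToolFunction(BaseModel):  # assumed shape
        name: str
        description: str
        parameters: Dict[str, Any]  # JSON-schema dict, kept loose for the sketch


    def process_tools(parameters: dict) -> Optional[dict]:
        if parameters.get("tools") is None and parameters.get("functions") is None:
            return None
        try:
            if parameters.get("tools"):
                parsed = [OpenAIToolFunction(**t["function"]) for t in parameters["tools"]]
            if parameters.get("functions"):
                parsed = [OpenAIToolFunction(**t) for t in parameters["functions"]]
            return {"function_declarations": [t.model_dump() for t in parsed]}
        except ValidationError:
            # Fall through: assume the caller already sent VertexAI-format tools.
            return parameters.get("tools") or parameters.get("functions")


    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get the current weather for a city",
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"],
                },
            },
        }
    ]
    print(process_tools({"tools": tools}))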
From 6e809dd0dc75d201a3e4180cfa0c12c63840c26d Mon Sep 17 00:00:00 2001
From: brunoalho
Date: Sun, 17 Nov 2024 15:59:27 +0000
Subject: [PATCH 3/6] [chore] remove duplicated async code from openai

---
 libs/core/llmstudio_core/providers/openai.py | 22 ++++----------------
 1 file changed, 4 insertions(+), 18 deletions(-)

diff --git a/libs/core/llmstudio_core/providers/openai.py b/libs/core/llmstudio_core/providers/openai.py
index bd8bb6dc..e75fcad4 100644
--- a/libs/core/llmstudio_core/providers/openai.py
+++ b/libs/core/llmstudio_core/providers/openai.py
@@ -1,4 +1,3 @@
-import asyncio
 import os
 from typing import Any, AsyncGenerator, Coroutine, Generator
 
@@ -26,21 +25,7 @@ async def agenerate_client(
         self, request: ChatRequest
     ) -> Coroutine[Any, Any, Generator]:
         """Generate an OpenAI client"""
-
-        try:
-            return await asyncio.to_thread(
-                self._client.chat.completions.create,
-                model=request.model,
-                messages=(
-                    [{"role": "user", "content": request.chat_input}]
-                    if isinstance(request.chat_input, str)
-                    else request.chat_input
-                ),
-                stream=True,
-                **request.parameters,
-            )
-        except openai._exceptions.APIError as e:
-            raise ProviderError(str(e))
+        return self.generate_client(request=request)
 
     def generate_client(self, request: ChatRequest) -> Generator:
         """Generate an OpenAI client"""
@@ -62,8 +47,9 @@ async def aparse_response(
         self, response: AsyncGenerator, **kwargs
     ) -> AsyncGenerator[str, None]:
-        for chunk in response:
-            yield chunk.model_dump()
+        result = self.parse_response(response=response, **kwargs)
+        for chunk in result:
+            yield chunk
 
     def parse_response(self, response: Generator, **kwargs) -> Generator:
         for chunk in response:
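Patches 1, 3, and 4 all apply the same shape: the async entry point drains its sync counterpart instead of duplicating it. A minimal sketch of that pattern in isolation:

    # Minimal sketch of the sync/async unification used across these patches:
    # the async generator delegates to the sync parser.
    import asyncio
    from typing import AsyncGenerator, Generator


    def parse_response(response: Generator, **kwargs) -> Generator:
        for chunk in response:
            yield chunk  # sync path: pass chunks straight through


    async def aparse_response(response: Generator, **kwargs) -> AsyncGenerator:
        # Async path reuses the sync parser. The iteration still runs on the
        # event loop thread, which is fine for CPU-light chunk handling.
        for chunk in parse_response(response, **kwargs):
            yield chunk


    async def main() -> None:
        async for chunk in aparse_response(iter(["a", "b", "c"])):
            print(chunk)


    asyncio.run(main())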
client""" - - self.is_llama = "llama" in request.model.lower() - self.is_openai = "gpt" in request.model.lower() - self.has_tools = request.parameters.get("tools") is not None - self.has_functions = request.parameters.get("functions") is not None - - try: - messages = self.prepare_messages(request) - - # Prepare the optional tool-related arguments - tool_args = {} - if not self.is_llama and self.has_tools and self.is_openai: - tool_args = { - "tools": request.parameters.get("tools"), - "tool_choice": "auto" if request.parameters.get("tools") else None, - } - - # Prepare the optional function-related arguments - function_args = {} - if not self.is_llama and self.has_functions and self.is_openai: - function_args = { - "functions": request.parameters.get("functions"), - "function_call": "auto" - if request.parameters.get("functions") - else None, - } - - # Prepare the base arguments - base_args = { - "model": request.model, - "messages": messages, - "stream": True, - } - - # Combine all arguments - combined_args = { - **base_args, - **tool_args, - **function_args, - **request.parameters, - } - # Perform the asynchronous call - return await asyncio.to_thread( - self._client.chat.completions.create, **combined_args - ) - - except openai._exceptions.APIConnectionError as e: - raise ProviderError(f"There was an error reaching the endpoint: {e}") - - except openai._exceptions.APIStatusError as e: - raise ProviderError(e.response.json()) + return self.generate_client(request=request) def generate_client(self, request: ChatRequest) -> Any: """Generate an AzureOpenAI client""" @@ -179,17 +128,11 @@ def prepare_messages(self, request: ChatRequest): async def aparse_response( self, response: AsyncGenerator, **kwargs ) -> AsyncGenerator[str, None]: - if self.is_llama and (self.has_tools or self.has_functions): - for chunk in self.handle_tool_response(response, **kwargs): - if chunk: - yield chunk - else: - for chunk in response: - c = chunk.model_dump() - if c.get("choices"): - yield c + result = self.parse_response(response=response, **kwargs) + for chunk in result: + yield chunk - def parse_response(self, response: Generator, **kwargs) -> Any: + def parse_response(self, response: AsyncGenerator, **kwargs) -> Any: if self.is_llama and (self.has_tools or self.has_functions): for chunk in self.handle_tool_response(response, **kwargs): if chunk: From 103383f8700a09115c200449d0ef03f6821cfd80 Mon Sep 17 00:00:00 2001 From: brunoalho Date: Tue, 19 Nov 2024 10:35:03 +0000 Subject: [PATCH 5/6] [bugfix] get cost using langchain wrapper --- libs/llmstudio/llmstudio/langchain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/llmstudio/llmstudio/langchain.py b/libs/llmstudio/llmstudio/langchain.py index 4a52b0d0..87180304 100644 --- a/libs/llmstudio/llmstudio/langchain.py +++ b/libs/llmstudio/llmstudio/langchain.py @@ -57,7 +57,7 @@ def _create_chat_result(self, response: Any) -> ChatResult: generation_info=generation_info, ) generations.append(gen) - token_usage = response.get("usage", {}) + token_usage = response.get("metrics", {}) llm_output = { "token_usage": token_usage, "model_name": response["model"], From b689ceb4040967449fd4e39d095a03607f1a205a Mon Sep 17 00:00:00 2001 From: brunoalho Date: Tue, 19 Nov 2024 16:58:20 +0000 Subject: [PATCH 6/6] [chore] normalize return --- libs/core/llmstudio_core/providers/vertexai.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/libs/core/llmstudio_core/providers/vertexai.py b/libs/core/llmstudio_core/providers/vertexai.py 
From b689ceb4040967449fd4e39d095a03607f1a205a Mon Sep 17 00:00:00 2001
From: brunoalho
Date: Tue, 19 Nov 2024 16:58:20 +0000
Subject: [PATCH 6/6] [chore] normalize return

---
 libs/core/llmstudio_core/providers/vertexai.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/libs/core/llmstudio_core/providers/vertexai.py b/libs/core/llmstudio_core/providers/vertexai.py
index b56da67a..69eee514 100644
--- a/libs/core/llmstudio_core/providers/vertexai.py
+++ b/libs/core/llmstudio_core/providers/vertexai.py
@@ -303,8 +303,4 @@ def _process_tools(parameters: dict) -> dict:
             function_declarations.append(tool.model_dump())
             return {"function_declarations": function_declarations}
         except ValidationError:
-            return (
-                parameters.get("tools")
-                if parameters.get("tools")
-                else parameters.get("functions")
-            )
+            return parameters.get("tools") or parameters.get("functions")
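Closing note for readers tracing parse_response through patches 1 and 2: each SSE line from streamGenerateContent is a JSON document whose text sits under candidates[0].content.parts[0]. A self-contained sketch of that extraction, using canned bytes in place of requests' response.iter_content():

    # Self-contained sketch of the SSE extraction in parse_response: strip the
    # "data: " prefix, decode the JSON, and pull the text part from the first
    # candidate. The canned bytes below stand in for a live streaming response.
    import json

    canned_stream = [
        b'data: {"candidates": [{"content": {"parts": [{"text": "Paris"}]}}]}',
        b'data: {"candidates": [{"content": {"parts": [{"text": " is the capital."}]}}]}',
    ]

    for raw in canned_stream:
        chunk = json.loads(raw.decode("utf-8").lstrip("data: "))
        content = chunk.get("candidates")[0].get("content")
        part = content["parts"][0]
        if part.get("text"):
            print(part["text"], end="")
    print()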