
Commit ac04a9a: merge develop

mirianfsilva committed Aug 3, 2023
2 parents 2d5da28 + 0568e21
Showing 10 changed files with 56 additions and 33 deletions.
38 changes: 21 additions & 17 deletions GETTING_STARTED.md
@@ -2,20 +2,20 @@

## <a name='TableofContents'></a>Table of Contents

-* [Table of Contents](#table-of-contents)
-* [Installation](#installation)
-* [Gen AI Endpoint](#gen-ai-endpoint)
-* [Example](#example)
-* [Examples](#examples)
-* [Async Example](#async-example)
-* [Synchronous Example](#synchronous-example)
-* [Tips and Troubleshooting](#tips-and-troubleshooting)
-* [Model Availability](#model-availability)
-* [Enabling Logs](#enabling-logs)
-* [Experimenting with a Large Number of Prompts](#many-prompts)
-* [Extensions](#extensions)
-* [LangChain Extension](#langchain-extension)
-* [Support](#support)
+- [Table of Contents](#table-of-contents)
+- [Installation](#installation)
+- [Gen AI Endpoint](#gen-ai-endpoint)
+- [Example](#example)
+- [Examples](#examples)
+- [Async Example](#async-example)
+- [Synchronous Example](#synchronous-example)
+- [Tips and Troubleshooting](#tips-and-troubleshooting)
+- [Model Availability](#model-availability)
+- [Enabling Logs](#enabling-logs)
+- [Experimenting with a Large Number of Prompts](#many-prompts)
+- [Extensions](#extensions)
+- [LangChain Extension](#langchain-extension)
+- [Support](#support)

## <a name='Installation'></a>Installation

@@ -57,7 +57,7 @@ import os

from dotenv import load_dotenv

-from genai.model import Credentials
+from genai.credentials import Credentials

# make sure you have a .env file under genai root with
# GENAI_KEY=<your-genai-key>
@@ -90,7 +90,8 @@ import os

from dotenv import load_dotenv

-from genai.model import Credentials, Model
+from genai.credentials import Credentials
+from genai.model import Model
from genai.schemas import GenerateParams

# make sure you have a .env file under genai root with
@@ -144,7 +145,8 @@ import os

from dotenv import load_dotenv

-from genai.model import Credentials, Model
+from genai.credentials import Credentials
+from genai.model import Model
from genai.schemas import GenerateParams

# make sure you have a .env file under genai root with
@@ -186,7 +188,9 @@ for response in responses:
## <a name='TipsAndTroubleshooting'></a>Tips and Troubleshooting

### <a name='Model Availability'></a>Model Availability

To test the reachability of your endpoint and the availability of your desired model, use the following utility script with your model details:

```python
import os

...
```
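The rest of the utility script is collapsed in this view. As a rough sketch of such a check, wired up with the import locations this commit introduces (the `Model.available()` call, the `GENAI_API` variable, and the model id are assumptions for illustration, not shown in the hunk):

```python
import os

from dotenv import load_dotenv

from genai.credentials import Credentials
from genai.model import Model

# make sure you have a .env file under genai root with
# GENAI_KEY=<your-genai-key> and (assumed) GENAI_API=<your-genai-endpoint>
load_dotenv()
api_key = os.getenv("GENAI_KEY", None)
api_endpoint = os.getenv("GENAI_API", None)
creds = Credentials(api_key=api_key, api_endpoint=api_endpoint)

# instantiate the model to probe; "google/flan-ul2" is only a placeholder id
model = Model("google/flan-ul2", params=None, credentials=creds)

# available() is assumed to return True when the endpoint is reachable
# and the model id is served by it
print(f"Model available: {model.available()}")
```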
2 changes: 1 addition & 1 deletion examples/user/grid_search_params.py
@@ -27,7 +27,7 @@
}

creds = Credentials(api_key=API_KEY, api_endpoint=API_ENDPOINT)
-pt = PromptPattern.from_str("The capital of {{country}} is {{capital}}. The capital of Taiwan is")
+pt = PromptPattern.from_str("The capital of {{country}} is {{capital}}. The capital of Canada is")
pt.sub("capital", "Madrid").sub("country", "Spain")

# generate all combinations of parameters, returns a list of GenerateParams
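For reference, a minimal sketch of what the updated template produces after substitution (assuming `str(pt)` renders the filled-in pattern; the print is illustrative):

```python
from genai.prompt_pattern import PromptPattern

pt = PromptPattern.from_str("The capital of {{country}} is {{capital}}. The capital of Canada is")
pt.sub("capital", "Madrid").sub("country", "Spain")

# the template now carries one worked example followed by an open-ended
# completion for the model, i.e. a one-shot prompt
print(str(pt))
# expected: The capital of Spain is Madrid. The capital of Canada is
```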
2 changes: 1 addition & 1 deletion (file name not shown in this capture)
@@ -2,7 +2,7 @@

from dotenv import load_dotenv

-from genai.model import Credentials
+from genai.credentials import Credentials
from genai.prompt_pattern import PromptPattern
from genai.schemas import TokenParams
from genai.services.prompt_template_manager import PromptTemplateManager
4 changes: 3 additions & 1 deletion src/genai/extensions/localserver/local_api_server.py
@@ -136,5 +136,7 @@ async def _route_generate(self, generate_request: GenerateRequestBody):
for input in generate_request.inputs
]
created_at = datetime.datetime.now().isoformat()
-response = GenerateResponse(model_id=generate_request.model_id, created_at=created_at, results=results)
+response = GenerateResponse(
+    id=str(uuid.uuid4()), model_id=generate_request.model_id, created_at=created_at, results=results
+)
return response
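`GenerateResponse` now requires an `id` (see the schema change to `src/genai/schemas/responses.py` below), so the local server fabricates one per response. The hunk does not show a matching `import uuid`; a self-contained sketch of the pattern, with placeholder values:

```python
import datetime
import uuid

# stand-ins for values assembled earlier in _route_generate (illustrative only)
model_id = "some/local-model"
results: list = []

# each response gets a fresh UUID plus an ISO-8601 creation timestamp
response_fields = {
    "id": str(uuid.uuid4()),
    "model_id": model_id,
    "created_at": datetime.datetime.now().isoformat(),
    "results": results,
}
print(response_fields["id"], response_fields["created_at"])
```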
1 change: 1 addition & 0 deletions src/genai/schemas/descriptions.py
@@ -33,6 +33,7 @@ class Descriptions:
TOP_P = "If set to value < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation. The range is 0.00 to 1.00. Valid only with decoding_method=sample."
REPETITION_PENALTY = "The parameter for repetition penalty. 1.0 means no penalty."
TRUNCATE_INPUT_TOKENS = "Truncate to this many input tokens. Can be used to avoid requests failing due to input being longer than configured limits. Zero means don't truncate."
+BEAM_WIDTH = "Multiple output sequences of tokens are generated, using your decoding selection, and then the output sequence with the highest overall probability is returned. When beam search is enabled, there will be a performance penalty, and Stop sequences will not be available."  # noqa

# Params.Token
RETURN_TOKEN = "Return tokens with the response. Defaults to false."
17 changes: 10 additions & 7 deletions src/genai/schemas/generate_params.py
@@ -53,16 +53,19 @@ class Config:

decoding_method: Optional[Literal["greedy", "sample"]] = Field(None, description=tx.DECODING_METHOD)
length_penalty: Optional[LengthPenalty] = Field(None, description=tx.LENGTH_PENALTY)
-max_new_tokens: Optional[int] = Field(None, description=tx.MAX_NEW_TOKEN)
-min_new_tokens: Optional[int] = Field(None, description=tx.MIN_NEW_TOKEN)
-random_seed: Optional[int] = Field(None, description=tx.RANDOM_SEED, ge=1, le=9999)
-stop_sequences: Optional[list[str]] = Field(None, description=tx.STOP_SQUENCES)
+max_new_tokens: Optional[int] = Field(None, description=tx.MAX_NEW_TOKEN, ge=1)
+min_new_tokens: Optional[int] = Field(None, description=tx.MIN_NEW_TOKEN, ge=0)
+random_seed: Optional[int] = Field(None, description=tx.RANDOM_SEED, ge=1)
+stop_sequences: Optional[list[str]] = Field(None, description=tx.STOP_SQUENCES, min_length=1)
stream: Optional[bool] = Field(None, description=tx.STREAM)
-temperature: Optional[float] = Field(None, description=tx.TEMPERATURE, ge=0.00, le=2.00)
+temperature: Optional[float] = Field(None, description=tx.TEMPERATURE, ge=0.05, le=2.00)
time_limit: Optional[int] = Field(None, description=tx.TIME_LIMIT)
top_k: Optional[int] = Field(None, description=tx.TOP_K, ge=1)
top_p: Optional[float] = Field(None, description=tx.TOP_P, ge=0.00, le=1.00)
-repetition_penalty: Optional[float] = Field(None, description=tx.REPETITION_PENALTY)
-truncate_input_tokens: Optional[int] = Field(None, description=tx.TRUNCATE_INPUT_TOKENS)
+repetition_penalty: Optional[float] = Field(
+    None, description=tx.REPETITION_PENALTY, multiple_of=0.01, ge=1.00, le=2.00
+)
+truncate_input_tokens: Optional[int] = Field(None, description=tx.TRUNCATE_INPUT_TOKENS, ge=0)
+beam_width: Optional[int] = Field(None, description=tx.BEAM_WIDTH, ge=0)
return_options: Optional[ReturnOptions] = Field(None, description=tx.RETURN)
returns: Optional[Return] = Field(None, description=tx.RETURN, alias="return", deprecated=True)
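These tightened bounds are enforced by pydantic when `GenerateParams` is constructed, so out-of-range values now fail fast instead of reaching the API. A minimal sketch of the behavior implied by this hunk and the updated tests below:

```python
from pydantic import ValidationError

from genai.schemas import GenerateParams

# in range under the new constraints
params = GenerateParams(temperature=0.05, max_new_tokens=3, beam_width=1, repetition_penalty=1.2)

# each of these now violates a bound added in this commit
for bad in (
    {"temperature": 0},           # below the new 0.05 floor
    {"beam_width": -100},         # beam_width must be >= 0
    {"repetition_penalty": 0.5},  # repetition_penalty must be >= 1.00
):
    try:
        GenerateParams(**bad)
    except ValidationError:
        print(f"rejected: {bad}")
```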
1 change: 1 addition & 0 deletions src/genai/schemas/responses.py
@@ -75,6 +75,7 @@ class GenerateResult(GenAiResponseModel):


class GenerateResponse(GenAiResponseModel):
+id: str
model_id: str
created_at: datetime
results: List[GenerateResult]
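Because `id` is now a required field, parsing a payload without it raises a validation error; a minimal sketch (values borrowed from the test helper below, with an empty `results` list and an illustrative model id):

```python
from genai.schemas.responses import GenerateResponse

payload = {
    "id": "1cf9f510-5549-4ea5-a909-2cf9219c1bb5",
    "model_id": "google/flan-ul2",  # illustrative model id
    "created_at": "2023-03-15T18:28:12.007Z",
    "results": [],
}

# pydantic parses created_at into a datetime and now insists on "id"
response = GenerateResponse(**payload)
print(response.id, response.created_at)

# dropping "id" from the payload would raise pydantic.ValidationError
```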
2 changes: 2 additions & 0 deletions tests/assets/response_helper.py
@@ -4,6 +4,7 @@ def generate(**kwargs):
SimpleResponse._check_for_errors("generate", **kwargs)

response = {}
response["id"] = "1cf9f510-5549-4ea5-a909-2cf9219c1bb5"
response["model_id"] = kwargs["model"]
response["created_at"] = "2023-03-15T18:28:12.007Z"
response["results"] = [
@@ -24,6 +25,7 @@ def generate_response_array_async(**kwargs):
arr = []
for i in range(len(kwargs["inputs"])):
response = {}
response["id"] = "1cf9f510-5549-4ea5-a909-2cf9219c1bb5"
response["model_id"] = kwargs["model"]
response["created_at"] = "2023-03-15T18:28:12.007Z"
response["results"] = [
2 changes: 1 addition & 1 deletion tests/test_concurrent.py
@@ -33,7 +33,7 @@ def mock_generate_json(self, mocker):

@pytest.fixture
def generate_params(self):
-return GenerateParams(temperature=0, max_new_tokens=3, return_options=ReturnOptions(input_text=True))
+return GenerateParams(temperature=0.05, max_new_tokens=3, return_options=ReturnOptions(input_text=True))

@pytest.fixture
def mock_tokenize_json(self, mocker):
20 changes: 15 additions & 5 deletions tests/test_generate_schema.py
@@ -28,6 +28,7 @@ def setup_method(self):
top_p=0.7,
repetition_penalty=1.2,
truncate_input_tokens=2,
+beam_width=1,
return_options=ReturnOptions(
input_text=True,
generated_tokens=True,
@@ -122,8 +123,6 @@ def test_random_seed_invalid_type(self):
GenerateParams(random_seed="dummy")
with pytest.raises(ValidationError):
GenerateParams(random_seed=0)
-with pytest.raises(ValidationError):
-    GenerateParams(random_seed=10000)

def test_random_seed_valid_type(self, request_body):
# test that random_seed must be an integer greater than or equal to 1
@@ -162,7 +161,7 @@ def test_stream_valid_type(self, request_body):
assert isinstance(params.stream, bool)

def test_temperature_invalid_type(self):
-# test that temperature must be a float between 0 and 2
+# test that temperature must be a float between 0.05 and 2
with pytest.raises(ValidationError):
GenerateParams(temperature="")
with pytest.raises(ValidationError):
@@ -238,7 +237,7 @@ def test_top_p_valid_type(self, request_body):

def test_repetition_penalty_invalid_type(self):
# test that repetition_penalty must be a float
-# NOTE: repetition_penalty can be 0 or less then 0?
+# NOTE: repetition_penalty can be 0 or less than 0?
with pytest.raises(ValidationError):
GenerateParams(repetition_penalty="")
with pytest.raises(ValidationError):
@@ -253,14 +252,25 @@ def test_repetition_penalty_valid_type(self, request_body):
assert isinstance(params.repetition_penalty, float)

def test_truncate_input_tokens_invalid_type(self):
-# test that truncate_input_tokens must be a interger
+# test that truncate_input_tokens must be an integer
with pytest.raises(ValidationError):
GenerateParams(truncate_input_tokens="")
with pytest.raises(ValidationError):
GenerateParams(truncate_input_tokens=[0, 1, 2])
with pytest.raises(ValidationError):
GenerateParams(truncate_input_tokens="dummy")

+def test_beam_width_valid_type(self, request_body):
+    params = request_body["params"]
+    assert isinstance(params.beam_width, int)
+
+def test_beam_width_invalid_type(self):
+    # test that beam_width must be a non-negative integer
+    with pytest.raises(ValidationError):
+        GenerateParams(beam_width="")
+    with pytest.raises(ValidationError):
+        GenerateParams(beam_width=-100)

def test_truncate_input_tokens_valid_type(self, request_body):
params = request_body["params"]
assert isinstance(params.truncate_input_tokens, int)