diff --git a/sdk/python/generative-ai/rag/notebooks/db_copilot_with_rag.ipynb b/sdk/python/generative-ai/rag/notebooks/db_copilot_with_rag.ipynb index 6a9f5dca23..713e2c237f 100644 --- a/sdk/python/generative-ai/rag/notebooks/db_copilot_with_rag.ipynb +++ b/sdk/python/generative-ai/rag/notebooks/db_copilot_with_rag.ipynb @@ -676,6 +676,7 @@ " tools: str = None,\n", " knowledge_pieces: str = None,\n", " include_views: bool = False,\n", + " instruct_template: str = None,\n", "):\n", " db_copilot_component(\n", " db_datastore=db_datastore,\n", @@ -693,6 +694,7 @@ " tools=tools,\n", " knowledge_pieces=knowledge_pieces,\n", " include_views=include_views,\n", + " instruct_template=instruct_template,\n", " )\n", " return {}" ] @@ -744,6 +746,41 @@ " # max_knowledge_pieces (refers to grounding_config) of knowledge and pack them into prompt. Here is the definition for each KnowledgePiece with the following properties:\n", " # text (str, required): text representation of the knowledge. Be used to calculate embedding and packed into prompt.\n", " # entities (List[str], optional): relevant table or columns (full names). 
If provided, when the knowledge piece is retrieved, we need to return the corresponding entities in schema too.\n", + " instruct_template=(\n", + " \"\"\"\n", + " You are an assistant that help answer users' questions.\n", + " Your responses should follow this format:\n", + " \n", + " ...\n", + " ...\n", + " ...\n", + " The End\n", + " \n", + " \n", + "\n", + " {api_prompt_text}\n", + "\n", + " You can use the following tools:\n", + "\n", + " {tool_prompt_text}\n", + "\n", + " Here are some dialogue session examples demonstrating how you should generate the responses (we don't show the system interrupt messages in these examples).\n", + "\n", + " {examples_prompt_text}\n", + "\n", + " The schema of the database you are connected with:\n", + "\n", + " {grounding_prompt_text}\n", + "\n", + " ========================================\n", + " {memory_prompt_text}\n", + " ========================================\n", + " {additional_information}\n", + " You must follow the constraints as follows:\n", + " \"\"\"\n", + " ),\n", + " # instruct_template is used to provide the instruction template for the assistant. 
It is a string.\n", + " # You can customize your own instruction template to fine-tune the behavior of DBCopilot; to use the default template, set the value to None.\n", ")" ] }, @@ -940,7 +977,8 @@ " max_sampling_rows: int = 3,\n", " runtime: str = None,\n", " knowledge_pieces: str = None,\n", - " include_views=False,\n", + " include_views: bool = False,\n", + " instruct_template: str = None,\n", "):\n", " db_copilot_component(\n", " db_datastore=db_datastore,\n", @@ -956,6 +994,7 @@ " runtime=runtime,\n", " knowledge_pieces=knowledge_pieces,\n", " include_views=include_views,\n", + " instruct_template=instruct_template,\n", " )\n", " return {}" ] @@ -976,16 +1015,16 @@ " chat_aoai_deployment_name=aoai_completion_deployment_name,\n", " embedding_aoai_deployment_name=aoai_embedding_deployment_name,\n", " mlindex_dataset_name=asset_name,\n", - " selected_tables = (\n", + " selected_tables=(\n", " '[\"[HumanResources].[Department]\", '\n", " '\"[Person].[Address]\", '\n", " '\"[Person].[BusinessEntity]\"]'\n", " ),\n", " # One database may have multiple tables. You can config this field to select the set of tables you want to enable for grounding and prompt building.\n", " # selected_tables requires a list of table full names. 
Please note that in SQLServer, the full table name must follow format [{schema_name}].[{table_name}], such as [dbo].[DatabaseLog]\n", - " column_settings = (\n", - " '{\"[HumanResources].[Department].DepartmentID\": {\"description\": \"unique id of department\"}, '\n", - " '\"[HumanResources].[Department].BusinessEntityID\": {\"description\": \"unique id of an employee\", \"security_level\": 1}}'\n", + " column_settings=(\n", + " '{\"[HumanResources].[Department].DepartmentID\": {\"description\": \"unique id of department\"}, '\n", + " '\"[HumanResources].[Department].BusinessEntityID\": {\"description\": \"unique id of an employee\", \"security_level\": 1}}'\n", " ),\n", " # column_settings is used to config the description or security information for columns. It is a dictionary object with column full name (table_name + '.' + column_name) as the key. The value is also a dictionary object with following two fields:\n", " # security_level: an integer enum value (i.e., 1, 2, or 3). Defaults to 3. For 1 and 2, the values of the column will not be used for indexing and prompt construction. security_level is used to enable users avoid sending personal or sensitive data under the column (e.g., UserId, PassWord) to embedding model and LLM. 1 is designed for SQL-level protection, but not implemented yet.\n", @@ -993,15 +1032,50 @@ " max_sampling_rows=3,\n", " runtime=runtime,\n", " include_views=False, # Set to True if you want to include views in the schema\n", - " knowledge_pieces = (\n", - " '[{\"text\": \"when fetch address, just return city.\"}, '\n", - " '{\"text\": \"New Department is defined by the ModifiedDate. New department = true if ModifiedDate is in recent 6 month.\", '\n", - " '\"entities\": [\"[HumanResources].[Department].ModifiedDate\"]}]'\n", - " ),\n", + " knowledge_pieces=(\n", + " '[{\"text\": \"when fetch address, just return city.\"}, '\n", + " '{\"text\": \"New Department is defined by the ModifiedDate. 
New department = true if ModifiedDate is in recent 6 month.\", '\n", + " '\"entities\": [\"[HumanResources].[Department].ModifiedDate\"]}]'\n", + " ),\n", " # This field provides the interface to let users upload their domain specific knowledge. knowledge_pieces is a list. Each element is a knowledge_piece. In online stage, we will use embedding service to retrieve\n", " # max_knowledge_pieces (refers to grounding_config) of knowledge and pack them into prompt. Here is the definition for each KnowledgePiece with the following properties:\n", " # text (str, required): text representation of the knowledge. Be used to calculate embedding and packed into prompt.\n", " # entities (List[str], optional): relevant table or columns (full names). If provided, when the knowledge piece is retrieved, we need to return the corresponding entities in schema too.\n", + " instruct_template=(\n", + " \"\"\"\n", + " You are an assistant that help answer users' questions.\n", + " Your responses should follow this format:\n", + " \n", + " ...\n", + " ...\n", + " ...\n", + " The End\n", + " \n", + " \n", + "\n", + " {api_prompt_text}\n", + "\n", + " You can use the following tools:\n", + "\n", + " {tool_prompt_text}\n", + "\n", + " Here are some dialogue session examples demonstrating how you should generate the responses (we don't show the system interrupt messages in these examples).\n", + "\n", + " {examples_prompt_text}\n", + "\n", + " The schema of the database you are connected with:\n", + "\n", + " {grounding_prompt_text}\n", + "\n", + " ========================================\n", + " {memory_prompt_text}\n", + " ========================================\n", + " {additional_information}\n", + " You must follow the constraints as follows:\n", + " \"\"\"\n", + " ),\n", + " # instruct_template is used to provide the instruction template for the assistant. 
It is a string.\n", + " # You can customize your own instruction template to fine-tune the behavior of DBCopilot; to use the default template, set the value to None.\n", ")" ] },