From 263ae9e70f274162d6d1ba76189ca3629b559405 Mon Sep 17 00:00:00 2001
From: XiangRao <131976564+ricardrao@users.noreply.github.com>
Date: Fri, 17 May 2024 01:44:54 +0800
Subject: [PATCH] Update DBCopilot notebook (#3184)
* Update DBCopilot notebook
* fix
* fix
---
.../rag/notebooks/db_copilot_with_rag.ipynb | 94 +++++++++++++++++--
1 file changed, 84 insertions(+), 10 deletions(-)
diff --git a/sdk/python/generative-ai/rag/notebooks/db_copilot_with_rag.ipynb b/sdk/python/generative-ai/rag/notebooks/db_copilot_with_rag.ipynb
index 6a9f5dca23..713e2c237f 100644
--- a/sdk/python/generative-ai/rag/notebooks/db_copilot_with_rag.ipynb
+++ b/sdk/python/generative-ai/rag/notebooks/db_copilot_with_rag.ipynb
@@ -676,6 +676,7 @@
" tools: str = None,\n",
" knowledge_pieces: str = None,\n",
" include_views: bool = False,\n",
+ " instruct_template: str = None,\n",
"):\n",
" db_copilot_component(\n",
" db_datastore=db_datastore,\n",
@@ -693,6 +694,7 @@
" tools=tools,\n",
" knowledge_pieces=knowledge_pieces,\n",
" include_views=include_views,\n",
+ " instruct_template=instruct_template,\n",
" )\n",
" return {}"
]
@@ -744,6 +746,41 @@
" # max_knowledge_pieces (refers to grounding_config) of knowledge and pack them into prompt. Here is the definition for each KnowledgePiece with the following properties:\n",
" # text (str, required): text representation of the knowledge. Be used to calculate embedding and packed into prompt.\n",
" # entities (List[str], optional): relevant table or columns (full names). If provided, when the knowledge piece is retrieved, we need to return the corresponding entities in schema too.\n",
+ " instruct_template=(\n",
+ " \"\"\"\n",
+ " You are an assistant that help answer users' questions.\n",
+ " Your responses should follow this format:\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ...\n",
+ " The End | \n",
+ " \n",
+ " \n",
+ "\n",
+ " {api_prompt_text}\n",
+ "\n",
+ " You can use the following tools:\n",
+ "\n",
+ " {tool_prompt_text}\n",
+ "\n",
+ " Here are some dialogue session examples demonstrating how you should generate the responses (we don't show the system interrupt messages in these examples).\n",
+ "\n",
+ " {examples_prompt_text}\n",
+ "\n",
+ " The schema of the database you are connected with:\n",
+ "\n",
+ " {grounding_prompt_text}\n",
+ "\n",
+ " ========================================\n",
+ " {memory_prompt_text}\n",
+ " ========================================\n",
+ " {additional_information}\n",
+ " You must follow the constraints as follows:\n",
+ " \"\"\"\n",
+ " ),\n",
+ " # instruct_template provides the instruction template (a string) for the assistant.\n",
+ " # Customize it to adjust the behavior of DBCopilot; to use the default instruction template, set the value to None.\n",
")"
]
},
@@ -940,7 +977,8 @@
" max_sampling_rows: int = 3,\n",
" runtime: str = None,\n",
" knowledge_pieces: str = None,\n",
- " include_views=False,\n",
+ " include_views: bool = False,\n",
+ " instruct_template: str = None,\n",
"):\n",
" db_copilot_component(\n",
" db_datastore=db_datastore,\n",
@@ -956,6 +994,7 @@
" runtime=runtime,\n",
" knowledge_pieces=knowledge_pieces,\n",
" include_views=include_views,\n",
+ " instruct_template=instruct_template,\n",
" )\n",
" return {}"
]
@@ -976,16 +1015,16 @@
" chat_aoai_deployment_name=aoai_completion_deployment_name,\n",
" embedding_aoai_deployment_name=aoai_embedding_deployment_name,\n",
" mlindex_dataset_name=asset_name,\n",
- " selected_tables = (\n",
+ " selected_tables=(\n",
" '[\"[HumanResources].[Department]\", '\n",
" '\"[Person].[Address]\", '\n",
" '\"[Person].[BusinessEntity]\"]'\n",
" ),\n",
" # One database may have multiple tables. You can config this field to select the set of tables you want to enable for grounding and prompt building.\n",
" # selected_tables requires a list of table full names. Please note that in SQLServer, the full table name must follow format [{schema_name}].[{table_name}], such as [dbo].[DatabaseLog]\n",
- " column_settings = (\n",
- " '{\"[HumanResources].[Department].DepartmentID\": {\"description\": \"unique id of department\"}, '\n",
- " '\"[HumanResources].[Department].BusinessEntityID\": {\"description\": \"unique id of an employee\", \"security_level\": 1}}'\n",
+ " column_settings=(\n",
+ " '{\"[HumanResources].[Department].DepartmentID\": {\"description\": \"unique id of department\"}, '\n",
+ " '\"[HumanResources].[Department].BusinessEntityID\": {\"description\": \"unique id of an employee\", \"security_level\": 1}}'\n",
" ),\n",
" # column_settings is used to config the description or security information for columns. It is a dictionary object with column full name (table_name + '.' + column_name) as the key. The value is also a dictionary object with following two fields:\n",
" # security_level: an integer enum value (i.e., 1, 2, or 3). Defaults to 3. For 1 and 2, the values of the column will not be used for indexing and prompt construction. security_level is used to enable users avoid sending personal or sensitive data under the column (e.g., UserId, PassWord) to embedding model and LLM. 1 is designed for SQL-level protection, but not implemented yet.\n",
@@ -993,15 +1032,50 @@
" max_sampling_rows=3,\n",
" runtime=runtime,\n",
" include_views=False, # Set to True if you want to include views in the schema\n",
- " knowledge_pieces = (\n",
- " '[{\"text\": \"when fetch address, just return city.\"}, '\n",
- " '{\"text\": \"New Department is defined by the ModifiedDate. New department = true if ModifiedDate is in recent 6 month.\", '\n",
- " '\"entities\": [\"[HumanResources].[Department].ModifiedDate\"]}]'\n",
- " ),\n",
+ " knowledge_pieces=(\n",
+ " '[{\"text\": \"when fetch address, just return city.\"}, '\n",
+ " '{\"text\": \"New Department is defined by the ModifiedDate. New department = true if ModifiedDate is in recent 6 month.\", '\n",
+ " '\"entities\": [\"[HumanResources].[Department].ModifiedDate\"]}]'\n",
+ " ),\n",
" # This field provides the interface to let users upload their domain specific knowledge. knowledge_pieces is a list. Each element is a knowledge_piece. In online stage, we will use embedding service to retrieve\n",
" # max_knowledge_pieces (refers to grounding_config) of knowledge and pack them into prompt. Here is the definition for each KnowledgePiece with the following properties:\n",
" # text (str, required): text representation of the knowledge. Be used to calculate embedding and packed into prompt.\n",
" # entities (List[str], optional): relevant table or columns (full names). If provided, when the knowledge piece is retrieved, we need to return the corresponding entities in schema too.\n",
+ " instruct_template=(\n",
+ " \"\"\"\n",
+ " You are an assistant that help answer users' questions.\n",
+ " Your responses should follow this format:\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ...\n",
+ " The End | \n",
+ " \n",
+ " \n",
+ "\n",
+ " {api_prompt_text}\n",
+ "\n",
+ " You can use the following tools:\n",
+ "\n",
+ " {tool_prompt_text}\n",
+ "\n",
+ " Here are some dialogue session examples demonstrating how you should generate the responses (we don't show the system interrupt messages in these examples).\n",
+ "\n",
+ " {examples_prompt_text}\n",
+ "\n",
+ " The schema of the database you are connected with:\n",
+ "\n",
+ " {grounding_prompt_text}\n",
+ "\n",
+ " ========================================\n",
+ " {memory_prompt_text}\n",
+ " ========================================\n",
+ " {additional_information}\n",
+ " You must follow the constraints as follows:\n",
+ " \"\"\"\n",
+ " ),\n",
+ " # instruct_template provides the instruction template (a string) for the assistant.\n",
+ " # Customize it to adjust the behavior of DBCopilot; to use the default instruction template, set the value to None.\n",
")"
]
},