From 263ae9e70f274162d6d1ba76189ca3629b559405 Mon Sep 17 00:00:00 2001
From: XiangRao <131976564+ricardrao@users.noreply.github.com>
Date: Fri, 17 May 2024 01:44:54 +0800
Subject: [PATCH] Update DBCopilot notebook (#3184)

* Update DBCopilot notebook

* fix

* fix
---
 .../rag/notebooks/db_copilot_with_rag.ipynb   | 94 +++++++++++++++++--
 1 file changed, 84 insertions(+), 10 deletions(-)
diff --git a/sdk/python/generative-ai/rag/notebooks/db_copilot_with_rag.ipynb b/sdk/python/generative-ai/rag/notebooks/db_copilot_with_rag.ipynb
index 6a9f5dca23..713e2c237f 100644
--- a/sdk/python/generative-ai/rag/notebooks/db_copilot_with_rag.ipynb
+++ b/sdk/python/generative-ai/rag/notebooks/db_copilot_with_rag.ipynb
@@ -676,6 +676,7 @@
     "    tools: str = None,\n",
     "    knowledge_pieces: str = None,\n",
     "    include_views: bool = False,\n",
+    "    instruct_template: str = None,\n",
     "):\n",
     "    db_copilot_component(\n",
     "        db_datastore=db_datastore,\n",
@@ -693,6 +694,7 @@
     "        tools=tools,\n",
     "        knowledge_pieces=knowledge_pieces,\n",
     "        include_views=include_views,\n",
+    "        instruct_template=instruct_template,\n",
     "    )\n",
     "    return {}"
    ]
@@ -744,6 +746,41 @@
     "    # max_knowledge_pieces (refers to grounding_config) of knowledge and pack them into prompt. Here is the definition for each KnowledgePiece with the following properties:\n",
     "    # text (str, required): text representation of the knowledge. Be used to calculate embedding and packed into prompt.\n",
     "    # entities (List[str], optional): relevant table or columns (full names). If provided, when the knowledge piece is retrieved, we need to return the corresponding entities in schema too.\n",
+    "    instruct_template=(\n",
+    "        \"\"\"\n",
+    "        You are an assistant that help answer users' questions.\n",
+    "        Your responses should follow this format:\n",
+    "        <Response>\n",
+    "        <Cell>...</Cell>\n",
+    "        <Cell>...</Cell>\n",
+    "        ...\n",
+    "        <Cell>The End</Cell>\n",
+    "        </Response>\n",
+    "  \n",
+    "\n",
+    "        {api_prompt_text}\n",
+    "\n",
+    "        You can use the following tools:\n",
+    "\n",
+    "        {tool_prompt_text}\n",
+    "\n",
+    "        Here are some dialogue session examples demonstrating how you should generate the responses (we don't show the system interrupt messages in these examples).\n",
+    "\n",
+    "        {examples_prompt_text}\n",
+    "\n",
+    "        The schema of the database you are connected with:\n",
+    "\n",
+    "        {grounding_prompt_text}\n",
+    "\n",
+    "        ========================================\n",
+    "        {memory_prompt_text}\n",
+    "        ========================================\n",
+    "        {additional_information}\n",
+    "        You must follow the constraints as follows:\n",
+    "    \"\"\"\n",
+    "    ),\n",
+    "    # instruct_template provides the instruction template for the assistant. It is a string.\n",
+    "    # You can supply your own template to customize the behavior of DBCopilot. To use the default template, set the value to None.\n",
     ")"
    ]
   },
@@ -940,7 +977,8 @@
     "    max_sampling_rows: int = 3,\n",
     "    runtime: str = None,\n",
     "    knowledge_pieces: str = None,\n",
-    "    include_views=False,\n",
+    "    include_views: bool = False,\n",
+    "    instruct_template: str = None,\n",
     "):\n",
     "    db_copilot_component(\n",
     "        db_datastore=db_datastore,\n",
@@ -956,6 +994,7 @@
     "        runtime=runtime,\n",
     "        knowledge_pieces=knowledge_pieces,\n",
     "        include_views=include_views,\n",
+    "        instruct_template=instruct_template,\n",
     "    )\n",
     "    return {}"
    ]
@@ -976,16 +1015,16 @@
     "    chat_aoai_deployment_name=aoai_completion_deployment_name,\n",
     "    embedding_aoai_deployment_name=aoai_embedding_deployment_name,\n",
     "    mlindex_dataset_name=asset_name,\n",
-    "    selected_tables = (\n",
+    "    selected_tables=(\n",
     "        '[\"[HumanResources].[Department]\", '\n",
     "        '\"[Person].[Address]\", '\n",
     "        '\"[Person].[BusinessEntity]\"]'\n",
     "    ),\n",
     "    # One database may have multiple tables. You can config this field to select the set of tables you want to enable for grounding and prompt building.\n",
     "    # selected_tables requires a list of table full names. Please note that in SQLServer, the full table name must follow format [{schema_name}].[{table_name}], such as [dbo].[DatabaseLog]\n",
-    "    column_settings = (\n",
-    "    '{\"[HumanResources].[Department].DepartmentID\": {\"description\": \"unique id of department\"}, '\n",
-    "    '\"[HumanResources].[Department].BusinessEntityID\": {\"description\": \"unique id of an employee\", \"security_level\": 1}}'\n",
+    "    column_settings=(\n",
+    "        '{\"[HumanResources].[Department].DepartmentID\": {\"description\": \"unique id of department\"}, '\n",
+    "        '\"[HumanResources].[Department].BusinessEntityID\": {\"description\": \"unique id of an employee\", \"security_level\": 1}}'\n",
     "    ),\n",
     "    # column_settings is used to config the description or security information for columns. It is a dictionary object with column full name (table_name + '.' + column_name) as the key. The value is also a dictionary object with following two fields:\n",
     "    # security_level: an integer enum value (i.e., 1, 2, or 3). Defaults to 3. For 1 and 2, the values of the column will not be used for indexing and prompt construction. security_level is used to enable users avoid sending personal or sensitive data under the column (e.g., UserId, PassWord) to embedding model and LLM. 1 is designed for SQL-level protection, but not implemented yet.\n",
@@ -993,15 +1032,50 @@
     "    max_sampling_rows=3,\n",
     "    runtime=runtime,\n",
     "    include_views=False,  # Set to True if you want to include views in the schema\n",
-    "    knowledge_pieces = (\n",
-    "    '[{\"text\": \"when fetch address, just return city.\"}, '\n",
-    "    '{\"text\": \"New Department is defined by the ModifiedDate. New department = true if ModifiedDate is in recent 6 month.\", '\n",
-    "    '\"entities\": [\"[HumanResources].[Department].ModifiedDate\"]}]'\n",
-    "    )，\n",
+    "    knowledge_pieces=(\n",
+    "        '[{\"text\": \"when fetch address, just return city.\"}, '\n",
+    "        '{\"text\": \"New Department is defined by the ModifiedDate. New department = true if ModifiedDate is in recent 6 month.\", '\n",
+    "        '\"entities\": [\"[HumanResources].[Department].ModifiedDate\"]}]'\n",
+    "    ),\n",
     "    # This field provides the interface to let users upload their domain specific knowledge. knowledge_pieces is a list. Each element is a knowledge_piece. In online stage, we will use embedding service to retrieve\n",
     "    # max_knowledge_pieces (refers to grounding_config) of knowledge and pack them into prompt. Here is the definition for each KnowledgePiece with the following properties:\n",
     "    # text (str, required): text representation of the knowledge. Be used to calculate embedding and packed into prompt.\n",
     "    # entities (List[str], optional): relevant table or columns (full names). If provided, when the knowledge piece is retrieved, we need to return the corresponding entities in schema too.\n",
+    "    instruct_template=(\n",
+    "        \"\"\"\n",
+    "        You are an assistant that help answer users' questions.\n",
+    "        Your responses should follow this format:\n",
+    "        <Response>\n",
+    "        <Cell>...</Cell>\n",
+    "        <Cell>...</Cell>\n",
+    "        ...\n",
+    "        <Cell>The End</Cell>\n",
+    "        </Response>\n",
+    "  \n",
+    "\n",
+    "        {api_prompt_text}\n",
+    "\n",
+    "        You can use the following tools:\n",
+    "\n",
+    "        {tool_prompt_text}\n",
+    "\n",
+    "        Here are some dialogue session examples demonstrating how you should generate the responses (we don't show the system interrupt messages in these examples).\n",
+    "\n",
+    "        {examples_prompt_text}\n",
+    "\n",
+    "        The schema of the database you are connected with:\n",
+    "\n",
+    "        {grounding_prompt_text}\n",
+    "\n",
+    "        ========================================\n",
+    "        {memory_prompt_text}\n",
+    "        ========================================\n",
+    "        {additional_information}\n",
+    "        You must follow the constraints as follows:\n",
+    "    \"\"\"\n",
+    "    ),\n",
+    "    # instruct_template provides the instruction template for the assistant. It is a string.\n",
+    "    # You can supply your own template to customize the behavior of DBCopilot. To use the default template, set the value to None.\n",
     ")"
    ]
   },