Skip to content

Commit

Permalink
Update DBCopilot notebook (#3184)
Browse files Browse the repository at this point in the history
* Update DBCopilot notebook

* fix

* fix
  • Loading branch information
ricardrao committed May 16, 2024
1 parent 607923d commit 263ae9e
Showing 1 changed file with 84 additions and 10 deletions.
94 changes: 84 additions & 10 deletions sdk/python/generative-ai/rag/notebooks/db_copilot_with_rag.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -676,6 +676,7 @@
" tools: str = None,\n",
" knowledge_pieces: str = None,\n",
" include_views: bool = False,\n",
" instruct_template: str = None,\n",
"):\n",
" db_copilot_component(\n",
" db_datastore=db_datastore,\n",
Expand All @@ -693,6 +694,7 @@
" tools=tools,\n",
" knowledge_pieces=knowledge_pieces,\n",
" include_views=include_views,\n",
" instruct_template=instruct_template,\n",
" )\n",
" return {}"
]
Expand Down Expand Up @@ -744,6 +746,41 @@
" # max_knowledge_pieces (refers to grounding_config) of knowledge and pack them into prompt. Here is the definition for each KnowledgePiece with the following properties:\n",
" # text (str, required): text representation of the knowledge. Be used to calculate embedding and packed into prompt.\n",
" # entities (List[str], optional): relevant table or columns (full names). If provided, when the knowledge piece is retrieved, we need to return the corresponding entities in schema too.\n",
" instruct_template=(\n",
" \"\"\"\n",
" You are an assistant that help answer users' questions.\n",
" Your responses should follow this format:\n",
" <Response>\n",
" <Cell>...</Cell>\n",
" <Cell>...</Cell>\n",
" ...\n",
" <Cell>The End</Cell>\n",
" </Response>\n",
" \n",
"\n",
" {api_prompt_text}\n",
"\n",
" You can use the following tools:\n",
"\n",
" {tool_prompt_text}\n",
"\n",
" Here are some dialogue session examples demonstrating how you should generate the responses (we don't show the system interrupt messages in these examples).\n",
"\n",
" {examples_prompt_text}\n",
"\n",
" The schema of the database you are connected with:\n",
"\n",
" {grounding_prompt_text}\n",
"\n",
" ========================================\n",
" {memory_prompt_text}\n",
" ========================================\n",
" {additional_information}\n",
" You must follow the constraints as follows:\n",
" \"\"\"\n",
" ),\n",
" # instruct_template is used to provide the instruction template for the assistant. It is a string.\n",
" # You can customize your own instruct to fine-tune the behavior of DBCopilot, if you use the default instruct please set the value to None.\n",
")"
]
},
Expand Down Expand Up @@ -940,7 +977,8 @@
" max_sampling_rows: int = 3,\n",
" runtime: str = None,\n",
" knowledge_pieces: str = None,\n",
" include_views=False,\n",
" include_views: bool = False,\n",
" instruct_template: str = None,\n",
"):\n",
" db_copilot_component(\n",
" db_datastore=db_datastore,\n",
Expand All @@ -956,6 +994,7 @@
" runtime=runtime,\n",
" knowledge_pieces=knowledge_pieces,\n",
" include_views=include_views,\n",
" instruct_template=instruct_template,\n",
" )\n",
" return {}"
]
Expand All @@ -976,32 +1015,67 @@
" chat_aoai_deployment_name=aoai_completion_deployment_name,\n",
" embedding_aoai_deployment_name=aoai_embedding_deployment_name,\n",
" mlindex_dataset_name=asset_name,\n",
" selected_tables = (\n",
" selected_tables=(\n",
" '[\"[HumanResources].[Department]\", '\n",
" '\"[Person].[Address]\", '\n",
" '\"[Person].[BusinessEntity]\"]'\n",
" ),\n",
" # One database may have multiple tables. You can config this field to select the set of tables you want to enable for grounding and prompt building.\n",
" # selected_tables requires a list of table full names. Please note that in SQLServer, the full table name must follow format [{schema_name}].[{table_name}], such as [dbo].[DatabaseLog]\n",
" column_settings = (\n",
" '{\"[HumanResources].[Department].DepartmentID\": {\"description\": \"unique id of department\"}, '\n",
" '\"[HumanResources].[Department].BusinessEntityID\": {\"description\": \"unique id of an employee\", \"security_level\": 1}}'\n",
" column_settings=(\n",
" '{\"[HumanResources].[Department].DepartmentID\": {\"description\": \"unique id of department\"}, '\n",
" '\"[HumanResources].[Department].BusinessEntityID\": {\"description\": \"unique id of an employee\", \"security_level\": 1}}'\n",
" ),\n",
" # column_settings is used to config the description or security information for columns. It is a dictionary object with column full name (table_name + '.' + column_name) as the key. The value is also a dictionary object with following two fields:\n",
" # security_level: an integer enum value (i.e., 1, 2, or 3). Defaults to 3. For 1 and 2, the values of the column will not be used for indexing and prompt construction. security_level is used to enable users avoid sending personal or sensitive data under the column (e.g., UserId, PassWord) to embedding model and LLM. 1 is designed for SQL-level protection, but not implemented yet.\n",
" # description: the description or any other column related information. You can use this field to hack the column knowledge. By default, the description will be used for prompt building.\n",
" max_sampling_rows=3,\n",
" runtime=runtime,\n",
" include_views=False, # Set to True if you want to include views in the schema\n",
" knowledge_pieces = (\n",
" '[{\"text\": \"when fetch address, just return city.\"}, '\n",
" '{\"text\": \"New Department is defined by the ModifiedDate. New department = true if ModifiedDate is in recent 6 month.\", '\n",
" '\"entities\": [\"[HumanResources].[Department].ModifiedDate\"]}]'\n",
" )\n",
" knowledge_pieces=(\n",
" '[{\"text\": \"when fetch address, just return city.\"}, '\n",
" '{\"text\": \"New Department is defined by the ModifiedDate. New department = true if ModifiedDate is in recent 6 month.\", '\n",
" '\"entities\": [\"[HumanResources].[Department].ModifiedDate\"]}]'\n",
" ),\n",
" # This field provides the interface to let users upload their domain specific knowledge. knowledge_pieces is a list. Each element is a knowledge_piece. In online stage, we will use embedding service to retrieve\n",
" # max_knowledge_pieces (refers to grounding_config) of knowledge and pack them into prompt. Here is the definition for each KnowledgePiece with the following properties:\n",
" # text (str, required): text representation of the knowledge. Be used to calculate embedding and packed into prompt.\n",
" # entities (List[str], optional): relevant table or columns (full names). If provided, when the knowledge piece is retrieved, we need to return the corresponding entities in schema too.\n",
" instruct_template=(\n",
" \"\"\"\n",
" You are an assistant that help answer users' questions.\n",
" Your responses should follow this format:\n",
" <Response>\n",
" <Cell>...</Cell>\n",
" <Cell>...</Cell>\n",
" ...\n",
" <Cell>The End</Cell>\n",
" </Response>\n",
" \n",
"\n",
" {api_prompt_text}\n",
"\n",
" You can use the following tools:\n",
"\n",
" {tool_prompt_text}\n",
"\n",
" Here are some dialogue session examples demonstrating how you should generate the responses (we don't show the system interrupt messages in these examples).\n",
"\n",
" {examples_prompt_text}\n",
"\n",
" The schema of the database you are connected with:\n",
"\n",
" {grounding_prompt_text}\n",
"\n",
" ========================================\n",
" {memory_prompt_text}\n",
" ========================================\n",
" {additional_information}\n",
" You must follow the constraints as follows:\n",
" \"\"\"\n",
" ),\n",
" # instruct_template is used to provide the instruction template for the assistant. It is a string.\n",
" # You can customize your own instruct to fine-tune the behavior of DBCopilot, if you use the default instruct please set the value to None.\n",
")"
]
},
Expand Down

0 comments on commit 263ae9e

Please sign in to comment.