From 1c28cb92b7b9daea5234d40d53b91937b60856c0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 17 Nov 2025 13:09:33 +0000 Subject: [PATCH 1/4] Initial plan From 72cd8ded01cd867a18d9afe770064e3d62591251 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 17 Nov 2025 13:18:59 +0000 Subject: [PATCH 2/4] Add PARALLELISM option documentation for COPY DATABASE Co-authored-by: WenyXu <32535939+WenyXu@users.noreply.github.com> --- docs/reference/sql/copy.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/docs/reference/sql/copy.md b/docs/reference/sql/copy.md index d0cdfa6f6..d8e4adfd3 100644 --- a/docs/reference/sql/copy.md +++ b/docs/reference/sql/copy.md @@ -188,7 +188,8 @@ COPY DATABASE WITH ( FORMAT = { 'CSV' | 'JSON' | 'PARQUET' }, START_TIME = "", - END_TIME = "" + END_TIME = "", + PARALLELISM = ) [CONNECTION( REGION = "", @@ -203,6 +204,7 @@ COPY DATABASE |---|---|---| | `FORMAT` | Export file format, available options: JSON, CSV, Parquet | **Required** | | `START_TIME`/`END_TIME`| The time range within which data should be exported. `START_TIME` is inclusive and `END_TIME` is exclusive. | Optional | +| `PARALLELISM` | The number of tables to process in parallel. For example, if a database has 30 tables and `PARALLELISM` is 8, 8 tables will be processed concurrently. Defaults to the total number of CPU cores. Minimum value is 1. | Optional | > - When copying databases, `` must end with `/`. > - `CONNECTION` parameters can also be used to copying databases to/from object storage services like AWS S3. @@ -213,11 +215,17 @@ COPY DATABASE -- Export all tables' data to /tmp/export/ COPY DATABASE public TO '/tmp/export/' WITH (FORMAT='parquet'); +-- Export all tables' data with 4 parallel table operations +COPY DATABASE public TO '/tmp/export/' WITH (FORMAT='parquet', PARALLELISM=4); + -- Export all tables' data within time range 2022-04-11 08:00:00~2022-04-11 09:00:00 to /tmp/export/ COPY DATABASE public TO '/tmp/export/' WITH (FORMAT='parquet', START_TIME='2022-04-11 08:00:00', END_TIME='2022-04-11 09:00:00'); -- Import files under /tmp/export/ directory to database named public. 
COPY DATABASE public FROM '/tmp/export/' WITH (FORMAT='parquet'); + +-- Import files with 8 parallel table operations +COPY DATABASE public FROM '/tmp/export/' WITH (FORMAT='parquet', PARALLELISM=8); ``` ## Special reminder for Windows platforms From 75d8c74130277534e738bae857d0db7947698dfe Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 17 Nov 2025 13:33:09 +0000 Subject: [PATCH 3/4] Update CLI docs and add Chinese version for PARALLELISM option Co-authored-by: WenyXu <32535939+WenyXu@users.noreply.github.com> --- .../reference/command-lines/utilities/data.md | 21 ++++++++++--------- .../reference/command-lines/utilities/data.md | 21 ++++++++++--------- .../current/reference/sql/copy.md | 10 ++++++++- 3 files changed, 31 insertions(+), 21 deletions(-) diff --git a/docs/reference/command-lines/utilities/data.md b/docs/reference/command-lines/utilities/data.md index 2a89f67d9..d9e72dfc5 100644 --- a/docs/reference/command-lines/utilities/data.md +++ b/docs/reference/command-lines/utilities/data.md @@ -20,7 +20,8 @@ greptime cli data export [OPTIONS] | `--addr` | Yes | - | Server address to connect | | `--output-dir` | Yes | - | Directory to store exported data | | `--database` | No | all databasses | Name of the database to export | -| `--export-jobs`, `-j` | No | 1 | Number of parallel export jobs(multiple databases can be exported in parallel) | +| `--db-parallelism`, `-j` | No | 1 | The number of databases exported in parallel. For example, if there are 20 databases and `db-parallelism` is 4, 4 databases will be exported concurrently. (alias: `--export-jobs`) | +| `--table-parallelism` | No | 4 | The number of tables exported in parallel within a single database. For example, if a database has 30 tables and `table-parallelism` is 8, 8 tables will be exported concurrently. | | `--max-retry` | No | 3 | Maximum retry attempts per job | | `--target`, `-t` | No | all | Export target (schema/data/all) | | `--start-time` | No | - | Start of time range for data export | @@ -56,15 +57,15 @@ greptime cli data import [OPTIONS] ``` ### Options -| Option | Required | Default | Description | -| ------------------- | -------- | ------------- | ------------------------------------------------------------------------------- | -| `--addr` | Yes | - | Server address to connect | -| `--input-dir` | Yes | - | Directory containing backup data | -| `--database` | No | all databases | Name of the database to import | -| `--import-jobs, -j` | No | 1 | Number of parallel import jobs (multiple databases can be imported in parallel) | -| `--max-retry` | No | 3 | Maximum retry attempts per job | -| `--target, -t` | No | all | Import target (schema/data/all) | -| `--auth-basic` | No | - | Use the `:` format | +| Option | Required | Default | Description | +| ------------------------ | -------- | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `--addr` | Yes | - | Server address to connect | +| `--input-dir` | Yes | - | Directory containing backup data | +| `--database` | No | all databases | Name of the database to import | +| `--db-parallelism`, `-j` | No | 1 | The number of databases imported in parallel. For example, if there are 20 databases and `db-parallelism` is 4, 4 databases will be imported concurrently. 
(alias: `--import-jobs`) | +| `--max-retry` | No | 3 | Maximum retry attempts per job | +| `--target, -t` | No | all | Import target (schema/data/all) | +| `--auth-basic` | No | - | Use the `:` format | ### Import Targets - `schema`: Imports table schemas only diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/reference/command-lines/utilities/data.md b/i18n/zh/docusaurus-plugin-content-docs/current/reference/command-lines/utilities/data.md index e02bda114..746bd1c21 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/reference/command-lines/utilities/data.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/reference/command-lines/utilities/data.md @@ -20,7 +20,8 @@ greptime cli data export [OPTIONS] | `--addr` | 是 | - | 要连接的 GreptimeDB 数据库地址 | | `--output-dir` | 是 | - | 存储导出数据的目录 | | `--database` | 否 | 所有数据库 | 要导出的数据库名称 | -| `--export-jobs, -j` | 否 | 1 | 并行导出任务数量(多个数据库可以并行导出) | +| `--db-parallelism, -j` | 否 | 1 | 并行导出的数据库数量。例如,如果有 20 个数据库且 `db-parallelism` 为 4,则同时导出 4 个数据库。(别名:`--export-jobs`) | +| `--table-parallelism` | 否 | 4 | 单个数据库内并行导出的表数量。例如,如果数据库有 30 个表且 `table-parallelism` 为 8,则同时导出 8 个表。 | | `--max-retry` | 否 | 3 | 每个任务的最大重试次数 | | `--target, -t` | 否 | all | 导出目标(schema/data/all) | | `--start-time` | 否 | - | 数据导出的开始时间范围 | @@ -56,15 +57,15 @@ greptime cli data import [OPTIONS] ``` ### 选项 -| 选项 | 是否必需 | 默认值 | 描述 | -| ------------------- | -------- | ---------- | ------------------------------------------ | -| `--addr` | 是 | - | 要连接的 GreptimeDB 数据库地址 | -| `--input-dir` | 是 | - | 包含备份数据的目录 | -| `--database` | 否 | 所有数据库 | 要导入的数据库名称 | -| `--import-jobs, -j` | 否 | 1 | 并行导入任务数量(多个数据库可以并行导入) | -| `--max-retry` | 否 | 3 | 每个任务的最大重试次数 | -| `--target, -t` | 否 | all | 导入目标(schema/data/all) | -| `--auth-basic` | 否 | - | 使用 `:` 格式 | +| 选项 | 是否必需 | 默认值 | 描述 | +| ------------------------ | -------- | ---------- | --------------------------------------------------------------------------------------------------------------------------------------- | +| `--addr` | 是 | - | 要连接的 GreptimeDB 数据库地址 | +| `--input-dir` | 是 | - | 包含备份数据的目录 | +| `--database` | 否 | 所有数据库 | 要导入的数据库名称 | +| `--db-parallelism, -j` | 否 | 1 | 并行导入的数据库数量。例如,如果有 20 个数据库且 `db-parallelism` 为 4,则同时导入 4 个数据库。(别名:`--import-jobs`) | +| `--max-retry` | 否 | 3 | 每个任务的最大重试次数 | +| `--target, -t` | 否 | all | 导入目标(schema/data/all) | +| `--auth-basic` | 否 | - | 使用 `:` 格式 | ### 导入目标 - `schema`: 仅导入表结构 diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/reference/sql/copy.md b/i18n/zh/docusaurus-plugin-content-docs/current/reference/sql/copy.md index bdae6e7a5..f07420cae 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/reference/sql/copy.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/reference/sql/copy.md @@ -181,7 +181,8 @@ COPY DATABASE WITH ( FORMAT = { 'CSV' | 'JSON' | 'PARQUET' } START_TIME = "", - END_TIME = "" + END_TIME = "", + PARALLELISM = ) [CONNECTION( REGION = "", @@ -196,6 +197,7 @@ COPY DATABASE |---|---|---| | `FORMAT` | 目标文件格式,例如 JSON, CSV, Parquet | **是** | | `START_TIME`/`END_TIME`| 需要导出数据的时间范围,时间范围为左闭右开 | 可选 | +| `PARALLELISM` | 并行处理的表数量。例如,如果数据库有 30 个表且 `PARALLELISM` 为 8,则同时处理 8 个表。默认值为 CPU 核心总数,最小值为 1。 | 可选 | > - 当导入/导出表时,`` 参数必须以 `/` 结尾; > - COPY DATABASE 同样可以通过 `CONNECTION` 参数将数据导入/导出的路径指向 S3 等对象存储 @@ -207,11 +209,17 @@ COPY DATABASE -- 将 public 数据库中所有数据导出到 /tmp/export/ 目录下 COPY DATABASE public TO '/tmp/export/' WITH (FORMAT='parquet'); +-- 使用 4 个并行表操作导出所有表数据 +COPY DATABASE public TO '/tmp/export/' WITH (FORMAT='parquet', PARALLELISM=4); + -- 将 public 数据库中时间范围在 2022-04-11 
08:00:00 到 2022-04-11 09:00:00 之间的数据导出到 /tmp/export/ 目录下 COPY DATABASE public TO '/tmp/export/' WITH (FORMAT='parquet', START_TIME='2022-04-11 08:00:00', END_TIME='2022-04-11 09:00:00'); -- 从 /tmp/export/ 目录恢复 public 数据库的数据 COPY DATABASE public FROM '/tmp/export/' WITH (FORMAT='parquet'); + +-- 使用 8 个并行表操作导入数据 +COPY DATABASE public FROM '/tmp/export/' WITH (FORMAT='parquet', PARALLELISM=8); ``` ## Windows 平台上的路径 From 09351102bf21d56e9840391f3a0738db2a6141d2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 18 Nov 2025 06:54:25 +0000 Subject: [PATCH 4/4] Polish documentation text for better readability Co-authored-by: WenyXu <32535939+WenyXu@users.noreply.github.com> --- docs/reference/command-lines/utilities/data.md | 6 +++--- docs/reference/sql/copy.md | 6 +++--- .../current/reference/command-lines/utilities/data.md | 6 +++--- .../current/reference/sql/copy.md | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/reference/command-lines/utilities/data.md b/docs/reference/command-lines/utilities/data.md index d9e72dfc5..10216c574 100644 --- a/docs/reference/command-lines/utilities/data.md +++ b/docs/reference/command-lines/utilities/data.md @@ -20,8 +20,8 @@ greptime cli data export [OPTIONS] | `--addr` | Yes | - | Server address to connect | | `--output-dir` | Yes | - | Directory to store exported data | | `--database` | No | all databasses | Name of the database to export | -| `--db-parallelism`, `-j` | No | 1 | The number of databases exported in parallel. For example, if there are 20 databases and `db-parallelism` is 4, 4 databases will be exported concurrently. (alias: `--export-jobs`) | -| `--table-parallelism` | No | 4 | The number of tables exported in parallel within a single database. For example, if a database has 30 tables and `table-parallelism` is 8, 8 tables will be exported concurrently. | +| `--db-parallelism`, `-j` | No | 1 | Number of databases to export in parallel. For example, if there are 20 databases and `db-parallelism` is set to 4, then 4 databases will be exported concurrently. (alias: `--export-jobs`) | +| `--table-parallelism` | No | 4 | Number of tables to export in parallel within a single database. For example, if a database contains 30 tables and `table-parallelism` is set to 8, then 8 tables will be exported concurrently. | | `--max-retry` | No | 3 | Maximum retry attempts per job | | `--target`, `-t` | No | all | Export target (schema/data/all) | | `--start-time` | No | - | Start of time range for data export | @@ -62,7 +62,7 @@ greptime cli data import [OPTIONS] | `--addr` | Yes | - | Server address to connect | | `--input-dir` | Yes | - | Directory containing backup data | | `--database` | No | all databases | Name of the database to import | -| `--db-parallelism`, `-j` | No | 1 | The number of databases imported in parallel. For example, if there are 20 databases and `db-parallelism` is 4, 4 databases will be imported concurrently. (alias: `--import-jobs`) | +| `--db-parallelism`, `-j` | No | 1 | Number of databases to import in parallel. For example, if there are 20 databases and `db-parallelism` is set to 4, then 4 databases will be imported concurrently. 
(alias: `--import-jobs`) | | `--max-retry` | No | 3 | Maximum retry attempts per job | | `--target, -t` | No | all | Import target (schema/data/all) | | `--auth-basic` | No | - | Use the `:` format | diff --git a/docs/reference/sql/copy.md b/docs/reference/sql/copy.md index d8e4adfd3..cbaecef43 100644 --- a/docs/reference/sql/copy.md +++ b/docs/reference/sql/copy.md @@ -204,7 +204,7 @@ COPY DATABASE |---|---|---| | `FORMAT` | Export file format, available options: JSON, CSV, Parquet | **Required** | | `START_TIME`/`END_TIME`| The time range within which data should be exported. `START_TIME` is inclusive and `END_TIME` is exclusive. | Optional | -| `PARALLELISM` | The number of tables to process in parallel. For example, if a database has 30 tables and `PARALLELISM` is 8, 8 tables will be processed concurrently. Defaults to the total number of CPU cores. Minimum value is 1. | Optional | +| `PARALLELISM` | Number of tables to process in parallel. For example, if a database contains 30 tables and `PARALLELISM` is set to 8, then 8 tables will be processed concurrently. Defaults to the total number of CPU cores, with a minimum value of 1. | Optional | > - When copying databases, `` must end with `/`. > - `CONNECTION` parameters can also be used to copying databases to/from object storage services like AWS S3. @@ -215,7 +215,7 @@ COPY DATABASE -- Export all tables' data to /tmp/export/ COPY DATABASE public TO '/tmp/export/' WITH (FORMAT='parquet'); --- Export all tables' data with 4 parallel table operations +-- Export all table data using 4 parallel operations COPY DATABASE public TO '/tmp/export/' WITH (FORMAT='parquet', PARALLELISM=4); -- Export all tables' data within time range 2022-04-11 08:00:00~2022-04-11 09:00:00 to /tmp/export/ @@ -224,7 +224,7 @@ COPY DATABASE public TO '/tmp/export/' WITH (FORMAT='parquet', START_TIME='2022- -- Import files under /tmp/export/ directory to database named public. 
COPY DATABASE public FROM '/tmp/export/' WITH (FORMAT='parquet'); --- Import files with 8 parallel table operations +-- Import files using 8 parallel operations COPY DATABASE public FROM '/tmp/export/' WITH (FORMAT='parquet', PARALLELISM=8); ``` diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/reference/command-lines/utilities/data.md b/i18n/zh/docusaurus-plugin-content-docs/current/reference/command-lines/utilities/data.md index 746bd1c21..acd67dc0f 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/reference/command-lines/utilities/data.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/reference/command-lines/utilities/data.md @@ -20,8 +20,8 @@ greptime cli data export [OPTIONS] | `--addr` | 是 | - | 要连接的 GreptimeDB 数据库地址 | | `--output-dir` | 是 | - | 存储导出数据的目录 | | `--database` | 否 | 所有数据库 | 要导出的数据库名称 | -| `--db-parallelism, -j` | 否 | 1 | 并行导出的数据库数量。例如,如果有 20 个数据库且 `db-parallelism` 为 4,则同时导出 4 个数据库。(别名:`--export-jobs`) | -| `--table-parallelism` | 否 | 4 | 单个数据库内并行导出的表数量。例如,如果数据库有 30 个表且 `table-parallelism` 为 8,则同时导出 8 个表。 | +| `--db-parallelism, -j` | 否 | 1 | 并行导出的数据库数量。例如,有 20 个数据库且 `db-parallelism` 设置为 4 时,将同时导出 4 个数据库。(别名:`--export-jobs`) | +| `--table-parallelism` | 否 | 4 | 单个数据库内并行导出的表数量。例如,数据库包含 30 个表且 `table-parallelism` 设置为 8 时,将同时导出 8 个表。 | | `--max-retry` | 否 | 3 | 每个任务的最大重试次数 | | `--target, -t` | 否 | all | 导出目标(schema/data/all) | | `--start-time` | 否 | - | 数据导出的开始时间范围 | @@ -62,7 +62,7 @@ greptime cli data import [OPTIONS] | `--addr` | 是 | - | 要连接的 GreptimeDB 数据库地址 | | `--input-dir` | 是 | - | 包含备份数据的目录 | | `--database` | 否 | 所有数据库 | 要导入的数据库名称 | -| `--db-parallelism, -j` | 否 | 1 | 并行导入的数据库数量。例如,如果有 20 个数据库且 `db-parallelism` 为 4,则同时导入 4 个数据库。(别名:`--import-jobs`) | +| `--db-parallelism, -j` | 否 | 1 | 并行导入的数据库数量。例如,有 20 个数据库且 `db-parallelism` 设置为 4 时,将同时导入 4 个数据库。(别名:`--import-jobs`) | | `--max-retry` | 否 | 3 | 每个任务的最大重试次数 | | `--target, -t` | 否 | all | 导入目标(schema/data/all) | | `--auth-basic` | 否 | - | 使用 `:` 格式 | diff --git a/i18n/zh/docusaurus-plugin-content-docs/current/reference/sql/copy.md b/i18n/zh/docusaurus-plugin-content-docs/current/reference/sql/copy.md index f07420cae..890af7133 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/current/reference/sql/copy.md +++ b/i18n/zh/docusaurus-plugin-content-docs/current/reference/sql/copy.md @@ -197,7 +197,7 @@ COPY DATABASE |---|---|---| | `FORMAT` | 目标文件格式,例如 JSON, CSV, Parquet | **是** | | `START_TIME`/`END_TIME`| 需要导出数据的时间范围,时间范围为左闭右开 | 可选 | -| `PARALLELISM` | 并行处理的表数量。例如,如果数据库有 30 个表且 `PARALLELISM` 为 8,则同时处理 8 个表。默认值为 CPU 核心总数,最小值为 1。 | 可选 | +| `PARALLELISM` | 并行处理的表数量。例如,数据库包含 30 个表且 `PARALLELISM` 设置为 8 时,将同时处理 8 个表。默认值为 CPU 核心总数,最小值为 1。 | 可选 | > - 当导入/导出表时,`` 参数必须以 `/` 结尾; > - COPY DATABASE 同样可以通过 `CONNECTION` 参数将数据导入/导出的路径指向 S3 等对象存储 @@ -209,7 +209,7 @@ COPY DATABASE -- 将 public 数据库中所有数据导出到 /tmp/export/ 目录下 COPY DATABASE public TO '/tmp/export/' WITH (FORMAT='parquet'); --- 使用 4 个并行表操作导出所有表数据 +-- 使用 4 个并行操作导出所有表数据 COPY DATABASE public TO '/tmp/export/' WITH (FORMAT='parquet', PARALLELISM=4); -- 将 public 数据库中时间范围在 2022-04-11 08:00:00 到 2022-04-11 09:00:00 之间的数据导出到 /tmp/export/ 目录下 @@ -218,7 +218,7 @@ COPY DATABASE public TO '/tmp/export/' WITH (FORMAT='parquet', START_TIME='2022- -- 从 /tmp/export/ 目录恢复 public 数据库的数据 COPY DATABASE public FROM '/tmp/export/' WITH (FORMAT='parquet'); --- 使用 8 个并行表操作导入数据 +-- 使用 8 个并行操作导入数据 COPY DATABASE public FROM '/tmp/export/' WITH (FORMAT='parquet', PARALLELISM=8); ```
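
As a quick reference, the parallelism flags documented in this series can be combined in a single CLI run. A minimal sketch, assuming a local instance reachable at a placeholder address and placeholder backup directories (adapt `--addr` and the paths to your deployment):

```bash
# Sketch only: export 4 databases at a time, with 8 tables exported in parallel
# inside each database, then restore from the same directory.
# The address and directories below are placeholders, not defaults.
greptime cli data export --addr 127.0.0.1:4000 --output-dir /tmp/backup \
  --db-parallelism 4 --table-parallelism 8

greptime cli data import --addr 127.0.0.1:4000 --input-dir /tmp/backup \
  --db-parallelism 4
```

When exporting or importing through SQL directly, the `PARALLELISM` option of `COPY DATABASE` shown above provides the equivalent table-level fan-out.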