From e78b0a36b72fb3ca91ce03cc767dd57f430f3619 Mon Sep 17 00:00:00 2001 From: Dennis Zhuang Date: Wed, 3 Dec 2025 02:41:52 -0800 Subject: [PATCH 1/2] docs: bump v1.0 to beta2 Signed-off-by: Dennis Zhuang --- .../version-1.0/reference/sql/alter.md | 29 ++++++++++ .../version-1.0/reference/sql/copy.md | 52 +++++++++++++++-- .../version-1.0/reference/sql/data-types.md | 54 ++++++++++++------ .../reference/sql/functions/json.md | 9 +++ variables/variables-1.0.ts | 2 +- .../reference/command-lines/utilities/data.md | 21 +++---- .../version-1.0/reference/http-endpoints.md | 23 ++++++++ .../version-1.0/reference/sql/alter.md | 29 ++++++++++ .../version-1.0/reference/sql/copy.md | 50 +++++++++++++++-- .../version-1.0/reference/sql/create.md | 1 + .../version-1.0/reference/sql/data-types.md | 56 +++++++++++++------ .../reference/sql/functions/json.md | 9 +++ .../configuration.md | 53 ------------------ .../monitoring/standalone-monitoring.md | 47 +--------------- .../monitoring/tracing.md | 18 ++++++ 15 files changed, 298 insertions(+), 155 deletions(-) diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-1.0/reference/sql/alter.md b/i18n/zh/docusaurus-plugin-content-docs/version-1.0/reference/sql/alter.md index b8465cd601..151d2ae649 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/version-1.0/reference/sql/alter.md +++ b/i18n/zh/docusaurus-plugin-content-docs/version-1.0/reference/sql/alter.md @@ -32,6 +32,9 @@ ALTER DATABASE db - 如果之前未设置 ttl,通过 `ALTER` 设置新的 ttl 后,超过保留时间的数据将被删除。 - 如果之前已设置过 ttl,通过 `ALTER` 修改 ttl 后,新的保留时间将立即生效,超过新保留时间的数据将被删除。 - 如果之前已设置过 ttl,通过 `ALTER` 取消 ttl 设置后,新增的数据将不会被删除,但已被删除的数据无法恢复。 +- `compaction.twcs.time_window`: TWCS 压缩策略的时间窗口参数。值应该是一个[时间长度字符串](/reference/time-durations.md)。 +- `compaction.twcs.max_output_file_size`: TWCS 压缩策略的最大允许输出文件大小。 +- `compaction.twcs.trigger_file_num`: 触发压缩的特定时间窗口中的文件数。 ### 示例 @@ -49,6 +52,32 @@ ALTER DATABASE db SET 'ttl'='1d'; ALTER DATABASE db UNSET 'ttl'; ``` +#### 修改数据库的压缩选项 + +修改数据库的压缩时间窗口: + +```sql +ALTER DATABASE db SET 'compaction.twcs.time_window'='2h'; +``` + +修改压缩的最大输出文件大小: + +```sql +ALTER DATABASE db SET 'compaction.twcs.max_output_file_size'='500MB'; +``` + +修改触发压缩的文件数: + +```sql +ALTER DATABASE db SET 'compaction.twcs.trigger_file_num'='8'; +``` + +取消压缩选项: + +```sql +ALTER DATABASE db UNSET 'compaction.twcs.time_window'; +``` + ## ALTER TABLE ## 语法 diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-1.0/reference/sql/copy.md b/i18n/zh/docusaurus-plugin-content-docs/version-1.0/reference/sql/copy.md index bdae6e7a54..72ab8c9994 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/version-1.0/reference/sql/copy.md +++ b/i18n/zh/docusaurus-plugin-content-docs/version-1.0/reference/sql/copy.md @@ -27,6 +27,19 @@ COPY tbl TO '/path/to/file.csv' WITH ( ); ``` +也可以将数据导出为压缩的 CSV 或 JSON 文件: + +```sql +COPY tbl TO '/path/to/file.csv.gz' WITH ( + FORMAT = 'csv', + compression_type = 'gzip' +); +``` + +:::tip NOTE +使用压缩时,请确保文件扩展名与压缩类型匹配:gzip 使用 `.gz`,zstd 使用 `.zst`,bzip2 使用 `.bz2`,xz 使用 `.xz`。 +::: + #### `WITH` 选项 `WITH` 可以添加一些选项,比如文件的 `FORMAT` 用来指定导出文件的格式。本例中的格式为 Parquet,它是一种用于大数据处理的列式存储格式。Parquet 为大数据分析高效地压缩和编码列式数据。 @@ -35,6 +48,7 @@ COPY tbl TO '/path/to/file.csv' WITH ( |---|---|---| | `FORMAT` | 目标文件格式,例如 JSON, CSV, Parquet | **是** | | `START_TIME`/`END_TIME`| 需要导出数据的时间范围,时间范围为左闭右开 | 可选 | +| `compression_type` | 导出文件的压缩算法。支持的值:`gzip`、`zstd`、`bzip2`、`xz`。仅支持 CSV 和 JSON 格式。 | 可选 | | `TIMESTAMP_FORMAT` | 导出 CSV 格式时自定义时间戳列的格式。使用 [strftime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) 格式说明符(例如 `'%Y-%m-%d 
%H:%M:%S'`)。仅支持 CSV 格式。 | 可选 | | `DATE_FORMAT` | 导出 CSV 格式时自定义日期列的格式。使用 [strftime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) 格式说明符(例如 `'%Y-%m-%d'`)。仅支持 CSV 格式。 | 可选 | | `TIME_FORMAT` | 导出 CSV 格式时自定义时间列的格式。使用 [strftime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) 格式说明符(例如 `'%H:%M:%S'`)。仅支持 CSV 格式。 | 可选 | @@ -78,10 +92,20 @@ COPY tbl FROM '/path/to/folder/' WITH (FORMAT = 'parquet', PATTERN = '.*parquet. COPY tbl FROM '/path/to/folder/xxx.parquet' WITH (FORMAT = 'parquet'); ``` +也可以从压缩的 CSV 或 JSON 文件导入数据: + +```sql +COPY tbl FROM '/path/to/file.csv.gz' WITH ( + FORMAT = 'csv', + compression_type = 'gzip' +); +``` + | 选项 | 描述 | 是否必需 | |---|---|---| | `FORMAT` | 目标文件格式,例如 JSON, CSV, Parquet, ORC | **是** | | `PATTERN` | 使用正则匹配文件,例如 `*_today.parquet` | 可选 | +| `compression_type` | 导入文件的压缩算法。支持的值:`gzip`、`zstd`、`bzip2`、`xz`。仅支持 CSV 和 JSON 格式。 | 可选 | :::tip NOTE CSV 文件必须带有 header,包含表的列名。 @@ -151,6 +175,7 @@ COPY () TO '' WITH (FORMAT = { 'CSV' | 'JSON' | 'PARQUET' }); | `QUERY` | 要执行的 SQL SELECT 语句 | **是** | | `PATH` | 输出文件的路径 | **是** | | `FORMAT` | 输出文件格式:'CSV'、'JSON' 或 'PARQUET' | **是** | +| `compression_type` | 导出文件的压缩算法。支持的值:`gzip`、`zstd`、`bzip2`、`xz`。仅支持 CSV 和 JSON 格式。 | 可选 | | `TIMESTAMP_FORMAT` | 导出 CSV 格式时自定义时间戳列的格式。使用 [strftime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) 格式说明符。仅支持 CSV 格式。 | 可选 | | `DATE_FORMAT` | 导出 CSV 格式时自定义日期列的格式。使用 [strftime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) 格式说明符。仅支持 CSV 格式。 | 可选 | | `TIME_FORMAT` | 导出 CSV 格式时自定义时间列的格式。使用 [strftime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) 格式说明符。仅支持 CSV 格式。 | 可选 | @@ -161,6 +186,15 @@ COPY () TO '' WITH (FORMAT = { 'CSV' | 'JSON' | 'PARQUET' }); COPY (SELECT * FROM tbl WHERE host = 'host1') TO '/path/to/file.csv' WITH (FORMAT = 'csv'); ``` +也可以将查询结果导出为压缩文件: + +```sql +COPY (SELECT * FROM tbl WHERE host = 'host1') TO '/path/to/file.json.gz' WITH ( + FORMAT = 'json', + compression_type = 'gzip' +); +``` + 也可以在导出到 CSV 时指定自定义日期和时间格式: ```sql @@ -176,13 +210,14 @@ COPY (SELECT * FROM tbl WHERE host = 'host1') TO '/path/to/file.csv' WITH ( `COPY` 语句除可以导入/导出表之外,也可以导入/导出指定的数据库,其语法如下: ```sql -COPY DATABASE - [TO | FROM] '' +COPY DATABASE + [TO | FROM] '' WITH ( - FORMAT = { 'CSV' | 'JSON' | 'PARQUET' } + FORMAT = { 'CSV' | 'JSON' | 'PARQUET' } START_TIME = "", - END_TIME = "" - ) + END_TIME = "", + PARALLELISM = + ) [CONNECTION( REGION = "", ENDPOINT = "", @@ -196,6 +231,7 @@ COPY DATABASE |---|---|---| | `FORMAT` | 目标文件格式,例如 JSON, CSV, Parquet | **是** | | `START_TIME`/`END_TIME`| 需要导出数据的时间范围,时间范围为左闭右开 | 可选 | +| `PARALLELISM` | 并行处理的表数量。例如,数据库包含 30 个表且 `PARALLELISM` 设置为 8 时,将同时处理 8 个表。默认值为 CPU 核心总数,最小值为 1。 | 可选 | > - 当导入/导出表时,`` 参数必须以 `/` 结尾; > - COPY DATABASE 同样可以通过 `CONNECTION` 参数将数据导入/导出的路径指向 S3 等对象存储 @@ -207,11 +243,17 @@ COPY DATABASE -- 将 public 数据库中所有数据导出到 /tmp/export/ 目录下 COPY DATABASE public TO '/tmp/export/' WITH (FORMAT='parquet'); +-- 使用 4 个并行操作导出所有表数据 +COPY DATABASE public TO '/tmp/export/' WITH (FORMAT='parquet', PARALLELISM=4); + -- 将 public 数据库中时间范围在 2022-04-11 08:00:00 到 2022-04-11 09:00:00 之间的数据导出到 /tmp/export/ 目录下 COPY DATABASE public TO '/tmp/export/' WITH (FORMAT='parquet', START_TIME='2022-04-11 08:00:00', END_TIME='2022-04-11 09:00:00'); -- 从 /tmp/export/ 目录恢复 public 数据库的数据 COPY DATABASE public FROM '/tmp/export/' WITH (FORMAT='parquet'); + +-- 使用 8 个并行操作导入数据 +COPY DATABASE public FROM '/tmp/export/' WITH (FORMAT='parquet', PARALLELISM=8); ``` ## Windows 平台上的路径 diff --git 
a/i18n/zh/docusaurus-plugin-content-docs/version-1.0/reference/sql/data-types.md b/i18n/zh/docusaurus-plugin-content-docs/version-1.0/reference/sql/data-types.md index c536069215..223e1d1b82 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/version-1.0/reference/sql/data-types.md +++ b/i18n/zh/docusaurus-plugin-content-docs/version-1.0/reference/sql/data-types.md @@ -31,6 +31,11 @@ SQL 数据类型定义了列可以存储的数据类型。当您运行 `DESC TAB | `Float32` | 32 位 IEEE 754 浮点数 | 4 字节 | | `Float64` | 双精度 IEEE 754 浮点数 | 8 字节 | +:::tip 注意 +这里的描述指的是 GreptimeDB 原生类型信息,这些类型都是以 位(bits) 为单位的。但是,在使用 SQL 时,请遵循 PostgreSQL 和 MySQL 的惯例,其中 `INT2`、`INT4`、`INT8`、`FLOAT4` 和 `FLOAT8` 等类型都是以 字节(bytes) 为单位定义的。 +例如,在 SQL 语句中,`INT8` 实际上对应 `BigInt`(8 个字节,64 位)。 +::: + ## Decimal 类型 GreptimeDB 支持 `decimal` 类型,这是一种定点类型,表示为 `decimal(precision, scale)`,其中 `precision` 是总位数,`scale` 是小数部分的位数。例如,`123.45` 的总位数为 5,小数位数为 2。 @@ -303,24 +308,37 @@ INSERT INTO bools(b) VALUES (TRUE), (FALSE); 对于从 MySQL 或 PostgreSQL 迁移到 GreptimeDB 的用户,GreptimeDB 支持以下类型别名。 -| 数据类型 | 别名 | -| ---------------------- | --------------------------------------------------------------- | -| `String` | `Text`, `TinyText`, `MediumText`, `LongText`, `Varchar`, `Char` | -| `Binary` | `Varbinary` | -| `Int8` | `TinyInt` | -| `Int16` | `SmallInt` | -| `Int32` | `Int` | -| `Int64` | `BigInt` | -| `UInt8` | `UnsignedTinyInt` | -| `UInt16` | `UnsignedSmallInt` | -| `UInt32` | `UnsignedInt` | -| `UInt64` | `UnsignedBigInt` | -| `Float32` | `Float` | -| `Float64` | `Double` | -| `TimestampSecond` | `Timestamp_s`, `Timestamp_sec`, `Timestamp(0)` | -| `TimestampMillisecond` | `Timestamp`, `Timestamp_ms` , `Timestamp(3)` | -| `TimestampMicroSecond` | `Timestamp_us`, `Timestamp(6)` | -| `TimestampNanosecond` | `Timestamp_ns`, `Timestamp(9)` | +| SQL 类型别名 | Native 数据类型 | +| --------------------------------------------------------------- | ---------------------- | +| `Text`, `TinyText`, `MediumText`, `LongText`, `Varchar`, `Char` | `String` | +| `Varbinary` | `Binary` | +| `TinyInt` | `Int8` | +| `SmallInt`, `Int2` | `Int16` | +| `Int`, `Int4` | `Int32` | +| `BigInt`, `Int8` | `Int64` | +| `UnsignedTinyInt` | `UInt8` | +| `UnsignedSmallInt` | `UInt16` | +| `UnsignedInt` | `UInt32` | +| `UnsignedBigInt` | `UInt64` | +| `Float`, `Float4` | `Float32` | +| `Double`, `Float8` | `Float64` | +| `Timestamp_s`, `Timestamp_sec`, `Timestamp(0)` | `TimestampSecond` | +| `Timestamp`, `Timestamp_ms`, `Timestamp(3)` | `TimestampMillisecond` | +| `Timestamp_us`, `Timestamp(6)` | `TimestampMicroSecond` | +| `Timestamp_ns`, `Timestamp(9)` | `TimestampNanosecond` | + +:::warning 注意 +类型别名 `Int2`、`Int4`、`Int8`、`Float4` 和 `Float8` 遵循 PostgreSQL 和 MySQL 的约定,这些标识符表示类型中的**字节**数(而非位数)。 + +具体来说: +- `Int2` = 2 字节 = `SmallInt`(16 位) +- `Int4` = 4 字节 = `Int`(32 位) +- `Int8` = 8 字节 = `BigInt`(64 位) +- `Float4` = 4 字节 = `Float`(32 位) +- `Float8` = 8 字节 = `Double`(64 位) + +注意:GreptimeDB 的原生类型名称(如 `UInt8`、`Int32`、`Int64`)表示**位**数,而 SQL 类型别名 `Int2`、`Int4` 和 `Int8` 遵循 PostgreSQL/MySQL 约定表示**字节**数。例如,原生类型 `Int8` 是 8 **位**整数(`TinyInt`, 1 字节),而 SQL 别名 `INT8` 映射到 8 **字节**整数(`BigInt`,64 位)。 +::: 在创建表时也可以使用这些别名类型。 例如,使用 `Varchar` 代替 `String`,使用 `Double` 代替 `Float64`,使用 `Timestamp(0)` 代替 `TimestampSecond`。 diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-1.0/reference/sql/functions/json.md b/i18n/zh/docusaurus-plugin-content-docs/version-1.0/reference/sql/functions/json.md index e9baee1794..320e6d0b0d 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/version-1.0/reference/sql/functions/json.md +++ 
b/i18n/zh/docusaurus-plugin-content-docs/version-1.0/reference/sql/functions/json.md @@ -36,6 +36,7 @@ SELECT json_to_string(parse_json('{"a": 1, "b": 2}')); * `json_get_int(json, path)` 按照路径 `path` 从 JSON 中获取整数值。布尔值将被转换为整数。 * `json_get_float(json, path)` 按照路径 `path` 从 JSON 中获取浮点数值。布尔值、整数值将被转换为浮点数。 * `json_get_string(json, path)` 按照路径 `path` 从 JSON 中获取字符串。所有类型的 JSON 值都将被转换为字符串,包括数组、对象和 null。 +* `json_get_object(json, path)` 按照路径 `path` 从 JSON 中获取对象值。如果路径未指向对象,则返回 NULL。 `path` 是一个用于从 JSON 值中选择和提取元素的字符串。`path` 中支持的操作符有: @@ -63,6 +64,14 @@ SELECT json_get_int(parse_json('{"a": {"c": 3}, "b": 2}'), 'a.c'); +-----------------------------------------------------------------------+ | 3 | +-----------------------------------------------------------------------+ + +SELECT json_to_string(json_get_object(parse_json('{"a": {"b": {"c": {"d": 42}}}}'), 'a.b.c')); + ++---------------------------------------------------------------------------------------------------+ +| json_to_string(json_get_object(parse_json(Utf8("{"a": {"b": {"c": {"d": 42}}}}")),Utf8("a.b.c"))) | ++---------------------------------------------------------------------------------------------------+ +| {"d":42} | ++---------------------------------------------------------------------------------------------------+ ``` ## 验证 diff --git a/variables/variables-1.0.ts b/variables/variables-1.0.ts index c142f320c3..2c51ba584f 100644 --- a/variables/variables-1.0.ts +++ b/variables/variables-1.0.ts @@ -1,5 +1,5 @@ export const variables = { - greptimedbVersion: 'v1.0.0-beta.1', + greptimedbVersion: 'v1.0.0-beta.2', prometheusVersion: 'v2.52.0', nodeExporterVersion: 'v1.8.0', goSdkVersion: 'v0.6.2', diff --git a/versioned_docs/version-1.0/reference/command-lines/utilities/data.md b/versioned_docs/version-1.0/reference/command-lines/utilities/data.md index 2a89f67d93..10216c5746 100644 --- a/versioned_docs/version-1.0/reference/command-lines/utilities/data.md +++ b/versioned_docs/version-1.0/reference/command-lines/utilities/data.md @@ -20,7 +20,8 @@ greptime cli data export [OPTIONS] | `--addr` | Yes | - | Server address to connect | | `--output-dir` | Yes | - | Directory to store exported data | | `--database` | No | all databasses | Name of the database to export | -| `--export-jobs`, `-j` | No | 1 | Number of parallel export jobs(multiple databases can be exported in parallel) | +| `--db-parallelism`, `-j` | No | 1 | Number of databases to export in parallel. For example, if there are 20 databases and `db-parallelism` is set to 4, then 4 databases will be exported concurrently. (alias: `--export-jobs`) | +| `--table-parallelism` | No | 4 | Number of tables to export in parallel within a single database. For example, if a database contains 30 tables and `table-parallelism` is set to 8, then 8 tables will be exported concurrently. 
| | `--max-retry` | No | 3 | Maximum retry attempts per job | | `--target`, `-t` | No | all | Export target (schema/data/all) | | `--start-time` | No | - | Start of time range for data export | @@ -56,15 +57,15 @@ greptime cli data import [OPTIONS] ``` ### Options -| Option | Required | Default | Description | -| ------------------- | -------- | ------------- | ------------------------------------------------------------------------------- | -| `--addr` | Yes | - | Server address to connect | -| `--input-dir` | Yes | - | Directory containing backup data | -| `--database` | No | all databases | Name of the database to import | -| `--import-jobs, -j` | No | 1 | Number of parallel import jobs (multiple databases can be imported in parallel) | -| `--max-retry` | No | 3 | Maximum retry attempts per job | -| `--target, -t` | No | all | Import target (schema/data/all) | -| `--auth-basic` | No | - | Use the `:` format | +| Option | Required | Default | Description | +| ------------------------ | -------- | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `--addr` | Yes | - | Server address to connect | +| `--input-dir` | Yes | - | Directory containing backup data | +| `--database` | No | all databases | Name of the database to import | +| `--db-parallelism`, `-j` | No | 1 | Number of databases to import in parallel. For example, if there are 20 databases and `db-parallelism` is set to 4, then 4 databases will be imported concurrently. (alias: `--import-jobs`) | +| `--max-retry` | No | 3 | Maximum retry attempts per job | +| `--target, -t` | No | all | Import target (schema/data/all) | +| `--auth-basic` | No | - | Use the `:` format | ### Import Targets - `schema`: Imports table schemas only diff --git a/versioned_docs/version-1.0/reference/http-endpoints.md b/versioned_docs/version-1.0/reference/http-endpoints.md index e2fa693058..9f6d3e73aa 100644 --- a/versioned_docs/version-1.0/reference/http-endpoints.md +++ b/versioned_docs/version-1.0/reference/http-endpoints.md @@ -111,6 +111,29 @@ This dashboard is packaged with the GreptimeDB server and provides a user-friend For more information, refer to the [how-to documentation](https://github.com/GreptimeTeam/greptimedb/blob/main/docs/how-to/how-to-change-log-level-on-the-fly.md). +### Enable/Disable Trace + +- **Path**: `/debug/enable_trace` +- **Methods**: `POST` +- **Description**: Dynamically enables or disables distributed tracing at runtime. +- **Usage**: Send `true` to enable tracing or `false` to disable tracing. + +Example to enable tracing: + +```bash +curl --data "true" http://127.0.0.1:4000/debug/enable_trace +# Output: trace enabled +``` + +Example to disable tracing: + +```bash +curl --data "false" http://127.0.0.1:4000/debug/enable_trace +# Output: trace disabled +``` + +For more information on tracing configuration, refer to the [tracing documentation](/user-guide/deployments-administration/monitoring/tracing.md). 
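+
+If you toggle tracing often, the two calls above can be wrapped in a small helper. The sketch below relies only on the endpoint shown in this section; the script name, the `GREPTIMEDB_HTTP` variable, and the default `http://127.0.0.1:4000` address are illustrative assumptions rather than part of the API:
+
+```bash
+#!/usr/bin/env bash
+# toggle_trace.sh — a minimal sketch for flipping distributed tracing at runtime.
+# Usage: ./toggle_trace.sh true|false
+GREPTIMEDB_HTTP="${GREPTIMEDB_HTTP:-http://127.0.0.1:4000}"
+STATE="${1:-true}"
+# POST the desired state to the debug endpoint; the server answers with
+# "trace enabled" or "trace disabled".
+curl --data "${STATE}" "${GREPTIMEDB_HTTP}/debug/enable_trace"
+```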
+ ### Profiling Tools - **Base Path**: `/debug/prof/` diff --git a/versioned_docs/version-1.0/reference/sql/alter.md b/versioned_docs/version-1.0/reference/sql/alter.md index 93fa90d02f..c048aabc45 100644 --- a/versioned_docs/version-1.0/reference/sql/alter.md +++ b/versioned_docs/version-1.0/reference/sql/alter.md @@ -32,6 +32,9 @@ Currently following options are supported: - If `ttl` was not previously set, defining a new `ttl` using `ALTER` will result in the deletion of data that exceeds the specified retention time. - If `ttl` was already set, modifying it via `ALTER` will enforce the updated retention time immediately, removing data that exceeds the new retention threshold. - If `ttl` was previously set and is unset using `ALTER`, new data will no longer be deleted. However, data that was previously deleted due to the retention policy cannot be restored. +- `compaction.twcs.time_window`: the time window parameter of TWCS compaction strategy. The value should be a [time duration string](/reference/time-durations.md). +- `compaction.twcs.max_output_file_size`: the maximum allowed output file size of TWCS compaction strategy. +- `compaction.twcs.trigger_file_num`: the number of files in a specific time window to trigger a compaction. ### Examples @@ -49,6 +52,32 @@ Remove the default retention time of data in the database: ALTER DATABASE db UNSET 'ttl'; ``` +#### Modify compaction options of database + +Change the compaction time window for the database: + +```sql +ALTER DATABASE db SET 'compaction.twcs.time_window'='2h'; +``` + +Change the maximum output file size for compaction: + +```sql +ALTER DATABASE db SET 'compaction.twcs.max_output_file_size'='500MB'; +``` + +Change the trigger file number for compaction: + +```sql +ALTER DATABASE db SET 'compaction.twcs.trigger_file_num'='8'; +``` + +Remove compaction options: + +```sql +ALTER DATABASE db UNSET 'compaction.twcs.time_window'; +``` + ## ALTER TABLE ### Syntax diff --git a/versioned_docs/version-1.0/reference/sql/copy.md b/versioned_docs/version-1.0/reference/sql/copy.md index d0cdfa6f69..673b0c98a8 100644 --- a/versioned_docs/version-1.0/reference/sql/copy.md +++ b/versioned_docs/version-1.0/reference/sql/copy.md @@ -31,6 +31,19 @@ COPY tbl TO '/path/to/file.csv' WITH ( ); ``` +You can also export data to a compressed CSV or JSON file: + +```sql +COPY tbl TO '/path/to/file.csv.gz' WITH ( + FORMAT = 'csv', + compression_type = 'gzip' +); +``` + +:::tip NOTE +When using compression, ensure the file extension matches the compression type: `.gz` for gzip, `.zst` for zstd, `.bz2` for bzip2, and `.xz` for xz. +::: + #### `WITH` Option `WITH` adds options such as the file `FORMAT` which specifies the format of the exported file. In this example, the format is Parquet; it is a columnar storage format used for big data processing. Parquet efficiently compresses and encodes columnar data for big data analytics. @@ -39,6 +52,7 @@ COPY tbl TO '/path/to/file.csv' WITH ( |---|---|---| | `FORMAT` | Target file(s) format, e.g., JSON, CSV, Parquet | **Required** | | `START_TIME`/`END_TIME`| The time range within which data should be exported. `START_TIME` is inclusive and `END_TIME` is exclusive. | Optional | +| `compression_type` | Compression algorithm for the exported file. Supported values: `gzip`, `zstd`, `bzip2`, `xz`. Only supported for CSV and JSON formats. | Optional | | `TIMESTAMP_FORMAT` | Custom format for timestamp columns when exporting to CSV format. 
Uses [strftime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) format specifiers (e.g., `'%Y-%m-%d %H:%M:%S'`). Only supported for CSV format. | Optional | | `DATE_FORMAT` | Custom format for date columns when exporting to CSV format. Uses [strftime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) format specifiers (e.g., `'%Y-%m-%d'`). Only supported for CSV format. | Optional | | `TIME_FORMAT` | Custom format for time columns when exporting to CSV format. Uses [strftime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) format specifiers (e.g., `'%H:%M:%S'`). Only supported for CSV format. | Optional | @@ -85,10 +99,20 @@ Specifically, if you only have one file to import, you can use the following syn COPY tbl FROM '/path/to/folder/xxx.parquet' WITH (FORMAT = 'parquet'); ``` +You can also import data from a compressed CSV or JSON file: + +```sql +COPY tbl FROM '/path/to/file.csv.gz' WITH ( + FORMAT = 'csv', + compression_type = 'gzip' +); +``` + | Option | Description | Required | |---|---|---| | `FORMAT` | Target file(s) format, e.g., JSON, CSV, Parquet, ORC | **Required** | | `PATTERN` | Use regex to match files. e.g., `*_today.parquet` | Optional | +| `compression_type` | Compression algorithm for the imported file. Supported values: `gzip`, `zstd`, `bzip2`, `xz`. Only supported for CSV and JSON formats. | Optional | :::tip NOTE The CSV file must have a header row to be imported correctly. The header row should contain the column names of the table. @@ -158,6 +182,7 @@ COPY () TO '' WITH (FORMAT = { 'CSV' | 'JSON' | 'PARQUET' }); | `QUERY` | The SQL SELECT statement to execute | **Required** | | `PATH` | The file path where the output will be written | **Required** | | `FORMAT` | The output file format: 'CSV', 'JSON', or 'PARQUET' | **Required** | +| `compression_type` | Compression algorithm for the exported file. Supported values: `gzip`, `zstd`, `bzip2`, `xz`. Only supported for CSV and JSON formats. | Optional | | `TIMESTAMP_FORMAT` | Custom format for timestamp columns when exporting to CSV format. Uses [strftime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) format specifiers. Only supported for CSV format. | Optional | | `DATE_FORMAT` | Custom format for date columns when exporting to CSV format. Uses [strftime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) format specifiers. Only supported for CSV format. | Optional | | `TIME_FORMAT` | Custom format for time columns when exporting to CSV format. Uses [strftime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) format specifiers. Only supported for CSV format. | Optional | @@ -168,6 +193,15 @@ For example, the following statement exports query results to a CSV file: COPY (SELECT * FROM tbl WHERE host = 'host1') TO '/path/to/file.csv' WITH (FORMAT = 'csv'); ``` +You can also export query results to a compressed file: + +```sql +COPY (SELECT * FROM tbl WHERE host = 'host1') TO '/path/to/file.json.gz' WITH ( + FORMAT = 'json', + compression_type = 'gzip' +); +``` + You can also specify custom date and time formats when exporting to CSV: ```sql @@ -183,13 +217,14 @@ COPY (SELECT * FROM tbl WHERE host = 'host1') TO '/path/to/file.csv' WITH ( Beside copying specific table to/from some path, `COPY` statement can also be used to copy whole database to/from some path. 
The syntax for copying databases is: ```sql -COPY DATABASE - [TO | FROM] '' +COPY DATABASE + [TO | FROM] '' WITH ( FORMAT = { 'CSV' | 'JSON' | 'PARQUET' }, START_TIME = "", - END_TIME = "" - ) + END_TIME = "", + PARALLELISM = + ) [CONNECTION( REGION = "", ENDPOINT = "", @@ -203,6 +238,7 @@ COPY DATABASE |---|---|---| | `FORMAT` | Export file format, available options: JSON, CSV, Parquet | **Required** | | `START_TIME`/`END_TIME`| The time range within which data should be exported. `START_TIME` is inclusive and `END_TIME` is exclusive. | Optional | +| `PARALLELISM` | Number of tables to process in parallel. For example, if a database contains 30 tables and `PARALLELISM` is set to 8, then 8 tables will be processed concurrently. Defaults to the total number of CPU cores, with a minimum value of 1. | Optional | > - When copying databases, `` must end with `/`. > - `CONNECTION` parameters can also be used to copying databases to/from object storage services like AWS S3. @@ -213,11 +249,17 @@ COPY DATABASE -- Export all tables' data to /tmp/export/ COPY DATABASE public TO '/tmp/export/' WITH (FORMAT='parquet'); +-- Export all table data using 4 parallel operations +COPY DATABASE public TO '/tmp/export/' WITH (FORMAT='parquet', PARALLELISM=4); + -- Export all tables' data within time range 2022-04-11 08:00:00~2022-04-11 09:00:00 to /tmp/export/ COPY DATABASE public TO '/tmp/export/' WITH (FORMAT='parquet', START_TIME='2022-04-11 08:00:00', END_TIME='2022-04-11 09:00:00'); -- Import files under /tmp/export/ directory to database named public. COPY DATABASE public FROM '/tmp/export/' WITH (FORMAT='parquet'); + +-- Import files using 8 parallel operations +COPY DATABASE public FROM '/tmp/export/' WITH (FORMAT='parquet', PARALLELISM=8); ``` ## Special reminder for Windows platforms diff --git a/versioned_docs/version-1.0/reference/sql/create.md b/versioned_docs/version-1.0/reference/sql/create.md index e0e58994ff..cf6c324a0b 100644 --- a/versioned_docs/version-1.0/reference/sql/create.md +++ b/versioned_docs/version-1.0/reference/sql/create.md @@ -151,6 +151,7 @@ Users can add table options by using `WITH`. The valid options contain the follo | `memtable.type` | Type of the memtable. | String value, supports `time_series`, `partition_tree`. | | `append_mode` | Whether the table is append-only | String value. Default is 'false', which removes duplicate rows by primary keys and timestamps according to the `merge_mode`. Setting it to 'true' to enable append mode and create an append-only table which keeps duplicate rows. | | `merge_mode` | The strategy to merge duplicate rows | String value. Only available when `append_mode` is 'false'. Default is `last_row`, which keeps the last row for the same primary key and timestamp. Setting it to `last_non_null` to keep the last non-null field for the same primary key and timestamp. | +| `sst_format` | The format of SST files | String value, supports `primary_key`, `flat`. Default is `primary_key`. `flat` is recommended for tables which have a large number of unique primary keys. | | `comment` | Table level comment | String value. | | `skip_wal` | Whether to disable Write-Ahead-Log for this table | String type. When set to `'true'`, the data written to the table will not be persisted to the write-ahead log, which can avoid storage wear and improve write throughput. However, when the process restarts, any unflushed data will be lost. Please use this feature only when the data source itself can ensure reliability. 
| | `index.type` | Index type | **Only for metric engine** String value, supports `none`, `skipping`. | diff --git a/versioned_docs/version-1.0/reference/sql/data-types.md b/versioned_docs/version-1.0/reference/sql/data-types.md index 55e0066ed9..9c52167ec3 100644 --- a/versioned_docs/version-1.0/reference/sql/data-types.md +++ b/versioned_docs/version-1.0/reference/sql/data-types.md @@ -31,6 +31,12 @@ The maximum capacities of `String` and `Binary` are determined by their encoding | `Float32` | 32-bit IEEE754 floating point values | 4 Bytes | | `Float64` | Double precision IEEE 754 floating point values | 8 Bytes | +:::tip NOTE +The descriptions here refer to **GreptimeDB native type information**, which are measured in **bits**. +However, when using **SQL**, follow the conventions of **PostgreSQL** and **MySQL**, where types like `INT2`, `INT4`, `INT8`, `FLOAT4` and `FLOAT8` are defined in **bytes**. +For example, in an SQL statement, `INT8` actually corresponds to **BigInt** (8 bytes, 64 bits). +::: + ## Decimal Type GreptimeDB supports the `decimal` type, a fixed-point type represented as `decimal(precision, scale)`, where `precision` is the total number of digits, and `scale` is the number of digits in the fractional part. For example, `123.45` has a precision of 5 and a scale of 2. @@ -303,24 +309,38 @@ INSERT INTO bools(b) VALUES (TRUE), (FALSE); For users migrating from MySQL or PostgreSQL to GreptimeDB, GreptimeDB supports the following alias types. -| Data Type | Alias Types | -| ---------------------- | --------------------------------------------------------------- | -| `String` | `Text`, `TinyText`, `MediumText`, `LongText`, `Varchar`, `Char` | -| `Binary` | `Varbinary` | -| `Int8` | `TinyInt` | -| `Int16` | `SmallInt` | -| `Int32` | `Int` | -| `Int64` | `BigInt` | -| `UInt8` | `UnsignedTinyInt` | -| `UInt16` | `UnsignedSmallInt` | -| `UInt32` | `UnsignedInt` | -| `UInt64` | `UnsignedBigInt` | -| `Float32` | `Float` | -| `Float64` | `Double` | -| `TimestampSecond` | `Timestamp_s`, `Timestamp_sec`, `Timestamp(0)` | -| `TimestampMillisecond` | `Timestamp`, `Timestamp_ms`, `Timestamp(3)` | -| `TimestampMicroSecond` | `Timestamp_us`, `Timestamp(6)` | -| `TimestampNanosecond` | `Timestamp_ns`, `Timestamp(9)` | + +| SQL Datatype Alias | Native Datatype | +| --------------------------------------------------------------- | ---------------------- | +| `Text`, `TinyText`, `MediumText`, `LongText`, `Varchar`, `Char` | `String` | +| `Varbinary` | `Binary` | +| `TinyInt` | `Int8` | +| `SmallInt`, `Int2` | `Int16` | +| `Int`, `Int4` | `Int32` | +| `BigInt`, `Int8` | `Int64` | +| `UnsignedTinyInt` | `UInt8` | +| `UnsignedSmallInt` | `UInt16` | +| `UnsignedInt` | `UInt32` | +| `UnsignedBigInt` | `UInt64` | +| `Float`, `Float4` | `Float32` | +| `Double`, `Float8` | `Float64` | +| `Timestamp_s`, `Timestamp_sec`, `Timestamp(0)` | `TimestampSecond` | +| `Timestamp`, `Timestamp_ms`, `Timestamp(3)` | `TimestampMillisecond` | +| `Timestamp_us`, `Timestamp(6)` | `TimestampMicroSecond` | +| `Timestamp_ns`, `Timestamp(9)` | `TimestampNanosecond` | + +:::warning Note +The type aliases `Int2`, `Int4`, `Int8`, `Float4`, and `Float8` follow the PostgreSQL and MySQL convention where these identifiers refer to the number of **bytes** (not bits) in the type. 
+ +Specifically: +- `Int2` = 2 bytes = `SmallInt` (16-bit) +- `Int4` = 4 bytes = `Int` (32-bit) +- `Int8` = 8 bytes = `BigInt` (64-bit) +- `Float4` = 4 bytes = `Float` (32-bit) +- `Float8` = 8 bytes = `Double` (64-bit) + +Note: GreptimeDB's native type names (like `UInt8`, `Int32`, `Int64`) refer to the number of **bits**, while the SQL aliases `Int2`, `Int4`, and `Int8` refer to the number of **bytes** following PostgreSQL/MySQL conventions. For example, the native type `Int8` is an 8-**bit** integer (`TinyInt`, 1 byte), while the SQL alias `INT8` maps to an 8-**byte** integer (`BigInt`, 64-bit). +::: You can use these alias types when creating tables. For example, use `Varchar` instead of `String`, `Double` instead of `Float64`, and `Timestamp(0)` instead of `TimestampSecond`. diff --git a/versioned_docs/version-1.0/reference/sql/functions/json.md b/versioned_docs/version-1.0/reference/sql/functions/json.md index 4e720ce1d9..c5df6feced 100644 --- a/versioned_docs/version-1.0/reference/sql/functions/json.md +++ b/versioned_docs/version-1.0/reference/sql/functions/json.md @@ -36,6 +36,7 @@ Extracts values with specific types from JSON values through specific paths. * `json_get_int(json, path)` to extract an integer value from a JSON value by the path, while boolean values will be converted to integers. * `json_get_float(json, path)` to extract a float value from a JSON value by the path, while integer and boolean values will be converted to floats. * `json_get_string(json, path)` to extract a string value from a JSON value by the path. All valid JSON values will be converted to strings, including null values, objects and arrays. +* `json_get_object(json, path)` to extract an object value from a JSON value by the path. Returns NULL if the path does not point to an object. `path` is a string that select and extract elements from a json value. The following operators in the path are supported: @@ -63,6 +64,14 @@ SELECT json_get_int(parse_json('{"a": {"c": 3}, "b": 2}'), 'a.c'); +-----------------------------------------------------------------------+ | 3 | +-----------------------------------------------------------------------+ + +SELECT json_to_string(json_get_object(parse_json('{"a": {"b": {"c": {"d": 42}}}}'), 'a.b.c')); + ++---------------------------------------------------------------------------------------------------+ +| json_to_string(json_get_object(parse_json(Utf8("{"a": {"b": {"c": {"d": 42}}}}")),Utf8("a.b.c"))) | ++---------------------------------------------------------------------------------------------------+ +| {"d":42} | ++---------------------------------------------------------------------------------------------------+ ``` ## Validation diff --git a/versioned_docs/version-1.0/user-guide/deployments-administration/configuration.md b/versioned_docs/version-1.0/user-guide/deployments-administration/configuration.md index 3d49d42093..372e11eb57 100644 --- a/versioned_docs/version-1.0/user-guide/deployments-administration/configuration.md +++ b/versioned_docs/version-1.0/user-guide/deployments-administration/configuration.md @@ -508,59 +508,6 @@ The `meta_client` configures the Metasrv client, including: - `ddl_timeout`, DDL execution timeout, `10s` by default. - `tcp_nodelay`, `TCP_NODELAY` option for accepted connections, true by default. -### Monitor metrics options - -These options are used to save system metrics to GreptimeDB itself. 
-For instructions on how to use this feature, please refer to the [Monitoring](/user-guide/deployments-administration/monitoring/overview.md) guide. - -```toml -[export_metrics] -# Whether to enable export_metrics -enable=true -# Export time interval -write_interval = "30s" -``` - -- `enable`: Whether to enable export_metrics, `false` by default. -- `write_interval`: Export time interval. - -#### `self_import` method - -If you are using `standalone`, you can use the `self_import` method to export metrics to GreptimeDB itself. - -```toml -[export_metrics] -# Whether to enable export_metrics -enable=true -# Export time interval -write_interval = "30s" -[export_metrics.self_import] -db = "information_schema" -``` - -- `db`: The default database used by `self_import` is `information_schema`. You can also create another database for saving system metrics. - -#### `remote_write` method - -The `remote_write` method is supported by `datanode`, `frontend`, `metasrv`, and `standalone`. -It sends metrics to a receiver compatible with the [Prometheus Remote-Write protocol](https://prometheus.io/docs/concepts/remote_write_spec/). - -```toml -[export_metrics] -# Whether to enable export_metrics -enable=true -# Export time interval -write_interval = "30s" -[export_metrics.remote_write] -# URL specified by Prometheus Remote-Write protocol -url = "http://127.0.0.1:4000/v1/prometheus/write?db=information_schema" -# Some optional HTTP parameters, such as authentication information -headers = { Authorization = "Basic Z3JlcHRpbWVfdXNlcjpncmVwdGltZV9wd2Q=" } -``` - -- `url`: URL specified by Prometheus Remote-Write protocol. -- `headers`: Some optional HTTP parameters, such as authentication information. - ### Heartbeat configuration Heartbeat configuration is available in `frontend` and `datanode`. ```toml diff --git a/versioned_docs/version-1.0/user-guide/deployments-administration/monitoring/standalone-monitoring.md b/versioned_docs/version-1.0/user-guide/deployments-administration/monitoring/standalone-monitoring.md index 48c0bbb459..2b8ef7cf24 100644 --- a/versioned_docs/version-1.0/user-guide/deployments-administration/monitoring/standalone-monitoring.md +++ b/versioned_docs/version-1.0/user-guide/deployments-administration/monitoring/standalone-monitoring.md @@ -7,52 +7,7 @@ description: Guide to monitor GreptimeDB standalone instance using Prometheus me GreptimeDB standalone provides a `/metrics` endpoint on the HTTP port (default `4000`) that exposes [Prometheus metrics](/reference/http-endpoints.md#metrics). -## Monitoring configuration - -You can configure GreptimeDB to export metrics to GreptimeDB itself or to an external Prometheus instance. - -### Internal Metrics Storage - -Configuring GreptimeDB to store its own metrics internally is convenient and recommended for self-monitoring, -it also enables SQL-based querying and analysis. - -To enable self-monitoring, configure the `export_metrics` section in your TOML configuration file: - -```toml -[export_metrics] -enable = true -# Metrics collection interval -write_interval = "30s" -[export_metrics.self_import] -db = "greptime_metrics" -``` - -This configuration: -- Collects and writes metrics every 30 seconds. -- Exports metrics to the `greptime_metrics` database within GreptimeDB. Ensure the `greptime_metrics` database [is created](/reference/sql/create.md#create-database) before exporting metrics. 
- -### Export metrics to Prometheus - -For environments with existing Prometheus infrastructure, -GreptimeDB can export metrics via the Prometheus remote write protocol. - -To do this, configure the `export_metrics` section in your TOML configuration file with the `remote_write` option: - -```toml -[export_metrics] -enable=true -write_interval = "30s" -[export_metrics.remote_write] -# URL specified by Prometheus Remote-Write protocol -url = "https://your/remote_write/endpoint" -# Some optional HTTP parameters, such as authentication information -headers = { Authorization = {{Authorization}} } -``` - -This configuration: -- Sets the export interval to 30 seconds -- Specifies the Prometheus remote write URL, which should point to your Prometheus instance -- Optionally includes HTTP headers for the remote write URL, such as authentication information +You can use Prometheus to scrape these metrics and Grafana to visualize them. ## Grafana Dashboard Integration diff --git a/versioned_docs/version-1.0/user-guide/deployments-administration/monitoring/tracing.md b/versioned_docs/version-1.0/user-guide/deployments-administration/monitoring/tracing.md index 2d0db63c89..8667362888 100644 --- a/versioned_docs/version-1.0/user-guide/deployments-administration/monitoring/tracing.md +++ b/versioned_docs/version-1.0/user-guide/deployments-administration/monitoring/tracing.md @@ -9,6 +9,24 @@ GreptimeDB supports distributed tracing. GreptimeDB exports all collected spans In the [logging section](/user-guide/deployments-administration/configuration.md#logging-options) in the configuration, there are descriptions of configuration items related to tracing, [standalone.example.toml](https://github.com/GreptimeTeam/greptimedb/blob/VAR::greptimedbVersion/config/standalone.example.toml) provide a reference configuration in the logging section. +## Dynamic Tracing Control + +GreptimeDB provides the ability to enable or disable tracing dynamically at runtime using the HTTP API without requiring a server restart. This is useful for troubleshooting production issues or temporarily enabling tracing for debugging purposes. + +To enable tracing: + +```bash +curl --data "true" http://127.0.0.1:4000/debug/enable_trace +# Output: trace enabled +``` + +To disable tracing: + +```bash +curl --data "false" http://127.0.0.1:4000/debug/enable_trace +# Output: trace disabled +``` + ## Tutorial: Use Jaeger to trace GreptimeDB [Jaeger](https://www.jaegertracing.io/) is an open source, end-to-end distributed tracing system, originally developed and open sourced by Uber. Its goal is to help developers monitor and debug the request flow in complex microservice architectures. 
From 58313e6f2fd321bed97b41b3f68d1e07741a3863 Mon Sep 17 00:00:00 2001 From: Dennis Zhuang Date: Wed, 3 Dec 2025 02:41:52 -0800 Subject: [PATCH 2/2] docs: bump v1.0 to beta2 Signed-off-by: Dennis Zhuang --- .../reference/command-lines/utilities/data.md | 21 ++++---- .../version-1.0/reference/http-endpoints.md | 23 ++++++++ .../configuration.md | 53 ------------------- .../monitoring/standalone-monitoring.md | 45 +--------------- .../monitoring/tracing.md | 18 +++++++ 5 files changed, 53 insertions(+), 107 deletions(-) diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-1.0/reference/command-lines/utilities/data.md b/i18n/zh/docusaurus-plugin-content-docs/version-1.0/reference/command-lines/utilities/data.md index e02bda114f..acd67dc0fa 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/version-1.0/reference/command-lines/utilities/data.md +++ b/i18n/zh/docusaurus-plugin-content-docs/version-1.0/reference/command-lines/utilities/data.md @@ -20,7 +20,8 @@ greptime cli data export [OPTIONS] | `--addr` | 是 | - | 要连接的 GreptimeDB 数据库地址 | | `--output-dir` | 是 | - | 存储导出数据的目录 | | `--database` | 否 | 所有数据库 | 要导出的数据库名称 | -| `--export-jobs, -j` | 否 | 1 | 并行导出任务数量(多个数据库可以并行导出) | +| `--db-parallelism, -j` | 否 | 1 | 并行导出的数据库数量。例如,有 20 个数据库且 `db-parallelism` 设置为 4 时,将同时导出 4 个数据库。(别名:`--export-jobs`) | +| `--table-parallelism` | 否 | 4 | 单个数据库内并行导出的表数量。例如,数据库包含 30 个表且 `table-parallelism` 设置为 8 时,将同时导出 8 个表。 | | `--max-retry` | 否 | 3 | 每个任务的最大重试次数 | | `--target, -t` | 否 | all | 导出目标(schema/data/all) | | `--start-time` | 否 | - | 数据导出的开始时间范围 | @@ -56,15 +57,15 @@ greptime cli data import [OPTIONS] ``` ### 选项 -| 选项 | 是否必需 | 默认值 | 描述 | -| ------------------- | -------- | ---------- | ------------------------------------------ | -| `--addr` | 是 | - | 要连接的 GreptimeDB 数据库地址 | -| `--input-dir` | 是 | - | 包含备份数据的目录 | -| `--database` | 否 | 所有数据库 | 要导入的数据库名称 | -| `--import-jobs, -j` | 否 | 1 | 并行导入任务数量(多个数据库可以并行导入) | -| `--max-retry` | 否 | 3 | 每个任务的最大重试次数 | -| `--target, -t` | 否 | all | 导入目标(schema/data/all) | -| `--auth-basic` | 否 | - | 使用 `:` 格式 | +| 选项 | 是否必需 | 默认值 | 描述 | +| ------------------------ | -------- | ---------- | --------------------------------------------------------------------------------------------------------------------------------------- | +| `--addr` | 是 | - | 要连接的 GreptimeDB 数据库地址 | +| `--input-dir` | 是 | - | 包含备份数据的目录 | +| `--database` | 否 | 所有数据库 | 要导入的数据库名称 | +| `--db-parallelism, -j` | 否 | 1 | 并行导入的数据库数量。例如,有 20 个数据库且 `db-parallelism` 设置为 4 时,将同时导入 4 个数据库。(别名:`--import-jobs`) | +| `--max-retry` | 否 | 3 | 每个任务的最大重试次数 | +| `--target, -t` | 否 | all | 导入目标(schema/data/all) | +| `--auth-basic` | 否 | - | 使用 `:` 格式 | ### 导入目标 - `schema`: 仅导入表结构 diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-1.0/reference/http-endpoints.md b/i18n/zh/docusaurus-plugin-content-docs/version-1.0/reference/http-endpoints.md index 99a7462c0c..e5c0d5a93f 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/version-1.0/reference/http-endpoints.md +++ b/i18n/zh/docusaurus-plugin-content-docs/version-1.0/reference/http-endpoints.md @@ -111,6 +111,29 @@ is_strict_mode = false 有关更多信息,请参阅[如何文档](https://github.com/GreptimeTeam/greptimedb/blob/main/docs/how-to/how-to-change-log-level-on-the-fly.md)。 +### 启用/禁用链路追踪 + +- **路径**: `/debug/enable_trace` +- **方法**: `POST` +- **描述**: 在运行时动态启用或禁用分布式链路追踪。 +- **用法**: 发送 `true` 启用链路追踪,或发送 `false` 禁用链路追踪。 + +启用链路追踪示例: + +```bash +curl --data "true" http://127.0.0.1:4000/debug/enable_trace +# 输出: trace enabled +``` + +禁用链路追踪示例: + +```bash +curl --data "false" 
http://127.0.0.1:4000/debug/enable_trace +# 输出: trace disabled +``` + +有关链路追踪配置的更多信息,请参阅[链路追踪文档](/user-guide/deployments-administration/monitoring/tracing.md)。 + ### 性能分析工具 - **基础路径**: `/debug/prof/` diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-1.0/user-guide/deployments-administration/configuration.md b/i18n/zh/docusaurus-plugin-content-docs/version-1.0/user-guide/deployments-administration/configuration.md index 8bd77c6027..9610e83213 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/version-1.0/user-guide/deployments-administration/configuration.md +++ b/i18n/zh/docusaurus-plugin-content-docs/version-1.0/user-guide/deployments-administration/configuration.md @@ -500,59 +500,6 @@ tcp_nodelay = true - `ddl_timeout`,DDL 执行的超时时间,默认 10 秒。 - `tcp_nodelay`,接受连接时的 `TCP_NODELAY` 选项,默认为 true。 -### 指标监控选项 - -这些选项用于将系统监控指标保存到 GreptimeDB 本身。 -有关如何使用此功能的说明,请参见 [监控](/user-guide/deployments-administration/monitoring/overview.md) 指南。 - -```toml -[export_metrics] -# Whether to enable export_metrics -enable=true -# Export time interval -write_interval = "30s" -``` - -- `enable`: 是否启用导出指标功能,默认为 `false`。 -- `write_interval`: 指标导出时间间隔。 - -#### `self_import` 方法 - -如果你使用的是 GreptimeDB 单机版,可以使用 `self_import` 方法将指标导入到自身的数据库中。 - -```toml -[export_metrics] -# Whether to enable export_metrics -enable=true -# Export time interval -write_interval = "30s" -[export_metrics.self_import] -db = "information_schema" -``` - -- `db`: 默认的数据库为 `information_schema`,你也可以创建另一个数据库来保存系统指标。 - -#### `remote_write` 方法 - -`datanode`、`frontend`、`metasrv` 和 `standalone` 支持使用 `remote_write` 方法导出指标。 -它将指标发送到与 [Prometheus Remote-Write protocol](https://prometheus.io/docs/concepts/remote_write_spec/) 兼容的接受端。 - -```toml -[export_metrics] -# Whether to enable export_metrics -enable=true -# Export time interval -write_interval = "30s" -[export_metrics.remote_write] -# URL specified by Prometheus Remote-Write protocol -url = "http://127.0.0.1:4000/v1/prometheus/write?db=information_schema" -# Some optional HTTP parameters, such as authentication information -headers = { Authorization = "Basic Z3JlcHRpbWVfdXNlcjpncmVwdGltZV9wd2Q=" } -``` - -- `url`: Prometheus Remote-Write 协议指定的 URL。 -- `headers`: 一些可选的 HTTP 参数,比如认证信息。 - ### 心跳配置 心跳配置在 `frontend` 和 `datanode` 中可用。 ```toml diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-1.0/user-guide/deployments-administration/monitoring/standalone-monitoring.md b/i18n/zh/docusaurus-plugin-content-docs/version-1.0/user-guide/deployments-administration/monitoring/standalone-monitoring.md index a1848c9b3b..9a19eeb0ff 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/version-1.0/user-guide/deployments-administration/monitoring/standalone-monitoring.md +++ b/i18n/zh/docusaurus-plugin-content-docs/version-1.0/user-guide/deployments-administration/monitoring/standalone-monitoring.md @@ -7,50 +7,7 @@ description: 使用 Prometheus 指标和 Grafana 监控 GreptimeDB 单机实例 GreptimeDB 单机版在 HTTP 端口(默认 `4000`)上提供 `/metrics` 端点,暴露 [Prometheus 指标](/reference/http-endpoints.md#指标)。 -## 监控配置 - -你可以配置 GreptimeDB 将指标导出到 GreptimeDB 自身或外部的 Prometheus 实例。 - -### 将指标存储到 GreptimeDB 自身 - -将指标存储到 GreptimeDB 自身既方便又推荐用于自监控,且支持基于 SQL 的查询和分析。 - -要启用自监控,请在你的 TOML 配置文件中配置 `export_metrics` 部分: - -```toml -[export_metrics] -enable = true -# 指标收集间隔 -write_interval = "30s" -[export_metrics.self_import] -db = "greptime_metrics" -``` - -此配置: -- 每 30 秒收集和写入指标。 -- 将指标导出到 GreptimeDB 内的 `greptime_metrics` 数据库。请确保在导出指标之前 `greptime_metrics` 数据库[已被创建](/reference/sql/create.md#create-database)。 - -### 导出指标到 Prometheus - -对于已有 Prometheus 
基础设施的环境,GreptimeDB 可以通过 Prometheus 远程写入协议导出指标。 - -具体方法为,在 TOML 配置文件中使用 `remote_write` 选项配置 `export_metrics` 部分: - -```toml -[export_metrics] -enable=true -write_interval = "30s" -[export_metrics.remote_write] -# Prometheus Remote-Write 协议指定的 URL -url = "https://your/remote_write/endpoint" -# 一些可选的 HTTP 参数,如身份验证信息 -headers = { Authorization = {{Authorization}} } -``` - -此配置: -- 将导出间隔设置为 30 秒 -- 指定 Prometheus 远程写入 URL,应指向你的 Prometheus 实例 -- 可选择包含远程写入 URL 的 HTTP 头,如身份验证信息 +你可以使用 Prometheus 抓取这些指标,并使用 Grafana 进行可视化展示。 ## Grafana 仪表板集成 diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-1.0/user-guide/deployments-administration/monitoring/tracing.md b/i18n/zh/docusaurus-plugin-content-docs/version-1.0/user-guide/deployments-administration/monitoring/tracing.md index 72a1a00ba2..b86fedcabb 100644 --- a/i18n/zh/docusaurus-plugin-content-docs/version-1.0/user-guide/deployments-administration/monitoring/tracing.md +++ b/i18n/zh/docusaurus-plugin-content-docs/version-1.0/user-guide/deployments-administration/monitoring/tracing.md @@ -9,6 +9,24 @@ GreptimeDB 支持分布式链路追踪。GreptimeDB 使用基于 gRPC 的 OTLP 在配置中的 [logging 部分](/user-guide/deployments-administration/configuration.md#logging-选项) 有对 tracing 的相关配置项说明,[standalone.example.toml](https://github.com/GreptimeTeam/greptimedb/blob/VAR::greptimedbVersion/config/standalone.example.toml) 的 logging 部分提供了参考配置项。 +## 动态链路追踪控制 + +GreptimeDB 提供了通过 HTTP API 在运行时动态启用或禁用链路追踪的功能,无需重启服务器。这对于排查生产环境问题或临时启用追踪进行调试非常有用。 + +启用链路追踪: + +```bash +curl --data "true" http://127.0.0.1:4000/debug/enable_trace +# 输出: trace enabled +``` + +禁用链路追踪: + +```bash +curl --data "false" http://127.0.0.1:4000/debug/enable_trace +# 输出: trace disabled +``` + ## 教程:使用 Jaeger 追踪 GreptimeDB 调用链路 [Jaeger](https://www.jaegertracing.io/) 是一个开源的、端到端的分布式链路追踪系统,最初由 Uber 开发并开源。它的目标是帮助开发人员监测和调试复杂的微服务架构中的请求流程。