From 11eee29023a359a576a761ee65132b6994de1b5e Mon Sep 17 00:00:00 2001
From: Slach
Date: Mon, 28 Aug 2023 10:21:15 +0400
Subject: [PATCH] Revert "add more precise disk re-balancing for not exists disks, during download, partial fix https://github.com/Altinity/clickhouse-backup/issues/561"

This reverts commit 20e250c5a5bbdc9a82845088ee522ef65a262a16.
---
 ChangeLog.md                 |  7 ++---
 go.mod                       |  1 -
 go.sum                       |  2 --
 pkg/backup/download.go       | 54 ++----
 pkg/backup/list.go           |  3 +-
 pkg/clickhouse/clickhouse.go | 27 +++++++-----------
 pkg/clickhouse/structs.go    |  9 +++---
 7 files changed, 20 insertions(+), 83 deletions(-)

diff --git a/ChangeLog.md b/ChangeLog.md
index 665e9b6a..976cb0d3 100644
--- a/ChangeLog.md
+++ b/ChangeLog.md
@@ -4,12 +4,11 @@ IMPROVEMENTS
 - Implementation blacklist for table engines during backup / download / upload / restore [537](https://github.com/Altinity/clickhouse-backup/issues/537)
 - restore RBAC / configs, refactoring restart clickhouse-server via `sql:SYSTEM SHUTDOWN` or `exec:systemctl restart clickhouse-server`, add `--rbac-only` and `--configs-only` options to `create`, `upload`, `download`, `restore` command. fix [706](https://github.com/Altinity/clickhouse-backup/issues/706)
 - Backup/Restore RBAC related objects from Zookeeper via direct connection to zookeeper/keeper, fix [604](https://github.com/Altinity/clickhouse-backup/issues/604)
-- add `SHARDED_OPERATION_MODE` option, to easy create backup for sharded cluster, available values `none` (no sharding), `table` (table granularity), `database` (database granularity), `first-replica` (on the lexicographically sorted first active replica), thanks @mskwon, fix [639](https://github.com/Altinity/clickhouse-backup/issues/639), fix [648](https://github.com/Altinity/clickhouse-backup/pull/648)
-- add support for `compression_format: none` for upload and download backups created with `--rbac` / `--rbac-only` or `--configs` / `--configs-only` options, fix [713](https://github.com/Altinity/clickhouse-backup/issues/713)
-- add support for s3 `GLACIER` storage class, when GET return error, then, it requires 5 minutes per key and restore could be slow. Use `GLACIER_IR`, it looks more robust, fix [614](https://github.com/Altinity/clickhouse-backup/issues/614)
+- Add `SHARDED_OPERATION_MODE` option, to easy create backup for sharded cluster, available values `none` (no sharding), `table` (table granularity), `database` (database granularity), `first-replica` (on the lexicographically sorted first active replica), thanks @mskwon, fix [639](https://github.com/Altinity/clickhouse-backup/issues/639), fix [648](https://github.com/Altinity/clickhouse-backup/pull/648)
+- Add support for `compression_format: none` for upload and download backups created with `--rbac` / `--rbac-only` or `--configs` / `--configs-only` options, fix [713](https://github.com/Altinity/clickhouse-backup/issues/713)
+- Add support for s3 `GLACIER` storage class, when GET return error, then, it requires 5 minutes per key and restore could be slow. Use `GLACIER_IR`, it looks more robust, fix [614](https://github.com/Altinity/clickhouse-backup/issues/614)
 - restore functions via `CREATE OR REPLACE` for more atomic behavior
 - prepare to Make ./tests/integration/ test parallel fix [721](https://github.com/Altinity/clickhouse-backup/issues/721)
-- add more precise disk re-balancing for not exists disks, during download, partial fix [561](https://github.com/Altinity/clickhouse-backup/issues/561)
 
 BUG FIXES
 - fix possible create backup failures during UNFREEZE not exists tables, affected 2.2.7+ version, fix [704](https://github.com/Altinity/clickhouse-backup/issues/704)
diff --git a/go.mod b/go.mod
index 2c2c7c93..2d4dcbad 100644
--- a/go.mod
+++ b/go.mod
@@ -34,7 +34,6 @@ require (
 	github.com/pkg/errors v0.9.1
 	github.com/pkg/sftp v1.13.5
 	github.com/prometheus/client_golang v1.15.1
-	github.com/ricochet2200/go-disk-usage/du v0.0.0-20210707232629-ac9918953285
 	github.com/stretchr/testify v1.8.4
 	github.com/tencentyun/cos-go-sdk-v5 v0.7.41
 	github.com/urfave/cli v1.22.14
diff --git a/go.sum b/go.sum
index 6850d4ae..6c66b79e 100644
--- a/go.sum
+++ b/go.sum
@@ -338,8 +338,6 @@ github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdO
 github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO7x0VV9VvuY=
 github.com/prometheus/procfs v0.10.1 h1:kYK1Va/YMlutzCGazswoHKo//tZVlFpKYh+PymziUAg=
 github.com/prometheus/procfs v0.10.1/go.mod h1:nwNm2aOCAYw8uTR/9bWRREkZFxAUcWzPHWJq+XBB/FM=
-github.com/ricochet2200/go-disk-usage/du v0.0.0-20210707232629-ac9918953285 h1:d54EL9l+XteliUfUCGsEwwuk65dmmxX85VXF+9T6+50=
-github.com/ricochet2200/go-disk-usage/du v0.0.0-20210707232629-ac9918953285/go.mod h1:fxIDly1xtudczrZeOOlfaUvd2OPb2qZAPuWdU2BsBTk=
 github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
 github.com/rivo/uniseg v0.4.4 h1:8TfxU8dW6PdqD27gjM8MVNuicgxIjxpm4K7x4jp8sis=
 github.com/rivo/uniseg v0.4.4/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
diff --git a/pkg/backup/download.go b/pkg/backup/download.go
index dca2fda3..ca6ff65c 100644
--- a/pkg/backup/download.go
+++ b/pkg/backup/download.go
@@ -15,7 +15,6 @@ import (
 	"github.com/eapache/go-resiliency/retrier"
 	"io"
 	"io/fs"
-	"math/rand"
 	"os"
 	"path"
 	"path/filepath"
@@ -220,16 +219,9 @@ func (b *Backuper) Download(backupName string, tablePattern string, partitions [
 	if !schemaOnly {
 		for _, t := range tableMetadataAfterDownload {
 			for disk := range t.Parts {
-				// https://github.com/Altinity/clickhouse-backup/issues/561, @todo think about calculate size of each part and rebalancing t.Parts
 				if _, diskExists := b.DiskToPathMap[disk]; !diskExists && disk != b.cfg.ClickHouse.EmbeddedBackupDisk {
-					var diskPath string
-					var diskPathErr error
-					diskPath, disks, diskPathErr = b.getDiskPathForNonExistsDisk(disk, disks, remoteBackup, t)
-					if diskPathErr != nil {
-						return diskPathErr
-					}
-					b.DiskToPathMap[disk] = diskPath
-					log.Warnf("table '%s.%s' require disk '%s' that not found in clickhouse table system.disks, you can add nonexistent disks to `disk_mapping` in `clickhouse` config section, data will download to %s", t.Database, t.Table, disk, b.DiskToPathMap[disk])
+					b.DiskToPathMap[disk] = b.DiskToPathMap["default"]
+					log.Warnf("table '%s.%s' require disk '%s' that not found in clickhouse table system.disks, you can add nonexistent disks to `disk_mapping` in `clickhouse` config section, data will download to %s", t.Database, t.Table, disk, b.DiskToPathMap["default"])
 				}
 			}
 		}
@@ -972,45 +964,3 @@ func (b *Backuper) downloadSingleBackupFile(ctx context.Context, remoteFile stri
 	}
 	return nil
 }
-
-// getDiskPathForNonExistsDisk - https://github.com/Altinity/clickhouse-backup/issues/561
-// allow to Restore to new server with different storage policy, different disk count,
-// implements `least_used` for normal disk and `round_robin` for Object disks
-func (b *Backuper) getDiskPathForNonExistsDisk(disk string, disks []clickhouse.Disk, remoteBackup storage.Backup, t metadata.TableMetadata) (string, []clickhouse.Disk, error) {
-	diskType, ok := remoteBackup.DiskTypes[disk]
-	if !ok {
-		return "", disks, fmt.Errorf("diskType for %s not found in %#v in %s/metadata.json", disk, remoteBackup.DiskTypes, remoteBackup.BackupName)
-	}
-	var filteredDisks []clickhouse.Disk
-	for _, d := range disks {
-		if !d.IsBackup && d.Type == diskType {
-			filteredDisks = append(filteredDisks, d)
-		}
-	}
-	if len(filteredDisks) == 0 {
-		return "", disks, fmt.Errorf("diskType: %s not found in system.disks", diskType)
-	}
-	// round_robin for non-local disks
-	if diskType != "local" {
-		roundRobinIdx := rand.Intn(len(filteredDisks))
-		return filteredDisks[roundRobinIdx].Path, disks, nil
-	}
-	// least_used for local
-	freeSpace := uint64(0)
-	leastUsedIdx := -1
-	for idx, d := range filteredDisks {
-		if filteredDisks[idx].FreeSpace > freeSpace {
-			freeSpace = d.FreeSpace
-			leastUsedIdx = idx
-		}
-	}
-	if leastUsedIdx < 0 {
-		return "", disks, fmt.Errorf("%s free space, not found in system.disks with `local` type", utils.FormatBytes(t.TotalBytes))
-	}
-	for idx := range disks {
-		if disks[idx].Name == filteredDisks[leastUsedIdx].Name {
-			disks[idx].FreeSpace -= t.TotalBytes
-		}
-	}
-	return filteredDisks[leastUsedIdx].Path, disks, nil
-}
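
For context on the dependency dropped from go.mod and go.sum above: the reverted code used it to populate `Disk.FreeSpace` for least-used disk selection. A minimal sketch of that probing, assuming the `du` API exactly as it appears in the hunks (`du.NewDiskUsage(path).Free()`); `Size()` comes from the same library's documented surface, and the probe path is only an example:

package main

import (
	"fmt"

	"github.com/ricochet2200/go-disk-usage/du"
)

func main() {
	// Probe the filesystem backing the default ClickHouse data path.
	// Free() reports free bytes; this is the value the reverted code
	// stored in clickhouse.Disk.FreeSpace to drive re-balancing.
	usage := du.NewDiskUsage("/var/lib/clickhouse")
	fmt.Printf("free %d of %d bytes\n", usage.Free(), usage.Size())
}
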
diff --git a/pkg/backup/list.go b/pkg/backup/list.go
index 1435589a..25952416 100644
--- a/pkg/backup/list.go
+++ b/pkg/backup/list.go
@@ -4,7 +4,6 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
-	"github.com/ricochet2200/go-disk-usage/du"
 	"io"
 	"os"
 	"path"
@@ -174,7 +173,7 @@ func (b *Backuper) GetLocalBackups(ctx context.Context, disks []clickhouse.Disk)
 	}
 	if disks == nil {
 		disks = []clickhouse.Disk{
-			{Name: "default", Path: "/var/lib/clickhouse", Type: "local", FreeSpace: du.NewDiskUsage("/var/lib/clickhouse").Free()},
+			{Name: "default", Path: "/var/lib/clickhouse"},
 		}
 	}
 	defaultDataPath, err := b.ch.GetDefaultPath(disks)
diff --git a/pkg/clickhouse/clickhouse.go b/pkg/clickhouse/clickhouse.go
index 5b36ec33..f1b809ca 100644
--- a/pkg/clickhouse/clickhouse.go
+++ b/pkg/clickhouse/clickhouse.go
@@ -10,7 +10,6 @@ import (
 	"github.com/Altinity/clickhouse-backup/pkg/common"
 	"github.com/ClickHouse/clickhouse-go/v2/lib/driver"
 	"github.com/antchfx/xmlquery"
-	"github.com/ricochet2200/go-disk-usage/du"
 	"os"
 	"path"
 	"path/filepath"
@@ -30,6 +29,7 @@ type ClickHouse struct {
 	Config               *config.ClickHouseConfig
 	Log                  *apexLog.Entry
 	conn                 driver.Conn
+	disks                []Disk
 	version              int
 	isPartsColumnPresent int8
 	IsOpen               bool
@@ -218,10 +218,9 @@ func (ch *ClickHouse) getDisksFromSystemSettings(ctx context.Context) ([]Disk, e
 		dataPathArray := strings.Split(metadataPath, "/")
 		clickhouseData := path.Join(dataPathArray[:len(dataPathArray)-1]...)
 		return []Disk{{
-			Name:      "default",
-			Path:      path.Join("/", clickhouseData),
-			Type:      "local",
-			FreeSpace: du.NewDiskUsage(path.Join("/", clickhouseData)).Free(),
+			Name: "default",
+			Path: path.Join("/", clickhouseData),
+			Type: "local",
 		}}, nil
 	}
 }
@@ -252,24 +251,18 @@ func (ch *ClickHouse) getDisksFromSystemDisks(ctx context.Context) ([]Disk, erro
 	case <-ctx.Done():
 		return nil, ctx.Err()
 	default:
-		type DiskFields struct {
-			DiskTypePresent  uint64 `ch:"is_disk_type_present"`
-			FreeSpacePresent uint64 `ch:"is_free_space_present"`
-		}
-		diskFields := make([]DiskFields, 0)
-		if err := ch.SelectContext(ctx, &diskFields, "SELECT countIf(name='type') AS is_disk_type_present, countIf(name='free_space') AS is_free_space_present FROM system.columns WHERE database='system' AND table='disks'"); err != nil {
+		isDiskType := make([]struct {
+			Present uint64 `ch:"is_disk_type_present"`
+		}, 0)
+		if err := ch.SelectContext(ctx, &isDiskType, "SELECT count() is_disk_type_present FROM system.columns WHERE database='system' AND table='disks' AND name='type'"); err != nil {
 			return nil, err
 		}
 		diskTypeSQL := "'local'"
-		if len(diskFields) > 0 && diskFields[0].DiskTypePresent > 0 {
+		if len(isDiskType) > 0 && isDiskType[0].Present > 0 {
 			diskTypeSQL = "any(type)"
 		}
-		diskFreeSpaceSQL := "0"
-		if len(diskFields) > 0 && diskFields[0].FreeSpacePresent > 0 {
-			diskFreeSpaceSQL = "min(free_space)"
-		}
 		var result []Disk
-		query := fmt.Sprintf("SELECT path, any(name) AS name, %s AS type, %s AS free_space FROM system.disks GROUP BY path", diskTypeSQL, diskFreeSpaceSQL)
+		query := fmt.Sprintf("SELECT path, any(name) AS name, %s AS type FROM system.disks GROUP BY path", diskTypeSQL)
 		err := ch.SelectContext(ctx, &result, query)
 		return result, err
 	}
diff --git a/pkg/clickhouse/structs.go b/pkg/clickhouse/structs.go
index 42c46cb0..dcbc7904 100644
--- a/pkg/clickhouse/structs.go
+++ b/pkg/clickhouse/structs.go
@@ -38,11 +38,10 @@ type IsSystemTablesFieldPresent struct {
 }
 
 type Disk struct {
-	Name      string `ch:"name"`
-	Path      string `ch:"path"`
-	Type      string `ch:"type"`
-	FreeSpace uint64 `ch:"free_space"`
-	IsBackup  bool
+	Name     string `ch:"name"`
+	Path     string `ch:"path"`
+	Type     string `ch:"type"`
+	IsBackup bool
 }
 
 // Database - Clickhouse system.databases struct
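
Note on the behavior this revert restores: when a backup references a disk that is absent from system.disks, download falls back to the default disk path instead of choosing a least-used or round-robin target, and the restored warning points operators at `disk_mapping` in the `clickhouse` config section. A minimal sketch of that workaround, assuming the standard clickhouse-backup YAML config layout; the disk name and target path here are hypothetical:

clickhouse:
  disk_mapping:
    # Map a disk name that exists only in the backup metadata
    # to a local path where its parts should be downloaded.
    hot_ssd: /var/lib/clickhouse/disks/hot_ssd
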