diff --git a/README.md b/README.md
index eef83e0..510710a 100644
--- a/README.md
+++ b/README.md
@@ -11,6 +11,7 @@ It provides utilities to:
 - Generate memory report for rdb file
 - Convert RDB files to JSON
 - Convert RDB files to Redis Serialization Protocol (or AOF file)
+- Find biggest keys in RDB files
 - Customize data usage
 
 Thanks sripathikrishnan for his [redis-rdb-tools](https://github.com/sripathikrishnan/redis-rdb-tools)
@@ -22,7 +23,7 @@ If you have installed `go` on your compute, just simply use:
 go get github.com/hdt3213/rdb
 ```
 
-Or, you can download executable binary file from releases(https://github.com/HDT3213/rdb/releases) and put its path to PATH environment.
+Or, you can download an executable binary from [releases](https://github.com/HDT3213/rdb/releases) and add its directory to your PATH environment variable.
 
 use `rdb` command in terminal, you can see it's manual
@@ -65,7 +66,7 @@ Example:
 rdb -c memory -o mem.csv cases/memory.rdb
 ```
 
-The examples for json result:
+An example of the csv result:
 ```csv
 database,key,type,size,size_readable,element_count
 0,hash,hash,64,64B,2
 0,list,list,66,66B,4
 0,large,string,2056,2K,0
 0,zset,zset,57,57B,2
@@ -77,6 +78,38 @@ database,key,type,size,size_readable,element_count
 0,set,set,39,39B,2
 ```
 
+# Find Biggest Keys
+
+RDB can find the biggest N keys in a rdb file:
+```
+rdb -c bigkey -n <result number> <rdb file path>
+```
+
+Example:
+```
+rdb -c bigkey -n 5 cases/memory.rdb
+```
+
+An example of the csv result:
+```csv
+database,key,type,size,size_readable,element_count
+0,large,string,2056,2K,0
+0,list,list,66,66B,4
+0,hash,hash,64,64B,2
+0,zset,zset,57,57B,2
+0,set,set,39,39B,2
+```
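+
+You can also call it from Go code; a minimal sketch (imports elided):
+
+```go
+// print the 5 biggest keys of dump.rdb to stdout in csv format
+err := helper.FindBiggestKeys("dump.rdb", 5, os.Stdout)
+if err != nil {
+	panic(err)
+}
+```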
+
 # Convert to AOF
 
 Usage:
diff --git a/cases/largest.csv b/cases/largest.csv
new file mode 100644
index 0000000..41d85c0
--- /dev/null
+++ b/cases/largest.csv
@@ -0,0 +1,6 @@
+database,key,type,size,size_readable,element_count
+0,large,string,2056,2K,0
+0,list,list,66,66B,4
+0,hash,hash,64,64B,2
+0,zset,zset,57,57B,2
+0,set,set,39,39B,2
diff --git a/cmd.go b/cmd.go
index 092f5d4..d4101d3 100644
--- a/cmd.go
+++ b/cmd.go
@@ -4,6 +4,7 @@ import (
 	"flag"
 	"fmt"
 	"github.com/hdt3213/rdb/helper"
+	"os"
 )
 
 const help = `
@@ -11,6 +12,7 @@ This is a tool to parse Redis' RDB files
 Options:
   -c command, including: json/memory/aof
   -o output file path
+  -n number of results (for the bigkey command)
 
 Examples:
 1. convert rdb to json
@@ -19,13 +21,17 @@
 	rdb -c memory -o memory.csv dump.rdb
 3. convert to aof file
 	rdb -c aof -o dump.aof dump.rdb
+4. find the 10 biggest keys
+	rdb -c bigkey -n 10 dump.rdb
 `
 
 func main() {
 	var cmd string
 	var output string
+	var n int
 	flag.StringVar(&cmd, "c", "", "command for rdb: json")
 	flag.StringVar(&output, "o", "", "output file path")
+	flag.IntVar(&n, "n", 0, "number of results")
 	flag.Parse()
 
 	src := flag.Arg(0)
@@ -37,10 +43,6 @@ func main() {
 		println("src file is required")
 		return
 	}
-	if output == "" {
-		println("output file path is required")
-		return
-	}
 
 	var err error
 	switch cmd {
@@ -50,6 +52,8 @@ func main() {
 		err = helper.MemoryProfile(src, output)
 	case "aof":
 		err = helper.ToAOF(src, output)
+	case "bigkey":
+		err = helper.FindBiggestKeys(src, n, os.Stdout)
 	default:
 		println("unknown command")
 		return
diff --git a/core/decoder.go b/core/decoder.go
index 752985e..62cfae1 100644
--- a/core/decoder.go
+++ b/core/decoder.go
@@ -300,7 +300,13 @@ func (dec *Decoder) parse(cb func(object model.RedisObject) bool) error {
 // Parse parses rdb and callback
 // cb returns true to continue, returns false to stop the iteration
-func (dec *Decoder) Parse(cb func(object model.RedisObject) bool) error {
-	err := dec.checkHeader()
+// Parse uses a named return value so the deferred recover can set err on panic
+func (dec *Decoder) Parse(cb func(object model.RedisObject) bool) (err error) {
+	defer func() {
+		if err2 := recover(); err2 != nil {
+			err = fmt.Errorf("panic: %v", err2)
+		}
+	}()
+	err = dec.checkHeader()
 	if err != nil {
 		return err
 	}
diff --git a/core/list.go b/core/list.go
index 0ada94d..26f2683 100644
--- a/core/list.go
+++ b/core/list.go
@@ -57,11 +57,6 @@ func (dec *Decoder) readZipList() ([][]byte, error) {
 }
 
 func (dec *Decoder) readZipListEntry(buf []byte, cursor *int) (result []byte, err error) {
-	defer func() {
-		if err2 := recover(); err2 != nil {
-			err = fmt.Errorf("panic: %v", err2)
-		}
-	}()
 	prevLen := buf[*cursor]
 	*cursor++
 	if prevLen == zipBigPrevLen {
diff --git a/helper/bigkey.go b/helper/bigkey.go
new file mode 100644
index 0000000..e5b26c6
--- /dev/null
+++ b/helper/bigkey.go
@@ -0,0 +1,141 @@
+package helper
+
+import (
+	"container/heap"
+	"encoding/csv"
+	"errors"
+	"fmt"
+	"github.com/hdt3213/rdb/bytefmt"
+	"github.com/hdt3213/rdb/core"
+	"github.com/hdt3213/rdb/model"
+	"os"
+	"strconv"
+)
+
+type redisHeap struct {
+	list     []model.RedisObject
+	capacity int
+	minSize  int // size of min object
+	minIndex int // index of min object
+}
+
+func (h *redisHeap) Len() int {
+	return len(h.list)
+}
+
+// Less makes it a max heap on object size
+func (h *redisHeap) Less(i, j int) bool {
+	return h.list[i].GetSize() > h.list[j].GetSize()
+}
+
+func (h *redisHeap) Swap(i, j int) {
+	h.list[i], h.list[j] = h.list[j], h.list[i]
+}
+
+func (h *redisHeap) Push(x interface{}) {
+	h.list = append(h.list, x.(model.RedisObject))
+}
+
+func (h *redisHeap) Pop() interface{} {
+	item := h.list[len(h.list)-1]
+	h.list = h.list[0 : len(h.list)-1]
+	return item
+}
+
+// Append keeps only the biggest `capacity` objects. Most calls return in O(1)
+// via the minSize check; an actual insertion costs O(log(m)) heap operations
+// plus an O(m) rescan for the new minimum, where m is the heap capacity and
+// is far less than n, the number of objects in the rdb file.
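+// For example, with capacity=3 and sizes arriving as 5,1,9,4,7: 5,1,9 fill the
+// heap (minSize=1), 4 evicts 1 (minSize becomes 4), and 7 evicts 4,
+// leaving sizes {5,9,7}.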
+func (h *redisHeap) Append(x model.RedisObject) {
+	// heap is full, skip
+	if x.GetSize() <= h.minSize && h.Len() >= h.capacity {
+		return
+	}
+	// if heap is full, pop min object
+	if h.Len() >= h.capacity {
+		// assert h.minIndex >= 0
+		heap.Remove(h, h.minIndex)
+	}
+	heap.Push(h, x)
+	// update h.minSize
+	h.minSize = 1<<31 - 1 // reset, then rescan for the new minimum
+	for i := h.Len() - 1; i >= 0; i-- {
+		o := h.list[i]
+		if o.GetSize() < h.minSize {
+			h.minSize = o.GetSize()
+			h.minIndex = i
+		}
+	}
+}
+
+// Dump pops all objects from the heap, ordered by size descending
+func (h *redisHeap) Dump() []model.RedisObject {
+	result := make([]model.RedisObject, 0, h.Len())
+	for h.Len() > 0 {
+		o := heap.Pop(h).(model.RedisObject)
+		result = append(result, o)
+	}
+	return result
+}
+
+func newRedisHeap(cap int) *redisHeap {
+	list := make([]model.RedisObject, 0, cap)
+	h := &redisHeap{
+		list:     list,
+		capacity: cap,
+		minIndex: -1,
+	}
+	heap.Init(h)
+	return h
+}
+
+// FindBiggestKeys reads an rdb file and finds the biggest topN keys.
+// The invoker owns output; FindBiggestKeys won't close it
+func FindBiggestKeys(rdbFilename string, topN int, output *os.File) error {
+	if rdbFilename == "" {
+		return errors.New("src file path is required")
+	}
+	if topN <= 0 {
+		return errors.New("n must be greater than 0")
+	}
+	rdbFile, err := os.Open(rdbFilename)
+	if err != nil {
+		return fmt.Errorf("open rdb %s failed, %v", rdbFilename, err)
+	}
+	defer func() {
+		_ = rdbFile.Close()
+	}()
+	p := core.NewDecoder(rdbFile)
+	topList := newRedisHeap(topN)
+	err = p.Parse(func(object model.RedisObject) bool {
+		topList.Append(object)
+		return true
+	})
+	if err != nil {
+		return err
+	}
+	_, err = output.WriteString("database,key,type,size,size_readable,element_count\n")
+	if err != nil {
+		return fmt.Errorf("write header failed: %v", err)
+	}
+	csvWriter := csv.NewWriter(output)
+	defer csvWriter.Flush()
+	for topList.Len() > 0 {
+		object := heap.Pop(topList).(model.RedisObject)
+		err = csvWriter.Write([]string{
+			strconv.Itoa(object.GetDBIndex()),
+			object.GetKey(),
+			object.GetType(),
+			strconv.Itoa(object.GetSize()),
+			bytefmt.FormatSize(uint64(object.GetSize())),
+			strconv.Itoa(object.GetElemCount()),
+		})
+		if err != nil {
+			return fmt.Errorf("csv write failed: %v", err)
+		}
+	}
+	return nil
+}
diff --git a/helper/bigkey_test.go b/helper/bigkey_test.go
new file mode 100644
index 0000000..e4b28b1
--- /dev/null
+++ b/helper/bigkey_test.go
@@ -0,0 +1,51 @@
+package helper
+
+import (
+	"github.com/hdt3213/rdb/model"
+	"math/rand"
+	"sort"
+	"strconv"
+	"testing"
+)
+
+func TestRedisHeap_Append(t *testing.T) {
+	sizeMap := make(map[int]struct{}) // sizes must be unique: the ordering among objects of the same size is undefined
+	topN := 100
+	n := topN * 10
+	objects := make([]model.RedisObject, 0)
+	for i := 0; i < n; i++ {
+		var size int
+		for {
+			size = rand.Intn(n * 10)
+			if _, ok := sizeMap[size]; !ok {
+				sizeMap[size] = struct{}{}
+				break
+			}
+		}
+		o := &model.StringObject{
+			BaseObject: &model.BaseObject{
+				Key:  strconv.Itoa(i),
+				Size: size,
+			},
+		}
+		objects = append(objects, o)
+	}
+	topList := newRedisHeap(topN)
+	for _, o := range objects {
+		topList.Append(o)
+	}
+	actual := topList.Dump()
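+	// Dump pops the max-heap until it is empty, so actual is ordered by size
+	// descending, matching objects sorted by size descending below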
+	sort.Slice(objects, func(i, j int) bool {
+		return objects[i].GetSize() > objects[j].GetSize()
+	})
+	expect := objects[0:topN]
+	for i := 0; i < topN; i++ {
+		o1 := actual[i]
+		o2 := expect[i]
+		if o1.GetSize() != o2.GetSize() {
+			t.Errorf("wrong answer at index: %d", i)
+		}
+	}
+}
diff --git a/helper/helper.go b/helper/helper.go
index 1e9743f..76f7996 100644
--- a/helper/helper.go
+++ b/helper/helper.go
@@ -2,6 +2,7 @@ package helper
 
 import (
 	"encoding/json"
+	"errors"
 	"fmt"
 	"github.com/hdt3213/rdb/core"
 	"github.com/hdt3213/rdb/model"
@@ -10,6 +11,12 @@ import (
 
 // ToJsons read rdb file and convert to json file whose each line contains a json object
 func ToJsons(rdbFilename string, jsonFilename string) error {
+	if rdbFilename == "" {
+		return errors.New("src file path is required")
+	}
+	if jsonFilename == "" {
+		return errors.New("output file path is required")
+	}
 	rdbFile, err := os.Open(rdbFilename)
 	if err != nil {
 		return fmt.Errorf("open rdb %s failed, %v", rdbFilename, err)
@@ -60,6 +67,12 @@ func ToJsons(rdbFilename string, jsonFilename string) error {
 
 // ToAOF read rdb file and convert to aof file (Redis Serialization )
 func ToAOF(rdbFilename string, aofFilename string) error {
+	if rdbFilename == "" {
+		return errors.New("src file path is required")
+	}
+	if aofFilename == "" {
+		return errors.New("output file path is required")
+	}
 	rdbFile, err := os.Open(rdbFilename)
 	if err != nil {
 		return fmt.Errorf("open rdb %s failed, %v", rdbFilename, err)
diff --git a/helper/memory.go b/helper/memory.go
index 5a680ca..dd40363 100644
--- a/helper/memory.go
+++ b/helper/memory.go
@@ -2,6 +2,7 @@ package helper
 
 import (
 	"encoding/csv"
+	"errors"
 	"fmt"
 	"github.com/hdt3213/rdb/bytefmt"
 	"github.com/hdt3213/rdb/core"
@@ -12,6 +13,12 @@ import (
 
 // MemoryProfile read rdb file and analysis memory usage then write result to csv file
 func MemoryProfile(rdbFilename string, csvFilename string) error {
+	if rdbFilename == "" {
+		return errors.New("src file path is required")
+	}
+	if csvFilename == "" {
+		return errors.New("output file path is required")
+	}
 	rdbFile, err := os.Open(rdbFilename)
 	if err != nil {
 		return fmt.Errorf("open rdb %s failed, %v", rdbFilename, err)
diff --git a/parser_test.go b/parser_test.go
index 37f5265..2a04d42 100644
--- a/parser_test.go
+++ b/parser_test.go
@@ -112,6 +112,14 @@ func TestToJson(t *testing.T) {
 			continue
 		}
 	}
+	err = helper.ToJsons("cases/memory.rdb", "")
+	if err == nil || err.Error() != "output file path is required" {
+		t.Error("failed when empty output")
+	}
+	err = helper.ToJsons("", "tmp/memory.rdb")
+	if err == nil || err.Error() != "src file path is required" {
+		t.Error("failed when empty src")
+	}
 }
 
 func TestMemoryProfile(t *testing.T) {
@@ -142,6 +150,14 @@ func TestMemoryProfile(t *testing.T) {
 		t.Errorf("result is not equal of %s", srcRdb)
 		return
 	}
+	err = helper.MemoryProfile("cases/memory.rdb", "")
+	if err == nil || err.Error() != "output file path is required" {
+		t.Error("failed when empty output")
+	}
+	err = helper.MemoryProfile("", "tmp/memory.rdb")
+	if err == nil || err.Error() != "src file path is required" {
+		t.Error("failed when empty src")
+	}
 }
 
 func TestToAof(t *testing.T) {
@@ -172,4 +188,62 @@ func TestToAof(t *testing.T) {
 		t.Errorf("result is not equal of %s", srcRdb)
 		return
 	}
+	err = helper.ToAOF("cases/memory.rdb", "")
+	if err == nil || err.Error() != "output file path is required" {
+		t.Error("failed when empty output")
+	}
+	err = helper.ToAOF("", "tmp/memory.rdb")
+	if err == nil || err.Error() != "src file path is required" {
+		t.Error("failed when empty src")
+	}
 }
+
+func TestFindLargestKeys(t *testing.T) {
+	err := os.MkdirAll("tmp", os.ModePerm)
+	if err != nil {
+		t.Errorf("create tmp directory failed: %v", err)
+		return
+	}
+	defer func() {
+		err := os.RemoveAll("tmp")
+		if err != nil {
+			t.Logf("remove tmp directory failed: %v", err)
+		}
+	}()
+	srcRdb := filepath.Join("cases", "memory.rdb")
+	expectFile := filepath.Join("cases", "largest.csv")
+	outputFilePath := filepath.Join("tmp", "largest.csv")
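+	// FindBiggestKeys does not close its output, so this test creates and closes the file itself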
"largest.csv") + outputFilePath := filepath.Join("tmp", "largest.csv") + output, err := os.Create(outputFilePath) + if err != nil { + t.Errorf("create output file failed: %v", err) + return + } + err = helper.FindBiggestKeys(srcRdb, 5, output) + if err != nil { + t.Errorf("FindLargestKeys failed: %v", err) + } + err = output.Close() + if err != nil { + t.Errorf("error occurs during close output %s, err: %v", srcRdb, err) + return + } + equals, err := compareFileByLine(t, outputFilePath, expectFile) + if err != nil { + t.Errorf("error occurs during compare %s, err: %v", srcRdb, err) + return + } + if !equals { + t.Errorf("result is not equal of %s", srcRdb) + return + } + + err = helper.FindBiggestKeys("", 5, os.Stdout) + if err == nil || err.Error() != "src file path is required" { + t.Error("failed when empty output") + } + err = helper.FindBiggestKeys("cases/memory.rdb", 0, os.Stdout) + if err == nil || err.Error() != "n must greater than 0" { + t.Error("failed when empty output") + } }