Find biggest key
HDT3213 committed Feb 20, 2022
1 parent 7fbf15c commit 98fa0a7
Showing 10 changed files with 321 additions and 12 deletions.
27 changes: 25 additions & 2 deletions README.md
@@ -11,6 +11,7 @@ It provides utilities to:
- Generate memory report for rdb file
- Convert RDB files to JSON
- Convert RDB files to Redis Serialization Protocol (or AOF file)
- Find the biggest keys in RDB files
- Customize data usage

Thanks to sripathikrishnan for his [redis-rdb-tools](https://github.com/sripathikrishnan/redis-rdb-tools)
@@ -22,7 +23,7 @@ If you have installed `go` on your computer, simply use:
go get github.com/hdt3213/rdb
```

Or, you can download executable binary file from releases(https://github.com/HDT3213/rdb/releases) and put its path to PATH environment.
Or, you can download an executable binary from [releases](https://github.com/HDT3213/rdb/releases) and add its directory to your PATH environment variable.

Use the `rdb` command in a terminal to see its manual.

@@ -65,7 +66,7 @@ Example:
rdb -c memory -o mem.csv cases/memory.rdb
```

The examples for json result:
Example csv output:
```csv
database,key,type,size,size_readable,element_count
0,hash,hash,64,64B,2
@@ -77,6 +78,28 @@ database,key,type,size,size_readable,element_count
0,set,set,39,39B,2
```

# Find Biggest Keys

`rdb` can find the N biggest keys in an RDB file:
```
rdb -c bigkey -n <result_number> <source_path>
```

Example:
```
rdb -c bigkey -n 5 cases/memory.rdb
```

Example csv output:
```csv
database,key,type,size,size_readable,element_count
0,large,string,2056,2K,0
0,list,list,66,66B,4
0,hash,hash,64,64B,2
0,zset,zset,57,57B,2
0,set,set,39,39B,2
```

# Convert to AOF

Usage:
6 changes: 6 additions & 0 deletions cases/largest.csv
@@ -0,0 +1,6 @@
database,key,type,size,size_readable,element_count
0,large,string,2056,2K,0
0,list,list,66,66B,4
0,hash,hash,64,64B,2
0,zset,zset,57,57B,2
0,set,set,39,39B,2
12 changes: 8 additions & 4 deletions cmd.go
@@ -4,13 +4,15 @@ import (
"flag"
"fmt"
"github.com/hdt3213/rdb/helper"
"os"
)

const help = `
This is a tool to parse Redis' RDB files
Options:
-c command, including: json/memory/aof/bigkey
-o output file path
-n number of results (for the bigkey command)
Examples:
1. convert rdb to json
@@ -19,13 +21,17 @@ Examples:
rdb -c memory -o memory.csv dump.rdb
3. convert to aof file
rdb -c aof -o dump.aof dump.rdb
4. find the largest keys
rdb -c bigkey -n 10 dump.rdb
`

func main() {
var cmd string
var output string
var n int
flag.StringVar(&cmd, "c", "", "command for rdb: json/memory/aof/bigkey")
flag.StringVar(&output, "o", "", "output file path")
flag.IntVar(&n, "n", 0, "number of results for the bigkey command")
flag.Parse()
src := flag.Arg(0)

@@ -37,10 +43,6 @@ func main() {
println("src file is required")
return
}
if output == "" {
println("output file path is required")
return
}

var err error
switch cmd {
@@ -50,6 +52,8 @@
err = helper.MemoryProfile(src, output)
case "aof":
err = helper.ToAOF(src, output)
case "bigkey":
err = helper.FindBiggestKeys(src, n, os.Stdout)
default:
println("unknown command")
return
8 changes: 7 additions & 1 deletion core/decoder.go
@@ -300,7 +300,13 @@ func (dec *Decoder) parse(cb func(object model.RedisObject) bool) error {
// Parse parses rdb and callback
// cb returns true to continue, returns false to stop the iteration
func (dec *Decoder) Parse(cb func(object model.RedisObject) bool) (err error) {
err := dec.checkHeader()
// err is a named return value so the deferred recover can replace it
defer func() {
if err2 := recover(); err2 != nil {
err = fmt.Errorf("panic: %v", err2)
}
}()
err = dec.checkHeader()
if err != nil {
return err
}
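As a usage sketch (the types and constructors are the ones appearing elsewhere in this commit; the input filename is hypothetical), a caller drives the decoder through Parse, and with the recover above a panic inside the parser surfaces as an ordinary error:

```go
package main

import (
	"fmt"
	"os"

	"github.com/hdt3213/rdb/core"
	"github.com/hdt3213/rdb/model"
)

func main() {
	rdbFile, err := os.Open("dump.rdb") // hypothetical input file
	if err != nil {
		panic(err)
	}
	defer rdbFile.Close()
	dec := core.NewDecoder(rdbFile)
	err = dec.Parse(func(o model.RedisObject) bool {
		fmt.Printf("%s (%d bytes)\n", o.GetKey(), o.GetSize())
		return true // returning false stops the iteration early
	})
	if err != nil { // parser panics now arrive here as errors
		panic(err)
	}
}
```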
5 changes: 0 additions & 5 deletions core/list.go
@@ -57,11 +57,6 @@ func (dec *Decoder) readZipList() ([][]byte, error) {
}

func (dec *Decoder) readZipListEntry(buf []byte, cursor *int) (result []byte, err error) {
defer func() {
if err2 := recover(); err2 != nil {
err = fmt.Errorf("panic: %v", err)
}
}()
prevLen := buf[*cursor]
*cursor++
if prevLen == zipBigPrevLen {
134 changes: 134 additions & 0 deletions helper/bigkey.go
@@ -0,0 +1,134 @@
package helper

import (
"container/heap"
"encoding/csv"
"errors"
"fmt"
"github.com/hdt3213/rdb/bytefmt"
"github.com/hdt3213/rdb/core"
"github.com/hdt3213/rdb/model"
"os"
"strconv"
)

type redisHeap struct {
list []model.RedisObject
capacity int
minSize int // size of min object
minIndex int // index of min object
}

func (h redisHeap) Len() int {
return len(h.list)
}

// Less defines a max-heap: heap.Pop returns the largest object first
func (h *redisHeap) Less(i, j int) bool {
return h.list[i].GetSize() > h.list[j].GetSize()
}

func (h *redisHeap) Swap(i, j int) {
h.list[i], h.list[j] = h.list[j], h.list[i]
}

func (h *redisHeap) Push(x interface{}) {
h.list = append(h.list, x.(model.RedisObject))
}

func (h *redisHeap) Pop() interface{} {
item := h.list[len(h.list)-1]
h.list = h.list[0 : len(h.list)-1]
return item
}

// time complexity: O(n*log(m)), where n is the number of redis objects and m is the heap capacity; m is far less than n
func (h *redisHeap) Append(x model.RedisObject) {
// heap is full, skip
if x.GetSize() <= h.minSize && h.Len() >= h.capacity {
return
}
// if heap is full, pop min object
if h.Len() >= h.capacity {
// assert h.minIndex >= 0
heap.Remove(h, h.minIndex)
}
heap.Push(h, x)
// update h.minSize
h.minSize = 1<<31 - 1
for i := h.Len() - 1; i >= 0; i-- { // heap.Remove/heap.Push may reorder the slice, so rescan for the new minimum
o := h.list[i]
if o.GetSize() < h.minSize {
h.minSize = o.GetSize()
h.minIndex = i
}
}
}

func (h *redisHeap) Dump() []model.RedisObject {
result := make([]model.RedisObject, 0, h.Len())
for h.Len() > 0 {
o := heap.Pop(h).(model.RedisObject)
result = append(result, o)
}
return result
}

func newRedisHeap(cap int) *redisHeap {
list := make([]model.RedisObject, 0, cap)
h := &redisHeap{
list: list,
capacity: cap,
minIndex: -1,
}
heap.Init(h)
return h
}
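// Usage sketch (hypothetical objects slice): the heap keeps at most
// `capacity` objects, so streaming every object of an RDB file through
// Append retains only the top N:
//
//	h := newRedisHeap(3)
//	for _, o := range objects { // objects is a []model.RedisObject
//		h.Append(o) // keeps the 3 largest seen so far
//	}
//	top := h.Dump() // ordered from largest to smallest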

// FindBiggestKeys reads an rdb file and finds the largest N keys.
// The invoker owns output; FindBiggestKeys won't close it
func FindBiggestKeys(rdbFilename string, topN int, output *os.File) error {
if rdbFilename == "" {
return errors.New("src file path is required")
}
if topN <= 0 {
return errors.New("n must greater than 0")
}
rdbFile, err := os.Open(rdbFilename)
if err != nil {
return fmt.Errorf("open rdb %s failed, %v", rdbFilename, err)
}
defer func() {
_ = rdbFile.Close()
}()
p := core.NewDecoder(rdbFile)
topList := newRedisHeap(topN)
err = p.Parse(func(object model.RedisObject) bool {
topList.Append(object)
return true
})
if err != nil {
return err
}
_, err = output.WriteString("database,key,type,size,size_readable,element_count\n")
if err != nil {
return fmt.Errorf("write header failed: %v", err)
}
csvWriter := csv.NewWriter(output)
defer csvWriter.Flush()
for topList.Len() > 0 {
object := heap.Pop(topList).(model.RedisObject)
err = csvWriter.Write([]string{
strconv.Itoa(object.GetDBIndex()),
object.GetKey(),
object.GetType(),
strconv.Itoa(object.GetSize()),
bytefmt.FormatSize(uint64(object.GetSize())),
strconv.Itoa(object.GetElemCount()),
})
if err != nil {
return fmt.Errorf("csv write failed: %v", err)
}
}
return nil
}
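The same report is available from Go; a minimal sketch calling the helper added above (any open *os.File works as the output; os.Stdout matches what cmd.go passes):

```go
package main

import (
	"os"

	"github.com/hdt3213/rdb/helper"
)

func main() {
	// write the 5 biggest keys of cases/memory.rdb as csv to stdout
	if err := helper.FindBiggestKeys("cases/memory.rdb", 5, os.Stdout); err != nil {
		panic(err)
	}
}
```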
49 changes: 49 additions & 0 deletions helper/bigkey_test.go
@@ -0,0 +1,49 @@
package helper

import (
"github.com/hdt3213/rdb/model"
"math/rand"
"sort"
"strconv"
"testing"
)

func TestRedisHeap_Append(t *testing.T) {
sizeMap := make(map[int]struct{}) // The behavior when encountering objects of the same size is undefined
topN := 100
n := topN * 10
objects := make([]model.RedisObject, 0)
for i := 0; i < n; i++ {
var size int
for {
size = rand.Intn(n * 10)
if _, ok := sizeMap[size]; !ok {
sizeMap[size] = struct{}{}
break
}
}
o := &model.StringObject{
BaseObject: &model.BaseObject{
Key: strconv.Itoa(i),
Size: size,
},
}
objects = append(objects, o)
}
topList := newRedisHeap(topN)
for _, o := range objects {
topList.Append(o)
}
actual := topList.Dump()
sort.Slice(objects, func(i, j int) bool {
return objects[i].GetSize() > objects[j].GetSize()
})
expect := objects[0:topN]
for i := 0; i < topN; i++ {
o1 := actual[i]
o2 := expect[i]
if o1.GetSize() != o2.GetSize() {
t.Errorf("wrong answer at index: %d", i)
}
}
}
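The heap test can be run on its own with standard go tooling:

```
go test ./helper -run TestRedisHeap_Append
```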
13 changes: 13 additions & 0 deletions helper/helper.go
@@ -2,6 +2,7 @@ package helper

import (
"encoding/json"
"errors"
"fmt"
"github.com/hdt3213/rdb/core"
"github.com/hdt3213/rdb/model"
@@ -10,6 +11,12 @@ import (

// ToJsons reads an rdb file and converts it to a json file in which each line contains a json object
func ToJsons(rdbFilename string, jsonFilename string) error {
if rdbFilename == "" {
return errors.New("src file path is required")
}
if jsonFilename == "" {
return errors.New("output file path is required")
}
rdbFile, err := os.Open(rdbFilename)
if err != nil {
return fmt.Errorf("open rdb %s failed, %v", rdbFilename, err)
@@ -60,6 +67,12 @@ func ToJsons(rdbFilename string, jsonFilename string) error {

// ToAOF reads an rdb file and converts it to an aof file (Redis Serialization Protocol)
func ToAOF(rdbFilename string, aofFilename string) error {
if rdbFilename == "" {
return errors.New("src file path is required")
}
if aofFilename == "" {
return errors.New("output file path is required")
}
rdbFile, err := os.Open(rdbFilename)
if err != nil {
return fmt.Errorf("open rdb %s failed, %v", rdbFilename, err)
7 changes: 7 additions & 0 deletions helper/memory.go
@@ -2,6 +2,7 @@ package helper

import (
"encoding/csv"
"errors"
"fmt"
"github.com/hdt3213/rdb/bytefmt"
"github.com/hdt3213/rdb/core"
@@ -12,6 +13,12 @@

// MemoryProfile reads an rdb file, analyzes memory usage, and writes the result to a csv file
func MemoryProfile(rdbFilename string, csvFilename string) error {
if rdbFilename == "" {
return errors.New("src file path is required")
}
if csvFilename == "" {
return errors.New("output file path is required")
}
rdbFile, err := os.Open(rdbFilename)
if err != nil {
return fmt.Errorf("open rdb %s failed, %v", rdbFilename, err)
