Skip to content

Commit

Permalink
fjson and fuzzing cleanup (#1262)
Browse files Browse the repository at this point in the history
  • Loading branch information
AlekSi committed Oct 17, 2022
1 parent 68d11f7 commit b179486
Show file tree
Hide file tree
Showing 24 changed files with 151 additions and 148 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ node_modules/
state.json

# for now
testdata/fuzz/
**/testdata/fuzz/
records/

# for local Docker Compse changes; see https://docs.docker.com/compose/extends/#multiple-compose-files
Expand Down
5 changes: 4 additions & 1 deletion .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,13 @@ issues:
path: internal/wire
text: bson

# only `testutil` and `wire` packages can import `fjson` package
# only `testutil`, `bson` (for testing), and `wire` packages can import `fjson` package
- linters: [depguard]
path: internal/util/testutil
text: fjson
- linters: [depguard]
path: internal/bson
text: fjson
- linters: [depguard]
path: internal/wire
text: fjson
Expand Down
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ The `internal` subpackages contain most of the FerretDB code:
* `types/fjson` provides converters from/to FJSON for built-in and `types` types.
FJSON adds some extensions to JSON for keeping object keys in order,
preserving BSON type information in the values themselves, etc.
It is used for logging.
It is used for logging of BSON values and wire protocol messages.
* `bson` package provides converters from/to BSON for built-in and `types` types.
* `wire` package provides wire protocol implementation.
* `clientconn` package provides client connection implementation.
Expand Down
14 changes: 4 additions & 10 deletions Taskfile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -174,17 +174,14 @@ tasks:
-coverprofile=integration-mongodb.txt . -target-port=37017 -compat-port=0
bench-short:
desc: "Benchmark for about 40 seconds (with default BENCHTIME)"
desc: "Benchmark for about 25 seconds (with default BENCHTIME)"
cmds:
- go test -list='Benchmark.*' ./...
- echo 'Running eight functions for {{.BENCHTIME}} each...'
- go test -bench=BenchmarkArray -benchtime={{.BENCHTIME}} ./internal/types/fjson/ | tee -a new.txt
- go test -bench=BenchmarkDocument -benchtime={{.BENCHTIME}} ./internal/types/fjson/ | tee -a new.txt
- echo 'Running five functions for {{.BENCHTIME}} each...'
- go test -bench=BenchmarkArray -benchtime={{.BENCHTIME}} ./internal/bson/ | tee -a new.txt
- go test -bench=BenchmarkDocument -benchtime={{.BENCHTIME}} ./internal/bson/ | tee -a new.txt
- go test -bench=BenchmarkArray -benchtime={{.BENCHTIME}} ./internal/handlers/pg/pjson/ | tee -a new.txt
- go test -bench=BenchmarkDocument -benchtime={{.BENCHTIME}} ./internal/handlers/pg/pjson/ | tee -a new.txt
- go test -bench=BenchmarkArray -benchtime={{.BENCHTIME}} ./internal/tjson/ | tee -a new.txt
- go test -bench=BenchmarkDocument -benchtime={{.BENCHTIME}} ./internal/tjson/ | tee -a new.txt
- bin/benchstat{{exeExt}} old.txt new.txt

Expand All @@ -198,17 +195,14 @@ tasks:
# Those commands should still run tests (i.e., should not have -run=XXX flags)
# to fill seed corpus for fuzz tests that use WriteSeedCorpusFile (e.g., FuzzHandler).
fuzz:
desc: "Fuzz for about 3 minutes (with default FUZZTIME)"
desc: "Fuzz for about 2 minutes (with default FUZZTIME)"
cmds:
- go test -list='Fuzz.*' ./...
- echo 'Running eleven functions for {{.FUZZTIME}} each...'
- go test -fuzz=FuzzArray -fuzztime={{.FUZZTIME}} ./internal/types/fjson/
- go test -fuzz=FuzzDocument -fuzztime={{.FUZZTIME}} ./internal/types/fjson/
- echo 'Running eight functions for {{.FUZZTIME}} each...'
- go test -fuzz=FuzzArray -fuzztime={{.FUZZTIME}} ./internal/bson/
- go test -fuzz=FuzzDocument -fuzztime={{.FUZZTIME}} ./internal/bson/
- go test -fuzz=FuzzArray -fuzztime={{.FUZZTIME}} ./internal/handlers/pg/pjson/
- go test -fuzz=FuzzDocument -fuzztime={{.FUZZTIME}} ./internal/handlers/pg/pjson/
- go test -fuzz=FuzzArray -fuzztime={{.FUZZTIME}} ./internal/tjson/
- go test -fuzz=FuzzDocument -fuzztime={{.FUZZTIME}} ./internal/tjson/
- go test -fuzz=FuzzMsg -fuzztime={{.FUZZTIME}} ./internal/wire/
- go test -fuzz=FuzzQuery -fuzztime={{.FUZZTIME}} ./internal/wire/
Expand Down
7 changes: 4 additions & 3 deletions cmd/fuzztool/fuzztool.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,19 +227,20 @@ func main() {
case "generated":
src = generatedCorpus
default:
src, err = filepath.Abs(src)
src, err = filepath.Abs(cli.Corpus.Src)
if err != nil {
logger.Fatal(err)
}
}

switch cli.Corpus.Dst {
case "seed":
dst = seedCorpus
// Because we would need to add `/testdata/fuzz` back, and that's not very easy.
logger.Fatal("Copying to seed corpus is not supported.")
case "generated":
dst = generatedCorpus
default:
dst, err = filepath.Abs(dst)
dst, err = filepath.Abs(cli.Corpus.Dst)
if err != nil {
logger.Fatal(err)
}
Expand Down
56 changes: 32 additions & 24 deletions internal/bson/array_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,30 +28,38 @@ func convertArray(a *types.Array) *arrayType {
return &res
}

var arrayTestCases = []testCase{{
name: "array_all",
v: convertArray(must.NotFail(types.NewArray(
must.NotFail(types.NewArray()),
types.Binary{Subtype: types.BinaryUser, B: []byte{0x42}},
true,
time.Date(2021, 7, 27, 9, 35, 42, 123000000, time.UTC).Local(),
must.NotFail(types.NewDocument()),
42.13,
int32(42),
int64(42),
"foo",
types.Null,
))),
b: testutil.MustParseDumpFile("testdata", "array_all.hex"),
}, {
name: "EOF",
b: []byte{0x00},
bErr: `unexpected EOF`,
}, {
name: "array_fuzz1",
b: testutil.MustParseDumpFile("testdata", "array_fuzz1.hex"),
bErr: `key 0 is "8"`,
}}
var (
arrayAll = testCase{
name: "array_all",
v: convertArray(must.NotFail(types.NewArray(
must.NotFail(types.NewArray()),
types.Binary{Subtype: types.BinaryUser, B: []byte{0x42}},
true,
time.Date(2021, 7, 27, 9, 35, 42, 123000000, time.UTC).Local(),
must.NotFail(types.NewDocument()),
42.13,
int32(42),
int64(42),
"foo",
types.Null,
))),
b: testutil.MustParseDumpFile("testdata", "array_all.hex"),
}

arrayEOF = testCase{
name: "EOF",
b: []byte{0x00},
bErr: `unexpected EOF`,
}

arrayFuzz1 = testCase{
name: "array_fuzz1",
b: testutil.MustParseDumpFile("testdata", "array_fuzz1.hex"),
bErr: `key 0 is "8"`,
}

arrayTestCases = []testCase{arrayAll, arrayEOF, arrayFuzz1}
)

func TestArray(t *testing.T) {
t.Parallel()
Expand Down
4 changes: 1 addition & 3 deletions internal/bson/bson.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,6 @@ type bsontype interface {
//go-sumtype:decl bsontype

// TODO https://github.com/FerretDB/FerretDB/issues/260
//
//nolint:deadcode,unused // remove later if it is not needed
func fromBSON(v bsontype) any {
switch v := v.(type) {
case *Document:
Expand Down Expand Up @@ -71,7 +69,7 @@ func fromBSON(v bsontype) any {
case *int64Type:
return int64(*v)
case *CString:
panic("not reached")
panic("CString should not be there")
}

panic(fmt.Sprintf("not reached: %T", v)) // for go-sumtype to work
Expand Down
33 changes: 30 additions & 3 deletions internal/bson/bson_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ import (

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/FerretDB/FerretDB/internal/types/fjson"
)

type testCase struct {
Expand Down Expand Up @@ -108,14 +110,14 @@ func testBinary(t *testing.T, testCases []testCase, newFunc func() bsontype) {
actualB, err := tc.v.MarshalBinary()
require.NoError(t, err)
if !assertEqual(t, tc.b, actualB, "actual:\n%s", hex.Dump(actualB)) {
// unmarshal again to compare BSON values
// unmarshal again to compare values
v := newFunc()
br := bytes.NewReader(actualB)
bufr := bufio.NewReader(br)
err := v.ReadFrom(bufr)
assert.NoError(t, err)
if assertEqual(t, tc.v, v, "expected: %s\nactual : %s", tc.v, v) {
t.Log("BSON values are equal after unmarshalling")
t.Log("values are equal after unmarshalling")
}
assert.Zero(t, br.Len(), "not all br bytes were consumed")
assert.Zero(t, bufr.Buffered(), "not all bufr bytes were consumed")
Expand Down Expand Up @@ -169,7 +171,19 @@ func fuzzBinary(f *testing.F, testCases []testCase, newFunc func() bsontype) {
{
actualB, err := v.MarshalBinary()
require.NoError(t, err)
assert.Equal(t, expectedB, actualB, "MarshalBinary results differ")
if !assert.Equal(t, expectedB, actualB, "MarshalBinary results differ") {
// unmarshal again to compare values
v2 := newFunc()
br2 := bytes.NewReader(actualB)
bufr2 := bufio.NewReader(br2)
err = v2.ReadFrom(bufr2)
assert.NoError(t, err)
if assertEqual(t, v, v2, "expected: %s\nactual : %s", v, v2) {
t.Log("values are equal after unmarshalling")
}
assert.Zero(t, br2.Len(), "not all br bytes were consumed")
assert.Zero(t, bufr2.Buffered(), "not all bufr bytes were consumed")
}
}

// test WriteTo
Expand All @@ -182,6 +196,19 @@ func fuzzBinary(f *testing.F, testCases []testCase, newFunc func() bsontype) {
require.NoError(t, err)
assert.Equal(t, expectedB, bw.Bytes(), "WriteTo results differ")
}

// Test that generated value can be marshaled for logging.
// Currently, that seems to be the best place to check it since generating values from BSON bytes is very easy.
{
// not a "real" type
if _, ok := v.(*CString); ok {
t.Skip()
}

mB, err := fjson.Marshal(fromBSON(v))
require.NoError(t, err)
assert.NotEmpty(t, mB)
}
})
}

Expand Down
2 changes: 2 additions & 0 deletions internal/bson/cstring.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ import (
)

// CString represents BSON zero-terminated UTF-8 string type.
//
// It is not a "real" BSON data type but used by some of them and also by the wire protocol.
type CString string

func (cstr *CString) bsontype() {}
Expand Down
37 changes: 23 additions & 14 deletions internal/bson/document.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ type document interface {
}

// Document represents BSON Document type.
//
// Duplicate fields are not supported yet.
// TODO https://github.com/FerretDB/FerretDB/issues/1263
type Document struct {
m map[string]any
keys []string
Expand Down Expand Up @@ -125,12 +128,18 @@ func (doc *Document) ReadFrom(r *bufio.Reader) error {
return lazyerrors.Errorf("bson.Document.ReadFrom (ename.ReadFrom): %w", err)
}

doc.keys = append(doc.keys, string(ename))
key := string(ename)
doc.keys = append(doc.keys, key)

if doc.m == nil {
doc.m = map[string]any{}
}

if _, ok := doc.m[key]; ok {
// TODO https://github.com/FerretDB/FerretDB/issues/1263
return lazyerrors.Errorf("duplicate key %q", key)
}

switch tag(t) {
case tagDocument:
// TODO check maximum nesting
Expand All @@ -139,7 +148,7 @@ func (doc *Document) ReadFrom(r *bufio.Reader) error {
if err := v.ReadFrom(bufr); err != nil {
return lazyerrors.Errorf("bson.Document.ReadFrom (embedded document): %w", err)
}
doc.m[string(ename)], err = types.ConvertDocument(&v)
doc.m[key], err = types.ConvertDocument(&v)
if err != nil {
return lazyerrors.Errorf("bson.Document.ReadFrom (embedded document): %w", err)
}
Expand All @@ -152,28 +161,28 @@ func (doc *Document) ReadFrom(r *bufio.Reader) error {
return lazyerrors.Errorf("bson.Document.ReadFrom (Array): %w", err)
}
a := types.Array(v)
doc.m[string(ename)] = &a
doc.m[key] = &a

case tagDouble:
var v doubleType
if err := v.ReadFrom(bufr); err != nil {
return lazyerrors.Errorf("bson.Document.ReadFrom (Double): %w", err)
}
doc.m[string(ename)] = float64(v)
doc.m[key] = float64(v)

case tagString:
var v stringType
if err := v.ReadFrom(bufr); err != nil {
return lazyerrors.Errorf("bson.Document.ReadFrom (String): %w", err)
}
doc.m[string(ename)] = string(v)
doc.m[key] = string(v)

case tagBinary:
var v binaryType
if err := v.ReadFrom(bufr); err != nil {
return lazyerrors.Errorf("bson.Document.ReadFrom (Binary): %w", err)
}
doc.m[string(ename)] = types.Binary(v)
doc.m[key] = types.Binary(v)

case tagUndefined:
return lazyerrors.Errorf("bson.Document.ReadFrom: unhandled element type `Undefined (value) — Deprecated`")
Expand All @@ -183,53 +192,53 @@ func (doc *Document) ReadFrom(r *bufio.Reader) error {
if err := v.ReadFrom(bufr); err != nil {
return lazyerrors.Errorf("bson.Document.ReadFrom (ObjectID): %w", err)
}
doc.m[string(ename)] = types.ObjectID(v)
doc.m[key] = types.ObjectID(v)

case tagBool:
var v boolType
if err := v.ReadFrom(bufr); err != nil {
return lazyerrors.Errorf("bson.Document.ReadFrom (Bool): %w", err)
}
doc.m[string(ename)] = bool(v)
doc.m[key] = bool(v)

case tagDateTime:
var v dateTimeType
if err := v.ReadFrom(bufr); err != nil {
return lazyerrors.Errorf("bson.Document.ReadFrom (DateTime): %w", err)
}
doc.m[string(ename)] = time.Time(v)
doc.m[key] = time.Time(v)

case tagNull:
// skip calling ReadFrom that does nothing
doc.m[string(ename)] = types.Null
doc.m[key] = types.Null

case tagRegex:
var v regexType
if err := v.ReadFrom(bufr); err != nil {
return lazyerrors.Errorf("bson.Document.ReadFrom (Regex): %w", err)
}
doc.m[string(ename)] = types.Regex(v)
doc.m[key] = types.Regex(v)

case tagInt32:
var v int32Type
if err := v.ReadFrom(bufr); err != nil {
return lazyerrors.Errorf("bson.Document.ReadFrom (Int32): %w", err)
}
doc.m[string(ename)] = int32(v)
doc.m[key] = int32(v)

case tagTimestamp:
var v timestampType
if err := v.ReadFrom(bufr); err != nil {
return lazyerrors.Errorf("bson.Document.ReadFrom (Timestamp): %w", err)
}
doc.m[string(ename)] = types.Timestamp(v)
doc.m[key] = types.Timestamp(v)

case tagInt64:
var v int64Type
if err := v.ReadFrom(bufr); err != nil {
return lazyerrors.Errorf("bson.Document.ReadFrom (Int64): %w", err)
}
doc.m[string(ename)] = int64(v)
doc.m[key] = int64(v)

case tagDBPointer, tagDecimal, tagJavaScript, tagJavaScriptScope, tagMaxKey, tagMinKey, tagSymbol:
return lazyerrors.Errorf("bson.Document.ReadFrom: unhandled element type %#02x (%s)", t, tag(t))
Expand Down

1 comment on commit b179486

@vercel
Copy link

@vercel vercel bot commented on b179486 Oct 17, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

ferret-db – ./

ferret-db-git-main-ferretdb.vercel.app
ferret-db-ferretdb.vercel.app
ferret-db.vercel.app

Please sign in to comment.