Skip to content

Commit

Permalink
internal/zstd: configure window size for single segment frames
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexanderYastrebov committed Sep 27, 2023
1 parent 5e9afab commit 9bafe01
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 9 deletions.
Binary file not shown.
10 changes: 10 additions & 0 deletions src/internal/zstd/testdata/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
This directory holds files for testing zstd.NewReader.

Each one is a Zstandard compressed file named as hash.arbitrary-name.zst,
where hash is the first eight hexadecimal digits of the SHA256 hash
of the expected uncompressed content:

zstd -d < 1890a371.gettysburg.txt-100x.zst | sha256sum | head -c 8
1890a371

The test uses the hash value to verify the decompression result.
22 changes: 13 additions & 9 deletions src/internal/zstd/zstd.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,10 +235,7 @@ retry:
// Figure out the maximum amount of data we need to retain
// for backreferences.
var windowSize int
if singleSegment {
// No window required, as all the data is in a single buffer.
windowSize = 0
} else {
if !singleSegment {
// Window descriptor. RFC 3.1.1.1.2.
windowDescriptor := r.scratch[0]
exponent := uint64(windowDescriptor >> 3)
Expand All @@ -252,11 +249,6 @@ retry:
if fuzzing && (windowLog > 31 || windowSize > 1<<27) {
return r.makeError(relativeOffset, "windowSize too large")
}

// RFC 8878 permits us to set an 8M max on window size.
if windowSize > 8<<20 {
windowSize = 8 << 20
}
}

// Frame_Content_Size. RFC 3.1.1.4.
Expand All @@ -278,6 +270,18 @@ retry:
panic("unreachable")
}

// RFC 3.1.1.1.2.
// When Single_Segment_Flag is set, Window_Descriptor is not present.
// In this case, Window_Size is Frame_Content_Size.
if singleSegment {
windowSize = int(r.remainingFrameSize)
}

// RFC 8878 3.1.1.1.1.2. permits us to set an 8M max on window size.
if windowSize > 8<<20 {
windowSize = 8 << 20
}

relativeOffset += headerSize

r.sawFrameHeader = true
Expand Down
35 changes: 35 additions & 0 deletions src/internal/zstd/zstd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@ package zstd

import (
"bytes"
"crypto/sha256"
"fmt"
"internal/race"
"internal/testenv"
"io"
"os"
"os/exec"
"path/filepath"
"strings"
"sync"
"testing"
Expand Down Expand Up @@ -232,6 +234,39 @@ func TestAlloc(t *testing.T) {
}
}

// TestFileSamples decompresses every file in the testdata directory whose
// name ends in ".zst" and verifies the result against the hash embedded in
// the file name. The portion of the name before the first "." is expected
// to equal the first eight hexadecimal digits of the SHA256 hash of the
// uncompressed content.
func TestFileSamples(t *testing.T) {
	samples, err := os.ReadDir("testdata")
	if err != nil {
		t.Fatal(err)
	}

	for _, sample := range samples {
		name := sample.Name()
		if !strings.HasSuffix(name, ".zst") {
			// Skip anything that is not a compressed sample.
			continue
		}

		t.Run(name, func(t *testing.T) {
			f, err := os.Open(filepath.Join("testdata", name))
			if err != nil {
				t.Fatal(err)
			}
			// Release the file descriptor when the subtest finishes,
			// including when it exits early via t.Fatal.
			defer f.Close()

			// Stream the decompressed data straight into the hash so the
			// whole output never has to be held in memory.
			r := NewReader(f)
			h := sha256.New()
			if _, err := io.Copy(h, r); err != nil {
				t.Fatal(err)
			}
			got := fmt.Sprintf("%x", h.Sum(nil))[:8]

			// The name is known to end in ".zst", so a "." is always
			// present and the "found" result of Cut can be ignored.
			want, _, _ := strings.Cut(name, ".")
			if got != want {
				t.Errorf("Wrong uncompressed content hash: got %s, want %s", got, want)
			}
		})
	}
}

func BenchmarkLarge(b *testing.B) {
b.StopTimer()
b.ReportAllocs()
Expand Down

0 comments on commit 9bafe01

Please sign in to comment.