Skip to content

Commit

Permalink
Threw away complicated caching logic.
Browse files Browse the repository at this point in the history
The read logic now looks like:

* If this read started where the last finished, request the entire rest of the
  object and serve from that until the next seek.

* Otherwise, request only what the user asked, expanding to a 1 MiB minimum
  read size.

This significantly improves sequential read throughput. The cost to random
reads is in the noise.

Fixes #103.

==========================================

% go install -v && gcsfuse --temp-dir /mnt/ssd0 jacobsa-standard-asia ~/mp
% go build ./benchmarks/read_within_file && cputime ./read_within_file --file ~/mp/10g

Before:
    Read 272.00 MiB in 10.439062389s (26.06 MiB/s)

After:
    Read 568.00 MiB in 10.007478618s (56.76 MiB/s)

% go install -v && gcsfuse --temp-dir /mnt/ssd0 jacobsa-standard-asia ~/mp
% go build ./benchmarks/read_within_file && cputime ./read_within_file --random --file ~/mp/10g

Before:
    Read 15.00 MiB in 10.089234126s (1.49 MiB/s)

After:
    Read 15.00 MiB in 10.461594056s (1.43 MiB/s)

% go install -v && gcsfuse --temp-dir /mnt/ssd0 jacobsa-standard-asia ~/mp
% go build ./benchmarks/read_full_file && cputime ./read_full_file --dir ~/mp

Before:

    Full-file read times:
      50th ptile: 12.401941ms (5.04 GiB/s)
      90th ptile: 13.103936ms (4.77 GiB/s)
      98th ptile: 13.727666ms (4.55 GiB/s)

After:

    Full-file read times:
      50th ptile: 12.543545ms (4.98 GiB/s)
      90th ptile: 13.049257ms (4.79 GiB/s)
      98th ptile: 13.714352ms (4.56 GiB/s)
  • Loading branch information
jacobsa committed Aug 5, 2015
2 parents 1d92d3e + cb6655a commit babd421
Show file tree
Hide file tree
Showing 37 changed files with 1,864 additions and 6,087 deletions.
8 changes: 4 additions & 4 deletions README.md
Expand Up @@ -84,17 +84,17 @@ cost of consistency guarantees. These caching behaviors can be controlled with
the flags `--stat-cache-ttl` and `--type-cache-ttl`. See
[semantics.md](docs/semantics.md#caching) for more information.

## Downloading file contents
## Downloading object contents

Behind the scenes, when a newly-opened file is first modified, gcsfuse downloads
the entire backing object's contents from GCS. The contents are stored in a
local temporary file whose location is controlled by the flag `--temp-dir`.
Later, when the file is closed or fsync'd, gcsfuse writes the contents of the
local file back to GCS as a new object generation.

Files that are not modified are read chunk by chunk on demand. Such non-dirty
content is cached in the temporary directory, with a size limit defined by
`--temp-dir-bytes`. The chunk size is controlled by `--gcs-chunk-size`.
Files that are read but not modified are read portion by portion on demand.
gcsfuse uses a heuristic to detect when a file is being read sequentially, and
will issue fewer, larger read requests to GCS in this case.

The consequence of this is that gcsfuse is relatively efficient when reading or
writing entire large files, but will not be particularly fast for small numbers
Expand Down
16 changes: 0 additions & 16 deletions flags.go
Expand Up @@ -143,25 +143,13 @@ func newApp() (app *cli.App) {
"inodes.",
},

cli.IntFlag{
Name: "gcs-chunk-size",
Value: 1 << 24,
Usage: "Max chunk size for loading GCS objects.",
},

cli.StringFlag{
Name: "temp-dir",
Value: "",
Usage: "Temporary directory for local GCS object copies. " +
"(default: system default, likely /tmp)",
},

cli.IntFlag{
Name: "temp-dir-bytes",
Value: 1 << 31,
Usage: "Size limit of the temporary directory.",
},

/////////////////////////
// Debugging
/////////////////////////
Expand Down Expand Up @@ -208,9 +196,7 @@ type flagStorage struct {
// Tuning
StatCacheTTL time.Duration
TypeCacheTTL time.Duration
GCSChunkSize uint64
TempDir string
TempDirLimit int64

// Debugging
DebugFuse bool
Expand Down Expand Up @@ -238,9 +224,7 @@ func populateFlags(c *cli.Context) (flags *flagStorage) {
// Tuning,
StatCacheTTL: c.Duration("stat-cache-ttl"),
TypeCacheTTL: c.Duration("type-cache-ttl"),
GCSChunkSize: uint64(c.Int("gcs-chunk-size")),
TempDir: c.String("temp-dir"),
TempDirLimit: int64(c.Int("temp-dir-bytes")),
ImplicitDirs: c.Bool("implicit-dirs"),

// Debugging,
Expand Down
6 changes: 0 additions & 6 deletions flags_test.go
Expand Up @@ -78,9 +78,7 @@ func (t *FlagsTest) Defaults() {
// Tuning
ExpectEq(time.Minute, f.StatCacheTTL)
ExpectEq(time.Minute, f.TypeCacheTTL)
ExpectEq(1<<24, f.GCSChunkSize)
ExpectEq("", f.TempDir)
ExpectEq(1<<31, f.TempDirLimit)

// Debugging
ExpectFalse(f.DebugFuse)
Expand Down Expand Up @@ -149,8 +147,6 @@ func (t *FlagsTest) Numbers() {
"--gid=19",
"--limit-bytes-per-sec=123.4",
"--limit-ops-per-sec=56.78",
"--gcs-chunk-size=1000",
"--temp-dir-bytes=2000",
}

f := parseArgs(args)
Expand All @@ -160,8 +156,6 @@ func (t *FlagsTest) Numbers() {
ExpectEq(19, f.Gid)
ExpectEq(123.4, f.EgressBandwidthLimitBytesPerSecond)
ExpectEq(56.78, f.OpRateLimitHz)
ExpectEq(1000, f.GCSChunkSize)
ExpectEq(2000, f.TempDirLimit)
}

func (t *FlagsTest) Strings() {
Expand Down
12 changes: 8 additions & 4 deletions fs/foreign_modifications_test.go
Expand Up @@ -520,6 +520,8 @@ func (t *ForeignModsTest) ReadFromFile_Small() {
}

func (t *ForeignModsTest) ReadFromFile_Large() {
randSrc := rand.New(rand.NewSource(0xdeadbeef))

// Create some random contents.
const contentLen = 1 << 22
contents := randBytes(contentLen)
Expand Down Expand Up @@ -547,8 +549,8 @@ func (t *ForeignModsTest) ReadFromFile_Large() {
defer func() { AssertEq(nil, f.Close()) }()

// Read part of it.
offset := rand.Int63n(contentLen + 1)
size := rand.Intn(int(contentLen - offset))
offset := randSrc.Int63n(contentLen + 1)
size := randSrc.Intn(int(contentLen - offset))

n, err := f.ReadAt(buf[:size], offset)
if offset+int64(size) == contentLen && err == io.EOF {
Expand All @@ -560,9 +562,11 @@ func (t *ForeignModsTest) ReadFromFile_Large() {
AssertTrue(
bytes.Equal(contents[offset:offset+int64(size)], buf[:n]),
"offset: %d\n"+
"size:%d\n",
"size: %d\n"+
"n: %d",
offset,
size)
size,
n)
}

start := time.Now()
Expand Down

0 comments on commit babd421

Please sign in to comment.