Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

profiler: goroutineswait saw-stop mechanism #942

Merged
merged 8 commits into from
Jun 30, 2021
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 36 additions & 26 deletions profiler/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"time"
"unicode"
Expand Down Expand Up @@ -82,21 +83,22 @@ type config struct {
agentless bool
// targetURL is the upload destination URL. It will be set by the profiler on start to either apiURL or agentURL
// based on the other options.
targetURL string
apiURL string // apiURL is the Datadog intake API URL
agentURL string // agentURL is the Datadog agent profiling URL
service, env string
hostname string
statsd StatsdClient
httpClient *http.Client
tags []string
types map[ProfileType]struct{}
period time.Duration
cpuDuration time.Duration
uploadTimeout time.Duration
mutexFraction int
blockRate int
outputDir string
targetURL string
apiURL string // apiURL is the Datadog intake API URL
agentURL string // agentURL is the Datadog agent profiling URL
service, env string
hostname string
statsd StatsdClient
httpClient *http.Client
tags []string
types map[ProfileType]struct{}
period time.Duration
cpuDuration time.Duration
uploadTimeout time.Duration
maxGoroutinesWait int
mutexFraction int
blockRate int
outputDir string
}

func urlForSite(site string) (string, error) {
Expand Down Expand Up @@ -127,17 +129,18 @@ func (c *config) addProfileType(t ProfileType) {

func defaultConfig() (*config, error) {
c := config{
env: defaultEnv,
apiURL: defaultAPIURL,
service: filepath.Base(os.Args[0]),
statsd: &statsd.NoOpClient{},
httpClient: defaultClient,
period: DefaultPeriod,
cpuDuration: DefaultDuration,
blockRate: DefaultBlockRate,
mutexFraction: DefaultMutexFraction,
uploadTimeout: DefaultUploadTimeout,
tags: []string{fmt.Sprintf("pid:%d", os.Getpid())},
env: defaultEnv,
apiURL: defaultAPIURL,
service: filepath.Base(os.Args[0]),
statsd: &statsd.NoOpClient{},
httpClient: defaultClient,
period: DefaultPeriod,
cpuDuration: DefaultDuration,
blockRate: DefaultBlockRate,
mutexFraction: DefaultMutexFraction,
uploadTimeout: DefaultUploadTimeout,
maxGoroutinesWait: 1000, // arbitrary value, should limit STW to ~30ms
tags: []string{fmt.Sprintf("pid:%d", os.Getpid())},
}
for _, t := range defaultProfileTypes {
c.addProfileType(t)
Expand Down Expand Up @@ -206,6 +209,13 @@ func defaultConfig() (*config, error) {
if v := os.Getenv("DD_PROFILING_OUTPUT_DIR"); v != "" {
withOutputDir(v)(&c)
}
if v := os.Getenv("DD_PROFILING_WAIT_PROFILE_MAX_GOROUTINES"); v != "" {
n, err := strconv.Atoi(v)
if err != nil {
return nil, fmt.Errorf("DD_PROFILING_WAIT_PROFILE_MAX_GOROUTINES: %s", err)
}
c.maxGoroutinesWait = n
}
return &c, nil
}

Expand Down
5 changes: 5 additions & 0 deletions profiler/profile.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"errors"
"fmt"
"io"
"runtime"
"runtime/pprof"
"time"

Expand Down Expand Up @@ -231,6 +232,10 @@ func goroutineProfile(cfg *config) (*profile, error) {
}

func goroutineWaitProfile(cfg *config) (*profile, error) {
if n := runtime.NumGoroutine(); n > cfg.maxGoroutinesWait {
return nil, fmt.Errorf("skipping goroutines wait profile: %d goroutines exceeds DD_PROFILING_WAIT_PROFILE_MAX_GOROUTINES limit of %d", n, cfg.maxGoroutinesWait)
}

var (
text = &bytes.Buffer{}
pprof = &bytes.Buffer{}
Expand Down
51 changes: 51 additions & 0 deletions profiler/profile_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,11 @@ package profiler

import (
"bytes"
"fmt"
"io"
"io/ioutil"
"os"
"strconv"
"testing"
"time"

Expand Down Expand Up @@ -166,6 +169,54 @@ main.main()
"main.indirectShortSleepLoop2",
})
})

t.Run("goroutineswaitLimit", func(t *testing.T) {
// spawGoroutines spawns n goroutines, waits for them to start executing,
// and then returns a func to stop them. For more details about `executing`
// see:
// https://github.com/DataDog/dd-trace-go/pull/942#discussion_r656924335
spawnGoroutines := func(n int) func() {
executing := make(chan struct{})
stopping := make(chan struct{})
for i := 0; i < n; i++ {
go func() {
executing <- struct{}{}
stopping <- struct{}{}
}()
<-executing
}
return func() {
for i := 0; i < n; i++ {
<-stopping
}
}
}

goroutines := 100
limit := 10

stop := spawnGoroutines(goroutines)
defer stop()
envVar := "DD_PROFILING_WAIT_PROFILE_MAX_GOROUTINES"
oldVal := os.Getenv(envVar)
os.Setenv(envVar, strconv.Itoa(limit))
defer os.Setenv(envVar, oldVal)

defer func(old func(_ string, _ io.Writer, _ int) error) { lookupProfile = old }(lookupProfile)
lookupProfile = func(_ string, w io.Writer, _ int) error {
_, err := w.Write([]byte(""))
return err
}

p, err := unstartedProfiler()
require.NoError(t, err)
_, err = p.runProfile(expGoroutineWaitProfile)
var errRoutines, errLimit int
msg := "skipping goroutines wait profile: %d goroutines exceeds DD_PROFILING_WAIT_PROFILE_MAX_GOROUTINES limit of %d"
fmt.Sscanf(err.Error(), msg, &errRoutines, &errLimit)
require.GreaterOrEqual(t, errRoutines, goroutines)
require.Equal(t, limit, errLimit)
})
}

func Test_goroutineDebug2ToPprof_CrashSafety(t *testing.T) {
Expand Down