Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ddtrace/tracer: create debug mode for old, open spans #2149

Merged
merged 35 commits into from
Aug 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
65c8223
debug open spans without generics
hannahkm Jul 24, 2023
b150b1d
typo fix to reportOpenSpans
hannahkm Jul 25, 2023
341ee11
custom linked list and tests
hannahkm Jul 28, 2023
43af0c6
fix mistake in tests
hannahkm Jul 28, 2023
c9954f5
faster testing and fixed lint
hannahkm Jul 31, 2023
0990b67
change log level to warn
hannahkm Aug 1, 2023
7312db9
bucketed linked list implementation - WIP
hannahkm Aug 2, 2023
e029583
Merge branch 'main' into hannahkm/debug-open-spans
hannahkm Aug 2, 2023
a71e1b9
fix error while merging
hannahkm Aug 2, 2023
2db154f
improve bucketed linked list
hannahkm Aug 3, 2023
dd9a565
enable printing all open spans
hannahkm Aug 4, 2023
9143c09
Merge branch 'main' into hannahkm/debug-open-spans
hannahkm Aug 4, 2023
d4dc06a
remove extra comments
hannahkm Aug 4, 2023
da608da
fix cilint
hannahkm Aug 4, 2023
cebcaaf
resolve data race in testing
hannahkm Aug 4, 2023
80978da
remove locks from code
hannahkm Aug 4, 2023
870b76b
remove generics and improve readability
hannahkm Aug 7, 2023
296a77a
rename file to match contents
hannahkm Aug 7, 2023
9bea3b0
update docs to represent new functionality
hannahkm Aug 7, 2023
5d723db
remove public print
hannahkm Aug 7, 2023
b38206f
list unit tests and less now() calls
hannahkm Aug 8, 2023
388c9cf
Merge branch 'main' into hannahkm/debug-open-spans
hannahkm Aug 8, 2023
ea79dfb
switch to container/list
hannahkm Aug 11, 2023
46c0424
truncate abandoned span logs
hannahkm Aug 11, 2023
7e2017b
clean up typos
hannahkm Aug 11, 2023
a1eecee
resolve data race while printing spans
hannahkm Aug 11, 2023
5364a94
new string methods
hannahkm Aug 14, 2023
8329bf6
fixed missing duration check
hannahkm Aug 14, 2023
8d7a057
clarify abandoned span logs
hannahkm Aug 15, 2023
a6d10d9
improved readability and comments
hannahkm Aug 16, 2023
9b4c7d6
Merge branch 'main' into hannahkm/debug-open-spans
hannahkm Aug 17, 2023
9e1e417
add tests for completeness
hannahkm Aug 17, 2023
0eff316
add organized struct for abandoned spans
hannahkm Aug 18, 2023
7c0f935
remove commented test
hannahkm Aug 18, 2023
2d7bb8f
Merge branch 'main' into hannahkm/debug-open-spans
ajgajg1134 Aug 18, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
229 changes: 229 additions & 0 deletions ddtrace/tracer/abandoned_spans.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2016 Datadog, Inc.

package tracer

import (
"container/list"
"fmt"
"strings"
"time"

"gopkg.in/DataDog/dd-trace-go.v1/internal/log"
)

type abandonedSpansDebugger struct {
// abandonedSpans holds a linked list of potentially abandoned spans for all traces.
abandonedSpans *list.List

// cIn takes newly created spans and adds them to abandonedSpans
cIn chan *span

// cOut signals for a finished span to be removed from abandonedSpans
cOut chan *span
}

var tickerInterval = time.Minute
var logSize = 9000

// castAsBucketNode takes in a list.Element and checks if it is a nonempty
// list Element. If it can be cast as a list.List, that list will be returned
// bool flag `true`. If not, it will return `nil` with bool flag `false`
func castAsBucketNode(e *list.Element) (*list.List, bool) {
if e == nil {
return nil, false
}
ls, ok := e.Value.(*list.List)
if !ok || ls == nil || ls.Front() == nil {
return nil, false
}
return ls, true
}

// castAsSpanNode takes in a list.Element and checks if it is a non-nil
// span object. If it can be cast as a span, that span will be returned
// bool flag `true`. If not, it will return `nil` with bool flag `false`
func castAsSpanNode(e *list.Element) (*span, bool) {
if e == nil {
return nil, false
}
s, ok := e.Value.(*span)
if !ok || s == nil {
return nil, false
}
return s, true
}

// findSpanBucket takes in a start time in Unix Nanoseconds and the user
// configured interval, then finds the bucket that the given span should
// belong in. All spans within the same bucket should have a start time that
// is within `interval` nanoseconds of each other.
func (t *tracer) findSpanBucket(start int64, interval time.Duration) (*list.Element, bool) {
for node := t.spansDebugger.abandonedSpans.Front(); node != nil; node = node.Next() {
bucket, ok := castAsBucketNode(node)
if !ok {
continue
}
for spNode := bucket.Front(); spNode != nil; spNode = spNode.Next() {
sp, ok := castAsSpanNode(spNode)
if !ok {
continue
}
if start >= sp.Start && start-sp.Start < interval.Nanoseconds() {
ajgajg1134 marked this conversation as resolved.
Show resolved Hide resolved
return node, true
}
}
}
return nil, false
}

// reportAbandonedSpans periodically finds and reports potentially abandoned
// spans that are older than the given interval. These spans are stored in a
// bucketed linked list, sorted by their `Start` time, where the front of the
// list contains the oldest spans, and the end of the list contains the newest spans.
func (t *tracer) reportAbandonedSpans(interval time.Duration) {
tick := time.NewTicker(tickerInterval)
defer tick.Stop()
for {
select {
case <-tick.C:
logAbandonedSpans(t.spansDebugger.abandonedSpans, &interval)
case s := <-t.spansDebugger.cIn:
bNode, bOk := t.findSpanBucket(s.Start, interval)
bucket, ok := castAsBucketNode(bNode)
if bOk && ok {
bucket.PushBack(s)
break
}
// If no matching bucket exists, create a new one and append the new
// span to the top of the bucket.
b := list.New()
hannahkm marked this conversation as resolved.
Show resolved Hide resolved
b.PushBack(s)
t.spansDebugger.abandonedSpans.PushBack(b)
case s := <-t.spansDebugger.cOut:
bNode, bOk := t.findSpanBucket(s.Start, interval)
bucket, ok := castAsBucketNode(bNode)
if !bOk || !ok {
break
}
// If a matching bucket exists, attempt to find the element containing
// the finished span, then remove that element from the bucket.
// If a bucket becomes empty, also remove that bucket from the
// abandoned spans list.
for node := bucket.Front(); node != nil; node = node.Next() {
sp, sOk := castAsSpanNode(node)
if !sOk {
continue
}
if sp.SpanID != s.SpanID {
continue
}
bucket.Remove(node)
if bucket.Front() == nil {
t.spansDebugger.abandonedSpans.Remove(bNode)
}
}
case <-t.stop:
logAbandonedSpans(t.spansDebugger.abandonedSpans, nil)
return
}
}
}

// abandonedSpanString takes a span and returns a human readable string representing
// that span. If `interval` is not nil, it will check if the span is older than the
// user configured timeout, and return an empty string if it is not.
func abandonedSpanString(s *span, interval *time.Duration, curTime int64) string {
ajgajg1134 marked this conversation as resolved.
Show resolved Hide resolved
s.Lock()
defer s.Unlock()
age := curTime - s.Start
if interval != nil && age < interval.Nanoseconds() {
return ""
}
a := fmt.Sprintf("%d sec", age/1e9)
return fmt.Sprintf("[name: %s, span_id: %d, trace_id: %d, age: %s],", s.Name, s.SpanID, s.TraceID, a)
}

// abandonedBucketString takes a bucket and returns a human readable string representing
// the contents of the bucket. If `interval` is not nil, it will check if the bucket might
// contain spans older than the user configured timeout. If it does, it will filter for
// older spans. If not, it will print all spans without checking their duration.
func abandonedBucketString(bucket *list.List, interval *time.Duration, curTime int64) (int, string) {
var sb strings.Builder
spanCount := 0
node := bucket.Back()
back, ok := castAsSpanNode(node)
filter := ok && interval != nil && curTime-back.Start >= interval.Nanoseconds()
for node := bucket.Front(); node != nil; node = node.Next() {
span, ok := castAsSpanNode(node)
if !ok {
continue
}
timeout := interval
if !filter {
timeout = nil
}
msg := abandonedSpanString(span, timeout, curTime)
sb.WriteString(msg)
spanCount++
}
return spanCount, sb.String()
}

// logAbandonedSpans returns a string containing potentially abandoned spans. If `interval` is
// `nil`, it will print all unfinished spans. If `interval` holds a time.Duration, it will
// only print spans that are older than `interval`. It will also truncate the log message to
// `logSize` bytes to prevent overloading the logger.
func logAbandonedSpans(l *list.List, interval *time.Duration) {
darccio marked this conversation as resolved.
Show resolved Hide resolved
var sb strings.Builder
spanCount := 0
truncated := false
curTime := now()

for bucketNode := l.Front(); bucketNode != nil; bucketNode = bucketNode.Next() {
bucket, ok := castAsBucketNode(bucketNode)
if !ok {
continue
}

// since spans are bucketed by time, finding a bucket that is newer
// than the allowed time interval means that all spans in this bucket
// and future buckets will be younger than `interval`, and thus aren't
// worth checking.
if interval != nil {
spanNode := bucket.Front()
sp, ok := castAsSpanNode(spanNode)
if !ok {
continue
}
if curTime-sp.Start < interval.Nanoseconds() {
continue
}
}
if truncated {
continue
}
nSpans, msg := abandonedBucketString(bucket, interval, curTime)
spanCount += nSpans

space := logSize - len(sb.String())
if len(msg) > space {
msg = msg[0:space]
truncated = true
}
sb.WriteString(msg)
}

if spanCount == 0 {
return
}

log.Warn("%d abandoned spans:", spanCount)
if truncated {
log.Warn("Too many abandoned spans. Truncating message.")
sb.WriteString("...")
}
log.Warn(sb.String())
}
Loading
Loading