Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ddtrace/tracer: create debug mode for old, open spans #2149

Merged
merged 35 commits into from
Aug 18, 2023
Merged
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
65c8223
debug open spans without generics
hannahkm Jul 24, 2023
b150b1d
typo fix to reportOpenSpans
hannahkm Jul 25, 2023
341ee11
custom linked list and tests
hannahkm Jul 28, 2023
43af0c6
fix mistake in tests
hannahkm Jul 28, 2023
c9954f5
faster testing and fixed lint
hannahkm Jul 31, 2023
0990b67
change log level to warn
hannahkm Aug 1, 2023
7312db9
bucketed linked list implementation - WIP
hannahkm Aug 2, 2023
e029583
Merge branch 'main' into hannahkm/debug-open-spans
hannahkm Aug 2, 2023
a71e1b9
fix error while merging
hannahkm Aug 2, 2023
2db154f
improve bucketed linked list
hannahkm Aug 3, 2023
dd9a565
enable printing all open spans
hannahkm Aug 4, 2023
9143c09
Merge branch 'main' into hannahkm/debug-open-spans
hannahkm Aug 4, 2023
d4dc06a
remove extra comments
hannahkm Aug 4, 2023
da608da
fix cilint
hannahkm Aug 4, 2023
cebcaaf
resolve data race in testing
hannahkm Aug 4, 2023
80978da
remove locks from code
hannahkm Aug 4, 2023
870b76b
remove generics and improve readability
hannahkm Aug 7, 2023
296a77a
rename file to match contents
hannahkm Aug 7, 2023
9bea3b0
update docs to represent new functionality
hannahkm Aug 7, 2023
5d723db
remove public print
hannahkm Aug 7, 2023
b38206f
list unit tests and less now() calls
hannahkm Aug 8, 2023
388c9cf
Merge branch 'main' into hannahkm/debug-open-spans
hannahkm Aug 8, 2023
ea79dfb
switch to container/list
hannahkm Aug 11, 2023
46c0424
truncate abandoned span logs
hannahkm Aug 11, 2023
7e2017b
clean up typos
hannahkm Aug 11, 2023
a1eecee
resolve data race while printing spans
hannahkm Aug 11, 2023
5364a94
new string methods
hannahkm Aug 14, 2023
8329bf6
fixed missing duration check
hannahkm Aug 14, 2023
8d7a057
clarify abandoned span logs
hannahkm Aug 15, 2023
a6d10d9
improved readability and comments
hannahkm Aug 16, 2023
9b4c7d6
Merge branch 'main' into hannahkm/debug-open-spans
hannahkm Aug 17, 2023
9e1e417
add tests for completeness
hannahkm Aug 17, 2023
0eff316
add organized struct for abandoned spans
hannahkm Aug 18, 2023
7c0f935
remove commented test
hannahkm Aug 18, 2023
2d7bb8f
Merge branch 'main' into hannahkm/debug-open-spans
ajgajg1134 Aug 18, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
224 changes: 224 additions & 0 deletions ddtrace/tracer/abandoned_spans.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2016 Datadog, Inc.

package tracer

import (
"container/list"
"fmt"
"strings"
"time"

"gopkg.in/DataDog/dd-trace-go.v1/internal/log"
)

var tickerInterval = time.Minute
var logSize = 9000

// isBucketNode takes in a list.Element and checks if it is a nonempty
// list Element
func isBucketNode(e *list.Element) (*list.List, bool) {
hannahkm marked this conversation as resolved.
Show resolved Hide resolved
ls, ok := e.Value.(*list.List)
if !ok || ls == nil || ls.Front() == nil {
return nil, false
}
return ls, true
}

// isSpanNode takes in a list.Element and checks if it is a non-nil
// span object
func isSpanNode(e *list.Element) (*span, bool) {
s, ok := e.Value.(*span)
if !ok || s == nil {
return nil, false
}
return s, true
}

// reportAbandonedSpans periodically finds and reports potentially abandoned
// spans that are older than the given interval. These spans are stored in a
// bucketed linked list, sorted by their `Start` time, where the front of the
// list contains the oldest spans, and the end of the list contains the newest spans.
func (t *tracer) reportAbandonedSpans(interval time.Duration) {
tick := time.NewTicker(tickerInterval)
defer tick.Stop()
for {
select {
case <-tick.C:
logAbandonedSpans(t.abandonedSpans, &interval)
case s := <-t.cIn:
bNode := t.abandonedSpans.Front()
if bNode == nil {
b := list.New()
b.PushBack(s)
t.abandonedSpans.PushBack(b)
break
}
// All spans within the same bucket should have a start time that
// is within `interval` nanoseconds of each other.
// This loop should continue until it finds the an existing bucket
// with spans that have started within `interval` nanoseconds before
// the new span has started.
for bNode != nil {
bucket, bOk := isBucketNode(bNode)
if !bOk {
bNode = bNode.Next()
continue
}
sNode := bucket.Front()
if sNode == nil {
bNode = bNode.Next()
continue
}
sp, sOk := isSpanNode(sNode)
if sOk && s.Start-sp.Start <= interval.Nanoseconds() {
bucket.PushBack(s)
break
}
bNode = bNode.Next()
}
if bNode != nil {
break
}
// If no matching bucket exists, create a new one and append the new
// span to the top of the bucket.
b := list.New()
hannahkm marked this conversation as resolved.
Show resolved Hide resolved
b.PushBack(s)
t.abandonedSpans.PushBack(b)
case s := <-t.cOut:
// This loop should continue until it finds the bucket with spans
// starting within `interval` nanoseconds of the finished span,
// then remove that span from the bucket.
for node := t.abandonedSpans.Front(); node != nil; node = node.Next() {
hannahkm marked this conversation as resolved.
Show resolved Hide resolved
bucket, ok := isBucketNode(node)
if !ok {
continue
}
spNode := bucket.Front()
sp, ok := isSpanNode(spNode)
if !ok {
continue
}
if s.Start-sp.Start > interval.Nanoseconds() || sp.Start-s.Start > interval.Nanoseconds() {
continue
}

for spNode != nil {
sp, ok := isSpanNode(spNode)
if !ok {
continue
}
if s.SpanID == sp.SpanID {
bucket.Remove(spNode)
if bucket.Front() == nil {
t.abandonedSpans.Remove(node)
}
break
}
spNode = spNode.Next()
}
}
case <-t.stop:
logAbandonedSpans(t.abandonedSpans, nil)
return
}
}
}

// abandonedSpanString takes a span and returns a human readable string representing
// that span. If `interval` is not nil, it will check if the span is older than the
// user configured timeout, and return an empty string if it is not.
func abandonedSpanString(s *span, interval *time.Duration, curTime int64) string {
ajgajg1134 marked this conversation as resolved.
Show resolved Hide resolved
s.Lock()
defer s.Unlock()
age := curTime - s.Start
if interval != nil && age < interval.Nanoseconds() {
return ""
}
a := fmt.Sprintf("%d sec", age/1e9)
return fmt.Sprintf("[name: %s, span_id: %d, trace_id: %d, age: %s],", s.Name, s.SpanID, s.TraceID, a)
}

// abandonedBucketString takes a bucket and returns a human readable string representing
// the contents of the bucket. If `interval` is not nil, it will check if the bucket might
// contain spans older than the user configured timeout. If it does, it will filter for
// older spans. If not, it will print all spans without checking their duration.
func abandonedBucketString(bucket *list.List, interval *time.Duration, curTime int64) (int, string) {
var sb strings.Builder
spanCount := 0
node := bucket.Back()
back, ok := isSpanNode(node)
filter := ok && interval != nil && curTime-back.Start >= interval.Nanoseconds()
for node := bucket.Front(); node != nil; node = node.Next() {
span, ok := isSpanNode(node)
if !ok {
continue
}
timeout := interval
if !filter {
timeout = nil
}
msg := abandonedSpanString(span, timeout, curTime)
sb.WriteString(msg)
spanCount++
}
return spanCount, sb.String()
}

// logAbandonedSpans returns a string containing potentially abandoned spans. If `interval` is
// `nil`, it will print all unfinished spans. If `interval` holds a time.Duration, it will
// only print spans that are older than `interval`. It will also truncate the log message to
// `logSize` bytes to prevent overloading the logger.
func logAbandonedSpans(l *list.List, interval *time.Duration) {
darccio marked this conversation as resolved.
Show resolved Hide resolved
var sb strings.Builder
spanCount := 0
truncated := false
curTime := now()

for bucketNode := l.Front(); bucketNode != nil; bucketNode = bucketNode.Next() {
bucket, ok := isBucketNode(bucketNode)
if !ok {
continue
}

// since spans are bucketed by time, finding a bucket that is newer
// than the allowed time interval means that all spans in this bucket
// and future buckets will be younger than `interval`, and thus aren't
// worth checking.
if interval != nil {
spanNode := bucket.Front()
sp, ok := isSpanNode(spanNode)
if !ok {
continue
}
if curTime-sp.Start < interval.Nanoseconds() {
continue
}
}
if truncated {
continue
}
nSpans, msg := abandonedBucketString(bucket, interval, curTime)
spanCount += nSpans

space := logSize - len(sb.String())
if len(msg) > space {
msg = msg[0:space]
truncated = true
}
sb.WriteString(msg)
}

if spanCount == 0 {
return
}

log.Warn("%d abandoned spans:", spanCount)
if truncated {
log.Warn("Too many abandoned spans. Truncating message.")
sb.WriteString("...")
}
log.Warn(sb.String())
}
Loading
Loading