-
Notifications
You must be signed in to change notification settings - Fork 421
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ddtrace/tracer: create debug mode for old, open spans (#2149)
- Loading branch information
Showing
6 changed files
with
550 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,229 @@ | ||
// Unless explicitly stated otherwise all files in this repository are licensed | ||
// under the Apache License Version 2.0. | ||
// This product includes software developed at Datadog (https://www.datadoghq.com/). | ||
// Copyright 2016 Datadog, Inc. | ||
|
||
package tracer | ||
|
||
import ( | ||
"container/list" | ||
"fmt" | ||
"strings" | ||
"time" | ||
|
||
"gopkg.in/DataDog/dd-trace-go.v1/internal/log" | ||
) | ||
|
||
type abandonedSpansDebugger struct { | ||
// abandonedSpans holds a linked list of potentially abandoned spans for all traces. | ||
abandonedSpans *list.List | ||
|
||
// cIn takes newly created spans and adds them to abandonedSpans | ||
cIn chan *span | ||
|
||
// cOut signals for a finished span to be removed from abandonedSpans | ||
cOut chan *span | ||
} | ||
|
||
var tickerInterval = time.Minute | ||
var logSize = 9000 | ||
|
||
// castAsBucketNode takes in a list.Element and checks if it is a nonempty | ||
// list Element. If it can be cast as a list.List, that list will be returned | ||
// bool flag `true`. If not, it will return `nil` with bool flag `false` | ||
func castAsBucketNode(e *list.Element) (*list.List, bool) { | ||
if e == nil { | ||
return nil, false | ||
} | ||
ls, ok := e.Value.(*list.List) | ||
if !ok || ls == nil || ls.Front() == nil { | ||
return nil, false | ||
} | ||
return ls, true | ||
} | ||
|
||
// castAsSpanNode takes in a list.Element and checks if it is a non-nil | ||
// span object. If it can be cast as a span, that span will be returned | ||
// bool flag `true`. If not, it will return `nil` with bool flag `false` | ||
func castAsSpanNode(e *list.Element) (*span, bool) { | ||
if e == nil { | ||
return nil, false | ||
} | ||
s, ok := e.Value.(*span) | ||
if !ok || s == nil { | ||
return nil, false | ||
} | ||
return s, true | ||
} | ||
|
||
// findSpanBucket takes in a start time in Unix Nanoseconds and the user | ||
// configured interval, then finds the bucket that the given span should | ||
// belong in. All spans within the same bucket should have a start time that | ||
// is within `interval` nanoseconds of each other. | ||
func (t *tracer) findSpanBucket(start int64, interval time.Duration) (*list.Element, bool) { | ||
for node := t.spansDebugger.abandonedSpans.Front(); node != nil; node = node.Next() { | ||
bucket, ok := castAsBucketNode(node) | ||
if !ok { | ||
continue | ||
} | ||
for spNode := bucket.Front(); spNode != nil; spNode = spNode.Next() { | ||
sp, ok := castAsSpanNode(spNode) | ||
if !ok { | ||
continue | ||
} | ||
if start >= sp.Start && start-sp.Start < interval.Nanoseconds() { | ||
return node, true | ||
} | ||
} | ||
} | ||
return nil, false | ||
} | ||
|
||
// reportAbandonedSpans periodically finds and reports potentially abandoned | ||
// spans that are older than the given interval. These spans are stored in a | ||
// bucketed linked list, sorted by their `Start` time, where the front of the | ||
// list contains the oldest spans, and the end of the list contains the newest spans. | ||
func (t *tracer) reportAbandonedSpans(interval time.Duration) { | ||
tick := time.NewTicker(tickerInterval) | ||
defer tick.Stop() | ||
for { | ||
select { | ||
case <-tick.C: | ||
logAbandonedSpans(t.spansDebugger.abandonedSpans, &interval) | ||
case s := <-t.spansDebugger.cIn: | ||
bNode, bOk := t.findSpanBucket(s.Start, interval) | ||
bucket, ok := castAsBucketNode(bNode) | ||
if bOk && ok { | ||
bucket.PushBack(s) | ||
break | ||
} | ||
// If no matching bucket exists, create a new one and append the new | ||
// span to the top of the bucket. | ||
b := list.New() | ||
b.PushBack(s) | ||
t.spansDebugger.abandonedSpans.PushBack(b) | ||
case s := <-t.spansDebugger.cOut: | ||
bNode, bOk := t.findSpanBucket(s.Start, interval) | ||
bucket, ok := castAsBucketNode(bNode) | ||
if !bOk || !ok { | ||
break | ||
} | ||
// If a matching bucket exists, attempt to find the element containing | ||
// the finished span, then remove that element from the bucket. | ||
// If a bucket becomes empty, also remove that bucket from the | ||
// abandoned spans list. | ||
for node := bucket.Front(); node != nil; node = node.Next() { | ||
sp, sOk := castAsSpanNode(node) | ||
if !sOk { | ||
continue | ||
} | ||
if sp.SpanID != s.SpanID { | ||
continue | ||
} | ||
bucket.Remove(node) | ||
if bucket.Front() == nil { | ||
t.spansDebugger.abandonedSpans.Remove(bNode) | ||
} | ||
} | ||
case <-t.stop: | ||
logAbandonedSpans(t.spansDebugger.abandonedSpans, nil) | ||
return | ||
} | ||
} | ||
} | ||
|
||
// abandonedSpanString takes a span and returns a human readable string representing | ||
// that span. If `interval` is not nil, it will check if the span is older than the | ||
// user configured timeout, and return an empty string if it is not. | ||
func abandonedSpanString(s *span, interval *time.Duration, curTime int64) string { | ||
s.Lock() | ||
defer s.Unlock() | ||
age := curTime - s.Start | ||
if interval != nil && age < interval.Nanoseconds() { | ||
return "" | ||
} | ||
a := fmt.Sprintf("%d sec", age/1e9) | ||
return fmt.Sprintf("[name: %s, span_id: %d, trace_id: %d, age: %s],", s.Name, s.SpanID, s.TraceID, a) | ||
} | ||
|
||
// abandonedBucketString takes a bucket and returns a human readable string representing | ||
// the contents of the bucket. If `interval` is not nil, it will check if the bucket might | ||
// contain spans older than the user configured timeout. If it does, it will filter for | ||
// older spans. If not, it will print all spans without checking their duration. | ||
func abandonedBucketString(bucket *list.List, interval *time.Duration, curTime int64) (int, string) { | ||
var sb strings.Builder | ||
spanCount := 0 | ||
node := bucket.Back() | ||
back, ok := castAsSpanNode(node) | ||
filter := ok && interval != nil && curTime-back.Start >= interval.Nanoseconds() | ||
for node := bucket.Front(); node != nil; node = node.Next() { | ||
span, ok := castAsSpanNode(node) | ||
if !ok { | ||
continue | ||
} | ||
timeout := interval | ||
if !filter { | ||
timeout = nil | ||
} | ||
msg := abandonedSpanString(span, timeout, curTime) | ||
sb.WriteString(msg) | ||
spanCount++ | ||
} | ||
return spanCount, sb.String() | ||
} | ||
|
||
// logAbandonedSpans returns a string containing potentially abandoned spans. If `interval` is | ||
// `nil`, it will print all unfinished spans. If `interval` holds a time.Duration, it will | ||
// only print spans that are older than `interval`. It will also truncate the log message to | ||
// `logSize` bytes to prevent overloading the logger. | ||
func logAbandonedSpans(l *list.List, interval *time.Duration) { | ||
var sb strings.Builder | ||
spanCount := 0 | ||
truncated := false | ||
curTime := now() | ||
|
||
for bucketNode := l.Front(); bucketNode != nil; bucketNode = bucketNode.Next() { | ||
bucket, ok := castAsBucketNode(bucketNode) | ||
if !ok { | ||
continue | ||
} | ||
|
||
// since spans are bucketed by time, finding a bucket that is newer | ||
// than the allowed time interval means that all spans in this bucket | ||
// and future buckets will be younger than `interval`, and thus aren't | ||
// worth checking. | ||
if interval != nil { | ||
spanNode := bucket.Front() | ||
sp, ok := castAsSpanNode(spanNode) | ||
if !ok { | ||
continue | ||
} | ||
if curTime-sp.Start < interval.Nanoseconds() { | ||
continue | ||
} | ||
} | ||
if truncated { | ||
continue | ||
} | ||
nSpans, msg := abandonedBucketString(bucket, interval, curTime) | ||
spanCount += nSpans | ||
|
||
space := logSize - len(sb.String()) | ||
if len(msg) > space { | ||
msg = msg[0:space] | ||
truncated = true | ||
} | ||
sb.WriteString(msg) | ||
} | ||
|
||
if spanCount == 0 { | ||
return | ||
} | ||
|
||
log.Warn("%d abandoned spans:", spanCount) | ||
if truncated { | ||
log.Warn("Too many abandoned spans. Truncating message.") | ||
sb.WriteString("...") | ||
} | ||
log.Warn(sb.String()) | ||
} |
Oops, something went wrong.