Skip to content

Commit

Permalink
Merge pull request kubernetes#24 from memoryliu/try_fix
Browse files Browse the repository at this point in the history
Fix goroutine deadlock issue
  • Loading branch information
CarlJi committed Feb 19, 2020
2 parents f1c06ed + ecece77 commit d52ad5b
Showing 1 changed file with 20 additions and 35 deletions.
55 changes: 20 additions & 35 deletions prow/kube/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,21 +108,30 @@ func getJobLabelMap(pjs []prowapi.ProwJob) map[jobLabel]float64 {
return jobLabelMap
}

// getCompletedJobs walks pjs and handles every ProwJob whose CompletionTime is
// set and is at or after a cutoff computed relative to metav1.Now().
//
// NOTE(review): this listing appears to interleave two versions of the function
// from a diff whose +/- markers are not present: one that returns a channel of
// matching jobs, and one that returns nothing and sets the prowJobPassRate and
// prowJobDuration gauges directly. Confirm against the actual file before editing.
func getCompletedJobs(pjs []prowapi.ProwJob) (completedJobs chan prowapi.ProwJob) {
// Only consider jobs completed within the last 10 minutes, so that historical
// data is not reported to Prometheus as well.
base := metav1.Now().Time.Add(-600 * time.Second)
// NOTE(review): the channel holds at most 10 jobs and is filled by the loop in
// this same function; no receiver is visible inside this function, so a send
// past the 10th matching job would block here.
completedJobs = make(chan prowapi.ProwJob, 10)
func getCompletedJobs(pjs []prowapi.ProwJob) {
// Only consider jobs completed within the last 2 minutes, so that historical
// data is not reported to Prometheus as well.
base := metav1.Now().Time.Add(-120 * time.Second)
// count is never modified below, so every matching job sets the gauge to 1.
var count float64 = 1
for _, pj := range pjs {
if pj.Status.CompletionTime != nil {
// Keep jobs that completed exactly at the cutoff or later.
if pj.Status.CompletionTime.Time.After(base) || pj.Status.CompletionTime.Time.Equal(base) {
logrus.Infof("add completed job to chan, %v:%v, completed time : %v, started time : %v, filter time : %v",
pj.Status.BuildID, pj.Spec.Job, pj.Status.CompletionTime.Time, pj.Status.StartTime.Time, base)
completedJobs <- pj
pji := getJobIdentifier(pj)
pjl := getJobLabel(pj)
prowJobPassRate.WithLabelValues(pji.values()...).Set(count) // PassRate must distinguish every individual job, so the labels need to include the BuildId
logrus.Infof("add completed job count to metrics, %v:%v, count: %v, completed time: %v, started time: %v, filter time: %v",
pj.Status.BuildID, pj.Spec.Job, count, pj.Status.CompletionTime.Time, pj.Status.StartTime.Time, base)
// Duration in seconds between the job's start time and completion time.
duration := pj.Status.CompletionTime.Time.Sub(pj.Status.StartTime.Time).Seconds()
// In some extreme cases the start time is later than the completion time
// (cause currently unknown); clamp a negative duration to 0 to avoid
// reporting a bogus metric value.
if duration < 0 {
duration = 0
// NOTE(review): duration has already been reset to 0 above, so this warning
// always prints 0 rather than the negative value that was computed.
logrus.Warnf("job duration is negative value, %v:%v, job duration : %v", pj.Status.BuildID, pj.Spec.Job, duration)
}
prowJobDuration.WithLabelValues(pjl.values()...).Set(duration) // JobDuration only needs to distinguish different kinds of job, e.g. by name
logrus.Infof("add completed job duration to metrics, %v:%v, duration: %v, completed time: %v, started time: %v, filter time: %v",
pj.Status.BuildID, pj.Spec.Job, duration, pj.Status.CompletionTime.Time, pj.Status.StartTime.Time, base)
}
}
}
close(completedJobs)
return completedJobs
}

func getJobLabel(pj prowapi.ProwJob) jobLabel {
Expand Down Expand Up @@ -181,31 +190,8 @@ func GatherProwJobMetrics(current []prowapi.ProwJob) {
for jl, count := range getJobLabelMap(current) {
prowJobs.WithLabelValues(jl.values()...).Set(count)
}

completedJobs := getCompletedJobs(current)
syncCh := make(chan struct{}, 1)
go func(filterJob chan prowapi.ProwJob) {
for {
var count float64 = 1
if pj, ok := <-filterJob; ok {
pji := getJobIdentifier(pj)
pjl := getJobLabel(pj)
prowJobPassRate.WithLabelValues(pji.values()...).Set(count) //PassRate需要区分每一个job,所以需要带上BuildId
duration := pj.Status.CompletionTime.Time.Sub(pj.Status.StartTime.Time).Seconds()
//部分极端情况下开始时间晚于完成时间,原因暂时不清楚,负值时设置duration=0,避免打点异常
if duration < 0 {
duration = 0
logrus.Warnf("job duration is negative value, %v:%v, job duration : %v", pj.Status.BuildID, pj.Spec.Job, duration)
}
prowJobDuration.WithLabelValues(pjl.values()...).Set(duration) //JobDuration仅仅需要区分不同类Job,如名称不同
logrus.Infof("add completed job to metrics, %v:%v, job count : %v, "+
"job duration : %v", pj.Status.BuildID, pj.Spec.Job, count, duration)
} else {
break
}
}
syncCh <- struct{}{}
}(completedJobs)
// record the current completed state and duration of ProwJob CRs on the system
getCompletedJobs(current)

// record state transitions since the last time we were called
currentStates := map[jobIdentifier]prowapi.ProwJobState{}
Expand All @@ -220,5 +206,4 @@ func GatherProwJobMetrics(current []prowapi.ProwJob) {
}

previousStates = currentStates
<-syncCh
}

0 comments on commit d52ad5b

Please sign in to comment.