Skip to content

Add Prometheus Metric for In-Progress Workflow Job Duration #4042

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions cmd/actionsmetricsserver/main.go
Original file line number Diff line number Diff line change
@@ -134,9 +134,10 @@ func main() {
}

eventReader := &actionsmetrics.EventReader{
Log: ctrl.Log.WithName("workflowjobmetrics-eventreader"),
GitHubClient: ghClient,
Events: make(chan interface{}, 1024*1024),
Log: ctrl.Log.WithName("workflowjobmetrics-eventreader"),
GitHubClient: ghClient,
Events: make(chan interface{}, 1024*1024),
InProgressJobs: make(map[int64]actionsmetrics.InProgressJob),
}

webhookServer := &actionsmetrics.WebhookServer{
52 changes: 52 additions & 0 deletions pkg/actionsmetrics/event_reader.go
Original file line number Diff line number Diff line change
@@ -7,6 +7,7 @@ import (
"net/http"
"regexp"
"strings"
"sync"
"time"

"github.com/go-logr/logr"
@@ -16,6 +17,16 @@ import (
"github.com/actions/actions-runner-controller/github"
)

// inProgressJobCheckInterval is the period of the ticker in
// ProcessWorkflowJobEvents. On every tick, the in-progress duration counter
// is advanced for each job currently tracked as in progress.
const inProgressJobCheckInterval = 5 * time.Second

// InProgressJob stores timing with labels for an in-progress job.
//
// One value is recorded per workflow job ID when its "in_progress" event is
// processed, and the entry is deleted again when the "completed" event for
// the same job ID is processed.
type InProgressJob struct {
// StartTime is the wall-clock time (time.Now()) at which the "in_progress"
// event was processed by the event reader. It is not taken from the
// webhook payload.
StartTime time.Time
// Labels is a copy of the metric labels computed for the job's
// "in_progress" event. The periodic ticker uses it to select the counter
// series to advance for this job.
Labels prometheus.Labels
}

type EventReader struct {
Log logr.Logger

@@ -24,6 +35,11 @@ type EventReader struct {

// Event queue
Events chan interface{}

// Map of in-progress jobs by job ID
InProgressJobs map[int64]InProgressJob

inProgressJobsLock sync.RWMutex
}

// HandleWorkflowJobEvent sends the event to the reader's channel for processing
@@ -38,10 +54,27 @@ func (reader *EventReader) HandleWorkflowJobEvent(event interface{}) {
//
// Should be called asynchronously with `go`
func (reader *EventReader) ProcessWorkflowJobEvents(ctx context.Context) {
// Create a ticker that runs every `inProgressJobCheckInterval`
ticker := time.NewTicker(inProgressJobCheckInterval)
defer ticker.Stop()

for {
select {
case event := <-reader.Events:
reader.ProcessWorkflowJobEvent(ctx, event)
case <-ticker.C:
// For every in-progress job, add the time elapsed since the last tick (at most one check interval) to the metric, using the stored labels
reader.inProgressJobsLock.Lock()
for _, jobInfo := range reader.InProgressJobs {
// By default, the duration is the check interval
inProgressJobDuration := inProgressJobCheckInterval.Seconds()
if jobInfo.StartTime.Add(inProgressJobCheckInterval).After(time.Now()) {
// If the job started less than the check interval ago, use the actual duration
inProgressJobDuration = time.Since(jobInfo.StartTime).Seconds()
}
githubWorkflowJobInProgressDurationSeconds.With(jobInfo.Labels).Add(inProgressJobDuration)
}
reader.inProgressJobsLock.Unlock()
case <-ctx.Done():
return
}
@@ -122,6 +155,20 @@ func (reader *EventReader) ProcessWorkflowJobEvent(ctx context.Context, event in
case "in_progress":
githubWorkflowJobsStartedTotal.With(labels).Inc()

// Store the start time and labels of this job
jobID := *e.WorkflowJob.ID
reader.inProgressJobsLock.Lock()
// Make a copy of the labels to avoid any potential concurrent modification issues
labelsCopy := make(prometheus.Labels)
for k, v := range labels {
labelsCopy[k] = v
}
reader.InProgressJobs[jobID] = InProgressJob{
StartTime: time.Now(),
Labels: labelsCopy,
}
reader.inProgressJobsLock.Unlock()

if reader.GitHubClient == nil {
return
}
@@ -139,6 +186,11 @@ func (reader *EventReader) ProcessWorkflowJobEvent(ctx context.Context, event in
case "completed":
githubWorkflowJobsCompletedTotal.With(labels).Inc()

// Remove the job from tracking since it's no longer in progress
reader.inProgressJobsLock.Lock()
delete(reader.InProgressJobs, *e.WorkflowJob.ID)
reader.inProgressJobsLock.Unlock()

// job_conclusion -> (neutral, success, skipped, cancelled, timed_out, action_required, failure)
githubWorkflowJobConclusionsTotal.With(extraLabel("job_conclusion", *e.WorkflowJob.Conclusion, labels)).Inc()

8 changes: 8 additions & 0 deletions pkg/actionsmetrics/metrics.go
Original file line number Diff line number Diff line change
@@ -13,6 +13,7 @@ func init() {
metrics.Registry.MustRegister(
githubWorkflowJobQueueDurationSeconds,
githubWorkflowJobRunDurationSeconds,
githubWorkflowJobInProgressDurationSeconds,
githubWorkflowJobConclusionsTotal,
githubWorkflowJobsQueuedTotal,
githubWorkflowJobsStartedTotal,
@@ -91,6 +92,13 @@ var (
},
metricLabels("job_conclusion"),
)
// githubWorkflowJobInProgressDurationSeconds accumulates, per label set, the
// number of seconds that workflow jobs have spent in the in-progress state.
// It is advanced periodically by the event reader's ticker while a job is
// tracked as in progress, rather than being observed once at completion.
// Label names come from metricLabels() with no extra labels.
//
// NOTE(review): Prometheus naming guidance suggests a `_total` suffix for
// accumulating counters (e.g. ..._duration_seconds_total) — confirm whether
// the name should change before this metric is released.
githubWorkflowJobInProgressDurationSeconds = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "github_workflow_job_in_progress_duration_seconds",
Help: "In progress run times for workflow jobs in seconds",
},
metricLabels(),
)
githubWorkflowJobConclusionsTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "github_workflow_job_conclusions_total",