Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
cc49f41
feat: capture aks-node-controller errors into
Devinwong Feb 3, 2026
057acc2
add shellspec guard
Devinwong Feb 3, 2026
34bd4ec
update
Devinwong Feb 3, 2026
f8b06ce
update
Devinwong Feb 3, 2026
2f6530d
add bash in UT
Devinwong Feb 3, 2026
326db95
fix ut
Devinwong Feb 3, 2026
0e15df0
add ut case
Devinwong Feb 3, 2026
b0bbf4e
move the createGuestAgentEvent to aks-node-controller
Devinwong Feb 3, 2026
47f3714
update
Devinwong Feb 3, 2026
2864a4b
fix comments
Devinwong Feb 4, 2026
7431aca
update permission
Devinwong Feb 4, 2026
6daf1e0
move logCleanup to the bottom
Devinwong Feb 4, 2026
1560fd3
Update aks-node-controller/app.go
Devinwong Feb 4, 2026
88804b4
Update aks-node-controller/app.go
Devinwong Feb 4, 2026
c146c03
update test
Devinwong Feb 4, 2026
5422f93
update test
Devinwong Feb 4, 2026
54e29bf
update comment
Devinwong Feb 4, 2026
24bfea1
update operation id format
Devinwong Feb 4, 2026
0e08df5
update GuestAgentEvent to different files
Devinwong Feb 5, 2026
ed7d5ed
address comments
Devinwong Feb 5, 2026
95a34ef
change type and function to private
Devinwong Feb 5, 2026
0826b36
remove unnecessary eventsLoggingDir
Devinwong Feb 5, 2026
ee00678
update log path
Devinwong Feb 5, 2026
a12a819
add command registry to hold taskname and command handler
Devinwong Feb 5, 2026
815095e
fix golint error
Devinwong Feb 5, 2026
415a64b
update filename to nano seconds
Devinwong Feb 5, 2026
4e2fe43
Update aks-node-controller/helpers/guestagent_test.go
Devinwong Feb 5, 2026
81367ab
add UT
Devinwong Feb 5, 2026
1aa92d7
update log permission to 644
Devinwong Feb 5, 2026
734565b
set to 644
Devinwong Feb 6, 2026
3444e2a
remove extra line
Devinwong Feb 6, 2026
225e572
Update aks-node-controller/helpers/guestagent.go
Devinwong Feb 6, 2026
2f7d444
move event generation from main to app
r2k1 Feb 6, 2026
3c0da94
refactor guest agent events for testability
r2k1 Feb 7, 2026
d0b2479
refactor: replace CreateEventFunc with EventLogger struct
r2k1 Feb 7, 2026
3150963
simplify testing, log on continue on errors from Events method
r2k1 Feb 7, 2026
5c641b5
rename cmdRunner => cmdRun
r2k1 Feb 7, 2026
7adebf2
Update aks-node-controller/helpers/guestagent.go
Devinwong Feb 9, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 65 additions & 20 deletions aks-node-controller/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,55 @@ import (
"path/filepath"
"strconv"
"strings"
"time"

"github.com/Azure/agentbaker/aks-node-controller/helpers"
"github.com/Azure/agentbaker/aks-node-controller/parser"
"github.com/Azure/agentbaker/aks-node-controller/pkg/nodeconfigutils"
"github.com/fsnotify/fsnotify"
)

type App struct {
// cmdRunner is a function that runs the given command.
// cmdRun is a function that runs the given command.
// the goal of this field is to make it easier to test the app by mocking the command runner.
cmdRunner func(cmd *exec.Cmd) error
cmdRun func(cmd *exec.Cmd) error
eventLogger *helpers.EventLogger
}

// commandMetadata holds all metadata for a command in one place.
// Instances are the values of the map returned by getCommandRegistry, keyed by
// the command name given on the command line.
type commandMetadata struct {
// taskName is the task name passed to the event logger when run reports the
// command's "Starting", "Completed", or error events.
taskName string
// handler executes the command. run invokes it with the App, the context, and
// the full argument slice it received (args[1] is the command name), and
// returns the handler's error unchanged.
handler func(*App, context.Context, []string) error
}

// getCommandRegistry builds the lookup table from command name to its metadata
// (the task name used for events plus the handler that executes the command).
// Supporting a new command only requires one more entry in the returned map.
func getCommandRegistry() map[string]commandMetadata {
	// runProvisionCommand executes provisioning with the arguments that follow the
	// command name and forwards the outcome to writeCompleteFileOnError.
	runProvisionCommand := func(a *App, ctx context.Context, args []string) error {
		result, err := a.runProvision(ctx, args[2:])
		// Notify after every provisioning attempt (success is a no-op inside notifier).
		a.writeCompleteFileOnError(result, err)
		return err
	}

	// runProvisionWaitCommand waits on the provision status files, then prints and
	// logs whatever output ProvisionWait produced.
	runProvisionWaitCommand := func(a *App, ctx context.Context, args []string) error {
		statusFiles := ProvisionStatusFiles{
			ProvisionJSONFile:     provisionJSONFilePath,
			ProvisionCompleteFile: provisionCompleteFilePath,
		}
		output, err := a.ProvisionWait(ctx, statusFiles)
		//nolint:forbidigo // stdout is part of the interface
		fmt.Println(output)
		slog.Info("provision-wait finished", "provisionOutput", output)
		return err
	}

	return map[string]commandMetadata{
		"provision":      {taskName: "Provision", handler: runProvisionCommand},
		"provision-wait": {taskName: "ProvisionWait", handler: runProvisionWaitCommand},
	}
}

// provision.json values are emitted as strings by the shell jq invocation.
Expand Down Expand Up @@ -64,25 +103,31 @@ func (a *App) Run(ctx context.Context, args []string) int {
}

func (a *App) run(ctx context.Context, args []string) error {
if len(args) < 2 {
command := ""
if len(args) >= 2 {
command = args[1]
}
if command == "" {
return errors.New("missing command argument")
}
switch args[1] {
case "provision":
provisionResult, err := a.runProvision(ctx, args[2:])
// Always notify after provisioning attempt (success is a no-op inside notifier)
a.writeCompleteFileOnError(provisionResult, err)
return err
case "provision-wait":
provisionStatusFiles := ProvisionStatusFiles{ProvisionJSONFile: provisionJSONFilePath, ProvisionCompleteFile: provisionCompleteFilePath}
provisionOutput, err := a.ProvisionWait(ctx, provisionStatusFiles)
//nolint:forbidigo // stdout is part of the interface
fmt.Println(provisionOutput)
slog.Info("provision-wait finished", "provisionOutput", provisionOutput)
return err
default:
return fmt.Errorf("unknown command: %s", args[1])

cmd, ok := getCommandRegistry()[command]
if !ok {
return fmt.Errorf("unknown command: %s", command)
}

startTime := time.Now()
a.eventLogger.LogEvent(cmd.taskName, "Starting", helpers.EventLevelInformational, startTime, startTime)

Comment on lines 119 to 121
Copy link

Copilot AI Feb 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

a.createEvent(...) is called unconditionally. If an App instance is constructed without createEvent set (e.g., in future tests/utility code), this will panic at runtime. Consider defaulting createEvent to a no-op in Run/run (or in an App constructor) when it is nil.

Copilot uses AI. Check for mistakes.
err := cmd.handler(a, ctx, args)
endTime := time.Now()
if err != nil {
message := fmt.Sprintf("aks-node-controller exited with error %s", err.Error())
a.eventLogger.LogEvent(cmd.taskName, message, helpers.EventLevelError, startTime, endTime)
} else {
a.eventLogger.LogEvent(cmd.taskName, "Completed", helpers.EventLevelInformational, startTime, endTime)
}
Comment on lines +119 to 129
Copy link

Copilot AI Feb 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

run() emits a "Starting" guest agent event and then a second event for "Completed"/error. Existing guest-agent event emitters in parts/linux/cloud-init/artifacts/ generally emit a single event per operation (Timestamp=startTime, OperationId=endTime). Emitting two events per command increases event volume and also makes it hard to correlate start/end because OperationId will differ between the two events. If the goal is to capture errors, consider only emitting an event on failure (or use a single event emitted at the end with start/end timing in the Message).

Copilot uses AI. Check for mistakes.
return err
}

func (a *App) Provision(ctx context.Context, flags ProvisionFlags) (*ProvisionResult, error) {
Expand Down Expand Up @@ -129,7 +174,7 @@ func (a *App) Provision(ctx context.Context, flags ProvisionFlags) (*ProvisionRe
var stdoutBuf, stderrBuf bytes.Buffer
cmd.Stdout = io.MultiWriter(os.Stdout, &stdoutBuf)
cmd.Stderr = io.MultiWriter(os.Stderr, &stderrBuf)
err = a.cmdRunner(cmd)
err = a.cmdRun(cmd)
exitCode := -1
if cmd.ProcessState != nil {
exitCode = cmd.ProcessState.ExitCode()
Expand Down Expand Up @@ -174,7 +219,7 @@ func (a *App) runProvision(ctx context.Context, args []string) (*ProvisionResult
return provisionResult, errors.New(provisionResult.Error)
}
if *dryRun {
a.cmdRunner = cmdRunnerDryRun
a.cmdRun = cmdRunnerDryRun
}
return a.Provision(ctx, ProvisionFlags{ProvisionConfig: *provisionConfig})
}
Expand Down
Loading
Loading