Skip to content

Commit b06c58b

Browse files
committed
libpod: wait for healthy on main thread
wait for the healthy status on the thread where the container lock is held. Otherwise, if it is performed from a go routine, a different thread is used (since the runtime.LockOSThread() call doesn't have any effect), causing pthread_mutex_unlock() to fail with EPERM. Closes: containers#22651 Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
1 parent 87bd775 commit b06c58b

File tree

5 files changed

+50
-19
lines changed

5 files changed

+50
-19
lines changed

libpod/container_api.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,10 @@ func (c *Container) Start(ctx context.Context, recursive bool) (finalErr error)
115115
}
116116

117117
// Start the container
118-
return c.start(ctx)
118+
if err := c.start(); err != nil {
119+
return err
120+
}
121+
return c.waitForHealthy(ctx)
119122
}
120123

121124
// Update updates the given container.
@@ -194,6 +197,9 @@ func (c *Container) StartAndAttach(ctx context.Context, streams *define.AttachSt
194197
opts.Start = true
195198
opts.Started = startedChan
196199

200+
// attach and start the container on a different thread. waitForHealthy must
201+
// be done later, as it requires to run on the same thread that holds the lock
202+
// for the container.
197203
if err := c.ociRuntime.Attach(c, opts); err != nil {
198204
attachChan <- err
199205
}
@@ -207,7 +213,7 @@ func (c *Container) StartAndAttach(ctx context.Context, streams *define.AttachSt
207213
c.newContainerEvent(events.Attach)
208214
}
209215

210-
return attachChan, nil
216+
return attachChan, c.waitForHealthy(ctx)
211217
}
212218

213219
// RestartWithTimeout restarts a running container and takes a given timeout in uint

libpod/container_internal.go

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -330,10 +330,10 @@ func (c *Container) handleRestartPolicy(ctx context.Context) (_ bool, retErr err
330330
return false, err
331331
}
332332
}
333-
if err := c.start(ctx); err != nil {
333+
if err := c.start(); err != nil {
334334
return false, err
335335
}
336-
return true, nil
336+
return true, c.waitForHealthy(ctx)
337337
}
338338

339339
// Ensure that the container is in a specific state or state.
@@ -1208,7 +1208,7 @@ func (c *Container) reinit(ctx context.Context, retainRetries bool) error {
12081208

12091209
// Initialize (if necessary) and start a container
12101210
// Performs all necessary steps to start a container that is not running
1211-
// Does not lock or check validity
1211+
// Does not lock or check validity, requires to run on the same thread that holds the lock for the container.
12121212
func (c *Container) initAndStart(ctx context.Context) (retErr error) {
12131213
// If we are ContainerStateUnknown, throw an error
12141214
if c.state.State == define.ContainerStateUnknown {
@@ -1253,11 +1253,14 @@ func (c *Container) initAndStart(ctx context.Context) (retErr error) {
12531253
}
12541254

12551255
// Now start the container
1256-
return c.start(ctx)
1256+
if err := c.start(); err != nil {
1257+
return err
1258+
}
1259+
return c.waitForHealthy(ctx)
12571260
}
12581261

12591262
// Internal, non-locking function to start a container
1260-
func (c *Container) start(ctx context.Context) error {
1263+
func (c *Container) start() error {
12611264
if c.config.Spec.Process != nil {
12621265
logrus.Debugf("Starting container %s with command %v", c.ID(), c.config.Spec.Process.Args)
12631266
}
@@ -1298,10 +1301,14 @@ func (c *Container) start(ctx context.Context) error {
12981301

12991302
c.newContainerEvent(events.Start)
13001303

1301-
if err := c.save(); err != nil {
1302-
return err
1303-
}
1304+
return c.save()
1305+
}
13041306

1307+
// waitForHealthy, when sdNotifyMode == SdNotifyModeHealthy, waits up to the DefaultWaitInterval
1308+
// for the container to get into the healthy state and reports the status to the notify socket.
1309+
// The function unlocks the container lock, so it must be called from the same thread that locks
1310+
// the container.
1311+
func (c *Container) waitForHealthy(ctx context.Context) error {
13051312
if c.config.SdNotifyMode != define.SdNotifyModeHealthy {
13061313
return nil
13071314
}
@@ -1315,6 +1322,9 @@ func (c *Container) start(ctx context.Context) error {
13151322
}
13161323

13171324
if _, err := c.WaitForConditionWithInterval(ctx, DefaultWaitInterval, define.HealthCheckHealthy); err != nil {
1325+
if errors.Is(err, define.ErrNoSuchCtr) {
1326+
return nil
1327+
}
13181328
return err
13191329
}
13201330

@@ -1566,6 +1576,7 @@ func (c *Container) unpause() error {
15661576
}
15671577

15681578
// Internal, non-locking function to restart a container
1579+
// It requires to run on the same thread that holds the lock.
15691580
func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (retErr error) {
15701581
if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateRunning, define.ContainerStateStopped, define.ContainerStateExited) {
15711582
return fmt.Errorf("unable to restart a container in a paused or unknown state: %w", define.ErrCtrStateInvalid)
@@ -1613,7 +1624,10 @@ func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (retEr
16131624
return err
16141625
}
16151626
}
1616-
return c.start(ctx)
1627+
if err := c.start(); err != nil {
1628+
return err
1629+
}
1630+
return c.waitForHealthy(ctx)
16171631
}
16181632

16191633
// mountStorage sets up the container's root filesystem

libpod/oci_conmon_attach_common.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
package libpod
44

55
import (
6-
"context"
76
"errors"
87
"fmt"
98
"io"
@@ -32,6 +31,7 @@ const (
3231
// Attach to the given container.
3332
// Does not check if state is appropriate.
3433
// started is only required if startContainer is true.
34+
// It does not wait for the container to be healthy, it is the caller responsibility to do so.
3535
func (r *ConmonOCIRuntime) Attach(c *Container, params *AttachOptions) error {
3636
passthrough := c.LogDriver() == define.PassthroughLogging || c.LogDriver() == define.PassthroughTTYLogging
3737

@@ -86,7 +86,7 @@ func (r *ConmonOCIRuntime) Attach(c *Container, params *AttachOptions) error {
8686
// If starting was requested, start the container and notify when that's
8787
// done.
8888
if params.Start {
89-
if err := c.start(context.TODO()); err != nil {
89+
if err := c.start(); err != nil {
9090
return err
9191
}
9292
params.Started <- true

pkg/domain/infra/abi/containers.go

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -995,7 +995,10 @@ func (ic *ContainerEngine) ContainerStart(ctx context.Context, namesOrIds []stri
995995
return reports, fmt.Errorf("unable to start container %s: %w", ctr.ID(), err)
996996
}
997997

998-
exitCode = ic.GetContainerExitCode(ctx, ctr.Container)
998+
exitCode, err2 := ic.ContainerWaitForExitCode(ctx, ctr.Container)
999+
if err2 != nil {
1000+
logrus.Errorf("Waiting for container %s: %v", ctr.ID(), err2)
1001+
}
9991002
reports = append(reports, &entities.ContainerStartReport{
10001003
Id: ctr.ID(),
10011004
RawInput: ctr.rawInput,
@@ -1189,7 +1192,7 @@ func (ic *ContainerEngine) ContainerRun(ctx context.Context, opts entities.Conta
11891192
report.ExitCode = define.ExitCode(err)
11901193
return &report, err
11911194
}
1192-
report.ExitCode = ic.GetContainerExitCode(ctx, ctr)
1195+
report.ExitCode, _ = ic.ContainerWaitForExitCode(ctx, ctr)
11931196
if opts.Rm && !ctr.ShouldRestart(ctx) {
11941197
if err := removeContainer(ctr, false); err != nil {
11951198
if errors.Is(err, define.ErrNoSuchCtr) ||
@@ -1203,14 +1206,13 @@ func (ic *ContainerEngine) ContainerRun(ctx context.Context, opts entities.Conta
12031206
return &report, nil
12041207
}
12051208

1206-
func (ic *ContainerEngine) GetContainerExitCode(ctx context.Context, ctr *libpod.Container) int {
1209+
func (ic *ContainerEngine) ContainerWaitForExitCode(ctx context.Context, ctr *libpod.Container) (int, error) {
12071210
exitCode, err := ctr.Wait(ctx)
12081211
if err != nil {
1209-
logrus.Errorf("Waiting for container %s: %v", ctr.ID(), err)
12101212
intExitCode := int(define.ExecErrorCodeNotFound)
1211-
return intExitCode
1213+
return intExitCode, err
12121214
}
1213-
return int(exitCode)
1215+
return int(exitCode), nil
12141216
}
12151217

12161218
func (ic *ContainerEngine) ContainerLogs(ctx context.Context, namesOrIds []string, options entities.ContainerLogsOptions) error {

test/system/260-sdnotify.bats

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,15 @@ READY=1" "Container log after healthcheck run"
233233
is "$output" "running" "make sure container is still running"
234234

235235
run_podman rm -f -t0 $ctr
236+
237+
ctr=$(random_string)
238+
run_podman run --name $ctr \
239+
--health-cmd="touch /terminate" \
240+
--sdnotify=healthy \
241+
$IMAGE sh -c 'while test \! -e /terminate; do sleep 0.1; done; echo finished'
242+
is "$output" "finished" "make sure container exited successfully"
243+
run_podman rm -f -t0 $ctr
244+
236245
_stop_socat
237246
}
238247

0 commit comments

Comments
 (0)