Skip to content

Commit 3280da0

Browse files
committed
fix race conditions in start/attach logic
The current code did something like this: lock() getState() unlock() if state != running lock() getState() == running -> error unlock() This of course is wrong because between the first unlock() and second lock() call another process could have modified the state. This meant that sometimes you would get a weird error on start because the internal setup errored as the container was already running. In general any state check without holding the lock is incorrect and will result in race conditions. As such refactor the code to combine both StartAndAttach and Attach() into one function that can handle both. With that we can move the running check into the locked code. Also use typed error for this specific error case then the callers can check and ignore the specific error when needed. This also allows us to fix races in the compat API that did a similar racy state check. This commit changes slightly how we output the result, previously a start on already running container would never print the id/name of the container which is confusing and sort of breaks idempotence. Now it will include the output except when --all is used. Then it only reports the ids that were actually started. Fixes containers#23246 Signed-off-by: Paul Holzinger <pholzing@redhat.com>
1 parent 360c4f3 commit 3280da0

File tree

7 files changed

+77
-123
lines changed

7 files changed

+77
-123
lines changed

libpod/container_api.go

Lines changed: 32 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ import (
1515
"github.com/containers/common/pkg/resize"
1616
"github.com/containers/podman/v5/libpod/define"
1717
"github.com/containers/podman/v5/libpod/events"
18-
"github.com/containers/podman/v5/pkg/signal"
1918
"github.com/containers/storage/pkg/archive"
2019
spec "github.com/opencontainers/runtime-spec/specs-go"
2120
"github.com/sirupsen/logrus"
@@ -142,13 +141,12 @@ func (c *Container) Update(resources *spec.LinuxResources, restartPolicy *string
142141
return c.update(resources, restartPolicy, restartRetries)
143142
}
144143

145-
// StartAndAttach starts a container and attaches to it.
146-
// This acts as a combination of the Start and Attach APIs, ensuring proper
144+
// Attach to a container.
145+
// The last parameter "start" can be used to also start the container.
146+
// This will then Start and Attach APIs, ensuring proper
147147
// ordering of the two such that no output from the container is lost (e.g. the
148148
// Attach call occurs before Start).
149-
// In overall functionality, it is identical to the Start call, with the added
150-
// side effect that an attach session will also be started.
151-
func (c *Container) StartAndAttach(ctx context.Context, streams *define.AttachStreams, keys string, resize <-chan resize.TerminalSize, recursive bool) (retChan <-chan error, finalErr error) {
149+
func (c *Container) Attach(ctx context.Context, streams *define.AttachStreams, keys string, resize <-chan resize.TerminalSize, start bool) (retChan <-chan error, finalErr error) {
152150
defer func() {
153151
if finalErr != nil {
154152
// Have to re-lock.
@@ -175,9 +173,27 @@ func (c *Container) StartAndAttach(ctx context.Context, streams *define.AttachSt
175173
}
176174
}
177175

178-
if err := c.prepareToStart(ctx, recursive); err != nil {
179-
return nil, err
176+
if c.state.State != define.ContainerStateRunning {
177+
if !start {
178+
return nil, errors.New("you can only attach to running containers")
179+
}
180+
181+
if err := c.prepareToStart(ctx, true); err != nil {
182+
return nil, err
183+
}
184+
}
185+
186+
if !start {
187+
// A bit awkward, technically passthrough never supports attach. We only pretend
188+
// it does as we leak the stdio fds down into the container but that of course only
189+
// works if we are the process that started conmon with the right fds.
190+
if c.LogDriver() == define.PassthroughLogging {
191+
return nil, fmt.Errorf("this container is using the 'passthrough' log driver, cannot attach: %w", define.ErrNoLogs)
192+
} else if c.LogDriver() == define.PassthroughTTYLogging {
193+
return nil, fmt.Errorf("this container is using the 'passthrough-tty' log driver, cannot attach: %w", define.ErrNoLogs)
194+
}
180195
}
196+
181197
attachChan := make(chan error)
182198

183199
// We need to ensure that we don't return until start() fired in attach.
@@ -194,7 +210,7 @@ func (c *Container) StartAndAttach(ctx context.Context, streams *define.AttachSt
194210
opts := new(AttachOptions)
195211
opts.Streams = streams
196212
opts.DetachKeys = &keys
197-
opts.Start = true
213+
opts.Start = start
198214
opts.Started = startedChan
199215

200216
// attach and start the container on a different thread. waitForHealthy must
@@ -213,7 +229,13 @@ func (c *Container) StartAndAttach(ctx context.Context, streams *define.AttachSt
213229
c.newContainerEvent(events.Attach)
214230
}
215231

216-
return attachChan, c.waitForHealthy(ctx)
232+
if start {
233+
if err := c.waitForHealthy(ctx); err != nil {
234+
return nil, err
235+
}
236+
}
237+
238+
return attachChan, nil
217239
}
218240

219241
// RestartWithTimeout restarts a running container and takes a given timeout in uint
@@ -315,61 +337,6 @@ func (c *Container) Kill(signal uint) error {
315337
return c.save()
316338
}
317339

318-
// Attach attaches to a container.
319-
// This function returns when the attach finishes. It does not hold the lock for
320-
// the duration of its runtime, only using it at the beginning to verify state.
321-
func (c *Container) Attach(streams *define.AttachStreams, keys string, resize <-chan resize.TerminalSize) error {
322-
if c.LogDriver() == define.PassthroughLogging {
323-
return fmt.Errorf("this container is using the 'passthrough' log driver, cannot attach: %w", define.ErrNoLogs)
324-
}
325-
if c.LogDriver() == define.PassthroughTTYLogging {
326-
return fmt.Errorf("this container is using the 'passthrough-tty' log driver, cannot attach: %w", define.ErrNoLogs)
327-
}
328-
if !c.batched {
329-
c.lock.Lock()
330-
if err := c.syncContainer(); err != nil {
331-
c.lock.Unlock()
332-
return err
333-
}
334-
// We are NOT holding the lock for the duration of the function.
335-
c.lock.Unlock()
336-
}
337-
338-
if !c.ensureState(define.ContainerStateCreated, define.ContainerStateRunning) {
339-
return fmt.Errorf("can only attach to created or running containers: %w", define.ErrCtrStateInvalid)
340-
}
341-
342-
// HACK: This is really gross, but there isn't a better way without
343-
// splitting attach into separate versions for StartAndAttach and normal
344-
// attaching, and I really do not want to do that right now.
345-
// Send a SIGWINCH after attach succeeds so that most programs will
346-
// redraw the screen for the new attach session.
347-
attachRdy := make(chan bool, 1)
348-
if c.Terminal() {
349-
go func() {
350-
<-attachRdy
351-
c.lock.Lock()
352-
defer c.lock.Unlock()
353-
if err := c.ociRuntime.KillContainer(c, uint(signal.SIGWINCH), false); err != nil {
354-
logrus.Warnf("Unable to send SIGWINCH to container %s after attach: %v", c.ID(), err)
355-
}
356-
}()
357-
}
358-
359-
// Start resizing
360-
if c.LogDriver() != define.PassthroughLogging && c.LogDriver() != define.PassthroughTTYLogging {
361-
registerResizeFunc(resize, c.bundlePath())
362-
}
363-
364-
opts := new(AttachOptions)
365-
opts.Streams = streams
366-
opts.DetachKeys = &keys
367-
opts.AttachReady = attachRdy
368-
369-
c.newContainerEvent(events.Attach)
370-
return c.ociRuntime.Attach(c, opts)
371-
}
372-
373340
// HTTPAttach forwards an attach session over a hijacked HTTP session.
374341
// HTTPAttach will consume and close the included httpCon, which is expected to
375342
// be sourced from a hijacked HTTP connection.

libpod/container_internal.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -821,6 +821,11 @@ func (c *Container) save() error {
821821
func (c *Container) prepareToStart(ctx context.Context, recursive bool) (retErr error) {
822822
// Container must be created or stopped to be started
823823
if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateStopped, define.ContainerStateExited) {
824+
// Special case: Let the caller know that is is already running,
825+
// the caller can then decide to ignore/handle the error the way it needs.
826+
if c.state.State == define.ContainerStateRunning {
827+
return fmt.Errorf("container %s: %w", c.ID(), define.ErrCtrStateRunning)
828+
}
824829
return fmt.Errorf("container %s must be in Created or Stopped state to be started: %w", c.ID(), define.ErrCtrStateInvalid)
825830
}
826831

libpod/define/errors.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ var (
6060
// ErrCtrStateInvalid indicates a container is in an improper state for
6161
// the requested operation
6262
ErrCtrStateInvalid = errors.New("container state improper")
63+
// ErrCtrStateRunning indicates a container is running.
64+
ErrCtrStateRunning = errors.New("container is running")
6365
// ErrExecSessionStateInvalid indicates that an exec session is in an
6466
// improper state for the requested operation
6567
ErrExecSessionStateInvalid = errors.New("exec session state improper")

libpod/oci_conmon_attach_common.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ func (r *ConmonOCIRuntime) Attach(c *Container, params *AttachOptions) error {
9191
}
9292
params.Started <- true
9393
}
94+
close(params.Started)
9495

9596
if passthrough {
9697
return nil

pkg/api/handlers/compat/containers_start.go

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package compat
22

33
import (
4+
"errors"
45
"net/http"
56

67
api "github.com/containers/podman/v5/pkg/api/types"
@@ -33,16 +34,11 @@ func StartContainer(w http.ResponseWriter, r *http.Request) {
3334
utils.ContainerNotFound(w, name, err)
3435
return
3536
}
36-
state, err := con.State()
37-
if err != nil {
38-
utils.InternalServerError(w, err)
39-
return
40-
}
41-
if state == define.ContainerStateRunning {
42-
utils.WriteResponse(w, http.StatusNotModified, nil)
43-
return
44-
}
4537
if err := con.Start(r.Context(), true); err != nil {
38+
if errors.Is(err, define.ErrCtrStateRunning) {
39+
utils.WriteResponse(w, http.StatusNotModified, nil)
40+
return
41+
}
4642
utils.InternalServerError(w, err)
4743
return
4844
}

pkg/domain/infra/abi/containers.go

Lines changed: 31 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -795,13 +795,6 @@ func (ic *ContainerEngine) ContainerAttach(ctx context.Context, nameOrID string,
795795
}
796796

797797
ctr := containers[0]
798-
conState, err := ctr.State()
799-
if err != nil {
800-
return fmt.Errorf("unable to determine state of %s: %w", ctr.ID(), err)
801-
}
802-
if conState != define.ContainerStateRunning {
803-
return fmt.Errorf("you can only attach to running containers")
804-
}
805798

806799
// If the container is in a pod, also set to recursively start dependencies
807800
err = terminal.StartAttachCtr(ctx, ctr.Container, options.Stdout, options.Stderr, options.Stdin, options.DetachKeys, options.SigProxy, false)
@@ -939,14 +932,9 @@ func (ic *ContainerEngine) ContainerStart(ctx context.Context, namesOrIds []stri
939932
// There can only be one container if attach was used
940933
for i := range containers {
941934
ctr := containers[i]
942-
ctrState, err := ctr.State()
943-
if err != nil {
944-
return nil, err
945-
}
946-
ctrRunning := ctrState == define.ContainerStateRunning
947935

948936
if options.Attach {
949-
err = terminal.StartAttachCtr(ctx, ctr.Container, options.Stdout, options.Stderr, options.Stdin, options.DetachKeys, options.SigProxy, !ctrRunning)
937+
err = terminal.StartAttachCtr(ctx, ctr.Container, options.Stdout, options.Stderr, options.Stdin, options.DetachKeys, options.SigProxy, true)
950938
if errors.Is(err, define.ErrDetach) {
951939
// User manually detached
952940
// Exit cleanly immediately
@@ -970,16 +958,6 @@ func (ic *ContainerEngine) ContainerStart(ctx context.Context, namesOrIds []stri
970958
return reports, fmt.Errorf("attempting to start container %s would cause a deadlock; please run 'podman system renumber' to resolve", ctr.ID())
971959
}
972960

973-
if ctrRunning {
974-
reports = append(reports, &entities.ContainerStartReport{
975-
Id: ctr.ID(),
976-
RawInput: ctr.rawInput,
977-
Err: nil,
978-
ExitCode: 0,
979-
})
980-
return reports, err
981-
}
982-
983961
if err != nil {
984962
reports = append(reports, &entities.ContainerStartReport{
985963
Id: ctr.ID(),
@@ -1008,34 +986,43 @@ func (ic *ContainerEngine) ContainerStart(ctx context.Context, namesOrIds []stri
1008986
return reports, nil
1009987
} // end attach
1010988

1011-
// Start the container if it's not running already.
1012-
if !ctrRunning {
1013-
// Handle non-attach start
1014-
// If the container is in a pod, also set to recursively start dependencies
1015-
report := &entities.ContainerStartReport{
1016-
Id: ctr.ID(),
1017-
RawInput: ctr.rawInput,
1018-
ExitCode: 125,
1019-
}
1020-
if err := ctr.Start(ctx, true); err != nil {
1021-
report.Err = err
1022-
if errors.Is(err, define.ErrWillDeadlock) {
1023-
report.Err = fmt.Errorf("please run 'podman system renumber' to resolve deadlocks: %w", err)
989+
// Handle non-attach start
990+
// If the container is in a pod, also set to recursively start dependencies
991+
report := &entities.ContainerStartReport{
992+
Id: ctr.ID(),
993+
RawInput: ctr.rawInput,
994+
ExitCode: 125,
995+
}
996+
if err := ctr.Start(ctx, true); err != nil {
997+
// Already running is no error for the start command as it is idempotent.
998+
if errors.Is(err, define.ErrCtrStateRunning) {
999+
// If all is set we only want to output the actual started containers
1000+
// so do not include the entry in the result.
1001+
if !options.All {
1002+
report.ExitCode = 0
10241003
reports = append(reports, report)
1025-
continue
10261004
}
1027-
report.Err = fmt.Errorf("unable to start container %q: %w", ctr.ID(), err)
1005+
continue
1006+
}
1007+
1008+
report.Err = err
1009+
if errors.Is(err, define.ErrWillDeadlock) {
1010+
report.Err = fmt.Errorf("please run 'podman system renumber' to resolve deadlocks: %w", err)
10281011
reports = append(reports, report)
1029-
if ctr.AutoRemove() {
1030-
if _, _, err := ic.removeContainer(ctx, ctr.Container, entities.RmOptions{}); err != nil {
1031-
logrus.Errorf("Removing container %s: %v", ctr.ID(), err)
1032-
}
1033-
}
10341012
continue
10351013
}
1036-
report.ExitCode = 0
1014+
report.Err = fmt.Errorf("unable to start container %q: %w", ctr.ID(), err)
1015+
if ctr.AutoRemove() {
1016+
if _, _, err := ic.removeContainer(ctx, ctr.Container, entities.RmOptions{}); err != nil {
1017+
logrus.Errorf("Removing container %s: %v", ctr.ID(), err)
1018+
}
1019+
}
10371020
reports = append(reports, report)
1021+
continue
10381022
}
1023+
// no error set exit code to 0
1024+
report.ExitCode = 0
1025+
reports = append(reports, report)
10391026
}
10401027
return reports, nil
10411028
}

pkg/domain/infra/abi/terminal/terminal_common.go

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -89,11 +89,7 @@ func StartAttachCtr(ctx context.Context, ctr *libpod.Container, stdout, stderr,
8989
ProxySignals(ctr)
9090
}
9191

92-
if !startContainer {
93-
return ctr.Attach(streams, detachKeys, resize)
94-
}
95-
96-
attachChan, err := ctr.StartAndAttach(ctx, streams, detachKeys, resize, true)
92+
attachChan, err := ctr.Attach(ctx, streams, detachKeys, resize, startContainer)
9793
if err != nil {
9894
return err
9995
}

0 commit comments

Comments
 (0)