Skip to content

Commit c6c14c8

Browse files
authored
Merge pull request #90 from docker/tocttou
Work around TOCTTOU install check race
2 parents 6cb1e6c + 6702fe8 commit c6c14c8

File tree

1 file changed

+39
-1
lines changed

1 file changed

+39
-1
lines changed

pkg/standalone/containers.go

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,13 @@ package standalone
22

33
import (
44
"context"
5+
"errors"
56
"fmt"
67
"os"
8+
"regexp"
79
"strconv"
810
"strings"
11+
"time"
912

1013
"github.com/docker/docker/api/types/container"
1114
"github.com/docker/docker/api/types/filters"
@@ -19,6 +22,11 @@ import (
1922
// controllerContainerName is the name to use for the controller container.
2023
const controllerContainerName = "docker-model-runner"
2124

25+
// concurrentInstallMatcher matches error messages that indicate a concurrent
26+
// standalone model runner installation is taking place. It extracts the ID of
27+
// the conflicting container in a capture group.
28+
var concurrentInstallMatcher = regexp.MustCompile(`is already in use by container "([a-z0-9]+)"`)
29+
2230
// FindControllerContainer searches for a running controller container. It
2331
// returns the ID of the container (if found), the container name (if any), the
2432
// full container summary (if found), or any error that occurred.
@@ -66,6 +74,28 @@ func determineBridgeGatewayIP(ctx context.Context, dockerClient *client.Client)
6674
return "", nil
6775
}
6876

77+
// waitForContainerToStart waits for a container to start.
78+
func waitForContainerToStart(ctx context.Context, dockerClient *client.Client, containerID string) error {
79+
// Unfortunately the Docker API's /containers/{id}/wait API (and the
80+
// corresponding Client.ContainerWait method) don't allow waiting for
81+
// container startup, so instead we'll take a polling approach.
82+
for i := 5; i > 0; i-- {
83+
if status, err := dockerClient.ContainerInspect(ctx, containerID); err != nil {
84+
return fmt.Errorf("unable to inspect container (%s): %w", containerID[:12], err)
85+
} else if status.State.Status == "running" {
86+
return nil
87+
}
88+
if i > 1 {
89+
select {
90+
case <-time.After(1 * time.Second):
91+
case <-ctx.Done():
92+
return errors.New("waiting cancelled")
93+
}
94+
}
95+
}
96+
return errors.New("timed out")
97+
}
98+
6999
// CreateControllerContainer creates and starts a controller container.
70100
func CreateControllerContainer(ctx context.Context, dockerClient *client.Client, port uint16, environment string, doNotTrack bool, gpu gpupkg.GPUSupport, modelStorageVolume string, printer StatusPrinter) error {
71101
// Determine the target image.
@@ -124,9 +154,17 @@ func CreateControllerContainer(ctx context.Context, dockerClient *client.Client,
124154
hostConfig.DeviceRequests = []container.DeviceRequest{{Count: -1, Capabilities: [][]string{{"gpu"}}}}
125155
}
126156

127-
// Create the container.
157+
// Create the container. If we detect that a concurrent installation is in
158+
// progress, then we wait for whichever install process creates the
159+
// container first and then wait for its container to be ready.
128160
resp, err := dockerClient.ContainerCreate(ctx, config, hostConfig, nil, nil, controllerContainerName)
129161
if err != nil {
162+
if match := concurrentInstallMatcher.FindStringSubmatch(err.Error()); match != nil {
163+
if err := waitForContainerToStart(ctx, dockerClient, match[1]); err != nil {
164+
return fmt.Errorf("failed waiting for concurrent installation: %w", err)
165+
}
166+
return nil
167+
}
130168
return fmt.Errorf("failed to create container %s: %w", controllerContainerName, err)
131169
}
132170

0 commit comments

Comments
 (0)