@@ -2,10 +2,13 @@ package standalone
2
2
3
3
import (
4
4
"context"
5
+ "errors"
5
6
"fmt"
6
7
"os"
8
+ "regexp"
7
9
"strconv"
8
10
"strings"
11
+ "time"
9
12
10
13
"github.com/docker/docker/api/types/container"
11
14
"github.com/docker/docker/api/types/filters"
@@ -19,6 +22,11 @@ import (
19
22
// controllerContainerName is the name to use for the controller container.
20
23
const controllerContainerName = "docker-model-runner"
21
24
25
+ // concurrentInstallMatcher matches error messages that indicate a concurrent
26
+ // standalone model runner installation is taking place. It extracts the ID of
27
+ // the conflicting container in its only capture group.
28
+ var concurrentInstallMatcher = regexp .MustCompile (`is already in use by container "([a-z0-9]+)"` )
29
+
22
30
// FindControllerContainer searches for a running controller container. It
23
31
// returns the ID of the container (if found), the container name (if any), the
24
32
// full container summary (if found), or any error that occurred.
@@ -66,6 +74,28 @@ func determineBridgeGatewayIP(ctx context.Context, dockerClient *client.Client)
66
74
return "" , nil
67
75
}
68
76
77
+ // waitForContainerToStart waits for a container to start.
78
+ func waitForContainerToStart (ctx context.Context , dockerClient * client.Client , containerID string ) error {
79
+ // Unfortunately the Docker API's /containers/{id}/wait API (and the
80
+ // corresponding Client.ContainerWait method) don't allow waiting for
81
+ // container startup, so instead we'll take a polling approach.
82
+ for i := 5 ; i > 0 ; i -- {
83
+ if status , err := dockerClient .ContainerInspect (ctx , containerID ); err != nil {
84
+ return fmt .Errorf ("unable to inspect container (%s): %w" , containerID [:12 ], err )
85
+ } else if status .State .Status == "running" {
86
+ return nil
87
+ }
88
+ if i > 1 {
89
+ select {
90
+ case <- time .After (1 * time .Second ):
91
+ case <- ctx .Done ():
92
+ return errors .New ("waiting cancelled" )
93
+ }
94
+ }
95
+ }
96
+ return errors .New ("timed out" )
97
+ }
98
+
69
99
// CreateControllerContainer creates and starts a controller container.
70
100
func CreateControllerContainer (ctx context.Context , dockerClient * client.Client , port uint16 , environment string , doNotTrack bool , gpu gpupkg.GPUSupport , modelStorageVolume string , printer StatusPrinter ) error {
71
101
// Determine the target image.
@@ -124,9 +154,17 @@ func CreateControllerContainer(ctx context.Context, dockerClient *client.Client,
124
154
hostConfig .DeviceRequests = []container.DeviceRequest {{Count : - 1 , Capabilities : [][]string {{"gpu" }}}}
125
155
}
126
156
127
- // Create the container.
157
+ // Create the container. If we detect that a concurrent installation is in
158
+ // progress, then we wait for whichever install process creates the
159
+ // container first and then wait for its container to be ready.
128
160
resp , err := dockerClient .ContainerCreate (ctx , config , hostConfig , nil , nil , controllerContainerName )
129
161
if err != nil {
162
+ if match := concurrentInstallMatcher .FindStringSubmatch (err .Error ()); match != nil {
163
+ if err := waitForContainerToStart (ctx , dockerClient , match [1 ]); err != nil {
164
+ return fmt .Errorf ("failed waiting for concurrent installation: %w" , err )
165
+ }
166
+ return nil
167
+ }
130
168
return fmt .Errorf ("failed to create container %s: %w" , controllerContainerName , err )
131
169
}
132
170
0 commit comments