Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion tests/e2e/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,28 @@ include $(CURDIR)/versions.mk

# Ginkgo focus expression: selects which runtime's specs the test target runs.
E2E_RUNTIME ?= docker

# Forwarded to the suite as -install-ctk.
E2E_INSTALL_CTK ?= false

# Default the distribution used in the image tag when DIST is not set.
# The check must look at DIST itself: $($(DIST)) is a computed variable
# reference that would test the variable *named by* DIST's value instead.
ifeq ($(DIST),)
DIST ?= ubuntu20.04
endif
IMAGE_TAG ?= $(VERSION)-$(DIST)
IMAGE = $(IMAGE_NAME):$(IMAGE_TAG)

# SSH connection settings, forwarded to the suite as flags.
E2E_SSH_KEY ?=
E2E_SSH_USER ?=
E2E_SSH_HOST ?=
E2E_SSH_PORT ?= 22

# test: run the Go e2e suite from tests/e2e. Ginkgo is focused on
# $(E2E_RUNTIME), the Go test timeout is one hour, and the install, image and
# SSH settings are forwarded to the test binary as flags after -args.
.PHONY: test
test:
cd $(CURDIR)/tests/e2e && $(GO_CMD) test -v . -args \
-ginkgo.focus="$(E2E_RUNTIME)" \
-test.timeout=1h \
-ginkgo.v
-ginkgo.v \
-install-ctk=$(E2E_INSTALL_CTK) \
-toolkit-image=$(IMAGE) \
-ssh-key=$(E2E_SSH_KEY) \
-ssh-user=$(E2E_SSH_USER) \
-remote-host=$(E2E_SSH_HOST) \
-remote-port=$(E2E_SSH_PORT)
44 changes: 19 additions & 25 deletions tests/e2e/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,8 @@
package e2e

import (
"bytes"
"context"
"fmt"
"os/exec"
"flag"
"testing"

. "github.com/onsi/ginkgo/v2"
Expand All @@ -30,8 +28,26 @@ import (
// Test context and command-line configuration shared by the e2e specs.
var (
	ctx context.Context

	// installCTK selects whether the toolkit is installed before the specs
	// run (-install-ctk).
	installCTK bool

	// image is the toolkit image handed to the installer (-toolkit-image).
	image string

	// SSH connection settings for the host the commands are run on.
	sshKey  string
	sshUser string
	host    string
	sshPort string
)

// init registers the suite's command-line flags on the default flag set so
// they can be supplied after `-args` on the `go test` command line.
func init() {
	flag.BoolVar(&installCTK, "install-ctk", false, "Install the NVIDIA Container Toolkit")
	flag.StringVar(&image, "toolkit-image", "", "Repository of the image to test")
	flag.StringVar(&sshKey, "ssh-key", "", "SSH key to use for remote login")
	flag.StringVar(&sshUser, "ssh-user", "", "SSH user to use for remote login")
	flag.StringVar(&host, "remote-host", "", "Hostname of the remote machine")

	// The port is accepted under two names bound to the same variable:
	// "remote-port" pairs with "remote-host" and is the spelling the Makefile
	// passes, while "ssh-port" is kept so existing invocations keep working.
	const sshPortUsage = "SSH port to use for remote login"
	flag.StringVar(&sshPort, "remote-port", "22", sshPortUsage)
	flag.StringVar(&sshPort, "ssh-port", "22", sshPortUsage)
}

func TestMain(t *testing.T) {
suiteName := "NVIDIA Container Toolkit E2E"

Expand All @@ -45,25 +61,3 @@ func TestMain(t *testing.T) {
// Initialise the shared ctx with a background context in Ginkgo's
// BeforeSuite hook.
var _ = BeforeSuite(func() {
ctx = context.Background()
})

// runScript executes script with `bash -c` and returns what the script wrote
// to standard output.
//
// On failure the returned string is empty. The error wraps the one returned by
// cmd.Run, so callers can use errors.Is / errors.As (for example to reach an
// *exec.ExitError), and its message includes the captured stdout and stderr.
func runScript(script string) (string, error) {
	// Capture both streams so a failure can report everything the script
	// printed.
	var stdout, stderr bytes.Buffer

	cmd := exec.Command("bash", "-c", script)
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		return "", fmt.Errorf("script execution failed: %w\nSTDOUT: %s\nSTDERR: %s", err, stdout.String(), stderr.String())
	}

	return stdout.String(), nil
}
118 changes: 118 additions & 0 deletions tests/e2e/installer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
/*
* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package e2e

import (
"bytes"
"fmt"
"text/template"
)

// dockerInstallTemplate is the text/template source of a bash script that
// installs the NVIDIA Container Toolkit on a Docker host by running the toolkit
// image as a privileged container. It is rendered with a *ToolkitInstaller as
// template data; {{.Image}} supplies the default for the script's IMAGE
// variable.
var dockerInstallTemplate = `
#! /usr/bin/env bash
set -xe

: ${IMAGE:={{.Image}}}

# Create a temporary directory
TEMP_DIR="/tmp/ctk_e2e.$(date +%s)_$RANDOM"
mkdir -p "$TEMP_DIR"

# Given that docker has an init function that checks for the existence of the
# nvidia-container-toolkit, we need to create a symlink to the nvidia-container-runtime-hook
# in the /usr/bin directory.
# See https://github.com/moby/moby/blob/20a05dabf44934447d1a66cdd616cc803b81d4e2/daemon/nvidia_linux.go#L32-L46
sudo rm -f /usr/bin/nvidia-container-runtime-hook
sudo ln -s "$TEMP_DIR/toolkit/nvidia-container-runtime-hook" /usr/bin/nvidia-container-runtime-hook

docker run --pid=host --rm -i --privileged \
-v /:/host \
-v /var/run/docker.sock:/var/run/docker.sock \
-v "$TEMP_DIR:$TEMP_DIR" \
-v /etc/docker:/config-root \
${IMAGE} \
--root "$TEMP_DIR" \
--runtime=docker \
--config=/config-root/daemon.json \
--driver-root=/ \
--no-daemon \
--restart-mode=systemd
`

// ToolkitInstaller renders an install-script template and runs the resulting
// script through a Runner.
type ToolkitInstaller struct {
// runner executes the rendered script.
runner Runner
// template is the text/template source of the install script.
template string

// Image is available to the template as {{.Image}}.
Image string
}

// installerOption mutates a ToolkitInstaller; NewToolkitInstaller applies the
// options it is given in order.
type installerOption func(*ToolkitInstaller)

// WithRunner returns an option that sets the Runner the installer uses to
// execute its rendered script.
func WithRunner(r Runner) installerOption {
	setRunner := func(installer *ToolkitInstaller) {
		installer.runner = r
	}
	return setRunner
}

// WithImage returns an option that sets the installer's Image field.
func WithImage(image string) installerOption {
	setImage := func(installer *ToolkitInstaller) {
		installer.Image = image
	}
	return setImage
}

// WithTemplate returns an option that sets the template source the installer
// renders into its install script.
func WithTemplate(template string) installerOption {
	setTemplate := func(installer *ToolkitInstaller) {
		installer.template = template
	}
	return setTemplate
}

// NewToolkitInstaller builds a ToolkitInstaller that starts with a localRunner
// and dockerInstallTemplate, then applies opts in order. It returns an error
// if no image has been set once all options have run.
func NewToolkitInstaller(opts ...installerOption) (*ToolkitInstaller, error) {
	installer := ToolkitInstaller{
		runner:   localRunner{},
		template: dockerInstallTemplate,
	}

	for idx := range opts {
		opts[idx](&installer)
	}

	// The image has no default and must come from an option.
	if len(installer.Image) == 0 {
		return nil, fmt.Errorf("image is required")
	}

	return &installer, nil
}

// Install renders the installer's template with the installer itself as
// template data and passes the resulting script to the configured runner.
// It returns a wrapped error if the template cannot be parsed or executed;
// otherwise it returns the runner's error unchanged.
func (i *ToolkitInstaller) Install() error {
	parsed, parseErr := template.New("installScript").Parse(i.template)
	if parseErr != nil {
		return fmt.Errorf("error parsing template: %w", parseErr)
	}

	var script bytes.Buffer
	if execErr := parsed.Execute(&script, i); execErr != nil {
		return fmt.Errorf("error executing template: %w", execErr)
	}

	// Only the error matters here; the runner's other return values are
	// discarded.
	_, _, runErr := i.runner.Run(script.String())
	return runErr
}
56 changes: 39 additions & 17 deletions tests/e2e/nvidia-container-toolkit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,29 @@ import (
)

// Integration tests for Docker runtime
var _ = Describe("docker", func() {
var _ = Describe("docker", Ordered, func() {
var r Runner

// Install the NVIDIA Container Toolkit
BeforeAll(func(ctx context.Context) {
r = NewRunner(
WithHost(host),
WithPort(sshPort),
WithSshKey(sshKey),
WithSshUser(sshUser),
)
if installCTK {
installer, err := NewToolkitInstaller(
WithRunner(r),
WithImage(image),
WithTemplate(dockerInstallTemplate),
)
Expect(err).ToNot(HaveOccurred())
err = installer.Install()
Expect(err).ToNot(HaveOccurred())
}
})

// GPUs are accessible in a container: Running nvidia-smi -L inside the
// container shows the same output inside the container as outside the
// container. This means that the following commands must all produce
Expand All @@ -33,33 +55,33 @@ var _ = Describe("docker", func() {
var hostOutput string

BeforeAll(func(ctx context.Context) {
_, err := runScript("docker pull ubuntu")
_, _, err := r.Run("docker pull ubuntu")
Expect(err).ToNot(HaveOccurred())

hostOutput, err = runScript("nvidia-smi -L")
hostOutput, _, err = r.Run("nvidia-smi -L")
Expect(err).ToNot(HaveOccurred())
})

It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
containerOutput, err := runScript("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all ubuntu nvidia-smi -L")
containerOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all ubuntu nvidia-smi -L")
Expect(err).ToNot(HaveOccurred())
Expect(containerOutput).To(Equal(hostOutput))
})

It("should support automatic CDI spec generation", func(ctx context.Context) {
containerOutput, err := runScript("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
containerOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
Expect(err).ToNot(HaveOccurred())
Expect(containerOutput).To(Equal(hostOutput))
})

It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
containerOutput, err := runScript("docker run --rm -i --runtime=nvidia --gpus all ubuntu nvidia-smi -L")
containerOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia --gpus all ubuntu nvidia-smi -L")
Expect(err).ToNot(HaveOccurred())
Expect(containerOutput).To(Equal(hostOutput))
})

It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
containerOutput, err := runScript("docker run --rm -i --gpus all ubuntu nvidia-smi -L")
containerOutput, _, err := r.Run("docker run --rm -i --gpus all ubuntu nvidia-smi -L")
Expect(err).ToNot(HaveOccurred())
Expect(containerOutput).To(Equal(hostOutput))
})
Expand All @@ -69,34 +91,34 @@ var _ = Describe("docker", func() {
// The following should all produce the same result.
When("Running the cuda-vectorAdd sample", Ordered, func() {
BeforeAll(func(ctx context.Context) {
_, err := runScript("docker pull nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
_, _, err := r.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
Expect(err).ToNot(HaveOccurred())
})

var referenceOutput string

It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
var err error
referenceOutput, err = runScript("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
referenceOutput, _, err = r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
Expect(err).ToNot(HaveOccurred())

Expect(referenceOutput).To(ContainSubstring("Test PASSED"))
})

It("should support automatic CDI spec generation", func(ctx context.Context) {
out2, err := runScript("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
out2, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
Expect(err).ToNot(HaveOccurred())
Expect(referenceOutput).To(Equal(out2))
})

It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
out3, err := runScript("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
out3, _, err := r.Run("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
Expect(err).ToNot(HaveOccurred())
Expect(referenceOutput).To(Equal(out3))
})

It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
out4, err := runScript("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
out4, _, err := r.Run("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
Expect(err).ToNot(HaveOccurred())
Expect(referenceOutput).To(Equal(out4))
})
Expand All @@ -106,34 +128,34 @@ var _ = Describe("docker", func() {
// The following should all produce the same result.
When("Running the cuda-deviceQuery sample", Ordered, func() {
BeforeAll(func(ctx context.Context) {
_, err := runScript("docker pull nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
_, _, err := r.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
Expect(err).ToNot(HaveOccurred())
})

var referenceOutput string

It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
var err error
referenceOutput, err = runScript("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
referenceOutput, _, err = r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
Expect(err).ToNot(HaveOccurred())

Expect(referenceOutput).To(ContainSubstring("Result = PASS"))
})

It("should support automatic CDI spec generation", func(ctx context.Context) {
out2, err := runScript("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
out2, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
Expect(err).ToNot(HaveOccurred())
Expect(referenceOutput).To(Equal(out2))
})

It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
out3, err := runScript("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
out3, _, err := r.Run("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
Expect(err).ToNot(HaveOccurred())
Expect(referenceOutput).To(Equal(out3))
})

It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
out4, err := runScript("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
out4, _, err := r.Run("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
Expect(err).ToNot(HaveOccurred())
Expect(referenceOutput).To(Equal(out4))
})
Expand Down
Loading