diff --git a/cmd/nvidia-ctk-installer/container/container.go b/cmd/nvidia-ctk-installer/container/container.go index 0583e051a..7d907be4d 100644 --- a/cmd/nvidia-ctk-installer/container/container.go +++ b/cmd/nvidia-ctk-installer/container/container.go @@ -49,6 +49,9 @@ type Options struct { SetAsDefault bool RestartMode string HostRootMount string + // NvidiaConfig specifies the path to the NVIDIA-specific config file to use instead of + // modifying the main configuration file. + NvidiaConfig string } // Configure applies the options to the specified config diff --git a/cmd/nvidia-ctk-installer/container/runtime/containerd/containerd.go b/cmd/nvidia-ctk-installer/container/runtime/containerd/containerd.go index 342837f6f..6ea210223 100644 --- a/cmd/nvidia-ctk-installer/container/runtime/containerd/containerd.go +++ b/cmd/nvidia-ctk-installer/container/runtime/containerd/containerd.go @@ -180,5 +180,6 @@ func getRuntimeConfig(o *container.Options, co *Options) (engine.Interface, erro containerd.WithRuntimeType(co.runtimeType), containerd.WithUseLegacyConfig(co.useLegacyConfig), containerd.WithContainerAnnotations(co.containerAnnotationsFromCDIPrefixes()...), + containerd.WithNvidiaConfig(o.NvidiaConfig), ) } diff --git a/cmd/nvidia-ctk-installer/container/runtime/crio/crio.go b/cmd/nvidia-ctk-installer/container/runtime/crio/crio.go index 10fb1cbfd..8dcb3fd25 100644 --- a/cmd/nvidia-ctk-installer/container/runtime/crio/crio.go +++ b/cmd/nvidia-ctk-installer/container/runtime/crio/crio.go @@ -206,5 +206,6 @@ func getRuntimeConfig(o *container.Options) (engine.Interface, error) { toml.FromFile(o.Config), ), ), + crio.WithNvidiaConfig(o.NvidiaConfig), ) } diff --git a/cmd/nvidia-ctk-installer/container/runtime/runtime.go b/cmd/nvidia-ctk-installer/container/runtime/runtime.go index 320c3e0e1..5ba4d9e4b 100644 --- a/cmd/nvidia-ctk-installer/container/runtime/runtime.go +++ b/cmd/nvidia-ctk-installer/container/runtime/runtime.go @@ -36,6 +36,8 @@ const ( defaultHostRootMount = "/host" runtimeSpecificDefault = "RUNTIME_SPECIFIC_DEFAULT" + + defaultNVIDIARuntimeConfigFilePath = "/etc/nvidia-container-runtime/config.d/99-nvidia.conf" ) type Options struct { @@ -54,6 +56,13 @@ func Flags(opts *Options) []cli.Flag { Destination: &opts.Config, Sources: cli.EnvVars("RUNTIME_CONFIG", "CONTAINERD_CONFIG", "DOCKER_CONFIG"), }, + &cli.StringFlag{ + Name: "drop-in-config", + Usage: "Path to the NVIDIA-specific config file to create. When specified, runtime configurations are saved to this file instead of modifying the main config file", + Destination: &opts.NvidiaConfig, + Value: defaultNVIDIARuntimeConfigFilePath, + Sources: cli.EnvVars("RUNTIME_DROP_IN_CONFIG"), + }, &cli.StringFlag{ Name: "executable-path", Usage: "The path to the runtime executable. This is used to extract the current config", diff --git a/cmd/nvidia-ctk/runtime/configure/configure.go b/cmd/nvidia-ctk/runtime/configure/configure.go index ee419af83..92ce0ba63 100644 --- a/cmd/nvidia-ctk/runtime/configure/configure.go +++ b/cmd/nvidia-ctk/runtime/configure/configure.go @@ -49,6 +49,8 @@ const ( defaultConfigSource = configSourceFile configSourceCommand = "command" configSourceFile = "file" + + defaultNVIDIARuntimeConfigFilePath = "/etc/nvidia-container-runtime/config.d/99-nvidia.conf" ) type command struct { @@ -73,6 +75,7 @@ type config struct { configSource string mode string hookFilePath string + nvidiaConfig string nvidiaRuntime struct { name string @@ -118,6 +121,12 @@ func (m command) build() *cli.Command { Usage: "path to the config file for the target runtime", Destination: &config.configFilePath, }, + &cli.StringFlag{ + Name: "drop-in-config", + Usage: "path to the NVIDIA-specific config file to create. When specified, runtime configurations are saved to this file instead of modifying the main config file", + Destination: &config.nvidiaConfig, + Value: defaultNVIDIARuntimeConfigFilePath, + }, &cli.StringFlag{ Name: "executable-path", Usage: "The path to the runtime executable. This is used to extract the current config", @@ -268,12 +277,14 @@ func (m command) configureConfigFile(config *config) error { containerd.WithLogger(m.logger), containerd.WithPath(config.configFilePath), containerd.WithConfigSource(configSource), + containerd.WithNvidiaConfig(config.nvidiaConfig), ) case "crio": cfg, err = crio.New( crio.WithLogger(m.logger), crio.WithPath(config.configFilePath), crio.WithConfigSource(configSource), + crio.WithNvidiaConfig(config.nvidiaConfig), ) case "docker": cfg, err = docker.New( diff --git a/pkg/config/engine/containerd/config.go b/pkg/config/engine/containerd/config.go index 9911a6fb5..832f64ece 100644 --- a/pkg/config/engine/containerd/config.go +++ b/pkg/config/engine/containerd/config.go @@ -18,6 +18,8 @@ package containerd import ( "fmt" + "os" + "path/filepath" "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine" "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/toml" @@ -123,12 +125,38 @@ func (c *Config) EnableCDI() { *c.Tree = config } -// RemoveRuntime removes a runtime from the docker config +// RemoveRuntime removes a runtime from the containerd config func (c *Config) RemoveRuntime(name string) error { if c == nil || c.Tree == nil { return nil } + // If using NVIDIA-specific configuration, handle file cleanup + if c.nvidiaConfig != "" { + // Check if all NVIDIA runtimes are being removed + remainingNvidiaRuntimes := 0 + if runtimes := c.GetPath([]string{"plugins", c.CRIRuntimePluginName, "containerd", "runtimes"}); runtimes != nil { + if runtimesTree, ok := runtimes.(*toml.Tree); ok { + for _, runtimeName := range runtimesTree.Keys() { + if c.isNvidiaRuntime(runtimeName) && runtimeName != name { + remainingNvidiaRuntimes++ + } + } + } + } + + // If this is the last NVIDIA runtime, remove the NVIDIA config file + if remainingNvidiaRuntimes == 0 { + if err := os.Remove(c.nvidiaConfig); err != nil && !os.IsNotExist(err) { + c.Logger.Warningf("Failed to remove NVIDIA config file %s: %v", c.nvidiaConfig, err) + } else { + c.Logger.Infof("Removed NVIDIA config file: %s", c.nvidiaConfig) + } + // Don't modify the in-memory tree when using NVIDIA-specific configuration + return nil + } + } + config := *c.Tree config.DeletePath([]string{"plugins", c.CRIRuntimePluginName, "containerd", "runtimes", name}) @@ -154,3 +182,134 @@ func (c *Config) RemoveRuntime(name string) error { *c.Tree = config return nil } + +// Save writes the config to the specified path or NVIDIA-specific config file +func (c *Config) Save(path string) (int64, error) { + if c.nvidiaConfig == "" { + // Backward compatibility: save to main config + return c.Tree.Save(path) + } + + // Ensure directory for NVIDIA config file exists + dir := filepath.Dir(c.nvidiaConfig) + if err := os.MkdirAll(dir, 0755); err != nil { + return 0, fmt.Errorf("failed to create directory for NVIDIA config: %w", err) + } + + // Save runtime configs to NVIDIA config file + nvidiaConfig := c.extractRuntimeConfig() + n, err := nvidiaConfig.Save(c.nvidiaConfig) + if err != nil { + return n, fmt.Errorf("failed to save NVIDIA config: %w", err) + } + + // Update main config with imports directive + if err := c.updateMainConfigImports(path); err != nil { + // Try to clean up the NVIDIA config file on error + os.Remove(c.nvidiaConfig) + return n, fmt.Errorf("failed to update main config imports: %w", err) + } + + c.Logger.Infof("Wrote NVIDIA runtime configuration to: %s", c.nvidiaConfig) + return n, nil +} + +// extractRuntimeConfig creates a new config tree with only runtime configurations +func (c *Config) extractRuntimeConfig() *toml.Tree { + config, _ := toml.TreeFromMap(map[string]interface{}{ + "version": c.Version, + }) + + // Extract runtime configurations for NVIDIA runtimes + if runtimes := c.GetPath([]string{"plugins", c.CRIRuntimePluginName, "containerd", "runtimes"}); runtimes != nil { + if runtimesTree, ok := runtimes.(*toml.Tree); ok { + nvidiaRuntimes, _ := toml.TreeFromMap(map[string]interface{}{}) + for _, name := range runtimesTree.Keys() { + if c.isNvidiaRuntime(name) { + if runtime := runtimesTree.Get(name); runtime != nil { + nvidiaRuntimes.Set(name, runtime) + } + } + } + if len(nvidiaRuntimes.Keys()) > 0 { + config.SetPath([]string{"plugins", c.CRIRuntimePluginName, "containerd", "runtimes"}, nvidiaRuntimes) + } + } + } + + // Extract default runtime name if it's one of ours + if defaultRuntime, ok := c.GetPath([]string{"plugins", c.CRIRuntimePluginName, "containerd", "default_runtime_name"}).(string); ok { + if c.isNvidiaRuntime(defaultRuntime) { + config.SetPath([]string{"plugins", c.CRIRuntimePluginName, "containerd", "default_runtime_name"}, defaultRuntime) + } + } + + // Extract CDI enablement + if cdiEnabled, ok := c.GetPath([]string{"plugins", c.CRIRuntimePluginName, "enable_cdi"}).(bool); ok && cdiEnabled { + config.SetPath([]string{"plugins", c.CRIRuntimePluginName, "enable_cdi"}, true) + } + + return config +} + +// updateMainConfigImports ensures the main config includes an imports directive +func (c *Config) updateMainConfigImports(path string) error { + // Load the main config file + mainConfig, err := toml.FromFile(path).Load() + if err != nil { + // If the file doesn't exist, create a minimal config with imports + if os.IsNotExist(err) { + mainConfig, _ = toml.TreeFromMap(map[string]interface{}{ + "version": c.Version, + }) + } else { + return fmt.Errorf("failed to load main config: %w", err) + } + } + + // Add imports directive if not present + importPattern := c.nvidiaConfig + imports := mainConfig.Get("imports") + if imports == nil { + mainConfig.Set("imports", []string{importPattern}) + } else if importsList, ok := imports.([]interface{}); ok { + // Check if the import pattern already exists + found := false + for _, imp := range importsList { + if impStr, ok := imp.(string); ok && impStr == importPattern { + found = true + break + } + } + if !found { + // Add our import pattern + importsList = append(importsList, importPattern) + mainConfig.Set("imports", importsList) + } + } else if importsStrList, ok := imports.([]string); ok { + // Check if the import pattern already exists + found := false + for _, imp := range importsStrList { + if imp == importPattern { + found = true + break + } + } + if !found { + // Add our import pattern + importsStrList = append(importsStrList, importPattern) + mainConfig.Set("imports", importsStrList) + } + } else { + return fmt.Errorf("unexpected imports type: %T", imports) + } + + // Save the updated main config + _, err = mainConfig.Save(path) + return err +} + +// isNvidiaRuntime checks if the runtime name is an NVIDIA runtime +func (c *Config) isNvidiaRuntime(name string) bool { + return name == "nvidia" || name == "nvidia-cdi" || name == "nvidia-legacy" +} diff --git a/pkg/config/engine/containerd/config_v1.go b/pkg/config/engine/containerd/config_v1.go index 6e87a1ef9..335d75a6a 100644 --- a/pkg/config/engine/containerd/config_v1.go +++ b/pkg/config/engine/containerd/config_v1.go @@ -122,7 +122,7 @@ func (c *ConfigV1) RemoveRuntime(name string) error { // Save writes the config to a file func (c ConfigV1) Save(path string) (int64, error) { - return (Config)(c).Save(path) + return (*Config)(&c).Save(path) } func (c *ConfigV1) GetRuntimeConfig(name string) (engine.RuntimeConfig, error) { diff --git a/pkg/config/engine/containerd/containerd.go b/pkg/config/engine/containerd/containerd.go index ca35c75db..404542427 100644 --- a/pkg/config/engine/containerd/containerd.go +++ b/pkg/config/engine/containerd/containerd.go @@ -46,6 +46,9 @@ type Config struct { // for the CRI runtime service. The name of this plugin was changed in v3 of the // containerd configuration file. CRIRuntimePluginName string + // nvidiaConfig specifies the path to the NVIDIA-specific configuration file. + // If set, runtime configurations will be saved to this file instead of the main config. + nvidiaConfig string } var _ engine.Interface = (*Config)(nil) @@ -108,6 +111,7 @@ func New(opts ...Option) (engine.Interface, error) { RuntimeType: b.runtimeType, UseLegacyConfig: b.useLegacyConfig, ContainerAnnotations: b.containerAnnotations, + nvidiaConfig: b.nvidiaConfig, } switch configVersion { diff --git a/pkg/config/engine/containerd/option.go b/pkg/config/engine/containerd/option.go index 2dec62efb..afdec0ba8 100644 --- a/pkg/config/engine/containerd/option.go +++ b/pkg/config/engine/containerd/option.go @@ -29,6 +29,7 @@ type builder struct { path string runtimeType string containerAnnotations []string + nvidiaConfig string } // Option defines a function that can be used to configure the config builder @@ -82,3 +83,12 @@ func WithContainerAnnotations(containerAnnotations ...string) Option { b.containerAnnotations = containerAnnotations } } + +// WithNvidiaConfig sets the NVIDIA-specific config file path for the config builder. +// When set, configurations will be saved to this file instead of modifying +// the main config file directly. +func WithNvidiaConfig(path string) Option { + return func(b *builder) { + b.nvidiaConfig = path + } +} diff --git a/pkg/config/engine/crio/crio.go b/pkg/config/engine/crio/crio.go index ded4dcabd..9c2bb8bf8 100644 --- a/pkg/config/engine/crio/crio.go +++ b/pkg/config/engine/crio/crio.go @@ -18,6 +18,8 @@ package crio import ( "fmt" + "os" + "path/filepath" "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine" @@ -27,7 +29,8 @@ import ( // Config represents the cri-o config type Config struct { *toml.Tree - Logger logger.Interface + Logger logger.Interface + nvidiaConfig string } type crioRuntime struct { @@ -68,8 +71,9 @@ func New(opts ...Option) (engine.Interface, error) { } cfg := Config{ - Tree: tomlConfig, - Logger: b.logger, + Tree: tomlConfig, + Logger: b.logger, + nvidiaConfig: b.nvidiaConfig, } return &cfg, nil } @@ -137,6 +141,32 @@ func (c *Config) RemoveRuntime(name string) error { return nil } + // If using NVIDIA-specific configuration, handle file cleanup + if c.nvidiaConfig != "" { + // Check if all NVIDIA runtimes are being removed + remainingNvidiaRuntimes := 0 + if runtimes := c.GetPath([]string{"crio", "runtime", "runtimes"}); runtimes != nil { + if runtimesTree, ok := runtimes.(*toml.Tree); ok { + for _, runtimeName := range runtimesTree.Keys() { + if c.isNvidiaRuntime(runtimeName) && runtimeName != name { + remainingNvidiaRuntimes++ + } + } + } + } + + // If this is the last NVIDIA runtime, remove the NVIDIA config file + if remainingNvidiaRuntimes == 0 { + if err := os.Remove(c.nvidiaConfig); err != nil && !os.IsNotExist(err) { + c.Logger.Warningf("Failed to remove NVIDIA config file %s: %v", c.nvidiaConfig, err) + } else { + c.Logger.Infof("Removed NVIDIA config file: %s", c.nvidiaConfig) + } + // Don't modify the in-memory tree when using NVIDIA-specific configuration + return nil + } + } + config := *c.Tree if runtime, ok := config.GetPath([]string{"crio", "runtime", "default_runtime"}).(string); ok { if runtime == name { @@ -173,6 +203,68 @@ func (c *Config) GetRuntimeConfig(name string) (engine.RuntimeConfig, error) { // EnableCDI is a no-op for CRI-O since it always enabled where supported. func (c *Config) EnableCDI() {} +// Save writes the config to the specified path or NVIDIA-specific config file +func (c *Config) Save(path string) (int64, error) { + if c.nvidiaConfig == "" { + // Backward compatibility: save to main config + return c.Tree.Save(path) + } + + // Ensure directory for NVIDIA config file exists + dir := filepath.Dir(c.nvidiaConfig) + if err := os.MkdirAll(dir, 0755); err != nil { + return 0, fmt.Errorf("failed to create directory for NVIDIA config: %w", err) + } + + // Save runtime configs to NVIDIA config file + nvidiaConfig := c.extractRuntimeConfig() + n, err := nvidiaConfig.Save(c.nvidiaConfig) + if err != nil { + return n, fmt.Errorf("failed to save NVIDIA config: %w", err) + } + + // For CRI-O, we don't need to update the main config with imports + // CRI-O automatically loads config files from the config directory + c.Logger.Infof("Wrote NVIDIA runtime configuration to: %s", c.nvidiaConfig) + return n, nil +} + +// extractRuntimeConfig creates a new config tree with only runtime configurations +func (c *Config) extractRuntimeConfig() *toml.Tree { + config, _ := toml.TreeFromMap(map[string]interface{}{}) + + // Extract runtime configurations for NVIDIA runtimes + if runtimes := c.GetPath([]string{"crio", "runtime", "runtimes"}); runtimes != nil { + if runtimesTree, ok := runtimes.(*toml.Tree); ok { + nvidiaRuntimes, _ := toml.TreeFromMap(map[string]interface{}{}) + for _, name := range runtimesTree.Keys() { + if c.isNvidiaRuntime(name) { + if runtime := runtimesTree.Get(name); runtime != nil { + nvidiaRuntimes.Set(name, runtime) + } + } + } + if len(nvidiaRuntimes.Keys()) > 0 { + config.SetPath([]string{"crio", "runtime", "runtimes"}, nvidiaRuntimes) + } + } + } + + // Extract default runtime if it's one of ours + if defaultRuntime, ok := c.GetPath([]string{"crio", "runtime", "default_runtime"}).(string); ok { + if c.isNvidiaRuntime(defaultRuntime) { + config.SetPath([]string{"crio", "runtime", "default_runtime"}, defaultRuntime) + } + } + + return config +} + +// isNvidiaRuntime checks if the runtime name is an NVIDIA runtime +func (c *Config) isNvidiaRuntime(name string) bool { + return name == "nvidia" || name == "nvidia-cdi" || name == "nvidia-legacy" +} + // CommandLineSource returns the CLI-based crio config loader func CommandLineSource(hostRoot string, executablePath string) toml.Loader { if executablePath == "" { diff --git a/pkg/config/engine/crio/option.go b/pkg/config/engine/crio/option.go index 7079fb150..563f8a84e 100644 --- a/pkg/config/engine/crio/option.go +++ b/pkg/config/engine/crio/option.go @@ -25,6 +25,7 @@ type builder struct { logger logger.Interface configSource toml.Loader path string + nvidiaConfig string } // Option defines a function that can be used to configure the config builder @@ -50,3 +51,12 @@ func WithConfigSource(configSource toml.Loader) Option { b.configSource = configSource } } + +// WithNvidiaConfig sets the NVIDIA-specific config file path for the config builder. +// When set, configurations will be saved to this file instead of modifying +// the main config file directly. +func WithNvidiaConfig(path string) Option { + return func(b *builder) { + b.nvidiaConfig = path + } +}