diff --git a/api/config/v1/config.go b/api/config/v1/config.go index 0af2c8125..c102c4239 100644 --- a/api/config/v1/config.go +++ b/api/config/v1/config.go @@ -61,7 +61,6 @@ func NewConfig(c *cli.Context, flags []cli.Flag) (*Config, error) { if c.IsSet("imex-required") { config.Imex.Required = c.Bool("imex-required") } - updateFromCLIFlag(&config.Imex.NodesConfigFile, c, "imex-nodes-config-file") // If nvidiaDevRoot (the path to the device nodes on the host) is not set, // we default to using the driver root on the host. diff --git a/api/config/v1/imex.go b/api/config/v1/imex.go index 9a98b1f86..928e13e85 100644 --- a/api/config/v1/imex.go +++ b/api/config/v1/imex.go @@ -39,10 +39,6 @@ type Imex struct { // If it is not required its injection is skipped if the device nodes do not exist or if its // existence cannot be queried. Required bool `json:"required,omitempty" yaml:"required,omitempty"` - // NodesConfigFile defines the location to the IMEX nodes config file. - // Such a nodes config file contains the IP addresses of nodes that are part of the IMEX domain. - // Note that this is the absolute path to the file in the device plugin container. - NodesConfigFile *string `json:"nodesConfigFile,omitempty" yaml:"nodesConfigFile,omitempty"` } // AssertChannelIDsIsValid checks whether the specified list of channel IDs is valid. diff --git a/cmd/gpu-feature-discovery/main.go b/cmd/gpu-feature-discovery/main.go index ec242019a..aa8c53121 100644 --- a/cmd/gpu-feature-discovery/main.go +++ b/cmd/gpu-feature-discovery/main.go @@ -86,12 +86,6 @@ func main() { Value: "/etc/kubernetes/node-feature-discovery/features.d/gfd", EnvVars: []string{"GFD_OUTPUT_FILE"}, }, - &cli.StringFlag{ - Name: "imex-nodes-config-file", - Usage: "Path to the IMEX nodes config file. This file contains a list of IP addresses of the nodes in the IMEX domain.", - Value: "/etc/nvidia-imex/nodes_config.cfg", - EnvVars: []string{"GFD_IMEX_NODES_CONFIG_FILE"}, - }, &cli.StringFlag{ Name: "machine-type-file", Value: "/sys/class/dmi/id/product_name", diff --git a/deployments/helm/nvidia-device-plugin/templates/daemonset-gfd.yml b/deployments/helm/nvidia-device-plugin/templates/daemonset-gfd.yml index 9e688ac9a..4b398f3bd 100644 --- a/deployments/helm/nvidia-device-plugin/templates/daemonset-gfd.yml +++ b/deployments/helm/nvidia-device-plugin/templates/daemonset-gfd.yml @@ -57,7 +57,35 @@ spec: {{- end }} {{- if $options.hasConfigMap }} shareProcessNamespace: true + {{- end }} initContainers: + - image: {{ include "nvidia-device-plugin.fullimage" . }} + name: gpu-feature-discovery-imex-init + command: ["/bin/bash", "-c"] + args: + - | + IMEX_NODES_CONFIG_FILE=/etc/nvidia-imex/nodes_config.cfg + if [[ -f /config/${IMEX_NODES_CONFIG_FILE} ]]; then + echo "Removing cached IMEX nodes config" + rm -f /config/${IMEX_NODES_CONFIG_FILE} + fi + + if [[ ! -f /driver-root/${IMEX_NODES_CONFIG_FILE} ]]; then + echo "No IMEX nodes config path detected; Skipping" + exit 0 + fi + + echo "Copying IMEX nodes config" + mkdir -p $(dirname /config/${IMEX_NODES_CONFIG_FILE}) + cp /driver-root/${IMEX_NODES_CONFIG_FILE} /config/${IMEX_NODES_CONFIG_FILE} + volumeMounts: + - name: config + mountPath: /config + - name: driver-root + mountPath: /driver-root/etc + subPath: etc + readOnly: true + {{- if $options.hasConfigMap }} - image: {{ include "nvidia-device-plugin.fullimage" . }} name: gpu-feature-discovery-init command: ["config-manager"] @@ -182,14 +210,12 @@ spec: mountPath: "/etc/kubernetes/node-feature-discovery/features.d" - name: host-sys mountPath: "/sys" - - name: nvidia-imex-dir - mountPath: "/etc/nvidia-imex" {{- if $options.hasConfigMap }} - name: available-configs mountPath: /available-configs + {{- end }} - name: config mountPath: /config - {{- end }} {{- with .Values.resources }} resources: {{- toYaml . | nindent 10 }} @@ -201,17 +227,17 @@ spec: - name: host-sys hostPath: path: "/sys" - - name: nvidia-imex-dir - type: DirectoryOrCreate + - name: driver-root hostPath: - path: {{ clean ( join "/" ( list "/" .Values.nvidiaDriverRoot "/etc/nvidia-imex" ) ) | quote }} + path: {{ clean ( join "/" ( list "/" .Values.nvidiaDriverRoot ) ) | quote }} + type: Directory {{- if $options.hasConfigMap }} - name: available-configs configMap: name: {{ $configMapName }} + {{- end }} - name: config emptyDir: {} - {{- end }} {{- with .Values.nodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} diff --git a/internal/lm/fabric.go b/internal/lm/imex.go similarity index 82% rename from internal/lm/fabric.go rename to internal/lm/imex.go index 166603a54..cf0ef092a 100644 --- a/internal/lm/fabric.go +++ b/internal/lm/imex.go @@ -34,19 +34,16 @@ import ( "github.com/NVIDIA/k8s-device-plugin/internal/resource" ) -func newImexLabeler(config *spec.Config, devices []resource.Device) (Labeler, error) { - if config.Imex.NodesConfigFile == nil || *config.Imex.NodesConfigFile == "" { - // No imex config file, return empty labels - return empty{}, nil - } - - nodesConfigFiles := []string{*config.Imex.NodesConfigFile} - if root := config.Flags.Plugin.ContainerDriverRoot; root != nil && *root != "" { - nodesConfigFiles = append(nodesConfigFiles, filepath.Join(*root, *config.Imex.NodesConfigFile)) - } +const ( + // ImexNodesConfigFilePath is the path to the IMEX nodes config file. + // This file contains a list of IP addresses of the nodes in the IMEX domain. + ImexNodesConfigFilePath = "/etc/nvidia-imex/nodes_config.cfg" +) +func newImexLabeler(config *spec.Config, devices []resource.Device) (Labeler, error) { var errs error - for _, configFilePath := range nodesConfigFiles { + for _, root := range imexNodesConfigFilePathSearchRoots(config) { + configFilePath := filepath.Join(root, ImexNodesConfigFilePath) imexLabeler, err := imexLabelerForConfigFile(configFilePath, devices) if err != nil { errs = errors.Join(errs, err) @@ -64,6 +61,19 @@ func newImexLabeler(config *spec.Config, devices []resource.Device) (Labeler, er return empty{}, nil } +// imexNodesConfigFilePathSearchRoots returns a list of roots to search for the IMEX nodes config file. +func imexNodesConfigFilePathSearchRoots(config *spec.Config) []string { + // By default, search / and /config for config files. + roots := []string{"/", "/config"} + + if config == nil || config.Flags.Plugin == nil || config.Flags.Plugin.ContainerDriverRoot == nil { + return roots + } + + // If a driver root is specified, it is also searched. + return append(roots, *config.Flags.Plugin.ContainerDriverRoot) +} + func imexLabelerForConfigFile(configFilePath string, devices []resource.Device) (Labeler, error) { imexConfigFile, err := os.Open(configFilePath) if os.IsNotExist(err) { diff --git a/internal/lm/fabric_test.go b/internal/lm/imex_test.go similarity index 100% rename from internal/lm/fabric_test.go rename to internal/lm/imex_test.go