diff --git a/Dockerfile b/Dockerfile index 07c6d1f..e4fe136 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,7 +15,7 @@ COPY cmd/ ./cmd/ COPY api/ ./api/ COPY internal/ ./internal/ -RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} \ +RUN export CGO_ENABLED=0; export GOOS=${TARGETOS:-linux}; export GOARCH=${TARGETARCH}; \ go build -a -o manager ./cmd/controller/main.go && \ go build -a -o agent ./cmd/agent/main.go && \ go build -a -o smcr_init ./cmd/smcr_init/main.go @@ -28,13 +28,21 @@ USER 65532:65532 ENTRYPOINT ["/manager"] +# for local build speedup +FROM alibaba-cloud-linux-3-registry.cn-hangzhou.cr.aliyuncs.com/alinux3/alinux3 AS public_mirror +ARG PUBLIC_MIRROR="" +RUN if [[ -n ${PUBLIC_MIRROR} ]]; then echo "using public mirror"; sed -i 's/mirrors.cloud.aliyuncs.com/mirrors.aliyun.com/g' /etc/yum.repos.d/*; fi + FROM alibaba-cloud-linux-3-registry.cn-hangzhou.cr.aliyuncs.com/alinux3/alinux3 AS smcr_init -RUN sed -i 's/mirrors.cloud.aliyuncs.com/mirrors.aliyun.com/g' /etc/yum.repos.d/*; yum install -y smc-tools && yum clean all && rm -rf /var/cache/* /var/lib/dnf/history* /var/lib/rpm/rpm.sqlite +RUN yum install -y smc-tools && yum clean all && rm -rf /var/cache/* /var/lib/dnf/history* /var/lib/rpm/rpm.sqlite COPY --from=builder /workspace/smcr_init /usr/local/bin/smcr_init ENTRYPOINT ["/usr/local/bin/smcr_init"] FROM alibaba-cloud-linux-3-registry.cn-hangzhou.cr.aliyuncs.com/alinux3/alinux3 AS agent -RUN sed -i 's/mirrors.cloud.aliyuncs.com/mirrors.aliyun.com/g' /etc/yum.repos.d/*; yum install -y smc-tools procps-ng && yum clean all && rm -rf /var/cache/* /var/lib/dnf/history* /var/lib/rpm/rpm.sqlite +RUN --mount=type=bind,from=public_mirror,source=/etc/yum.repos.d,target=/etc/yum.repos.d \ + yum install -y smc-tools procps-ng kmod wget tar && yum clean all && rm -rf /var/cache/* /var/lib/dnf/history* /var/lib/rpm/rpm.sqlite +# for lifsea erdma driver install +COPY hack/lifsea.repo /etc/yum.repos.d/ COPY --from=builder /workspace/agent 
# Yum/DNF repository definition for the Alibaba Cloud Linux 3 "lifsea" tree.
# Two mirrors are listed for both baseurl and gpgkey: mirrors.cloud.aliyuncs.com
# first, mirrors.aliyun.com second.
# NOTE(review): presumably the first host is only reachable from inside
# Alibaba Cloud and the second is the public fallback - confirm.
[alinux3-lifsea]
name=alinux3-lifsea
baseurl=http://mirrors.cloud.aliyuncs.com/alinux/$releasever/lifsea/$basearch/
        http://mirrors.aliyun.com/alinux/$releasever/lifsea/$basearch/
# Package signatures are verified against the key(s) listed in gpgkey.
gpgcheck=1
enabled=1
gpgkey=http://mirrors.cloud.aliyuncs.com/alinux/3/lifsea/RPM-GPG-KEY-ALINUX-3
       http://mirrors.aliyun.com/alinux/3/lifsea/RPM-GPG-KEY-ALINUX-3
# Packages matching these globs are never taken from this repository.
# NOTE(review): looks intended to keep base-system packages of the image from
# being replaced by lifsea builds - confirm the list with the image owner.
excludepkgs=systemd*,rpcbind,container-selinux,rpm-ostree*,glibc*,ostree*,nfs-utils,libnfsidmap*,lifsea-release
if isContainerOS() { + err := containerOSDriverInstall(true) + if err != nil { + return err + } + } else { + _, err := hostExec(getInstallScript(true)) + if err != nil { + return err + } } } - _, err := hostExec("if [ -f /sys/module/erdma/parameters/compat_mode ] && [ \"N\" == $(cat /sys/module/erdma/parameters/compat_mode) ]; then rmmod erdma && modprobe erdma compat_mode=Y; else modprobe erdma compat_mode=Y; fi") + _, err := containerExec("if [ -f /sys/module/erdma/parameters/compat_mode ] && [ \"N\" == $(cat /sys/module/erdma/parameters/compat_mode) ]; then rmmod erdma && modprobe erdma compat_mode=Y; else modprobe erdma compat_mode=Y; fi") if err != nil { return fmt.Errorf("install erdma driver failed: %v", err) } diff --git a/internal/drivers/default.go b/internal/drivers/default.go index 1c7cb6c..9dcb3ff 100644 --- a/internal/drivers/default.go +++ b/internal/drivers/default.go @@ -13,25 +13,24 @@ func init() { Register(defaultDriver, &DefaultDriver{}) } -var defaultInstallScript = ` -if [ -d /sys/fs/cgroup/cpu/ ]; then cat /proc/self/status | awk '/PPid:/{print $2}' > /sys/fs/cgroup/cpu/tasks && cat /proc/self/status | awk '/PPid:/{print $2}' > /sys/fs/cgroup/memory/tasks; else -cat /proc/self/status | awk '/PPid:/{print $2}' > /sys/fs/cgroup/cgroup.procs; fi && -if grep -q "Alibaba Cloud Linux Lifsea" /etc/os-release; then lifseacli pkg install kernel-modules-$(uname -r); modprobe erdma; else -cd /tmp && rm -f erdma_installer-1.4.0.tar.gz && -wget 'http://mirrors.cloud.aliyuncs.com/erdma/erdma_installer-1.4.0.tar.gz' && tar -xzvf erdma_installer-1.4.0.tar.gz && cd erdma_installer && yum install -y kernel-devel-$(uname -r) gcc-c++ dkms cmake && ERDMA_CM_NO_BOUND_IF=1 ./install.sh --batch; fi -` - type DefaultDriver struct{} func (d *DefaultDriver) Install() error { exist := driverExists() if !exist { - _, err := hostExec(defaultInstallScript) - if err != nil { - return err + if isContainerOS() { + err := containerOSDriverInstall(false) + if err != nil { + 
return err + } + } else { + _, err := hostExec(getInstallScript(false)) + if err != nil { + return err + } } } - _, err := hostExec("if [ -f /sys/module/erdma/parameters/compat_mode ] && [ \"Y\" == $(cat /sys/module/erdma/parameters/compat_mode) ]; then rmmod erdma && modprobe erdma compat_mode=N; else modprobe erdma compat_mode=N; fi") + _, err := containerExec("if [ -f /sys/module/erdma/parameters/compat_mode ] && [ \"Y\" == $(cat /sys/module/erdma/parameters/compat_mode) ]; then rmmod erdma && modprobe erdma compat_mode=N; else modprobe erdma compat_mode=N; fi") if err != nil { return fmt.Errorf("install erdma driver failed: %v", err) } diff --git a/internal/drivers/ofed.go b/internal/drivers/ofed.go index 19dd159..360894b 100644 --- a/internal/drivers/ofed.go +++ b/internal/drivers/ofed.go @@ -20,19 +20,23 @@ cd /tmp && rm -f env_setup.sh && wget http://mirrors.cloud.aliyuncs.com/erdma/en type OFEDDriver struct{} func (d *OFEDDriver) Install() error { + execMethod := hostExec + if isContainerOS() { + execMethod = containerExec + } exist := driverExists() if !exist { - _, err := hostExec(gpuInstallScript) + _, err := execMethod(gpuInstallScript) if err != nil { return err } } - _, err := hostExec("if [ -f /sys/module/erdma/parameters/compat_mode ] && [ \"N\" == $(cat /sys/module/erdma/parameters/compat_mode) ]; then rmmod erdma && modprobe erdma compat_mode=Y; else modprobe erdma compat_mode=Y; fi") + _, err := execMethod("if [ -f /sys/module/erdma/parameters/compat_mode ] && [ \"N\" == $(cat /sys/module/erdma/parameters/compat_mode) ]; then rmmod erdma && modprobe erdma compat_mode=Y; else modprobe erdma compat_mode=Y; fi") if err != nil { return fmt.Errorf("install erdma driver failed: %v", err) } - _, err = hostExec("modprobe erdma") + _, err = execMethod("modprobe erdma") if err != nil { return fmt.Errorf("install erdma driver failed: %v", err) } diff --git a/internal/drivers/utils.go b/internal/drivers/utils.go index 02fae5e..6425857 100644 --- 
a/internal/drivers/utils.go +++ b/internal/drivers/utils.go @@ -1,198 +1,28 @@ -//go:build linux - package drivers import ( - "bytes" "fmt" - "io/fs" - "net" - "os" "os/exec" - "path" - "path/filepath" - "strconv" - "strings" - - "github.com/AliyunContainerService/alibabacloud-erdma-controller/internal/types" - "github.com/samber/lo" - "github.com/vishvananda/netlink" -) - -func driverExists() bool { - if isContainerOS() { - _, err := hostExec("modinfo erdma") - if err != nil { - driverLog.Info("driver not exists", "checklog", err) - return false - } - return true - } - _, err := hostExec("stat /bin/eadm && modinfo erdma") - if err != nil { - driverLog.Info("driver not exists", "checklog", err) - return false - } - return true -} - -func isContainerOS() bool { - _, err := hostExec("grep -q \"Alibaba Cloud Linux Lifsea\" /etc/os-release") - return err == nil -} - -//nolint:unparam -func hostExec(cmd string) (string, error) { - output, err := exec.Command("nsenter", "-t", "1", "-m", "--", "bash", "-c", cmd).CombinedOutput() - if err != nil { - return "", fmt.Errorf("exec error: %v, output: %s", err, string(output)) - } - return string(output), nil -} - -func EnsureSMCR() error { - _, err := hostExec("which smcss || yum install -y smc-tools || apt install -y smc-tools || lifseacli pkg install smc-tools") - if err != nil { - return err - } - _, err = hostExec("modprobe smc") - if err != nil { - return err - } - return nil -} - -func GetERdmaDevPathsFromRdmaLink(rdmaLink *netlink.RdmaLink) ([]string, error) { - var devPaths []string - ibUverbsDevs, err := os.ReadDir("/sys/class/infiniband_verbs/") - if err != nil { - return nil, fmt.Errorf("read dir /sys/class/infiniband_verbs/ failed: %v", err) - } - lo.ForEach(ibUverbsDevs, func(ibUverbsDev fs.DirEntry, _ int) { - ibDevPath := filepath.Join("/sys/class/infiniband_verbs/", ibUverbsDev.Name(), "ibdev") - driverLog.Info("check infiniband path", "path", ibDevPath) - if _, err = os.Stat(ibDevPath); err == nil { - if 
devName, err := os.ReadFile(ibDevPath); err == nil { - devNameStr := strings.Trim(string(devName), "\n") - driverLog.Info("infiniband device", "devName", devNameStr) - if devNameStr == rdmaLink.Attrs.Name { - devPaths = append(devPaths, filepath.Join("/dev/infiniband", ibUverbsDev.Name())) - } - } - } - }) - if len(devPaths) == 0 { - return nil, fmt.Errorf("can not find dev path for %s", rdmaLink.Attrs.Name) - } - - if _, err := os.Stat("/dev/infiniband/rdma_cm"); err == nil { - devPaths = append(devPaths, "/dev/infiniband/rdma_cm") - } - return devPaths, nil -} -func GetERdmaFromLink(link netlink.Link) (*netlink.RdmaLink, error) { - rdmaLinks, err := netlink.RdmaLinkList() - if err != nil { - return nil, fmt.Errorf("error list rdma links, %v", err) - } - linkHwAddr := link.Attrs().HardwareAddr - // erdma guid first byte is ^= 0x2 - linkHwAddr[0] ^= 0x2 - for _, rl := range rdmaLinks { - rdmaHwAddr, err := parseERdmaLinkHwAddr(rl.Attrs.NodeGuid) - if err != nil { - return nil, err - } - driverLog.Info("check rdma link", "rdmaLink", rl.Attrs.Name, "rdmaHwAddr", rdmaHwAddr.String(), "linkHwAddr", linkHwAddr.String()) - if rdmaHwAddr.String() == linkHwAddr.String() { - return rl, nil - } - } - return nil, fmt.Errorf("cannot found rdma link for %s", link.Attrs().Name) -} - -func parseERdmaLinkHwAddr(guid string) (net.HardwareAddr, error) { - hwAddrSlice := make([]byte, 8) - guidSlice := strings.Split(guid, ":") - if len(guidSlice) != 8 { - return nil, fmt.Errorf("invalid rdma guid: %s", guid) - } - for i, s := range guidSlice { - sint, err := strconv.ParseUint(s, 16, 8) - if err != nil { - return nil, fmt.Errorf("invalid rdma guid: %s, err: %v", guid, err) - } - hwAddrSlice[7-i] = uint8(sint) - } - return append(hwAddrSlice[0:3], hwAddrSlice[5:8]...), nil -} - -const ( - smcPnet = "smc_pnet" ) -func ConfigSMCPnetForDevice(info *types.ERdmaDeviceInfo) error { - output, err := exec.Command(smcPnet, "-s").CombinedOutput() - if err != nil { - return fmt.Errorf("failed to 
// getInstallScript returns the bash script that installs the eRDMA driver
// from the erdma_installer tarball.
//
// The script:
//  1. writes the parent PID (PPid from /proc/self/status) into the cgroup
//     v1 cpu and memory "tasks" files, or into cgroup.procs when
//     /sys/fs/cgroup/cpu/ does not exist;
//  2. downloads and unpacks erdma_installer-1.4.6 under /tmp;
//  3. installs the build prerequisites with yum, falling back to apt;
//  4. runs ./install.sh --batch with ERDMA_CM_NO_BOUND_IF=1.
//
// When compat is true, ERDMA_FORCE_MAD_ENABLE=1 is additionally exported to
// install.sh.
func getInstallScript(compat bool) string {
	// The yum/apt alternatives are wrapped in "{ ...; }". In bash "&&" and
	// "||" have equal precedence and associate to the left, so without the
	// grouping a failure of any earlier step (wget, tar, cd) would fall
	// through to the apt branch and then still run ./install.sh.
	const script = `if [ -d /sys/fs/cgroup/cpu/ ]; then cat /proc/self/status | awk '/PPid:/{print $2}' > /sys/fs/cgroup/cpu/tasks && cat /proc/self/status | awk '/PPid:/{print $2}' > /sys/fs/cgroup/memory/tasks; else
cat /proc/self/status | awk '/PPid:/{print $2}' > /sys/fs/cgroup/cgroup.procs; fi && cd /tmp && rm -f erdma_installer-1.4.6.tar.gz &&
wget 'http://mirrors.cloud.aliyuncs.com/erdma/erdma_installer-1.4.6.tar.gz' && tar -xzvf erdma_installer-1.4.6.tar.gz && cd erdma_installer &&
{ (type yum && yum install -y kernel-devel-$(uname -r) gcc-c++ dkms cmake) || (apt update && apt install -y debhelper autotools-dev dkms libnl-3-dev libnl-route-3-dev cmake); } &&
ERDMA_CM_NO_BOUND_IF=1 %s ./install.sh --batch`

	extraEnv := ""
	if compat {
		extraEnv = "ERDMA_FORCE_MAD_ENABLE=1"
	}
	return fmt.Sprintf(script, extraEnv)
}
exec.Command("/usr/bin/bash", "-c", containerOSScript).CombinedOutput() if err != nil { - return fmt.Errorf("failed to config smc-pnet net device: %v, output: %v", err, string(output)) + return fmt.Errorf("exec error: %v, output: %s", err, string(output)) } return nil } - -func ConfigForNetnsNetDevice(pnet string, netDevice string, netns string) error { - output, err := exec.Command("nsenter", "-n/proc/1/root/"+netns, "--", smcPnet, "-s").CombinedOutput() - if err != nil { - return fmt.Errorf("failed to get smc-pnet stat for net device: %v, output: %v", err, string(output)) - } - if bytes.Contains(output, []byte(netDevice)) { - return nil - } - output, err = exec.Command("nsenter", "-n/proc/1/root/"+netns, "--", smcPnet, "-a", pnet, "-I", netDevice).CombinedOutput() - if err != nil { - return fmt.Errorf("failed to config smc-pnet net device: %v, output: %v", err, string(output)) - } - return nil -} - -func GetERDMANumaNode(info *netlink.RdmaLink) (int64, error) { - devNumaPath := path.Join("/sys/class/infiniband/", info.Attrs.Name, "device/numa_node") - numaStr, err := os.ReadFile(devNumaPath) - if err != nil { - return -1, fmt.Errorf("failed to get numa node for %s: %v", info.Attrs.Name, err) - } - numaStr = bytes.Trim(numaStr, "\n") - numa, err := strconv.Atoi(string(numaStr)) - if err != nil { - return -1, fmt.Errorf("failed to parse numa node for %s: %v", info.Attrs.Name, err) - } - if numa < 0 { - numa = 0 - } - return int64(numa), nil -} diff --git a/internal/drivers/utils_linux.go b/internal/drivers/utils_linux.go new file mode 100644 index 0000000..fab5c0b --- /dev/null +++ b/internal/drivers/utils_linux.go @@ -0,0 +1,205 @@ +//go:build linux + +package drivers + +import ( + "bytes" + "fmt" + "io/fs" + "net" + "os" + "os/exec" + "path" + "path/filepath" + "strconv" + "strings" + + "github.com/AliyunContainerService/alibabacloud-erdma-controller/internal/types" + "github.com/samber/lo" + "github.com/vishvananda/netlink" +) + +func driverExists() bool { + if 
isContainerOS() { + _, err := containerExec("modinfo erdma") + if err != nil { + driverLog.Info("driver not exists", "checklog", err) + return false + } + return true + } + _, err := hostExec("stat /bin/eadm && modinfo erdma") + if err != nil { + driverLog.Info("driver not exists", "checklog", err) + return false + } + return true +} + +func isContainerOS() bool { + output, err := exec.Command("uname", "-r").CombinedOutput() + if err != nil { + return false + } + return strings.Contains(string(output), "lifsea") +} + +//nolint:unparam +func hostExec(cmd string) (string, error) { + output, err := exec.Command("nsenter", "-t", "1", "-m", "--", "bash", "-c", cmd).CombinedOutput() + if err != nil { + return "", fmt.Errorf("exec error: %v, output: %s", err, string(output)) + } + return string(output), nil +} + +func containerExec(cmd string) (string, error) { + output, err := exec.Command("bash", "-c", cmd).CombinedOutput() + if err != nil { + return "", fmt.Errorf("exec error: %v, output: %s", err, string(output)) + } + return string(output), nil +} + +func EnsureSMCR() error { + _, err := containerExec("modprobe smc") + if err != nil { + return err + } + return nil +} + +func GetERdmaDevPathsFromRdmaLink(rdmaLink *netlink.RdmaLink) ([]string, error) { + var devPaths []string + ibUverbsDevs, err := os.ReadDir("/sys/class/infiniband_verbs/") + if err != nil { + return nil, fmt.Errorf("read dir /sys/class/infiniband_verbs/ failed: %v", err) + } + lo.ForEach(ibUverbsDevs, func(ibUverbsDev fs.DirEntry, _ int) { + ibDevPath := filepath.Join("/sys/class/infiniband_verbs/", ibUverbsDev.Name(), "ibdev") + driverLog.Info("check infiniband path", "path", ibDevPath) + if _, err = os.Stat(ibDevPath); err == nil { + if devName, err := os.ReadFile(ibDevPath); err == nil { + devNameStr := strings.Trim(string(devName), "\n") + driverLog.Info("infiniband device", "devName", devNameStr) + if devNameStr == rdmaLink.Attrs.Name { + devPaths = append(devPaths, 
filepath.Join("/dev/infiniband", ibUverbsDev.Name())) + } + } + } + }) + if len(devPaths) == 0 { + return nil, fmt.Errorf("can not find dev path for %s", rdmaLink.Attrs.Name) + } + + if _, err := os.Stat("/dev/infiniband/rdma_cm"); err == nil { + devPaths = append(devPaths, "/dev/infiniband/rdma_cm") + } + return devPaths, nil +} +func GetERdmaFromLink(link netlink.Link) (*netlink.RdmaLink, error) { + rdmaLinks, err := netlink.RdmaLinkList() + if err != nil { + return nil, fmt.Errorf("error list rdma links, %v", err) + } + linkHwAddr := link.Attrs().HardwareAddr + // erdma guid first byte is ^= 0x2 + linkHwAddr[0] ^= 0x2 + for _, rl := range rdmaLinks { + rdmaHwAddr, err := parseERdmaLinkHwAddr(rl.Attrs.NodeGuid) + if err != nil { + return nil, err + } + driverLog.Info("check rdma link", "rdmaLink", rl.Attrs.Name, "rdmaHwAddr", rdmaHwAddr.String(), "linkHwAddr", linkHwAddr.String()) + if rdmaHwAddr.String() == linkHwAddr.String() { + return rl, nil + } + } + return nil, fmt.Errorf("cannot found rdma link for %s", link.Attrs().Name) +} + +func parseERdmaLinkHwAddr(guid string) (net.HardwareAddr, error) { + hwAddrSlice := make([]byte, 8) + guidSlice := strings.Split(guid, ":") + if len(guidSlice) != 8 { + return nil, fmt.Errorf("invalid rdma guid: %s", guid) + } + for i, s := range guidSlice { + sint, err := strconv.ParseUint(s, 16, 8) + if err != nil { + return nil, fmt.Errorf("invalid rdma guid: %s, err: %v", guid, err) + } + hwAddrSlice[7-i] = uint8(sint) + } + return append(hwAddrSlice[0:3], hwAddrSlice[5:8]...), nil +} + +const ( + smcPnet = "smc_pnet" +) + +func ConfigSMCPnetForDevice(info *types.ERdmaDeviceInfo) error { + output, err := exec.Command(smcPnet, "-s").CombinedOutput() + if err != nil { + return fmt.Errorf("failed to get smc-pnet stat: %v, output: %v", err, string(output)) + } + if bytes.Contains(output, []byte(PNetIDFromDevice(info))) { + return nil + } + output, err = exec.Command(smcPnet, "-a", PNetIDFromDevice(info), "-D", 
info.Name).CombinedOutput() + if err != nil { + return fmt.Errorf("failed to config smc-pnet rdma device: %v, output: %v", err, string(output)) + } + return nil +} + +func PNetIDFromDevice(info *types.ERdmaDeviceInfo) string { + return strings.ReplaceAll(strings.ToUpper(info.MAC), ":", "") +} + +func ConfigForNetDevice(pnet string, netDevice string) error { + output, err := exec.Command(smcPnet, "-s").CombinedOutput() + if err != nil { + return fmt.Errorf("failed to get smc-pnet stat for net device: %v, output: %v", err, string(output)) + } + if bytes.Contains(output, []byte(netDevice)) { + return nil + } + output, err = exec.Command(smcPnet, "-a", pnet, "-I", netDevice).CombinedOutput() + if err != nil { + return fmt.Errorf("failed to config smc-pnet net device: %v, output: %v", err, string(output)) + } + return nil +} + +func ConfigForNetnsNetDevice(pnet string, netDevice string, netns string) error { + output, err := exec.Command("nsenter", "-n/proc/1/root/"+netns, "--", smcPnet, "-s").CombinedOutput() + if err != nil { + return fmt.Errorf("failed to get smc-pnet stat for net device: %v, output: %v", err, string(output)) + } + if bytes.Contains(output, []byte(netDevice)) { + return nil + } + output, err = exec.Command("nsenter", "-n/proc/1/root/"+netns, "--", smcPnet, "-a", pnet, "-I", netDevice).CombinedOutput() + if err != nil { + return fmt.Errorf("failed to config smc-pnet net device: %v, output: %v", err, string(output)) + } + return nil +} + +func GetERDMANumaNode(info *netlink.RdmaLink) (int64, error) { + devNumaPath := path.Join("/sys/class/infiniband/", info.Attrs.Name, "device/numa_node") + numaStr, err := os.ReadFile(devNumaPath) + if err != nil { + return -1, fmt.Errorf("failed to get numa node for %s: %v", info.Attrs.Name, err) + } + numaStr = bytes.Trim(numaStr, "\n") + numa, err := strconv.Atoi(string(numaStr)) + if err != nil { + return -1, fmt.Errorf("failed to parse numa node for %s: %v", info.Attrs.Name, err) + } + if numa < 0 { + numa = 0 + } 
// hostExec is the stub variant of hostExec defined in utils_unsupported.go.
// It never runs cmd: it logs that host exec is not supported and returns an
// empty output together with a nil error, so callers see a "successful"
// no-op.
// NOTE(review): presumably this file is built only for non-linux targets -
// the build constraint is not visible here; confirm.
func hostExec(cmd string) (string, error) {
	driverLog.Error(nil, "host exec is not supported on this platform")
	return "", nil
}