diff --git a/docs/building_blocks.md b/docs/building_blocks.md index 251a3a7..a36694f 100644 --- a/docs/building_blocks.md +++ b/docs/building_blocks.md @@ -1540,13 +1540,19 @@ __Parameters__ - __buildlabel__: The build label assigned by Mellanox to the tarball. -For versions 2.17 and later, the default value is `cuda12`. -For version 2.16 the default value is `cuda12-gdrcopy2-nccl2.18`. -For version 2.15 the default value is `cuda12-gdrcopy2-nccl2.17`. -For version 2.14 the default value is `cuda11-gdrcopy2-nccl2.16`. -For versions 2.12 and 2.13 the default value is `cuda11-gdrcopy2-nccl2.12`. -For versions 2.10 and 2.11 the default value is `cuda11-gdrcopy2-nccl2.11`. -This value is ignored for HPC-X version 2.9 and earlier. +For version 2.24 and later, the default value is the value of the +`cuda` parameter. For versions 2.17 through 2.23, the default +value is `cuda12`. For version 2.16 the default value is +`cuda12-gdrcopy2-nccl2.18`. For version 2.15 the default value is +`cuda12-gdrcopy2-nccl2.17`. For version 2.14 the default value is +`cuda11-gdrcopy2-nccl2.16`. For versions 2.12 and 2.13 the +default value is `cuda11-gdrcopy2-nccl2.12`. For versions 2.10 +and 2.11 the default value is `cuda11-gdrcopy2-nccl2.11`. This +value is ignored for HPC-X version 2.9 and earlier. + +- __cuda__: The CUDA label assigned by Mellanox to the tarball. This +parameter is only recognized for version 2.24 and later. The +default value is `cuda13`. - __environment__: Boolean flag to specify whether the environment should be modified to include HPC-X. This option is only @@ -1593,12 +1599,13 @@ tarball. For version 2.21 and later, the default value is - __oslabel__: The Linux distribution label assigned by Mellanox to the tarball. For Ubuntu, the default value is `ubuntu16.04` for Ubuntu 16.04, `ubuntu18.04` for Ubuntu 18.04, `ubuntu20.04` for -Ubuntu 20.04, and `ubuntu22.04` for Ubuntu 22.04. 
For HPC-X -version 2.10 and later and RHEL-based Linux distributions, the -default value is `redhat7` for version 7 and `redhat8` for version -8. For HPC-X version 2.9 and earlier and RHEL-based Linux -distributions, the default value is `redhat7.6` for version 7 and -`redhat8.0` for version 8. +Ubuntu 20.04, `ubuntu22.04` for Ubuntu 22.04, and `ubuntu24.04` +for Ubuntu 24.04. For HPC-X version 2.10 and later and RHEL-based +Linux distributions, the default value is `redhat7` for version 7, +`redhat8` for version 8, and `redhat9` for version 9. For HPC-X +version 2.9 and earlier and RHEL-based Linux distributions, the +default value is `redhat7.6` for version 7 and `redhat8.0` for +version 8. - __ospackages__: List of OS packages to install prior to installing Mellanox HPC-X. For Ubuntu, the default values are `bzip2`, @@ -1610,7 +1617,7 @@ distributions the default values are `bzip2`, `numactl-libs`, `/usr/local/hpcx`. - __version__: The version of Mellanox HPC-X to install. The default -value is `2.22.1`. +value is `2.24.1`. __Examples__ diff --git a/hpccm/building_blocks/hpcx.py b/hpccm/building_blocks/hpcx.py index 9263cc8..3daeca8 100644 --- a/hpccm/building_blocks/hpcx.py +++ b/hpccm/building_blocks/hpcx.py @@ -49,13 +49,19 @@ class hpcx(bb_base, hpccm.templates.envvars, hpccm.templates.ldconfig, # Parameters buildlabel: The build label assigned by Mellanox to the tarball. - For versions 2.17 and later, the default value is `cuda12`. - For version 2.16 the default value is `cuda12-gdrcopy2-nccl2.18`. - For version 2.15 the default value is `cuda12-gdrcopy2-nccl2.17`. - For version 2.14 the default value is `cuda11-gdrcopy2-nccl2.16`. - For versions 2.12 and 2.13 the default value is `cuda11-gdrcopy2-nccl2.12`. - For versions 2.10 and 2.11 the default value is `cuda11-gdrcopy2-nccl2.11`. - This value is ignored for HPC-X version 2.9 and earlier. + For version 2.24 and later, the default value is the value of the + `cuda` parameter. 
For versions 2.17 through 2.23, the default + value is `cuda12`. For version 2.16 the default value is + `cuda12-gdrcopy2-nccl2.18`. For version 2.15 the default value is + `cuda12-gdrcopy2-nccl2.17`. For version 2.14 the default value is + `cuda11-gdrcopy2-nccl2.16`. For versions 2.12 and 2.13 the + default value is `cuda11-gdrcopy2-nccl2.12`. For versions 2.10 + and 2.11 the default value is `cuda11-gdrcopy2-nccl2.11`. This + value is ignored for HPC-X version 2.9 and earlier. + + cuda: The CUDA label assigned by Mellanox to the tarball. This + parameter is only recognized for version 2.24 and later. The + default value is `cuda13`. environment: Boolean flag to specify whether the environment should be modified to include HPC-X. This option is only @@ -102,12 +108,13 @@ class hpcx(bb_base, hpccm.templates.envvars, hpccm.templates.ldconfig, oslabel: The Linux distribution label assigned by Mellanox to the tarball. For Ubuntu, the default value is `ubuntu16.04` for Ubuntu 16.04, `ubuntu18.04` for Ubuntu 18.04, `ubuntu20.04` for - Ubuntu 20.04, and `ubuntu22.04` for Ubuntu 22.04. For HPC-X - version 2.10 and later and RHEL-based Linux distributions, the - default value is `redhat7` for version 7 and `redhat8` for version - 8. For HPC-X version 2.9 and earlier and RHEL-based Linux - distributions, the default value is `redhat7.6` for version 7 and - `redhat8.0` for version 8. + Ubuntu 20.04, `ubuntu22.04` for Ubuntu 22.04, and `ubuntu24.04` + for Ubuntu 24.04. For HPC-X version 2.10 and later and RHEL-based + Linux distributions, the default value is `redhat7` for version 7, + `redhat8` for version 8, and `redhat9` for version 9. For HPC-X + version 2.9 and earlier and RHEL-based Linux distributions, the + default value is `redhat7.6` for version 7 and `redhat8.0` for + version 8. ospackages: List of OS packages to install prior to installing Mellanox HPC-X. 
For Ubuntu, the default values are `bzip2`, @@ -119,7 +126,7 @@ class hpcx(bb_base, hpccm.templates.envvars, hpccm.templates.ldconfig, `/usr/local/hpcx`. version: The version of Mellanox HPC-X to install. The default - value is `2.22.1`. + value is `2.24.1`. # Examples @@ -139,6 +146,7 @@ def __init__(self, **kwargs): 'https://content.mellanox.com/hpc/hpc-x') self.__bashrc = '' # Filled in by __distro() self.__buildlabel = kwargs.get('buildlabel', None) + self.__cuda = kwargs.get('cuda', 'cuda13') self.__hpcxinit = kwargs.get('hpcxinit', True) self.__inbox = kwargs.get('inbox', False) self.__mlnx_ofed = kwargs.get('mlnx_ofed', None) @@ -148,13 +156,15 @@ def __init__(self, **kwargs): self.__ospackages = kwargs.get('ospackages', []) # Filled in by _distro() self.__packages = kwargs.get('packages', []) self.__prefix = kwargs.get('prefix', '/usr/local/hpcx') - self.__version = kwargs.get('version', '2.22.1') + self.__version = kwargs.get('version', '2.24.1') self.__commands = [] # Filled in by __setup() self.__wd = kwargs.get('wd', hpccm.config.g_wd) # working directory if not self.__buildlabel: - if Version(self.__version) >= Version('2.17'): + if Version(self.__version) >= Version('2.24'): + self.__buildlabel = self.__cuda + elif Version(self.__version) >= Version('2.17'): self.__buildlabel = 'cuda12' elif Version(self.__version) >= Version('2.16'): self.__buildlabel = 'cuda12-gdrcopy2-nccl2.18' @@ -251,16 +261,20 @@ def __setup(self): """Construct the series of shell commands, i.e., fill in self.__commands""" - # For version 2.8 and earlier, the download URL has the format - # MAJOR.MINOR in the path and the tarball contains - # MAJOR.MINOR.REVISION, so pull apart the full version to get - # the individual components. 
- version_string = self.__version - if Version(self.__version) <= Version('2.8'): + version_dirstring = self.__version + if Version(self.__version) >= Version('2.24'): + # For version 2.24 and later, the download URL has the CUDA + # version appended to the directory name. + version_dirstring += '_{0}'.format(self.__cuda) + elif Version(self.__version) <= Version('2.8'): + # For version 2.8 and earlier, the download URL has the format + # MAJOR.MINOR in the path and the tarball contains + # MAJOR.MINOR.REVISION, so pull apart the full version to get + # the individual components. match = re.match(r'(?P<major>\d+)\.(?P<minor>\d+)\.(?P<revision>\d+)', self.__version) - version_string = '{0}.{1}'.format(match.groupdict()['major'], - match.groupdict()['minor']) + version_dirstring = '{0}.{1}'.format(match.groupdict()['major'], + match.groupdict()['minor']) if self.__inbox: # Use inbox OFED @@ -283,7 +297,7 @@ def __setup(self): self.__version, self.__ofedlabel, self.__oslabel, self.__arch) tarball = self.__label + '.tbz' - url = '{0}/v{1}/{2}'.format(self.__baseurl, version_string, tarball) + url = '{0}/v{1}/{2}'.format(self.__baseurl, version_dirstring, tarball) # Download source from web self.__commands.append(self.download_step(url=url, directory=self.__wd)) diff --git a/test/test_hpcx.py b/test/test_hpcx.py index c49b742..558c1f3 100644 --- a/test/test_hpcx.py +++ b/test/test_hpcx.py @@ -36,9 +36,9 @@ def setUp(self): @docker def test_defaults_ubuntu20(self): """Default hpcx building block""" - h = hpcx() + h = hpcx(version='2.21.3') self.assertEqual(str(h), -r'''# Mellanox HPC-X version 2.22.1 +r'''# Mellanox HPC-X version 2.21.3 RUN apt-get update -y && \ DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ bzip2 \ @@ -47,12 +47,12 @@ def test_defaults_ubuntu20(self): tar \ wget && \ rm -rf /var/lib/apt/lists/* -RUN mkdir -p /var/tmp && wget -q -nc --no-check-certificate -P /var/tmp 
https://content.mellanox.com/hpc/hpc-x/v2.22.1/hpcx-v2.22.1-gcc-doca_ofed-ubuntu20.04-cuda12-x86_64.tbz && \ - mkdir -p /var/tmp && tar -x -f /var/tmp/hpcx-v2.22.1-gcc-doca_ofed-ubuntu20.04-cuda12-x86_64.tbz -C /var/tmp -j && \ - cp -a /var/tmp/hpcx-v2.22.1-gcc-doca_ofed-ubuntu20.04-cuda12-x86_64 /usr/local/hpcx && \ +RUN mkdir -p /var/tmp && wget -q -nc --no-check-certificate -P /var/tmp https://content.mellanox.com/hpc/hpc-x/v2.21.3/hpcx-v2.21.3-gcc-doca_ofed-ubuntu20.04-cuda12-x86_64.tbz && \ + mkdir -p /var/tmp && tar -x -f /var/tmp/hpcx-v2.21.3-gcc-doca_ofed-ubuntu20.04-cuda12-x86_64.tbz -C /var/tmp -j && \ + cp -a /var/tmp/hpcx-v2.21.3-gcc-doca_ofed-ubuntu20.04-cuda12-x86_64 /usr/local/hpcx && \ echo "source /usr/local/hpcx/hpcx-init-ompi.sh" >> /etc/bash.bashrc && \ echo "hpcx_load" >> /etc/bash.bashrc && \ - rm -rf /var/tmp/hpcx-v2.22.1-gcc-doca_ofed-ubuntu20.04-cuda12-x86_64.tbz /var/tmp/hpcx-v2.22.1-gcc-doca_ofed-ubuntu20.04-cuda12-x86_64''') + rm -rf /var/tmp/hpcx-v2.21.3-gcc-doca_ofed-ubuntu20.04-cuda12-x86_64.tbz /var/tmp/hpcx-v2.21.3-gcc-doca_ofed-ubuntu20.04-cuda12-x86_64''') @x86_64 @ubuntu24 @@ -61,7 +61,7 @@ def test_defaults_ubuntu24(self): """Default hpcx building block""" h = hpcx() self.assertEqual(str(h), -r'''# Mellanox HPC-X version 2.22.1 +r'''# Mellanox HPC-X version 2.24.1 RUN apt-get update -y && \ DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ bzip2 \ @@ -70,12 +70,12 @@ def test_defaults_ubuntu24(self): tar \ wget && \ rm -rf /var/lib/apt/lists/* -RUN mkdir -p /var/tmp && wget -q -nc --no-check-certificate -P /var/tmp https://content.mellanox.com/hpc/hpc-x/v2.22.1/hpcx-v2.22.1-gcc-doca_ofed-ubuntu24.04-cuda12-x86_64.tbz && \ - mkdir -p /var/tmp && tar -x -f /var/tmp/hpcx-v2.22.1-gcc-doca_ofed-ubuntu24.04-cuda12-x86_64.tbz -C /var/tmp -j && \ - cp -a /var/tmp/hpcx-v2.22.1-gcc-doca_ofed-ubuntu24.04-cuda12-x86_64 /usr/local/hpcx && \ +RUN mkdir -p /var/tmp && wget -q -nc --no-check-certificate -P /var/tmp 
https://content.mellanox.com/hpc/hpc-x/v2.24.1_cuda13/hpcx-v2.24.1-gcc-doca_ofed-ubuntu24.04-cuda13-x86_64.tbz && \ + mkdir -p /var/tmp && tar -x -f /var/tmp/hpcx-v2.24.1-gcc-doca_ofed-ubuntu24.04-cuda13-x86_64.tbz -C /var/tmp -j && \ + cp -a /var/tmp/hpcx-v2.24.1-gcc-doca_ofed-ubuntu24.04-cuda13-x86_64 /usr/local/hpcx && \ echo "source /usr/local/hpcx/hpcx-init-ompi.sh" >> /etc/bash.bashrc && \ echo "hpcx_load" >> /etc/bash.bashrc && \ - rm -rf /var/tmp/hpcx-v2.22.1-gcc-doca_ofed-ubuntu24.04-cuda12-x86_64.tbz /var/tmp/hpcx-v2.22.1-gcc-doca_ofed-ubuntu24.04-cuda12-x86_64''') + rm -rf /var/tmp/hpcx-v2.24.1-gcc-doca_ofed-ubuntu24.04-cuda13-x86_64.tbz /var/tmp/hpcx-v2.24.1-gcc-doca_ofed-ubuntu24.04-cuda13-x86_64''') @x86_64 @centos @@ -106,7 +106,7 @@ def test_defaults_centos8(self): """Default hpcx building block""" h = hpcx() self.assertEqual(str(h), -r'''# Mellanox HPC-X version 2.22.1 +r'''# Mellanox HPC-X version 2.24.1 RUN yum install -y \ bzip2 \ numactl-libs \ @@ -114,12 +114,12 @@ def test_defaults_centos8(self): tar \ wget && \ rm -rf /var/cache/yum/* -RUN mkdir -p /var/tmp && wget -q -nc --no-check-certificate -P /var/tmp https://content.mellanox.com/hpc/hpc-x/v2.22.1/hpcx-v2.22.1-gcc-doca_ofed-redhat8-cuda12-x86_64.tbz && \ - mkdir -p /var/tmp && tar -x -f /var/tmp/hpcx-v2.22.1-gcc-doca_ofed-redhat8-cuda12-x86_64.tbz -C /var/tmp -j && \ - cp -a /var/tmp/hpcx-v2.22.1-gcc-doca_ofed-redhat8-cuda12-x86_64 /usr/local/hpcx && \ +RUN mkdir -p /var/tmp && wget -q -nc --no-check-certificate -P /var/tmp https://content.mellanox.com/hpc/hpc-x/v2.24.1_cuda13/hpcx-v2.24.1-gcc-doca_ofed-redhat8-cuda13-x86_64.tbz && \ + mkdir -p /var/tmp && tar -x -f /var/tmp/hpcx-v2.24.1-gcc-doca_ofed-redhat8-cuda13-x86_64.tbz -C /var/tmp -j && \ + cp -a /var/tmp/hpcx-v2.24.1-gcc-doca_ofed-redhat8-cuda13-x86_64 /usr/local/hpcx && \ echo "source /usr/local/hpcx/hpcx-init-ompi.sh" >> /etc/bashrc && \ echo "hpcx_load" >> /etc/bashrc && \ - rm -rf 
/var/tmp/hpcx-v2.22.1-gcc-doca_ofed-redhat8-cuda12-x86_64.tbz /var/tmp/hpcx-v2.22.1-gcc-doca_ofed-redhat8-cuda12-x86_64''') + rm -rf /var/tmp/hpcx-v2.24.1-gcc-doca_ofed-redhat8-cuda13-x86_64.tbz /var/tmp/hpcx-v2.24.1-gcc-doca_ofed-redhat8-cuda13-x86_64''') @x86_64 @ubuntu @@ -314,7 +314,7 @@ def test_runtime(self): h = hpcx() r = h.runtime() self.assertEqual(r, -r'''# Mellanox HPC-X version 2.22.1 +r'''# Mellanox HPC-X version 2.24.1 RUN apt-get update -y && \ DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ bzip2 \ @@ -323,9 +323,9 @@ def test_runtime(self): tar \ wget && \ rm -rf /var/lib/apt/lists/* -RUN mkdir -p /var/tmp && wget -q -nc --no-check-certificate -P /var/tmp https://content.mellanox.com/hpc/hpc-x/v2.22.1/hpcx-v2.22.1-gcc-doca_ofed-ubuntu22.04-cuda12-x86_64.tbz && \ - mkdir -p /var/tmp && tar -x -f /var/tmp/hpcx-v2.22.1-gcc-doca_ofed-ubuntu22.04-cuda12-x86_64.tbz -C /var/tmp -j && \ - cp -a /var/tmp/hpcx-v2.22.1-gcc-doca_ofed-ubuntu22.04-cuda12-x86_64 /usr/local/hpcx && \ +RUN mkdir -p /var/tmp && wget -q -nc --no-check-certificate -P /var/tmp https://content.mellanox.com/hpc/hpc-x/v2.24.1_cuda13/hpcx-v2.24.1-gcc-doca_ofed-ubuntu22.04-cuda13-x86_64.tbz && \ + mkdir -p /var/tmp && tar -x -f /var/tmp/hpcx-v2.24.1-gcc-doca_ofed-ubuntu22.04-cuda13-x86_64.tbz -C /var/tmp -j && \ + cp -a /var/tmp/hpcx-v2.24.1-gcc-doca_ofed-ubuntu22.04-cuda13-x86_64 /usr/local/hpcx && \ echo "source /usr/local/hpcx/hpcx-init-ompi.sh" >> /etc/bash.bashrc && \ echo "hpcx_load" >> /etc/bash.bashrc && \ - rm -rf /var/tmp/hpcx-v2.22.1-gcc-doca_ofed-ubuntu22.04-cuda12-x86_64.tbz /var/tmp/hpcx-v2.22.1-gcc-doca_ofed-ubuntu22.04-cuda12-x86_64''') + rm -rf /var/tmp/hpcx-v2.24.1-gcc-doca_ofed-ubuntu22.04-cuda13-x86_64.tbz /var/tmp/hpcx-v2.24.1-gcc-doca_ofed-ubuntu22.04-cuda13-x86_64''')