diff --git a/Makefile b/Makefile index 097bb9d74..58797b5b5 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ KUEUE_REPO=https://github.com/kubernetes-sigs/kueue.git KUBECTL_VERSION := $(shell curl -L -s https://dl.k8s.io/release/stable.txt) -KUEUE_VERSION=v0.14.1 +KUEUE_VERSION=v0.12.2 KJOB_VERSION=v0.1.0 OS := $(shell uname -s | tr A-Z a-z) diff --git a/goldens/Basic_cluster_create.txt b/goldens/Basic_cluster_create.txt index 359117b8a..2969b5285 100644 --- a/goldens/Basic_cluster_create.txt +++ b/goldens/Basic_cluster_create.txt @@ -70,14 +70,14 @@ kubectl get node --no-headers | wc -l kubectl apply -f 1b31e624e490f9c8c4ef4e369f08d3fa467990af5a261e4405bd045265d70e95 [XPK] Try 1: Install PathwaysJob on golden-cluster [XPK] Task: `Install PathwaysJob on golden-cluster` is implemented by the following command not running since it is a dry run. -kubectl apply --server-side -f https://github.com/google/pathways-job/releases/download/v0.1.3/install.yaml +kubectl apply --server-side -f https://github.com/google/pathways-job/releases/download/v0.1.4/install.yaml [XPK] Enabling Kueue on the cluster [XPK] Task: `Get kueue version on server` is implemented by the following command not running since it is a dry run. kubectl get deployment kueue-controller-manager -n kueue-system -o jsonpath='{.spec.template.spec.containers[0].image}' -[XPK] Installing Kueue version v0.14.1... +[XPK] Installing Kueue version v0.12.2... [XPK] Try 1: Install Kueue [XPK] Task: `Install Kueue` is implemented by the following command not running since it is a dry run. -kubectl apply --server-side --force-conflicts -f https://github.com/kubernetes-sigs/kueue/releases/download/v0.14.1/manifests.yaml +kubectl apply --server-side --force-conflicts -f https://github.com/kubernetes-sigs/kueue/releases/download/v0.12.2/manifests.yaml [XPK] Task: `Wait for Kueue to be available` is implemented by the following command not running since it is a dry run. kubectl wait deploy/kueue-controller-manager -nkueue-system --for=condition=available --timeout=10m [XPK] Applying following Kueue resources: diff --git a/goldens/Cluster_create_private.txt b/goldens/Cluster_create_private.txt index d17a4a9ae..e267502a2 100644 --- a/goldens/Cluster_create_private.txt +++ b/goldens/Cluster_create_private.txt @@ -75,14 +75,14 @@ kubectl get node --no-headers | wc -l kubectl apply -f 1b31e624e490f9c8c4ef4e369f08d3fa467990af5a261e4405bd045265d70e95 [XPK] Try 1: Install PathwaysJob on golden-cluster-private [XPK] Task: `Install PathwaysJob on golden-cluster-private` is implemented by the following command not running since it is a dry run. -kubectl apply --server-side -f https://github.com/google/pathways-job/releases/download/v0.1.3/install.yaml +kubectl apply --server-side -f https://github.com/google/pathways-job/releases/download/v0.1.4/install.yaml [XPK] Enabling Kueue on the cluster [XPK] Task: `Get kueue version on server` is implemented by the following command not running since it is a dry run. kubectl get deployment kueue-controller-manager -n kueue-system -o jsonpath='{.spec.template.spec.containers[0].image}' -[XPK] Installing Kueue version v0.14.1... +[XPK] Installing Kueue version v0.12.2... [XPK] Try 1: Install Kueue [XPK] Task: `Install Kueue` is implemented by the following command not running since it is a dry run. -kubectl apply --server-side --force-conflicts -f https://github.com/kubernetes-sigs/kueue/releases/download/v0.14.1/manifests.yaml +kubectl apply --server-side --force-conflicts -f https://github.com/kubernetes-sigs/kueue/releases/download/v0.12.2/manifests.yaml [XPK] Task: `Wait for Kueue to be available` is implemented by the following command not running since it is a dry run. kubectl wait deploy/kueue-controller-manager -nkueue-system --for=condition=available --timeout=10m [XPK] Applying following Kueue resources: diff --git a/goldens/Cluster_create_with_gb200-4.txt b/goldens/Cluster_create_with_gb200-4.txt index dc3afe117..e66dc5f17 100644 --- a/goldens/Cluster_create_with_gb200-4.txt +++ b/goldens/Cluster_create_with_gb200-4.txt @@ -74,14 +74,14 @@ kubectl get node --no-headers | wc -l kubectl apply -f 1b31e624e490f9c8c4ef4e369f08d3fa467990af5a261e4405bd045265d70e95 [XPK] Try 1: Install PathwaysJob on golden-cluster [XPK] Task: `Install PathwaysJob on golden-cluster` is implemented by the following command not running since it is a dry run. -kubectl apply --server-side -f https://github.com/google/pathways-job/releases/download/v0.1.3/install.yaml +kubectl apply --server-side -f https://github.com/google/pathways-job/releases/download/v0.1.4/install.yaml [XPK] Enabling Kueue on the cluster [XPK] Task: `Get kueue version on server` is implemented by the following command not running since it is a dry run. kubectl get deployment kueue-controller-manager -n kueue-system -o jsonpath='{.spec.template.spec.containers[0].image}' -[XPK] Installing Kueue version v0.14.1... +[XPK] Installing Kueue version v0.12.2... [XPK] Try 1: Install Kueue [XPK] Task: `Install Kueue` is implemented by the following command not running since it is a dry run. -kubectl apply --server-side --force-conflicts -f https://github.com/kubernetes-sigs/kueue/releases/download/v0.14.1/manifests.yaml +kubectl apply --server-side --force-conflicts -f https://github.com/kubernetes-sigs/kueue/releases/download/v0.12.2/manifests.yaml [XPK] Task: `Wait for Kueue to be available` is implemented by the following command not running since it is a dry run. kubectl wait deploy/kueue-controller-manager -nkueue-system --for=condition=available --timeout=10m [XPK] Applying following Kueue resources: diff --git a/goldens/NAP_cluster-create.txt b/goldens/NAP_cluster-create.txt index 2210b47ca..dc7158bbd 100644 --- a/goldens/NAP_cluster-create.txt +++ b/goldens/NAP_cluster-create.txt @@ -81,14 +81,14 @@ kubectl get node --no-headers | wc -l kubectl apply -f 1b31e624e490f9c8c4ef4e369f08d3fa467990af5a261e4405bd045265d70e95 [XPK] Try 1: Install PathwaysJob on golden-cluster [XPK] Task: `Install PathwaysJob on golden-cluster` is implemented by the following command not running since it is a dry run. -kubectl apply --server-side -f https://github.com/google/pathways-job/releases/download/v0.1.3/install.yaml +kubectl apply --server-side -f https://github.com/google/pathways-job/releases/download/v0.1.4/install.yaml [XPK] Enabling Kueue on the cluster [XPK] Task: `Get kueue version on server` is implemented by the following command not running since it is a dry run. kubectl get deployment kueue-controller-manager -n kueue-system -o jsonpath='{.spec.template.spec.containers[0].image}' -[XPK] Installing Kueue version v0.14.1... +[XPK] Installing Kueue version v0.12.2... [XPK] Try 1: Install Kueue [XPK] Task: `Install Kueue` is implemented by the following command not running since it is a dry run. -kubectl apply --server-side --force-conflicts -f https://github.com/kubernetes-sigs/kueue/releases/download/v0.14.1/manifests.yaml +kubectl apply --server-side --force-conflicts -f https://github.com/kubernetes-sigs/kueue/releases/download/v0.12.2/manifests.yaml [XPK] Task: `Wait for Kueue to be available` is implemented by the following command not running since it is a dry run. kubectl wait deploy/kueue-controller-manager -nkueue-system --for=condition=available --timeout=10m [XPK] Applying following Kueue resources: diff --git a/goldens/NAP_cluster-create_with_pathways.txt b/goldens/NAP_cluster-create_with_pathways.txt index a202bc3a9..87684b13e 100644 --- a/goldens/NAP_cluster-create_with_pathways.txt +++ b/goldens/NAP_cluster-create_with_pathways.txt @@ -82,14 +82,14 @@ kubectl get node --no-headers | wc -l kubectl apply -f 1b31e624e490f9c8c4ef4e369f08d3fa467990af5a261e4405bd045265d70e95 [XPK] Try 1: Install PathwaysJob on golden-cluster [XPK] Task: `Install PathwaysJob on golden-cluster` is implemented by the following command not running since it is a dry run. -kubectl apply --server-side -f https://github.com/google/pathways-job/releases/download/v0.1.3/install.yaml +kubectl apply --server-side -f https://github.com/google/pathways-job/releases/download/v0.1.4/install.yaml [XPK] Enabling Kueue on the cluster [XPK] Task: `Get kueue version on server` is implemented by the following command not running since it is a dry run. kubectl get deployment kueue-controller-manager -n kueue-system -o jsonpath='{.spec.template.spec.containers[0].image}' -[XPK] Installing Kueue version v0.14.1... +[XPK] Installing Kueue version v0.12.2... [XPK] Try 1: Install Kueue [XPK] Task: `Install Kueue` is implemented by the following command not running since it is a dry run. -kubectl apply --server-side --force-conflicts -f https://github.com/kubernetes-sigs/kueue/releases/download/v0.14.1/manifests.yaml +kubectl apply --server-side --force-conflicts -f https://github.com/kubernetes-sigs/kueue/releases/download/v0.12.2/manifests.yaml [XPK] Task: `Wait for Kueue to be available` is implemented by the following command not running since it is a dry run. kubectl wait deploy/kueue-controller-manager -nkueue-system --for=condition=available --timeout=10m [XPK] Applying following Kueue resources: diff --git a/src/xpk/core/cluster.py b/src/xpk/core/cluster.py index 697474703..55848adc3 100644 --- a/src/xpk/core/cluster.py +++ b/src/xpk/core/cluster.py @@ -37,7 +37,7 @@ from .system_characteristics import SystemCharacteristics JOBSET_VERSION = 'v0.8.0' -PATHWAYS_JOB_VERSION = 'v0.1.3' +PATHWAYS_JOB_VERSION = 'v0.1.4' INSTALLER_NCCL_TCPX = 'https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/gpudirect-tcpx/nccl-tcpx-installer.yaml' INSTALLER_NCCL_TCPXO = 'https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/gpudirect-tcpxo/nccl-tcpxo-installer.yaml' INSTALLER_NCCL_RDMA = 'https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/gpudirect-rdma/nccl-rdma-installer.yaml' diff --git a/src/xpk/core/config.py b/src/xpk/core/config.py index 26fc14bba..938f8a8ed 100644 --- a/src/xpk/core/config.py +++ b/src/xpk/core/config.py @@ -22,7 +22,7 @@ from ..utils.console import xpk_print # This is the version for XPK PyPI package -__version__ = 'v0.14.1' +__version__ = 'v0.14.2' XPK_CURRENT_VERSION = __version__ XPK_CONFIG_FILE = os.path.expanduser('~/.config/xpk/config.yaml') diff --git a/src/xpk/core/kueue_manager.py b/src/xpk/core/kueue_manager.py index f1d6ddc1b..9152387d3 100644 --- a/src/xpk/core/kueue_manager.py +++ b/src/xpk/core/kueue_manager.py @@ -51,7 +51,7 @@ KUEUE_SUB_SLICING_TOPOLOGY_JINJA_FILE = "kueue_sub_slicing_topology.yaml.j2" MEMORY_SIZE_PER_VM = 1.2 MIN_MEMORY_LIMIT_SIZE = 4096 -KUEUE_VERSION = "v0.14.1" +KUEUE_VERSION = "v0.12.2" @dataclass @@ -131,7 +131,7 @@ def get_installed_kueue_version(self) -> tuple[int, str | None]: command, task, dry_run_return_val=""" - v0.14.1""", + v0.12.1""", ) if return_code != 0: return return_code, None diff --git a/src/xpk/core/kueue_manager_test.py b/src/xpk/core/kueue_manager_test.py index 7f340eacd..8d946fbef 100644 --- a/src/xpk/core/kueue_manager_test.py +++ b/src/xpk/core/kueue_manager_test.py @@ -83,7 +83,7 @@ def test_install_or_upgrade_when_newer_version_already_installed( self, mock_configure, mock_install, mock_get_version ): """Test install_or_upgrade when Kueue is already up to date.""" - mock_get_version.return_value = (0, "v0.99.9") + mock_get_version.return_value = (0, "v0.12.3") kueue_config = MagicMock(spec=KueueConfig) result = self.kueue_manager.install_or_upgrade(kueue_config) diff --git a/src/xpk/templates/kueue_gke_default_topology.yaml.j2 b/src/xpk/templates/kueue_gke_default_topology.yaml.j2 index f2d569c6f..8c9f64479 100644 --- a/src/xpk/templates/kueue_gke_default_topology.yaml.j2 +++ b/src/xpk/templates/kueue_gke_default_topology.yaml.j2 @@ -1,4 +1,4 @@ -apiVersion: kueue.x-k8s.io/v1beta1 +apiVersion: kueue.x-k8s.io/v1alpha1 kind: Topology metadata: name: "gke-default"