From 98f824b3434d5a29e5a23c76a1bde8db7a8290fc Mon Sep 17 00:00:00 2001 From: Yu Li Date: Thu, 23 Oct 2025 11:19:01 -0700 Subject: [PATCH 1/4] Update PathwaysJob version to v0.1.4 (#733) --- goldens/Basic_cluster_create.txt | 2 +- goldens/Cluster_create_private.txt | 2 +- goldens/Cluster_create_with_gb200-4.txt | 2 +- goldens/NAP_cluster-create.txt | 2 +- goldens/NAP_cluster-create_with_pathways.txt | 2 +- src/xpk/core/cluster.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/goldens/Basic_cluster_create.txt b/goldens/Basic_cluster_create.txt index 359117b8a..ddb1a84b9 100644 --- a/goldens/Basic_cluster_create.txt +++ b/goldens/Basic_cluster_create.txt @@ -70,7 +70,7 @@ kubectl get node --no-headers | wc -l kubectl apply -f 1b31e624e490f9c8c4ef4e369f08d3fa467990af5a261e4405bd045265d70e95 [XPK] Try 1: Install PathwaysJob on golden-cluster [XPK] Task: `Install PathwaysJob on golden-cluster` is implemented by the following command not running since it is a dry run. -kubectl apply --server-side -f https://github.com/google/pathways-job/releases/download/v0.1.3/install.yaml +kubectl apply --server-side -f https://github.com/google/pathways-job/releases/download/v0.1.4/install.yaml [XPK] Enabling Kueue on the cluster [XPK] Task: `Get kueue version on server` is implemented by the following command not running since it is a dry run. kubectl get deployment kueue-controller-manager -n kueue-system -o jsonpath='{.spec.template.spec.containers[0].image}' diff --git a/goldens/Cluster_create_private.txt b/goldens/Cluster_create_private.txt index d17a4a9ae..ecf2fab22 100644 --- a/goldens/Cluster_create_private.txt +++ b/goldens/Cluster_create_private.txt @@ -75,7 +75,7 @@ kubectl get node --no-headers | wc -l kubectl apply -f 1b31e624e490f9c8c4ef4e369f08d3fa467990af5a261e4405bd045265d70e95 [XPK] Try 1: Install PathwaysJob on golden-cluster-private [XPK] Task: `Install PathwaysJob on golden-cluster-private` is implemented by the following command not running since it is a dry run. -kubectl apply --server-side -f https://github.com/google/pathways-job/releases/download/v0.1.3/install.yaml +kubectl apply --server-side -f https://github.com/google/pathways-job/releases/download/v0.1.4/install.yaml [XPK] Enabling Kueue on the cluster [XPK] Task: `Get kueue version on server` is implemented by the following command not running since it is a dry run. kubectl get deployment kueue-controller-manager -n kueue-system -o jsonpath='{.spec.template.spec.containers[0].image}' diff --git a/goldens/Cluster_create_with_gb200-4.txt b/goldens/Cluster_create_with_gb200-4.txt index dc3afe117..6f1769029 100644 --- a/goldens/Cluster_create_with_gb200-4.txt +++ b/goldens/Cluster_create_with_gb200-4.txt @@ -74,7 +74,7 @@ kubectl get node --no-headers | wc -l kubectl apply -f 1b31e624e490f9c8c4ef4e369f08d3fa467990af5a261e4405bd045265d70e95 [XPK] Try 1: Install PathwaysJob on golden-cluster [XPK] Task: `Install PathwaysJob on golden-cluster` is implemented by the following command not running since it is a dry run. -kubectl apply --server-side -f https://github.com/google/pathways-job/releases/download/v0.1.3/install.yaml +kubectl apply --server-side -f https://github.com/google/pathways-job/releases/download/v0.1.4/install.yaml [XPK] Enabling Kueue on the cluster [XPK] Task: `Get kueue version on server` is implemented by the following command not running since it is a dry run. kubectl get deployment kueue-controller-manager -n kueue-system -o jsonpath='{.spec.template.spec.containers[0].image}' diff --git a/goldens/NAP_cluster-create.txt b/goldens/NAP_cluster-create.txt index 2210b47ca..5c476ae84 100644 --- a/goldens/NAP_cluster-create.txt +++ b/goldens/NAP_cluster-create.txt @@ -81,7 +81,7 @@ kubectl get node --no-headers | wc -l kubectl apply -f 1b31e624e490f9c8c4ef4e369f08d3fa467990af5a261e4405bd045265d70e95 [XPK] Try 1: Install PathwaysJob on golden-cluster [XPK] Task: `Install PathwaysJob on golden-cluster` is implemented by the following command not running since it is a dry run. -kubectl apply --server-side -f https://github.com/google/pathways-job/releases/download/v0.1.3/install.yaml +kubectl apply --server-side -f https://github.com/google/pathways-job/releases/download/v0.1.4/install.yaml [XPK] Enabling Kueue on the cluster [XPK] Task: `Get kueue version on server` is implemented by the following command not running since it is a dry run. kubectl get deployment kueue-controller-manager -n kueue-system -o jsonpath='{.spec.template.spec.containers[0].image}' diff --git a/goldens/NAP_cluster-create_with_pathways.txt b/goldens/NAP_cluster-create_with_pathways.txt index a202bc3a9..b24d48451 100644 --- a/goldens/NAP_cluster-create_with_pathways.txt +++ b/goldens/NAP_cluster-create_with_pathways.txt @@ -82,7 +82,7 @@ kubectl get node --no-headers | wc -l kubectl apply -f 1b31e624e490f9c8c4ef4e369f08d3fa467990af5a261e4405bd045265d70e95 [XPK] Try 1: Install PathwaysJob on golden-cluster [XPK] Task: `Install PathwaysJob on golden-cluster` is implemented by the following command not running since it is a dry run. -kubectl apply --server-side -f https://github.com/google/pathways-job/releases/download/v0.1.3/install.yaml +kubectl apply --server-side -f https://github.com/google/pathways-job/releases/download/v0.1.4/install.yaml [XPK] Enabling Kueue on the cluster [XPK] Task: `Get kueue version on server` is implemented by the following command not running since it is a dry run. kubectl get deployment kueue-controller-manager -n kueue-system -o jsonpath='{.spec.template.spec.containers[0].image}' diff --git a/src/xpk/core/cluster.py b/src/xpk/core/cluster.py index 697474703..55848adc3 100644 --- a/src/xpk/core/cluster.py +++ b/src/xpk/core/cluster.py @@ -37,7 +37,7 @@ from .system_characteristics import SystemCharacteristics JOBSET_VERSION = 'v0.8.0' -PATHWAYS_JOB_VERSION = 'v0.1.3' +PATHWAYS_JOB_VERSION = 'v0.1.4' INSTALLER_NCCL_TCPX = 'https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/gpudirect-tcpx/nccl-tcpx-installer.yaml' INSTALLER_NCCL_TCPXO = 'https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/gpudirect-tcpxo/nccl-tcpxo-installer.yaml' INSTALLER_NCCL_RDMA = 'https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/gpudirect-rdma/nccl-rdma-installer.yaml' From 2f5f5cd9f729d0142b30a3b39fd555b8e919e71d Mon Sep 17 00:00:00 2001 From: FIoannides Date: Fri, 24 Oct 2025 09:34:56 +0200 Subject: [PATCH 2/4] Merge main back to develop for v0.14.1 (#732) * Release v0.13.0 * Release v0.13.0 * Release v0.14.0 * Release v0.14.1 --- src/xpk/core/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xpk/core/config.py b/src/xpk/core/config.py index 9873608e9..26fc14bba 100644 --- a/src/xpk/core/config.py +++ b/src/xpk/core/config.py @@ -22,7 +22,7 @@ from ..utils.console import xpk_print # This is the version for XPK PyPI package -__version__ = 'v0.14.0' +__version__ = 'v0.14.1' XPK_CURRENT_VERSION = __version__ XPK_CONFIG_FILE = os.path.expanduser('~/.config/xpk/config.yaml') From de25b6a17104a40faf09971358a2e41b466ee615 Mon Sep 17 00:00:00 2001 From: Dominik Rabij Date: Fri, 24 Oct 2025 10:02:23 +0200 Subject: [PATCH 3/4] Revert "feat: Bump up the default Kueue version to v0.14.1 (#691)" (#735) * feat: Add sub-slicing system validation in workload create * Revert "feat: Bump up the default Kueue version to v0.14.1 (#691)" This reverts commit 556d912fd9131c466cb9b0846285aa3a3c689b7b. --- Makefile | 2 +- goldens/Basic_cluster_create.txt | 4 ++-- goldens/Cluster_create_private.txt | 4 ++-- goldens/Cluster_create_with_gb200-4.txt | 4 ++-- goldens/NAP_cluster-create.txt | 4 ++-- goldens/NAP_cluster-create_with_pathways.txt | 4 ++-- src/xpk/core/kueue_manager.py | 4 ++-- src/xpk/core/kueue_manager_test.py | 2 +- src/xpk/templates/kueue_gke_default_topology.yaml.j2 | 2 +- 9 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Makefile b/Makefile index 097bb9d74..58797b5b5 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ KUEUE_REPO=https://github.com/kubernetes-sigs/kueue.git KUBECTL_VERSION := $(shell curl -L -s https://dl.k8s.io/release/stable.txt) -KUEUE_VERSION=v0.14.1 +KUEUE_VERSION=v0.12.2 KJOB_VERSION=v0.1.0 OS := $(shell uname -s | tr A-Z a-z) diff --git a/goldens/Basic_cluster_create.txt b/goldens/Basic_cluster_create.txt index ddb1a84b9..2969b5285 100644 --- a/goldens/Basic_cluster_create.txt +++ b/goldens/Basic_cluster_create.txt @@ -74,10 +74,10 @@ kubectl apply --server-side -f https://github.com/google/pathways-job/releases/d [XPK] Enabling Kueue on the cluster [XPK] Task: `Get kueue version on server` is implemented by the following command not running since it is a dry run. kubectl get deployment kueue-controller-manager -n kueue-system -o jsonpath='{.spec.template.spec.containers[0].image}' -[XPK] Installing Kueue version v0.14.1... +[XPK] Installing Kueue version v0.12.2... [XPK] Try 1: Install Kueue [XPK] Task: `Install Kueue` is implemented by the following command not running since it is a dry run. -kubectl apply --server-side --force-conflicts -f https://github.com/kubernetes-sigs/kueue/releases/download/v0.14.1/manifests.yaml +kubectl apply --server-side --force-conflicts -f https://github.com/kubernetes-sigs/kueue/releases/download/v0.12.2/manifests.yaml [XPK] Task: `Wait for Kueue to be available` is implemented by the following command not running since it is a dry run. kubectl wait deploy/kueue-controller-manager -nkueue-system --for=condition=available --timeout=10m [XPK] Applying following Kueue resources: diff --git a/goldens/Cluster_create_private.txt b/goldens/Cluster_create_private.txt index ecf2fab22..e267502a2 100644 --- a/goldens/Cluster_create_private.txt +++ b/goldens/Cluster_create_private.txt @@ -79,10 +79,10 @@ kubectl apply --server-side -f https://github.com/google/pathways-job/releases/d [XPK] Enabling Kueue on the cluster [XPK] Task: `Get kueue version on server` is implemented by the following command not running since it is a dry run. kubectl get deployment kueue-controller-manager -n kueue-system -o jsonpath='{.spec.template.spec.containers[0].image}' -[XPK] Installing Kueue version v0.14.1... +[XPK] Installing Kueue version v0.12.2... [XPK] Try 1: Install Kueue [XPK] Task: `Install Kueue` is implemented by the following command not running since it is a dry run. -kubectl apply --server-side --force-conflicts -f https://github.com/kubernetes-sigs/kueue/releases/download/v0.14.1/manifests.yaml +kubectl apply --server-side --force-conflicts -f https://github.com/kubernetes-sigs/kueue/releases/download/v0.12.2/manifests.yaml [XPK] Task: `Wait for Kueue to be available` is implemented by the following command not running since it is a dry run. kubectl wait deploy/kueue-controller-manager -nkueue-system --for=condition=available --timeout=10m [XPK] Applying following Kueue resources: diff --git a/goldens/Cluster_create_with_gb200-4.txt b/goldens/Cluster_create_with_gb200-4.txt index 6f1769029..e66dc5f17 100644 --- a/goldens/Cluster_create_with_gb200-4.txt +++ b/goldens/Cluster_create_with_gb200-4.txt @@ -78,10 +78,10 @@ kubectl apply --server-side -f https://github.com/google/pathways-job/releases/d [XPK] Enabling Kueue on the cluster [XPK] Task: `Get kueue version on server` is implemented by the following command not running since it is a dry run. kubectl get deployment kueue-controller-manager -n kueue-system -o jsonpath='{.spec.template.spec.containers[0].image}' -[XPK] Installing Kueue version v0.14.1... +[XPK] Installing Kueue version v0.12.2... [XPK] Try 1: Install Kueue [XPK] Task: `Install Kueue` is implemented by the following command not running since it is a dry run. -kubectl apply --server-side --force-conflicts -f https://github.com/kubernetes-sigs/kueue/releases/download/v0.14.1/manifests.yaml +kubectl apply --server-side --force-conflicts -f https://github.com/kubernetes-sigs/kueue/releases/download/v0.12.2/manifests.yaml [XPK] Task: `Wait for Kueue to be available` is implemented by the following command not running since it is a dry run. kubectl wait deploy/kueue-controller-manager -nkueue-system --for=condition=available --timeout=10m [XPK] Applying following Kueue resources: diff --git a/goldens/NAP_cluster-create.txt b/goldens/NAP_cluster-create.txt index 5c476ae84..dc7158bbd 100644 --- a/goldens/NAP_cluster-create.txt +++ b/goldens/NAP_cluster-create.txt @@ -85,10 +85,10 @@ kubectl apply --server-side -f https://github.com/google/pathways-job/releases/d [XPK] Enabling Kueue on the cluster [XPK] Task: `Get kueue version on server` is implemented by the following command not running since it is a dry run. kubectl get deployment kueue-controller-manager -n kueue-system -o jsonpath='{.spec.template.spec.containers[0].image}' -[XPK] Installing Kueue version v0.14.1... +[XPK] Installing Kueue version v0.12.2... [XPK] Try 1: Install Kueue [XPK] Task: `Install Kueue` is implemented by the following command not running since it is a dry run. -kubectl apply --server-side --force-conflicts -f https://github.com/kubernetes-sigs/kueue/releases/download/v0.14.1/manifests.yaml +kubectl apply --server-side --force-conflicts -f https://github.com/kubernetes-sigs/kueue/releases/download/v0.12.2/manifests.yaml [XPK] Task: `Wait for Kueue to be available` is implemented by the following command not running since it is a dry run. kubectl wait deploy/kueue-controller-manager -nkueue-system --for=condition=available --timeout=10m [XPK] Applying following Kueue resources: diff --git a/goldens/NAP_cluster-create_with_pathways.txt b/goldens/NAP_cluster-create_with_pathways.txt index b24d48451..87684b13e 100644 --- a/goldens/NAP_cluster-create_with_pathways.txt +++ b/goldens/NAP_cluster-create_with_pathways.txt @@ -86,10 +86,10 @@ kubectl apply --server-side -f https://github.com/google/pathways-job/releases/d [XPK] Enabling Kueue on the cluster [XPK] Task: `Get kueue version on server` is implemented by the following command not running since it is a dry run. kubectl get deployment kueue-controller-manager -n kueue-system -o jsonpath='{.spec.template.spec.containers[0].image}' -[XPK] Installing Kueue version v0.14.1... +[XPK] Installing Kueue version v0.12.2... [XPK] Try 1: Install Kueue [XPK] Task: `Install Kueue` is implemented by the following command not running since it is a dry run. -kubectl apply --server-side --force-conflicts -f https://github.com/kubernetes-sigs/kueue/releases/download/v0.14.1/manifests.yaml +kubectl apply --server-side --force-conflicts -f https://github.com/kubernetes-sigs/kueue/releases/download/v0.12.2/manifests.yaml [XPK] Task: `Wait for Kueue to be available` is implemented by the following command not running since it is a dry run. kubectl wait deploy/kueue-controller-manager -nkueue-system --for=condition=available --timeout=10m [XPK] Applying following Kueue resources: diff --git a/src/xpk/core/kueue_manager.py b/src/xpk/core/kueue_manager.py index f1d6ddc1b..9152387d3 100644 --- a/src/xpk/core/kueue_manager.py +++ b/src/xpk/core/kueue_manager.py @@ -51,7 +51,7 @@ KUEUE_SUB_SLICING_TOPOLOGY_JINJA_FILE = "kueue_sub_slicing_topology.yaml.j2" MEMORY_SIZE_PER_VM = 1.2 MIN_MEMORY_LIMIT_SIZE = 4096 -KUEUE_VERSION = "v0.14.1" +KUEUE_VERSION = "v0.12.2" @dataclass @@ -131,7 +131,7 @@ def get_installed_kueue_version(self) -> tuple[int, str | None]: command, task, dry_run_return_val=""" - v0.14.1""", + v0.12.1""", ) if return_code != 0: return return_code, None diff --git a/src/xpk/core/kueue_manager_test.py b/src/xpk/core/kueue_manager_test.py index 7f340eacd..8d946fbef 100644 --- a/src/xpk/core/kueue_manager_test.py +++ b/src/xpk/core/kueue_manager_test.py @@ -83,7 +83,7 @@ def test_install_or_upgrade_when_newer_version_already_installed( self, mock_configure, mock_install, mock_get_version ): """Test install_or_upgrade when Kueue is already up to date.""" - mock_get_version.return_value = (0, "v0.99.9") + mock_get_version.return_value = (0, "v0.12.3") kueue_config = MagicMock(spec=KueueConfig) result = self.kueue_manager.install_or_upgrade(kueue_config) diff --git a/src/xpk/templates/kueue_gke_default_topology.yaml.j2 b/src/xpk/templates/kueue_gke_default_topology.yaml.j2 index f2d569c6f..8c9f64479 100644 --- a/src/xpk/templates/kueue_gke_default_topology.yaml.j2 +++ b/src/xpk/templates/kueue_gke_default_topology.yaml.j2 @@ -1,4 +1,4 @@ -apiVersion: kueue.x-k8s.io/v1beta1 +apiVersion: kueue.x-k8s.io/v1alpha1 kind: Topology metadata: name: "gke-default" From 039ee5da0bd3a89ef185e9e5f712d0bdd8baaddd Mon Sep 17 00:00:00 2001 From: Feidias Ioannidis Date: Fri, 24 Oct 2025 08:11:04 +0000 Subject: [PATCH 4/4] Release v0.14.2 --- src/xpk/core/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xpk/core/config.py b/src/xpk/core/config.py index 26fc14bba..938f8a8ed 100644 --- a/src/xpk/core/config.py +++ b/src/xpk/core/config.py @@ -22,7 +22,7 @@ from ..utils.console import xpk_print # This is the version for XPK PyPI package -__version__ = 'v0.14.1' +__version__ = 'v0.14.2' XPK_CURRENT_VERSION = __version__ XPK_CONFIG_FILE = os.path.expanduser('~/.config/xpk/config.yaml')