Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 94 additions & 0 deletions goldens/Basic_cluster_create.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,100 @@ kubectl get deployment kueue-controller-manager -n kueue-system -o jsonpath='{.s
kubectl apply --server-side --force-conflicts -f https://github.com/kubernetes-sigs/kueue/releases/download/v0.12.2/manifests.yaml
[XPK] Task: `Wait for Kueue to be available` is implemented by the following command not running since it is a dry run.
kubectl wait deploy/kueue-controller-manager -nkueue-system --for=condition=available --timeout=10m
[XPK] Applying following Kueue resources:
---
apiVersion: kueue.x-k8s.io/v1beta1
kind: ResourceFlavor
metadata:
name: "1xtpu7x-8"
spec:
nodeLabels: {"cloud.google.com/gke-tpu-accelerator": "tpu7x", "cloud.google.com/gke-tpu-topology": "2x2x1"}


---
apiVersion: kueue.x-k8s.io/v1beta1
kind: AdmissionCheck
metadata:
name: dws-prov
spec:
controllerName: kueue.x-k8s.io/provisioning-request
parameters:
apiGroup: kueue.x-k8s.io
kind: ProvisioningRequestConfig
name: dws-config
---
apiVersion: kueue.x-k8s.io/v1beta1
kind: ProvisioningRequestConfig
metadata:
name: dws-config
spec:
provisioningClassName: queued-provisioning.gke.io
podSetUpdates:
nodeSelector:
- key: autoscaling.gke.io/provisioning-request
valueFromProvisioningClassDetail: ResizeRequestName
managedResources:
- google.com/tpu
---
apiVersion: kueue.x-k8s.io/v1beta1
kind: ClusterQueue
metadata:
name: "cluster-queue"
spec:
preemption:
reclaimWithinCohort: Never # Don't preempt other queues in the cohort.
withinClusterQueue: LowerPriority
namespaceSelector: {} # match all.
resourceGroups: [{'coveredResources': ['google.com/tpu'], 'flavors': [{'name': '1xtpu7x-8', 'resources': [{'name': 'google.com/tpu', 'nominalQuota': 4}]}]}]

---
apiVersion: kueue.x-k8s.io/v1beta1
kind: LocalQueue
metadata:
namespace: default
name: multislice-queue
spec:
clusterQueue: cluster-queue
---
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
name: very-low
value: 100
globalDefault: false
description: "Very Low"
---
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
name: low
value: 250
globalDefault: false
description: "Low"
---
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
name: medium
value: 500
globalDefault: false
description: "Medium"
---
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
name: high
value: 750
globalDefault: false
description: "High"
---
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
name: very-high
value: 1000
globalDefault: false
description: "Very High"
[XPK] Task: `Applying Kueue Custom Resources` is implemented by the following command not running since it is a dry run.
kubectl apply -f a1fe8e014a200d6489b8871301a9e80de7e6f45e94b61ad0e60f40f254711bec
[XPK] Task: `Count total nodes` is implemented by the following command not running since it is a dry run.
Expand Down
105 changes: 104 additions & 1 deletion goldens/Cluster_create_private.txt
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,111 @@ kubectl get deployment kueue-controller-manager -n kueue-system -o jsonpath='{.s
kubectl apply --server-side --force-conflicts -f https://github.com/kubernetes-sigs/kueue/releases/download/v0.12.2/manifests.yaml
[XPK] Task: `Wait for Kueue to be available` is implemented by the following command not running since it is a dry run.
kubectl wait deploy/kueue-controller-manager -nkueue-system --for=condition=available --timeout=10m
[XPK] Applying following Kueue resources:
---
apiVersion: kueue.x-k8s.io/v1beta1
kind: ResourceFlavor
metadata:
name: "1xv5p-8"
spec:
nodeLabels: {"cloud.google.com/gke-tpu-accelerator": "tpu-v5p-slice", "cloud.google.com/gke-tpu-topology": "2x2x1"}


---
apiVersion: kueue.x-k8s.io/v1beta1
kind: ResourceFlavor
metadata:
name: "cpu-user"
spec:
nodeLabels: {"cloud.google.com/gke-nodepool": "cpu-np"}


---
apiVersion: kueue.x-k8s.io/v1beta1
kind: AdmissionCheck
metadata:
name: dws-prov
spec:
controllerName: kueue.x-k8s.io/provisioning-request
parameters:
apiGroup: kueue.x-k8s.io
kind: ProvisioningRequestConfig
name: dws-config
---
apiVersion: kueue.x-k8s.io/v1beta1
kind: ProvisioningRequestConfig
metadata:
name: dws-config
spec:
provisioningClassName: queued-provisioning.gke.io
podSetUpdates:
nodeSelector:
- key: autoscaling.gke.io/provisioning-request
valueFromProvisioningClassDetail: ResizeRequestName
managedResources:
- google.com/tpu
---
apiVersion: kueue.x-k8s.io/v1beta1
kind: ClusterQueue
metadata:
name: "cluster-queue"
spec:
preemption:
reclaimWithinCohort: Never # Don't preempt other queues in the cohort.
withinClusterQueue: LowerPriority
namespaceSelector: {} # match all.
resourceGroups: [{'coveredResources': ['google.com/tpu'], 'flavors': [{'name': '1xv5p-8', 'resources': [{'name': 'google.com/tpu', 'nominalQuota': 4}]}]}, {'coveredResources': ['cpu', 'memory'], 'flavors': [{'name': 'cpu-user', 'resources': [{'name': 'cpu', 'nominalQuota': 480}, {'name': 'memory', 'nominalQuota': '2000G'}]}]}]

---
apiVersion: kueue.x-k8s.io/v1beta1
kind: LocalQueue
metadata:
namespace: default
name: multislice-queue
spec:
clusterQueue: cluster-queue
---
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
name: very-low
value: 100
globalDefault: false
description: "Very Low"
---
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
name: low
value: 250
globalDefault: false
description: "Low"
---
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
name: medium
value: 500
globalDefault: false
description: "Medium"
---
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
name: high
value: 750
globalDefault: false
description: "High"
---
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
name: very-high
value: 1000
globalDefault: false
description: "Very High"
[XPK] Task: `Applying Kueue Custom Resources` is implemented by the following command not running since it is a dry run.
kubectl apply -f 02867423642d631296009c1c55aee0eb89304e530f89f1f7beecb629fef962c7
kubectl apply -f 0ff2bce892606d1497f21fc7b2cea78a4ee103094ce0f509211f3f9730536ad6
[XPK] Task: `Count total nodes` is implemented by the following command not running since it is a dry run.
kubectl get node --no-headers | wc -l
[XPK] Try 1: Updating Kueue Controller Manager resources
Expand Down
94 changes: 94 additions & 0 deletions goldens/Cluster_create_with_gb200-4.txt
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,100 @@ kubectl get deployment kueue-controller-manager -n kueue-system -o jsonpath='{.s
kubectl apply --server-side --force-conflicts -f https://github.com/kubernetes-sigs/kueue/releases/download/v0.12.2/manifests.yaml
[XPK] Task: `Wait for Kueue to be available` is implemented by the following command not running since it is a dry run.
kubectl wait deploy/kueue-controller-manager -nkueue-system --for=condition=available --timeout=10m
[XPK] Applying following Kueue resources:
---
apiVersion: kueue.x-k8s.io/v1beta1
kind: ResourceFlavor
metadata:
name: "1xgb200-4"
spec:
nodeLabels: {"cloud.google.com/gke-accelerator": "nvidia-gb200"}


---
apiVersion: kueue.x-k8s.io/v1beta1
kind: AdmissionCheck
metadata:
name: dws-prov
spec:
controllerName: kueue.x-k8s.io/provisioning-request
parameters:
apiGroup: kueue.x-k8s.io
kind: ProvisioningRequestConfig
name: dws-config
---
apiVersion: kueue.x-k8s.io/v1beta1
kind: ProvisioningRequestConfig
metadata:
name: dws-config
spec:
provisioningClassName: queued-provisioning.gke.io
podSetUpdates:
nodeSelector:
- key: autoscaling.gke.io/provisioning-request
valueFromProvisioningClassDetail: ResizeRequestName
managedResources:
- nvidia.com/gpu
---
apiVersion: kueue.x-k8s.io/v1beta1
kind: ClusterQueue
metadata:
name: "cluster-queue"
spec:
preemption:
reclaimWithinCohort: Never # Don't preempt other queues in the cohort.
withinClusterQueue: LowerPriority
namespaceSelector: {} # match all.
resourceGroups: [{'coveredResources': ['nvidia.com/gpu'], 'flavors': [{'name': '1xgb200-4', 'resources': [{'name': 'nvidia.com/gpu', 'nominalQuota': 8}]}]}]

---
apiVersion: kueue.x-k8s.io/v1beta1
kind: LocalQueue
metadata:
namespace: default
name: multislice-queue
spec:
clusterQueue: cluster-queue
---
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
name: very-low
value: 100
globalDefault: false
description: "Very Low"
---
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
name: low
value: 250
globalDefault: false
description: "Low"
---
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
name: medium
value: 500
globalDefault: false
description: "Medium"
---
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
name: high
value: 750
globalDefault: false
description: "High"
---
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
name: very-high
value: 1000
globalDefault: false
description: "Very High"
[XPK] Task: `Applying Kueue Custom Resources` is implemented by the following command not running since it is a dry run.
kubectl apply -f f807069b73747a423ec0d1915b2e919cfde400b01654de15746b566709b80f7e
[XPK] Task: `Count total nodes` is implemented by the following command not running since it is a dry run.
Expand Down
Loading
Loading