diff --git a/goldens/Workload_create_pathways.txt b/goldens/Workload_create_pathways.txt index 0071893f3..fdff927db 100644 --- a/goldens/Workload_create_pathways.txt +++ b/goldens/Workload_create_pathways.txt @@ -25,7 +25,7 @@ docker tag dry-run-runner gcr.io/golden-project/dry-run-runner:prefix-current [XPK] Task: `Upload Docker Image` is implemented by the following command not running since it is a dry run. docker push gcr.io/golden-project/dry-run-runner:prefix-current [XPK] Task: `Creating Workload` is implemented by the following command not running since it is a dry run. -kubectl apply -f 871fa8b4813a0c43d7d5f0088986e20d11d4f093d6986a542d92a9420afa632b +kubectl apply -f bfdb43fce214301b0be1d293cb623b61df6e14c376a0032cdc3273ed14f5a6f7 [XPK] Task: `GKE Dashboard List` is implemented by the following command not running since it is a dry run. gcloud monitoring dashboards list --project=golden-project --filter="displayName:'GKE - TPU Monitoring Dashboard'" --format="value(name)" --verbosity=error [XPK] Check statistics and outlier mode of GKE metrics here: https://console.cloud.google.com/monitoring/dashboards/builder/0?project=golden-project&f.rlabel.cluster_name.ClusterName=golden-cluster. To view the metric data for your workload, select golden-workload from the JobName filter on the dashboard. diff --git a/src/xpk/commands/workload.py b/src/xpk/commands/workload.py index 67444d4a8..23780cb22 100644 --- a/src/xpk/commands/workload.py +++ b/src/xpk/commands/workload.py @@ -267,6 +267,7 @@ maxSliceRestarts: {args.max_slice_restarts} terminationGracePeriodSeconds: {args.termination_grace_period_seconds} priorityClassName: {args.priority} + capacityNodeSelector: {capacity_node_selector} pathwaysDir: {args.pathways_gcs_location} #This bucket needs to be created in advance. controller: # #Pod template for training, default mode. @@ -545,6 +546,7 @@ def workload_create(args) -> None: colocated_python_sidecar=append_custom_colocated_python_sidecar(args), user_workload=get_user_workload_for_pathways(args, system), local_queue_name=LOCAL_QUEUE_NAME, + capacity_node_selector=autoprovisioning_args, ) else: container, debugging_dashboard_id = get_user_workload_container(