Merged

19 commits
- 37903df docs(website): vllm_actuator (michael-johnston, Nov 10, 2025)
- 8e88a12 docs(website): improvements to vllm_performance docs (michael-johnston, Nov 10, 2025)
- 6dd90cc docs(backends): Improvements to kuberay/raycluster docs (michael-johnston, Nov 10, 2025)
- 66b66da docs(website): vllm_performance_full updates (michael-johnston, Nov 10, 2025)
- b0f6fa1 docs(examples): Update examples for performace-testing-full (vllm) (michael-johnston, Nov 10, 2025)
- f59ea1d docs(vllm_perf): minor fixes (michael-johnston, Nov 10, 2025)
- a5652ae chore(lint): Stop linter changing list indentation (michael-johnston, Nov 10, 2025)
- c98d0f8 docs(ray-backend): updated example service account to enavle managing… (christian-pinto, Nov 10, 2025)
- 7beae6a docs(vllm_perf): Update re: pvcs (michael-johnston, Nov 10, 2025)
- cf9e21d Merge remote-tracking branch 'origin/maj_example_docs' into maj_examp… (michael-johnston, Nov 10, 2025)
- 9b73a30 chore(docs): fix indentation (michael-johnston, Nov 10, 2025)
- 731b5c5 chore(docs): fix indentation (michael-johnston, Nov 10, 2025)
- c131287 chore(docs): fix indentation (michael-johnston, Nov 10, 2025)
- a7eaa77 chore(docs): include YAML from file (michael-johnston, Nov 11, 2025)
- 20c83e1 docs(vllm_perf): update to actuator docs (michael-johnston, Nov 11, 2025)
- 0e87863 docs(vllm_perf): update to example docs (michael-johnston, Nov 11, 2025)
- 8fe24f4 docs(vllm_perf): update to example yaml (michael-johnston, Nov 11, 2025)
- ec7e28f docs(vllm_performance): Update to new template options (michael-johnston, Nov 12, 2025)
- e0025a0 Apply suggestions from code review (michael-johnston, Nov 12, 2025)
4 changes: 2 additions & 2 deletions .secrets.baseline
@@ -3,7 +3,7 @@
"files": "requirements.txt|^.secrets.baseline$",
"lines": null
},
"generated_at": "2025-11-05T16:16:55Z",
"generated_at": "2025-11-10T08:32:10Z",
"plugins_used": [
{
"name": "AWSKeyDetector"
@@ -414,7 +414,7 @@
}
]
},
"version": "0.13.1+ibm.64.dss",
"version": "0.13.1+ibm.62.dss",
"word_list": {
"file": null,
"hash": null
123 changes: 50 additions & 73 deletions backend/kuberay/README.md
@@ -18,21 +18,26 @@ the

## Deploying a RayCluster

> [!WARNING]
> [!WARNING] Ray version compatibility
>
> The `ray` versions must be compatible. For a more in depth guide refer to the
> [RayCluster configuration](https://docs.ray.io/en/latest/cluster/kubernetes/user-guides/config.html)
> The `ray` version set in the KubeRay YAML and the one
> used in the Ray head and worker containers must be compatible.
> For a more in-depth guide, refer to the [RayCluster configuration](https://docs.ray.io/en/latest/cluster/kubernetes/user-guides/config.html)
> page.

!!! note
We provide [an example set of values](vanilla-ray.yaml) for deploying a
RayCluster via KubeRay. To deploy it run:

``` commandline
helm upgrade --install ado-ray kuberay/ray-cluster --version 1.1.0 --values backend/kuberay/vanilla-ray.yaml
```

When running multi-node measurement make sure that
all nodes in your multi-node setup have read and write access
to your HuggingFace home directory. On Kubernetes with RayCluster,
avoid S3-like filesystems as that is known to cause failures
in **transformers**. Use a NFS or GPFS-backed PersistentVolumeClaim instead.
Feel free to customize the example file provided to suit your cluster,
such as uncommenting GPU-enabled workers.

### Configuring a Kubernetes ServiceAccount for the RayCluster
### Enabling ado actuators to create K8s resources

#### Configuring a ServiceAccount for the RayCluster

The default Kubernetes ServiceAccount created for a RayCluster does not
have enough permissions for an ado actuator to create Kubernetes resources
@@ -46,46 +51,14 @@ It also provides access to the RayCluster resources.

<!-- markdownlint-disable-next-line code-block-style -->
```yaml
apiVersion: v1
kind: ServiceAccount
metadata:
name: ray-deployer
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: ray-deployer
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: ray-deployer
subjects:
- kind: ServiceAccount
name: ray-deployer
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: ray-deployer
rules:
- apiGroups: ["ray.io"]
resources:
- rayclusters
verbs: ["get", "patch"]
- apiGroups: ["apps"]
resources:
- pods
- deployments
verbs: ["get", "create", "delete", "list", "watch", "update"]
- apiGroups: [""]
resources:
- services
verbs: ["get", "create", "delete", "list", "watch", "update"]
{% include "./service-account.yaml" %}
```

From the root of the ado project, run the following command:

kubectl apply -f backend/kuberay/service-account.yaml
```commandline
kubectl apply -f backend/kuberay/service-account.yaml
```

This will create a ServiceAccount named `ray-deployer`.
We will reference this name later when
@@ -94,6 +67,19 @@ We will reference this name later when
More information about ServiceAccount, Role, and RoleBinding objects can be found
in the [official Kubernetes RBAC documentation](https://kubernetes.io/docs/reference/access-authn-authz/rbac/).

#### Associating a RayCluster with the ServiceAccount

The command below shows how to set the `serviceAccountName` property for the
head and worker nodes.

<!-- markdownlint-disable-next-line code-block-style -->
```bash
helm upgrade --install ado-ray kuberay/ray-cluster --version 1.1.0 \
--values backend/kuberay/vanilla-ray-service-account.yaml \
--set head.serviceAccountName=ray-deployer \
--set worker.serviceAccountName=ray-deployer
```

### Best Practices for Efficient GPU Resource Utilization

To maximize the efficiency of your RayCluster and minimize GPU resource
@@ -124,12 +110,13 @@ Recommended worker setup:
- 4 replicas of a worker with **8 GPUs**
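The recommended setup can be sketched programmatically: for a cluster of identical GPU nodes, generate one worker group per power-of-two GPU count up to a full node, each with one replica per node. This is an illustrative sketch only; the group names and dictionary fields below are hypothetical, not KubeRay or ado API.

```python
# Generate worker-group stubs following the pattern above: one group per
# power-of-two GPU count up to a full node, each with as many replicas as
# there are nodes. Names and fields are illustrative, not KubeRay schema.
def worker_groups(nodes: int, gpus_per_node: int) -> dict:
    groups = {}
    size = 1
    while size <= gpus_per_node:
        groups[f"{size}-gpu-WG"] = {
            "replicas": nodes,
            "gpus": size,
            # Only full-node workers advertise a full-worker resource.
            "full_worker": size == gpus_per_node,
        }
        size *= 2
    return groups

print(worker_groups(nodes=4, gpus_per_node=8))
```

For a 4-node cluster of 8-GPU machines this yields the four group sizes listed above, each with 4 replicas.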

<!-- markdownlint-disable no-inline-html -->

<details>
<summary>
Example: The contents of the additionalWorkerGroups field of a RayCluster
with 4 Nodes each with 8 NVIDIA-A100-SXM4-80GB GPUs, 64 CPU cores, and 1TB memory
</summary>

<!-- markdownlint-disable MD046 -->
```yaml
one-A100-80G-gpu-WG:
replicas: 0
@@ -288,34 +275,24 @@ with 4 Nodes each with 8 NVIDIA-A100-SXM4-80GB GPUs, 64 CPU cores, and 1TB memory
# volumes: ...
# volumeMounts: ....
```

<!-- markdownlint-enable MD046 -->
</details>
<!-- markdownlint-enable no-inline-html -->

!!! note

Notice that the only variant with a **full-worker** custom resource
is the one with 8 GPUs. Some actuators, like SFTTrainer, use this
custom resource for measurements that involve reserving an entire GPU node.

We provide [an example set of values](vanilla-ray.yaml) for deploying a
RayCluster via KubeRay. To deploy it, simply run:

helm upgrade --install ado-ray kuberay/ray-cluster --version 1.1.0 --values backend/kuberay/vanilla-ray.yaml

In the case the ado operation to be executed requires creating Kubernetes
resources, the RayCluster to be deployed must be associated with a properly
configured ServiceAccount like the one described [above](#configuring-a-kubernetes-serviceaccount-for-the-raycluster).
The below command shows how to set the `serviceAccountName` property for head
and worker nodes.
> [!IMPORTANT] full-worker custom resource
>
> Notice that the only variant with a **full-worker** custom resource
> is the one with 8 GPUs. Some actuators, like SFTTrainer, use this
> custom resource for measurements that involve reserving an entire GPU node.

<!-- markdownlint-disable-next-line code-block-style -->
```bash
helm upgrade --install ado-ray kuberay/ray-cluster --version 1.1.0 \
--values backend/kuberay/vanilla-ray-service-account.yaml \
--set head.serviceAccountName=ray-deployer \
--set worker.serviceAccountName=ray-deployer
```
### RayClusters and SFTTrainer

Feel free to customize the example file provided to suit your cluster,
such as uncommenting GPU-enabled workers.
> [!IMPORTANT] HuggingFace home directory
>
> If you want to run multi-node measurements with
> the SFTTrainer actuator, make sure that
> all nodes in your multi-node setup have read and write access
> to your HuggingFace home directory. On Kubernetes with RayClusters,
> avoid S3-like filesystems, as they are known to cause failures
> in **transformers**.
> Use an NFS- or GPFS-backed PersistentVolumeClaim instead.
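For reference, a shared HuggingFace home directory of this kind can be provided through a PersistentVolumeClaim backed by a shared filesystem. The manifest below is a hypothetical sketch: the name, `storageClassName`, and size are assumptions that depend on your cluster, not values from this repository.

```yaml
# Hypothetical PVC for a shared HuggingFace home directory.
# The storageClassName must map to an NFS- or GPFS-backed provisioner
# in your cluster; "nfs-client" here is an assumption.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: hf-home
spec:
  accessModes:
    - ReadWriteMany # all Ray head and worker pods need read/write access
  storageClassName: nfs-client
  resources:
    requests:
      storage: 200Gi
```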
1 change: 1 addition & 0 deletions backend/kuberay/service-account.yaml
@@ -36,4 +36,5 @@ rules:
- apiGroups: [""]
resources:
- services
- persistentvolumeclaims
verbs: ["get", "create", "delete", "list", "watch", "update"]
@@ -0,0 +1,16 @@
# Copyright (c) IBM Corporation
# SPDX-License-Identifier: MIT
actuatorIdentifier: vllm_performance
metadata:
name: "Test actuator deployment"
parameters:
benchmark_retries: 3
hf_token: 'test' # Set if you need to access a gated model
image_secret: ''
in_cluster: false
interpreter: python3
max_environments: 1
namespace: null # Must be set to the namespace in which to create deployments
node_selector: {}
retries_timeout: 5
verify_ssl: false
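Since `namespace` defaults to `null` but must be set before the actuator can create deployments, a light pre-flight check can catch this early. The sketch below is illustrative only; the function name and the exact parameter semantics are assumptions, not part of the actuator's API.

```python
# Hypothetical pre-flight check for a vllm_performance actuator
# parameters dictionary (e.g. parsed from the YAML above).
def validate_actuator_parameters(params: dict) -> list[str]:
    """Return a list of problems; an empty list means the config looks usable."""
    problems = []
    if not params.get("namespace"):
        problems.append("namespace must be set to the target namespace")
    if params.get("benchmark_retries", 0) < 1:
        problems.append("benchmark_retries should be at least 1")
    if params.get("max_environments", 0) < 1:
        problems.append("max_environments should be at least 1")
    return problems

example = {
    "benchmark_retries": 3,
    "namespace": None,  # as in the example file: not yet set
    "max_environments": 1,
}
print(validate_actuator_parameters(example))
```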
@@ -1,7 +1,5 @@
# Copyright (c) IBM Corporation
# SPDX-License-Identifier: MIT

sampleStoreIdentifier: 2963a5
entitySpace:
- identifier: model
propertyDomain:
@@ -11,36 +9,26 @@ entitySpace:
propertyDomain:
values:
- quay.io/dataprep1/data-prep-kit/vllm_image:0.1
- identifier: n_cpus
propertyDomain:
values: [8]
- identifier: memory
propertyDomain:
values: ["128Gi"]
- identifier: dtype
- identifier: "number_input_tokens"
propertyDomain:
values: ["auto"]
- identifier: "num_prompts"
propertyDomain:
values: [500]
values: [1024, 2048, 4096]
- identifier: "request_rate"
propertyDomain:
values: [-1]
- identifier: "max_concurrency"
propertyDomain:
values: [-1]
- identifier: "gpu_memory_utilization"
domainRange: [1,10]
interval: 1
- identifier: n_cpus
propertyDomain:
values: [.9]
- identifier: "cpu_offload"
domainRange: [2,16]
interval: 2
- identifier: memory
propertyDomain:
values: [0]
values: ["128Gi", "256Gi"]
- identifier: "max_batch_tokens"
propertyDomain:
values: [16384]
values: [1024, 2048, 4096, 8192, 16384, 32768]
- identifier: "max_num_seq"
propertyDomain:
values: [256]
values: [16, 32, 64]
- identifier: "n_gpus"
propertyDomain:
values: [1]
@@ -51,4 +39,5 @@ experiments:
- actuatorIdentifier: vllm_performance
experimentIdentifier: performance-testing-full
metadata:
description: Parameters for VLLM performance testing
description: A space of vllm deployment configurations
name: vllm_deployments
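One practical question about a space definition like this is how many entities it contains: each dimension contributes either its explicit `values` or the points implied by `domainRange` and `interval`, and the space is their Cartesian product. The sketch below illustrates that arithmetic; it mirrors the YAML structure but is not the ado implementation, and it assumes a half-open `[start, end)` range, which may differ from ado's semantics.

```python
import math

# Count points in a discretized entity space, where each dimension is
# either an explicit list of values or a [start, end) range with a step.
def dimension_size(dim: dict) -> int:
    domain = dim["propertyDomain"]
    if "values" in domain:
        return len(domain["values"])
    start, end = domain["domainRange"]
    return len(range(start, end, domain["interval"]))

def space_size(entity_space: list[dict]) -> int:
    return math.prod(dimension_size(d) for d in entity_space)

# Hypothetical miniature space in the same shape as the YAML above.
space = [
    {"identifier": "number_input_tokens",
     "propertyDomain": {"values": [1024, 2048, 4096]}},
    {"identifier": "memory",
     "propertyDomain": {"values": ["128Gi", "256Gi"]}},
    {"identifier": "n_cpus",
     "propertyDomain": {"domainRange": [2, 16], "interval": 2}},
]
print(space_size(space))  # 3 * 2 * 7 = 42
```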