From 1ebc70cd440a8b1ea6871707bb8c48b1bb0c532f Mon Sep 17 00:00:00 2001 From: Anson Qian Date: Mon, 2 Mar 2026 00:49:11 +0000 Subject: [PATCH 1/5] fix kaito to avoid karpenter error --- .../charts/karpenter/crds/kaito.sh_kaitonodeclasses.yaml | 1 + karpenter/charts/karpenter/templates/clusterrole.yaml | 6 ++++++ karpenter/charts/karpenter/values.yaml | 4 ++-- 3 files changed, 9 insertions(+), 2 deletions(-) create mode 120000 karpenter/charts/karpenter/crds/kaito.sh_kaitonodeclasses.yaml diff --git a/karpenter/charts/karpenter/crds/kaito.sh_kaitonodeclasses.yaml b/karpenter/charts/karpenter/crds/kaito.sh_kaitonodeclasses.yaml new file mode 120000 index 0000000..e01ca1c --- /dev/null +++ b/karpenter/charts/karpenter/crds/kaito.sh_kaitonodeclasses.yaml @@ -0,0 +1 @@ +../../../pkg/apis/crds/kaito.sh_kaitonodeclasses.yaml \ No newline at end of file diff --git a/karpenter/charts/karpenter/templates/clusterrole.yaml b/karpenter/charts/karpenter/templates/clusterrole.yaml index 4dfdb73..3e6a978 100644 --- a/karpenter/charts/karpenter/templates/clusterrole.yaml +++ b/karpenter/charts/karpenter/templates/clusterrole.yaml @@ -35,6 +35,9 @@ rules: - apiGroups: ["flex.aks.azure.com"] resources: ["nebiusnodeclasses"] verbs: ["get", "list", "watch"] + - apiGroups: ["kaito.sh"] + resources: ["kaitonodeclasses"] + verbs: ["get", "list", "watch"] # Write - apiGroups: ["karpenter.azure.com"] resources: ["aksnodeclasses", "aksnodeclasses/status"] @@ -42,3 +45,6 @@ rules: - apiGroups: ["flex.aks.azure.com"] resources: ["nebiusnodeclasses", "nebiusnodeclasses/status"] verbs: ["patch", "update"] + - apiGroups: ["kaito.sh"] + resources: ["kaitonodeclasses", "kaitonodeclasses/status"] + verbs: ["patch", "update"] diff --git a/karpenter/charts/karpenter/values.yaml b/karpenter/charts/karpenter/values.yaml index 5083f2d..2e3df9f 100644 --- a/karpenter/charts/karpenter/values.yaml +++ b/karpenter/charts/karpenter/values.yaml @@ -94,9 +94,9 @@ controller: # -- Repository path to the controller image. repository: ghcr.io/azure/aks-flex/karpenter # -- Tag of the controller image. - tag: sha-17e5cf4 + tag: main # -- SHA256 digest of the controller image. - digest: sha256:b84b0922e5eabcf00f3addbb712a1718096edf62873b03f6e6ec8e6c533d920f + digest: sha256:811dad55a15faaded4ced16b73231d8b6bf2fcfef5a49d88a54e0c7019a9292c # -- Optional Nebius credentials secret mount. The secret must be created by # the user separately (not managed by this chart). When enabled, the secret is # mounted as a file and passed via the -flex-nebius.credentials-file flag. From b24b327a94e0735937b030198d7fa4b75240e881 Mon Sep 17 00:00:00 2001 From: Anson Qian Date: Mon, 2 Mar 2026 00:49:54 +0000 Subject: [PATCH 2/5] rename nebius example --- karpenter/examples/nebius/cpu_deployment.yaml | 6 +++--- karpenter/examples/nebius/gpu_deployment.yaml | 6 +++--- karpenter/examples/nebius/nodeclass.yaml | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/karpenter/examples/nebius/cpu_deployment.yaml b/karpenter/examples/nebius/cpu_deployment.yaml index 2c0d047..73b86d7 100644 --- a/karpenter/examples/nebius/cpu_deployment.yaml +++ b/karpenter/examples/nebius/cpu_deployment.yaml @@ -1,16 +1,16 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: sample-cpu-app + name: nebius-sample-cpu-app spec: replicas: 1 selector: matchLabels: - app: sample-cpu-app + app: nebius-sample-cpu-app template: metadata: labels: - app: sample-cpu-app + app: nebius-sample-cpu-app spec: affinity: nodeAffinity: diff --git a/karpenter/examples/nebius/gpu_deployment.yaml b/karpenter/examples/nebius/gpu_deployment.yaml index 6a8908b..f325862 100644 --- a/karpenter/examples/nebius/gpu_deployment.yaml +++ b/karpenter/examples/nebius/gpu_deployment.yaml @@ -1,16 +1,16 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: sample-gpu-app + name: nebius-sample-gpu-app spec: replicas: 1 selector: matchLabels: - app: sample-gpu-app + app: nebius-sample-gpu-app template: metadata: labels: - app: sample-gpu-app + app: nebius-sample-gpu-app spec: affinity: nodeAffinity: diff --git a/karpenter/examples/nebius/nodeclass.yaml b/karpenter/examples/nebius/nodeclass.yaml index 2248846..1fd6187 100644 --- a/karpenter/examples/nebius/nodeclass.yaml +++ b/karpenter/examples/nebius/nodeclass.yaml @@ -7,4 +7,4 @@ spec: region: "" subnetID: "" osDiskSizeGB: 128 - wireguardPeerCIDR: "" + wireguardPeerCIDR: "100.96.1.0/24" From 30dc6b204fc0f3a8b6ec5ea50290e5cf7329ba03 Mon Sep 17 00:00:00 2001 From: Anson Qian Date: Mon, 2 Mar 2026 01:42:47 +0000 Subject: [PATCH 3/5] add azure example --- karpenter/examples/azure/cpu_deployment.yaml | 38 +++++++++++++++++++ karpenter/examples/azure/cpu_nodepool.yaml | 21 ++++++++++ karpenter/examples/azure/gpu_deployment.yaml | 40 ++++++++++++++++++++ karpenter/examples/azure/gpu_nodepool.yaml | 22 +++++++++++ karpenter/examples/azure/nodeclass.yaml | 6 +++ 5 files changed, 127 insertions(+) create mode 100644 karpenter/examples/azure/cpu_deployment.yaml create mode 100644 karpenter/examples/azure/cpu_nodepool.yaml create mode 100644 karpenter/examples/azure/gpu_deployment.yaml create mode 100644 karpenter/examples/azure/gpu_nodepool.yaml create mode 100644 karpenter/examples/azure/nodeclass.yaml diff --git a/karpenter/examples/azure/cpu_deployment.yaml b/karpenter/examples/azure/cpu_deployment.yaml new file mode 100644 index 0000000..0dd5d53 --- /dev/null +++ b/karpenter/examples/azure/cpu_deployment.yaml @@ -0,0 +1,38 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: azure-sample-cpu-app +spec: + replicas: 1 + selector: + matchLabels: + app: azure-sample-cpu-app + template: + metadata: + labels: + app: azure-sample-cpu-app + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.azure.com/mode + operator: NotIn + values: + - system + - key: nodepool + operator: In + values: + - azure-cpu-nodepool + containers: + - name: cpu-container + image: ubuntu:22.04 + command: ["sleep", "infinity"] + resources: + requests: + memory: "256Mi" + cpu: "500m" + limits: + memory: "512Mi" + cpu: "1" diff --git a/karpenter/examples/azure/cpu_nodepool.yaml b/karpenter/examples/azure/cpu_nodepool.yaml new file mode 100644 index 0000000..9bfc076 --- /dev/null +++ b/karpenter/examples/azure/cpu_nodepool.yaml @@ -0,0 +1,21 @@ +apiVersion: karpenter.sh/v1 +kind: NodePool +metadata: + name: azure-cpu-nodepool +spec: + template: + metadata: + labels: + nodepool: azure-cpu-nodepool + spec: + nodeClassRef: + group: karpenter.azure.com + kind: AKSNodeClass + name: azure + requirements: + - key: node.kubernetes.io/instance-type + operator: In + values: ["Standard_D4s_v3"] + limits: + cpu: "100" + memory: 400Gi diff --git a/karpenter/examples/azure/gpu_deployment.yaml b/karpenter/examples/azure/gpu_deployment.yaml new file mode 100644 index 0000000..0561483 --- /dev/null +++ b/karpenter/examples/azure/gpu_deployment.yaml @@ -0,0 +1,40 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: azure-sample-gpu-app +spec: + replicas: 1 + selector: + matchLabels: + app: azure-sample-gpu-app + template: + metadata: + labels: + app: azure-sample-gpu-app + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.azure.com/mode + operator: NotIn + values: + - system + - key: nodepool + operator: In + values: + - azure-gpu-nodepool + containers: + - name: gpu-container + image: nvidia/cuda:12.4.0-base-ubuntu22.04 + command: ["nvidia-smi", "-l", "60"] + resources: + requests: + memory: "512Mi" + cpu: "250m" + nvidia.com/gpu: "4" + limits: + memory: "1Gi" + cpu: "500m" + nvidia.com/gpu: "4" diff --git a/karpenter/examples/azure/gpu_nodepool.yaml b/karpenter/examples/azure/gpu_nodepool.yaml new file mode 100644 index 0000000..9a58d7f --- /dev/null +++ b/karpenter/examples/azure/gpu_nodepool.yaml @@ -0,0 +1,22 @@ +apiVersion: karpenter.sh/v1 +kind: NodePool +metadata: + name: azure-gpu-nodepool +spec: + template: + metadata: + labels: + nodepool: azure-gpu-nodepool + spec: + nodeClassRef: + group: karpenter.azure.com + kind: AKSNodeClass + name: azure + requirements: + - key: node.kubernetes.io/instance-type + operator: In + values: ["Standard_ND96isr_H100_v5"] + limits: + cpu: "1000" + memory: 10000Gi + nvidia.com/gpu: "64" diff --git a/karpenter/examples/azure/nodeclass.yaml b/karpenter/examples/azure/nodeclass.yaml new file mode 100644 index 0000000..94557d9 --- /dev/null +++ b/karpenter/examples/azure/nodeclass.yaml @@ -0,0 +1,6 @@ +apiVersion: karpenter.azure.com/v1beta1 +kind: AKSNodeClass +metadata: + name: azure +spec: + imageFamily: Ubuntu2204 From ac635f55e635fbfb640ee70ae28c650f4ac09ac0 Mon Sep 17 00:00:00 2001 From: Anson Qian Date: Mon, 2 Mar 2026 01:42:57 +0000 Subject: [PATCH 4/5] update image --- karpenter/charts/karpenter/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karpenter/charts/karpenter/values.yaml b/karpenter/charts/karpenter/values.yaml index 2e3df9f..c625f31 100644 --- a/karpenter/charts/karpenter/values.yaml +++ b/karpenter/charts/karpenter/values.yaml @@ -96,7 +96,7 @@ controller: # -- Tag of the controller image. tag: main # -- SHA256 digest of the controller image. - digest: sha256:811dad55a15faaded4ced16b73231d8b6bf2fcfef5a49d88a54e0c7019a9292c + digest: sha256:3625d2a19298da105cfc24f6f0694cbd9dc1d5443ba10b6d9043d0ed1902d470 # -- Optional Nebius credentials secret mount. The secret must be created by # the user separately (not managed by this chart). When enabled, the secret is # mounted as a file and passed via the -flex-nebius.credentials-file flag. From f65bffcc4270b43c7a674c1eabaf8fc2b1023ff9 Mon Sep 17 00:00:00 2001 From: Anson Qian Date: Mon, 2 Mar 2026 01:43:11 +0000 Subject: [PATCH 5/5] update doc --- docs/usages/karpenter.md | 111 +++++++++++++++++++++++++++++++++------ 1 file changed, 95 insertions(+), 16 deletions(-) diff --git a/docs/usages/karpenter.md b/docs/usages/karpenter.md index e3ba7f3..12eabad 100644 --- a/docs/usages/karpenter.md +++ b/docs/usages/karpenter.md @@ -2,22 +2,25 @@ ## Overview -This guide walks through deploying `karpenter` to an AKS Flex cluster and using Karpenter to automatically provision and deprovision Nebius cloud nodes. By the end you will have: +This guide walks through deploying `karpenter` to an AKS Flex cluster and using Karpenter to automatically provision and deprovision cloud nodes. By the end you will have: - The karpenter controller running in the cluster -- A `NebiusNodeClass` and `NodePool` configured for Nebius compute instances -- Workloads that trigger automatic node scale-up on Nebius +- `NodeClass` and `NodePool` resources configured for Azure and/or Nebius compute instances +- Workloads that trigger automatic node scale-up - An understanding of how to scale down and clean up provisioned nodes -Karpenter watches for unschedulable pods and automatically provisions new nodes to meet demand. The `karpenter` extends Karpenter with a Nebius cloud provider, allowing it to create and manage Nebius VMs that join the AKS cluster as worker nodes. +Karpenter watches for unschedulable pods and automatically provisions new nodes to meet demand. The `karpenter` extends Karpenter with multiple cloud providers: + +- **Azure** (`AKSNodeClass`) — provisions Azure VMs directly into the cluster's node resource group, joining the existing AKS cluster. +- **Nebius** (`NebiusNodeClass`) — provisions Nebius VMs that join the AKS cluster as worker nodes over WireGuard. ## Getting Started ### Prerequisites - **AKS Flex CLI** -- installed and configured with a `.env` file. See [CLI Setup](cli-setup.md). -- **AKS cluster with WireGuard** -- the cluster must have WireGuard enabled for cross-cloud node connectivity. See [AKS Cluster Setup](cli-prepare-aks-cluster.md) (specifically the [Enable with WireGuard](cli-prepare-aks-cluster.md#enable-with-wireguard) section). -- **Nebius service account credentials** -- a Nebius credentials JSON file for the karpenter controller. See the [Nebius authorized keys documentation](https://docs.nebius.com/iam/service-accounts/authorized-keys). +- **AKS cluster** -- an AKS cluster provisioned via the CLI. For Nebius nodes, the cluster must also have WireGuard enabled for cross-cloud connectivity. See [AKS Cluster Setup](cli-prepare-aks-cluster.md). +- **Nebius service account credentials** *(Nebius only)* -- a Nebius credentials JSON file for the karpenter controller. See the [Nebius authorized keys documentation](https://docs.nebius.com/iam/service-accounts/authorized-keys). - **Helm** -- required for installing the karpenter chart. ### Configuration @@ -59,10 +62,12 @@ This creates a secret named `nebius-credentials` in the `karpenter` namespace wi ### 3. Grant the kubelet identity required Azure permissions -The karpenter controller uses the AKS kubelet identity for two Azure operations that require explicit role assignments: +The karpenter controller uses the AKS kubelet identity for Azure operations that require explicit role assignments: - **VNET read** — reads the cluster VNET GUID at startup -- **Azure Resource Graph** — manages Azure VM instances lifecycle +- **Subnet join** — attaches NICs to the cluster subnet when provisioning Azure VMs +- **VM/NIC/disk lifecycle** — creates and deletes Azure VMs, NICs, and disks in the node resource group +- **Azure Resource Graph** — enumerates managed VM instances First, retrieve the kubelet identity object ID and the relevant resource IDs: @@ -81,21 +86,24 @@ NODE_RESOURCE_GROUP_ID=$(az aks show \ --query "nodeResourceGroup" \ -o tsv | xargs -I{} echo "/subscriptions/$AZURE_SUBSCRIPTION_ID/resourceGroups/{}") -# Get the VNET resource ID (adjust resource group if VNET is in a different RG) -VNET_ID=$(az network vnet list \ - --resource-group $RESOURCE_GROUP_NAME \ - --query "[0].id" \ - -o tsv) +# Get the VNET resource group ID (where the cluster VNet and subnet live; +# adjust if the VNet is in a different resource group) +VNET_RESOURCE_GROUP_ID="/subscriptions/$AZURE_SUBSCRIPTION_ID/resourceGroups/$RESOURCE_GROUP_NAME" ``` Then create the two role assignments: ```bash -# 1. Reader on the VNET (for VNET GUID resolution at startup) +# 1. Network Contributor on the VNET resource group +# Required for: VNET GUID resolution at startup, and subnet join/action +# when creating NICs for provisioned VMs. +# Note: "Reader" on the VNET alone is insufficient — NIC creation requires +# Microsoft.Network/virtualNetworks/subnets/join/action which is only +# included in Network Contributor or higher. az role assignment create \ --assignee "$KUBELET_OBJECT_ID" \ - --role "Reader" \ - --scope "$VNET_ID" + --role "Network Contributor" \ + --scope "$VNET_RESOURCE_GROUP_ID" # 2. Contributor on the node resource group (for VM/NIC/disk create and delete, # and Azure Resource Graph VM enumeration) @@ -165,6 +173,77 @@ NAME READY STATUS RESTARTS AGE karpenter-6b55df659d-m2d5g 1/1 Running 7 (13m ago) 20m ``` +## Creating Nodes on Azure via Karpenter + +With the karpenter controller running, you can define an `AKSNodeClass` and `NodePool` to provision Azure VMs directly into the cluster's node resource group. + +### Creating an AKSNodeClass + +The `AKSNodeClass` defines the Azure-specific configuration for provisioned nodes: + +```bash +$ kubectl apply -f examples/azure/nodeclass.yaml +``` + +Verify the node class is ready: + +```bash +$ kubectl get aksnodeclass +NAME READY AGE +azure True 5s +``` + +### Creating a CPU NodePool + +```bash +$ kubectl apply -f examples/azure/cpu_nodepool.yaml +``` + +Verify the node pool is ready: + +```bash +$ kubectl get nodepool +NAME NODECLASS NODES READY AGE +azure-cpu-nodepool azure 0 True 4s +``` + +### Creating a GPU NodePool + +For GPU workloads, create a NodePool that pins to a specific GPU SKU via `node.kubernetes.io/instance-type`: + +```bash +$ kubectl apply -f examples/azure/gpu_nodepool.yaml +``` + +Both node pools should now be ready: + +```bash +$ kubectl get nodepool +NAME NODECLASS NODES READY AGE +azure-cpu-nodepool azure 0 True 4s +azure-gpu-nodepool azure 0 True 2s +``` + +### Deploy a workload to trigger scale-up + +```bash +$ kubectl apply -f examples/azure/cpu_deployment.yaml +``` + +Karpenter detects the unschedulable pod and creates a `NodeClaim`: + +```bash +$ kubectl get nodeclaims +NAME TYPE CAPACITY ZONE NODE READY AGE +azure-cpu-nodepool-6rhlk aks-azure-cpu-nodepool-6rhlk True 2m +``` + +> **Note:** GPU workloads require the NVIDIA device plugin. Install it with the CLI before creating GPU workloads: +> +> ```bash +> aks-flex-cli aks deploy --gpu-device-plugin --skip-arm +> ``` + ## Creating Nodes on Nebius via Karpenter With the karpenter controller running, you can define a `NebiusNodeClass` and `NodePool` to tell Karpenter how and when to provision Nebius nodes.