Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/azure-cli/azure/cli/command_modules/acs/_consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,13 @@
CONST_DEFAULT_NODE_VM_SIZE = "Standard_DS2_v2"
CONST_DEFAULT_WINDOWS_NODE_VM_SIZE = "Standard_D2s_v3"

# gpu instance profiles
# String values offered as the choices of the `--gpu-instance-profile` option
# (`aks create` / `aks nodepool add`), which partitions Nvidia GPUs.
# NOTE(review): "MIG" presumably stands for Nvidia Multi-Instance GPU and the
# digit for the partition size -- confirm against the AKS API reference.
CONST_GPU_INSTANCE_PROFILE_MIG1_G = "MIG1g"
CONST_GPU_INSTANCE_PROFILE_MIG2_G = "MIG2g"
CONST_GPU_INSTANCE_PROFILE_MIG3_G = "MIG3g"
CONST_GPU_INSTANCE_PROFILE_MIG4_G = "MIG4g"
CONST_GPU_INSTANCE_PROFILE_MIG7_G = "MIG7g"

# consts for ManagedCluster
# load balancer sku
CONST_LOAD_BALANCER_SKU_BASIC = "basic"
Expand Down
6 changes: 6 additions & 0 deletions src/azure-cli/azure/cli/command_modules/acs/_help.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,6 +547,9 @@
- name: --http-proxy-config
type: string
short-summary: HTTP Proxy configuration for this cluster.
- name: --gpu-instance-profile
type: string
short-summary: GPU instance profile to partition multi-gpu Nvidia GPUs.

examples:
- name: Create a Kubernetes cluster with an existing SSH public key.
Expand Down Expand Up @@ -1099,6 +1102,9 @@
- name: --host-group-id
type: string
short-summary: The fully qualified dedicated host group id used to provision agent node pool.
- name: --gpu-instance-profile
type: string
short-summary: GPU instance profile to partition multi-gpu Nvidia GPUs.
examples:
- name: Create a nodepool in an existing AKS cluster with ephemeral os enabled.
text: az aks nodepool add -g MyResourceGroup -n nodepool1 --cluster-name MyManagedCluster --node-osdisk-type Ephemeral --node-osdisk-size 48
Expand Down
15 changes: 14 additions & 1 deletion src/azure-cli/azure/cli/command_modules/acs/_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,10 @@
CONST_SCALE_DOWN_MODE_DELETE, CONST_SCALE_SET_PRIORITY_REGULAR,
CONST_SCALE_SET_PRIORITY_SPOT, CONST_SPOT_EVICTION_POLICY_DEALLOCATE,
CONST_SPOT_EVICTION_POLICY_DELETE, CONST_STABLE_UPGRADE_CHANNEL,
CONST_AZURE_KEYVAULT_NETWORK_ACCESS_PUBLIC, CONST_AZURE_KEYVAULT_NETWORK_ACCESS_PRIVATE)
CONST_AZURE_KEYVAULT_NETWORK_ACCESS_PUBLIC, CONST_AZURE_KEYVAULT_NETWORK_ACCESS_PRIVATE,
CONST_GPU_INSTANCE_PROFILE_MIG1_G, CONST_GPU_INSTANCE_PROFILE_MIG2_G,
CONST_GPU_INSTANCE_PROFILE_MIG3_G, CONST_GPU_INSTANCE_PROFILE_MIG4_G,
CONST_GPU_INSTANCE_PROFILE_MIG7_G)
from azure.cli.command_modules.acs._validators import (
validate_acr, validate_agent_pool_name, validate_assign_identity,
validate_assign_kubelet_identity, validate_azure_keyvault_kms_key_id,
Expand Down Expand Up @@ -116,6 +119,14 @@

keyvault_network_access_types = [CONST_AZURE_KEYVAULT_NETWORK_ACCESS_PUBLIC, CONST_AZURE_KEYVAULT_NETWORK_ACCESS_PRIVATE]

# Choices accepted by the `gpu_instance_profile` argument; handed to
# get_enum_type() when the argument is registered in load_arguments().
gpu_instance_profiles = [
CONST_GPU_INSTANCE_PROFILE_MIG1_G,
CONST_GPU_INSTANCE_PROFILE_MIG2_G,
CONST_GPU_INSTANCE_PROFILE_MIG3_G,
CONST_GPU_INSTANCE_PROFILE_MIG4_G,
CONST_GPU_INSTANCE_PROFILE_MIG7_G,
]


def load_arguments(self, _):

Expand Down Expand Up @@ -327,6 +338,7 @@ def load_arguments(self, _):
c.argument('yes', options_list=['--yes', '-y'], help='Do not prompt for confirmation.', action='store_true')
c.argument('host_group_id', validator=validate_host_group_id)
c.argument('http_proxy_config')
c.argument('gpu_instance_profile', arg_type=get_enum_type(gpu_instance_profiles))

with self.argument_context('aks update') as c:
# managed cluster parameters
Expand Down Expand Up @@ -493,6 +505,7 @@ def load_arguments(self, _):
c.argument('kubelet_config')
c.argument('linux_os_config')
c.argument('host_group_id', validator=validate_host_group_id)
c.argument('gpu_instance_profile', arg_type=get_enum_type(gpu_instance_profiles))

with self.argument_context('aks nodepool update', resource_type=ResourceType.MGMT_CONTAINERSERVICE, operation_group='agent_pools') as c:
c.argument('enable_cluster_autoscaler', options_list=[
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1200,6 +1200,25 @@ def get_no_wait(self) -> bool:
# this parameter does not need validation
return no_wait

def get_gpu_instance_profile(self) -> Union[str, None]:
    """Resolve the effective gpu_instance_profile.

    The value supplied on the command line (stored in ``raw_param``) is the
    baseline. If an agentpool object is attached and already carries a
    non-None ``gpu_instance_profile``, that value wins over the raw one.

    No dynamic completion or validation is performed for this parameter.

    :return: string or None
    """
    # always read the raw value first, exactly as passed by the command
    requested = self.raw_param.get("gpu_instance_profile")

    # without an attached agentpool there is nothing that could override it
    pool = self.agentpool
    if not pool:
        return requested

    # a missing attribute is treated the same as an unset (None) one
    attached = getattr(pool, "gpu_instance_profile", None)
    return requested if attached is None else attached


class AKSAgentPoolAddDecorator:
def __init__(
Expand Down Expand Up @@ -1469,6 +1488,16 @@ def set_up_custom_node_config(self, agentpool: AgentPool) -> AgentPool:
agentpool.linux_os_config = self.context.get_linux_os_config()
return agentpool

def set_up_gpu_properties(self, agentpool: AgentPool) -> AgentPool:
    """Populate the gpu related fields of the given AgentPool object.

    The object is first passed through ``_ensure_agentpool``; its
    ``gpu_instance_profile`` is then assigned the value obtained from the
    context, and the same object is handed back.

    :return: the AgentPool object
    """
    self._ensure_agentpool(agentpool)

    # the context decides which profile value applies
    profile = self.context.get_gpu_instance_profile()
    agentpool.gpu_instance_profile = profile
    return agentpool

def construct_agentpool_profile_default(self, bypass_restore_defaults: bool = False) -> AgentPool:
"""The overall controller used to construct the AgentPool profile by default.

Expand Down Expand Up @@ -1501,6 +1530,8 @@ def construct_agentpool_profile_default(self, bypass_restore_defaults: bool = Fa
agentpool = self.set_up_vm_properties(agentpool)
# set up custom node config
agentpool = self.set_up_custom_node_config(agentpool)
# set up gpu instance profile
agentpool = self.set_up_gpu_properties(agentpool)
# restore defaults
if not bypass_restore_defaults:
agentpool = self._restore_defaults_in_agentpool(agentpool)
Expand Down
2 changes: 2 additions & 0 deletions src/azure-cli/azure/cli/command_modules/acs/custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -1581,6 +1581,7 @@ def aks_create(
yes=False,
aks_custom_headers=None,
host_group_id=None,
gpu_instance_profile=None,
):
# DO NOT MOVE: get all the original parameters and save them as a dictionary
raw_parameters = locals()
Expand Down Expand Up @@ -2985,6 +2986,7 @@ def aks_agentpool_add(
no_wait=False,
aks_custom_headers=None,
host_group_id=None,
gpu_instance_profile=None,
):
# DO NOT MOVE: get all the original parameters and save them as a dictionary
raw_parameters = locals()
Expand Down
Loading