From 03b1152929139b164a978f5bb466c67805969824 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Fri, 29 Aug 2025 15:16:06 -0400 Subject: [PATCH 01/16] implement v3 --- centml/cli/cluster.py | 63 +++++++++++++++++++++++++---------- centml/sdk/api.py | 29 ++++++++++++++-- examples/sdk/create_cserve.py | 20 +++++------ requirements.txt | 2 +- 4 files changed, 84 insertions(+), 30 deletions(-) diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py index bbb55ad..7d4e9f2 100644 --- a/centml/cli/cluster.py +++ b/centml/cli/cluster.py @@ -14,12 +14,15 @@ DeploymentType.INFERENCE_V2: 'inference', DeploymentType.COMPUTE_V2: 'compute', DeploymentType.CSERVE: 'cserve', - DeploymentType.CSERVE_V2: 'cserve', + DeploymentType.CSERVE_V2: 'cserve-v2', + DeploymentType.CSERVE_V3: 'cserve', DeploymentType.RAG: 'rag', } depl_name_to_type_map = { 'inference': DeploymentType.INFERENCE_V2, - 'cserve': DeploymentType.CSERVE_V2, + 'cserve': DeploymentType.CSERVE_V3, + 'cserve-v2': DeploymentType.CSERVE_V2, + 'cserve-v3': DeploymentType.CSERVE_V3, 'compute': DeploymentType.COMPUTE_V2, 'rag': DeploymentType.RAG, } @@ -56,6 +59,17 @@ def _format_ssh_key(ssh_key): return ssh_key[:32] + "..." +def _get_replica_info(deployment, depl_type): + """Extract replica information handling V2/V3 field differences""" + if depl_type == DeploymentType.CSERVE_V3: + return { + "min": getattr(deployment, 'min_replicas', getattr(deployment, 'min_scale', None)), + "max": getattr(deployment, 'max_replicas', getattr(deployment, 'max_scale', None)), + } + else: # V2 + return {"min": deployment.min_scale, "max": deployment.max_scale} + + def _get_ready_status(cclient, deployment): api_status = deployment.status service_status = ( @@ -126,7 +140,9 @@ def get(type, id): elif depl_type == DeploymentType.COMPUTE_V2: deployment = cclient.get_compute(id) elif depl_type == DeploymentType.CSERVE_V2: - deployment = cclient.get_cserve(id) + deployment = cclient.get_cserve_v2(id) + elif depl_type == DeploymentType.CSERVE_V3: + deployment = cclient.get_cserve_v3(id) else: sys.exit("Please enter correct deployment type") @@ -157,7 +173,7 @@ def get(type, id): ("Image", deployment.image_url), ("Container port", deployment.container_port), ("Healthcheck", deployment.healthcheck or "/"), - ("Replicas", {"min": deployment.min_scale, "max": deployment.max_scale}), + ("Replicas", _get_replica_info(deployment, depl_type)), ("Environment variables", deployment.env_vars or "None"), ("Max concurrency", deployment.concurrency or "None"), ], @@ -173,21 +189,34 @@ def get(type, id): disable_numparse=True, ) ) - elif depl_type == DeploymentType.CSERVE_V2: + elif depl_type in [DeploymentType.CSERVE_V2, DeploymentType.CSERVE_V3]: + replica_info = _get_replica_info(deployment, depl_type) + display_rows = [ + ("Hugging face model", deployment.recipe.model), + ( + "Parallelism", + { + "tensor": deployment.recipe.additional_properties.get('tensor_parallel_size', 'N/A'), + "pipeline": deployment.recipe.additional_properties.get('pipeline_parallel_size', 'N/A'), + }, + ), + ("Replicas", replica_info), + ("Max concurrency", deployment.concurrency or "None"), + ] + + # Add V3-specific rollout information + if depl_type == DeploymentType.CSERVE_V3: + rollout_info = {} + if hasattr(deployment, 'max_surge') and deployment.max_surge is not None: + rollout_info['max_surge'] = deployment.max_surge + if hasattr(deployment, 'max_unavailable') and deployment.max_unavailable is not None: + rollout_info['max_unavailable'] = deployment.max_unavailable + if rollout_info: + 
display_rows.append(("Rollout strategy", rollout_info)) + click.echo( tabulate( - [ - ("Hugging face model", deployment.recipe.model), - ( - "Parallelism", - { - "tensor": deployment.recipe.additional_properties['tensor_parallel_size'], - "pipeline": deployment.recipe.additional_properties['pipeline_parallel_size'], - }, - ), - ("Replicas", {"min": deployment.min_scale, "max": deployment.max_scale}), - ("Max concurrency", deployment.concurrency or "None"), - ], + display_rows, tablefmt="rounded_outline", disable_numparse=True, ) diff --git a/centml/sdk/api.py b/centml/sdk/api.py index da7d307..c70af9b 100644 --- a/centml/sdk/api.py +++ b/centml/sdk/api.py @@ -7,6 +7,7 @@ CreateInferenceDeploymentRequest, CreateComputeDeploymentRequest, CreateCServeV2DeploymentRequest, + CreateCServeV3DeploymentRequest, Metric, ) @@ -34,6 +35,12 @@ def get_compute(self, id): def get_cserve(self, id): return self._api.get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get(id) + + def get_cserve_v2(self, id): + return self._api.get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get(id) + + def get_cserve_v3(self, id): + return self._api.get_cserve_v3_deployment_deployments_cserve_v3_deployment_id_get(id) def create_inference(self, request: CreateInferenceDeploymentRequest): return self._api.create_inference_deployment_deployments_inference_post(request) @@ -41,8 +48,17 @@ def create_inference(self, request: CreateInferenceDeploymentRequest): def create_compute(self, request: CreateComputeDeploymentRequest): return self._api.create_compute_deployment_deployments_compute_post(request) - def create_cserve(self, request: CreateCServeV2DeploymentRequest): + def create_cserve(self, request: CreateCServeV3DeploymentRequest): + """Create CServe deployment using V3 API (default)""" + return self._api.create_cserve_v3_deployment_deployments_cserve_v3_post(request) + + def create_cserve_v2(self, request: CreateCServeV2DeploymentRequest): + """Create CServe deployment using V2 API (legacy)""" return self._api.create_cserve_v2_deployment_deployments_cserve_v2_post(request) + + def create_cserve_v3(self, request: CreateCServeV3DeploymentRequest): + """Create CServe deployment using V3 API""" + return self._api.create_cserve_v3_deployment_deployments_cserve_v3_post(request) def update_inference(self, deployment_id: int, request: CreateInferenceDeploymentRequest): return self._api.update_inference_deployment_deployments_inference_put(deployment_id, request) @@ -50,8 +66,17 @@ def update_inference(self, deployment_id: int, request: CreateInferenceDeploymen def update_compute(self, deployment_id: int, request: CreateComputeDeploymentRequest): return self._api.update_compute_deployment_deployments_compute_put(deployment_id, request) - def update_cserve(self, deployment_id: int, request: CreateCServeV2DeploymentRequest): + def update_cserve(self, deployment_id: int, request: CreateCServeV3DeploymentRequest): + """Update CServe deployment using V3 API (default)""" + return self._api.update_cserve_v3_deployment_deployments_cserve_v3_put(deployment_id, request) + + def update_cserve_v2(self, deployment_id: int, request: CreateCServeV2DeploymentRequest): + """Update CServe deployment using V2 API (legacy)""" return self._api.update_cserve_v2_deployment_deployments_cserve_v2_put(deployment_id, request) + + def update_cserve_v3(self, deployment_id: int, request: CreateCServeV3DeploymentRequest): + """Update CServe deployment using V3 API""" + return 
self._api.update_cserve_v3_deployment_deployments_cserve_v3_put(deployment_id, request) def _update_status(self, id, new_status): status_req = platform_api_python_client.DeploymentStatusRequest(status=new_status) diff --git a/examples/sdk/create_cserve.py b/examples/sdk/create_cserve.py index 54e0c9b..56f92b7 100644 --- a/examples/sdk/create_cserve.py +++ b/examples/sdk/create_cserve.py @@ -1,18 +1,18 @@ import centml from centml.sdk.api import get_centml_client -from centml.sdk import DeploymentType, CreateCServeV2DeploymentRequest, CServeV2Recipe +from centml.sdk import DeploymentType, CreateCServeV3DeploymentRequest, CServeV2Recipe def get_fastest_cserve_config(cclient, name, model): fastest = cclient.get_cserve_recipe(model=model)[0].fastest - return CreateCServeV2DeploymentRequest( + return CreateCServeV3DeploymentRequest( name=name, cluster_id=cclient.get_cluster_id(fastest.hardware_instance_id), hardware_instance_id=fastest.hardware_instance_id, recipe=fastest.recipe, - min_scale=1, - max_scale=1, + min_replicas=1, + max_replicas=1, env_vars={}, ) @@ -22,13 +22,13 @@ def get_default_cserve_config(cclient, name, model): hardware_instance = cclient.get_hardware_instances(cluster_id=1001)[0] - return CreateCServeV2DeploymentRequest( + return CreateCServeV3DeploymentRequest( name=name, cluster_id=hardware_instance.cluster_id, hardware_instance_id=hardware_instance.id, recipe=default_recipe, - min_scale=1, - max_scale=1, + min_replicas=1, + max_replicas=1, env_vars={}, ) @@ -42,12 +42,12 @@ def main(): ### Modify the recipe if necessary qwen_config.recipe.additional_properties["max_num_seqs"] = 512 - # Create CServeV2 deployment + # Create CServeV3 deployment response = cclient.create_cserve(qwen_config) print("Create deployment response: ", response) - ### Get deployment details - deployment = cclient.get_cserve(response.id) + ### Get deployment details + deployment = cclient.get_cserve_v3(response.id) print("Deployment details: ", deployment) ''' diff --git a/requirements.txt b/requirements.txt index f022c63..1fe82b6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,4 +9,4 @@ cryptography==44.0.1 prometheus-client>=0.20.0 scipy>=1.6.0 scikit-learn>=1.5.1 -platform-api-python-client==4.0.12 +platform-api-python-client==4.1.9 From 7f58300383a9db86a76f67e64e47b0f393e8b169 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Fri, 29 Aug 2025 15:35:55 -0400 Subject: [PATCH 02/16] change default to v3 --- centml/sdk/api.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/centml/sdk/api.py b/centml/sdk/api.py index c70af9b..46fbbbf 100644 --- a/centml/sdk/api.py +++ b/centml/sdk/api.py @@ -34,11 +34,11 @@ def get_compute(self, id): return self._api.get_compute_deployment_deployments_compute_deployment_id_get(id) def get_cserve(self, id): - return self._api.get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get(id) - + return self._api.get_cserve_v3_deployment_deployments_cserve_v3_deployment_id_get(id) + def get_cserve_v2(self, id): return self._api.get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get(id) - + def get_cserve_v3(self, id): return self._api.get_cserve_v3_deployment_deployments_cserve_v3_deployment_id_get(id) @@ -49,15 +49,12 @@ def create_compute(self, request: CreateComputeDeploymentRequest): return self._api.create_compute_deployment_deployments_compute_post(request) def create_cserve(self, request: CreateCServeV3DeploymentRequest): - """Create CServe deployment using V3 API (default)""" return 
self._api.create_cserve_v3_deployment_deployments_cserve_v3_post(request) - + def create_cserve_v2(self, request: CreateCServeV2DeploymentRequest): - """Create CServe deployment using V2 API (legacy)""" return self._api.create_cserve_v2_deployment_deployments_cserve_v2_post(request) - + def create_cserve_v3(self, request: CreateCServeV3DeploymentRequest): - """Create CServe deployment using V3 API""" return self._api.create_cserve_v3_deployment_deployments_cserve_v3_post(request) def update_inference(self, deployment_id: int, request: CreateInferenceDeploymentRequest): @@ -67,15 +64,12 @@ def update_compute(self, deployment_id: int, request: CreateComputeDeploymentReq return self._api.update_compute_deployment_deployments_compute_put(deployment_id, request) def update_cserve(self, deployment_id: int, request: CreateCServeV3DeploymentRequest): - """Update CServe deployment using V3 API (default)""" return self._api.update_cserve_v3_deployment_deployments_cserve_v3_put(deployment_id, request) - + def update_cserve_v2(self, deployment_id: int, request: CreateCServeV2DeploymentRequest): - """Update CServe deployment using V2 API (legacy)""" return self._api.update_cserve_v2_deployment_deployments_cserve_v2_put(deployment_id, request) - + def update_cserve_v3(self, deployment_id: int, request: CreateCServeV3DeploymentRequest): - """Update CServe deployment using V3 API""" return self._api.update_cserve_v3_deployment_deployments_cserve_v3_put(deployment_id, request) def _update_status(self, id, new_status): From 2f37c563bde21f73c49584379dcf26dc9b28d7ce Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Fri, 29 Aug 2025 16:07:26 -0400 Subject: [PATCH 03/16] format --- centml/cli/cluster.py | 126 ++++++++++++++++++++++++---------- centml/sdk/api.py | 93 +++++++++++++++++++------ examples/sdk/create_cserve.py | 13 ++-- 3 files changed, 171 insertions(+), 61 deletions(-) diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py index 7d4e9f2..72dbf5b 100644 --- a/centml/cli/cluster.py +++ b/centml/cli/cluster.py @@ -3,28 +3,34 @@ from typing import Dict import click from tabulate import tabulate -from centml.sdk import DeploymentType, DeploymentStatus, ServiceStatus, ApiException, HardwareInstanceResponse +from centml.sdk import ( + DeploymentType, + DeploymentStatus, + ServiceStatus, + ApiException, + HardwareInstanceResponse, +) from centml.sdk.api import get_centml_client depl_type_to_name_map = { - DeploymentType.INFERENCE: 'inference', - DeploymentType.COMPUTE: 'compute', - DeploymentType.COMPILATION: 'compilation', - DeploymentType.INFERENCE_V2: 'inference', - DeploymentType.COMPUTE_V2: 'compute', - DeploymentType.CSERVE: 'cserve', - DeploymentType.CSERVE_V2: 'cserve-v2', - DeploymentType.CSERVE_V3: 'cserve', - DeploymentType.RAG: 'rag', + DeploymentType.INFERENCE: "inference", + DeploymentType.COMPUTE: "compute", + DeploymentType.COMPILATION: "compilation", + DeploymentType.INFERENCE_V2: "inference", + DeploymentType.COMPUTE_V2: "compute", + DeploymentType.CSERVE: "cserve", + DeploymentType.CSERVE_V2: "cserve-v2", + DeploymentType.CSERVE_V3: "cserve", + DeploymentType.RAG: "rag", } depl_name_to_type_map = { - 'inference': DeploymentType.INFERENCE_V2, - 'cserve': DeploymentType.CSERVE_V3, - 'cserve-v2': DeploymentType.CSERVE_V2, - 'cserve-v3': DeploymentType.CSERVE_V3, - 'compute': DeploymentType.COMPUTE_V2, - 'rag': DeploymentType.RAG, + "inference": DeploymentType.INFERENCE_V2, + "cserve": DeploymentType.CSERVE_V3, + "cserve-v2": DeploymentType.CSERVE_V2, + "cserve-v3": 
DeploymentType.CSERVE_V3, + "compute": DeploymentType.COMPUTE_V2, + "rag": DeploymentType.RAG, } @@ -63,8 +69,12 @@ def _get_replica_info(deployment, depl_type): """Extract replica information handling V2/V3 field differences""" if depl_type == DeploymentType.CSERVE_V3: return { - "min": getattr(deployment, 'min_replicas', getattr(deployment, 'min_scale', None)), - "max": getattr(deployment, 'max_replicas', getattr(deployment, 'max_scale', None)), + "min": getattr( + deployment, "min_replicas", getattr(deployment, "min_scale", None) + ), + "max": getattr( + deployment, "max_replicas", getattr(deployment, "max_scale", None) + ), } else: # V2 return {"min": deployment.min_scale, "max": deployment.max_scale} @@ -73,36 +83,67 @@ def _get_replica_info(deployment, depl_type): def _get_ready_status(cclient, deployment): api_status = deployment.status service_status = ( - cclient.get_status(deployment.id).service_status if deployment.status == DeploymentStatus.ACTIVE else None + cclient.get_status(deployment.id).service_status + if deployment.status == DeploymentStatus.ACTIVE + else None ) status_styles = { (DeploymentStatus.PAUSED, None): ("paused", "yellow", "black"), (DeploymentStatus.DELETED, None): ("deleted", "white", "black"), (DeploymentStatus.ACTIVE, ServiceStatus.HEALTHY): ("ready", "green", "black"), - (DeploymentStatus.ACTIVE, ServiceStatus.INITIALIZING): ("starting", "black", "white"), - (DeploymentStatus.ACTIVE, ServiceStatus.MISSING): ("starting", "black", "white"), + (DeploymentStatus.ACTIVE, ServiceStatus.INITIALIZING): ( + "starting", + "black", + "white", + ), + (DeploymentStatus.ACTIVE, ServiceStatus.MISSING): ( + "starting", + "black", + "white", + ), (DeploymentStatus.ACTIVE, ServiceStatus.ERROR): ("error", "red", "black"), (DeploymentStatus.ACTIVE, ServiceStatus.CREATECONTAINERCONFIGERROR): ( "createContainerConfigError", "red", "black", ), - (DeploymentStatus.ACTIVE, ServiceStatus.CRASHLOOPBACKOFF): ("crashLoopBackOff", "red", "black"), - (DeploymentStatus.ACTIVE, ServiceStatus.IMAGEPULLBACKOFF): ("imagePullBackOff", "red", "black"), - (DeploymentStatus.ACTIVE, ServiceStatus.PROGRESSDEADLINEEXCEEDED): ("progressDeadlineExceeded", "red", "black"), + (DeploymentStatus.ACTIVE, ServiceStatus.CRASHLOOPBACKOFF): ( + "crashLoopBackOff", + "red", + "black", + ), + (DeploymentStatus.ACTIVE, ServiceStatus.IMAGEPULLBACKOFF): ( + "imagePullBackOff", + "red", + "black", + ), + (DeploymentStatus.ACTIVE, ServiceStatus.PROGRESSDEADLINEEXCEEDED): ( + "progressDeadlineExceeded", + "red", + "black", + ), } - style = status_styles.get((api_status, service_status), ("unknown", "black", "white")) + style = status_styles.get( + (api_status, service_status), ("unknown", "black", "white") + ) # Handle foreground and background colors return click.style(style[0], fg=style[1], bg=style[2]) @click.command(help="List all deployments") -@click.argument("type", type=click.Choice(list(depl_name_to_type_map.keys())), required=False, default=None) +@click.argument( + "type", + type=click.Choice(list(depl_name_to_type_map.keys())), + required=False, + default=None, +) def ls(type): with get_centml_client() as cclient: - depl_type = depl_name_to_type_map[type] if type in depl_name_to_type_map else None + depl_type = ( + depl_name_to_type_map[type] if type in depl_name_to_type_map else None + ) deployments = cclient.get(depl_type) rows = [] for d in deployments: @@ -184,7 +225,10 @@ def get(type, id): elif depl_type == DeploymentType.COMPUTE_V2: click.echo( tabulate( - [("Username", "centml"), ("SSH key", 
_format_ssh_key(deployment.ssh_public_key))], + [ + ("Username", "centml"), + ("SSH key", _format_ssh_key(deployment.ssh_public_key)), + ], tablefmt="rounded_outline", disable_numparse=True, ) @@ -196,24 +240,34 @@ def get(type, id): ( "Parallelism", { - "tensor": deployment.recipe.additional_properties.get('tensor_parallel_size', 'N/A'), - "pipeline": deployment.recipe.additional_properties.get('pipeline_parallel_size', 'N/A'), + "tensor": deployment.recipe.additional_properties.get( + "tensor_parallel_size", "N/A" + ), + "pipeline": deployment.recipe.additional_properties.get( + "pipeline_parallel_size", "N/A" + ), }, ), ("Replicas", replica_info), ("Max concurrency", deployment.concurrency or "None"), ] - + # Add V3-specific rollout information if depl_type == DeploymentType.CSERVE_V3: rollout_info = {} - if hasattr(deployment, 'max_surge') and deployment.max_surge is not None: - rollout_info['max_surge'] = deployment.max_surge - if hasattr(deployment, 'max_unavailable') and deployment.max_unavailable is not None: - rollout_info['max_unavailable'] = deployment.max_unavailable + if ( + hasattr(deployment, "max_surge") + and deployment.max_surge is not None + ): + rollout_info["max_surge"] = deployment.max_surge + if ( + hasattr(deployment, "max_unavailable") + and deployment.max_unavailable is not None + ): + rollout_info["max_unavailable"] = deployment.max_unavailable if rollout_info: display_rows.append(("Rollout strategy", rollout_info)) - + click.echo( tabulate( display_rows, diff --git a/centml/sdk/api.py b/centml/sdk/api.py index 46fbbbf..f649a43 100644 --- a/centml/sdk/api.py +++ b/centml/sdk/api.py @@ -28,19 +28,37 @@ def get_status(self, id): return self._api.get_deployment_status_deployments_status_deployment_id_get(id) def get_inference(self, id): - return self._api.get_inference_deployment_deployments_inference_deployment_id_get(id) + return ( + self._api.get_inference_deployment_deployments_inference_deployment_id_get( + id + ) + ) def get_compute(self, id): - return self._api.get_compute_deployment_deployments_compute_deployment_id_get(id) + return self._api.get_compute_deployment_deployments_compute_deployment_id_get( + id + ) def get_cserve(self, id): - return self._api.get_cserve_v3_deployment_deployments_cserve_v3_deployment_id_get(id) + return ( + self._api.get_cserve_v3_deployment_deployments_cserve_v3_deployment_id_get( + id + ) + ) def get_cserve_v2(self, id): - return self._api.get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get(id) + return ( + self._api.get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get( + id + ) + ) def get_cserve_v3(self, id): - return self._api.get_cserve_v3_deployment_deployments_cserve_v3_deployment_id_get(id) + return ( + self._api.get_cserve_v3_deployment_deployments_cserve_v3_deployment_id_get( + id + ) + ) def create_inference(self, request: CreateInferenceDeploymentRequest): return self._api.create_inference_deployment_deployments_inference_post(request) @@ -57,24 +75,48 @@ def create_cserve_v2(self, request: CreateCServeV2DeploymentRequest): def create_cserve_v3(self, request: CreateCServeV3DeploymentRequest): return self._api.create_cserve_v3_deployment_deployments_cserve_v3_post(request) - def update_inference(self, deployment_id: int, request: CreateInferenceDeploymentRequest): - return self._api.update_inference_deployment_deployments_inference_put(deployment_id, request) + def update_inference( + self, deployment_id: int, request: CreateInferenceDeploymentRequest + ): + return 
self._api.update_inference_deployment_deployments_inference_put( + deployment_id, request + ) - def update_compute(self, deployment_id: int, request: CreateComputeDeploymentRequest): - return self._api.update_compute_deployment_deployments_compute_put(deployment_id, request) + def update_compute( + self, deployment_id: int, request: CreateComputeDeploymentRequest + ): + return self._api.update_compute_deployment_deployments_compute_put( + deployment_id, request + ) - def update_cserve(self, deployment_id: int, request: CreateCServeV3DeploymentRequest): - return self._api.update_cserve_v3_deployment_deployments_cserve_v3_put(deployment_id, request) + def update_cserve( + self, deployment_id: int, request: CreateCServeV3DeploymentRequest + ): + return self._api.update_cserve_v3_deployment_deployments_cserve_v3_put( + deployment_id, request + ) - def update_cserve_v2(self, deployment_id: int, request: CreateCServeV2DeploymentRequest): - return self._api.update_cserve_v2_deployment_deployments_cserve_v2_put(deployment_id, request) + def update_cserve_v2( + self, deployment_id: int, request: CreateCServeV2DeploymentRequest + ): + return self._api.update_cserve_v2_deployment_deployments_cserve_v2_put( + deployment_id, request + ) - def update_cserve_v3(self, deployment_id: int, request: CreateCServeV3DeploymentRequest): - return self._api.update_cserve_v3_deployment_deployments_cserve_v3_put(deployment_id, request) + def update_cserve_v3( + self, deployment_id: int, request: CreateCServeV3DeploymentRequest + ): + return self._api.update_cserve_v3_deployment_deployments_cserve_v3_put( + deployment_id, request + ) def _update_status(self, id, new_status): - status_req = platform_api_python_client.DeploymentStatusRequest(status=new_status) - self._api.update_deployment_status_deployments_status_deployment_id_put(id, status_req) + status_req = platform_api_python_client.DeploymentStatusRequest( + status=new_status + ) + self._api.update_deployment_status_deployments_status_deployment_id_put( + id, status_req + ) def delete(self, id): self._update_status(id, DeploymentStatus.DELETED) @@ -97,10 +139,16 @@ def get_prebuilt_images(self, depl_type: DeploymentType): return self._api.get_prebuilt_images_prebuilt_images_get(type=depl_type) def get_cserve_recipe(self, model=None, hf_token=None): - return self._api.get_cserve_recipe_deployments_cserve_recipes_get(model=model, hf_token=hf_token).results + return self._api.get_cserve_recipe_deployments_cserve_recipes_get( + model=model, hf_token=hf_token + ).results def get_cluster_id(self, hardware_instance_id): - filtered_hw = list(filter(lambda h: h.id == hardware_instance_id, self.get_hardware_instances())) + filtered_hw = list( + filter( + lambda h: h.id == hardware_instance_id, self.get_hardware_instances() + ) + ) if len(filtered_hw) == 0: raise Exception(f"Invalid hardware instance id {hardware_instance_id}") @@ -114,7 +162,12 @@ def get_user_vault(self, type): # pylint: disable=R0917 def get_deployment_usage( - self, id: int, metric: Metric, start_time_in_seconds: int, end_time_in_seconds: int, step: int + self, + id: int, + metric: Metric, + start_time_in_seconds: int, + end_time_in_seconds: int, + step: int, ): return self._api.get_usage_deployments_usage_deployment_id_get( deployment_id=id, diff --git a/examples/sdk/create_cserve.py b/examples/sdk/create_cserve.py index 56f92b7..4eb169f 100644 --- a/examples/sdk/create_cserve.py +++ b/examples/sdk/create_cserve.py @@ -36,8 +36,10 @@ def get_default_cserve_config(cclient, name, model): def main(): 
with get_centml_client() as cclient: ### Get the configurations for the Qwen model - qwen_config = get_fastest_cserve_config(cclient, name="qwen-fastest", model="Qwen/Qwen2-VL-7B-Instruct") - #qwen_config = get_default_cserve_config(cclient, name="qwen-default", model="Qwen/Qwen2-VL-7B-Instruct") + qwen_config = get_fastest_cserve_config( + cclient, name="qwen-fastest", model="Qwen/Qwen2-VL-7B-Instruct" + ) + # qwen_config = get_default_cserve_config(cclient, name="qwen-default", model="Qwen/Qwen2-VL-7B-Instruct") ### Modify the recipe if necessary qwen_config.recipe.additional_properties["max_num_seqs"] = 512 @@ -46,17 +48,18 @@ def main(): response = cclient.create_cserve(qwen_config) print("Create deployment response: ", response) - ### Get deployment details + ### Get deployment details deployment = cclient.get_cserve_v3(response.id) print("Deployment details: ", deployment) - ''' + """ ### Pause the deployment cclient.pause(deployment.id) ### Delete the deployment cclient.delete(deployment.id) - ''' + """ + if __name__ == "__main__": main() From 8b370228ee1f37713dbfdb3d6230df65ac6f674a Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Tue, 2 Sep 2025 09:55:39 -0400 Subject: [PATCH 04/16] black format --- centml/cli/login.py | 22 +++++++++++++++++----- centml/sdk/auth.py | 10 ++++++++-- centml/sdk/config.py | 12 +++++++++--- centml/sdk/utils/client_certs.py | 26 ++++++++++++++++++++------ 4 files changed, 54 insertions(+), 16 deletions(-) diff --git a/centml/cli/login.py b/centml/cli/login.py index dd4c7ed..48754e4 100644 --- a/centml/cli/login.py +++ b/centml/cli/login.py @@ -26,7 +26,11 @@ def generate_pkce_pair(): verifier = secrets.token_urlsafe(64) - challenge = base64.urlsafe_b64encode(hashlib.sha256(verifier.encode()).digest()).decode().rstrip("=") + challenge = ( + base64.urlsafe_b64encode(hashlib.sha256(verifier.encode()).digest()) + .decode() + .rstrip("=") + ) return verifier, challenge @@ -102,14 +106,18 @@ def login(token_file): else: click.echo("Logging into CentML...") - choice = click.confirm("Do you want to log in with your browser now?", default=True) + choice = click.confirm( + "Do you want to log in with your browser now?", default=True + ) if choice: try: # PKCE Flow code_verifier, code_challenge = generate_pkce_pair() auth_url = build_auth_url(CLIENT_ID, REDIRECT_URI, code_challenge) click.echo("A browser window will open for you to authenticate.") - click.echo("If it doesn't open automatically, you can copy and paste this URL:") + click.echo( + "If it doesn't open automatically, you can copy and paste this URL:" + ) click.echo(f" {auth_url}\n") webbrowser.open(auth_url) click.echo("Waiting for authentication...") @@ -121,9 +129,13 @@ def login(token_file): click.echo("Login failed. 
Please try again.") else: cred = { - key: response_dict[key] for key in ("access_token", "refresh_token") if key in response_dict + key: response_dict[key] + for key in ("access_token", "refresh_token") + if key in response_dict } - os.makedirs(os.path.dirname(settings.CENTML_CRED_FILE_PATH), exist_ok=True) + os.makedirs( + os.path.dirname(settings.CENTML_CRED_FILE_PATH), exist_ok=True + ) with open(settings.CENTML_CRED_FILE_PATH, "w") as f: json.dump(cred, f) click.echo("✅ Login successful") diff --git a/centml/sdk/auth.py b/centml/sdk/auth.py index 4547edf..8f58b20 100644 --- a/centml/sdk/auth.py +++ b/centml/sdk/auth.py @@ -29,7 +29,11 @@ def refresh_centml_token(refresh_token): os.remove(settings.CENTML_CRED_FILE_PATH) cred = None else: - cred = {key: response_dict[key] for key in ("access_token", "refresh_token") if key in response_dict} + cred = { + key: response_dict[key] + for key in ("access_token", "refresh_token") + if key in response_dict + } with open(settings.CENTML_CRED_FILE_PATH, "w") as f: json.dump(cred, f) @@ -61,7 +65,9 @@ def get_centml_token(): cred = load_centml_cred() if not cred: sys.exit("CentML credentials not found. Please login...") - exp_time = int(jwt.decode(cred["access_token"], options={"verify_signature": False})["exp"]) + exp_time = int( + jwt.decode(cred["access_token"], options={"verify_signature": False})["exp"] + ) if time.time() >= exp_time - 100: cred = refresh_centml_token(cred["refresh_token"]) diff --git a/centml/sdk/config.py b/centml/sdk/config.py index 3f935c0..bb3ad68 100644 --- a/centml/sdk/config.py +++ b/centml/sdk/config.py @@ -8,13 +8,19 @@ class Config(BaseSettings): model_config = SettingsConfigDict(env_file=Path(".env")) CENTML_WEB_URL: str = os.getenv("CENTML_WEB_URL", default="https://app.centml.com/") - CENTML_CONFIG_PATH: str = os.getenv("CENTML_CONFIG_PATH", default=os.path.expanduser("~/.centml")) + CENTML_CONFIG_PATH: str = os.getenv( + "CENTML_CONFIG_PATH", default=os.path.expanduser("~/.centml") + ) CENTML_CRED_FILE: str = os.getenv("CENTML_CRED_FILE", default="credentials.json") CENTML_CRED_FILE_PATH: str = os.path.join(CENTML_CONFIG_PATH, CENTML_CRED_FILE) - CENTML_PLATFORM_API_URL: str = os.getenv("CENTML_PLATFORM_API_URL", default="https://api.centml.com") + CENTML_PLATFORM_API_URL: str = os.getenv( + "CENTML_PLATFORM_API_URL", default="https://api.centml.com" + ) - CENTML_WORKOS_CLIENT_ID: str = os.getenv("CENTML_WORKOS_CLIENT_ID", default="client_01JP5TWW2997MF8AYQXHJEGYR0") + CENTML_WORKOS_CLIENT_ID: str = os.getenv( + "CENTML_WORKOS_CLIENT_ID", default="client_01JP5TWW2997MF8AYQXHJEGYR0" + ) settings = Config() diff --git a/centml/sdk/utils/client_certs.py b/centml/sdk/utils/client_certs.py index d3c2ec3..f77ec99 100644 --- a/centml/sdk/utils/client_certs.py +++ b/centml/sdk/utils/client_certs.py @@ -23,7 +23,9 @@ def generate_ca_client_triplet(service_name: str) -> CAClientCertTriplet: # Details about who we are. For a self-signed certificate, the subject # and issuer are always the same. 
- ca_subject = x509.Name([x509.NameAttribute(NameOID.COMMON_NAME, f"ca.{service_name}.user.centml.ai")]) + ca_subject = x509.Name( + [x509.NameAttribute(NameOID.COMMON_NAME, f"ca.{service_name}.user.centml.ai")] + ) ca_certificate = ( x509.CertificateBuilder() @@ -44,7 +46,13 @@ def generate_ca_client_triplet(service_name: str) -> CAClientCertTriplet: client_private_key = ec.generate_private_key(ec.SECP384R1()) # Information about the client - client_subject = x509.Name([x509.NameAttribute(NameOID.COMMON_NAME, f"client.{service_name}.user.centml.ai")]) + client_subject = x509.Name( + [ + x509.NameAttribute( + NameOID.COMMON_NAME, f"client.{service_name}.user.centml.ai" + ) + ] + ) client_certificate = ( x509.CertificateBuilder() @@ -63,8 +71,12 @@ def generate_ca_client_triplet(service_name: str) -> CAClientCertTriplet: ) return CAClientCertTriplet( - certificate_authority=ca_certificate.public_bytes(serialization.Encoding.PEM).decode("ascii"), - client_certificate=client_certificate.public_bytes(serialization.Encoding.PEM).decode("ascii"), + certificate_authority=ca_certificate.public_bytes( + serialization.Encoding.PEM + ).decode("ascii"), + client_certificate=client_certificate.public_bytes( + serialization.Encoding.PEM + ).decode("ascii"), client_private_key=client_private_key.private_bytes( encoding=serialization.Encoding.PEM, format=serialization.PrivateFormat.PKCS8, @@ -85,9 +97,11 @@ def save_pem_file(service_name, client_private_key, client_certificate): try: # Save the combined PEM file - with open(ca_file_path, 'w') as combined_pem_file: + with open(ca_file_path, "w") as combined_pem_file: combined_pem_file.write(client_private_key + client_certificate) - click.echo(f"Combined PEM file for accessing the private endpoint has been saved to {ca_file_path}") + click.echo( + f"Combined PEM file for accessing the private endpoint has been saved to {ca_file_path}" + ) except Exception as e: click.echo(f"Error saving PEM files: {e}") From 059bb0ca2171e9ae2f43098356b748608cafaab8 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Tue, 2 Sep 2025 09:57:28 -0400 Subject: [PATCH 05/16] black format --- centml/cli/cluster.py | 96 ++++++-------------------------- centml/cli/login.py | 22 ++------ centml/sdk/api.py | 93 +++++++------------------------ centml/sdk/auth.py | 10 +--- centml/sdk/config.py | 12 +--- centml/sdk/utils/client_certs.py | 24 ++------ 6 files changed, 53 insertions(+), 204 deletions(-) diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py index 72dbf5b..3aab191 100644 --- a/centml/cli/cluster.py +++ b/centml/cli/cluster.py @@ -3,13 +3,7 @@ from typing import Dict import click from tabulate import tabulate -from centml.sdk import ( - DeploymentType, - DeploymentStatus, - ServiceStatus, - ApiException, - HardwareInstanceResponse, -) +from centml.sdk import DeploymentType, DeploymentStatus, ServiceStatus, ApiException, HardwareInstanceResponse from centml.sdk.api import get_centml_client @@ -69,12 +63,8 @@ def _get_replica_info(deployment, depl_type): """Extract replica information handling V2/V3 field differences""" if depl_type == DeploymentType.CSERVE_V3: return { - "min": getattr( - deployment, "min_replicas", getattr(deployment, "min_scale", None) - ), - "max": getattr( - deployment, "max_replicas", getattr(deployment, "max_scale", None) - ), + "min": getattr(deployment, "min_replicas", getattr(deployment, "min_scale", None)), + "max": getattr(deployment, "max_replicas", getattr(deployment, "max_scale", None)), } else: # V2 return {"min": deployment.min_scale, "max": 
deployment.max_scale} @@ -83,67 +73,36 @@ def _get_replica_info(deployment, depl_type): def _get_ready_status(cclient, deployment): api_status = deployment.status service_status = ( - cclient.get_status(deployment.id).service_status - if deployment.status == DeploymentStatus.ACTIVE - else None + cclient.get_status(deployment.id).service_status if deployment.status == DeploymentStatus.ACTIVE else None ) status_styles = { (DeploymentStatus.PAUSED, None): ("paused", "yellow", "black"), (DeploymentStatus.DELETED, None): ("deleted", "white", "black"), (DeploymentStatus.ACTIVE, ServiceStatus.HEALTHY): ("ready", "green", "black"), - (DeploymentStatus.ACTIVE, ServiceStatus.INITIALIZING): ( - "starting", - "black", - "white", - ), - (DeploymentStatus.ACTIVE, ServiceStatus.MISSING): ( - "starting", - "black", - "white", - ), + (DeploymentStatus.ACTIVE, ServiceStatus.INITIALIZING): ("starting", "black", "white"), + (DeploymentStatus.ACTIVE, ServiceStatus.MISSING): ("starting", "black", "white"), (DeploymentStatus.ACTIVE, ServiceStatus.ERROR): ("error", "red", "black"), (DeploymentStatus.ACTIVE, ServiceStatus.CREATECONTAINERCONFIGERROR): ( "createContainerConfigError", "red", "black", ), - (DeploymentStatus.ACTIVE, ServiceStatus.CRASHLOOPBACKOFF): ( - "crashLoopBackOff", - "red", - "black", - ), - (DeploymentStatus.ACTIVE, ServiceStatus.IMAGEPULLBACKOFF): ( - "imagePullBackOff", - "red", - "black", - ), - (DeploymentStatus.ACTIVE, ServiceStatus.PROGRESSDEADLINEEXCEEDED): ( - "progressDeadlineExceeded", - "red", - "black", - ), + (DeploymentStatus.ACTIVE, ServiceStatus.CRASHLOOPBACKOFF): ("crashLoopBackOff", "red", "black"), + (DeploymentStatus.ACTIVE, ServiceStatus.IMAGEPULLBACKOFF): ("imagePullBackOff", "red", "black"), + (DeploymentStatus.ACTIVE, ServiceStatus.PROGRESSDEADLINEEXCEEDED): ("progressDeadlineExceeded", "red", "black"), } - style = status_styles.get( - (api_status, service_status), ("unknown", "black", "white") - ) + style = status_styles.get((api_status, service_status), ("unknown", "black", "white")) # Handle foreground and background colors return click.style(style[0], fg=style[1], bg=style[2]) @click.command(help="List all deployments") -@click.argument( - "type", - type=click.Choice(list(depl_name_to_type_map.keys())), - required=False, - default=None, -) +@click.argument("type", type=click.Choice(list(depl_name_to_type_map.keys())), required=False, default=None) def ls(type): with get_centml_client() as cclient: - depl_type = ( - depl_name_to_type_map[type] if type in depl_name_to_type_map else None - ) + depl_type = depl_name_to_type_map[type] if type in depl_name_to_type_map else None deployments = cclient.get(depl_type) rows = [] for d in deployments: @@ -225,10 +184,7 @@ def get(type, id): elif depl_type == DeploymentType.COMPUTE_V2: click.echo( tabulate( - [ - ("Username", "centml"), - ("SSH key", _format_ssh_key(deployment.ssh_public_key)), - ], + [("Username", "centml"), ("SSH key", _format_ssh_key(deployment.ssh_public_key))], tablefmt="rounded_outline", disable_numparse=True, ) @@ -240,12 +196,8 @@ def get(type, id): ( "Parallelism", { - "tensor": deployment.recipe.additional_properties.get( - "tensor_parallel_size", "N/A" - ), - "pipeline": deployment.recipe.additional_properties.get( - "pipeline_parallel_size", "N/A" - ), + "tensor": deployment.recipe.additional_properties.get("tensor_parallel_size", "N/A"), + "pipeline": deployment.recipe.additional_properties.get("pipeline_parallel_size", "N/A"), }, ), ("Replicas", replica_info), @@ -255,26 +207,14 @@ def get(type, 
id): # Add V3-specific rollout information if depl_type == DeploymentType.CSERVE_V3: rollout_info = {} - if ( - hasattr(deployment, "max_surge") - and deployment.max_surge is not None - ): + if hasattr(deployment, "max_surge") and deployment.max_surge is not None: rollout_info["max_surge"] = deployment.max_surge - if ( - hasattr(deployment, "max_unavailable") - and deployment.max_unavailable is not None - ): + if hasattr(deployment, "max_unavailable") and deployment.max_unavailable is not None: rollout_info["max_unavailable"] = deployment.max_unavailable if rollout_info: display_rows.append(("Rollout strategy", rollout_info)) - click.echo( - tabulate( - display_rows, - tablefmt="rounded_outline", - disable_numparse=True, - ) - ) + click.echo(tabulate(display_rows, tablefmt="rounded_outline", disable_numparse=True)) @click.command(help="Delete a deployment") diff --git a/centml/cli/login.py b/centml/cli/login.py index 48754e4..dd4c7ed 100644 --- a/centml/cli/login.py +++ b/centml/cli/login.py @@ -26,11 +26,7 @@ def generate_pkce_pair(): verifier = secrets.token_urlsafe(64) - challenge = ( - base64.urlsafe_b64encode(hashlib.sha256(verifier.encode()).digest()) - .decode() - .rstrip("=") - ) + challenge = base64.urlsafe_b64encode(hashlib.sha256(verifier.encode()).digest()).decode().rstrip("=") return verifier, challenge @@ -106,18 +102,14 @@ def login(token_file): else: click.echo("Logging into CentML...") - choice = click.confirm( - "Do you want to log in with your browser now?", default=True - ) + choice = click.confirm("Do you want to log in with your browser now?", default=True) if choice: try: # PKCE Flow code_verifier, code_challenge = generate_pkce_pair() auth_url = build_auth_url(CLIENT_ID, REDIRECT_URI, code_challenge) click.echo("A browser window will open for you to authenticate.") - click.echo( - "If it doesn't open automatically, you can copy and paste this URL:" - ) + click.echo("If it doesn't open automatically, you can copy and paste this URL:") click.echo(f" {auth_url}\n") webbrowser.open(auth_url) click.echo("Waiting for authentication...") @@ -129,13 +121,9 @@ def login(token_file): click.echo("Login failed. 
Please try again.") else: cred = { - key: response_dict[key] - for key in ("access_token", "refresh_token") - if key in response_dict + key: response_dict[key] for key in ("access_token", "refresh_token") if key in response_dict } - os.makedirs( - os.path.dirname(settings.CENTML_CRED_FILE_PATH), exist_ok=True - ) + os.makedirs(os.path.dirname(settings.CENTML_CRED_FILE_PATH), exist_ok=True) with open(settings.CENTML_CRED_FILE_PATH, "w") as f: json.dump(cred, f) click.echo("✅ Login successful") diff --git a/centml/sdk/api.py b/centml/sdk/api.py index f649a43..46fbbbf 100644 --- a/centml/sdk/api.py +++ b/centml/sdk/api.py @@ -28,37 +28,19 @@ def get_status(self, id): return self._api.get_deployment_status_deployments_status_deployment_id_get(id) def get_inference(self, id): - return ( - self._api.get_inference_deployment_deployments_inference_deployment_id_get( - id - ) - ) + return self._api.get_inference_deployment_deployments_inference_deployment_id_get(id) def get_compute(self, id): - return self._api.get_compute_deployment_deployments_compute_deployment_id_get( - id - ) + return self._api.get_compute_deployment_deployments_compute_deployment_id_get(id) def get_cserve(self, id): - return ( - self._api.get_cserve_v3_deployment_deployments_cserve_v3_deployment_id_get( - id - ) - ) + return self._api.get_cserve_v3_deployment_deployments_cserve_v3_deployment_id_get(id) def get_cserve_v2(self, id): - return ( - self._api.get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get( - id - ) - ) + return self._api.get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get(id) def get_cserve_v3(self, id): - return ( - self._api.get_cserve_v3_deployment_deployments_cserve_v3_deployment_id_get( - id - ) - ) + return self._api.get_cserve_v3_deployment_deployments_cserve_v3_deployment_id_get(id) def create_inference(self, request: CreateInferenceDeploymentRequest): return self._api.create_inference_deployment_deployments_inference_post(request) @@ -75,48 +57,24 @@ def create_cserve_v2(self, request: CreateCServeV2DeploymentRequest): def create_cserve_v3(self, request: CreateCServeV3DeploymentRequest): return self._api.create_cserve_v3_deployment_deployments_cserve_v3_post(request) - def update_inference( - self, deployment_id: int, request: CreateInferenceDeploymentRequest - ): - return self._api.update_inference_deployment_deployments_inference_put( - deployment_id, request - ) + def update_inference(self, deployment_id: int, request: CreateInferenceDeploymentRequest): + return self._api.update_inference_deployment_deployments_inference_put(deployment_id, request) - def update_compute( - self, deployment_id: int, request: CreateComputeDeploymentRequest - ): - return self._api.update_compute_deployment_deployments_compute_put( - deployment_id, request - ) + def update_compute(self, deployment_id: int, request: CreateComputeDeploymentRequest): + return self._api.update_compute_deployment_deployments_compute_put(deployment_id, request) - def update_cserve( - self, deployment_id: int, request: CreateCServeV3DeploymentRequest - ): - return self._api.update_cserve_v3_deployment_deployments_cserve_v3_put( - deployment_id, request - ) + def update_cserve(self, deployment_id: int, request: CreateCServeV3DeploymentRequest): + return self._api.update_cserve_v3_deployment_deployments_cserve_v3_put(deployment_id, request) - def update_cserve_v2( - self, deployment_id: int, request: CreateCServeV2DeploymentRequest - ): - return self._api.update_cserve_v2_deployment_deployments_cserve_v2_put( - 
deployment_id, request - ) + def update_cserve_v2(self, deployment_id: int, request: CreateCServeV2DeploymentRequest): + return self._api.update_cserve_v2_deployment_deployments_cserve_v2_put(deployment_id, request) - def update_cserve_v3( - self, deployment_id: int, request: CreateCServeV3DeploymentRequest - ): - return self._api.update_cserve_v3_deployment_deployments_cserve_v3_put( - deployment_id, request - ) + def update_cserve_v3(self, deployment_id: int, request: CreateCServeV3DeploymentRequest): + return self._api.update_cserve_v3_deployment_deployments_cserve_v3_put(deployment_id, request) def _update_status(self, id, new_status): - status_req = platform_api_python_client.DeploymentStatusRequest( - status=new_status - ) - self._api.update_deployment_status_deployments_status_deployment_id_put( - id, status_req - ) + status_req = platform_api_python_client.DeploymentStatusRequest(status=new_status) + self._api.update_deployment_status_deployments_status_deployment_id_put(id, status_req) def delete(self, id): self._update_status(id, DeploymentStatus.DELETED) @@ -139,16 +97,10 @@ def get_prebuilt_images(self, depl_type: DeploymentType): return self._api.get_prebuilt_images_prebuilt_images_get(type=depl_type) def get_cserve_recipe(self, model=None, hf_token=None): - return self._api.get_cserve_recipe_deployments_cserve_recipes_get( - model=model, hf_token=hf_token - ).results + return self._api.get_cserve_recipe_deployments_cserve_recipes_get(model=model, hf_token=hf_token).results def get_cluster_id(self, hardware_instance_id): - filtered_hw = list( - filter( - lambda h: h.id == hardware_instance_id, self.get_hardware_instances() - ) - ) + filtered_hw = list(filter(lambda h: h.id == hardware_instance_id, self.get_hardware_instances())) if len(filtered_hw) == 0: raise Exception(f"Invalid hardware instance id {hardware_instance_id}") @@ -162,12 +114,7 @@ def get_user_vault(self, type): # pylint: disable=R0917 def get_deployment_usage( - self, - id: int, - metric: Metric, - start_time_in_seconds: int, - end_time_in_seconds: int, - step: int, + self, id: int, metric: Metric, start_time_in_seconds: int, end_time_in_seconds: int, step: int ): return self._api.get_usage_deployments_usage_deployment_id_get( deployment_id=id, diff --git a/centml/sdk/auth.py b/centml/sdk/auth.py index 8f58b20..4547edf 100644 --- a/centml/sdk/auth.py +++ b/centml/sdk/auth.py @@ -29,11 +29,7 @@ def refresh_centml_token(refresh_token): os.remove(settings.CENTML_CRED_FILE_PATH) cred = None else: - cred = { - key: response_dict[key] - for key in ("access_token", "refresh_token") - if key in response_dict - } + cred = {key: response_dict[key] for key in ("access_token", "refresh_token") if key in response_dict} with open(settings.CENTML_CRED_FILE_PATH, "w") as f: json.dump(cred, f) @@ -65,9 +61,7 @@ def get_centml_token(): cred = load_centml_cred() if not cred: sys.exit("CentML credentials not found. 
Please login...") - exp_time = int( - jwt.decode(cred["access_token"], options={"verify_signature": False})["exp"] - ) + exp_time = int(jwt.decode(cred["access_token"], options={"verify_signature": False})["exp"]) if time.time() >= exp_time - 100: cred = refresh_centml_token(cred["refresh_token"]) diff --git a/centml/sdk/config.py b/centml/sdk/config.py index bb3ad68..3f935c0 100644 --- a/centml/sdk/config.py +++ b/centml/sdk/config.py @@ -8,19 +8,13 @@ class Config(BaseSettings): model_config = SettingsConfigDict(env_file=Path(".env")) CENTML_WEB_URL: str = os.getenv("CENTML_WEB_URL", default="https://app.centml.com/") - CENTML_CONFIG_PATH: str = os.getenv( - "CENTML_CONFIG_PATH", default=os.path.expanduser("~/.centml") - ) + CENTML_CONFIG_PATH: str = os.getenv("CENTML_CONFIG_PATH", default=os.path.expanduser("~/.centml")) CENTML_CRED_FILE: str = os.getenv("CENTML_CRED_FILE", default="credentials.json") CENTML_CRED_FILE_PATH: str = os.path.join(CENTML_CONFIG_PATH, CENTML_CRED_FILE) - CENTML_PLATFORM_API_URL: str = os.getenv( - "CENTML_PLATFORM_API_URL", default="https://api.centml.com" - ) + CENTML_PLATFORM_API_URL: str = os.getenv("CENTML_PLATFORM_API_URL", default="https://api.centml.com") - CENTML_WORKOS_CLIENT_ID: str = os.getenv( - "CENTML_WORKOS_CLIENT_ID", default="client_01JP5TWW2997MF8AYQXHJEGYR0" - ) + CENTML_WORKOS_CLIENT_ID: str = os.getenv("CENTML_WORKOS_CLIENT_ID", default="client_01JP5TWW2997MF8AYQXHJEGYR0") settings = Config() diff --git a/centml/sdk/utils/client_certs.py b/centml/sdk/utils/client_certs.py index f77ec99..4c18b3c 100644 --- a/centml/sdk/utils/client_certs.py +++ b/centml/sdk/utils/client_certs.py @@ -23,9 +23,7 @@ def generate_ca_client_triplet(service_name: str) -> CAClientCertTriplet: # Details about who we are. For a self-signed certificate, the subject # and issuer are always the same. 
- ca_subject = x509.Name( - [x509.NameAttribute(NameOID.COMMON_NAME, f"ca.{service_name}.user.centml.ai")] - ) + ca_subject = x509.Name([x509.NameAttribute(NameOID.COMMON_NAME, f"ca.{service_name}.user.centml.ai")]) ca_certificate = ( x509.CertificateBuilder() @@ -46,13 +44,7 @@ def generate_ca_client_triplet(service_name: str) -> CAClientCertTriplet: client_private_key = ec.generate_private_key(ec.SECP384R1()) # Information about the client - client_subject = x509.Name( - [ - x509.NameAttribute( - NameOID.COMMON_NAME, f"client.{service_name}.user.centml.ai" - ) - ] - ) + client_subject = x509.Name([x509.NameAttribute(NameOID.COMMON_NAME, f"client.{service_name}.user.centml.ai")]) client_certificate = ( x509.CertificateBuilder() @@ -71,12 +63,8 @@ def generate_ca_client_triplet(service_name: str) -> CAClientCertTriplet: ) return CAClientCertTriplet( - certificate_authority=ca_certificate.public_bytes( - serialization.Encoding.PEM - ).decode("ascii"), - client_certificate=client_certificate.public_bytes( - serialization.Encoding.PEM - ).decode("ascii"), + certificate_authority=ca_certificate.public_bytes(serialization.Encoding.PEM).decode("ascii"), + client_certificate=client_certificate.public_bytes(serialization.Encoding.PEM).decode("ascii"), client_private_key=client_private_key.private_bytes( encoding=serialization.Encoding.PEM, format=serialization.PrivateFormat.PKCS8, @@ -99,9 +87,7 @@ def save_pem_file(service_name, client_private_key, client_certificate): # Save the combined PEM file with open(ca_file_path, "w") as combined_pem_file: combined_pem_file.write(client_private_key + client_certificate) - click.echo( - f"Combined PEM file for accessing the private endpoint has been saved to {ca_file_path}" - ) + click.echo(f"Combined PEM file for accessing the private endpoint has been saved to {ca_file_path}") except Exception as e: click.echo(f"Error saving PEM files: {e}") From da862d9264188fb9e1156ab2789467ec34c5e3f8 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Tue, 2 Sep 2025 11:27:56 -0400 Subject: [PATCH 06/16] addressing comments --- centml/cli/cluster.py | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py index 3aab191..dce963a 100644 --- a/centml/cli/cluster.py +++ b/centml/cli/cluster.py @@ -62,14 +62,10 @@ def _format_ssh_key(ssh_key): def _get_replica_info(deployment, depl_type): """Extract replica information handling V2/V3 field differences""" if depl_type == DeploymentType.CSERVE_V3: - return { - "min": getattr(deployment, "min_replicas", getattr(deployment, "min_scale", None)), - "max": getattr(deployment, "max_replicas", getattr(deployment, "max_scale", None)), - } + return {"min": deployment.min_replicas, "max": deployment.max_replicas,} else: # V2 return {"min": deployment.min_scale, "max": deployment.max_scale} - def _get_ready_status(cclient, deployment): api_status = deployment.status service_status = ( @@ -204,16 +200,6 @@ def get(type, id): ("Max concurrency", deployment.concurrency or "None"), ] - # Add V3-specific rollout information - if depl_type == DeploymentType.CSERVE_V3: - rollout_info = {} - if hasattr(deployment, "max_surge") and deployment.max_surge is not None: - rollout_info["max_surge"] = deployment.max_surge - if hasattr(deployment, "max_unavailable") and deployment.max_unavailable is not None: - rollout_info["max_unavailable"] = deployment.max_unavailable - if rollout_info: - display_rows.append(("Rollout strategy", rollout_info)) - click.echo(tabulate(display_rows, 
tablefmt="rounded_outline", disable_numparse=True)) From 7c2e67b0c198f8c13adf1c8313b38fe25f4e1f6a Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Tue, 2 Sep 2025 12:20:56 -0400 Subject: [PATCH 07/16] gs --- centml/cli/cluster.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py index dce963a..3414851 100644 --- a/centml/cli/cluster.py +++ b/centml/cli/cluster.py @@ -7,22 +7,23 @@ from centml.sdk.api import get_centml_client +# convert deployment type enum to a user friendly name depl_type_to_name_map = { DeploymentType.INFERENCE: "inference", DeploymentType.COMPUTE: "compute", DeploymentType.COMPILATION: "compilation", DeploymentType.INFERENCE_V2: "inference", DeploymentType.COMPUTE_V2: "compute", + # For user, they are all cserve. DeploymentType.CSERVE: "cserve", - DeploymentType.CSERVE_V2: "cserve-v2", + DeploymentType.CSERVE_V2: "cserve", DeploymentType.CSERVE_V3: "cserve", DeploymentType.RAG: "rag", } +# use latest type to for user requests depl_name_to_type_map = { "inference": DeploymentType.INFERENCE_V2, "cserve": DeploymentType.CSERVE_V3, - "cserve-v2": DeploymentType.CSERVE_V2, - "cserve-v3": DeploymentType.CSERVE_V3, "compute": DeploymentType.COMPUTE_V2, "rag": DeploymentType.RAG, } @@ -66,6 +67,7 @@ def _get_replica_info(deployment, depl_type): else: # V2 return {"min": deployment.min_scale, "max": deployment.max_scale} + def _get_ready_status(cclient, deployment): api_status = deployment.status service_status = ( From ff716b137bd0459bb34c3325bece81721da58c6f Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Tue, 2 Sep 2025 13:00:20 -0400 Subject: [PATCH 08/16] unify v2 / v3 getters, change get replica info helper --- centml/cli/cluster.py | 25 +++++++++++++++---------- centml/sdk/api.py | 24 +++++++++++++++++------- examples/sdk/create_cserve.py | 2 +- 3 files changed, 33 insertions(+), 18 deletions(-) diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py index 3414851..22618a6 100644 --- a/centml/cli/cluster.py +++ b/centml/cli/cluster.py @@ -60,12 +60,19 @@ def _format_ssh_key(ssh_key): return ssh_key[:32] + "..." 
-def _get_replica_info(deployment, depl_type): +def _get_replica_info(deployment): """Extract replica information handling V2/V3 field differences""" - if depl_type == DeploymentType.CSERVE_V3: - return {"min": deployment.min_replicas, "max": deployment.max_replicas,} - else: # V2 + # Check actual deployment object fields rather than depl_type + # since unified get_cserve() can return either V2 or V3 objects + if hasattr(deployment, 'min_replicas'): + # V3 deployment response object + return {"min": deployment.min_replicas, "max": deployment.max_replicas} + elif hasattr(deployment, 'min_scale'): + # V2 deployment response object return {"min": deployment.min_scale, "max": deployment.max_scale} + else: + # Fallback - shouldn't happen + return {"min": "N/A", "max": "N/A"} def _get_ready_status(cclient, deployment): @@ -137,10 +144,8 @@ def get(type, id): deployment = cclient.get_inference(id) elif depl_type == DeploymentType.COMPUTE_V2: deployment = cclient.get_compute(id) - elif depl_type == DeploymentType.CSERVE_V2: - deployment = cclient.get_cserve_v2(id) - elif depl_type == DeploymentType.CSERVE_V3: - deployment = cclient.get_cserve_v3(id) + elif depl_type in [DeploymentType.CSERVE_V2, DeploymentType.CSERVE_V3]: + deployment = cclient.get_cserve(id) # handles both V2 and V3 else: sys.exit("Please enter correct deployment type") @@ -171,7 +176,7 @@ def get(type, id): ("Image", deployment.image_url), ("Container port", deployment.container_port), ("Healthcheck", deployment.healthcheck or "/"), - ("Replicas", _get_replica_info(deployment, depl_type)), + ("Replicas", _get_replica_info(deployment)), ("Environment variables", deployment.env_vars or "None"), ("Max concurrency", deployment.concurrency or "None"), ], @@ -188,7 +193,7 @@ def get(type, id): ) ) elif depl_type in [DeploymentType.CSERVE_V2, DeploymentType.CSERVE_V3]: - replica_info = _get_replica_info(deployment, depl_type) + replica_info = _get_replica_info(deployment) display_rows = [ ("Hugging face model", deployment.recipe.model), ( diff --git a/centml/sdk/api.py b/centml/sdk/api.py index 46fbbbf..ca2392e 100644 --- a/centml/sdk/api.py +++ b/centml/sdk/api.py @@ -8,6 +8,8 @@ CreateComputeDeploymentRequest, CreateCServeV2DeploymentRequest, CreateCServeV3DeploymentRequest, + CServeV2Recipe, + ApiException, Metric, ) @@ -34,13 +36,21 @@ def get_compute(self, id): return self._api.get_compute_deployment_deployments_compute_deployment_id_get(id) def get_cserve(self, id): - return self._api.get_cserve_v3_deployment_deployments_cserve_v3_deployment_id_get(id) - - def get_cserve_v2(self, id): - return self._api.get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get(id) - - def get_cserve_v3(self, id): - return self._api.get_cserve_v3_deployment_deployments_cserve_v3_deployment_id_get(id) + """Get CServe deployment details - automatically handles both V2 and V3 deployments""" + # Try V3 first (recommended), fallback to V2 if deployment is V2 + try: + return self._api.get_cserve_v3_deployment_deployments_cserve_v3_deployment_id_get(id) + except ApiException as e: + # If V3 fails with 404 or similar, try V2 + if e.status in [404, 400]: # Deployment might be V2 or endpoint not found + try: + return self._api.get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get(id) + except ApiException as v2_error: + # If both fail, raise the original V3 error as it's more likely to be the real issue + raise e + else: + # For other errors (auth, network, etc.), raise immediately + raise def create_inference(self, request: 
CreateInferenceDeploymentRequest): return self._api.create_inference_deployment_deployments_inference_post(request) diff --git a/examples/sdk/create_cserve.py b/examples/sdk/create_cserve.py index 4eb169f..086fe4d 100644 --- a/examples/sdk/create_cserve.py +++ b/examples/sdk/create_cserve.py @@ -49,7 +49,7 @@ def main(): print("Create deployment response: ", response) ### Get deployment details - deployment = cclient.get_cserve_v3(response.id) + deployment = cclient.get_cserve(response.id) # Automatically detects V2 print("Deployment details: ", deployment) """ From ac79cec40e24237c7888697f91f16c39c9a3d390 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Tue, 2 Sep 2025 13:36:10 -0400 Subject: [PATCH 09/16] make update cserve not choosing version --- centml/sdk/api.py | 97 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 89 insertions(+), 8 deletions(-) diff --git a/centml/sdk/api.py b/centml/sdk/api.py index ca2392e..b6c56c3 100644 --- a/centml/sdk/api.py +++ b/centml/sdk/api.py @@ -1,4 +1,5 @@ from contextlib import contextmanager +from typing import Union import platform_api_python_client from platform_api_python_client import ( @@ -73,14 +74,58 @@ def update_inference(self, deployment_id: int, request: CreateInferenceDeploymen def update_compute(self, deployment_id: int, request: CreateComputeDeploymentRequest): return self._api.update_compute_deployment_deployments_compute_put(deployment_id, request) - def update_cserve(self, deployment_id: int, request: CreateCServeV3DeploymentRequest): - return self._api.update_cserve_v3_deployment_deployments_cserve_v3_put(deployment_id, request) - - def update_cserve_v2(self, deployment_id: int, request: CreateCServeV2DeploymentRequest): - return self._api.update_cserve_v2_deployment_deployments_cserve_v2_put(deployment_id, request) - - def update_cserve_v3(self, deployment_id: int, request: CreateCServeV3DeploymentRequest): - return self._api.update_cserve_v3_deployment_deployments_cserve_v3_put(deployment_id, request) + def update_cserve( + self, deployment_id: int, request: Union[CreateCServeV2DeploymentRequest, CreateCServeV3DeploymentRequest] + ): + """Update CServe deployment - automatically handles both V2 and V3 deployments""" + # Determine the approach based on the request type + if isinstance(request, CreateCServeV3DeploymentRequest): + # V3 request - try V3 API first, fallback if deployment is actually V2 + try: + return self._api.update_cserve_v3_deployment_deployments_cserve_v3_put(deployment_id, request) + except ApiException as e: + if e.status in [404, 400]: # V3 API failed, deployment might be V2 + # Convert V3 request to V2 and try V2 API + v2_request = self._convert_v3_to_v2_request(request) + return self._api.update_cserve_v2_deployment_deployments_cserve_v2_put(deployment_id, v2_request) + else: + raise + elif isinstance(request, CreateCServeV2DeploymentRequest): + # V2 request - try V2 API first, fallback to V3 if deployment is actually V3 + try: + return self._api.update_cserve_v2_deployment_deployments_cserve_v2_put(deployment_id, request) + except ApiException as e: + if e.status in [404, 400]: # V2 API failed, deployment might be V3 + # Convert V2 request to V3 and try V3 API + v3_request = self.convert_v2_to_v3_request(request) + return self._api.update_cserve_v3_deployment_deployments_cserve_v3_put(deployment_id, v3_request) + else: + raise + else: + raise ValueError( + f"Unsupported request type: {type(request)}. Expected CreateCServeV2DeploymentRequest or CreateCServeV3DeploymentRequest." 
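# The same try-V3-then-V2 fallback recurs in several methods of this patch; a minimal
# standalone sketch of the pattern (call_v3 and call_v2 are placeholders for the generated
# platform_api_python_client calls, not real SDK functions):
from platform_api_python_client import ApiException

def call_with_fallback(call_v3, call_v2):
    try:
        return call_v3()
    except ApiException as e:
        if e.status in (404, 400):          # deployment not found under the V3 endpoint
            try:
                return call_v2()
            except ApiException as v2_error:
                raise e from v2_error       # surface the original V3 failure
        raise                               # auth/network/etc. errors propagate unchanged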
+ ) + + def _convert_v3_to_v2_request(self, v3_request: CreateCServeV3DeploymentRequest) -> CreateCServeV2DeploymentRequest: + """Convert V3 request format to V2 format (reverse of convert_v2_to_v3_request)""" + # Get all fields from V3 request + kwargs = v3_request.model_dump() if hasattr(v3_request, 'model_dump') else v3_request.dict() + + # Remove old V3 field names + min_replicas = kwargs.pop('min_replicas', None) + max_replicas = kwargs.pop('max_replicas', None) + initial_replicas = kwargs.pop('initial_replicas', None) + # Remove V3-only fields + kwargs.pop('max_surge', None) + kwargs.pop('max_unavailable', None) + + # Add new V2 field names + kwargs['min_scale'] = min_replicas + kwargs['max_scale'] = max_replicas + if initial_replicas is not None: + kwargs['initial_scale'] = initial_replicas + + return CreateCServeV2DeploymentRequest(**kwargs) def _update_status(self, id, new_status): status_req = platform_api_python_client.DeploymentStatusRequest(status=new_status) @@ -122,6 +167,42 @@ def get_user_vault(self, type): return {i.key: i.value for i in items} + def detect_cserve_deployment_version(self, deployment_response): + """Detect if a CServe deployment is V2 or V3 based on response fields""" + # Check for V3-specific fields + if hasattr(deployment_response, 'max_surge') or hasattr(deployment_response, 'max_unavailable'): + return 'v3' + # Check for V3 field names (min_replicas vs min_scale) + if hasattr(deployment_response, 'min_replicas'): + return 'v3' + # Check for V2 field names + if hasattr(deployment_response, 'min_scale'): + return 'v2' + # Default to V2 for backward compatibility + return 'v2' + + def convert_v2_to_v3_request(self, v2_request: CreateCServeV2DeploymentRequest) -> CreateCServeV3DeploymentRequest: + """Convert V2 request format to V3 format with field mapping""" + # Get all fields from V2 request + kwargs = v2_request.model_dump() if hasattr(v2_request, 'model_dump') else v2_request.dict() + + # Remove old V2 field names + min_scale = kwargs.pop('min_scale', None) + max_scale = kwargs.pop('max_scale', None) + initial_scale = kwargs.pop('initial_scale', None) + + # Add new V3 field names + kwargs['min_replicas'] = min_scale + kwargs['max_replicas'] = max_scale + if initial_scale is not None: + kwargs['initial_replicas'] = initial_scale + + # Add V3-specific fields + kwargs['max_surge'] = None + kwargs['max_unavailable'] = None + + return CreateCServeV3DeploymentRequest(**kwargs) + # pylint: disable=R0917 def get_deployment_usage( self, id: int, metric: Metric, start_time_in_seconds: int, end_time_in_seconds: int, step: int From 2db4023372d7aa1f4d96e274e2aa5147bac61653 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Tue, 2 Sep 2025 13:46:09 -0400 Subject: [PATCH 10/16] simplify update --- centml/sdk/api.py | 100 ++++++++++++++++------------------------------ 1 file changed, 34 insertions(+), 66 deletions(-) diff --git a/centml/sdk/api.py b/centml/sdk/api.py index b6c56c3..6f9c71b 100644 --- a/centml/sdk/api.py +++ b/centml/sdk/api.py @@ -9,7 +9,6 @@ CreateComputeDeploymentRequest, CreateCServeV2DeploymentRequest, CreateCServeV3DeploymentRequest, - CServeV2Recipe, ApiException, Metric, ) @@ -74,59 +73,50 @@ def update_inference(self, deployment_id: int, request: CreateInferenceDeploymen def update_compute(self, deployment_id: int, request: CreateComputeDeploymentRequest): return self._api.update_compute_deployment_deployments_compute_put(deployment_id, request) + def detect_deployment_version(self, deployment_id: int) -> str: + """Detect if a deployment is V2 
or V3 by testing the specific API endpoints""" + try: + # Try V3 endpoint first + self._api.get_cserve_v3_deployment_deployments_cserve_v3_deployment_id_get(deployment_id) + return 'v3' + except ApiException as e: + if e.status in [404, 400]: # V3 endpoint doesn't exist for this deployment + try: + # Try V2 endpoint + self._api.get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get(deployment_id) + return 'v2' + except ApiException: + # If both fail, it might not be a CServe deployment or doesn't exist + raise ValueError(f"Deployment {deployment_id} is not a valid CServe deployment or does not exist") + else: + # Other error (auth, network, etc.) + raise + def update_cserve( self, deployment_id: int, request: Union[CreateCServeV2DeploymentRequest, CreateCServeV3DeploymentRequest] ): - """Update CServe deployment - automatically handles both V2 and V3 deployments""" - # Determine the approach based on the request type + """Update CServe deployment - validates request type matches deployment version""" + # Detect the deployment version + deployment_version = self.detect_deployment_version(deployment_id) + + # Validate request type matches deployment version if isinstance(request, CreateCServeV3DeploymentRequest): - # V3 request - try V3 API first, fallback if deployment is actually V2 - try: - return self._api.update_cserve_v3_deployment_deployments_cserve_v3_put(deployment_id, request) - except ApiException as e: - if e.status in [404, 400]: # V3 API failed, deployment might be V2 - # Convert V3 request to V2 and try V2 API - v2_request = self._convert_v3_to_v2_request(request) - return self._api.update_cserve_v2_deployment_deployments_cserve_v2_put(deployment_id, v2_request) - else: - raise + if deployment_version != 'v3': + raise ValueError( + f"Deployment {deployment_id} is CServe {deployment_version.upper()}, but you provided a V3 request. Please use CreateCServeV2DeploymentRequest instead." + ) + return self._api.update_cserve_v3_deployment_deployments_cserve_v3_put(deployment_id, request) elif isinstance(request, CreateCServeV2DeploymentRequest): - # V2 request - try V2 API first, fallback to V3 if deployment is actually V3 - try: - return self._api.update_cserve_v2_deployment_deployments_cserve_v2_put(deployment_id, request) - except ApiException as e: - if e.status in [404, 400]: # V2 API failed, deployment might be V3 - # Convert V2 request to V3 and try V3 API - v3_request = self.convert_v2_to_v3_request(request) - return self._api.update_cserve_v3_deployment_deployments_cserve_v3_put(deployment_id, v3_request) - else: - raise + if deployment_version != 'v2': + raise ValueError( + f"Deployment {deployment_id} is CServe {deployment_version.upper()}, but you provided a V2 request. Please use CreateCServeV3DeploymentRequest instead." + ) + return self._api.update_cserve_v2_deployment_deployments_cserve_v2_put(deployment_id, request) else: raise ValueError( f"Unsupported request type: {type(request)}. Expected CreateCServeV2DeploymentRequest or CreateCServeV3DeploymentRequest." 
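# Sketch of the detect-then-update flow as of this patch (deployment id 456 is a placeholder;
# building the request objects is elided, see examples/sdk/create_cserve.py):
from centml.sdk.api import get_centml_client

with get_centml_client() as cclient:
    version = cclient.detect_deployment_version(456)    # probes the V3 endpoint, then V2
    print(f"Deployment 456 is CServe {version}")
    # update_cserve() now raises ValueError when the request class does not match `version`,
    # rather than converting between the V2 and V3 request shapes behind the caller's back.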
) - def _convert_v3_to_v2_request(self, v3_request: CreateCServeV3DeploymentRequest) -> CreateCServeV2DeploymentRequest: - """Convert V3 request format to V2 format (reverse of convert_v2_to_v3_request)""" - # Get all fields from V3 request - kwargs = v3_request.model_dump() if hasattr(v3_request, 'model_dump') else v3_request.dict() - - # Remove old V3 field names - min_replicas = kwargs.pop('min_replicas', None) - max_replicas = kwargs.pop('max_replicas', None) - initial_replicas = kwargs.pop('initial_replicas', None) - # Remove V3-only fields - kwargs.pop('max_surge', None) - kwargs.pop('max_unavailable', None) - - # Add new V2 field names - kwargs['min_scale'] = min_replicas - kwargs['max_scale'] = max_replicas - if initial_replicas is not None: - kwargs['initial_scale'] = initial_replicas - - return CreateCServeV2DeploymentRequest(**kwargs) - def _update_status(self, id, new_status): status_req = platform_api_python_client.DeploymentStatusRequest(status=new_status) self._api.update_deployment_status_deployments_status_deployment_id_put(id, status_req) @@ -181,28 +171,6 @@ def detect_cserve_deployment_version(self, deployment_response): # Default to V2 for backward compatibility return 'v2' - def convert_v2_to_v3_request(self, v2_request: CreateCServeV2DeploymentRequest) -> CreateCServeV3DeploymentRequest: - """Convert V2 request format to V3 format with field mapping""" - # Get all fields from V2 request - kwargs = v2_request.model_dump() if hasattr(v2_request, 'model_dump') else v2_request.dict() - - # Remove old V2 field names - min_scale = kwargs.pop('min_scale', None) - max_scale = kwargs.pop('max_scale', None) - initial_scale = kwargs.pop('initial_scale', None) - - # Add new V3 field names - kwargs['min_replicas'] = min_scale - kwargs['max_replicas'] = max_scale - if initial_scale is not None: - kwargs['initial_replicas'] = initial_scale - - # Add V3-specific fields - kwargs['max_surge'] = None - kwargs['max_unavailable'] = None - - return CreateCServeV3DeploymentRequest(**kwargs) - # pylint: disable=R0917 def get_deployment_usage( self, id: int, metric: Metric, start_time_in_seconds: int, end_time_in_seconds: int, step: int From 73e8e6909ac857687a9be5c4d22970668e002e3b Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Tue, 2 Sep 2025 15:45:30 -0400 Subject: [PATCH 11/16] revert changes done by black --- centml/sdk/utils/client_certs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/centml/sdk/utils/client_certs.py b/centml/sdk/utils/client_certs.py index 4c18b3c..d3c2ec3 100644 --- a/centml/sdk/utils/client_certs.py +++ b/centml/sdk/utils/client_certs.py @@ -85,7 +85,7 @@ def save_pem_file(service_name, client_private_key, client_certificate): try: # Save the combined PEM file - with open(ca_file_path, "w") as combined_pem_file: + with open(ca_file_path, 'w') as combined_pem_file: combined_pem_file.write(client_private_key + client_certificate) click.echo(f"Combined PEM file for accessing the private endpoint has been saved to {ca_file_path}") From 6367b1cab1f3691b30f45403baffc8df253d1535 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Tue, 2 Sep 2025 18:19:57 -0400 Subject: [PATCH 12/16] add inference v3 support - testing --- centml/cli/cluster.py | 34 +++++++-------- centml/sdk/api.py | 72 ++++++++++++++++++++++++++++++-- examples/sdk/create_inference.py | 16 ++++--- 3 files changed, 95 insertions(+), 27 deletions(-) diff --git a/centml/cli/cluster.py b/centml/cli/cluster.py index 22618a6..7e8a16c 100644 --- a/centml/cli/cluster.py +++ 
b/centml/cli/cluster.py @@ -13,6 +13,7 @@ DeploymentType.COMPUTE: "compute", DeploymentType.COMPILATION: "compilation", DeploymentType.INFERENCE_V2: "inference", + DeploymentType.INFERENCE_V3: "inference", DeploymentType.COMPUTE_V2: "compute", # For user, they are all cserve. DeploymentType.CSERVE: "cserve", @@ -22,7 +23,7 @@ } # use latest type to for user requests depl_name_to_type_map = { - "inference": DeploymentType.INFERENCE_V2, + "inference": DeploymentType.INFERENCE_V3, "cserve": DeploymentType.CSERVE_V3, "compute": DeploymentType.COMPUTE_V2, "rag": DeploymentType.RAG, @@ -140,8 +141,8 @@ def get(type, id): with get_centml_client() as cclient: depl_type = depl_name_to_type_map[type] - if depl_type == DeploymentType.INFERENCE_V2: - deployment = cclient.get_inference(id) + if depl_type in [DeploymentType.INFERENCE_V2, DeploymentType.INFERENCE_V3]: + deployment = cclient.get_inference(id) # handles both V2 and V3 elif depl_type == DeploymentType.COMPUTE_V2: deployment = cclient.get_compute(id) elif depl_type in [DeploymentType.CSERVE_V2, DeploymentType.CSERVE_V3]: @@ -169,21 +170,18 @@ def get(type, id): ) click.echo("Additional deployment configurations:") - if depl_type == DeploymentType.INFERENCE_V2: - click.echo( - tabulate( - [ - ("Image", deployment.image_url), - ("Container port", deployment.container_port), - ("Healthcheck", deployment.healthcheck or "/"), - ("Replicas", _get_replica_info(deployment)), - ("Environment variables", deployment.env_vars or "None"), - ("Max concurrency", deployment.concurrency or "None"), - ], - tablefmt="rounded_outline", - disable_numparse=True, - ) - ) + if depl_type in [DeploymentType.INFERENCE_V2, DeploymentType.INFERENCE_V3]: + replica_info = _get_replica_info(deployment) + display_rows = [ + ("Image", deployment.image_url), + ("Container port", deployment.container_port), + ("Healthcheck", deployment.healthcheck or "/"), + ("Replicas", replica_info), + ("Environment variables", deployment.env_vars or "None"), + ("Max concurrency", deployment.concurrency or "None"), + ] + + click.echo(tabulate(display_rows, tablefmt="rounded_outline", disable_numparse=True)) elif depl_type == DeploymentType.COMPUTE_V2: click.echo( tabulate( diff --git a/centml/sdk/api.py b/centml/sdk/api.py index 6f9c71b..e8afa80 100644 --- a/centml/sdk/api.py +++ b/centml/sdk/api.py @@ -6,6 +6,7 @@ DeploymentType, DeploymentStatus, CreateInferenceDeploymentRequest, + CreateInferenceV3DeploymentRequest, CreateComputeDeploymentRequest, CreateCServeV2DeploymentRequest, CreateCServeV3DeploymentRequest, @@ -30,7 +31,21 @@ def get_status(self, id): return self._api.get_deployment_status_deployments_status_deployment_id_get(id) def get_inference(self, id): - return self._api.get_inference_deployment_deployments_inference_deployment_id_get(id) + """Get Inference deployment details - automatically handles both V2 and V3 deployments""" + # Try V3 first (recommended), fallback to V2 if deployment is V2 + try: + return self._api.get_inference_v3_deployment_deployments_inference_v3_deployment_id_get(id) + except ApiException as e: + # If V3 fails with 404 or similar, try V2 + if e.status in [404, 400]: # Deployment might be V2 or endpoint not found + try: + return self._api.get_inference_deployment_deployments_inference_deployment_id_get(id) + except ApiException as v2_error: + # If both fail, raise the original V3 error as it's more likely to be the real issue + raise e + else: + # For other errors (auth, network, etc.), raise immediately + raise def get_compute(self, id): return 
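# Usage sketch mirroring get_cserve() for inference deployments (id 789 is a placeholder):
from centml.sdk.api import get_centml_client

with get_centml_client() as cclient:
    deployment = cclient.get_inference(789)   # V3 endpoint first, V2 fallback on 404/400
    # Replica field names again depend on which version actually answered:
    lo = getattr(deployment, "min_replicas", getattr(deployment, "min_scale", None))
    hi = getattr(deployment, "max_replicas", getattr(deployment, "max_scale", None))
    print({"min": lo, "max": hi})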
self._api.get_compute_deployment_deployments_compute_deployment_id_get(id) @@ -52,9 +67,15 @@ def get_cserve(self, id): # For other errors (auth, network, etc.), raise immediately raise - def create_inference(self, request: CreateInferenceDeploymentRequest): + def create_inference(self, request: CreateInferenceV3DeploymentRequest): + return self._api.create_inference_v3_deployment_deployments_inference_v3_post(request) + + def create_inference_v2(self, request: CreateInferenceDeploymentRequest): return self._api.create_inference_deployment_deployments_inference_post(request) + def create_inference_v3(self, request: CreateInferenceV3DeploymentRequest): + return self._api.create_inference_v3_deployment_deployments_inference_v3_post(request) + def create_compute(self, request: CreateComputeDeploymentRequest): return self._api.create_compute_deployment_deployments_compute_post(request) @@ -67,8 +88,51 @@ def create_cserve_v2(self, request: CreateCServeV2DeploymentRequest): def create_cserve_v3(self, request: CreateCServeV3DeploymentRequest): return self._api.create_cserve_v3_deployment_deployments_cserve_v3_post(request) - def update_inference(self, deployment_id: int, request: CreateInferenceDeploymentRequest): - return self._api.update_inference_deployment_deployments_inference_put(deployment_id, request) + def detect_inference_deployment_version(self, deployment_id: int) -> str: + """Detect if an inference deployment is V2 or V3 by testing the specific API endpoints""" + try: + # Try V3 endpoint first + self._api.get_inference_v3_deployment_deployments_inference_v3_deployment_id_get(deployment_id) + return 'v3' + except ApiException as e: + if e.status in [404, 400]: # V3 endpoint doesn't exist for this deployment + try: + # Try V2 endpoint + self._api.get_inference_deployment_deployments_inference_deployment_id_get(deployment_id) + return 'v2' + except ApiException: + # If both fail, it might not be an inference deployment or doesn't exist + raise ValueError( + f"Deployment {deployment_id} is not a valid inference deployment or does not exist" + ) + else: + # Other error (auth, network, etc.) + raise + + def update_inference( + self, deployment_id: int, request: Union[CreateInferenceDeploymentRequest, CreateInferenceV3DeploymentRequest] + ): + """Update Inference deployment - validates request type matches deployment version""" + # Detect the deployment version + deployment_version = self.detect_inference_deployment_version(deployment_id) + + # Validate request type matches deployment version + if isinstance(request, CreateInferenceV3DeploymentRequest): + if deployment_version != 'v3': + raise ValueError( + f"Deployment {deployment_id} is Inference {deployment_version.upper()}, but you provided a V3 request. Please use CreateInferenceDeploymentRequest instead." + ) + return self._api.update_inference_v3_deployment_deployments_inference_v3_put(deployment_id, request) + elif isinstance(request, CreateInferenceDeploymentRequest): + if deployment_version != 'v2': + raise ValueError( + f"Deployment {deployment_id} is Inference {deployment_version.upper()}, but you provided a V2 request. Please use CreateInferenceV3DeploymentRequest instead." + ) + return self._api.update_inference_deployment_deployments_inference_put(deployment_id, request) + else: + raise ValueError( + f"Unsupported request type: {type(request)}. Expected CreateInferenceDeploymentRequest or CreateInferenceV3DeploymentRequest." 
+ ) def update_compute(self, deployment_id: int, request: CreateComputeDeploymentRequest): return self._api.update_compute_deployment_deployments_compute_put(deployment_id, request) diff --git a/examples/sdk/create_inference.py b/examples/sdk/create_inference.py index 8af4d20..5531c72 100644 --- a/examples/sdk/create_inference.py +++ b/examples/sdk/create_inference.py @@ -1,26 +1,32 @@ import centml from centml.sdk.api import get_centml_client -from centml.sdk import DeploymentType, CreateInferenceDeploymentRequest, UserVaultType +from centml.sdk import DeploymentType, CreateInferenceV3DeploymentRequest, UserVaultType def main(): with get_centml_client() as cclient: certs = cclient.get_user_vault(UserVaultType.CERTIFICATES) - request = CreateInferenceDeploymentRequest( + request = CreateInferenceV3DeploymentRequest( name="nginx", cluster_id=1000, hardware_instance_id=1000, image_url="nginxinc/nginx-unprivileged", port=8080, - min_scale=1, - max_scale=1, + min_replicas=1, # V3 uses min_replicas instead of min_scale + max_replicas=3, # V3 uses max_replicas instead of max_scale + initial_replicas=1, # Optional in V3 - initial number of replicas endpoint_certificate_authority=certs["my_cert"], + # V3 rollout strategy parameters + max_surge=1, # Allow 1 extra pod during updates + max_unavailable=0, # Keep all pods available during updates + healthcheck="/", + concurrency=10, ) response = cclient.create_inference(request) print("Create deployment response: ", response) - ### Get deployment details + ### Get deployment details (automatically detects V2 or V3) deployment = cclient.get_inference(response.id) print("Deployment details: ", deployment) From 7924f56b4c927e6d545013c0ef7831fc8523bc05 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Tue, 2 Sep 2025 18:48:37 -0400 Subject: [PATCH 13/16] pylint --- centml/sdk/api.py | 48 +++++++++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/centml/sdk/api.py b/centml/sdk/api.py index e8afa80..cc86d2b 100644 --- a/centml/sdk/api.py +++ b/centml/sdk/api.py @@ -42,7 +42,7 @@ def get_inference(self, id): return self._api.get_inference_deployment_deployments_inference_deployment_id_get(id) except ApiException as v2_error: # If both fail, raise the original V3 error as it's more likely to be the real issue - raise e + raise e from v2_error else: # For other errors (auth, network, etc.), raise immediately raise @@ -62,7 +62,7 @@ def get_cserve(self, id): return self._api.get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get(id) except ApiException as v2_error: # If both fail, raise the original V3 error as it's more likely to be the real issue - raise e + raise e from v2_error else: # For other errors (auth, network, etc.), raise immediately raise @@ -100,11 +100,11 @@ def detect_inference_deployment_version(self, deployment_id: int) -> str: # Try V2 endpoint self._api.get_inference_deployment_deployments_inference_deployment_id_get(deployment_id) return 'v2' - except ApiException: + except ApiException as exc: # If both fail, it might not be an inference deployment or doesn't exist raise ValueError( f"Deployment {deployment_id} is not a valid inference deployment or does not exist" - ) + ) from exc else: # Other error (auth, network, etc.) 
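# Follow-on sketch: updating the V3 inference deployment created in the example above
# (field values are illustrative and possibly incomplete; `response.id` is the id returned
# by the create call shown in the example):
from centml.sdk import CreateInferenceV3DeploymentRequest

updated = CreateInferenceV3DeploymentRequest(
    name="nginx",
    cluster_id=1000,
    hardware_instance_id=1000,
    image_url="nginxinc/nginx-unprivileged",
    port=8080,
    min_replicas=2,        # raise the floor from the original 1
    max_replicas=3,
    max_surge=1,           # V3-only rollout knobs
    max_unavailable=0,
)
# cclient.update_inference(response.id, updated)  # raises ValueError if the deployment is V2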
raise @@ -120,18 +120,25 @@ def update_inference( if isinstance(request, CreateInferenceV3DeploymentRequest): if deployment_version != 'v3': raise ValueError( - f"Deployment {deployment_id} is Inference {deployment_version.upper()}, but you provided a V3 request. Please use CreateInferenceDeploymentRequest instead." + f"Deployment {deployment_id} is Inference {deployment_version.upper()}, " + f"but you provided a V3 request. Please use CreateInferenceDeploymentRequest instead." ) - return self._api.update_inference_v3_deployment_deployments_inference_v3_put(deployment_id, request) + return self._api.update_inference_v3_deployment_deployments_inference_v3_put( + deployment_id, request + ) elif isinstance(request, CreateInferenceDeploymentRequest): if deployment_version != 'v2': raise ValueError( - f"Deployment {deployment_id} is Inference {deployment_version.upper()}, but you provided a V2 request. Please use CreateInferenceV3DeploymentRequest instead." + f"Deployment {deployment_id} is Inference {deployment_version.upper()}, " + f"but you provided a V2 request. Please use CreateInferenceV3DeploymentRequest instead." ) - return self._api.update_inference_deployment_deployments_inference_put(deployment_id, request) + return self._api.update_inference_deployment_deployments_inference_put( + deployment_id, request + ) else: raise ValueError( - f"Unsupported request type: {type(request)}. Expected CreateInferenceDeploymentRequest or CreateInferenceV3DeploymentRequest." + f"Unsupported request type: {type(request)}. " + f"Expected CreateInferenceDeploymentRequest or CreateInferenceV3DeploymentRequest." ) def update_compute(self, deployment_id: int, request: CreateComputeDeploymentRequest): @@ -149,9 +156,11 @@ def detect_deployment_version(self, deployment_id: int) -> str: # Try V2 endpoint self._api.get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get(deployment_id) return 'v2' - except ApiException: + except ApiException as exc: # If both fail, it might not be a CServe deployment or doesn't exist - raise ValueError(f"Deployment {deployment_id} is not a valid CServe deployment or does not exist") + raise ValueError( + f"Deployment {deployment_id} is not a valid CServe deployment or does not exist" + ) from exc else: # Other error (auth, network, etc.) raise @@ -167,18 +176,25 @@ def update_cserve( if isinstance(request, CreateCServeV3DeploymentRequest): if deployment_version != 'v3': raise ValueError( - f"Deployment {deployment_id} is CServe {deployment_version.upper()}, but you provided a V3 request. Please use CreateCServeV2DeploymentRequest instead." + f"Deployment {deployment_id} is CServe {deployment_version.upper()}, " + f"but you provided a V3 request. Please use CreateCServeV2DeploymentRequest instead." ) - return self._api.update_cserve_v3_deployment_deployments_cserve_v3_put(deployment_id, request) + return self._api.update_cserve_v3_deployment_deployments_cserve_v3_put( + deployment_id, request + ) elif isinstance(request, CreateCServeV2DeploymentRequest): if deployment_version != 'v2': raise ValueError( - f"Deployment {deployment_id} is CServe {deployment_version.upper()}, but you provided a V2 request. Please use CreateCServeV3DeploymentRequest instead." + f"Deployment {deployment_id} is CServe {deployment_version.upper()}, " + f"but you provided a V2 request. Please use CreateCServeV3DeploymentRequest instead." 
) - return self._api.update_cserve_v2_deployment_deployments_cserve_v2_put(deployment_id, request) + return self._api.update_cserve_v2_deployment_deployments_cserve_v2_put( + deployment_id, request + ) else: raise ValueError( - f"Unsupported request type: {type(request)}. Expected CreateCServeV2DeploymentRequest or CreateCServeV3DeploymentRequest." + f"Unsupported request type: {type(request)}. " + f"Expected CreateCServeV2DeploymentRequest or CreateCServeV3DeploymentRequest." ) def _update_status(self, id, new_status): From e328151dd55796afd5937db9dcdfb16454329721 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Thu, 4 Sep 2025 11:35:34 -0400 Subject: [PATCH 14/16] black format --- centml/sdk/api.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/centml/sdk/api.py b/centml/sdk/api.py index cc86d2b..d082a0f 100644 --- a/centml/sdk/api.py +++ b/centml/sdk/api.py @@ -123,18 +123,14 @@ def update_inference( f"Deployment {deployment_id} is Inference {deployment_version.upper()}, " f"but you provided a V3 request. Please use CreateInferenceDeploymentRequest instead." ) - return self._api.update_inference_v3_deployment_deployments_inference_v3_put( - deployment_id, request - ) + return self._api.update_inference_v3_deployment_deployments_inference_v3_put(deployment_id, request) elif isinstance(request, CreateInferenceDeploymentRequest): if deployment_version != 'v2': raise ValueError( f"Deployment {deployment_id} is Inference {deployment_version.upper()}, " f"but you provided a V2 request. Please use CreateInferenceV3DeploymentRequest instead." ) - return self._api.update_inference_deployment_deployments_inference_put( - deployment_id, request - ) + return self._api.update_inference_deployment_deployments_inference_put(deployment_id, request) else: raise ValueError( f"Unsupported request type: {type(request)}. " @@ -179,18 +175,14 @@ def update_cserve( f"Deployment {deployment_id} is CServe {deployment_version.upper()}, " f"but you provided a V3 request. Please use CreateCServeV2DeploymentRequest instead." ) - return self._api.update_cserve_v3_deployment_deployments_cserve_v3_put( - deployment_id, request - ) + return self._api.update_cserve_v3_deployment_deployments_cserve_v3_put(deployment_id, request) elif isinstance(request, CreateCServeV2DeploymentRequest): if deployment_version != 'v2': raise ValueError( f"Deployment {deployment_id} is CServe {deployment_version.upper()}, " f"but you provided a V2 request. Please use CreateCServeV3DeploymentRequest instead." ) - return self._api.update_cserve_v2_deployment_deployments_cserve_v2_put( - deployment_id, request - ) + return self._api.update_cserve_v2_deployment_deployments_cserve_v2_put(deployment_id, request) else: raise ValueError( f"Unsupported request type: {type(request)}. 
" From 5dfc12f82e680fd6f94f6b4399481c0f660ff117 Mon Sep 17 00:00:00 2001 From: Anand J Date: Wed, 10 Sep 2025 14:28:33 -0400 Subject: [PATCH 15/16] Remove create and update support for V2 deployment types --- centml/sdk/api.py | 126 ++-------------------------------------------- 1 file changed, 4 insertions(+), 122 deletions(-) diff --git a/centml/sdk/api.py b/centml/sdk/api.py index d082a0f..514e0b5 100644 --- a/centml/sdk/api.py +++ b/centml/sdk/api.py @@ -70,124 +70,20 @@ def get_cserve(self, id): def create_inference(self, request: CreateInferenceV3DeploymentRequest): return self._api.create_inference_v3_deployment_deployments_inference_v3_post(request) - def create_inference_v2(self, request: CreateInferenceDeploymentRequest): - return self._api.create_inference_deployment_deployments_inference_post(request) - - def create_inference_v3(self, request: CreateInferenceV3DeploymentRequest): - return self._api.create_inference_v3_deployment_deployments_inference_v3_post(request) - def create_compute(self, request: CreateComputeDeploymentRequest): return self._api.create_compute_deployment_deployments_compute_post(request) def create_cserve(self, request: CreateCServeV3DeploymentRequest): return self._api.create_cserve_v3_deployment_deployments_cserve_v3_post(request) - def create_cserve_v2(self, request: CreateCServeV2DeploymentRequest): - return self._api.create_cserve_v2_deployment_deployments_cserve_v2_post(request) - - def create_cserve_v3(self, request: CreateCServeV3DeploymentRequest): - return self._api.create_cserve_v3_deployment_deployments_cserve_v3_post(request) - - def detect_inference_deployment_version(self, deployment_id: int) -> str: - """Detect if an inference deployment is V2 or V3 by testing the specific API endpoints""" - try: - # Try V3 endpoint first - self._api.get_inference_v3_deployment_deployments_inference_v3_deployment_id_get(deployment_id) - return 'v3' - except ApiException as e: - if e.status in [404, 400]: # V3 endpoint doesn't exist for this deployment - try: - # Try V2 endpoint - self._api.get_inference_deployment_deployments_inference_deployment_id_get(deployment_id) - return 'v2' - except ApiException as exc: - # If both fail, it might not be an inference deployment or doesn't exist - raise ValueError( - f"Deployment {deployment_id} is not a valid inference deployment or does not exist" - ) from exc - else: - # Other error (auth, network, etc.) - raise - - def update_inference( - self, deployment_id: int, request: Union[CreateInferenceDeploymentRequest, CreateInferenceV3DeploymentRequest] - ): - """Update Inference deployment - validates request type matches deployment version""" - # Detect the deployment version - deployment_version = self.detect_inference_deployment_version(deployment_id) - - # Validate request type matches deployment version - if isinstance(request, CreateInferenceV3DeploymentRequest): - if deployment_version != 'v3': - raise ValueError( - f"Deployment {deployment_id} is Inference {deployment_version.upper()}, " - f"but you provided a V3 request. Please use CreateInferenceDeploymentRequest instead." - ) - return self._api.update_inference_v3_deployment_deployments_inference_v3_put(deployment_id, request) - elif isinstance(request, CreateInferenceDeploymentRequest): - if deployment_version != 'v2': - raise ValueError( - f"Deployment {deployment_id} is Inference {deployment_version.upper()}, " - f"but you provided a V2 request. Please use CreateInferenceV3DeploymentRequest instead." 
- ) - return self._api.update_inference_deployment_deployments_inference_put(deployment_id, request) - else: - raise ValueError( - f"Unsupported request type: {type(request)}. " - f"Expected CreateInferenceDeploymentRequest or CreateInferenceV3DeploymentRequest." - ) + def update_inference(self, deployment_id: int, request: CreateInferenceV3DeploymentRequest): + return self._api.update_inference_v3_deployment_deployments_inference_v3_put(deployment_id, request) def update_compute(self, deployment_id: int, request: CreateComputeDeploymentRequest): return self._api.update_compute_deployment_deployments_compute_put(deployment_id, request) - def detect_deployment_version(self, deployment_id: int) -> str: - """Detect if a deployment is V2 or V3 by testing the specific API endpoints""" - try: - # Try V3 endpoint first - self._api.get_cserve_v3_deployment_deployments_cserve_v3_deployment_id_get(deployment_id) - return 'v3' - except ApiException as e: - if e.status in [404, 400]: # V3 endpoint doesn't exist for this deployment - try: - # Try V2 endpoint - self._api.get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get(deployment_id) - return 'v2' - except ApiException as exc: - # If both fail, it might not be a CServe deployment or doesn't exist - raise ValueError( - f"Deployment {deployment_id} is not a valid CServe deployment or does not exist" - ) from exc - else: - # Other error (auth, network, etc.) - raise - - def update_cserve( - self, deployment_id: int, request: Union[CreateCServeV2DeploymentRequest, CreateCServeV3DeploymentRequest] - ): - """Update CServe deployment - validates request type matches deployment version""" - # Detect the deployment version - deployment_version = self.detect_deployment_version(deployment_id) - - # Validate request type matches deployment version - if isinstance(request, CreateCServeV3DeploymentRequest): - if deployment_version != 'v3': - raise ValueError( - f"Deployment {deployment_id} is CServe {deployment_version.upper()}, " - f"but you provided a V3 request. Please use CreateCServeV2DeploymentRequest instead." - ) - return self._api.update_cserve_v3_deployment_deployments_cserve_v3_put(deployment_id, request) - elif isinstance(request, CreateCServeV2DeploymentRequest): - if deployment_version != 'v2': - raise ValueError( - f"Deployment {deployment_id} is CServe {deployment_version.upper()}, " - f"but you provided a V2 request. Please use CreateCServeV3DeploymentRequest instead." - ) - return self._api.update_cserve_v2_deployment_deployments_cserve_v2_put(deployment_id, request) - else: - raise ValueError( - f"Unsupported request type: {type(request)}. " - f"Expected CreateCServeV2DeploymentRequest or CreateCServeV3DeploymentRequest." 
- ) + def update_cserve(self, deployment_id: int, request: CreateCServeV3DeploymentRequest): + return self._api.update_cserve_v3_deployment_deployments_cserve_v3_put(deployment_id, request) def _update_status(self, id, new_status): status_req = platform_api_python_client.DeploymentStatusRequest(status=new_status) @@ -229,20 +125,6 @@ def get_user_vault(self, type): return {i.key: i.value for i in items} - def detect_cserve_deployment_version(self, deployment_response): - """Detect if a CServe deployment is V2 or V3 based on response fields""" - # Check for V3-specific fields - if hasattr(deployment_response, 'max_surge') or hasattr(deployment_response, 'max_unavailable'): - return 'v3' - # Check for V3 field names (min_replicas vs min_scale) - if hasattr(deployment_response, 'min_replicas'): - return 'v3' - # Check for V2 field names - if hasattr(deployment_response, 'min_scale'): - return 'v2' - # Default to V2 for backward compatibility - return 'v2' - # pylint: disable=R0917 def get_deployment_usage( self, id: int, metric: Metric, start_time_in_seconds: int, end_time_in_seconds: int, step: int From eb6e5d81b4c3e539d335418e81560707bc259b88 Mon Sep 17 00:00:00 2001 From: Anand J Date: Wed, 10 Sep 2025 14:35:26 -0400 Subject: [PATCH 16/16] Lint fix --- centml/sdk/api.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/centml/sdk/api.py b/centml/sdk/api.py index 514e0b5..8e77548 100644 --- a/centml/sdk/api.py +++ b/centml/sdk/api.py @@ -1,14 +1,11 @@ from contextlib import contextmanager -from typing import Union import platform_api_python_client from platform_api_python_client import ( DeploymentType, DeploymentStatus, - CreateInferenceDeploymentRequest, CreateInferenceV3DeploymentRequest, CreateComputeDeploymentRequest, - CreateCServeV2DeploymentRequest, CreateCServeV3DeploymentRequest, ApiException, Metric,
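# End-state sketch after the last two patches: writes are V3-only, reads stay version-agnostic
# (deployment id 123 is a placeholder):
from centml.sdk.api import get_centml_client

with get_centml_client() as cclient:
    deployment = cclient.get_cserve(123)    # unchanged: V3 endpoint first, V2 fallback
    # create_cserve()/update_cserve() now accept only CreateCServeV3DeploymentRequest, and
    # create_inference()/update_inference() only CreateInferenceV3DeploymentRequest;
    # building those requests is shown in examples/sdk/create_cserve.py and create_inference.py.
    print(deployment.status)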