Skip to content

Commit c2eaa15

Browse files
Add UI table with Cluster specs and action buttons (#681)
* Add UI table with RayCluster specs and action buttons
* Update cpu and mem names from UI table
* Merge requests-limits into single column in UI table
* Enhance notebook outputs/display on button clicks
* Refactor and move UI table to widgets.py file
* Add unit tests for UI table functions
* Add timeout and interval parameters to _delete_cluster function
* Pre-select cluster if exists, and suppress widgets and outputs on creation of Cluster Object, and bug fixes
* Add UI table to regression and functionality tests
* Update codeflare_sdk.egg-info
* Fix to hide toolbar before capturing snapshots for UI notebook tests
* Add head comments to functions and add num_workers to data frame
* Reformat for pre-commit checks
* Revert codeflare_sdk.egg-info name
1 parent 80fabe3 commit c2eaa15

File tree

13 files changed

+756
-43
lines changed

13 files changed

+756
-43
lines changed

.github/workflows/ui_notebooks_test.yaml

+2-1
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,8 @@ jobs:
8686
jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' 3_widget_example.ipynb > 3_widget_example.ipynb.tmp && mv 3_widget_example.ipynb.tmp 3_widget_example.ipynb
8787
jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' 3_widget_example.ipynb > 3_widget_example.ipynb.tmp && mv 3_widget_example.ipynb.tmp 3_widget_example.ipynb
8888
# Set explicit namespace as the SDK needs it (currently) to resolve local queues
89-
sed -i "s/head_memory_limits=2,/head_memory_limits=2, namespace='default',/" 3_widget_example.ipynb
89+
sed -i "s|head_memory_limits=2,|head_memory_limits=2, namespace='default', image='quay.io/modh/ray:2.35.0-py39-cu121',|" 3_widget_example.ipynb
90+
sed -i "s|view_clusters()|view_clusters('default')|" 3_widget_example.ipynb
9091
working-directory: demo-notebooks/guided-demos
9192

9293
- name: Run UI notebook tests

demo-notebooks/guided-demos/3_widget_example.ipynb

+15-5
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
"outputs": [],
2020
"source": [
2121
"# Import pieces from codeflare-sdk\n",
22-
"from codeflare_sdk import Cluster, ClusterConfiguration, TokenAuthentication"
22+
"from codeflare_sdk import Cluster, ClusterConfiguration, TokenAuthentication, view_clusters"
2323
]
2424
},
2525
{
@@ -61,7 +61,7 @@
6161
"# Create and configure our cluster object\n",
6262
"# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\" unless you specify the local queue manually below\n",
6363
"cluster = Cluster(ClusterConfiguration(\n",
64-
" name='raytest', \n",
64+
" name='raytest',\n",
6565
" head_cpu_requests='500m',\n",
6666
" head_cpu_limits='500m',\n",
6767
" head_memory_requests=2,\n",
@@ -73,12 +73,22 @@
7373
" worker_cpu_limits=1,\n",
7474
" worker_memory_requests=2,\n",
7575
" worker_memory_limits=2,\n",
76-
" # image=\"\", # Optional Field \n",
77-
" write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n",
76+
" # image=\"\", # Optional Field\n",
77+
" write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources\n",
7878
" # local_queue=\"local-queue-name\" # Specify the local queue manually\n",
7979
"))"
8080
]
8181
},
82+
{
83+
"cell_type": "code",
84+
"execution_count": null,
85+
"id": "3de6403c",
86+
"metadata": {},
87+
"outputs": [],
88+
"source": [
89+
"view_clusters()"
90+
]
91+
},
8292
{
8393
"cell_type": "code",
8494
"execution_count": null,
@@ -106,7 +116,7 @@
106116
"name": "python",
107117
"nbconvert_exporter": "python",
108118
"pygments_lexer": "ipython3",
109-
"version": "3.9.19"
119+
"version": "3.9.18"
110120
},
111121
"vscode": {
112122
"interpreter": {

src/codeflare_sdk.egg-info/SOURCES.txt

+2
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,11 @@ src/codeflare_sdk/cluster/awload.py
1212
src/codeflare_sdk/cluster/cluster.py
1313
src/codeflare_sdk/cluster/config.py
1414
src/codeflare_sdk/cluster/model.py
15+
src/codeflare_sdk/cluster/widgets.py
1516
src/codeflare_sdk/job/__init__.py
1617
src/codeflare_sdk/job/ray_jobs.py
1718
src/codeflare_sdk/utils/__init__.py
19+
src/codeflare_sdk/utils/demos.py
1820
src/codeflare_sdk/utils/generate_cert.py
1921
src/codeflare_sdk/utils/generate_yaml.py
2022
src/codeflare_sdk/utils/kube_api_helpers.py

src/codeflare_sdk/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
get_cluster,
1515
list_all_queued,
1616
list_all_clusters,
17+
view_clusters,
1718
)
1819

1920
from .job import RayJobClient

src/codeflare_sdk/cluster/__init__.py

+4
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,8 @@
2121
list_all_clusters,
2222
)
2323

24+
from .widgets import (
25+
view_clusters,
26+
)
27+
2428
from .awload import AWManager

src/codeflare_sdk/cluster/cluster.py

+11-6
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
"""
2020

2121
import re
22+
import subprocess
2223
from time import sleep
2324
from typing import List, Optional, Tuple, Dict
2425

@@ -862,16 +863,19 @@ def _map_to_ray_cluster(rc) -> Optional[RayCluster]:
862863
name=rc["metadata"]["name"],
863864
status=status,
864865
# for now we are not using autoscaling so same replicas is fine
865-
workers=rc["spec"]["workerGroupSpecs"][0]["replicas"],
866+
num_workers=rc["spec"]["workerGroupSpecs"][0]["replicas"],
866867
worker_mem_limits=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
867868
"containers"
868869
][0]["resources"]["limits"]["memory"],
869870
worker_mem_requests=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
870871
"containers"
871872
][0]["resources"]["requests"]["memory"],
872-
worker_cpu=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][
873-
0
874-
]["resources"]["limits"]["cpu"],
873+
worker_cpu_requests=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
874+
"containers"
875+
][0]["resources"]["requests"]["cpu"],
876+
worker_cpu_limits=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
877+
"containers"
878+
][0]["resources"]["limits"]["cpu"],
875879
worker_extended_resources=worker_extended_resources,
876880
namespace=rc["metadata"]["namespace"],
877881
head_cpu_requests=rc["spec"]["headGroupSpec"]["template"]["spec"]["containers"][
@@ -907,10 +911,11 @@ def _copy_to_ray(cluster: Cluster) -> RayCluster:
907911
ray = RayCluster(
908912
name=cluster.config.name,
909913
status=cluster.status(print_to_console=False)[0],
910-
workers=cluster.config.num_workers,
914+
num_workers=cluster.config.num_workers,
911915
worker_mem_requests=cluster.config.worker_memory_requests,
912916
worker_mem_limits=cluster.config.worker_memory_limits,
913-
worker_cpu=cluster.config.worker_cpu_requests,
917+
worker_cpu_requests=cluster.config.worker_cpu_requests,
918+
worker_cpu_limits=cluster.config.worker_cpu_limits,
914919
worker_extended_resources=cluster.config.worker_extended_resource_requests,
915920
namespace=cluster.config.namespace,
916921
dashboard=cluster.cluster_dashboard_uri(),

src/codeflare_sdk/cluster/model.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from dataclasses import dataclass, field
2222
from enum import Enum
2323
import typing
24+
from typing import Union
2425

2526

2627
class RayClusterStatus(Enum):
@@ -77,10 +78,11 @@ class RayCluster:
7778
head_cpu_limits: int
7879
head_mem_requests: str
7980
head_mem_limits: str
80-
workers: int
81+
num_workers: int
8182
worker_mem_requests: str
8283
worker_mem_limits: str
83-
worker_cpu: int
84+
worker_cpu_requests: Union[int, str]
85+
worker_cpu_limits: Union[int, str]
8486
namespace: str
8587
dashboard: str
8688
worker_extended_resources: typing.Dict[str, int] = field(default_factory=dict)

0 commit comments

Comments
 (0)