Skip to content

Commit

Permalink
Update job spec to abide by Kueue and update the GPU types and count …
Browse files Browse the repository at this point in the history
…to reflect latest update
  • Loading branch information
AntreasAntoniou committed Feb 16, 2024
1 parent bd68e4e commit dbcc583
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 5 deletions.
9 changes: 8 additions & 1 deletion kubejobs/jobs.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import datetime
import enum
import grp
import json
import logging
Expand Down Expand Up @@ -59,7 +60,11 @@ class GPU_PRODUCT:
NVIDIA_A100_SXM4_40GB = "NVIDIA-A100-SXM4-40GB"
NVIDIA_A100_SXM4_40GB_MIG_3G_20GB = "NVIDIA-A100-SXM4-40GB-MIG-3g.20gb"
NVIDIA_A100_SXM4_40GB_MIG_1G_5GB = "NVIDIA-A100-SXM4-40GB-MIG-1g.5gb"
NVIDIA_H100_80GB = "NVIDIA-H100-80GB-HBM3"

class KubeQueue:
    """Named constants for queue names that jobs can be submitted to.

    Each attribute is a plain string.
    NOTE(review): presumably these are meant to be passed as the
    ``kubernetes_queue_name`` argument of ``KubernetesJob`` (which ends up
    in the ``kueue.x-k8s.io/queue-name`` label) -- confirm against callers.
    """

    # Queue name string for the "informatics" user queue.
    INFORMATICS = "informatics-user-queue"


class KubernetesJob:
"""
Expand Down Expand Up @@ -96,6 +101,7 @@ def __init__(
self,
name: str,
image: str,
kubernetes_queue_name: str,
command: List[str] = None,
args: Optional[List[str]] = None,
cpu_request: Optional[str] = None,
Expand Down Expand Up @@ -160,8 +166,9 @@ def __init__(

self.user_name = user_name or os.environ.get("USER", "unknown")
self.user_email = user_email # This is now a required field.
self.kubernetes_queue_name = kubernetes_queue_name

self.labels = {"eidf/user": self.user_name}
self.labels = {"eidf/user": self.user_name, "kueue.x-k8s.io/queue-name": self.kubernetes_queue_name}

if labels is not None:
self.labels.update(labels)
Expand Down
10 changes: 6 additions & 4 deletions kubejobs/useful_single_liners/count_gpu_usage_general.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,14 @@
from rich.table import Table
from tqdm.auto import tqdm


# GPU details
# Maps GPU product name -> integer count for that product.
# NOTE(review): presumably the total number of GPUs of each type available
# in the cluster -- confirm against the current hardware inventory.
# Each key appears exactly once; the earlier duplicate entries were dead
# because a later duplicate key in a dict literal overrides the earlier one.
GPU_DETAIL_DICT = {
    "NVIDIA-A100-SXM4-80GB": 40,
    "NVIDIA-A100-SXM4-40GB": 112,
    "NVIDIA-A100-SXM4-40GB-MIG-3g.20gb": 16,
    "NVIDIA-A100-SXM4-40GB-MIG-1g.5gb": 56,
    "NVIDIA-H100-80GB-HBM3": 32,
}

INFORMATICS_GPU_ALLOWANCE = 60
Expand Down

0 comments on commit dbcc583

Please sign in to comment.