Add compute validations (#2371)
* Add compute validations

* fix formatting
aggarwal-k committed Jun 14, 2023 (commit d93f9cb, 1 parent af1f3d3)
Showing 5 changed files with 68 additions and 8 deletions.
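The same validation is added to each of the five notebooks. For readability, here it is rendered as plain Python (unescaped from the notebook-JSON lines shown in the diffs below); `compute` is the compute object each notebook already retrieves for the selected `compute_cluster`:

```python
# Computes with K80 GPUs are not supported for finetuning.
unsupported_gpu_vm_list = [
    "standard_nc6",
    "standard_nc12",
    "standard_nc24",
    "standard_nc24r",
]
if compute.size.lower() in unsupported_gpu_vm_list:
    raise ValueError(
        f"VM size {compute.size} is currently not supported for finetuning"
    )
```

The listed SKUs are the NC-series sizes backed by K80 GPUs, matching the "Computes with K80 GPUs are not supported" comment added in each diff.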
File 1 of 5:
@@ -122,10 +122,10 @@
" gpu_count_found = True\n",
"# if gpu_count_found not found, then print an error\n",
"if gpu_count_found:\n",
" print(f\"Number of GPU's in copute {compute.size}: {gpus_per_node}\")\n",
" print(f\"Number of GPU's in compute {compute.size}: {gpus_per_node}\")\n",
"else:\n",
" raise ValueError(\n",
" f\"Number of GPU's in copute {compute.size} not found. Available skus are: {available_sku_sizes}.\"\n",
" f\"Number of GPU's in compute {compute.size} not found. Available skus are: {available_sku_sizes}.\"\n",
" f\"This should not happen. Please check the selected compute cluster: {compute_cluster} and try again.\"\n",
" )\n",
"# CPU based finetune works only for single-node single-process\n",
@@ -135,6 +135,18 @@
" )\n",
" gpus_per_node = 1\n",
"\n",
"# Computes with K80 GPUs are not supported\n",
"unsupported_gpu_vm_list = [\n",
" \"standard_nc6\",\n",
" \"standard_nc12\",\n",
" \"standard_nc24\",\n",
" \"standard_nc24r\",\n",
"]\n",
"if compute.size.lower() in unsupported_gpu_vm_list:\n",
" raise ValueError(\n",
" f\"VM size {compute.size} is currently not supported for finetuning\"\n",
" )\n",
"\n",
"# genrating a unique timestamp that can be used for names and versions that need to be unique\n",
"timestamp = str(int(time.time()))"
]
File 2 of 5:
@@ -121,10 +121,10 @@
" gpu_count_found = True\n",
"# if gpu_count_found not found, then print an error\n",
"if gpu_count_found:\n",
" print(f\"Number of GPU's in copute {compute.size}: {gpus_per_node}\")\n",
" print(f\"Number of GPU's in compute {compute.size}: {gpus_per_node}\")\n",
"else:\n",
" raise ValueError(\n",
" f\"Number of GPU's in copute {compute.size} not found. Available skus are: {available_sku_sizes}.\"\n",
" f\"Number of GPU's in compute {compute.size} not found. Available skus are: {available_sku_sizes}.\"\n",
" f\"This should not happen. Please check the selected compute cluster: {compute_cluster} and try again.\"\n",
" )\n",
"# CPU based finetune works only for single-node single-process\n",
@@ -134,6 +134,18 @@
" )\n",
" gpus_per_node = 1\n",
"\n",
"# Computes with K80 GPUs are not supported\n",
"unsupported_gpu_vm_list = [\n",
" \"standard_nc6\",\n",
" \"standard_nc12\",\n",
" \"standard_nc24\",\n",
" \"standard_nc24r\",\n",
"]\n",
"if compute.size.lower() in unsupported_gpu_vm_list:\n",
" raise ValueError(\n",
" f\"VM size {compute.size} is currently not supported for finetuning\"\n",
" )\n",
"\n",
"# genrating a unique timestamp that can be used for names and versions that need to be unique\n",
"timestamp = str(int(time.time()))"
]
File 3 of 5:
@@ -122,10 +122,10 @@
" gpu_count_found = True\n",
"# if gpu_count_found not found, then print an error\n",
"if gpu_count_found:\n",
" print(f\"Number of GPU's in copute {compute.size}: {gpus_per_node}\")\n",
" print(f\"Number of GPU's in compute {compute.size}: {gpus_per_node}\")\n",
"else:\n",
" raise ValueError(\n",
" f\"Number of GPU's in copute {compute.size} not found. Available skus are: {available_sku_sizes}.\"\n",
" f\"Number of GPU's in compute {compute.size} not found. Available skus are: {available_sku_sizes}.\"\n",
" f\"This should not happen. Please check the selected compute cluster: {compute_cluster} and try again.\"\n",
" )\n",
"# CPU based finetune works only for single-node single-process\n",
@@ -135,6 +135,18 @@
" )\n",
" gpus_per_node = 1\n",
"\n",
"# Computes with K80 GPUs are not supported\n",
"unsupported_gpu_vm_list = [\n",
" \"standard_nc6\",\n",
" \"standard_nc12\",\n",
" \"standard_nc24\",\n",
" \"standard_nc24r\",\n",
"]\n",
"if compute.size.lower() in unsupported_gpu_vm_list:\n",
" raise ValueError(\n",
" f\"VM size {compute.size} is currently not supported for finetuning\"\n",
" )\n",
"\n",
"# generating a unique timestamp that can be used for names and versions that need to be unique\n",
"timestamp = str(int(time.time()))"
]
File 4 of 5:
@@ -125,7 +125,7 @@
" print(f\"Number of GPU's in copute {compute.size}: {gpus_per_node}\")\n",
"else:\n",
" raise ValueError(\n",
" f\"Number of GPU's in copute {compute.size} not found. Available skus are: {available_sku_sizes}.\"\n",
" f\"Number of GPU's in compute {compute.size} not found. Available skus are: {available_sku_sizes}.\"\n",
" f\"This should not happen. Please check the selected compute cluster: {compute_cluster} and try again.\"\n",
" )\n",
"# CPU based finetune works only for single-node single-process\n",
@@ -135,6 +135,18 @@
" )\n",
" gpus_per_node = 1\n",
"\n",
"# Computes with K80 GPUs are not supported\n",
"unsupported_gpu_vm_list = [\n",
" \"standard_nc6\",\n",
" \"standard_nc12\",\n",
" \"standard_nc24\",\n",
" \"standard_nc24r\",\n",
"]\n",
"if compute.size.lower() in unsupported_gpu_vm_list:\n",
" raise ValueError(\n",
" f\"VM size {compute.size} is currently not supported for finetuning\"\n",
" )\n",
"\n",
"# genrating a unique timestamp that can be used for names and versions that need to be unique\n",
"timestamp = str(int(time.time()))"
]
File 5 of 5:
@@ -124,7 +124,7 @@
" print(f\"Number of GPU's in copute {compute.size}: {gpus_per_node}\")\n",
"else:\n",
" raise ValueError(\n",
" f\"Number of GPU's in copute {compute.size} not found. Available skus are: {available_sku_sizes}.\"\n",
" f\"Number of GPU's in compute {compute.size} not found. Available skus are: {available_sku_sizes}.\"\n",
" f\"This should not happen. Please check the selected compute cluster: {compute_cluster} and try again.\"\n",
" )\n",
"# CPU based finetune works only for single-node single-process\n",
@@ -134,6 +134,18 @@
" )\n",
" gpus_per_node = 1\n",
"\n",
"# Computes with K80 GPUs are not supported\n",
"unsupported_gpu_vm_list = [\n",
" \"standard_nc6\",\n",
" \"standard_nc12\",\n",
" \"standard_nc24\",\n",
" \"standard_nc24r\",\n",
"]\n",
"if compute.size.lower() in unsupported_gpu_vm_list:\n",
" raise ValueError(\n",
" f\"VM size {compute.size} is currently not supported for finetuning\"\n",
" )\n",
"\n",
"# genrating a unique timestamp that can be used for names and versions that need to be unique\n",
"timestamp = str(int(time.time()))"
]
