diff --git a/src/terraform/gke/nodepools.tf b/src/terraform/gke/nodepools.tf index 58f083bfd..4e3dfe067 100644 --- a/src/terraform/gke/nodepools.tf +++ b/src/terraform/gke/nodepools.tf @@ -19,6 +19,7 @@ resource "google_container_node_pool" "primary_nodes" { oauth_scopes = [ "https://www.googleapis.com/auth/logging.write", "https://www.googleapis.com/auth/monitoring", + "https://www.googleapis.com/auth/devstorage.read_only" ] labels = { @@ -60,6 +61,7 @@ resource "google_container_node_pool" "gpu_nodes" { oauth_scopes = [ "https://www.googleapis.com/auth/logging.write", "https://www.googleapis.com/auth/monitoring", + "https://www.googleapis.com/auth/devstorage.read_only" ] labels = { @@ -69,7 +71,7 @@ resource "google_container_node_pool" "gpu_nodes" { disk_size_gb = var.disk_size_in_gb preemptible = true - machine_type = var.gpu_node_type + machine_type = var.cpu_node_type tags = ["gke-node", "${var.project_id}-gke"] metadata = { disable-legacy-endpoints = "true" @@ -90,4 +92,4 @@ resource "google_container_node_pool" "gpu_nodes" { max_surge = 1 max_unavailable = 0 } -} \ No newline at end of file +} diff --git a/src/terraform/gke/plugin.tf b/src/terraform/gke/plugin.tf new file mode 100644 index 000000000..d29eac485 --- /dev/null +++ b/src/terraform/gke/plugin.tf @@ -0,0 +1,29 @@ +data "google_container_cluster" "cluster" { + depends_on = [ google_container_cluster.primary ] + project = var.project_id + name = var.cluster_name + location = var.region +} + +data "google_client_config" "default" {} + +provider "kubernetes" { + host = data.google_container_cluster.cluster.endpoint + token = data.google_client_config.default.access_token + cluster_ca_certificate = base64decode(data.google_container_cluster.cluster.master_auth[0].cluster_ca_certificate) +} + +provider "kubectl" { + host = data.google_container_cluster.cluster.endpoint + token = data.google_client_config.default.access_token + cluster_ca_certificate = base64decode(data.google_container_cluster.cluster.master_auth[0].cluster_ca_certificate) + load_config_file = false +} + +data "http" "nvidia_driver_installer_manifest" { + url = "https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/nvidia-driver-installer/cos/daemonset-preloaded.yaml" +} + +resource "kubectl_manifest" "nvidia_driver_installer" { + yaml_body = data.http.nvidia_driver_installer_manifest.body +} \ No newline at end of file diff --git a/src/terraform/gke/variables.tf b/src/terraform/gke/variables.tf index 9fbcf98f6..f208d6775 100644 --- a/src/terraform/gke/variables.tf +++ b/src/terraform/gke/variables.tf @@ -63,13 +63,13 @@ variable "max_gpu_node" { variable "initial_node_count" { description = "Initial number of nodes in this pool" type = number - default = 1 + default = 1 } variable "create_gpu_node_pool" { description = "Decide if this resource pool has to be created" type = bool - default = false + default = true } variable "disk_size_in_gb" { diff --git a/src/terraform/gke/versions.tf b/src/terraform/gke/versions.tf index a5423ea3e..3acf04c8b 100644 --- a/src/terraform/gke/versions.tf +++ b/src/terraform/gke/versions.tf @@ -8,6 +8,11 @@ terraform { version = "4.64.0" } + kubectl = { + source = "gavinbunney/kubectl" + version = "1.14.0" + } + kubernetes = { source = "hashicorp/kubernetes" } diff --git a/src/ui/common/src/components/resources/dialogs/gcpDialog.tsx b/src/ui/common/src/components/resources/dialogs/gcpDialog.tsx index 0657b27cc..db3cc8b12 100644 --- a/src/ui/common/src/components/resources/dialogs/gcpDialog.tsx +++ b/src/ui/common/src/components/resources/dialogs/gcpDialog.tsx @@ -20,8 +20,11 @@ const Placeholders: OndemandGKEConfig = { gcp_config_serialized: '', keepalive: '1200', cpu_node_type: 'n1-standard-4', + gpu_node_type: 'nvidia-tesla-t4', min_cpu_node: '1', max_cpu_node: '1', + min_gpu_node: '0', + max_gpu_node: '1', }; const GCPPlaceholders: GCPConfig = { @@ -81,6 +84,18 @@ export const GCPDialog: React.FC< }} /> + { + setValue('gpu_node_type', event.target.value); + }} + /> + + + { + setValue('min_gpu_node', event.target.value); + }} + /> + + { + setValue('max_gpu_node', event.target.value); + }} + /> ); diff --git a/src/ui/common/src/utils/resources.ts b/src/ui/common/src/utils/resources.ts index 7460d073b..6ff84e85e 100644 --- a/src/ui/common/src/utils/resources.ts +++ b/src/ui/common/src/utils/resources.ts @@ -272,8 +272,11 @@ export type OndemandGKEConfig = { gcp_config_serialized: string; keepalive: string; cpu_node_type: string; + gpu_node_type: string; min_cpu_node: string; max_cpu_node: string; + min_gpu_node: string; + max_gpu_node: string; }; export type GCPConfig = {