This repository has been archived by the owner on Jan 11, 2023. It is now read-only.

[WIP] Add NVIDIA drivers for k8s linux agents #989

Merged (9 commits) on Oct 24, 2017
114 changes: 62 additions & 52 deletions docs/kubernetes/gpu.md
@@ -1,65 +1,75 @@
# Microsoft Azure Container Service Engine - Using GPUs with Kubernetes

## Deployment
If you created a Kubernetes cluster with one or more agent pools whose VM size is `Standard_NC*` or `Standard_NV*`, you can schedule GPU workloads on your cluster.
The NVIDIA drivers are automatically installed on every GPU agent in your cluster, so you don't need to install them manually, unless you require a specific version of the drivers. Currently, the installed driver is version 378.13.

To deploy a cluster with GPU support, follow the [Kubernetes walkthrough](deploy.md) and configure the agent pool(s) with a GPU-enabled VM size such as `Standard_NC12`.

To make sure everything is working, run `kubectl describe node <name-of-a-gpu-node>`. You should see the correct number of GPUs reported (this example shows 2 GPUs for an NC12 VM):

```
[...]
Capacity:
 alpha.kubernetes.io/nvidia-gpu:  2
 cpu:                             12
[...]
```
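For a quick cluster-wide view, the sketch below simply greps the node descriptions for the GPU capacity lines, and optionally checks the driver directly on an agent over SSH. The `azureuser` name comes from the example api model in this PR; the node address and the exact grep pattern are assumptions you can adjust:

```
# Cluster-wide view of each node's reported GPU capacity
kubectl describe nodes | grep -E "Name:|nvidia-gpu"

# Optionally, confirm the driver on an agent directly (requires SSH access to the node)
ssh azureuser@<agent-node-address> nvidia-smi
```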

If `alpha.kubernetes.io/nvidia-gpu` is `0` and you just created the cluster, you might have to wait a little bit: the driver installation takes about 12 minutes, and the node might join the cluster before the installation is completed. After a few minutes the node should restart and report the correct number of GPUs.

If you require a specific driver version instead of the one installed automatically, you can SSH into each GPU node and install it yourself, for example with a script such as:

```
curl -L -sf https://raw.githubusercontent.com/ritazh/acs-k8s-gpu/master/install-nvidia-driver.sh | sudo sh
```

## Running a GPU-enabled container

When running a GPU container, you need to specify how many GPUs you want to use. If you don't specify a GPU count, Kubernetes will assume you don't require any and will not map the devices into the container.
You also need to mount the drivers from the host (the Kubernetes agent) into the container.

On the host, the drivers are installed under `/usr/lib/nvidia-378`.

Here is an example template running TensorFlow:

``` yaml
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  labels:
    app: tensorflow
  name: tensorflow
spec:
  template:
    metadata:
      labels:
        app: tensorflow
    spec:
      containers:
      - name: tensorflow
        image: tensorflow/tensorflow:latest-gpu
        command: ["python", "main.py"]
        imagePullPolicy: IfNotPresent
        env:
        - name: LD_LIBRARY_PATH
          value: /usr/lib/nvidia:/usr/lib/x86_64-linux-gnu
        resources:
          requests:
            alpha.kubernetes.io/nvidia-gpu: 2
        volumeMounts:
        - mountPath: /usr/local/nvidia/bin
          name: bin
        - mountPath: /usr/lib/nvidia
          name: lib
        - mountPath: /usr/lib/x86_64-linux-gnu/libcuda.so.1
          name: libcuda
      volumes:
      - name: bin
        hostPath:
          path: /usr/lib/nvidia-378/bin
      - name: lib
        hostPath:
          path: /usr/lib/nvidia-378
      - name: libcuda
        hostPath:
          path: /usr/lib/x86_64-linux-gnu/libcuda.so.1
```
To verify, run `kubectl describe pod <pod-name>`; among the events you should see the pod being scheduled onto one of the GPU agents:

```
Successfully assigned <pod-name> to k8s-agentpool1-10960440-1
```

We request the GPUs we need through `alpha.kubernetes.io/nvidia-gpu` in the resource requests (2 in this example), and we mount the drivers from the host into the container.
Note that we also set the `LD_LIBRARY_PATH` environment variable so that Python can find the driver's libraries.

Some libraries, such as `libcuda.so`, are installed under `/usr/lib/x86_64-linux-gnu` on the host; depending on your needs, you might have to mount them separately, as shown above.
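To try it out, a minimal workflow looks like the following; the `tensorflow.yaml` filename is an assumption for wherever you save the manifest above:

```
kubectl create -f tensorflow.yaml

# Watch the pod come up and see which agent it was scheduled on
kubectl get pods -o wide

# Inspect the events and the GPU request of a specific pod
kubectl describe pod <pod-name>
```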



35 changes: 35 additions & 0 deletions examples/kubernetes-gpu/kubernetes.json
@@ -0,0 +1,35 @@
{
    "apiVersion": "vlabs",
    "properties": {
        "orchestratorProfile": {
            "orchestratorType": "Kubernetes"
        },
        "masterProfile": {
            "count": 1,
            "dnsPrefix": "",
            "vmSize": "Standard_D2_v2"
        },
        "agentPoolProfiles": [
            {
                "name": "agentpool1",
                "count": 3,
                "vmSize": "Standard_NC6",
                "availabilityProfile": "AvailabilitySet"
            }
        ],
        "linuxProfile": {
            "adminUsername": "azureuser",
            "ssh": {
                "publicKeys": [
                    {
                        "keyData": ""
                    }
                ]
            }
        },
        "servicePrincipalProfile": {
            "clientId": "",
            "secret": ""
        }
    }
}
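For completeness, here is a sketch of how this example api model is typically turned into a running cluster. The exact `acs-engine` invocation and output paths may differ between versions, and the resource group, location, and DNS prefix are placeholders:

```
# Generate ARM templates from the GPU api model
acs-engine generate examples/kubernetes-gpu/kubernetes.json

# Deploy the generated templates with the Azure CLI
az group create --name <resource-group> --location <location>
az group deployment create \
    --resource-group <resource-group> \
    --template-file _output/<dns-prefix>/azuredeploy.json \
    --parameters _output/<dns-prefix>/azuredeploy.parameters.json
```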
1 change: 1 addition & 0 deletions parts/kubernetesagentcustomdata.yml
@@ -176,6 +176,7 @@ runcmd:
- systemctl restart docker
- mkdir -p /etc/kubernetes/manifests
- usermod -aG docker {{WrapAsVariable "username"}}
{{GetGPUDriversInstallScript .}}
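# The placeholder above is replaced at template-generation time: for GPU SKUs covered by an NVIDIA agreement it expands
# to the driver install commands built in pkg/acsengine/engine.go, otherwise to a warning echo (and to nothing for non-GPU sizes).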
- echo `date`,`hostname`, PRE-APT-SYSTEMD-DAILY>>/opt/m
- /usr/lib/apt/apt.systemd.daily
- echo `date`,`hostname`, POST-APT-SYSTEMD-DAILY>>/opt/m
55 changes: 55 additions & 0 deletions pkg/acsengine/engine.go
@@ -1069,6 +1069,9 @@ func (t *TemplateGenerator) getTemplateFuncMap(cs *api.ContainerService) templat
            }
            return false
        },
        "GetGPUDriversInstallScript": func(profile *api.AgentPoolProfile) string {
            return getGPUDriversInstallScript(profile)
        },
        "HasLinuxSecrets": func() bool {
            return cs.Properties.LinuxProfile.HasSecrets()
        },
@@ -1297,6 +1300,58 @@ func getPackageGUID(orchestratorType string, orchestratorVersion string, masterC
    return ""
}

func getGPUDriversInstallScript(profile *api.AgentPoolProfile) string {

    // Latest version of the drivers. Later this parameter could be bubbled up so that users can choose a specific driver version.
    dv := "384"

    /*
        First we remove the nouveau drivers, which are the open-source drivers for NVIDIA cards. Nouveau is installed on NV-series VMs by default.
        Then we add the graphics-drivers PPA repository and install the proprietary drivers from there.
    */
    ppaScript := fmt.Sprintf(`- rmmod nouveau
- sh -c "echo \"blacklist nouveau\" >> /etc/modprobe.d/blacklist.conf"
- update-initramfs -u
- sudo add-apt-repository -y ppa:graphics-drivers
- sudo apt-get update
- sudo apt-get install -y nvidia-%s`, dv)
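
    // For illustration: with dv = "384", a SKU covered by the agreement (e.g. Standard_NC6)
    // gets the following runcmd entries rendered into the agent custom data:
    //
    //   - rmmod nouveau
    //   - sh -c "echo \"blacklist nouveau\" >> /etc/modprobe.d/blacklist.conf"
    //   - update-initramfs -u
    //   - sudo add-apt-repository -y ppa:graphics-drivers
    //   - sudo apt-get update
    //   - sudo apt-get install -y nvidia-384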

    // We don't have an agreement in place with NVIDIA to distribute the drivers for every SKU. For those VMs we simply log a warning message.
    na := getGPUDriversNotInstalledWarningMessage(profile.VMSize)

    /*
        If a new GPU SKU becomes available, add a key to this map, but only provide an installation script once it is confirmed
        that we have an agreement with NVIDIA for that specific GPU. Otherwise use the warning message.
    */
    dm := map[string]string{
        "Standard_NC6":      ppaScript,
        "Standard_NC12":     ppaScript,
        "Standard_NC24":     ppaScript,
        "Standard_NC24r":    ppaScript,
        "Standard_NV6":      ppaScript,
        "Standard_NV12":     ppaScript,
        "Standard_NV24":     ppaScript,
        "Standard_NV24r":    ppaScript,
        "Standard_NC6_v2":   na,
        "Standard_NC12_v2":  na,
        "Standard_NC24_v2":  na,
        "Standard_NC24r_v2": na,
        "Standard_ND6":      na,
        "Standard_ND12":     na,
        "Standard_ND24":     na,
        "Standard_ND24r":    na,
    }
    if s, ok := dm[profile.VMSize]; ok {
        return s
    }

    // The VM size is not a GPU SKU; no extra steps needed.
    return ""
}

func getGPUDriversNotInstalledWarningMessage(VMSize string) string {
    // The leading "- " keeps the rendered line a valid runcmd entry in the agent custom data.
    return fmt.Sprintf("- echo 'Warning: NVIDIA Drivers for this VM SKU (%v) are not automatically installed'", VMSize)
}

func getDCOSCustomDataPublicIPStr(orchestratorType string, masterCount int) string {
    if orchestratorType == api.DCOS {
        var buf bytes.Buffer
47 changes: 47 additions & 0 deletions pkg/acsengine/engine_test.go
@@ -317,3 +317,50 @@ func TestTemplateOutputPresence(t *testing.T) {
        }
    }
}

func TestGetGPUDriversInstallScript(t *testing.T) {

    // VM sizes with a GPU and an NVIDIA agreement for driver distribution
    validSkus := []string{
        "Standard_NC6",
        "Standard_NC12",
        "Standard_NC24",
        "Standard_NC24r",
        "Standard_NV6",
        "Standard_NV12",
        "Standard_NV24",
        "Standard_NV24r",
    }

    // VM sizes with a GPU but NO NVIDIA agreement for driver distribution
    noLicenceSkus := []string{
        "Standard_NC6_v2",
        "Standard_NC12_v2",
        "Standard_NC24_v2",
        "Standard_NC24r_v2",
        "Standard_ND6",
        "Standard_ND12",
        "Standard_ND24",
        "Standard_ND24r",
    }

    for _, sku := range validSkus {
        s := getGPUDriversInstallScript(&api.AgentPoolProfile{VMSize: sku})
        if s == "" || s == getGPUDriversNotInstalledWarningMessage(sku) {
            t.Fatalf("Expected an NVIDIA driver install script for SKU %v", sku)
        }
    }

    for _, sku := range noLicenceSkus {
        s := getGPUDriversInstallScript(&api.AgentPoolProfile{VMSize: sku})
        if s != getGPUDriversNotInstalledWarningMessage(sku) {
            t.Fatalf("An NVIDIA driver install script was provided for a VM SKU (%v) that is not covered by an NVIDIA agreement.", sku)
        }
    }

    // VM size without a GPU
    s := getGPUDriversInstallScript(&api.AgentPoolProfile{VMSize: "Standard_D2_v2"})
    if s != "" {
        t.Fatalf("A VM size without GPU should not receive a script; expected an empty string, received %v", s)
    }
}
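To run just this test locally, something like the following should work (standard `go test` flags, with the package path taken from the diff header above):

```
go test ./pkg/acsengine -run TestGetGPUDriversInstallScript -v
```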