Support CUDA 10

* Move to CUDA 10, cuDNN 7.3, and CUB 1.8. * Fixed a bug related to "pointer to pin pointer is disallowed" (#3063), which is exposed by newer versions of the VC tools. * Added a workaround for a potential VS2017 15.9 bug affecting the CNTK Debug build.
microsoft · Dec 13, 2018 · f178144 · f178144
1 parent 93e1009
commit f178144
Show file tree

Hide file tree

Showing 14 changed files with 72 additions and 55 deletions.
diff --git a/CNTK.Cpp.props b/CNTK.Cpp.props
@@ -3,7 +3,7 @@
   <Import Project="$(SolutionDir)\CNTK.Common.props" />
   <PropertyGroup>
     <CudaVersion />
-    <CudaVersion Condition="Exists('$(CUDA_PATH_V9_0)') And '$(CudaVersion)' == ''">9.0</CudaVersion>
+    <CudaVersion Condition="Exists('$(CUDA_PATH_V10_0)') And '$(CudaVersion)' == ''">10.0</CudaVersion>
 
     <NvmlDll>%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml.dll</NvmlDll>
     <NvmlDll Condition="Exists('c:\local\nvsmi9\NVSMI\nvml.dll')">c:\local\nvsmi9\NVSMI\nvml.dll</NvmlDll>
@@ -110,10 +110,10 @@
     <ProtobufLib Condition="$(DebugBuild)">libprotobufd.lib</ProtobufLib>
   </PropertyGroup>
 
-  <PropertyGroup Condition="'$(CudaVersion)' == '9.0'">
-    <CudaPath>$(CUDA_PATH_V9_0)</CudaPath>
-    <CudaRuntimeDll>cudart64_90.dll</CudaRuntimeDll>
-    <CudaDlls>cublas64_90.dll;cusparse64_90.dll;curand64_90.dll;$(CudaRuntimeDll)</CudaDlls>
+  <PropertyGroup Condition="'$(CudaVersion)' == '10.0'">
+    <CudaPath>$(CUDA_PATH_V10_0)</CudaPath>
+    <CudaRuntimeDll>cudart64_100.dll</CudaRuntimeDll>
+    <CudaDlls>cublas64_100.dll;cusparse64_100.dll;curand64_100.dll;$(CudaRuntimeDll)</CudaDlls>
 
     <!-- Use NvidiaCompute to define nvcc target architectures (will generate code to support them all, i.e. fat-binary, in release mode)
     In debug mode we only include cubin/PTX for 30 and rely on PTX / JIT to generate the required native cubin format
@@ -122,7 +122,7 @@
     <NvidiaCompute Condition="$(DebugBuild) And '$(NvidiaCompute)'==''">compute_30,sm_30</NvidiaCompute>
 
     <NvidiaCompute Condition="$(ReleaseBuild)">$(CNTK_CUDA_CODEGEN_RELEASE)</NvidiaCompute>
-    <NvidiaCompute Condition="$(ReleaseBuild) And '$(NvidiaCompute)'==''">compute_30,sm_30;compute_35,sm_35;compute_50,sm_50;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70</NvidiaCompute>
+    <NvidiaCompute Condition="$(ReleaseBuild) And '$(NvidiaCompute)'==''">compute_30,sm_30;compute_35,sm_35;compute_50,sm_50;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75</NvidiaCompute>
   </PropertyGroup>
 
   <PropertyGroup>
@@ -158,7 +158,7 @@
       <PreprocessorDefinitions>CNTK_VERSION="$(CntkVersion)";CNTK_VERSION_BANNER="$(CntkVersionBanner)";CNTK_COMPONENT_VERSION="$(CntkComponentVersion)"</PreprocessorDefinitions>
       <!-- UWP does not use MPI -->
       <PreprocessorDefinitions Condition="!$(IsUWP)">%(PreprocessorDefinitions);HAS_MPI=1</PreprocessorDefinitions>
-      <PreprocessorDefinitions Condition="'$(CudaVersion)' == '9.0'">%(PreprocessorDefinitions);CUDA_NO_HALF;__CUDA_NO_HALF_OPERATORS__</PreprocessorDefinitions>
+      <PreprocessorDefinitions Condition="'$(CudaVersion)' == '10.0'">%(PreprocessorDefinitions);CUDA_NO_HALF;__CUDA_NO_HALF_OPERATORS__</PreprocessorDefinitions>
     </ClCompile>
   </ItemDefinitionGroup>
 

diff --git a/Documentation/current_iteration.md b/Documentation/current_iteration.md
@@ -3,3 +3,16 @@
 ## Highlights of this release
 * Moved to CUDA 10 for both Windows and Linux.
 * Support advanced RNN loops in ONNX export.
+
+## CUDA support for CUDA 10
+
+CNTK now supports CUDA 10. On Windows, this requires updating the build environment to Visual Studio 2017 v15.9.
+
+To set up the build and runtime environment on Windows:
+* Install [Visual Studio 2017](https://www.visualstudio.com/downloads/). Note: going forward for CUDA 10 and beyond, it is no longer required to install and run with the specific VC Tools version 14.11.
+* Install [Nvidia CUDA 10](https://developer.nvidia.com/cuda-downloads?target_os=Windows&target_arch=x86_64)
+* From PowerShell, run:
+    [DevInstall.ps1](./Tools/devInstall/Windows/DevInstall.ps1)
+* Start Visual Studio 2017 and open [CNTK.sln](./CNTK.sln).
+
+To set up the build and runtime environment on Linux using Docker, please build the Ubuntu 16.04 Docker image using the Dockerfiles [here](./Tools/docker). For other Linux systems, please refer to the Dockerfiles to set up the dependent libraries for CNTK.
diff --git a/Makefile b/Makefile
@@ -22,7 +22,7 @@
 #   CUDA_PATH= Path to CUDA
 #     If not specified, GPU will not be enabled
 #   CUB_PATH= path to NVIDIA CUB installation, so $(CUB_PATH)/cub/cub.cuh exists
-#     defaults to /usr/local/cub-1.4.1
+#     defaults to /usr/local/cub-1.8.0
 #   CUDNN_PATH= path to NVIDIA cuDNN installation so $(CUDNN_PATH)/cuda/include/cudnn.h exists
 #     CuDNN version needs to be 5.0 or higher.
 #   KALDI_PATH= Path to Kaldi
@@ -144,8 +144,8 @@ ifdef CUDA_PATH
   endif
 
   ifndef CUB_PATH
-    $(info defaulting CUB_PATH to /usr/local/cub-1.4.1)
-    CUB_PATH=/usr/local/cub-1.4.1
+    $(info defaulting CUB_PATH to /usr/local/cub-1.8.0)
+    CUB_PATH=/usr/local/cub-1.8.0
   endif
 
   DEVICE = gpu

diff --git a/Source/ActionsLib/NDLNetworkBuilder.h b/Source/ActionsLib/NDLNetworkBuilder.h
@@ -500,7 +500,9 @@ class NDLBuilder
             }
         }
 
-        Init(executionEngine, networkConfig, newConfig, dumpFileName, deviceId);
+        // workaround for VS2017 15.9.2 Debug Win32 Access Violation error.
+        wstring networkConfigWstring = networkConfig;
+        Init(executionEngine, networkConfigWstring, newConfig, dumpFileName, deviceId);
     }
 
     virtual ~NDLBuilder()

diff --git a/Source/Extensibility/EvalWrapper/EvalWrapper.cpp b/Source/Extensibility/EvalWrapper/EvalWrapper.cpp
@@ -231,15 +231,15 @@ public ref class IEvaluateModelManaged : IDisposable
                 pin_ptr<const WCHAR> key = PtrToStringChars(item.Key);
                 shared_ptr<std::vector<ElemType>> ptr = CopyList(item.Value);
                 sharedInputVectors.push_back(ptr);
-                stdInputs.insert(MapEntry(key, ptr.get()));
+                stdInputs.insert(MapEntry(static_cast<std::wstring>(key), ptr.get()));
             }
 
             for each (auto item in outputs)
             {
                 pin_ptr<const WCHAR> key = PtrToStringChars(item.Key);
                 shared_ptr<std::vector<ElemType>> ptr = CopyList(item.Value);
                 sharedOutputVectors.push_back(ptr);
-                stdOutputs.insert(MapEntry(key, ptr.get()));
+                stdOutputs.insert(MapEntry(static_cast<std::wstring>(key), ptr.get()));
             }
 
             try
@@ -382,13 +382,13 @@ public ref class IEvaluateModelManaged : IDisposable
         std::vector<shared_ptr<std::vector<ElemType>>> sharedOutputVectors;
         pin_ptr<const WCHAR> inputKey = PtrToStringChars(inputNodeName);
         shared_ptr<std::vector<ElemType>> f2(featureVector);
-        stdInputs.insert(MapEntry(inputKey, f2.get()));
+        stdInputs.insert(MapEntry(static_cast<std::wstring>(inputKey), f2.get()));
 
         pin_ptr<const WCHAR> key = PtrToStringChars(outputKey);
         // Do we have to initialize the output nodes?
         shared_ptr<std::vector<ElemType>> ptr(new std::vector<ElemType>(outputSize));
         sharedOutputVectors.push_back(ptr);
-        stdOutputs.insert(MapEntry(key, ptr.get()));
+        stdOutputs.insert(MapEntry(static_cast<std::wstring>(key), ptr.get()));
         try
         {
             m_eval->Evaluate(stdInputs, stdOutputs);
@@ -517,7 +517,7 @@ public ref class IEvaluateModelManaged : IDisposable
             pin_ptr<const WCHAR> key = PtrToStringChars(item.Key);
             shared_ptr<std::vector<ElemType>> ptr = CopyList(item.Value);
             sharedOutputVectors.push_back(ptr);
-            stdOutputs.insert(MapEntry(key, ptr.get()));
+            stdOutputs.insert(MapEntry(static_cast<std::wstring>(key), ptr.get()));
         }
 
         try

diff --git a/Source/Math/GPUMatrixCUDAKernels.cuh b/Source/Math/GPUMatrixCUDAKernels.cuh
@@ -15,7 +15,6 @@
 #include "CommonMatrix.h"
 #include "GPUMatrix.h"
 #include "TensorOps.h" // for exp_() etc.
-#include "device_functions.h"
 #include <cuda_runtime.h>
 #include <assert.h>
 #include <float.h>

diff --git a/Source/Math/cudalib.cpp b/Source/Math/cudalib.cpp
@@ -22,6 +22,8 @@
 #pragma comment(lib, "cudart.lib") // link CUDA runtime
 #pragma comment(lib, "cublas.lib")
 
+#pragma warning(disable : 4505) // 'function' : unreferenced local function has been removed
+
 namespace msra { namespace cuda {
 
 static int devicesallocated = -1; // -1 means not initialized

diff --git a/Source/Math/half.hpp b/Source/Math/half.hpp
@@ -11,7 +11,10 @@
 #include "../CNTKv2LibraryDll/API/HalfConverter.hpp"
 
 #if !defined(CPUONLY) && __has_include("cuda_fp16.h")
-#include <cuda_fp16.h> // ASSUME CUDA9
+
+#pragma warning(disable : 4505) // 'function' : unreferenced local function has been removed
+
+#include <cuda_fp16.h> // ASSUME CUDA10
 #else
 class alignas(2) __half
 {

diff --git a/Tools/devInstall/Windows/DevInstall.ps1 b/Tools/devInstall/Windows/DevInstall.ps1
@@ -136,9 +136,9 @@ Function main
         $operation += OpScanProgram
         $operation += OpCheckVS2017
 
-        $operation += OpCheckCuda9
-        $operation += OpNVidiaCudnn7090 -cache $localCache -targetFolder $localDir
-        $operation += OpNvidiaCub174 -cache $localCache -targetFolder $localDir
+        $operation += OpCheckCuda10
+        $operation += OpNVidiaCudnn73100 -cache $localCache -targetFolder $localDir
+        $operation += OpNvidiaCub180 -cache $localCache -targetFolder $localDir
 
         $operation += OpCMake362 -cache $localCache
         $operation += OpMSMPI70 -cache $localCache

diff --git a/Tools/devInstall/Windows/helper/Operations.ps1 b/Tools/devInstall/Windows/helper/Operations.ps1
@@ -149,19 +149,19 @@ function OpMSMPI70SDK(
         } )
 }
 
-function OpNvidiaCub174(
+function OpNvidiaCub180(
     [parameter(Mandatory=$true)][string] $cache,
     [parameter(Mandatory=$true)][string] $targetFolder)
 {
-    $prodName = "NVidia CUB 1.7.4"
-    $prodFile = "cub-1.7.4.zip"
-    $prodSubDir = "cub-1.7.4"
+    $prodName = "NVidia CUB 1.8.0"
+    $prodFile = "cub-1.8.0.zip"
+    $prodSubDir = "cub-1.8.0"
     $targetPath = join-path $targetFolder $prodSubDir
     $envVar = "CUB_PATH";
     $envValue = $targetPath
-    $downloadSource = "https://github.com/NVlabs/cub/archive/1.7.4.zip"
+    $downloadSource = "https://github.com/NVlabs/cub/archive/1.8.0.zip"
 
-    @( @{ShortName = "CUB174"; VerifyInfo = "Checking for $prodName in $targetPath"; ActionInfo = "Installing $prodName";
+    @( @{ShortName = "CUB180"; VerifyInfo = "Checking for $prodName in $targetPath"; ActionInfo = "Installing $prodName";
          Verification = @( @{Function = "VerifyDirectory"; Path = "$targetPath" },
                            @{Function = "VerifyEnvironmentAndData"; EnvVar = $envVar; Content = $envValue } );
          Download = @( @{Function = "Download"; Method = "WebRequest"; Source = $downloadSource; Destination = "$cache\$prodFile" } );
@@ -170,20 +170,20 @@ function OpNvidiaCub174(
          } )
 }
 
-function OpNVidiaCudnn7090(
+function OpNVidiaCudnn73100(
     [parameter(Mandatory=$true)][string] $cache,
     [parameter(Mandatory=$true)][string] $targetFolder)
 {
-    $prodName = "NVidia CUDNN 7.0.5 for CUDA 9.0"
-    $cudnnWin = "cudnn-9.0-windows10-x64-v7.zip"
+    $prodName = "NVidia CUDNN 7.3.1 for CUDA 10.0"
+    $cudnnWin = "cudnn-10.0-windows10-x64-v7.3.1.20.zip"
 
-    $prodSubDir =  "cudnn-9.0-v7.0.5"
+    $prodSubDir =  "cudnn-10.0-v7.3.1"
     $targetPath = join-path $targetFolder $prodSubDir
     $envVar = "CUDNN_PATH"
     $envValue = join-path $targetPath "cuda"
-    $downloadSource = "http://developer.download.nvidia.com/compute/redist/cudnn/v7.0.5"
+    $downloadSource = "http://developer.download.nvidia.com/compute/redist/cudnn/v7.3.1"
 
-    @( @{ShortName = "CUDNN7090"; VerifyInfo = "Checking for $prodName in $targetPath"; ActionInfo = "Installing $prodName";
+    @( @{ShortName = "CUDNN73100"; VerifyInfo = "Checking for $prodName in $targetPath"; ActionInfo = "Installing $prodName";
          Verification = @( @{Function = "VerifyDirectory"; Path = $targetPath },
                            @{Function = "VerifyDirectory"; Path = $envValue },
                            @{Function = "VerifyEnvironmentAndData"; EnvVar = $envVar; Content = $envValue } );
@@ -308,13 +308,13 @@ function OpCheckVS2017
                         } )
 }
 
-function OpCheckCuda9
+function OpCheckCuda10
 {
-    $programPath = join-path $env:ProgramFiles "NVIDIA GPU Computing Toolkit\CUDA\v9.0"
-    @( @{Name = "Verify Installation of NVidia Cuda 9.0"; ShortName = "PRECUDA90"; VerifyInfo = "Checking for NVidia Cuda 9.0"; 
+    $programPath = join-path $env:ProgramFiles "NVIDIA GPU Computing Toolkit\CUDA\v10.0"
+    @( @{Name = "Verify Installation of NVidia Cuda 10.0"; ShortName = "PRECUDA100"; VerifyInfo = "Checking for NVidia Cuda 10.0";
          Verification = @( @{Function = "VerifyDirectory"; Path = $programPath },
-                           @{Function = "VerifyEnvironmentAndData"; EnvVar = "CUDA_PATH_V9_0"; Content = $programPath } ); 
-         PreReq = @( @{Function = "PrereqInfoCuda9" } );
+                           @{Function = "VerifyEnvironmentAndData"; EnvVar = "CUDA_PATH_V10_0"; Content = $programPath } );
+         PreReq = @( @{Function = "PrereqInfoCuda10" } );
          Action = @( @{Function = "StopInstallation" } )
         } )
 }

diff --git a/Tools/devInstall/Windows/helper/PreRequisites.ps1 b/Tools/devInstall/Windows/helper/PreRequisites.ps1
@@ -44,14 +44,14 @@ for more details.
 "
 }
 
-function PrereqInfoCuda9(
+function PrereqInfoCuda10(
     [Parameter(Mandatory = $true)][hashtable] $table
 )
 {
     FunctionIntro $table
     Write-Warning "
 
-Installation of NVidia CUDA 9.0 is a pre-requisite before installation can continue.
+Installation of NVidia CUDA 10.0 is a pre-requisite before installation can continue.
 Please check 
   https://docs.microsoft.com/en-us/cognitive-toolkit/Setup-CNTK-on-Windows
 for more details.

diff --git a/Tools/docker/CNTK-GPU-Image/Dockerfile b/Tools/docker/CNTK-GPU-Image/Dockerfile
@@ -1,16 +1,14 @@
-# Tag: nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
-# Created: 2017-11-21T06:34:14.675603521Z
-# Label: com.nvidia.build.id: 41212533
-# Label: com.nvidia.build.ref: e0edb5359ecb7bd3d86f0c9bfa18c2260b741ebb
-# Label: com.nvidia.cuda.version: 9.0.176
-# Label: com.nvidia.cudnn.version: 7.0.4.31
-# Label: com.nvidia.nccl.version: 2.1.2
+# Tag: nvidia/cuda:10.0-cudnn7-devel-ubuntu16.04
+# Created: 2018-10-22T21:14:30.605789926Z
+# Label: com.nvidia.cuda.version: 10.0
+# Label: com.nvidia.cudnn.version: 7.3.1.20
+# Label: com.nvidia.nccl.version: 2.3.5
 #
 # To build, run from the parent with the command line:
 # 	docker build -t <image name> -f CNTK-GPU-Image/Dockerfile .
 
 # Ubuntu 16.04.5
-FROM nvidia/cuda@sha256:33add9c50ab76b8f3a92187c0418ed600d5bea27690fda40711122fdc28ce2f4
+FROM nvidia/cuda@sha256:362e4e25aa46a18dfa834360140e91b61cdb0a3a2796c8e09dadb268b9de3f6b
 
 RUN apt-get update && apt-get install -y --no-install-recommends \
         autotools-dev \
@@ -85,7 +83,7 @@ RUN LIBZIP_VERSION=1.1.2 && \
 
 ENV LD_LIBRARY_PATH /usr/local/lib:$LD_LIBRARY_PATH
 
-RUN wget -q -O - https://github.com/NVlabs/cub/archive/1.7.4.tar.gz | tar -C /usr/local -xzf -
+RUN wget -q -O - https://github.com/NVlabs/cub/archive/1.8.0.tar.gz | tar -C /usr/local -xzf -
 
 RUN OPENCV_VERSION=3.1.0 && \
     wget -q -O - https://github.com/Itseez/opencv/archive/${OPENCV_VERSION}.tar.gz | tar -xzf - && \

diff --git a/bindings/java/Swig/post-build.cmd b/bindings/java/Swig/post-build.cmd
@@ -25,7 +25,7 @@ echo Building java.
 if not exist "%project_dir%com\microsoft\CNTK\lib\windows" mkdir "%project_dir%com\microsoft\CNTK\lib\windows"
 
 if "%is_gpu%" == "true" (
-  for %%x in (cublas64_90.dll cudart64_90.dll cudnn64_7.dll curand64_90.dll cusparse64_90.dll nvml.dll) do (
+  for %%x in (cublas64_100.dll cudart64_100.dll cudnn64_7.dll curand64_100.dll cusparse64_100.dll nvml.dll) do (
     copy "%output_dir%/%%x" ".\com\microsoft\CNTK\lib\windows\%%x" 
     echo %%x>> .\com\microsoft\CNTK\lib\windows\NATIVE_MANIFEST
   )

diff --git a/configure b/configure
@@ -127,13 +127,13 @@ default_openblas=""
 
 default_boost="boost-1.60.0"
 
-default_cudas="cuda-9.0"
+default_cudas="cuda-10.0"
 default_nccls="nccl"
 default_kaldis="kaldi-trunk kaldi-c024e8aa"
 default_gdk_includes="include/nvidia/gdk cuda/include"
 default_gdk_nvml_libs="src/gdk/nvml/lib cuda/lib64/stubs"
-default_cubs="cub-1.7.4"
-default_cudnns="cudnn-7.0"
+default_cubs="cub-1.8.0"
+default_cudnns="cudnn-7.3"
 default_opencvs="opencv-3.1.0 opencv-3.0.0"
 default_protobuf="protobuf-3.1.0"
 default_libzips="libzip-1.1.2"
@@ -688,7 +688,7 @@ do
                 then
                     echo "Cannot find NVIDIA CUB directory."
                     echo "Please specify a value for --with-cub"
-                    echo "NVIDIA CUB can be downloaded from https://github.com/NVlabs/cub/archive/1.4.1.zip, extract the archive to /usr/local"
+                    echo "NVIDIA CUB can be downloaded from https://github.com/NVlabs/cub/archive/1.8.0.zip, extract the archive to /usr/local"
                     exit 1
                 fi
             else
@@ -1082,7 +1082,7 @@ then
     if test x$cub_path = x ; then
         echo Cannot locate NVIDIA CUB directory
         echo GPU will be disabled
-        echo NVIDIA CUB can be downloaded from https://github.com/NVlabs/cub/archive/1.4.1.zip, extract the archive to /usr/local
+        echo NVIDIA CUB can be downloaded from https://github.com/NVlabs/cub/archive/1.8.0.zip, extract the archive to /usr/local
         enable_cuda=no
     else
         echo Found CUB at $cub_path