diff --git a/dpbench/benchmarks/CMakeLists.txt b/dpbench/benchmarks/CMakeLists.txt index 6869220b..8032000b 100644 --- a/dpbench/benchmarks/CMakeLists.txt +++ b/dpbench/benchmarks/CMakeLists.txt @@ -9,6 +9,7 @@ add_subdirectory(rambo) add_subdirectory(kmeans) add_subdirectory(knn) add_subdirectory(gpairs) +add_subdirectory(deformable_convolution) add_subdirectory(dbscan) # generate dpcpp version into config diff --git a/dpbench/benchmarks/deformable_convolution/CMakeLists.txt b/dpbench/benchmarks/deformable_convolution/CMakeLists.txt new file mode 100644 index 00000000..820b345a --- /dev/null +++ b/dpbench/benchmarks/deformable_convolution/CMakeLists.txt @@ -0,0 +1,5 @@ +# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +add_subdirectory(deformable_convolution_sycl_native_ext) diff --git a/dpbench/benchmarks/deformable_convolution/__init__.py b/dpbench/benchmarks/deformable_convolution/__init__.py new file mode 100644 index 00000000..a94a9e6b --- /dev/null +++ b/dpbench/benchmarks/deformable_convolution/__init__.py @@ -0,0 +1,6 @@ +# Copyright 2022 Intel Corporation +# +# SPDX-License-Identifier: Apache 2.0 + +"""Deformable convolution +""" diff --git a/dpbench/benchmarks/deformable_convolution/deformable_convolution_initialize.py b/dpbench/benchmarks/deformable_convolution/deformable_convolution_initialize.py new file mode 100644 index 00000000..9903e9d8 --- /dev/null +++ b/dpbench/benchmarks/deformable_convolution/deformable_convolution_initialize.py @@ -0,0 +1,53 @@ +# Copyright 2022 Intel Corp. +# +# SPDX-License-Identifier: Apache-2.0 + + +def initialize( + batch, + in_chw, + out_chw, + kernel_hw, + stride_hw, + dilation_hw, + pad_hw, + groups, + deformable_groups, + seed, + types_dict, +): + import numpy as np + import numpy.random as default_rng + + dtype: np.dtype = types_dict["float"] + + default_rng.seed(seed) + + input_size = [batch] + in_chw # nchw + output_size = [batch] + out_chw # nchw + offset_size = kernel_hw + [2, out_chw[1], out_chw[2]] # kh, kw, 2, oh, ow + weights_size = [out_chw[0], in_chw[0]] + kernel_hw # oc, ic, kh, kw + bias_size = out_chw[0] # oc + tmp_size = [ + in_chw[0], + kernel_hw[0], + kernel_hw[1], + out_chw[1], + out_chw[2], + ] # ic, kh, kw, oh, ow + + input = default_rng.random(input_size).astype(dtype) + output = np.empty(output_size, dtype=dtype) + offset = 2 * default_rng.random(offset_size).astype("float32") - 1 + weights = default_rng.random(weights_size).astype(dtype) + bias = default_rng.random(bias_size).astype(dtype) + tmp = np.empty(tmp_size, dtype=dtype) + + return ( + input, + output, + offset, + weights, + bias, + tmp, + ) diff --git a/dpbench/benchmarks/deformable_convolution/deformable_convolution_numba_mlir_p.py b/dpbench/benchmarks/deformable_convolution/deformable_convolution_numba_mlir_p.py new file mode 100644 index 00000000..77cbc5a9 --- /dev/null +++ b/dpbench/benchmarks/deformable_convolution/deformable_convolution_numba_mlir_p.py @@ -0,0 +1,166 @@ +# Copyright 2022 Intel Corp. +# +# SPDX-License-Identifier: Apache-2.0 + +import math + +import numpy as np +from numba import prange +from numba_mlir import njit + + +@njit(parallel=True, inline="always", fastmath=True, gpu_fp64_truncate="auto") +def bilinear(input, offset_y, offset_x): + height, width = input.shape + start_x = int(math.floor(offset_x)) + start_x_weight = 1 - (offset_x - start_x) + start_y = int(math.floor(offset_y)) + start_y_weight = 1 - (offset_y - start_y) + + output = 0 + if ( + offset_x >= width + or offset_y >= height + or offset_x <= -1 + or offset_y <= -1 + ): + return output + + if start_x >= 0 and start_y >= 0: + w = start_x_weight * start_y_weight + output += w * input[start_y, start_x] + + if start_x + 1 < width and start_y >= 0: + w = (1 - start_x_weight) * start_y_weight + output += w * input[start_y, start_x + 1] + + if start_x >= 0 and start_y + 1 < height: + w = start_x_weight * (1 - start_y_weight) + output += w * input[start_y + 1, start_x] + + if start_x + 1 < width and start_y + 1 < height: + w = (1 - start_x_weight) * (1 - start_y_weight) + output += w * input[start_y + 1, start_x + 1] + + return output / 2 + + +@njit(parallel=True, fastmath=True, gpu_fp64_truncate="auto") +def deform( + input, offset, output, stride, pad, dilation, groups, deformable_groups +): + k_height, k_width, _, out_height, out_width = offset.shape + channels, _, _ = input.shape + + k_h_m = (k_height - 1) // 2 + k_w_m = (k_width - 1) // 2 + for ckhkw in prange(channels * k_height * k_width): + for h in prange(out_height): + for w in prange(out_width): + c = ckhkw // (k_height * k_width) + khkw = ckhkw % (k_height * k_width) + kh = khkw // k_width + kw = khkw % k_width + + offset_y = ( + offset[kh, kw, 1, h, w] + + h * stride[0] + + (kh - k_h_m) * dilation[0] + - (pad[0] - k_h_m) + ) + offset_x = ( + offset[kh, kw, 0, h, w] + + w * stride[1] + + (kw - k_w_m) * dilation[1] + - (pad[1] - k_w_m) + ) + + output[c, kh, kw, h, w] = bilinear(input[c], offset_y, offset_x) + + +@njit(parallel=True, fastmath=True, gpu_fp64_truncate="auto") +def deformable_convolution_b1( + input, + output, + offset, + weights, + bias, + tmp, + stride, + pad, + dilation, + groups, + deformable_groups, +): + out_channels, height, width = output.shape + _, in_channels, k_height, k_width = weights.shape + + deform(input, offset, tmp, stride, pad, dilation, groups, deformable_groups) + + tmp = tmp.reshape((in_channels * k_height * k_width, height * width)) + + _weights = weights.reshape((out_channels, in_channels * k_height * k_width)) + _output = output.reshape((out_channels, height * width)) + np.dot(_weights, tmp, _output) + + _bias = bias.reshape((out_channels, 1)) + _output[:] = _output + _bias + + +@njit(parallel=True, gpu_fp64_truncate="auto") +def jdeformable_convolution( + input, + output, + offset, + weights, + bias, + tmp, + stride, + pad, + dilation, + groups, + deformable_groups, +): + batch, _, _, _ = input.shape + for b in range(batch): + deformable_convolution_b1( + input[b], + output[b], + offset, + weights, + bias, + tmp, + stride, + pad, + dilation, + groups, + deformable_groups, + ) + + +def deformable_convolution( + input, + output, + offset, + weights, + bias, + tmp, + stride_hw, + pad_hw, + dilation_hw, + groups, + deformable_groups, +): + jdeformable_convolution( + input, + output, + offset, + weights, + bias, + tmp, + tuple(stride_hw), + tuple(pad_hw), + tuple(dilation_hw), + groups, + deformable_groups, + ) diff --git a/dpbench/benchmarks/deformable_convolution/deformable_convolution_sycl_native_ext/CMakeLists.txt b/dpbench/benchmarks/deformable_convolution/deformable_convolution_sycl_native_ext/CMakeLists.txt new file mode 100644 index 00000000..e5911eb6 --- /dev/null +++ b/dpbench/benchmarks/deformable_convolution/deformable_convolution_sycl_native_ext/CMakeLists.txt @@ -0,0 +1,28 @@ +# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +cmake_minimum_required(VERSION 3.23) + +set(py_module_name _deformable_convolution_sycl) +pybind11_add_module(${py_module_name} + MODULE + deformable_convolution_sycl/impl.cpp +) + +find_package(TBB CONFIG REQUIRED PATHS ${CMAKE_CURRENT_SOURCE_DIR}/cmake NO_DEFAULT_PATH) +find_package(IntelSYCL CONFIG REQUIRED) + +if (DEFINED ENV{CONDA_PREFIX}) + set(MKL_ROOT $ENV{CONDA_PREFIX}) +endif() +find_package(MKL CONFIG REQUIRED PATHS ${CMAKE_CURRENT_SOURCE_DIR}/cmake NO_DEFAULT_PATH) + +target_compile_options(${py_module_name} PUBLIC $) +target_include_directories(${py_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS} $) +target_link_libraries(${py_module_name} PUBLIC $ ${MKL_SYCL}) + +file(RELATIVE_PATH py_module_dest ${CMAKE_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) +install(TARGETS ${py_module_name} + DESTINATION ${py_module_dest}/deformable_convolution_sycl +) diff --git a/dpbench/benchmarks/deformable_convolution/deformable_convolution_sycl_native_ext/__init__.py b/dpbench/benchmarks/deformable_convolution/deformable_convolution_sycl_native_ext/__init__.py new file mode 100644 index 00000000..01a4191d --- /dev/null +++ b/dpbench/benchmarks/deformable_convolution/deformable_convolution_sycl_native_ext/__init__.py @@ -0,0 +1,9 @@ +# Copyright 2022 Intel Corporation +# +# SPDX-License-Identifier: Apache 2.0 + +from .deformable_convolution_sycl._deformable_convolution_sycl import ( + deformable_convolution as deformable_convolution_sycl, +) + +__all__ = ["deformable_convolution_sycl"] diff --git a/dpbench/benchmarks/deformable_convolution/deformable_convolution_sycl_native_ext/cmake/MKLConfig.cmake b/dpbench/benchmarks/deformable_convolution/deformable_convolution_sycl_native_ext/cmake/MKLConfig.cmake new file mode 100644 index 00000000..e83661c4 --- /dev/null +++ b/dpbench/benchmarks/deformable_convolution/deformable_convolution_sycl_native_ext/cmake/MKLConfig.cmake @@ -0,0 +1,852 @@ +#=============================================================================== +# Copyright 2021-2022 Intel Corporation. +# +# This software and the related documents are Intel copyrighted materials, and +# your use of them is governed by the express license under which they were +# provided to you (License). Unless the License provides otherwise, you may not +# use, modify, copy, publish, distribute, disclose or transmit this software or +# the related documents without Intel's prior written permission. +# +# This software and the related documents are provided as is, with no express +# or implied warranties, other than those that are expressly stated in the +# License. +#=============================================================================== + +#=================================================================== +# CMake Config file for Intel(R) oneAPI Math Kernel Library (oneMKL) +#=================================================================== + +#=============================================================================== +# Input parameters +#================= +#------------- +# Main options +#------------- +# MKL_ROOT: oneMKL root directory (May be required for non-standard install locations. Optional otherwise.) +# Default: use location from MKLROOT environment variable or /../../../ if MKLROOT is not defined +# MKL_ARCH +# Values: ia32 intel64 +# Default: intel64 +# MKL_LINK +# Values: static, dynamic, sdl +# Default: dynamic +# Exceptions:- DPC++ doesn't support sdl +# MKL_THREADING +# Values: sequential, +# intel_thread (Intel OpenMP), +# gnu_thread (GNU OpenMP), +# pgi_thread (PGI OpenMP), +# tbb_thread +# Default: intel_thread +# Exceptions:- DPC++ defaults to tbb, PGI compiler on Windows defaults to pgi_thread +# MKL_INTERFACE (for MKL_ARCH=intel64 only) +# Values: lp64, ilp64 +# GNU or INTEL interface will be selected based on Compiler. +# Default: ilp64 +# MKL_MPI +# Values: intelmpi, mpich, openmpi, msmpi, mshpc +# Default: intelmpi +#----------------------------------- +# Special options (OFF by default) +#----------------------------------- +# ENABLE_BLAS95: Enables BLAS Fortran95 API +# ENABLE_LAPACK95: Enables LAPACK Fortran95 API +# ENABLE_BLACS: Enables cluster BLAS library +# ENABLE_CDFT: Enables cluster DFT library +# ENABLE_CPARDISO: Enables cluster PARDISO functionality +# ENABLE_SCALAPACK: Enables cluster LAPACK library +# ENABLE_OMP_OFFLOAD: Enables OpenMP Offload functionality +# +#================== +# Output parameters +#================== +# MKL_ROOT +# oneMKL root directory. +# MKL_INCLUDE +# Use of target_include_directories() is recommended. +# INTERFACE_INCLUDE_DIRECTORIES property is set on mkl_core and mkl_rt libraries. +# Alternatively, this variable can be used directly (not recommended as per Modern CMake) +# MKL_ENV +# Provides all environment variables based on input parameters. +# Currently useful for mkl_rt linking and BLACS on Windows. +# Must be set as an ENVIRONMENT property. +# Example: +# add_test(NAME mytest COMMAND myexe) +# if(MKL_ENV) +# set_tests_properties(mytest PROPERTIES ENVIRONMENT "${MKL_ENV}") +# endif() +# +# MKL:: +# IMPORTED targets to link MKL libraries individually or when using a custom link-line. +# mkl_core and mkl_rt have INTERFACE_* properties set to them. +# Please refer to Intel(R) oneMKL Link Line Advisor for help with linking. +# +# Below INTERFACE targets provide full link-lines for direct use. +# Example: +# target_link_options( PUBLIC $) +# +# MKL::MKL +# Link line for C and Fortran API +# MKL::MKL_DPCPP +# Link line for DPC++ API +# +# Note: For Device API, library linking is not required. +# Compile options can be added from the INTERFACE_COMPILE_OPTIONS property on MKL::MKL_DPCPP +# Include directories can be added from the INTERFACE_INCLUDE_DIRECTORIES property on MKL::MKL_DPCPP +# +# Note: Output parameters' and targets' availability can change +# based on Input parameters and application project languages. +#=============================================================================== + +function(mkl_message MSG_MODE MSG_TEXT) + if(MSG_MODE STREQUAL "FATAL_ERROR") + message(${MSG_MODE} ${MSG_TEXT}) + else() + if(NOT MKL_FIND_QUIETLY) + message(${MSG_MODE} ${MSG_TEXT}) + endif() + endif() +endfunction() + +if(${CMAKE_VERSION} VERSION_LESS "3.13") + mkl_message(FATAL_ERROR "The minimum supported CMake version is 3.13. You are running version ${CMAKE_VERSION}") +endif() + +include_guard() +include(FindPackageHandleStandardArgs) + +if(NOT MKL_LIBRARIES) + +# Set CMake policies for well-defined behavior across CMake versions +cmake_policy(SET CMP0011 NEW) +cmake_policy(SET CMP0057 NEW) + +# Project Languages +get_property(languages GLOBAL PROPERTY ENABLED_LANGUAGES) +list(APPEND MKL_LANGS C CXX Fortran) +foreach(lang ${languages}) + if(${lang} IN_LIST MKL_LANGS) + list(APPEND CURR_LANGS ${lang}) + endif() +endforeach() +list(REMOVE_DUPLICATES CURR_LANGS) + +option(ENABLE_BLAS95 "Enables BLAS Fortran95 API" OFF) +option(ENABLE_LAPACK95 "Enables LAPACK Fortran95 API" OFF) +option(ENABLE_BLACS "Enables cluster BLAS library" OFF) +option(ENABLE_CDFT "Enables cluster DFT library" OFF) +option(ENABLE_CPARDISO "Enables cluster PARDISO functionality" OFF) +option(ENABLE_SCALAPACK "Enables cluster LAPACK library" OFF) +option(ENABLE_OMP_OFFLOAD "Enables OpenMP Offload functionality" OFF) + +# Use MPI if any of these are enabled +if(ENABLE_BLACS OR ENABLE_CDFT OR ENABLE_SCALAPACK OR ENABLE_CPARDISO) + set(USE_MPI ON) +endif() + +# Check Parameters +function(define_param TARGET_PARAM DEFAULT_PARAM SUPPORTED_LIST) + if(NOT DEFINED ${TARGET_PARAM} AND NOT DEFINED ${DEFAULT_PARAM}) + mkl_message(STATUS "${TARGET_PARAM}: Undefined") + elseif(NOT DEFINED ${TARGET_PARAM} AND DEFINED ${DEFAULT_PARAM}) + set(${TARGET_PARAM} "${${DEFAULT_PARAM}}" CACHE STRING "Choose ${TARGET_PARAM} options are: ${${SUPPORTED_LIST}}") + foreach(opt ${${DEFAULT_PARAM}}) + set(STR_LIST "${STR_LIST} ${opt}") + endforeach() + mkl_message(STATUS "${TARGET_PARAM}: None, set to `${STR_LIST}` by default") + elseif(${SUPPORTED_LIST}) + set(ITEM_FOUND 1) + foreach(opt ${${TARGET_PARAM}}) + if(NOT ${opt} IN_LIST ${SUPPORTED_LIST}) + set(ITEM_FOUND 0) + endif() + endforeach() + if(ITEM_FOUND EQUAL 0) + foreach(opt ${${SUPPORTED_LIST}}) + set(STR_LIST "${STR_LIST} ${opt}") + endforeach() + mkl_message(FATAL_ERROR "Invalid ${TARGET_PARAM} `${${TARGET_PARAM}}`, options are: ${STR_LIST}") + else() + mkl_message(STATUS "${TARGET_PARAM}: ${${TARGET_PARAM}}") + endif() + else() + mkl_message(STATUS "${TARGET_PARAM}: ${${TARGET_PARAM}}") + endif() +endfunction() + +#================ +# Compiler checks +#================ + +if(CMAKE_C_COMPILER) + get_filename_component(C_COMPILER_NAME ${CMAKE_C_COMPILER} NAME) +endif() +if(CMAKE_CXX_COMPILER) + get_filename_component(CXX_COMPILER_NAME ${CMAKE_CXX_COMPILER} NAME) +endif() +if(CMAKE_Fortran_COMPILER) + get_filename_component(Fortran_COMPILER_NAME ${CMAKE_Fortran_COMPILER} NAME) +endif() + +# Determine Compiler Family +if(CXX_COMPILER_NAME STREQUAL "dpcpp" OR CXX_COMPILER_NAME STREQUAL "dpcpp.exe" + OR CXX_COMPILER_NAME STREQUAL "icpx" OR CXX_COMPILER_NAME STREQUAL "icx.exe") + set(DPCPP_COMPILER ON) +endif() +if(C_COMPILER_NAME MATCHES "^clang") + set(CLANG_COMPILER ON) +endif() +if(CMAKE_C_COMPILER_ID STREQUAL "PGI" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI") + set(PGI_COMPILER ON) +elseif(CMAKE_C_COMPILER_ID STREQUAL "Intel" OR CMAKE_Fortran_COMPILER_ID STREQUAL "Intel" + OR CMAKE_C_COMPILER_ID STREQUAL "IntelLLVM" OR CMAKE_Fortran_COMPILER_ID STREQUAL "IntelLLVM") + set(INTEL_COMPILER ON) +else() + if(CMAKE_C_COMPILER_ID STREQUAL "GNU") + set(GNU_C_COMPILER ON) + endif() + if(CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + set(GNU_Fortran_COMPILER ON) + endif() +endif() + +if(USE_MPI AND (C_COMPILER_NAME MATCHES "^mpi" OR Fortran_COMPILER_NAME MATCHES "^mpi")) + set(USE_MPI_SCRIPT ON) +endif() + +#================ + +#================ +# System-specific +#================ + +# Extensions +if(UNIX) + set(LIB_PREFIX "lib") + set(LIB_EXT ".a") + set(DLL_EXT ".so") + if(APPLE) + set(DLL_EXT ".dylib") + endif() + set(LINK_PREFIX "-l") + set(LINK_SUFFIX "") +else() + set(LIB_PREFIX "") + set(LIB_EXT ".lib") + set(DLL_EXT "_dll.lib") + set(LINK_PREFIX "") + set(LINK_SUFFIX ".lib") +endif() + +# Set target system architecture +set(DEFAULT_MKL_ARCH intel64) +if(DPCPP_COMPILER OR PGI_COMPILER OR ENABLE_OMP_OFFLOAD OR USE_MPI) + set(MKL_ARCH_LIST intel64) +else() + set(MKL_ARCH_LIST ia32 intel64) +endif() +define_param(MKL_ARCH DEFAULT_MKL_ARCH MKL_ARCH_LIST) + +#================ + +#========== +# Setup MKL +#========== + +# Set MKL_ROOT directory +if(NOT DEFINED MKL_ROOT) + if(DEFINED ENV{MKLROOT}) + set(MKL_ROOT $ENV{MKLROOT}) + else() + get_filename_component(MKL_CMAKE_PATH "${CMAKE_CURRENT_LIST_DIR}" REALPATH) + get_filename_component(MKL_ROOT "${MKL_CMAKE_PATH}/../../../" ABSOLUTE) + mkl_message(STATUS "MKL_ROOT ${MKL_ROOT}") + endif() +endif() +string(REPLACE "\\" "/" MKL_ROOT ${MKL_ROOT}) + +# Define MKL_LINK +set(DEFAULT_MKL_LINK dynamic) +if(DPCPP_COMPILER OR USE_MPI) + set(MKL_LINK_LIST static dynamic) +else() + set(MKL_LINK_LIST static dynamic sdl) +endif() +define_param(MKL_LINK DEFAULT_MKL_LINK MKL_LINK_LIST) + +# Define MKL_INTERFACE +if(MKL_ARCH STREQUAL "intel64") + set(IFACE_TYPE intel) + if(GNU_Fortran_COMPILER) + set(IFACE_TYPE gf) + endif() + if(DPCPP_COMPILER) + if(MKL_INTERFACE) + set(MKL_INTERFACE_FULL intel_${MKL_INTERFACE}) + endif() + set(DEFAULT_MKL_INTERFACE intel_ilp64) + set(MKL_INTERFACE_LIST intel_ilp64) + else() + if(MKL_INTERFACE) + set(MKL_INTERFACE_FULL ${IFACE_TYPE}_${MKL_INTERFACE}) + endif() + set(DEFAULT_MKL_INTERFACE ${IFACE_TYPE}_ilp64) + set(MKL_INTERFACE_LIST ${IFACE_TYPE}_ilp64 ${IFACE_TYPE}_lp64) + endif() + define_param(MKL_INTERFACE_FULL DEFAULT_MKL_INTERFACE MKL_INTERFACE_LIST) +else() + if(WIN32) + set(MKL_INTERFACE_FULL intel_c) + elseif(NOT APPLE) + if(GNU_Fortran_COMPILER) + set(MKL_INTERFACE_FULL gf) + else() + set(MKL_INTERFACE_FULL intel) + endif() + else() + mkl_message(FATAL_ERROR "OSX does not support MKL_ARCH ia32.") + endif() +endif() +if(MKL_INTERFACE_FULL MATCHES "ilp64") + set(MKL_INTERFACE "ilp64") +else() + set(MKL_INTERFACE "lp64") +endif() + +# Define MKL headers +find_path(MKL_H mkl.h + HINTS ${MKL_ROOT} + PATH_SUFFIXES include) +list(APPEND MKL_INCLUDE ${MKL_H}) + +# Add pre-built F95 Interface Modules +if(INTEL_COMPILER AND (ENABLE_BLAS95 OR ENABLE_LAPACK95)) + if(MKL_ARCH STREQUAL "intel64") + list(APPEND MKL_INCLUDE "${MKL_ROOT}/include/${MKL_ARCH}/${MKL_INTERFACE}") + else() + list(APPEND MKL_INCLUDE "${MKL_ROOT}/include/${MKL_ARCH}") + endif() +endif() + +# Define MKL_THREADING +# All APIs support sequential threading +set(MKL_THREADING_LIST "sequential" "intel_thread" "tbb_thread") +set(DEFAULT_MKL_THREADING intel_thread) +# DPC++ API supports TBB threading, but not OpenMP threading +if(DPCPP_COMPILER) + set(DEFAULT_MKL_THREADING tbb_thread) + list(REMOVE_ITEM MKL_THREADING_LIST intel_thread) +# C, Fortran API +elseif(PGI_COMPILER) + # PGI compiler supports PGI OpenMP threading, additionally + list(APPEND MKL_THREADING_LIST pgi_thread) + # PGI compiler does not support TBB threading + list(REMOVE_ITEM MKL_THREADING_LIST tbb_thread) + if(WIN32) + # PGI 19.10 and 20.1 on Windows, do not support Intel OpenMP threading + list(REMOVE_ITEM MKL_THREADING_LIST intel_thread) + set(DEFAULT_MKL_THREADING pgi_thread) + endif() +elseif(GNU_C_COMPILER OR GNU_Fortran_COMPILER OR CLANG_COMPILER) + list(APPEND MKL_THREADING_LIST gnu_thread) +else() + # Intel and Microsoft compilers + # Nothing to do, only for completeness +endif() +define_param(MKL_THREADING DEFAULT_MKL_THREADING MKL_THREADING_LIST) + +# Define MKL_MPI +set(DEFAULT_MKL_MPI intelmpi) +if(UNIX) + if(APPLE) + # Override defaults for OSX + set(DEFAULT_MKL_MPI mpich) + set(MKL_MPI_LIST mpich) + else() + set(MKL_MPI_LIST intelmpi openmpi mpich mpich2) + endif() +else() + # Windows + set(MKL_MPI_LIST intelmpi mshpc msmpi) +endif() +define_param(MKL_MPI DEFAULT_MKL_MPI MKL_MPI_LIST) +# MSMPI is now called MSHPC. MSMPI option exists for backward compatibility. +if(MKL_MPI STREQUAL "mshpc") + set(MKL_MPI msmpi) +endif() +find_package_handle_standard_args(MKL REQUIRED_VARS MKL_MPI) + +# Checkpoint - Verify if required options are defined +find_package_handle_standard_args(MKL REQUIRED_VARS MKL_ROOT MKL_ARCH MKL_INCLUDE MKL_LINK MKL_THREADING MKL_INTERFACE_FULL) + +# Provides a list of IMPORTED targets for the project +if(NOT DEFINED MKL_IMPORTED_TARGETS) + set(MKL_IMPORTED_TARGETS "") +endif() + +# Clear temporary variables +set(MKL_C_COPT "") +set(MKL_F_COPT "") +set(MKL_SDL_COPT "") +set(MKL_CXX_COPT "") +set(MKL_DPCPP_COPT "") +set(MKL_DPCPP_LOPT "") +set(MKL_OFFLOAD_COPT "") +set(MKL_OFFLOAD_LOPT "") + +set(MKL_SUPP_LINK "") # Other link options. Usually at the end of the link-line. +set(MKL_LINK_LINE) # For MPI only +set(MKL_ENV_PATH "") # Temporary variable to work with PATH +set(MKL_ENV "") # Exported environment variables + +# Modify PATH variable to make it CMake-friendly +set(OLD_PATH $ENV{PATH}) +string(REPLACE ";" "\;" OLD_PATH "${OLD_PATH}") + +# Compiler options +if(GNU_C_COMPILER OR GNU_Fortran_COMPILER) + if(MKL_ARCH STREQUAL "ia32") + list(APPEND MKL_C_COPT -m32) + list(APPEND MKL_F_COPT -m32) + else() + list(APPEND MKL_C_COPT -m64) + list(APPEND MKL_F_COPT -m64) + endif() +endif() + +# Additonal compiler & linker options +if(CXX_COMPILER_NAME STREQUAL "icpx" OR CXX_COMPILER_NAME STREQUAL "icx.exe") + list(APPEND MKL_DPCPP_COPT "-fsycl") + list(APPEND MKL_DPCPP_LOPT "-fsycl") +endif() +if(DPCPP_COMPILER OR ENABLE_OMP_OFFLOAD) + if(MKL_LINK STREQUAL "static") + list(APPEND MKL_DPCPP_LOPT "-fsycl-device-code-split=per_kernel") + list(APPEND MKL_OFFLOAD_LOPT "-fsycl-device-code-split=per_kernel") + endif() +endif() + +# For OpenMP Offload +if(ENABLE_OMP_OFFLOAD) + if(WIN32) + if(OPENMP_VERSION VERSION_GREATER_EQUAL "5.1") + if("Fortran" IN_LIST CURR_LANGS) + list(APPEND MKL_OFFLOAD_COPT -Qiopenmp -Qopenmp-targets:spir64 -DONEMKL_USE_OPENMP_VERSION=202011) + else() + list(APPEND MKL_OFFLOAD_COPT -Qiopenmp -Qopenmp-targets:spir64 -Qopenmp-version:51 -DONEMKL_USE_OPENMP_VERSION=202011) + endif() + else() + list(APPEND MKL_OFFLOAD_COPT -Qiopenmp -Qopenmp-targets:spir64) + endif() + # -MD and -MDd are manually added here because offload functionality uses DPC++ runtime. + if(CMAKE_BUILD_TYPE MATCHES "Debug|DebInfo") + list(APPEND MKL_OFFLOAD_COPT -MDd) + else() + list(APPEND MKL_OFFLOAD_COPT -MD) + endif() + list(APPEND MKL_OFFLOAD_LOPT -Qiopenmp -Qopenmp-targets:spir64 -fsycl) + set(SKIP_LIBPATH ON) + else() + if(OPENMP_VERSION VERSION_GREATER_EQUAL "5.1") + if("Fortran" IN_LIST CURR_LANGS) + list(APPEND MKL_OFFLOAD_COPT -fiopenmp -fopenmp-targets=spir64 -DONEMKL_USE_OPENMP_VERSION=202011) + else() + list(APPEND MKL_OFFLOAD_COPT -fiopenmp -fopenmp-targets=spir64 -fopenmp-version=51 -DONEMKL_USE_OPENMP_VERSION=202011) + endif() + else () + list(APPEND MKL_OFFLOAD_COPT -fiopenmp -fopenmp-targets=spir64) + endif() + list(APPEND MKL_OFFLOAD_LOPT -fiopenmp -fopenmp-targets=spir64 -fsycl) + if(APPLE) + list(APPEND MKL_SUPP_LINK -lc++) + else() + list(APPEND MKL_SUPP_LINK -lstdc++) + endif() + endif() +endif() + +# For selected Interface +if(MKL_INTERFACE_FULL) + if(MKL_ARCH STREQUAL "ia32") + if(GNU_Fortran_COMPILER) + set(MKL_SDL_IFACE_ENV "GNU") + endif() + else() + if(GNU_Fortran_COMPILER) + set(MKL_SDL_IFACE_ENV "GNU,${MKL_INTERFACE}") + else() + set(MKL_SDL_IFACE_ENV "${MKL_INTERFACE}") + endif() + if(MKL_INTERFACE STREQUAL "ilp64") + if("Fortran" IN_LIST CURR_LANGS) + if(INTEL_COMPILER) + if(WIN32) + list(APPEND MKL_F_COPT "-4I8") + else() + list(APPEND MKL_F_COPT "-i8") + endif() + elseif(GNU_Fortran_COMPILER) + list(APPEND MKL_F_COPT "-fdefault-integer-8") + elseif(PGI_COMPILER) + list(APPEND MKL_F_COPT "-i8") + endif() + endif() + list(INSERT MKL_C_COPT 0 "-DMKL_ILP64") + list(INSERT MKL_SDL_COPT 0 "-DMKL_ILP64") + list(INSERT MKL_CXX_COPT 0 "-DMKL_ILP64") + list(INSERT MKL_OFFLOAD_COPT 0 "-DMKL_ILP64") + else() + # lp64 + endif() + endif() + if(MKL_SDL_IFACE_ENV) + string(TOUPPER ${MKL_SDL_IFACE_ENV} MKL_SDL_IFACE_ENV) + endif() +endif() # MKL_INTERFACE_FULL + +# All MKL Libraries +if(WIN32 AND CMAKE_BUILD_TYPE MATCHES "Debug|DebInfo") + set(MKL_SYCL mkl_sycld) +else() + set(MKL_SYCL mkl_sycl) +endif() +set(MKL_IFACE_LIB mkl_${MKL_INTERFACE_FULL}) +set(MKL_CORE mkl_core) +if(WIN32 AND CMAKE_BUILD_TYPE MATCHES "Debug|DebInfo" AND MKL_THREADING STREQUAL "tbb_thread") + set(MKL_THREAD mkl_tbb_threadd) +else() + set(MKL_THREAD mkl_${MKL_THREADING}) +endif() +set(MKL_SDL mkl_rt) +if(MKL_ARCH STREQUAL "ia32") + set(MKL_BLAS95 mkl_blas95) + set(MKL_LAPACK95 mkl_lapack95) +else() + set(MKL_BLAS95 mkl_blas95_${MKL_INTERFACE}) + set(MKL_LAPACK95 mkl_lapack95_${MKL_INTERFACE}) +endif() +# BLACS +set(MKL_BLACS mkl_blacs_${MKL_MPI}_${MKL_INTERFACE}) +if(UNIX AND NOT APPLE AND MKL_MPI MATCHES "mpich") + # MPICH is compatible with INTELMPI Wrappers on Linux + set(MKL_BLACS mkl_blacs_intelmpi_${MKL_INTERFACE}) +endif() +if(WIN32) + if(MKL_MPI STREQUAL "msmpi") + if("Fortran" IN_LIST CURR_LANGS) + list(APPEND MKL_SUPP_LINK "msmpifec.lib") + endif() + # MSMPI and MSHPC are supported with the same BLACS library + set(MKL_BLACS mkl_blacs_msmpi_${MKL_INTERFACE}) + if(NOT MKL_LINK STREQUAL "static") + set(MKL_BLACS mkl_blacs_${MKL_INTERFACE}) + set(MKL_BLACS_ENV MSMPI) + endif() + elseif(MKL_MPI STREQUAL "intelmpi" AND NOT MKL_LINK STREQUAL "static") + set(MKL_BLACS mkl_blacs_${MKL_INTERFACE}) + set(MKL_BLACS_ENV INTELMPI) + endif() +endif() +# CDFT & SCALAPACK +set(MKL_CDFT mkl_cdft_core) +set(MKL_SCALAPACK mkl_scalapack_${MKL_INTERFACE}) + + +if (UNIX) + if(NOT APPLE) + if(MKL_LINK STREQUAL "static") + set(START_GROUP "-Wl,--start-group") + set(END_GROUP "-Wl,--end-group") + if(DPCPP_COMPILER OR ENABLE_OMP_OFFLOAD) + set(EXPORT_DYNAMIC "-Wl,-export-dynamic") + endif() + elseif(MKL_LINK STREQUAL "dynamic") + set(MKL_RPATH "-Wl,-rpath=$") + if((GNU_Fortran_COMPILER OR PGI_COMPILER) AND "Fortran" IN_LIST CURR_LANGS) + set(NO_AS_NEEDED -Wl,--no-as-needed) + endif() + else() + set(MKL_RPATH "-Wl,-rpath=$") + endif() + endif() +endif() + +# Create a list of requested libraries, based on input options (MKL_LIBRARIES) +# Create full link-line in MKL_LINK_LINE +list(APPEND MKL_LINK_LINE $,${MKL_OFFLOAD_LOPT},> + $,${MKL_DPCPP_LOPT},> ${EXPORT_DYNAMIC} ${NO_AS_NEEDED} ${MKL_RPATH}) +if(ENABLE_BLAS95) + list(APPEND MKL_LIBRARIES ${MKL_BLAS95}) + list(APPEND MKL_LINK_LINE MKL::${MKL_BLAS95}) +endif() +if(ENABLE_LAPACK95) + list(APPEND MKL_LIBRARIES ${MKL_LAPACK95}) + list(APPEND MKL_LINK_LINE MKL::${MKL_LAPACK95}) +endif() +if(ENABLE_SCALAPACK) + list(APPEND MKL_LIBRARIES ${MKL_SCALAPACK}) + list(APPEND MKL_LINK_LINE MKL::${MKL_SCALAPACK}) +endif() +if(DPCPP_COMPILER OR (ENABLE_OMP_OFFLOAD AND NOT MKL_LINK STREQUAL "sdl")) + list(APPEND MKL_LIBRARIES ${MKL_SYCL}) + list(APPEND MKL_LINK_LINE MKL::${MKL_SYCL}) +endif() +list(APPEND MKL_LINK_LINE ${START_GROUP}) +if(ENABLE_CDFT) + list(APPEND MKL_LIBRARIES ${MKL_CDFT}) + list(APPEND MKL_LINK_LINE MKL::${MKL_CDFT}) +endif() +if(MKL_LINK STREQUAL "sdl") + list(APPEND MKL_LIBRARIES ${MKL_SDL}) + list(APPEND MKL_LINK_LINE MKL::${MKL_SDL}) +else() + list(APPEND MKL_LIBRARIES ${MKL_IFACE_LIB} ${MKL_THREAD} ${MKL_CORE}) + list(APPEND MKL_LINK_LINE MKL::${MKL_IFACE_LIB} MKL::${MKL_THREAD} MKL::${MKL_CORE}) +endif() +if(USE_MPI) + list(APPEND MKL_LIBRARIES ${MKL_BLACS}) + list(APPEND MKL_LINK_LINE MKL::${MKL_BLACS}) +endif() +list(APPEND MKL_LINK_LINE ${END_GROUP}) + +# Find all requested libraries +foreach(lib ${MKL_LIBRARIES}) + unset(${lib}_file CACHE) + if(MKL_LINK STREQUAL "static" AND NOT ${lib} STREQUAL ${MKL_SDL}) + find_library(${lib}_file ${LIB_PREFIX}${lib}${LIB_EXT} + PATHS ${MKL_ROOT} + PATH_SUFFIXES "lib" "lib/${MKL_ARCH}") + add_library(MKL::${lib} STATIC IMPORTED) + else() + find_library(${lib}_file NAMES ${LIB_PREFIX}${lib}${DLL_EXT} ${lib} + PATHS ${MKL_ROOT} + PATH_SUFFIXES "lib" "lib/${MKL_ARCH}") + add_library(MKL::${lib} SHARED IMPORTED) + endif() + find_package_handle_standard_args(MKL REQUIRED_VARS ${lib}_file) + # CMP0111, implemented in CMake 3.20+ requires a shared library target on Windows + # to be defined with IMPLIB and LOCATION property. + # It also requires a static library target to be defined with LOCATION property. + # Setting the policy to OLD usage, using cmake_policy() does not work as of 3.20.0, hence the if-else below. + if(WIN32 AND NOT MKL_LINK STREQUAL "static") + set_target_properties(MKL::${lib} PROPERTIES IMPORTED_IMPLIB "${${lib}_file}") + # Find corresponding DLL + set(MKL_DLL_GLOB ${lib}.*.dll) + file(GLOB MKL_DLL_FILE "${MKL_ROOT}/redist/${MKL_ARCH}/${MKL_DLL_GLOB}" + "${MKL_ROOT}/../redist/${MKL_ARCH}/${MKL_DLL_GLOB}" + "${MKL_ROOT}/../redist/${MKL_ARCH}/mkl/${MKL_DLL_GLOB}" + "${MKL_ROOT}/bin/${MKL_DLL_GLOB}" + "${MKL_ROOT}/lib") + if(NOT ${lib} STREQUAL ${MKL_IFACE_LIB} AND NOT ${lib} STREQUAL ${MKL_BLAS95} AND NOT ${lib} STREQUAL ${MKL_LAPACK95}) # Windows IFACE libs are static only + list(LENGTH MKL_DLL_FILE MKL_DLL_FILE_LEN) + if(MKL_DLL_FILE_LEN) + # in case multiple versions of the same dll are found, select the highest version + list(SORT MKL_DLL_FILE) + list(REVERSE MKL_DLL_FILE) + list(GET MKL_DLL_FILE 0 MKL_DLL_FILE) + + mkl_message(STATUS "Found DLL: ${MKL_DLL_FILE}") + set_target_properties(MKL::${lib} PROPERTIES IMPORTED_LOCATION "${MKL_DLL_FILE}") + else() + mkl_message(FATAL_ERROR "${MKL_DLL_GLOB} not found. MKL_ROOT was '${MKL_ROOT}'. MKL_DLL_FILE is '${MKL_DLL_FILE}'") + endif() + endif() + else() + set_target_properties(MKL::${lib} PROPERTIES IMPORTED_LOCATION "${${lib}_file}") + endif() + list(APPEND MKL_IMPORTED_TARGETS MKL::${lib}) +endforeach() + +# Threading selection +if(MKL_THREADING) + if(MKL_THREADING STREQUAL "tbb_thread") + find_package(TBB REQUIRED CONFIG COMPONENTS tbb) + set(MKL_THREAD_LIB $) + set(MKL_SDL_THREAD_ENV "TBB") + get_property(TBB_LIB TARGET TBB::tbb PROPERTY IMPORTED_LOCATION_RELEASE) + get_filename_component(TBB_LIB_DIR ${TBB_LIB} DIRECTORY) + if(UNIX) + if(CMAKE_SKIP_BUILD_RPATH) + set(TBB_LINK "-L${TBB_LIB_DIR} -ltbb") + else() + set(TBB_LINK "-Wl,-rpath,${TBB_LIB_DIR} -L${TBB_LIB_DIR} -ltbb") + endif() + list(APPEND MKL_SUPP_LINK ${TBB_LINK}) + if(APPLE) + list(APPEND MKL_SUPP_LINK -lc++) + else() + list(APPEND MKL_SUPP_LINK -lstdc++) + endif() + endif() + if(WIN32 OR APPLE) + set(MKL_ENV_PATH ${TBB_LIB_DIR}) + endif() + elseif(MKL_THREADING MATCHES "_thread") + if(MKL_THREADING STREQUAL "pgi_thread") + list(APPEND MKL_SUPP_LINK -mp -pgf90libs) + set(MKL_SDL_THREAD_ENV "PGI") + elseif(MKL_THREADING STREQUAL "gnu_thread") + list(APPEND MKL_SUPP_LINK -lgomp) + set(MKL_SDL_THREAD_ENV "GNU") + else() + # intel_thread + if(UNIX) + set(MKL_OMP_LIB iomp5) + set(LIB_EXT ".so") + if(APPLE) + set(LIB_EXT ".dylib") + endif() + else() + set(MKL_OMP_LIB libiomp5md) + endif() + set(MKL_SDL_THREAD_ENV "INTEL") + set(OMP_LIBNAME ${LIB_PREFIX}${MKL_OMP_LIB}${LIB_EXT}) + + find_library(OMP_LIBRARY ${OMP_LIBNAME} + HINTS $ENV{LIB} $ENV{LIBRARY_PATH} $ENV{MKLROOT} ${MKL_ROOT} ${CMPLR_ROOT} + PATH_SUFFIXES "lib" "lib/${MKL_ARCH}" + "lib/${MKL_ARCH}_lin" "lib/${MKL_ARCH}_win" + "linux/compiler/lib/${MKL_ARCH}" + "linux/compiler/lib/${MKL_ARCH}_lin" + "windows/compiler/lib/${MKL_ARCH}" + "windows/compiler/lib/${MKL_ARCH}_win" + "../compiler/lib/${MKL_ARCH}_lin" "../compiler/lib/${MKL_ARCH}_win" + "../compiler/lib/${MKL_ARCH}" "../compiler/lib" + "../../compiler/latest/linux/compiler/lib/${MKL_ARCH}" + "../../compiler/latest/linux/compiler/lib/${MKL_ARCH}_lin" + "../../compiler/latest/windows/compiler/lib/${MKL_ARCH}" + "../../compiler/latest/windows/compiler/lib/${MKL_ARCH}_win" + "../../compiler/latest/mac/compiler/lib") + if(WIN32) + set(OMP_DLLNAME ${LIB_PREFIX}${MKL_OMP_LIB}.dll) + find_path(OMP_DLL_DIR ${OMP_DLLNAME} + HINTS $ENV{LIB} $ENV{LIBRARY_PATH} $ENV{MKLROOT} ${MKL_ROOT} ${CMPLR_ROOT} + PATH_SUFFIXES "redist/${MKL_ARCH}" + "redist/${MKL_ARCH}_win" "redist/${MKL_ARCH}_win/compiler" + "../redist/${MKL_ARCH}/compiler" "../compiler/lib" + "../../compiler/latest/windows/redist/${MKL_ARCH}_win" + "../../compiler/latest/windows/redist/${MKL_ARCH}_win/compiler" + "../../compiler/latest/windows/compiler/redist/${MKL_ARCH}_win" + "../../compiler/latest/windows/compiler/redist/${MKL_ARCH}_win/compiler") + find_package_handle_standard_args(MKL REQUIRED_VARS OMP_DLL_DIR) + set(MKL_ENV_PATH "${OMP_DLL_DIR}") + endif() + + if(WIN32 AND SKIP_LIBPATH) + # Only for Intel OpenMP Offload + set(OMP_LINK "libiomp5md.lib") + else() + set(OMP_LINK "${OMP_LIBRARY}") + if(CMAKE_C_COMPILER_ID STREQUAL "PGI" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI") + # Disable PGI OpenMP runtime for correct work of Intel OpenMP runtime + list(APPEND MKL_SUPP_LINK -nomp) + endif() + endif() + find_package_handle_standard_args(MKL REQUIRED_VARS OMP_LIBRARY OMP_LINK) + set(MKL_THREAD_LIB ${OMP_LINK}) + endif() + else() + # Sequential threading + set(MKL_SDL_THREAD_ENV "SEQUENTIAL") + endif() +endif() # MKL_THREADING + +if (UNIX) + list(APPEND MKL_SUPP_LINK -lm -ldl -lpthread) +endif() + +if(DPCPP_COMPILER OR ENABLE_OMP_OFFLOAD) + if(WIN32) + # Detect sycl library version + if(NOT DEFINED SYCL_LIB_VER_CACHE) + set(SYCL_LIB_VER "") + find_library(SYCL_LIB_DIR ${LIB_PREFIX}sycl${LIB_EXT} + HINTS $ENV{LIB} $ENV{CMPLR_ROOT} + PATH_SUFFIXES "windows/lib") + if(NOT SYCL_LIB_DIR) + foreach(ver RANGE 6 99) + find_library(SYCL_LIB_DIR ${LIB_PREFIX}sycl${ver}${LIB_EXT} + HINTS $ENV{LIB} $ENV{CMPLR_ROOT} + PATH_SUFFIXES "windows/lib") + if(SYCL_LIB_DIR) + set(SYCL_LIB_VER ${ver}) + break() + endif() + endforeach() + endif() + set(SYCL_LIB_VER_CACHE ${SYCL_LIB_VER} CACHE STRING "") + endif() + + if(CMAKE_BUILD_TYPE MATCHES "Debug|DebInfo") + list(APPEND MKL_SUPP_LINK ${LINK_PREFIX}sycl${SYCL_LIB_VER_CACHE}d${LINK_SUFFIX}) + else() + list(APPEND MKL_SUPP_LINK ${LINK_PREFIX}sycl${SYCL_LIB_VER_CACHE}${LINK_SUFFIX}) + endif() + else() + list(APPEND MKL_SUPP_LINK ${LINK_PREFIX}sycl${LINK_SUFFIX}) + endif() + list(APPEND MKL_SUPP_LINK ${LINK_PREFIX}OpenCL${LINK_SUFFIX}) +endif() + +# Setup link types based on input options +set(LINK_TYPES "") + +if(DPCPP_COMPILER) + add_library(MKL::MKL_DPCPP INTERFACE IMPORTED GLOBAL) + target_compile_options(MKL::MKL_DPCPP INTERFACE ${MKL_DPCPP_COPT}) + target_link_libraries(MKL::MKL_DPCPP INTERFACE ${MKL_LINK_LINE} ${MKL_THREAD_LIB} ${MKL_SUPP_LINK}) + list(APPEND LINK_TYPES MKL::MKL_DPCPP) +endif() +# Single target for all C, Fortran link-lines +add_library(MKL::MKL INTERFACE IMPORTED GLOBAL) +target_compile_options(MKL::MKL INTERFACE + $<$,C>:${MKL_C_COPT}> + $<$,Fortran>:${MKL_F_COPT}> + $<$,CXX>:${MKL_CXX_COPT}> + $,${MKL_OFFLOAD_COPT},>) +target_link_libraries(MKL::MKL INTERFACE ${MKL_LINK_LINE} ${MKL_THREAD_LIB} ${MKL_SUPP_LINK}) +list(APPEND LINK_TYPES MKL::MKL) + +foreach(link ${LINK_TYPES}) + # Set properties on all INTERFACE targets + target_include_directories(${link} BEFORE INTERFACE "${MKL_INCLUDE}") + list(APPEND MKL_IMPORTED_TARGETS ${link}) +endforeach(link) # LINK_TYPES + +if(MKL_LINK STREQUAL "sdl") + list(APPEND MKL_ENV "MKL_INTERFACE_LAYER=${MKL_SDL_IFACE_ENV}" "MKL_THREADING_LAYER=${MKL_SDL_THREAD_ENV}") +endif() +if(WIN32 AND NOT MKL_LINK STREQUAL "static") + list(APPEND MKL_ENV "MKL_BLACS_MPI=${MKL_BLACS_ENV}") +endif() + +# Add MKL dynamic libraries if RPATH is not defined on Unix +if(UNIX AND CMAKE_SKIP_BUILD_RPATH) + if(MKL_LINK STREQUAL "sdl") + set(MKL_LIB_DIR $) + else() + set(MKL_LIB_DIR $) + endif() + if(APPLE) + list(APPEND MKL_ENV "DYLD_LIBRARY_PATH=${MKL_LIB_DIR}\;$ENV{DYLD_LIBRARY_PATH}") + else() + list(APPEND MKL_ENV "LD_LIBRARY_PATH=${MKL_LIB_DIR}\;$ENV{LD_LIBRARY_PATH}") + endif() +endif() + +# Add MKL dynamic libraries to PATH on Windows +if(WIN32 AND NOT MKL_LINK STREQUAL "static") + get_filename_component(MKL_DLL_DIR ${MKL_DLL_FILE} DIRECTORY) + set(MKL_ENV_PATH "${MKL_DLL_DIR}\;${MKL_ENV_PATH}") +endif() + +if(MKL_ENV_PATH) + list(APPEND MKL_ENV "PATH=${MKL_ENV_PATH}\;${OLD_PATH}") + if(APPLE) + list(APPEND MKL_ENV "DYLD_LIBRARY_PATH=${MKL_ENV_PATH}\:${OLD_PATH}") + endif() +endif() + +unset(MKL_DLL_FILE) + +endif() # MKL_LIBRARIES diff --git a/dpbench/benchmarks/deformable_convolution/deformable_convolution_sycl_native_ext/cmake/TBBConfig.cmake b/dpbench/benchmarks/deformable_convolution/deformable_convolution_sycl_native_ext/cmake/TBBConfig.cmake new file mode 100644 index 00000000..5363c1d3 --- /dev/null +++ b/dpbench/benchmarks/deformable_convolution/deformable_convolution_sycl_native_ext/cmake/TBBConfig.cmake @@ -0,0 +1,193 @@ +# Copyright (c) 2017-2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# It defines the following variables: +# TBB__FOUND +# TBB_IMPORTED_TARGETS +# +# TBBConfigVersion.cmake defines TBB_VERSION +# +# Initialize to default values +if (NOT TBB_IMPORTED_TARGETS) + set(TBB_IMPORTED_TARGETS "") +endif() + +if (NOT TBB_FIND_COMPONENTS) + set(TBB_FIND_COMPONENTS "tbb;tbbmalloc;tbbmalloc_proxy") + foreach (_tbb_component ${TBB_FIND_COMPONENTS}) + set(TBB_FIND_REQUIRED_${_tbb_component} 1) + endforeach() +endif() + +get_filename_component(_tbb_root "${CMAKE_CURRENT_LIST_DIR}" REALPATH) +get_filename_component(_tbb_root "${_tbb_root}/../../.." ABSOLUTE) + +set(TBB_INTERFACE_VERSION ) + +set(_tbb_bin_version 12) +set(_tbbmalloc_bin_version 2) +set(_tbbmalloc_proxy_bin_version 2) +set(_tbbbind_bin_version 3) + +# Add components with internal dependencies: tbbmalloc_proxy -> tbbmalloc +list(FIND TBB_FIND_COMPONENTS tbbmalloc_proxy _tbbmalloc_proxy_ix) +if (NOT _tbbmalloc_proxy_ix EQUAL -1) + list(APPEND TBB_FIND_COMPONENTS tbbmalloc) + list(REMOVE_DUPLICATES TBB_FIND_COMPONENTS) + set(TBB_FIND_REQUIRED_tbbmalloc ${TBB_FIND_REQUIRED_tbbmalloc_proxy}) +endif() +unset(_tbbmalloc_proxy_ix) + +if (CMAKE_SIZEOF_VOID_P STREQUAL "8") + set(_tbb_subdir intel64/gcc4.8) +else () + set(_tbb_subdir ia32/gcc4.8) +endif() + +if (UNIX) + set(_tbb_lib_ext ".so") + set(_tbb_lib_prefix "lib") + set(_tbb_lib_dir_conda "lib") + set(_bin_version "") +elseif (WIN32) + set(_bin_version "") + set(_tbb_lib_prefix "") + set(_tbb_lib_ext ".dll") + set(_tbb_impllib_ext ".lib") + set(_tbb_lib_dir_conda "bin") + set(_tbb_impllib_dir_conda "lib") +else() + message(FATAL_ERROR "Unsupported platform. Only Unix and Windows are supported.") +endif() + +foreach (_tbb_component ${TBB_FIND_COMPONENTS}) + set(TBB_${_tbb_component}_FOUND 0) + +if(WIN32) + unset(_bin_version) + if (_tbb_component STREQUAL tbb) + set(_bin_version ${_tbb_bin_version}) + endif() +endif() + + if(UNIX) + find_library(_tbb_release_lib + NAMES ${_tbb_lib_prefix}${_tbb_component}${_bin_version}${_tbb_lib_ext} + PATHS ${_tbb_root} + HINTS ENV TBB_ROOT_HINT + PATH_SUFFIXES "${_tbb_lib_dir_conda}" "lib/${_tbb_subdir}") + + else() + find_file(_tbb_release_lib + NAMES ${_tbb_lib_prefix}${_tbb_component}${_bin_version}${_tbb_lib_ext} + PATHS ${_tbb_root} + HINTS ENV TBB_ROOT_HINT + PATH_SUFFIXES "${_tbb_lib_dir_conda}" "lib/${_tbb_subdir}") + + if (EXISTS "${_tbb_release_lib}") + find_library(_tbb_release_impllib + NAMES ${_tbb_lib_prefix}${_tbb_component}${_bin_version}${_tbb_impllib_ext} + PATHS ${_tbb_root} + HINTS ENV TBB_ROOT_HINT + PATH_SUFFIXES "${_tbb_impllib_dir_conda}" "lib/${_tbb_subdir}") + endif() + endif() + + if (NOT TBB_FIND_RELEASE_ONLY) + find_library(_tbb_debug_lib + NAMES ${_tbb_lib_prefix}${_tbb_component}${_bin_version}_debug.${_tbb_lib_ext} + PATHS ${_tbb_root} + HINTS ENV TBB_ROOT_HINT + PATH_SUFFIXES "${_tbb_lib_dir_conda}" "lib/${_tbb_subdir}") + if(WIN32 AND EXISTS "${_tbb_debug_lib}") + find_library(_tbb_debug_impllib + NAMES ${_tbb_lib_prefix}${_tbb_component}${_bin_version}_debug.${_tbb_impllib_ext} + PATHS ${_tbb_root} + HINTS ENV TBB_ROOT_HINT + PATH_SUFFIXES "${_tbb_impllib_dir_conda}" "lib/${_tbb_subdir}") + endif() + endif() + + if (EXISTS "${_tbb_release_lib}" OR EXISTS "${_tbb_debug_lib}") + if (NOT TARGET TBB::${_tbb_component}) + add_library(TBB::${_tbb_component} SHARED IMPORTED) + + find_path(_tbb_include_dir + oneapi/tbb.h + PATHS ${_tbb_root} + PATH_SUFFIXES include + HINTS ENV TBB_ROOT_HINT + ) + +if(WIN32) + set_target_properties( + TBB::${_tbb_component} PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${_tbb_include_dir}" + INTERFACE_COMPILE_DEFINITIONS "__TBB_NO_IMPLICIT_LINKAGE=1" + ) +else() + set_target_properties( + TBB::${_tbb_component} PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${_tbb_include_dir}" + ) +endif() + unset(_tbb_current_realpath) + unset(_tbb_include_dir) + + if (EXISTS "${_tbb_release_lib}") +if(WIN32) + set_target_properties(TBB::${_tbb_component} PROPERTIES + IMPORTED_LOCATION_RELEASE "${_tbb_release_lib}" + IMPORTED_IMPLIB_RELEASE "${_tbb_release_impllib}") +else() + set_target_properties(TBB::${_tbb_component} PROPERTIES + IMPORTED_LOCATION_RELEASE "${_tbb_release_lib}") +endif() + set_property(TARGET TBB::${_tbb_component} APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) + endif() + + if (EXISTS "${_tbb_debug_lib}") +if(WIN32) + set_target_properties(TBB::${_tbb_component} PROPERTIES + IMPORTED_LOCATION_DEBUG "${_tbb_debug_lib}" + IMPORTED_IMPLIB_DEBUG "${_tbb_debug_impllib}" + ) +else() + set_target_properties(TBB::${_tbb_component} PROPERTIES + IMPORTED_LOCATION_DEBUG "${_tbb_debug_lib}") +endif() + set_property(TARGET TBB::${_tbb_component} APPEND PROPERTY IMPORTED_CONFIGURATIONS DEBUG) + endif() + + # Add internal dependencies for imported targets: TBB::tbbmalloc_proxy -> TBB::tbbmalloc + if (_tbb_component STREQUAL tbbmalloc_proxy) + set_target_properties(TBB::tbbmalloc_proxy PROPERTIES INTERFACE_LINK_LIBRARIES TBB::tbbmalloc) + endif() + endif() + list(APPEND TBB_IMPORTED_TARGETS TBB::${_tbb_component}) + set(TBB_${_tbb_component}_FOUND 1) + elseif (TBB_FIND_REQUIRED AND TBB_FIND_REQUIRED_${_tbb_component}) + message(STATUS "Missed required oneTBB component: ${_tbb_component}") + if (TBB_FIND_RELEASE_ONLY) + message(STATUS " ${_tbb_release_lib} must exist.") + else() + message(STATUS " one or both of:\n ${_tbb_release_lib}\n ${_tbb_debug_lib}\n files must exist.") + endif() + set(TBB_FOUND FALSE) + endif() +endforeach() +list(REMOVE_DUPLICATES TBB_IMPORTED_TARGETS) +unset(_tbb_release_lib) +unset(_tbb_debug_lib) +unset(_tbb_root) diff --git a/dpbench/benchmarks/deformable_convolution/deformable_convolution_sycl_native_ext/deformable_convolution_sycl/impl.cpp b/dpbench/benchmarks/deformable_convolution/deformable_convolution_sycl_native_ext/deformable_convolution_sycl/impl.cpp new file mode 100644 index 00000000..3bc49f39 --- /dev/null +++ b/dpbench/benchmarks/deformable_convolution/deformable_convolution_sycl_native_ext/deformable_convolution_sycl/impl.cpp @@ -0,0 +1,349 @@ +//==- impl.cpp - Python native extension of deformable convolution ===// +// +// Copyright 2022 Intel Corp. +// +// SPDX - License - Identifier : Apache 2.0 +/// +/// \file +/// The files implements a SYCL-based Python native extension for the +/// deformable convolution benchmark. + +#include "CL/sycl.hpp" +#include "cmath" +#include "dpctl4pybind11.hpp" +#include "oneapi/mkl.hpp" +#include "utils.hpp" + +using namespace sycl; +namespace py = pybind11; + +template +__attribute__((always_inline)) DataType bilinear(const DataType *input, + int height, + int width, + float offset_y, + float offset_x) +{ + auto start_x = int(std::floor(offset_x)); + auto start_x_weight = 1 - (offset_x - start_x); + auto start_y = int(std::floor(offset_y)); + auto start_y_weight = 1 - (offset_y - start_y); + + DataType result = 0; + if (offset_x >= width || offset_y >= height || offset_x <= -1 || + offset_y <= -1) + return result; + + if (start_x >= 0 && start_y >= 0) { + auto w0 = start_x_weight * start_y_weight; + auto v0 = *get_ptr_2d(input, height, width, start_y, start_x); + + result += w0 * v0; + } + + if (start_x + 1 < width && start_y >= 0) { + auto w1 = (1 - start_x_weight) * start_y_weight; + auto v1 = *get_ptr_2d(input, height, width, start_y, start_x + 1); + + result += w1 * v1; + } + + if (start_x >= 0 && start_y + 1 < height) { + auto w2 = start_x_weight * (1 - start_y_weight); + auto v2 = *get_ptr_2d(input, height, width, start_y + 1, start_x); + + result += w2 * v2; + } + + if (start_x + 1 < width && start_y + 1 < height) { + auto w3 = (1 - start_x_weight) * (1 - start_y_weight); + auto v3 = *get_ptr_2d(input, height, width, start_y + 1, start_x + 1); + + result += w3 * v3; + } + + return result / 2; +} + +template class deform; + +template +inline auto deform_input(cl::sycl::queue &queue, + const DataType *input, + const Shape3D in_shape, + DataType *output, + const Shape5D out_shape, + const float *offset, + int stride_y, + int stride_x, + int pad_y, + int pad_x, + int dilation_y, + int dilation_x) +{ + auto in_channels = in_shape[CHW::C]; + auto in_height = in_shape[CHW::H]; + auto in_width = in_shape[CHW::W]; + + auto k_height = out_shape[CKHW::KH]; + auto k_width = out_shape[CKHW::KW]; + auto out_height = out_shape[CKHW::H]; + auto out_width = out_shape[CKHW::W]; + + assert(out_shape[CKHW::C] == in_channels); + + auto wsize = + sycl::range<3>(in_channels * k_height * k_width, out_height, out_width); + return queue.parallel_for>(wsize, [=](sycl::id<3> idx) { + auto ckhkw = static_cast(idx[0]); + auto h = static_cast(idx[1]); + auto w = static_cast(idx[2]); + + auto c = ckhkw / (k_height * k_width); + auto khkw = ckhkw % (k_height * k_width); + + auto kh = khkw / k_width; + auto kw = khkw % k_width; + + auto k_h_m = (k_height - 1) / 2; + auto k_w_m = (k_width - 1) / 2; + + auto _output = get_ptr_5d(output, in_channels, k_height, k_width, + out_height, out_width, c, kh, kw, h, 0); + + auto offset_y = *get_ptr_5d(offset, k_height, k_width, 2, out_height, + out_width, kh, kw, 1, h, w) + + h * stride_y + (kh - k_h_m) * dilation_y - + (pad_y - k_h_m); + auto offset_x = *get_ptr_5d(offset, k_height, k_width, 2, out_height, + out_width, kh, kw, 0, h, w) + + w * stride_x + (kw - k_w_m) * dilation_x - + (pad_x - k_w_m); + + auto _input = + get_ptr_3d(input, in_channels, in_height, in_width, c, 0, 0); + + _output[w] = bilinear(_input, in_height, in_width, offset_y, offset_x); + }); +} + +template class fill_output; + +template +auto output_fill_with_bias(cl::sycl::queue &queue, + DataType *output, + const Shape3D out_shape, + const DataType *bias) +{ + auto out_c = out_shape[CHW::C]; + auto out_h = out_shape[CHW::H]; + auto out_w = out_shape[CHW::W]; + + return queue.parallel_for>( + sycl::range<3>(out_c, out_h, out_w), [=](sycl::id<3> idx) { + auto c = static_cast(idx[0]); + auto h = static_cast(idx[1]); + auto w = static_cast(idx[2]); + + auto out_ptr = get_ptr_3d(output, out_c, out_h, out_w, c, h, w); + *out_ptr = bias[c]; + }); +} + +template +void deformable_convolution_b1_impl(cl::sycl::queue &queue, + const DataType *input, + const Shape3D in_shape, + DataType *output, + const Shape3D out_shape, + DataType *tmp, + const float *offset, + const DataType *weights, + const Shape4D weights_shape, + const DataType *bias, + int stride_y, + int stride_x, + int pad_y, + int pad_x, + int dilation_y, + int dilation_x, + int groups, + int deformable_groups) +{ + using oneapi::mkl::transpose; + using oneapi::mkl::blas::row_major::gemm; + + auto in_c = in_shape[CHW::C]; + auto in_h = in_shape[CHW::H]; + auto in_w = in_shape[CHW::W]; + + auto out_c = out_shape[CHW::C]; + auto out_h = out_shape[CHW::H]; + auto out_w = out_shape[CHW::W]; + + assert(out_c == weights_shape[OIHW::OC]); + assert(in_c == weights_shape[OIHW::IC]); + auto ker_h = weights_shape[OIHW::H]; + auto ker_w = weights_shape[OIHW::W]; + + auto edeform = deform_input( + queue, input, in_shape, tmp, {in_c, ker_h, ker_w, out_h, out_w}, offset, + stride_y, stride_x, pad_y, pad_x, dilation_y, dilation_x); + + auto efill = output_fill_with_bias(queue, output, out_shape, bias); + auto egemm = gemm(queue, transpose::N, transpose::N, /*transpose a, b*/ + out_c, out_h * out_w, in_c * ker_h * ker_w, /*m, n, k*/ + 1, /*alpha*/ + weights, in_c * ker_h * ker_w, /*a, lda*/ + tmp, out_h * out_w, /*b, ldb*/ + 1, /*beta*/ + output, out_h * out_w, /*c, ldc*/ + {edeform, efill} /*events*/); + egemm.wait(); +} + +template +void deformable_convolution_impl(cl::sycl::queue &queue, + const DataType *input, + const Shape4D in_shape, + DataType *output, + const Shape4D out_shape, + DataType *tmp, + const float *offset, + const DataType *weights, + const Shape4D weights_shape, + const DataType *bias, + int stride_y, + int stride_x, + int pad_y, + int pad_x, + int dilation_y, + int dilation_x, + int groups, + int deformable_groups) +{ + auto in_b = in_shape[NCHW::N]; + auto in_c = in_shape[NCHW::C]; + auto in_h = in_shape[NCHW::H]; + auto in_w = in_shape[NCHW::W]; + + assert(in_b == out_shape[NCHW::N]); + auto out_c = out_shape[NCHW::C]; + auto out_h = out_shape[NCHW::H]; + auto out_w = out_shape[NCHW::W]; + + assert(out_c == weights_shape[OIHW::OC]); + assert(in_c == weights_shape[OIHW::IC]); + + for (auto b = 0; b < in_b; ++b) { + auto input_ptr = get_ptr_4d(input, in_b, in_c, in_h, in_w, b, 0, 0, 0); + auto output_ptr = + get_ptr_4d(output, in_b, out_c, out_h, out_w, b, 0, 0, 0); + deformable_convolution_b1_impl( + queue, input_ptr, {in_c, in_h, in_w}, output_ptr, + {out_c, out_h, out_w}, tmp, offset, weights, weights_shape, bias, + stride_y, stride_x, pad_y, pad_x, dilation_y, dilation_x, groups, + deformable_groups); + } +} + +template bool ensure_compatibility(const Args &...args) +{ + std::vector arrays = {args...}; + + auto arr = arrays.at(0); + + for (auto &arr : arrays) { + if (!(arr.get_flags() & (USM_ARRAY_C_CONTIGUOUS))) { + std::cerr << "All arrays need to be C contiguous.\n"; + return false; + } + } + return true; +} + +void deformable_convolution(dpctl::tensor::usm_ndarray input, + dpctl::tensor::usm_ndarray output, + dpctl::tensor::usm_ndarray offset, + dpctl::tensor::usm_ndarray weights, + dpctl::tensor::usm_ndarray bias, + dpctl::tensor::usm_ndarray tmp, + py::list stride_hw, + py::list pad_hw, + py::list dilation_hw, + int groups, + int deformable_groups) +{ + auto queue = input.get_queue(); + + if (!ensure_compatibility(input, output, offset, weights, bias, tmp)) + throw std::runtime_error("Input arrays are not acceptable."); + + if (input.get_typenum() != output.get_typenum() or + input.get_typenum() != offset.get_typenum() or + input.get_typenum() != weights.get_typenum() or + input.get_typenum() != bias.get_typenum() or + input.get_typenum() != tmp.get_typenum()) + { + throw std::runtime_error("All arrays must have the same precision"); + } + + int batch = input.get_shape(0); + + int in_channels = input.get_shape(1); + int in_height = input.get_shape(2); + int in_width = input.get_shape(3); + + int out_channels = output.get_shape(1); + int out_height = output.get_shape(2); + int out_width = output.get_shape(3); + + int kernel_height = weights.get_shape(2); + int kernel_width = weights.get_shape(3); + + auto stride_y = stride_hw[0].cast(); + auto stride_x = stride_hw[1].cast(); + + auto pad_y = pad_hw[0].cast(); + auto pad_x = pad_hw[1].cast(); + + auto dilation_y = pad_hw[0].cast(); + auto dilation_x = pad_hw[1].cast(); + + auto input_shape = Shape4D({batch, in_channels, in_height, in_width}); + auto output_shape = Shape4D({batch, out_channels, out_height, out_width}); + auto weights_shape = + Shape4D({out_channels, in_channels, kernel_height, kernel_width}); + +#define dispatch_dc(typ) \ + deformable_convolution_impl( \ + queue, input.get_data(), input_shape, output.get_data(), \ + output_shape, tmp.get_data(), offset.get_data(), \ + weights.get_data(), weights_shape, bias.get_data(), \ + stride_y, stride_x, pad_y, pad_x, dilation_y, dilation_x, groups, \ + deformable_groups) + + if (input.get_typenum() == UAR_FLOAT) { + dispatch_dc(float); + } + else if (input.get_typenum() == UAR_DOUBLE) { + dispatch_dc(double); + } + else { + throw std::runtime_error("Unsupported type"); + } + +#undef dispatch_dc +} + +PYBIND11_MODULE(_deformable_convolution_sycl, m) +{ + import_dpctl(); + + m.def("deformable_convolution", &deformable_convolution, + "Defromable convolution", py::arg("input"), py::arg("output"), + py::arg("offset"), py::arg("weights"), py::arg("bias"), + py::arg("tmp"), py::arg("stride_hw"), py::arg("pad_hw"), + py::arg("dilation_hw"), py::arg("groups"), + py::arg("deformable_groups")); +} diff --git a/dpbench/benchmarks/deformable_convolution/deformable_convolution_sycl_native_ext/deformable_convolution_sycl/utils.hpp b/dpbench/benchmarks/deformable_convolution/deformable_convolution_sycl_native_ext/deformable_convolution_sycl/utils.hpp new file mode 100644 index 00000000..603c4144 --- /dev/null +++ b/dpbench/benchmarks/deformable_convolution/deformable_convolution_sycl_native_ext/deformable_convolution_sycl/utils.hpp @@ -0,0 +1,88 @@ +// +// Copyright 2022 Intel Corp. +// +// SPDX - License - Identifier : Apache 2.0 +/// + +#include + +template class TensorShape : public std::array +{ +public: + template const int &operator[](DimType dim) const + { + return std::array::operator[](static_cast(dim)); + } + + template int &operator[](DimType dim) + { + return std::array::operator[](static_cast(dim)); + } +}; + +enum class CHW : int +{ + C, + H, + W, +}; + +enum class NCHW : int +{ + N, + C, + H, + W, +}; + +enum class OIHW : int +{ + OC, + IC, + H, + W, +}; + +enum class HWCK : int +{ + H, + W, + C, + KH, + KW +}; + +enum class CKHW : int +{ + C, + KH, + KW, + H, + W, +}; + +using Shape1D = TensorShape<1>; +using Shape2D = TensorShape<2>; +using Shape3D = TensorShape<3>; +using Shape4D = TensorShape<4>; +using Shape5D = TensorShape<5>; +using DType = float; + +#define get_ptr_1d(data_ptr, max_dim_0, dim_0) (data_ptr + (dim_0)) +#define get_ptr_2d(data_ptr, max_dim_0, max_dim_1, dim_0, dim_1) \ + (get_ptr_1d(data_ptr + (dim_0) * (max_dim_1), max_dim_1, dim_1)) +#define get_ptr_3d(data_ptr, max_dim_0, max_dim_1, max_dim_2, dim_0, dim_1, \ + dim_2) \ + (get_ptr_2d(data_ptr + (dim_0) * (max_dim_1) * (max_dim_2), max_dim_1, \ + max_dim_2, dim_1, dim_2)) +#define get_ptr_4d(data_ptr, max_dim_0, max_dim_1, max_dim_2, max_dim_3, \ + dim_0, dim_1, dim_2, dim_3) \ + (get_ptr_3d(data_ptr + (dim_0) * (max_dim_1) * (max_dim_2) * (max_dim_3), \ + max_dim_1, max_dim_2, max_dim_3, dim_1, dim_2, dim_3)) + +#define get_ptr_5d(data_ptr, max_dim_0, max_dim_1, max_dim_2, max_dim_3, \ + max_dim_4, dim_0, dim_1, dim_2, dim_3, dim_4) \ + (get_ptr_4d(data_ptr + (dim_0) * (max_dim_1) * (max_dim_2) * (max_dim_3) * \ + (max_dim_4), \ + max_dim_1, max_dim_2, max_dim_3, max_dim_4, dim_1, dim_2, \ + dim_3, dim_4)) diff --git a/dpbench/configs/bench_info/deformable_convolution.toml b/dpbench/configs/bench_info/deformable_convolution.toml new file mode 100644 index 00000000..93c2e50a --- /dev/null +++ b/dpbench/configs/bench_info/deformable_convolution.toml @@ -0,0 +1,99 @@ +# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +[benchmark] +reference_implementation_postfix = "sycl" +name = "Deformable convolution" +short_name = "deformable-convolution" +relative_path = "deformable_convolution" +module_name = "deformable_convolution" +func_name = "deformable_convolution" +kind = "microbenchmark" +domain = "Deep learning" +input_args = [ + "input", + "output", + "offset", + "weights", + "bias", + "tmp", + "stride_hw", + "dilation_hw", + "pad_hw", + "groups", + "deformable_groups" +] +array_args = [ + "input", + "output", + "offset", + "weights", + "bias", + "tmp" +] +output_args = [ + "output" +] + +[benchmark.parameters.S] +batch = 1 +in_chw = [16, 32, 32] +out_chw = [16, 32, 32] +kernel_hw = [3, 3] +stride_hw = [1, 1] +dilation_hw = [1, 1] +pad_hw = [1, 1] +groups = 1 +deformable_groups = 1 +seed = 7777777 + +[benchmark.parameters.M] +batch = 1 +in_chw = [64, 64, 64] +out_chw = [64, 64, 64] +kernel_hw = [3, 3] +stride_hw = [1, 1] +dilation_hw = [1, 1] +pad_hw = [1, 1] +groups = 1 +deformable_groups = 1 +seed = 7777777 + +[benchmark.parameters.L] +batch = 2 +in_chw = [64, 128, 128] +out_chw = [128, 128, 128] +kernel_hw = [3, 3] +stride_hw = [1, 1] +dilation_hw = [1, 1] +pad_hw = [1, 1] +groups = 1 +deformable_groups = 1 +seed = 7777777 + +[benchmark.init] +func_name = "initialize" +types_dict_name="types_dict" +precision="single" +input_args = [ + "batch", + "in_chw", + "out_chw", + "kernel_hw", + "stride_hw", + "dilation_hw", + "pad_hw", + "groups", + "deformable_groups", + "seed", + "types_dict", +] +output_args = [ + "input", + "output", + "offset", + "weights", + "bias", + "tmp" +] diff --git a/dpbench/infrastructure/benchmark_runner.py b/dpbench/infrastructure/benchmark_runner.py index 1fe53357..9c95a43e 100644 --- a/dpbench/infrastructure/benchmark_runner.py +++ b/dpbench/infrastructure/benchmark_runner.py @@ -314,10 +314,9 @@ def run_benchmark_in_sub_process( return results - _, conn = self.get_process(rc.framework) - brc = BaseRunConfig.from_instance(rc) + _, conn = self.get_process(rc.framework) brc.ref_framework: cfg.Framework = [ f for f in cfg.GLOBAL.frameworks @@ -358,7 +357,14 @@ def run_benchmark_and_save( Args: rc: runtime configuration. """ - results = self.run_benchmark_in_sub_process(rc) + try: + results = self.run_benchmark_in_sub_process(rc) + except Exception as e: + # self.kill_process(rc.framework) + + results = BenchmarkResults(0, rc.implementation, rc.preset) + results.error_state = ErrorCodes.FAILED_EXECUTION + results.error_msg = f"Unexpected failure. {str(e)}" if rc.conn: framework = build_framework(rc.framework) diff --git a/environments/conda-linux-sycl.yml b/environments/conda-linux-sycl.yml index 8d34ee73..7fe49532 100644 --- a/environments/conda-linux-sycl.yml +++ b/environments/conda-linux-sycl.yml @@ -34,3 +34,5 @@ dependencies: - libgcc-ng - libstdcxx-ng - libgomp + - tbb-devel >= 2021.6.0 + - mkl-devel-dpcpp>=2023.0.0 diff --git a/environments/conda-win-sycl.yml b/environments/conda-win-sycl.yml index e4ac46e5..0c5d3032 100644 --- a/environments/conda-win-sycl.yml +++ b/environments/conda-win-sycl.yml @@ -30,3 +30,5 @@ dependencies: # https://github.com/scikit-build/scikit-build/issues/981 - setuptools>=42,<64 - pybind11 + - tbb-devel >= 2021.6.0 + - mkl-devel-dpcpp>=2023.0.0