IntelPython · AlexanderKalistratov · Nov 30, 2022 · Apr 10, 2023 · May 29, 2023 · Jun 1, 2023
diff --git a/dpbench/benchmarks/CMakeLists.txt b/dpbench/benchmarks/CMakeLists.txt
@@ -9,6 +9,7 @@ add_subdirectory(rambo)
 add_subdirectory(kmeans)
 add_subdirectory(knn)
 add_subdirectory(gpairs)
+add_subdirectory(deformable_convolution)
 add_subdirectory(dbscan)
 
 # generate dpcpp version into config

diff --git a/dpbench/benchmarks/deformable_convolution/CMakeLists.txt b/dpbench/benchmarks/deformable_convolution/CMakeLists.txt
@@ -0,0 +1,5 @@
+# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# Descend into the directory holding the SYCL native extension of this benchmark.
+add_subdirectory(deformable_convolution_sycl_native_ext)
diff --git a/dpbench/benchmarks/deformable_convolution/__init__.py b/dpbench/benchmarks/deformable_convolution/__init__.py
@@ -0,0 +1,6 @@
+# Copyright 2022 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+
+"""Deformable convolution
+"""
diff --git a/dpbench/benchmarks/deformable_convolution/deformable_convolution_initialize.py b/dpbench/benchmarks/deformable_convolution/deformable_convolution_initialize.py
@@ -0,0 +1,53 @@
+# Copyright 2022 Intel Corp.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+
+def initialize(
+    batch,
+    in_chw,
+    out_chw,
+    kernel_hw,
+    stride_hw,
+    dilation_hw,
+    pad_hw,
+    groups,
+    deformable_groups,
+    seed,
+    types_dict,
+):
+    """Create the arrays consumed by the deformable convolution benchmark.
+
+    NumPy's global legacy generator is seeded with ``seed`` and then drawn
+    from in a fixed order (input, offset, weights, bias), so equal arguments
+    give equal data.
+
+    Args:
+        batch: Number of samples; becomes the leading axis of input/output.
+        in_chw: List ``[channels, height, width]`` of one input sample.
+        out_chw: List ``[channels, height, width]`` of one output sample.
+        kernel_hw: List ``[kernel_height, kernel_width]``.
+        stride_hw: Not used here.
+        dilation_hw: Not used here.
+        pad_hw: Not used here.
+        groups: Not used here.
+        deformable_groups: Not used here.
+        seed: Seed handed to ``numpy.random.seed``.
+        types_dict: Mapping whose ``"float"`` entry is the floating dtype
+            used for every array except ``offset``.
+
+    Returns:
+        Tuple ``(input, output, offset, weights, bias, tmp)``. ``output``
+        and ``tmp`` are uninitialised buffers; ``offset`` is always
+        ``float32`` with values derived as ``2 * u - 1`` from uniform
+        samples ``u`` in ``[0, 1)``.
+    """
+    import numpy as np
+
+    float_type: np.dtype = types_dict["float"]
+    np.random.seed(seed)
+
+    def draw(shape):
+        # Uniform samples in [0, 1) from the seeded global generator.
+        return np.random.random(shape)
+
+    in_channels = in_chw[0]
+    out_channels, out_height, out_width = out_chw[0], out_chw[1], out_chw[2]
+
+    # The order of the random draws below fixes the generated data:
+    # input first, then offset, weights and bias.
+    inputs = draw([batch] + in_chw).astype(float_type)  # n, c, h, w
+    offsets = (
+        2 * draw(kernel_hw + [2, out_height, out_width]).astype("float32")
+        - 1
+    )  # kh, kw, 2, oh, ow
+    kernels = draw([out_channels, in_channels] + kernel_hw).astype(
+        float_type
+    )  # oc, ic, kh, kw
+    biases = draw(out_channels).astype(float_type)  # oc
+
+    # Buffers that the benchmark fills in; their contents are undefined here.
+    outputs = np.empty([batch] + out_chw, dtype=float_type)  # n, c, h, w
+    scratch = np.empty(
+        [in_channels, kernel_hw[0], kernel_hw[1], out_height, out_width],
+        dtype=float_type,
+    )  # ic, kh, kw, oh, ow
+
+    return (inputs, outputs, offsets, kernels, biases, scratch)
diff --git a/dpbench/benchmarks/deformable_convolution/deformable_convolution_numba_mlir_p.py b/dpbench/benchmarks/deformable_convolution/deformable_convolution_numba_mlir_p.py
@@ -0,0 +1,166 @@
+# Copyright 2022 Intel Corp.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import math
+
+import numpy as np
+from numba import prange
+from numba_mlir import njit
+
+
+@njit(parallel=True, inline="always", fastmath=True, gpu_fp64_truncate="auto")
+def bilinear(input, offset_y, offset_x):
+    """Sample a 2-D array at a fractional position by bilinear interpolation.
+
+    The value is the weighted sum of the (up to) four array elements that
+    surround ``(offset_y, offset_x)``. Neighbours that fall outside the
+    array contribute nothing, which behaves like zero padding.
+
+    Args:
+        input: 2-D array indexed as ``input[y, x]``.
+        offset_y: Fractional row coordinate of the sample.
+        offset_x: Fractional column coordinate of the sample.
+
+    Returns:
+        The interpolated value, or ``0`` when the position lies at or
+        beyond one full cell outside the array
+        (``offset <= -1`` or ``offset >= size`` on either axis).
+    """
+    height, width = input.shape
+    # Top-left neighbour and the weight it receives along each axis; the
+    # opposite neighbour gets the complementary weight (1 - weight).
+    start_x = int(math.floor(offset_x))
+    start_x_weight = 1 - (offset_x - start_x)
+    start_y = int(math.floor(offset_y))
+    start_y_weight = 1 - (offset_y - start_y)
+
+    output = 0
+    if (
+        offset_x >= width
+        or offset_y >= height
+        or offset_x <= -1
+        or offset_y <= -1
+    ):
+        return output
+
+    # Top-left neighbour.
+    if start_x >= 0 and start_y >= 0:
+        w = start_x_weight * start_y_weight
+        output += w * input[start_y, start_x]
+
+    # Top-right neighbour.
+    if start_x + 1 < width and start_y >= 0:
+        w = (1 - start_x_weight) * start_y_weight
+        output += w * input[start_y, start_x + 1]
+
+    # Bottom-left neighbour.
+    if start_x >= 0 and start_y + 1 < height:
+        w = start_x_weight * (1 - start_y_weight)
+        output += w * input[start_y + 1, start_x]
+
+    # Bottom-right neighbour.
+    if start_x + 1 < width and start_y + 1 < height:
+        w = (1 - start_x_weight) * (1 - start_y_weight)
+        output += w * input[start_y + 1, start_x + 1]
+
+    # The four weights already sum to 1, so the accumulated sum is the
+    # interpolated value and must not be rescaled.
+    return output
+
+
+@njit(parallel=True, fastmath=True, gpu_fp64_truncate="auto")
+def deform(
+    input, offset, output, stride, pad, dilation, groups, deformable_groups
+):
+    """Fill ``output`` with bilinear samples of ``input`` at displaced taps.
+
+    For every channel ``c``, kernel tap ``(kh, kw)`` and output position
+    ``(h, w)`` a sampling coordinate is computed from the stride, dilation
+    and padding plus the displacement stored in ``offset``; ``input[c]`` is
+    then sampled there with ``bilinear`` and the value is stored in
+    ``output[c, kh, kw, h, w]``.
+
+    Args:
+        input: 3-D array whose first axis is the channel axis; each
+            ``input[c]`` is handed to ``bilinear`` as a 2-D array.
+        offset: 5-D array unpacked as
+            ``(k_height, k_width, _, out_height, out_width)``. On the third
+            axis index ``1`` is read as the y displacement and index ``0``
+            as the x displacement.
+        output: Array written at ``[c, kh, kw, h, w]``.
+        stride: Indexable pair; ``[0]`` applies to rows, ``[1]`` to columns.
+        pad: Indexable pair; ``[0]`` applies to rows, ``[1]`` to columns.
+        dilation: Indexable pair; ``[0]`` applies to rows, ``[1]`` to
+            columns.
+        groups: Not used in this function.
+        deformable_groups: Not used in this function.
+    """
+    k_height, k_width, _, out_height, out_width = offset.shape
+    channels, _, _ = input.shape
+
+    # Half kernel extent along each axis, (k - 1) // 2.
+    k_h_m = (k_height - 1) // 2
+    k_w_m = (k_width - 1) // 2
+    # The outer loop walks a flattened (channel, kh, kw) index.
+    for ckhkw in prange(channels * k_height * k_width):
+        for h in prange(out_height):
+            for w in prange(out_width):
+                # Recover channel and kernel-tap indices from the flat index.
+                c = ckhkw // (k_height * k_width)
+                khkw = ckhkw % (k_height * k_width)
+                kh = khkw // k_width
+                kw = khkw % k_width
+
+                # Row coordinate to sample: displacement from ``offset`` plus
+                # the strided output row and the dilated tap position,
+                # shifted back by the padding.
+                offset_y = (
+                    offset[kh, kw, 1, h, w]
+                    + h * stride[0]
+                    + (kh - k_h_m) * dilation[0]
+                    - (pad[0] - k_h_m)
+                )
+                # Column coordinate, built the same way from the x terms.
+                offset_x = (
+                    offset[kh, kw, 0, h, w]
+                    + w * stride[1]
+                    + (kw - k_w_m) * dilation[1]
+                    - (pad[1] - k_w_m)
+                )
+
+                output[c, kh, kw, h, w] = bilinear(input[c], offset_y, offset_x)
+
+
+@njit(parallel=True, fastmath=True, gpu_fp64_truncate="auto")
+def deformable_convolution_b1(
+    input,
+    output,
+    offset,
+    weights,
+    bias,
+    tmp,
+    stride,
+    pad,
+    dilation,
+    groups,
+    deformable_groups,
+):
+    """Deformable convolution of a single sample, written into ``output``.
+
+    ``deform`` first fills ``tmp`` with the sampled input values. The
+    convolution is then one matrix product of the flattened weights with
+    the flattened samples, followed by adding the per-channel bias.
+
+    Args:
+        input: One input sample; passed through to ``deform``.
+        output: 3-D array ``(out_channels, height, width)`` receiving the
+            result.
+        offset: Sampling displacements; passed through to ``deform``.
+        weights: 4-D array ``(_, in_channels, k_height, k_width)``.
+        bias: Array reshaped to ``(out_channels, 1)`` before being added.
+        tmp: Scratch buffer filled by ``deform`` and then reshaped to
+            ``(in_channels * k_height * k_width, height * width)``.
+        stride: Passed through to ``deform``.
+        pad: Passed through to ``deform``.
+        dilation: Passed through to ``deform``.
+        groups: Passed through to ``deform``.
+        deformable_groups: Passed through to ``deform``.
+    """
+    out_channels, height, width = output.shape
+    _, in_channels, k_height, k_width = weights.shape
+    patch_len = in_channels * k_height * k_width
+    n_pixels = height * width
+
+    deform(input, offset, tmp, stride, pad, dilation, groups, deformable_groups)
+
+    # 2-D reshapes of the buffers so the convolution becomes a matrix
+    # product. NOTE(review): the result only reaches ``output`` if the
+    # reshape returns a view rather than a copy - confirm for numba_mlir.
+    columns = tmp.reshape((patch_len, n_pixels))
+    flat_weights = weights.reshape((out_channels, patch_len))
+    flat_output = output.reshape((out_channels, n_pixels))
+    np.dot(flat_weights, columns, flat_output)
+
+    # Add the bias of each output channel to every pixel of that channel.
+    bias_column = bias.reshape((out_channels, 1))
+    flat_output[:] = flat_output + bias_column
+
+
+@njit(parallel=True, gpu_fp64_truncate="auto")
+def jdeformable_convolution(
+    input,
+    output,
+    offset,
+    weights,
+    bias,
+    tmp,
+    stride,
+    pad,
+    dilation,
+    groups,
+    deformable_groups,
+):
+    """Apply the single-sample deformable convolution to a whole batch.
+
+    The leading axis of the 4-D ``input`` is the batch axis. Samples are
+    processed one after another; ``offset``, ``weights``, ``bias`` and the
+    ``tmp`` scratch buffer are shared by every sample, and each result is
+    written into the matching slice of ``output``.
+    """
+    n_samples, _, _, _ = input.shape
+    for sample in range(n_samples):
+        sample_in = input[sample]
+        sample_out = output[sample]
+        deformable_convolution_b1(
+            sample_in,
+            sample_out,
+            offset,
+            weights,
+            bias,
+            tmp,
+            stride,
+            pad,
+            dilation,
+            groups,
+            deformable_groups,
+        )
+
+
+def deformable_convolution(
+    input,
+    output,
+    offset,
+    weights,
+    bias,
+    tmp,
+    stride_hw,
+    pad_hw,
+    dilation_hw,
+    groups,
+    deformable_groups,
+):
+    """Benchmark entry point: run the compiled deformable convolution.
+
+    ``stride_hw``, ``pad_hw`` and ``dilation_hw`` are converted to tuples
+    before being handed to the compiled kernel; every other argument is
+    forwarded unchanged. Nothing is returned; the compiled code writes its
+    result into ``output``.
+    """
+    stride = tuple(stride_hw)
+    padding = tuple(pad_hw)
+    dilation = tuple(dilation_hw)
+
+    jdeformable_convolution(
+        input,
+        output,
+        offset,
+        weights,
+        bias,
+        tmp,
+        stride,
+        padding,
+        dilation,
+        groups,
+        deformable_groups,
+    )
diff --git a/...h/benchmarks/deformable_convolution/deformable_convolution_sycl_native_ext/CMakeLists.txt b/...h/benchmarks/deformable_convolution/deformable_convolution_sycl_native_ext/CMakeLists.txt
@@ -0,0 +1,28 @@
+# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+
+cmake_minimum_required(VERSION 3.23)
+
+# Python extension module built from the SYCL implementation.
+set(py_module_name _deformable_convolution_sycl)
+pybind11_add_module(${py_module_name}
+    MODULE
+    deformable_convolution_sycl/impl.cpp
+)
+
+# TBB is looked up only in this directory's cmake/ folder (NO_DEFAULT_PATH
+# disables the default search locations).
+find_package(TBB CONFIG REQUIRED PATHS ${CMAKE_CURRENT_SOURCE_DIR}/cmake NO_DEFAULT_PATH)
+find_package(IntelSYCL CONFIG REQUIRED)
+
+# Inside a conda environment, point MKL_ROOT at the environment prefix.
+if (DEFINED ENV{CONDA_PREFIX})
+  set(MKL_ROOT $ENV{CONDA_PREFIX})
+endif()
+# MKL is likewise looked up only in this directory's cmake/ folder.
+find_package(MKL CONFIG REQUIRED PATHS ${CMAKE_CURRENT_SOURCE_DIR}/cmake NO_DEFAULT_PATH)
+
+# Reuse the compile options and include directories exported by MKL::MKL,
+# and add the dpctl include directories.
+target_compile_options(${py_module_name} PUBLIC $<TARGET_PROPERTY:MKL::MKL,INTERFACE_COMPILE_OPTIONS>)
+target_include_directories(${py_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS} $<TARGET_PROPERTY:MKL::MKL,INTERFACE_INCLUDE_DIRECTORIES>)
+# LINK_ONLY takes just the link dependencies of MKL::MKL; MKL_SYCL is linked in addition.
+target_link_libraries(${py_module_name} PUBLIC $<LINK_ONLY:MKL::MKL> ${MKL_SYCL})
+
+# Install the module into the deformable_convolution_sycl folder under this
+# directory's path relative to the top-level source directory.
+file(RELATIVE_PATH py_module_dest ${CMAKE_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
+install(TARGETS ${py_module_name}
+  DESTINATION ${py_module_dest}/deformable_convolution_sycl
+)
diff --git a/dpbench/benchmarks/deformable_convolution/deformable_convolution_sycl_native_ext/__init__.py b/dpbench/benchmarks/deformable_convolution/deformable_convolution_sycl_native_ext/__init__.py
@@ -0,0 +1,9 @@
+# Copyright 2022 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from .deformable_convolution_sycl._deformable_convolution_sycl import (
+    deformable_convolution as deformable_convolution_sycl,
+)
+
+__all__ = ["deformable_convolution_sycl"]