Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions dpbench/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ add_subdirectory(rambo)
add_subdirectory(kmeans)
add_subdirectory(knn)
add_subdirectory(gpairs)
add_subdirectory(deformable_convolution)
add_subdirectory(dbscan)

# generate dpcpp version into config
Expand Down
5 changes: 5 additions & 0 deletions dpbench/benchmarks/deformable_convolution/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

# Pull in the build rules defined in the
# deformable_convolution_sycl_native_ext subdirectory.
add_subdirectory(deformable_convolution_sycl_native_ext)
6 changes: 6 additions & 0 deletions dpbench/benchmarks/deformable_convolution/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Copyright 2022 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

"""Deformable convolution
"""
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Copyright 2022 Intel Corp.
#
# SPDX-License-Identifier: Apache-2.0


def initialize(
    batch,
    in_chw,
    out_chw,
    kernel_hw,
    stride_hw,
    dilation_hw,
    pad_hw,
    groups,
    deformable_groups,
    seed,
    types_dict,
):
    """Allocate and randomly fill the arrays used by the benchmark.

    Args:
        batch: Leading (batch) dimension of the input and output arrays.
        in_chw: List ``[channels, height, width]`` describing one input.
        out_chw: List ``[channels, height, width]`` describing one output.
        kernel_hw: List ``[kernel_height, kernel_width]``.
        stride_hw, dilation_hw, pad_hw, groups, deformable_groups: Accepted
            for signature compatibility; not read by this function.
        seed: Seed passed to ``numpy.random.seed`` before any draw.
        types_dict: Mapping whose ``"float"`` entry is the dtype used for
            every array except ``offset``.

    Returns:
        Tuple ``(input, output, offset, weights, bias, tmp)``. ``output``
        and ``tmp`` are uninitialised (``np.empty``); ``offset`` is float32
        and holds values derived as ``2 * u - 1`` from uniform draws ``u``.
    """
    import numpy as np

    dtype: np.dtype = types_dict["float"]

    # Seed the legacy global generator so every draw below is reproducible.
    np.random.seed(seed)

    in_channels = in_chw[0]
    out_channels = out_chw[0]
    out_h = out_chw[1]
    out_w = out_chw[2]

    shape_input = [batch] + in_chw  # n, c, h, w
    shape_output = [batch] + out_chw  # n, c, h, w
    shape_offset = kernel_hw + [2, out_h, out_w]  # kh, kw, 2, oh, ow
    shape_weights = [out_channels, in_channels] + kernel_hw  # oc, ic, kh, kw
    shape_tmp = [in_channels, kernel_hw[0], kernel_hw[1], out_h, out_w]

    # Buffers that are only written by the benchmark need no random content.
    output = np.empty(shape_output, dtype=dtype)
    tmp = np.empty(shape_tmp, dtype=dtype)

    # The draw order (input, offset, weights, bias) determines the values
    # produced for a given seed, so it must stay as is.
    input = np.random.random(shape_input).astype(dtype)
    unit_draw = np.random.random(shape_offset).astype("float32")
    offset = 2 * unit_draw - 1
    weights = np.random.random(shape_weights).astype(dtype)
    bias = np.random.random(out_channels).astype(dtype)

    return (input, output, offset, weights, bias, tmp)
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
# Copyright 2022 Intel Corp.
#
# SPDX-License-Identifier: Apache-2.0

import math

import numpy as np
from numba import prange
from numba_mlir import njit


@njit(parallel=True, inline="always", fastmath=True, gpu_fp64_truncate="auto")
def bilinear(input, offset_y, offset_x):
    """Sample a 2-D array at a fractional position by bilinear interpolation.

    Args:
        input: 2-D array indexed as ``input[y, x]``.
        offset_y: Fractional row coordinate of the sample point.
        offset_x: Fractional column coordinate of the sample point.

    Returns:
        The weighted sum of the (up to four) neighbouring elements that lie
        inside ``input``. Neighbours outside the array contribute nothing,
        and the result is 0 when the sample point is a full cell or more
        outside the array on any side.
    """
    height, width = input.shape

    # Top-left neighbour and the weight it receives along each axis; the
    # opposite neighbour along that axis receives the complementary weight.
    start_x = int(math.floor(offset_x))
    start_x_weight = 1 - (offset_x - start_x)
    start_y = int(math.floor(offset_y))
    start_y_weight = 1 - (offset_y - start_y)

    output = 0
    # No neighbour can be in bounds here, so the sample is zero.
    if (
        offset_x >= width
        or offset_y >= height
        or offset_x <= -1
        or offset_y <= -1
    ):
        return output

    # Top-left neighbour.
    if start_x >= 0 and start_y >= 0:
        w = start_x_weight * start_y_weight
        output += w * input[start_y, start_x]

    # Top-right neighbour.
    if start_x + 1 < width and start_y >= 0:
        w = (1 - start_x_weight) * start_y_weight
        output += w * input[start_y, start_x + 1]

    # Bottom-left neighbour.
    if start_x >= 0 and start_y + 1 < height:
        w = start_x_weight * (1 - start_y_weight)
        output += w * input[start_y + 1, start_x]

    # Bottom-right neighbour.
    if start_x + 1 < width and start_y + 1 < height:
        w = (1 - start_x_weight) * (1 - start_y_weight)
        output += w * input[start_y + 1, start_x + 1]

    # The four weights already sum to 1, so the accumulated value is the
    # interpolated sample and must be returned without further scaling.
    return output


@njit(parallel=True, fastmath=True, gpu_fp64_truncate="auto")
def deform(
    input, offset, output, stride, pad, dilation, groups, deformable_groups
):
    """Gather bilinearly sampled input values for every kernel tap.

    For each channel ``c``, kernel position ``(kh, kw)`` and output position
    ``(h, w)``, writes ``output[c, kh, kw, h, w]`` with ``input[c]`` sampled
    at a location built from the output position, stride, dilation, padding
    and the learned shift stored in ``offset``.

    Args:
        input: 3-D array indexed ``[channel, y, x]``.
        offset: 5-D array indexed ``[kh, kw, axis, h, w]``; index 1 of
            ``axis`` is read as the y shift and index 0 as the x shift.
        output: 5-D array indexed ``[channel, kh, kw, h, w]``; written
            in place.
        stride, pad, dilation: Two-element sequences indexed ``[0]`` for the
            vertical and ``[1]`` for the horizontal direction.
        groups, deformable_groups: Not read by this function.
    """
    kernel_h, kernel_w, _, rows_out, cols_out = offset.shape
    n_channels, _, _ = input.shape

    # Index of the centre tap along each kernel axis.
    centre_h = (kernel_h - 1) // 2
    centre_w = (kernel_w - 1) // 2
    for flat in prange(n_channels * kernel_height_times_width(kernel_h, kernel_w)):
        for row in prange(rows_out):
            for col in prange(cols_out):
                # Unflatten the combined (channel, kh, kw) loop index.
                ch = flat // (kernel_h * kernel_w)
                tap = flat % (kernel_h * kernel_w)
                kh = tap // kernel_w
                kw = tap % kernel_w

                shift_y = (kh - centre_h) * dilation[0]
                inset_y = pad[0] - centre_h
                sample_y = (
                    offset[kh, kw, 1, row, col]
                    + row * stride[0]
                    + shift_y
                    - inset_y
                )

                shift_x = (kw - centre_w) * dilation[1]
                inset_x = pad[1] - centre_w
                sample_x = (
                    offset[kh, kw, 0, row, col]
                    + col * stride[1]
                    + shift_x
                    - inset_x
                )

                output[ch, kh, kw, row, col] = bilinear(
                    input[ch], sample_y, sample_x
                )


@njit(parallel=True, fastmath=True, gpu_fp64_truncate="auto")
def deformable_convolution_b1(
    input,
    output,
    offset,
    weights,
    bias,
    tmp,
    stride,
    pad,
    dilation,
    groups,
    deformable_groups,
):
    """Run the deformable convolution for a single batch element.

    The sampled input values are first gathered into ``tmp`` by ``deform``;
    the convolution is then a matrix product of the flattened weights with
    the flattened samples, followed by a per-output-channel bias add.

    Args:
        input: 3-D input passed straight to ``deform``.
        output: 3-D array ``[out_channels, height, width]``; written through
            a reshaped alias.
        offset: Sampling shifts passed straight to ``deform``.
        weights: 4-D array ``[out_channels, in_channels, kh, kw]``.
        bias: Array reshaped to ``(out_channels, 1)`` before being added.
        tmp: Scratch array filled by ``deform`` and then reshaped to 2-D.
        stride, pad, dilation, groups, deformable_groups: Forwarded to
            ``deform`` unchanged.
    """
    n_out, out_h, out_w = output.shape
    _, n_in, kernel_h, kernel_w = weights.shape

    patch_len = n_in * kernel_h * kernel_w
    n_pixels = out_h * out_w

    deform(input, offset, tmp, stride, pad, dilation, groups, deformable_groups)

    # Flatten everything to 2-D so the convolution is one matrix product.
    columns = tmp.reshape((patch_len, n_pixels))
    flat_weights = weights.reshape((n_out, patch_len))
    flat_output = output.reshape((n_out, n_pixels))
    np.dot(flat_weights, columns, flat_output)

    # Column-shaped bias so it is added along the pixel axis.
    bias_column = bias.reshape((n_out, 1))
    flat_output[:] = flat_output + bias_column


@njit(parallel=True, gpu_fp64_truncate="auto")
def jdeformable_convolution(
    input,
    output,
    offset,
    weights,
    bias,
    tmp,
    stride,
    pad,
    dilation,
    groups,
    deformable_groups,
):
    """Apply ``deformable_convolution_b1`` to every element of the batch.

    ``input`` is unpacked as a 4-D array; its first axis is iterated in
    order, and ``input[i]`` / ``output[i]`` are handed to the per-sample
    routine. All other arguments, including the ``tmp`` scratch array, are
    shared between iterations and forwarded unchanged.
    """
    n_batch, _, _, _ = input.shape
    for idx in range(n_batch):
        sample_in = input[idx]
        sample_out = output[idx]
        deformable_convolution_b1(
            sample_in,
            sample_out,
            offset,
            weights,
            bias,
            tmp,
            stride,
            pad,
            dilation,
            groups,
            deformable_groups,
        )


def deformable_convolution(
    input,
    output,
    offset,
    weights,
    bias,
    tmp,
    stride_hw,
    pad_hw,
    dilation_hw,
    groups,
    deformable_groups,
):
    """Entry point: convert sequence arguments and call the compiled kernel.

    ``stride_hw``, ``pad_hw`` and ``dilation_hw`` are converted to tuples
    before being handed to ``jdeformable_convolution``; every other argument
    is forwarded as received. The result is written into ``output``; nothing
    is returned.
    """
    stride = tuple(stride_hw)
    pad = tuple(pad_hw)
    dilation = tuple(dilation_hw)

    jdeformable_convolution(
        input,
        output,
        offset,
        weights,
        bias,
        tmp,
        stride,
        pad,
        dilation,
        groups,
        deformable_groups,
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

cmake_minimum_required(VERSION 3.23)

# Name of the Python extension module built from the SYCL implementation.
set(py_module_name _deformable_convolution_sycl)
pybind11_add_module(${py_module_name}
    MODULE
    deformable_convolution_sycl/impl.cpp
)

# TBB is looked up in config mode and only under this directory's cmake/
# folder (NO_DEFAULT_PATH disables every other search location).
find_package(TBB CONFIG REQUIRED PATHS ${CMAKE_CURRENT_SOURCE_DIR}/cmake NO_DEFAULT_PATH)
find_package(IntelSYCL CONFIG REQUIRED)

# When a conda environment is active, point MKL_ROOT at its prefix before
# searching for MKL.
if (DEFINED ENV{CONDA_PREFIX})
    set(MKL_ROOT $ENV{CONDA_PREFIX})
endif()
# Same restricted search as for TBB: only this directory's cmake/ folder.
find_package(MKL CONFIG REQUIRED PATHS ${CMAKE_CURRENT_SOURCE_DIR}/cmake NO_DEFAULT_PATH)

# Reuse MKL's interface compile options and include directories, and add the
# include directories from Dpctl_INCLUDE_DIRS (set outside this file).
target_compile_options(${py_module_name} PUBLIC $<TARGET_PROPERTY:MKL::MKL,INTERFACE_COMPILE_OPTIONS>)
target_include_directories(${py_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS} $<TARGET_PROPERTY:MKL::MKL,INTERFACE_INCLUDE_DIRECTORIES>)
# LINK_ONLY takes MKL's link dependencies without its other usage
# requirements; MKL_SYCL is presumably provided by the MKL package config
# -- TODO confirm.
target_link_libraries(${py_module_name} PUBLIC $<LINK_ONLY:MKL::MKL> ${MKL_SYCL})

# Install the module under the same path, relative to the top-level source
# directory, that this directory has in the source tree.
file(RELATIVE_PATH py_module_dest ${CMAKE_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
install(TARGETS ${py_module_name}
    DESTINATION ${py_module_dest}/deformable_convolution_sycl
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Copyright 2022 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

from .deformable_convolution_sycl._deformable_convolution_sycl import (
deformable_convolution as deformable_convolution_sycl,
)

__all__ = ["deformable_convolution_sycl"]
Loading