Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 13 additions & 7 deletions dpctl/tensor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,12 +94,14 @@
from ._elementwise_funcs import (
abs,
add,
ceil,
conj,
cos,
divide,
equal,
exp,
expm1,
floor,
floor_divide,
greater,
greater_equal,
Expand Down Expand Up @@ -128,6 +130,7 @@
sqrt,
square,
subtract,
trunc,
)
from ._reduction import sum

Expand Down Expand Up @@ -208,16 +211,21 @@
"inf",
"abs",
"add",
"ceil",
"conj",
"cos",
"divide",
"equal",
"exp",
"expm1",
"floor",
"floor_divide",
"greater",
"greater_equal",
"imag",
"isfinite",
"isinf",
"isnan",
"isfinite",
"less",
"less_equal",
"log",
Expand All @@ -228,19 +236,17 @@
"log1p",
"log2",
"log10",
"multiply",
"negative",
"not_equal",
"positive",
"pow",
"proj",
"real",
"sin",
"sqrt",
"square",
"divide",
"multiply",
"pow",
"subtract",
"equal",
"not_equal",
"sum",
"floor_divide",
"trunc",
]
7 changes: 6 additions & 1 deletion dpctl/tensor/_elementwise_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,12 @@ def __call__(self, x, out=None, order="K"):
x.dtype, self.result_type_resolver_fn_, x.sycl_device
)
if res_dt is None:
raise RuntimeError
raise TypeError(
f"function '{self.name_}' does not support input type "
f"({x.dtype}), "
"and the input could not be safely coerced to any "
"supported types according to the casting rule ''safe''."
)

orig_out = out
if out is not None:
Expand Down
77 changes: 74 additions & 3 deletions dpctl/tensor/_elementwise_funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,30 @@
# FIXME: implement B07

# U09: ==== CEIL (x)
# FIXME: implement U09
# Docstring attached to the element-wise `ceil` function object built below.
_ceil_docstring = """
ceil(x, out=None, order='K')

Returns the ceiling for each element `x_i` for input array `x`.
The ceil of the scalar `x` is the smallest integer `i`, such that `i >= x`.

Args:
    x (usm_ndarray):
        Input array, expected to have numeric data type.
    out ({None, usm_ndarray}, optional):
        Output array to populate.
        Array must have the correct shape and the expected data type.
    order ("C","F","A","K", optional):
        Memory layout of the newly allocated output array, if parameter
        `out` is `None`.
        Default: "K".
Returns:
    usm_ndarray:
        An array containing the element-wise ceiling of input array.
        The returned array has the same data type as `x`.
"""

# Unary element-wise function object wired to `ti._ceil_result_type` and
# `ti._ceil`, carrying the docstring above.
ceil = UnaryElementwiseFunc(
    "ceil", ti._ceil_result_type, ti._ceil, _ceil_docstring
)

# U10: ==== CONJ (x)
_conj_docstring = """
Expand Down Expand Up @@ -271,7 +294,30 @@
)

# U15: ==== FLOOR (x)
# FIXME: implement U15
# Docstring attached to the element-wise `floor` function object built below.
_floor_docstring = """
floor(x, out=None, order='K')

Returns the floor for each element `x_i` for input array `x`.
The floor of the scalar `x` is the largest integer `i`, such that `i <= x`.

Args:
    x (usm_ndarray):
        Input array, expected to have numeric data type.
    out ({None, usm_ndarray}, optional):
        Output array to populate.
        Array must have the correct shape and the expected data type.
    order ("C","F","A","K", optional):
        Memory layout of the newly allocated output array, if parameter
        `out` is `None`.
        Default: "K".
Returns:
    usm_ndarray:
        An array containing the element-wise floor of input array.
        The returned array has the same data type as `x`.
"""

# Unary element-wise function object wired to `ti._floor_result_type` and
# `ti._floor`, carrying the docstring above.
floor = UnaryElementwiseFunc(
    "floor", ti._floor_result_type, ti._floor, _floor_docstring
)

# B10: ==== FLOOR_DIVIDE (x1, x2)
_floor_divide_docstring_ = """
Expand Down Expand Up @@ -1031,4 +1077,29 @@
# FIXME: implement U35

# U36: ==== TRUNC (x)
# FIXME: implement U36
# Docstring attached to the element-wise `trunc` function object built below.
_trunc_docstring = """
trunc(x, out=None, order='K')

Returns the truncated value for each element `x_i` for input array `x`.
The truncated value of the scalar `x` is the nearest integer `i` which is
closer to zero than `x` is. In short, the fractional part of the
signed number `x` is discarded.

Args:
    x (usm_ndarray):
        Input array, expected to have numeric data type.
    out ({None, usm_ndarray}, optional):
        Output array to populate.
        Array must have the correct shape and the expected data type.
    order ("C","F","A","K", optional):
        Memory layout of the newly allocated output array, if parameter
        `out` is `None`.
        Default: "K".
Returns:
    usm_ndarray:
        An array containing the element-wise truncated value of input array.
        The returned array has the same data type as `x`.
"""

# Unary element-wise function object wired to `ti._trunc_result_type` and
# `ti._trunc`, carrying the docstring above.
trunc = UnaryElementwiseFunc(
    "trunc", ti._trunc_result_type, ti._trunc, _trunc_docstring
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
//=== ceil.hpp - Unary function CEIL ------ *-C++-*--/===//
//
// Data Parallel Control (dpctl)
//
// Copyright 2020-2023 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//===---------------------------------------------------------------------===//
///
/// \file
/// This file defines kernels for elementwise evaluation of CEIL(x) function.
//===---------------------------------------------------------------------===//

#pragma once
#include <CL/sycl.hpp>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <type_traits>

#include "kernels/elementwise_functions/common.hpp"

#include "utils/offset_utils.hpp"
#include "utils/type_dispatch.hpp"
#include "utils/type_utils.hpp"
#include <pybind11/pybind11.h>

namespace dpctl
{
namespace tensor
{
namespace kernels
{
namespace ceil
{

namespace py = pybind11;
namespace td_ns = dpctl::tensor::type_dispatch;

using dpctl::tensor::type_utils::is_complex;

/*!
 * @brief Per-element functor computing the ceiling of one value.
 *
 * Integral inputs are returned unchanged. For other types a zero input is
 * returned as-is and every remaining value is forwarded to `std::ceil`.
 *
 * @tparam argT Type of the input element.
 * @tparam resT Type of the value returned by the call operator.
 */
template <typename argT, typename resT> struct CeilFunctor
{

    // is function constant for given argT
    using is_constant = typename std::false_type;
    // constant value, if constant
    // constexpr resT constant_value = resT{};
    // is function defined for sycl::vec
    using supports_vec = typename std::false_type;
    // do both argT and resT support subgroup store/load operation
    using supports_sg_loadstore = typename std::negation<
        std::disjunction<is_complex<resT>, is_complex<argT>>>;

    /*!
     * @brief Evaluate the ceiling of `in`.
     *
     * The functor holds no state, so the operator is const-qualified and can
     * be invoked on const instances as well as mutable ones.
     *
     * @param in Input element.
     * @return `in` itself for integral types and for zero, otherwise
     *         `std::ceil(in)` converted to `resT`.
     */
    resT operator()(const argT &in) const
    {
        if constexpr (std::is_integral_v<argT>) {
            // an integer is already its own ceiling
            return in;
        }
        else {
            if (in == 0) {
                // zero is handed back untouched, so the input's own
                // representation of zero is what the caller receives
                return in;
            }
            return std::ceil(in);
        }
    }
};

/*!
 * @brief `elementwise_common::UnaryContigFunctor` instantiated with
 *        `CeilFunctor` as the per-element operation.
 *
 * The result type defaults to the argument type; `VecSz` and `NVecs` default
 * to 4 and 2 and are forwarded unchanged to `UnaryContigFunctor`.
 */
template <typename ArgT,
          typename ResT = ArgT,
          unsigned int VecSz = 4,
          unsigned int NVecs = 2>
using CeilContigFunctor =
    elementwise_common::UnaryContigFunctor<ArgT,
                                           ResT,
                                           CeilFunctor<ArgT, ResT>,
                                           VecSz,
                                           NVecs>;

/*!
 * @brief `elementwise_common::UnaryStridedFunctor` instantiated with
 *        `CeilFunctor` as the per-element operation and `IndexT` as the
 *        indexer type.
 */
template <typename ArgT, typename ResT, typename IndexT>
using CeilStridedFunctor =
    elementwise_common::UnaryStridedFunctor<ArgT,
                                            ResT,
                                            IndexT,
                                            CeilFunctor<ArgT, ResT>>;

template <typename T> struct CeilOutputType
{
using value_type = typename std::disjunction< // disjunction is C++17
// feature, supported by DPC++
td_ns::TypeMapResultEntry<T, std::uint8_t>,
td_ns::TypeMapResultEntry<T, std::uint16_t>,
td_ns::TypeMapResultEntry<T, std::uint32_t>,
td_ns::TypeMapResultEntry<T, std::uint64_t>,
td_ns::TypeMapResultEntry<T, std::int8_t>,
td_ns::TypeMapResultEntry<T, std::int16_t>,
td_ns::TypeMapResultEntry<T, std::int32_t>,
td_ns::TypeMapResultEntry<T, std::int64_t>,
td_ns::TypeMapResultEntry<T, sycl::half>,
td_ns::TypeMapResultEntry<T, float>,
td_ns::TypeMapResultEntry<T, double>,
td_ns::DefaultResultEntry<void>>::result_type;
};

// Declared only, never defined here: handed to unary_contig_impl below as a
// template argument (presumably to name the SYCL kernel -- confirm in
// common.hpp).
template <typename T1, typename T2, unsigned int vec_sz, unsigned int n_vecs>
class ceil_contig_kernel;

/*!
 * @brief Forwards a CEIL request to `elementwise_common::unary_contig_impl`.
 *
 * @tparam argTy Element type of the input.
 * @param exec_q  Queue forwarded to the common implementation.
 * @param nelems  Element count forwarded to the common implementation.
 * @param arg_p   Pointer to the input data, typed as `const char *`.
 * @param res_p   Pointer to the output data, typed as `char *`.
 * @param depends Events forwarded as dependencies; empty by default.
 * @return Event returned by `unary_contig_impl`.
 */
template <typename argTy>
sycl::event ceil_contig_impl(sycl::queue exec_q,
                             size_t nelems,
                             const char *arg_p,
                             char *res_p,
                             const std::vector<sycl::event> &depends = {})
{
    sycl::event ceil_ev =
        elementwise_common::unary_contig_impl<argTy,
                                              CeilOutputType,
                                              CeilContigFunctor,
                                              ceil_contig_kernel>(
            exec_q, nelems, arg_p, res_p, depends);

    return ceil_ev;
}

template <typename fnT, typename T> struct CeilContigFactory
{
fnT get()
{
if constexpr (std::is_same_v<typename CeilOutputType<T>::value_type,
void>) {
fnT fn = nullptr;
return fn;
}
else {
fnT fn = ceil_contig_impl<T>;
return fn;
}
}
};

template <typename fnT, typename T> struct CeilTypeMapFactory
{
/*! @brief get typeid for output type of sycl::ceil(T x) */
std::enable_if_t<std::is_same<fnT, int>::value, int> get()
{
using rT = typename CeilOutputType<T>::value_type;
return td_ns::GetTypeid<rT>{}.get();
}
};

// Declared only, never defined here: handed to unary_strided_impl below as a
// template argument (presumably to name the SYCL kernel -- confirm in
// common.hpp).
template <typename T1, typename T2, typename T3> class ceil_strided_kernel;

/*!
 * @brief Forwards a CEIL request to `elementwise_common::unary_strided_impl`.
 *
 * Every argument is passed through unchanged and in the same order.
 *
 * @tparam argTy Element type of the input.
 * @param exec_q             Queue forwarded to the common implementation.
 * @param nelems             Element count.
 * @param nd                 Dimension count.
 * @param shape_and_strides  Packed shape/stride data; layout is defined by
 *                           `unary_strided_impl`, not visible here.
 * @param arg_p              Pointer to the input data, as `const char *`.
 * @param arg_offset         Offset applied to the input.
 * @param res_p              Pointer to the output data, as `char *`.
 * @param res_offset         Offset applied to the output.
 * @param depends            Events forwarded as dependencies.
 * @param additional_depends Further events forwarded as dependencies.
 * @return Event returned by `unary_strided_impl`.
 */
template <typename argTy>
sycl::event
ceil_strided_impl(sycl::queue exec_q,
                  size_t nelems,
                  int nd,
                  const py::ssize_t *shape_and_strides,
                  const char *arg_p,
                  py::ssize_t arg_offset,
                  char *res_p,
                  py::ssize_t res_offset,
                  const std::vector<sycl::event> &depends,
                  const std::vector<sycl::event> &additional_depends)
{
    sycl::event ceil_ev =
        elementwise_common::unary_strided_impl<argTy,
                                               CeilOutputType,
                                               CeilStridedFunctor,
                                               ceil_strided_kernel>(
            exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p,
            res_offset, depends, additional_depends);

    return ceil_ev;
}

template <typename fnT, typename T> struct CeilStridedFactory
{
fnT get()
{
if constexpr (std::is_same_v<typename CeilOutputType<T>::value_type,
void>) {
fnT fn = nullptr;
return fn;
}
else {
fnT fn = ceil_strided_impl<T>;
return fn;
}
}
};

} // namespace ceil
} // namespace kernels
} // namespace tensor
} // namespace dpctl
Loading