diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index 6fe6b46f5e15..000000000000 --- a/.coveragerc +++ /dev/null @@ -1,15 +0,0 @@ -[run] -plugins = Cython.Coverage -include = dpnp/*,examples/*,build_cython/dpnp/* -branch = True -source = dpnp,dpnp.dpnp_algo - -[report] -exclude_lines = - if self.debug: - pragma: no cover - raise NotImplementedError - if __name__ == .__main__.: -ignore_errors = True -omit = - tests/* diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 17140158deee..b5edc3985308 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -2,4 +2,5 @@ - [ ] Have you added a test, reproducer or referred to issue with a reproducer? - [ ] Have you tested your changes locally for CPU and GPU devices? - [ ] Have you made sure that new changes do not introduce compiler warnings? +- [ ] Have you checked performance impact of proposed changes? - [ ] If this PR is a work in progress, are you filing the PR as a draft? diff --git a/.github/workflows/Windows-IntelLLVM_3.22.cmake b/.github/workflows/Windows-IntelLLVM_3.22.cmake new file mode 100644 index 000000000000..603fe243b520 --- /dev/null +++ b/.github/workflows/Windows-IntelLLVM_3.22.cmake @@ -0,0 +1,24 @@ +# Distributed under the OSI-approved BSD 3-Clause License. See accompanying +# file Copyright.txt or https://cmake.org/licensing for details. + + +# This module is shared by multiple languages; use include blocker. 
+if(__WINDOWS_INTEL) + return() +endif() +set(__WINDOWS_INTEL 1) + +include(Platform/Windows-MSVC) +macro(__windows_compiler_intel lang) + __windows_compiler_msvc(${lang}) + + set(CMAKE_${lang}_LINK_EXECUTABLE " ${CMAKE_CL_NOLOGO} ${CMAKE_START_TEMP_FILE} -link -out: -implib: -pdb: -version:.${_PLATFORM_LINK_FLAGS} ${CMAKE_END_TEMP_FILE}") + set(CMAKE_${lang}_CREATE_SHARED_LIBRARY " ${CMAKE_CL_NOLOGO} ${CMAKE_START_TEMP_FILE} -LD -link -out: -implib: -pdb: -version:.${_PLATFORM_LINK_FLAGS} ${CMAKE_END_TEMP_FILE}") + set(CMAKE_${lang}_CREATE_SHARED_MODULE ${CMAKE_${lang}_CREATE_SHARED_LIBRARY}) + if (NOT "${lang}" STREQUAL "Fortran") # Fortran driver does not support -fuse-ld, yet + set(CMAKE_${lang}_CREATE_STATIC_LIBRARY " ${CMAKE_CL_NOLOGO} ${CMAKE_START_TEMP_FILE} -fuse-ld=llvm-lib -o -link ${CMAKE_END_TEMP_FILE}") + endif() + set(CMAKE_DEPFILE_FLAGS_${lang} "-QMMD -QMT -QMF ") + set(CMAKE_${lang}_DEPFILE_FORMAT gcc) + +endmacro() diff --git a/.github/workflows/Windows-IntelLLVM_3.26.cmake b/.github/workflows/Windows-IntelLLVM_3.26.cmake new file mode 100644 index 000000000000..eac3f0a9a827 --- /dev/null +++ b/.github/workflows/Windows-IntelLLVM_3.26.cmake @@ -0,0 +1,66 @@ +# Distributed under the OSI-approved BSD 3-Clause License. See accompanying +# file Copyright.txt or https://cmake.org/licensing for details. + + +# This module is shared by multiple languages; use include blocker. +if(__WINDOWS_INTEL_LLVM) + return() +endif() +set(__WINDOWS_INTEL_LLVM 1) + +# Platform/Windows-MSVC adds some linking options icx/ifx do not understand, +# but that need to be passed to the linker. Wrap all the linking options from +# Platform/Windows-MSVC so that the compiler will hand them off to the linker +# without interpreting them. 
+ +# Save original CMAKE_${t}_LINKER_FLAGS_INIT +foreach(t EXE SHARED MODULE STATIC) + set(_saved_cmake_${t}_linker_flags_init ${CMAKE_${t}_LINKER_FLAGS_INIT}) + set(CMAKE_${t}_LINKER_FLAGS_INIT "") +endforeach() +include(Platform/Windows-MSVC) +# Wrap linker flags from Windows-MSVC +set(_IntelLLVM_LINKER_WRAPPER_FLAG "/Qoption,link,") +set(_IntelLLVM_LINKER_WRAPPER_FLAG_SEP ",") +foreach(t EXE SHARED MODULE STATIC) + set(_wrapped_linker_flags "") + foreach(flag ${CMAKE_${t}_LINKER_FLAGS_INIT}) + string(STRIP ${flag} flag) + list(APPEND _wrapped_linker_flags "${_IntelLLVM_LINKER_WRAPPER_FLAG}${flag}") + endforeach() + set(CMAKE_${t}_LINKER_FLAGS_INIT "") + list(APPEND CMAKE_${t}_LINKER_FLAGS_INIT + ${_saved_cmake_${t}_linker_flags_init} ${_wrapped_linker_flags}) +endforeach() + +macro(__windows_compiler_intel lang) + __windows_compiler_msvc(${lang}) + + set(CMAKE_${lang}_LINKER_WRAPPER_FLAG "${_IntelLLVM_LINKER_WRAPPER_FLAG}") + set(CMAKE_${lang}_LINKER_WRAPPER_FLAG_SEP "${_IntelLLVM_LINKER_WRAPPER_FLAG_SEP}") + set(CMAKE_${lang}_CREATE_WIN32_EXE "${CMAKE_${lang}_LINKER_WRAPPER_FLAG}/subsystem:windows") + set(CMAKE_${lang}_CREATE_CONSOLE_EXE "${CMAKE_${lang}_LINKER_WRAPPER_FLAG}/subsystem:console") + set(CMAKE_LINK_DEF_FILE_FLAG "${CMAKE_${lang}_LINKER_WRAPPER_FLAG}/DEF:") + set(CMAKE_LIBRARY_PATH_FLAG "${CMAKE_${lang}_LINKER_WRAPPER_FLAG}/LIBPATH:") + + # Features for LINK_LIBRARY generator expression + if(MSVC_VERSION GREATER "1900") + ## WHOLE_ARCHIVE: Force loading all members of an archive + set(CMAKE_LINK_LIBRARY_USING_WHOLE_ARCHIVE "LINKER:/WHOLEARCHIVE:") + set(CMAKE_LINK_LIBRARY_USING_WHOLE_ARCHIVE_SUPPORTED TRUE) + endif() + + set(CMAKE_${lang}_LINK_EXECUTABLE + "${_CMAKE_VS_LINK_EXE} ${CMAKE_CL_NOLOGO} ${CMAKE_START_TEMP_FILE} /link /out: /implib: /pdb: /version:.${_PLATFORM_LINK_FLAGS} ${CMAKE_END_TEMP_FILE}") + set(CMAKE_${lang}_CREATE_SHARED_LIBRARY + "${_CMAKE_VS_LINK_DLL} ${CMAKE_CL_NOLOGO} ${CMAKE_START_TEMP_FILE} -LD -link /out: /implib: /pdb: 
/version:.${_PLATFORM_LINK_FLAGS} ${CMAKE_END_TEMP_FILE}") + set(CMAKE_${lang}_CREATE_SHARED_MODULE ${CMAKE_${lang}_CREATE_SHARED_LIBRARY}) + if (NOT "${lang}" STREQUAL "Fortran" OR CMAKE_${lang}_COMPILER_VERSION VERSION_GREATER_EQUAL 2022.1) + # The Fortran driver does not support -fuse-ld=llvm-lib before compiler version 2022.1 + set(CMAKE_${lang}_CREATE_STATIC_LIBRARY + " ${CMAKE_CL_NOLOGO} ${CMAKE_START_TEMP_FILE} -fuse-ld=llvm-lib -o ${CMAKE_END_TEMP_FILE}") + endif() + + set(CMAKE_DEPFILE_FLAGS_${lang} "-QMD -QMT -QMF ") + set(CMAKE_${lang}_DEPFILE_FORMAT gcc) +endmacro() diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml index 95db7b640f1f..f4e3b74c6237 100644 --- a/.github/workflows/build-sphinx.yml +++ b/.github/workflows/build-sphinx.yml @@ -17,6 +17,7 @@ jobs: env: python-ver: '3.9' + CHANNELS: '-c dppy/label/dev -c intel -c conda-forge --override-channels' steps: - name: Cancel Previous Runs @@ -74,21 +75,20 @@ jobs: - name: Install dpnp dependencies run: | conda install dpctl mkl-devel-dpcpp onedpl-devel tbb-devel dpcpp_linux-64 \ - cmake cython pytest -c dppy/label/dev -c intel -c conda-forge + cmake cython pytest ninja scikit-build sysroot_linux-64">=2.28" ${{ env.CHANNELS }} - name: Install cuPy dependencies - run: conda install -c conda-forge cupy cudatoolkit=10.0 + run: conda install cupy cudatoolkit=10.0 - name: Conda info - run: | - conda info - conda list + run: conda info + + - name: Conda list + run: conda list - name: Build library run: | - python setup.py build_clib - CC=icpx python setup.py build_ext --inplace - python setup.py develop + CC=icx CXX=icpx python setup.py develop -G Ninja -- -DDPCTL_MODULE_PATH=$(python -m dpctl --cmakedir) - name: Build docs run: make html diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 52ac769b7fe8..e42adbdc913d 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -9,7 +9,7 @@ on: env: 
PACKAGE_NAME: dpnp MODULE_NAME: dpnp - CHANNELS: '-c dppy/label/dev -c intel -c main --override-channels' + CHANNELS: '-c dppy/label/dev -c intel -c conda-forge --override-channels' TEST_SCOPE: >- test_arraycreation.py test_dot.py @@ -18,8 +18,10 @@ env: test_linalg.py test_mathematical.py test_random_state.py + test_sort.py test_special.py test_usm_type.py + third_party/cupy/sorting_tests/test_sort.py VER_JSON_NAME: 'version.json' VER_SCRIPT1: "import json; f = open('version.json', 'r'); j = json.load(f); f.close(); " VER_SCRIPT2: "d = j['dpnp'][0]; print('='.join((d[s] for s in ('version', 'build'))))" @@ -59,7 +61,6 @@ jobs: python-version: ${{ matrix.python }} miniconda-version: 'latest' activate-environment: 'build' - use-only-tar-bz2: true - if: matrix.os == 'ubuntu-20.04' name: Store conda paths as envs on Linux @@ -170,7 +171,7 @@ jobs: - name: Cache conda packages uses: actions/cache@v3.2.6 env: - CACHE_NUMBER: 1 # Increase to reset cache + CACHE_NUMBER: 1 # Increase to reset cache with: path: ${{ env.conda-pkgs }} key: @@ -189,15 +190,11 @@ jobs: - name: Smoke test run: python -c "import dpnp, dpctl; dpctl.lsplatform()" - env: - OCL_ICD_FILENAMES: 'libintelocl.so' # TODO: run the whole scope once the issues on CPU are resolved - name: Run tests run: | python -m pytest -q -ra --disable-warnings -vv ${{ env.TEST_SCOPE }} - env: - OCL_ICD_FILENAMES: 'libintelocl.so' working-directory: ${{ env.tests-path }} test_windows: @@ -319,52 +316,14 @@ jobs: - name: List installed packages run: conda list - - name: Add library + - name: Activate OCL CPU RT shell: pwsh run: | - # Make sure the below libraries exist - Get-Item -Path "$env:CONDA_LIB_BIN_PATH\OpenCL.dll" - Get-Item -Path "$env:CONDA_LIB_PATH\intelocl64.dll" - - echo "OCL_ICD_FILENAMES = $env:CONDA_LIB_PATH\intelocl64.dll" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append - try {$list = Get-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors | Select-Object -ExpandProperty Property } catch 
{$list=@()} - - if ($list.count -eq 0) { - if (-not (Test-Path -Path HKLM:\SOFTWARE\Khronos)) { - New-Item -Path HKLM:\SOFTWARE\Khronos - } - - if (-not (Test-Path -Path HKLM:\SOFTWARE\Khronos\OpenCL)) { - New-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL - } - - if (-not (Test-Path -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors)) { - New-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors - } - - New-ItemProperty -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors -Name "$env:CONDA_LIB_PATH\intelocl64.dll" -Value 0 - try {$list = Get-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors | Select-Object -ExpandProperty Property } catch {$list=@()} - Write-Output $(Get-Item -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors) - - # Now copy OpenCL.dll into system folder - $system_ocl_icd_loader="C:\Windows\System32\OpenCL.dll" - $python_ocl_icd_loader="$env:CONDA_LIB_BIN_PATH\OpenCL.dll" - Copy-Item -Path $python_ocl_icd_loader -Destination $system_ocl_icd_loader - - if (Test-Path -Path $system_ocl_icd_loader) { - Write-Output "$system_ocl_icd_loader has been copied" - $acl = Get-Acl $system_ocl_icd_loader - Write-Output $acl - } else { - Write-Output "OCL-ICD-Loader was not copied" - } - - # Configuration variable assisting OpenCL CPU driver to find TBB DLLs which are not located where it expects them by default - $cl_cfg="$env:CONDA_LIB_PATH\cl.cfg" - Write-Output "`n>>> Dump content of $cl_cfg`n" (Get-Content $cl_cfg) "`n<<< end of dump`n" - (Get-Content $cl_cfg) -replace '^CL_CONFIG_TBB_DLL_PATH =.*', "CL_CONFIG_TBB_DLL_PATH = $env:CONDA_LIB_BIN_PATH" | Set-Content $cl_cfg - Write-Output "`n>>> Dump content of modified $cl_cfg`n" (Get-Content $cl_cfg) "`n<<< end of dump`n" - } + $script_path="$env:CONDA_PREFIX\Scripts\set-intel-ocl-icd-registry.ps1" + &$script_path + # Check the variable assisting OpenCL CPU driver to find TBB DLLs which are not located where it expects them by default + $cl_cfg="$env:CONDA_PREFIX\Library\lib\cl.cfg" + Get-Content -Tail 5 -Path $cl_cfg - name: Smoke test run: 
python -c "import dpnp, dpctl; dpctl.lsplatform()" diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml new file mode 100644 index 000000000000..fd38dde9a5d3 --- /dev/null +++ b/.github/workflows/generate_coverage.yaml @@ -0,0 +1,81 @@ +name: Generate coverage data for dpnp +on: + pull_request: + push: + branches: [master] + +jobs: + generate-coverage: + name: Generate coverage and push to Coveralls.io + runs-on: ubuntu-20.04 + + defaults: + run: + shell: bash -l {0} + + env: + python-ver: '3.10' + CHANNELS: '-c dppy/label/dev -c intel -c conda-forge --override-channels' + + steps: + - name: Cancel Previous Runs + uses: styfle/cancel-workflow-action@0.11.0 + with: + access_token: ${{ github.token }} + + - name: Checkout repo + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Setup miniconda + uses: conda-incubator/setup-miniconda@v2.2.0 + with: + auto-update-conda: true + python-version: ${{ env.python-ver }} + miniconda-version: 'latest' + activate-environment: 'coverage' + + - name: Install Lcov + run: | + sudo apt-get install lcov + - name: Install dpnp dependencies + run: | + conda install cython llvm cmake scikit-build ninja pytest pytest-cov coverage[toml] \ + dpctl dpcpp_linux-64 sysroot_linux-64">=2.28" mkl-devel-dpcpp tbb-devel onedpl-devel ${{ env.CHANNELS }} + - name: Conda info + run: | + conda info + conda list + - name: Build dpnp with coverage + run: | + python scripts/gen_coverage.py --pytest-opts="--ignore tests/test_random.py \ + --ignore tests/test_strides.py" + - name: Install coverall dependencies + run: | + sudo gem install coveralls-lcov + pip install coveralls==3.2.0 + - name: Upload coverage data to coveralls.io + run: | + echo "Processing pytest-coverage" + export DPNP_PYTEST_LCOV=$(find . 
-name dpnp_pytest.lcov) + coveralls-lcov -v -n $DPNP_PYTEST_LCOV > pytest-dpnp-c-api-coverage.json + # merge file with coverage data and upload + echo "Merging files with coverage data" + coveralls --service=github --merge=pytest-dpnp-c-api-coverage.json + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + COVERALLS_PARALLEL: true + + coveralls: + name: Indicate completion to coveralls.io + needs: generate-coverage + runs-on: ubuntu-latest + container: python:3-slim + steps: + - name: Finished + run: | + pip3 install --upgrade coveralls + coveralls --finish + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitignore b/.gitignore index 7ed68aab8567..8beb38f1efd6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ # CMake build and local install directory -build +_skbuild build_cython dpnp.egg-info diff --git a/0.build.sh b/0.build.sh deleted file mode 100755 index b1a2a29ec0ae..000000000000 --- a/0.build.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash -THEDIR=$(dirname $(readlink -e ${BASH_SOURCE[0]})) - -# . ${THEDIR}/0.env.sh -cd ${THEDIR} - -# Assign $TMP env variable to a directory where the script locates. -# The env variable is used by compiler as a path to temporary folder, -# where it can store a temporary files generated during compilation and linkage phases. -# By default the compiler uses /tmp folder, but it is limited by the size and -# there might be not enough space to temporary keep all generated data. -export TMP=${THEDIR} - - -export DPNP_DEBUG=1 - -python setup.py clean -python setup.py build_clib - -# inplace build -CC=icpx python setup.py build_ext --inplace - -# development build. 
Root privileges needed -# python setup.py develop - -echo -echo =========example3============== -DPCTL_INCLUDES=$(python -m dpctl --includes) -icpx -fsycl -g -O0 -ggdb3 -fPIC dpnp/backend/examples/example3.cpp $DPCTL_INCLUDES -Idpnp -Idpnp/backend/include -Ldpnp -Wl,-rpath='$ORIGIN'/dpnp -ldpnp_backend_c -o example3 -# LD_DEBUG=libs,bindings,symbols ./example3 -./example3 - -# gcc --version -# echo =========LD_LIBRARY_PATH============== -# echo $LD_LIBRARY_PATH - -# echo =========ldd example3============== -# ldd ./example3 -# echo =========readelf example3============== -# readelf -d ./example3 -# echo =========ldd dpnp/libdpnp_backend_c.so============== -# ldd ./dpnp/libdpnp_backend_c.so -# echo =========readelf dpnp/libdpnp_backend_c.so============== -# readelf -d ./dpnp/libdpnp_backend_c.so - -# echo ========= libstdc++.so ============== -# ls -l /usr/share/miniconda/envs/dpnp*/lib/libstdc++.so -# strings /usr/share/miniconda/envs/dpnp*/lib/libstdc++.so | grep GLIBCXX | sort -n - - -echo -echo =========example1============== -# LD_DEBUG=libs,bindings,symbols python examples/example1.py -# LD_DEBUG=libs python examples/example1.py -python examples/example1.py - -# echo ========= find /opt ============== -# find /opt -name libstdc++.so* -# echo ========= find anaconda ============== -# find /usr/share/miniconda -name libstdc++.so* -# echo ========= dpkg-query -L libstdc++6 ============== -# dpkg-query -L libstdc++6 -# echo ========= ls -l /lib/x86_64-linux-gnu/libstdc* ============== -# ls -l /lib/x86_64-linux-gnu/libstdc* - -# gcc --version -# g++ --version -# dpcpp --version - -# echo ========= APT ============== -# apt list --installed -# echo ========= conda ============== -# conda list diff --git a/0.env.sh b/0.env.sh deleted file mode 100755 index 6759181ce760..000000000000 --- a/0.env.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - -THEDIR=$(dirname $(readlink -e ${BASH_SOURCE[0]})) - -# We can not use common setup script because -# using Intel Python 
brakes build and run procedure -export ONEAPI_ROOT=/opt/intel/oneapi - -. ${ONEAPI_ROOT}/compiler/latest/env/vars.sh -. ${ONEAPI_ROOT}/tbb/latest/env/vars.sh - -if false -then - # Temporary use explicit version (arg_verz) due to MKLD-10520 - arg_verz=latest - . ${ONEAPI_ROOT}/mkl/latest/env/vars.sh - unset arg_verz -else - . ${ONEAPI_ROOT}/mkl/latest/env/vars.sh -fi - -export DPCPPROOT=${ONEAPI_ROOT}/compiler/latest - -export PYTHONPATH=$PYTHONPATH:${THEDIR} diff --git a/1.build.bat b/1.build.bat deleted file mode 100644 index 148eab47cae9..000000000000 --- a/1.build.bat +++ /dev/null @@ -1,11 +0,0 @@ - -:: CALL "1.env.bat" - -python setup.py clean -python setup.py build_clib_setuptools - -:: inplace build -python setup.py build_ext --inplace - -:: development build. Root privileges needed -:: python setup.py develop diff --git a/1.env.bat b/1.env.bat deleted file mode 100644 index 069ddc8577eb..000000000000 --- a/1.env.bat +++ /dev/null @@ -1,8 +0,0 @@ - -SET "ONEAPI_ROOT=C:\oneapi" -CALL "%ONEAPI_ROOT%\compiler\latest\env\vars.bat" -CALL "%ONEAPI_ROOT%\mkl\latest\env\vars.bat" -CALL "%ONEAPI_ROOT%\tbb\latest\env\vars.bat" -CALL "%ONEAPI_ROOT%\dpl\latest\env\vars.bat" - -SET "DPCPPROOT=%ONEAPI_ROOT%\compiler\latest" diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 000000000000..efa35ac50869 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,148 @@ +cmake_minimum_required(VERSION 3.21...3.26 FATAL_ERROR) + +if (${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.24") + cmake_policy(SET CMP0135 NEW) +endif() + +project(dpnp + DESCRIPTION "NumPy-like API accelerated by SYCL." 
+) + +option(DPNP_GENERATE_COVERAGE "Enable build DPNP with coverage instrumentation" FALSE) +option(DPNP_BACKEND_TESTS "Enable building of DPNP backend test suite" FALSE) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED True) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) + +if (NOT DEFINED DPCTL_MODULE_PATH) + if (DEFINED ENV{DPCTL_MODULE_PATH}) + set(DPCTL_MODULE_PATH $ENV{DPCTL_MODULE_PATH}) + else () + message(FATAL_ERROR "Specify DPCTL_MODULE_PATH, either via cmake or as environment variable") + endif() +endif() + +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${DPCTL_MODULE_PATH}) + + +find_package(IntelDPCPP REQUIRED) +find_package(TBB REQUIRED PATHS ${CMAKE_SOURCE_DIR}/dpnp/backend/cmake/Modules NO_DEFAULT_PATH) + +set(MKL_ARCH "intel64") +set(MKL_LINK "dynamic") +set(MKL_INTERFACE_FULL "intel_ilp64") +set(MKL_THREADING "tbb_thread") +find_package(MKL REQUIRED PATHS ${CMAKE_SOURCE_DIR}/dpnp/backend/cmake/Modules NO_DEFAULT_PATH) + +set(ONEDPL_PAR_BACKEND tbb) +find_package(oneDPL REQUIRED PATHS ${CMAKE_SOURCE_DIR}/dpnp/backend/cmake/Modules NO_DEFAULT_PATH) + +include(GNUInstallDirs) + +# Fetch pybind11 +include(FetchContent) +FetchContent_Declare( + pybind11 + URL https://github.com/pybind/pybind11/archive/refs/tags/v2.10.2.tar.gz + URL_HASH SHA256=93bd1e625e43e03028a3ea7389bba5d3f9f2596abc074b068e70f4ef9b1314ae +) +FetchContent_MakeAvailable(pybind11) + +find_package(PythonExtensions REQUIRED) +find_package(NumPy REQUIRED) + +set(CYTHON_FLAGS "-t -w \"${CMAKE_SOURCE_DIR}\"") +find_package(Cython REQUIRED) +find_package(Dpctl REQUIRED) + +message(STATUS "Dpctl_INCLUDE_DIRS=" ${Dpctl_INCLUDE_DIRS}) +message(STATUS "Dpctl_TENSOR_INCLUDE_DIR=" ${Dpctl_TENSOR_INCLUDE_DIR}) + +if(WIN32) + string(CONCAT WARNING_FLAGS + "-Wall " + "-Wextra " + "-Winit-self " + "-Wunused-function " + "-Wuninitialized " + "-Wmissing-declarations " + "-Wno-unused-parameter " + ) + string(CONCAT SDL_FLAGS + "/GS " + "/DynamicBase 
" + ) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Ox ${WARNING_FLAGS} ${SDL_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Ox ${WARNING_FLAGS} ${SDL_FLAGS}") + set(CMAKE_C_FLAGS_DEBUG + "${CMAKE_C_FLAGS_DEBUG} ${WARNING_FLAGS} ${SDL_FLAGS} -O0 -g1 -DDEBUG" + ) + set(CMAKE_CXX_FLAGS_DEBUG + "${CMAKE_CXX_FLAGS_DEBUG} ${WARNING_FLAGS} ${SDL_FLAGS} -O0 -g1 -DDEBUG" + ) + set(DPNP_LDFLAGS "/NXCompat;/DynamicBase") +elseif(UNIX) + string(CONCAT WARNING_FLAGS + "-Wall " + "-Wextra " + "-Winit-self " + "-Wunused-function " + "-Wuninitialized " + "-Wmissing-declarations " + "-fdiagnostics-color=auto " + ) + string(CONCAT SDL_FLAGS + "-fstack-protector " + "-fstack-protector-all " + "-fpic " + "-fPIC " + "-D_FORTIFY_SOURCE=2 " + "-Wformat " + "-Wformat-security " +# "-fno-strict-overflow " # implied by -fwrapv + "-fno-delete-null-pointer-checks " + "-fwrapv " + ) + string(CONCAT CFLAGS + "${WARNING_FLAGS}" + "${SDL_FLAGS}" + ) + string(CONCAT CXXFLAGS + "${WARNING_FLAGS}" + "${SDL_FLAGS}" + "-fsycl " + ) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 ${CFLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 ${CXXFLAGS}") + set(CMAKE_C_FLAGS_DEBUG + "${CMAKE_C_FLAGS_DEBUG} ${CFLAGS} -O0 -g1 -DDEBUG" + ) + set(CMAKE_CXX_FLAGS_DEBUG + "${CMAKE_CXX_FLAGS_DEBUG} ${CXXFLAGS} -O0 -g1 -DDEBUG" + ) + set(DPNP_LDFLAGS "-z,noexecstack,-z,relro,-z,now") +else() + message(FATAL_ERROR "Unsupported system.") +endif() + +if (DPNP_GENERATE_COVERAGE) + string(CONCAT PROFILE_FLAGS + "-fprofile-instr-generate " + "-fcoverage-mapping " + "-fno-sycl-use-footer " +# "-save-temps=obj " + ) + + # Add profiling flags + set(CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} ${PROFILE_FLAGS}" + ) +endif() + +if(DEFINED SKBUILD) + set(_ignore_me ${SKBUILD}) +endif() + +add_subdirectory(dpnp) diff --git a/README.md b/README.md index 360d6b4e730e..b10394c0eec0 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ After these steps, `dpnp` can be built in debug mode as follows: ```bash git clone 
https://github.com/IntelPython/dpnp cd dpnp -./0.build.sh +python scripts/build_locally.py ``` ## Install Wheel Package from Pypi @@ -48,7 +48,6 @@ export OCL_ICD_FILENAMES=libintelocl.so ## Run test ```bash -. ./0.env.sh pytest # or pytest tests/test_matmul.py -s -v diff --git a/codecov.yml b/codecov.yml deleted file mode 100644 index 69cb76019a47..000000000000 --- a/codecov.yml +++ /dev/null @@ -1 +0,0 @@ -comment: false diff --git a/conda-recipe/bld.bat b/conda-recipe/bld.bat index 8ec6c1fb1587..9398b115175b 100644 --- a/conda-recipe/bld.bat +++ b/conda-recipe/bld.bat @@ -9,10 +9,54 @@ REM @TODO: remove the setting, once transition to build backend on Windows REM to cmake is complete. SET "SETUPTOOLS_USE_DISTUTILS=stdlib" -IF DEFINED DPLROOT ( - ECHO "Sourcing DPLROOT" - SET "INCLUDE=%DPLROOT%\include;%INCLUDE%" +"%PYTHON%" setup.py clean --all + +set "MKLROOT=%PREFIX%/Library" +set "TBB_ROOT_HINT=%PREFIX%/Library" +set "DPL_ROOT_HINT=%PREFIX%/Library" + +%PYTHON% -m dpctl --cmakedir > Output +set /p DPCTL_CMAKE_DIR= < Output + +set "SKBUILD_ARGS=-G Ninja -- -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icx" +set "SKBUILD_ARGS=%SKBUILD_ARGS% -DDPCTL_MODULE_PATH:PATH=%DPCTL_CMAKE_DIR% " +set "SKBUILD_ARGS=%SKBUILD_ARGS% -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" + +FOR %%V IN (14.0.0 14 15.0.0 15 16.0.0 16 17.0.0 17) DO @( + REM set DIR_HINT if directory exists + IF EXIST "%BUILD_PREFIX%\Library\lib\clang\%%V\" ( + SET "SYCL_INCLUDE_DIR_HINT=%BUILD_PREFIX%\Library\lib\clang\%%V" + ) +) + +set "PATCHED_CMAKE_VERSION=3.26" +set "PLATFORM_DIR=%PREFIX%\Library\share\cmake-%PATCHED_CMAKE_VERSION%\Modules\Platform" +set "FN=Windows-IntelLLVM.cmake" + +rem Save the original file, and copy patched file to +rem fix the issue with IntelLLVM integration with cmake on Windows +if EXIST "%PLATFORM_DIR%" ( + dir "%PLATFORM_DIR%\%FN%" + copy /Y "%PLATFORM_DIR%\%FN%" . 
+ if errorlevel 1 exit 1 + copy /Y ".github\workflows\Windows-IntelLLVM_%PATCHED_CMAKE_VERSION%.cmake" "%PLATFORM_DIR%\%FN%" + if errorlevel 1 exit 1 ) -%PYTHON% setup.py build_clib -%PYTHON% setup.py build_ext install +if NOT "%WHEELS_OUTPUT_FOLDER%"=="" ( + rem Install and assemble wheel package from the build bits + "%PYTHON%" setup.py install bdist_wheel %SKBUILD_ARGS% + if errorlevel 1 exit 1 + copy dist\dpnp*.whl %WHEELS_OUTPUT_FOLDER% + if errorlevel 1 exit 1 +) ELSE ( + rem Only install + "%PYTHON%" setup.py install %SKBUILD_ARGS% + if errorlevel 1 exit 1 +) + +rem copy back +if EXIST "%PLATFORM_DIR%" ( + copy /Y "%FN%" "%PLATFORM_DIR%\%FN%" + if errorlevel 1 exit 1 +) diff --git a/conda-recipe/build.sh b/conda-recipe/build.sh index 164ad09d578f..8d832e5cb96c 100644 --- a/conda-recipe/build.sh +++ b/conda-recipe/build.sh @@ -1,41 +1,17 @@ #!/bin/bash -# if ONEAPI_ROOT is specified (use all from it) -if [ -n "${ONEAPI_ROOT}" ]; then - export DPCPPROOT=${ONEAPI_ROOT}/compiler/latest - export MKLROOT=${ONEAPI_ROOT}/mkl/latest - export TBBROOT=${ONEAPI_ROOT}/tbb/latest - export DPLROOT=${ONEAPI_ROOT}/dpl/latest -fi - -# if DPCPPROOT is specified (work with custom DPCPP) -if [ -n "${DPCPPROOT}" ]; then - . ${DPCPPROOT}/env/vars.sh -fi - -# if MKLROOT is specified (work with custom math library) -if [ -n "${MKLROOT}" ]; then - . ${MKLROOT}/env/vars.sh - # conda remove mkl --force -y || true -fi - -# have to activate while SYCL CPU device/driver needs paths -# if TBBROOT is specified -if [ -n "${TBBROOT}" ]; then - . 
${TBBROOT}/env/vars.sh -fi - -# Set RPATH for wheels -export CFLAGS="-Wl,-rpath,\$ORIGIN/../dpctl,-rpath,\$ORIGIN $CFLAGS" -export LDFLAGS="-Wl,-rpath,\$ORIGIN/../dpctl,-rpath,\$ORIGIN $LDFLAGS" - # Intel LLVM must cooperate with compiler and sysroot from conda echo "--gcc-toolchain=${BUILD_PREFIX} --sysroot=${BUILD_PREFIX}/${HOST}/sysroot -target ${HOST}" > icpx_for_conda.cfg export ICPXCFG="$(pwd)/icpx_for_conda.cfg" export ICXCFG="$(pwd)/icpx_for_conda.cfg" -$PYTHON setup.py build_clib -$PYTHON setup.py build_ext install +export CMAKE_GENERATOR="Ninja" +export TBB_ROOT_HINT=$PREFIX +export DPL_ROOT_HINT=$PREFIX +export MKL_ROOT_HINT=$PREFIX +SKBUILD_ARGS="-- -DDPCTL_MODULE_PATH=$($PYTHON -m dpctl --cmakedir) " +SKBUILD_ARGS="${SKBUILD_ARGS} -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icpx" +SKBUILD_ARGS="${SKBUILD_ARGS} -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" # Build wheel package if [ "$CONDA_PY" == "36" ]; then @@ -44,6 +20,8 @@ else WHEELS_BUILD_ARGS="-p manylinux2014_x86_64" fi if [ -n "${WHEELS_OUTPUT_FOLDER}" ]; then - $PYTHON setup.py bdist_wheel ${WHEELS_BUILD_ARGS} + $PYTHON setup.py install bdist_wheel ${WHEELS_BUILD_ARGS} ${SKBUILD_ARGS} cp dist/dpnp*.whl ${WHEELS_OUTPUT_FOLDER} +else + $PYTHON setup.py install ${SKBUILD_ARGS} fi diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index cf9150fc3297..011226d0165e 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -10,19 +10,22 @@ requirements: - setuptools - numpy 1.21 - cython - - cmake >=3.19 - - dpctl >=0.14 - - mkl-devel-dpcpp {{ environ.get('MKL_VER', '>=2023.0.0') }} + - cmake >=3.21 + - ninja + - git + - dpctl >=0.14.2 + - mkl-devel-dpcpp {{ environ.get('MKL_VER', '>=2023.1.0') }} - onedpl-devel - tbb-devel - wheel + - scikit-build build: - {{ compiler('cxx') }} - - {{ compiler('dpcpp') }} =2023.1.0 # [not osx] - - sysroot_linux-64 >=2.17 # [linux] + - {{ compiler('dpcpp') }} >=2023.1.0 # [not osx] + - sysroot_linux-64 >=2.28 # [linux] run: - python - - dpctl 
>=0.14 + - dpctl >=0.14.2 - {{ pin_compatible('dpcpp-cpp-rt', min_pin='x.x', max_pin='x') }} - {{ pin_compatible('mkl-dpcpp', min_pin='x.x', max_pin='x') }} - {{ pin_compatible('numpy', min_pin='x.x', max_pin='x') }} @@ -31,11 +34,6 @@ build: number: {{ GIT_DESCRIBE_NUMBER }} include_recipe: False script_env: - - ONEAPI_ROOT - - DPCPPROOT - - MKLROOT - - TBBROOT - - DPLROOT - WHEELS_OUTPUT_FOLDER test: @@ -43,9 +41,9 @@ test: - pytest - setuptools source_files: - - examples - - tests - - setup.cfg + - examples + - tests + - setup.cfg commands: - python -c "import dpnp" - pytest -s diff --git a/doc/0.builddoc.sh b/doc/0.builddoc.sh index e97883cb5c52..5dd034ac6674 100755 --- a/doc/0.builddoc.sh +++ b/doc/0.builddoc.sh @@ -4,7 +4,6 @@ BUILDDOCDIR=$(dirname $(readlink -e ${BASH_SOURCE[0]})) ROOTDIR=$BUILDDOCDIR/.. cd $ROOTDIR -. 0.env.sh python setup.py develop cd $BUILDDOCDIR diff --git a/doc/install.rst b/doc/install.rst index 7569c14f9ffa..c47c1edad7f3 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -34,7 +34,7 @@ You can install the latest development version of DPNP from a cloned Git reposit $ git clone --recursive https://github.com/IntelPython/dpnp.git $ cd dpnp - $ ./0.build.sh + $ python scripts/build_locally.py .. 
note:: diff --git a/dpnp/CMakeLists.txt b/dpnp/CMakeLists.txt new file mode 100644 index 000000000000..54be4eb23b9a --- /dev/null +++ b/dpnp/CMakeLists.txt @@ -0,0 +1,56 @@ + +function(build_dpnp_cython_ext _trgt _src _dest) + add_cython_target(${_trgt} ${_src} CXX OUTPUT_VAR _generated_src) + message(STATUS "Using ${_trgt}") + add_library(${_trgt} MODULE ${_generated_src}) + set(_trgt_deps "${_trgt}_deps") + add_custom_target(${_trgt_deps} DEPENDS ${_src}) + add_dependencies(${_trgt} ${_trgt_deps}) + if (DPNP_GENERATE_COVERAGE) + target_compile_definitions(${_trgt} PRIVATE CYTHON_TRACE=1 CYTHON_TRACE_NOGIL=1) + endif() + # NumPy + target_include_directories(${_trgt} PRIVATE ${NumPy_INCLUDE_DIR}) + # Dpctl + target_include_directories(${_trgt} PRIVATE ${Dpctl_INCLUDE_DIR}) + target_link_directories(${_trgt} PRIVATE ${Dpctl_INCLUDE_DIR}/..) + target_link_libraries(${_trgt} DPCTLSyclInterface) + + set(_linker_options "LINKER:${DPNP_LDFLAGS}") + target_link_options(${_trgt} PRIVATE ${_linker_options}) + python_extension_module(${_trgt}) + + if (DPNP_GENERATE_COVERAGE) + set(_copy_cxx_trgt "${_trgt}_copy_cxx") + add_custom_target( + ${_copy_cxx_trgt} ALL + COMMAND ${CMAKE_COMMAND} + -DSOURCE_FILE=${_generated_src} + -DDEST=${CMAKE_CURRENT_SOURCE_DIR} + -P ${CMAKE_SOURCE_DIR}/dpnp/cmake/copy_existing.cmake + DEPENDS ${_trgt} + VERBATIM + COMMENT "Copying Cython-generated source for target ${_trgt} to dpnp source layout" + ) + endif() + install(TARGETS ${_trgt} LIBRARY DESTINATION ${_dest}) +endfunction() + +function(build_dpnp_cython_ext_with_backend _trgt _src _dest) + build_dpnp_cython_ext(${_trgt} ${_src} ${_dest}) + target_link_libraries(${_trgt} dpnp_backend_library) + if (UNIX) + set_target_properties(${_trgt} PROPERTIES INSTALL_RPATH "$ORIGIN/..") + endif() +endfunction() + + +build_dpnp_cython_ext_with_backend(dparray ${CMAKE_CURRENT_SOURCE_DIR}/dparray.pyx dpnp) +add_subdirectory(backend) +add_subdirectory(backend/extensions/lapack) + 
+add_subdirectory(dpnp_algo) +add_subdirectory(dpnp_utils) +add_subdirectory(fft) +add_subdirectory(linalg) +add_subdirectory(random) diff --git a/dpnp/backend/CMakeLists.txt b/dpnp/backend/CMakeLists.txt index baee709b11ee..8aeadc38c376 100644 --- a/dpnp/backend/CMakeLists.txt +++ b/dpnp/backend/CMakeLists.txt @@ -23,187 +23,6 @@ # THE POSSIBILITY OF SUCH DAMAGE. # ***************************************************************************** -# cmake-format -i CMakeLists.txt --line-width=120 - -cmake_minimum_required(VERSION 3.10 FATAL_ERROR) - -# set(DPNP_VERSION 0.11.1) -# set(DPNP_API_VERSION 0.11) - -# set directory where the custom finders live -set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules") - -# ----------------------------------------------------------------------------------------------- -# Project build options... -# ----------------------------------------------------------------------------------------------- -if(DEFINED ENV{ONEAPI_ROOT}) - set(DPNP_ONEAPI_ROOT "$ENV{ONEAPI_ROOT}" CACHE PATH "Folder contains oneapi tool set") -endif() - -option(DPNP_STATIC_LIB_ENABLE "Enable build DPNP static library" FALSE) -option(DPNP_DEBUG_ENABLE "Enable output for DPNP_DEBUG statements" FALSE) -option(DPNP_INSTALL_STRUCTURED "if FALSE, install package files into same directory" TRUE) -option(DPNP_SYCL_QUEUE_MGR_ENABLE "Use external manager for SYCL queue" FALSE) -option(DPNP_BACKEND_TESTS "Enable DPNP tests" FALSE) - -if(DEFINED ENV{DPNP_DEBUG}) - set(DPNP_DEBUG_ENABLE $ENV{DPNP_DEBUG}) -endif() - -message(STATUS "CMAKE_VERSION: ${CMAKE_VERSION}") -message(STATUS "CMAKE_GENERATOR: ${CMAKE_GENERATOR}") -message(STATUS "CMAKE_HOST_SYSTEM_NAME: ${CMAKE_HOST_SYSTEM_NAME}") -message(STATUS "========== User controlled variables list ==========") -message(STATUS "DPNP_ONEAPI_ROOT: ${DPNP_ONEAPI_ROOT}") -message(STATUS "DPNP_STATIC_LIB_ENABLE: ${DPNP_STATIC_LIB_ENABLE}") -message(STATUS "DPNP_DEBUG_ENABLE: ${DPNP_DEBUG_ENABLE}") 
-message(STATUS "DPNP_BACKEND_TESTS: ${DPNP_BACKEND_TESTS}") -message(STATUS "DPNP_INSTALL_STRUCTURED: ${DPNP_INSTALL_STRUCTURED}") -message(STATUS "DPNP_SYCL_QUEUE_MGR_ENABLE: ${DPNP_SYCL_QUEUE_MGR_ENABLE}") -message(STATUS " |- DPNP_QUEUEMGR_INCLUDE_DIR: ${DPNP_QUEUEMGR_INCLUDE_DIR}") -message(STATUS " |- DPNP_QUEUEMGR_LIB_DIR: ${DPNP_QUEUEMGR_LIB_DIR}") -message(STATUS "======= End of user controlled variables list ======") - -# ----------------------------------------------------------------------------------------------- -# Compiler-specific logic... -# ----------------------------------------------------------------------------------------------- - -# cmake 3.19.1 has a bug in dpcpp compiler detection. Let's assume it is a clang -# set(CMAKE_CXX_COMPILER_ID "Clang") -# set(CMAKE_CXX_COMPILER_VERSION 12.0) -if (CMAKE_VERSION VERSION_EQUAL 3.19.1) - message(FATAL_ERROR - " Unsupported cmake version ${CMAKE_VERSION}\n" - " Please use other cmake version, for example:\n" - "in Linux:\n" - " curl --output cmake_webimage.tar.gz --url https://cmake.org/files/v3.19/cmake-3.19.2-Linux-x86_64.tar.gz --retry 5 --retry-delay 5\n" - " tar -xzf cmake_webimage.tar.gz\n" - " rm -f cmake_webimage.tar.gz\n" - " export PATH=`pwd`/cmake-3.19.2-Linux-x86_64/bin:$PATH\n" - "in Windows:\n" - " curl.exe --output cmake_webimage.zip --url https://cmake.org/files/v3.19/cmake-3.19.2-win64-x64.zip --retry 5 --retry-delay 5\n" - " tar -xf cmake_webimage.zip\n" - " del cmake_webimage.zip\n" - " set PATH=%CD%\\cmake-3.19.2-win64-x64\\bin;%PATH%\n" - ) -endif() - -# SYCL related compile options -string(CONCAT COMMON_COMPILE_FLAGS - "-fsycl " - "-fsycl-device-code-split=per_kernel " - "-fno-approx-func " - "-fno-finite-math-only " -) -string(CONCAT COMMON_LINK_FLAGS - "-fsycl " - "-fsycl-device-code-split=per_kernel " -) -if(UNIX) - set(CMAKE_CXX_COMPILER "icpx") - # add_compile_options(-fPIC) -elseif(WIN32) - set(CMAKE_CXX_COMPILER "icx") - # set(CMAKE_SHARED_LINKER_FLAGS_INIT 
"-fuse-ld=lld-link") - # set(CMAKE_LINKER "lld-link") - # include (Platform/Windows-Clang) - # set(CMAKE_LINKER "dpcpp") - # set(CMAKE_AR "llvm-ar") - # set(CMAKE_RANLIB "llvm-ranlib") - # set(CMAKE_CXX_FLAGS "/EHsc") - - string(APPEND COMMON_COMPILE_FLAGS - "/EHsc " -# "/Ox " -# "/W3 " -# "/GL " -# "/DNDEBUG " -# "/MD " - ) -else() - message(FATAL_ERROR "Unsupported system ${CMAKE_SYSTEM} in compiler selection case") -endif() - -# set language version -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_STANDARD_REQUIRED ON) - -# warning flag set -string(CONCAT DPNP_WARNING_FLAGS - "-W " - "-Wextra " - "-Wshadow " - "-Wall " - "-Wstrict-prototypes " - "-Wformat " - "-Wformat-security " -) -string(APPEND COMMON_COMPILE_FLAGS - "${DPNP_WARNING_FLAGS}" -) - -# debug/release compile definitions -if(DPNP_DEBUG_ENABLE) - set(CMAKE_BUILD_TYPE "Debug") - string(APPEND COMMON_COMPILE_FLAGS - "-O0 " - "-ggdb3 " - ) - string(APPEND COMMON_LINK_FLAGS - "-O0 " - "-ggdb3 " - "-fsycl-link-huge-device-code " - ) -else() - set(CMAKE_BUILD_TYPE "Release") - string(APPEND COMMON_COMPILE_FLAGS - "-O3 " - ) -endif() - -# ----------------------------------------------------------------------------------------------- -# Auxilary building options... 
-# ----------------------------------------------------------------------------------------------- -# sdl -string(CONCAT DPNP_DEFS - "-D_FORTIFY_SOURCE=2 " -) -if(NOT WIN32) - string(APPEND COMMON_COMPILE_FLAGS - "-fno-delete-null-pointer-checks " - "-fstack-protector-strong " - "-fno-strict-overflow " - ) - string(APPEND COMMON_LINK_FLAGS - "LINKER:-z,noexecstack,-z,relro,-z,now " - ) -endif() - -# disable PSTL policies due to compiler bug -string(APPEND DPNP_DEFS - "-DPSTL_USE_PARALLEL_POLICIES=0 " - "-D_GLIBCXX_USE_TBB_PAR_BACKEND=0 " -) - -# disable PSTL predefined policies objects (global queues, prevent fail on Windows) -string(APPEND DPNP_DEFS - "-DONEDPL_USE_PREDEFINED_POLICIES=0 " -) - -# ----------------------------------------------------------------------------------------------- -# Create project... -# ----------------------------------------------------------------------------------------------- -# set(CMAKE_CXX_COMPILER "clang++") -project(dpnp_project - # VERSION ${DPNP_VERSION} - DESCRIPTION "DPNP: NumPy-like API accelerated with SYCL" - HOMEPAGE_URL https://github.com/IntelPython/dpnp - LANGUAGES CXX) -# set(CMAKE_CXX_COMPILER "dpcpp") - -# ----------------------------------------------------------------------------------------------- -# Building logic... 
-# ----------------------------------------------------------------------------------------------- set(DPNP_SRC kernels/dpnp_krnl_arraycreation.cpp kernels/dpnp_krnl_bitwise.cpp @@ -228,127 +47,82 @@ set(DPNP_SRC src/dpnp_random_state.cpp ) -if(DPNP_STATIC_LIB_ENABLE) - add_library(dpnp_backend_c STATIC ${DPNP_SRC}) -else() - add_library(dpnp_backend_c SHARED ${DPNP_SRC}) - set_target_properties(dpnp_backend_c PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON) -endif() - -target_include_directories(dpnp_backend_c PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) -target_include_directories(dpnp_backend_c PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/src) +set(_trgt "dpnp_backend_c") -string(REPLACE " " ";" COMMON_COMPILE_FLAGS_AS_LIST ${COMMON_COMPILE_FLAGS}) -target_compile_options(dpnp_backend_c PUBLIC ${COMMON_COMPILE_FLAGS_AS_LIST}) -string(REPLACE " " ";" DPNP_DEFS_AS_LIST ${DPNP_DEFS}) -target_compile_definitions(dpnp_backend_c PUBLIC ${DPNP_DEFS_AS_LIST}) -string(REPLACE " " ";" COMMON_LINK_FLAGS_AS_LIST ${COMMON_LINK_FLAGS}) -target_link_options(dpnp_backend_c PUBLIC ${COMMON_LINK_FLAGS_AS_LIST}) - - -# ----------------------------------------------------------------------------------------------- -# Testing logic... -# ----------------------------------------------------------------------------------------------- -if(DPNP_BACKEND_TESTS) - add_subdirectory(tests) +if(WIN32) + if (${CMAKE_VERSION} VERSION_LESS "3.27") + # this is a work-around for target_link_options inserting option after -link option, cause + # linker to ignore it. + set(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -fsycl-device-code-split=per_kernel") + endif() endif() -# ----------------------------------------------------------------------------------------------- -# Dependencies logic... 
-# ----------------------------------------------------------------------------------------------- -# Math library -find_package(MathLib REQUIRED) -target_compile_definitions(dpnp_backend_c PUBLIC -DMKL_ILP64=1) -target_include_directories(dpnp_backend_c PUBLIC ${MATHLIB_INCLUDE_DIR}) +add_library(${_trgt} SHARED ${DPNP_SRC}) +set_target_properties(${_trgt} PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON) -link_directories(dpnp_backend_c PUBLIC ${MATHLIB_LIBRARY_DIR}) # does not work with some cmake versions -target_link_directories(dpnp_backend_c PUBLIC ${MATHLIB_LIBRARY_DIR}) # duplicate link_directories +target_include_directories(${_trgt} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include) +target_include_directories(${_trgt} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src) -if(UNIX) -# Link Line Advisor v6.13 -# -DMKL_ILP64 -I"${MKLROOT}/include" -# -lmkl_sycl -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core -lsycl -lOpenCL -lpthread -lm -ldl - set(DPNP_MATHLIB_DEP_LIBS - mkl_sycl - mkl_intel_ilp64 - mkl_tbb_thread # mkl_sequential - mkl_core - sycl - OpenCL - pthread - m - dl - CACHE STRING "Set of libraries to link") -elseif(WIN32) -# Link Line Advisor v6.13 -# -DMKL_ILP64 -I"%MKLROOT%\include" -# mkl_sycl_dll.lib mkl_intel_ilp64_dll.lib mkl_sequential_dll.lib mkl_core_dll.lib sycl.lib OpenCL.lib - set(DPNP_MATHLIB_DEP_LIBS - mkl_sycl_dll - mkl_intel_ilp64_dll - mkl_tbb_thread_dll # mkl_sequential_dll - mkl_core_dll - sycl - OpenCL - CACHE STRING "Set of libraries to link") +if (WIN32) + target_compile_options(${_trgt} PRIVATE + /clang:-fno-approx-func + /clang:-fno-finite-math-only + ) else() - message(FATAL_ERROR "Unsupported system ${CMAKE_SYSTEM} in MathLib libraries set") + target_compile_options(${_trgt} PRIVATE + -fno-approx-func + -fno-finite-math-only + ) endif() -target_link_libraries(dpnp_backend_c PUBLIC ${DPNP_MATHLIB_DEP_LIBS}) - -# Parallel STL -find_package(DPL REQUIRED) -target_include_directories(dpnp_backend_c PUBLIC ${DPL_INCLUDE_DIR}) +# if (WIN32) +# 
target_compile_definitions(${_trgt} PRIVATE _WIN=1) +# endif() -# SYCL queue manager -if(DPNP_SYCL_QUEUE_MGR_ENABLE) - target_include_directories(dpnp_backend_c PUBLIC ${DPNP_QUEUEMGR_INCLUDE_DIR}) - target_link_directories(dpnp_backend_c PUBLIC ${DPNP_QUEUEMGR_LIB_DIR}) - target_link_libraries(dpnp_backend_c PUBLIC "DPCTLSyclInterface") - - # not sure but add runpath - set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${DPNP_QUEUEMGR_LIB_DIR}") +target_link_options(${_trgt} PUBLIC -fsycl-device-code-split=per_kernel) +if(UNIX) + # this option is support on Linux only + target_link_options(${_trgt} PUBLIC -fsycl-link-huge-device-code) +endif() - # disable stripping rpath in installation logic - set_target_properties(dpnp_backend_c PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE) -else() - target_compiler_definitions(dpnp_backend_c PUBLIC -DDPNP_LOCAL_QUEUE=1) +if(DPNP_GENERATE_COVERAGE) + target_link_options(${_trgt} PRIVATE -fprofile-instr-generate -fcoverage-mapping) endif() -# ----------------------------------------------------------------------------------------------- -# Installation logic... 
-# ----------------------------------------------------------------------------------------------- -set(CMAKE_INSTALL_PREFIX ${CMAKE_SOURCE_DIR}/package_dpnp) +target_link_libraries(${_trgt} PUBLIC MKL::MKL_DPCPP) +target_link_libraries(${_trgt} PUBLIC oneDPL) -if(DEFINED DPNP_INSTALL_PREFIX) - set(CMAKE_INSTALL_PREFIX ${DPNP_INSTALL_PREFIX}) +if (UNIX) + # needed for STL headers with GCC < 11 + target_compile_definitions(${_trgt} PUBLIC _GLIBCXX_USE_TBB_PAR_BACKEND=0) endif() -if(NOT DPNP_INSTALL_STRUCTURED) - set(CMAKE_INSTALL_BINDIR ${CMAKE_INSTALL_PREFIX}) - set(CMAKE_INSTALL_LIBDIR ${CMAKE_INSTALL_PREFIX}) - set(CMAKE_INSTALL_INCLUDEDIR ${CMAKE_INSTALL_PREFIX}) -endif() +target_compile_definitions(${_trgt} PUBLIC PSTL_USE_PARALLEL_POLICIES=0) +# work-around for Windows at exit crash with predefined policies +target_compile_definitions(${_trgt} PUBLIC ONEDPL_USE_PREDEFINED_POLICIES=0) + +target_include_directories(${_trgt} PUBLIC ${Dpctl_INCLUDE_DIR}) +target_link_directories(${_trgt} PUBLIC "${Dpctl_INCLUDE_DIR}/..") +target_link_libraries(${_trgt} PUBLIC DPCTLSyclInterface) -# set_target_properties(dpnp_backend_c PROPERTIES VERSION ${DPNP_VERSION} SOVERSION ${DPNP_API_VERSION}) +add_library(dpnp_backend_library INTERFACE IMPORTED GLOBAL) +target_include_directories(dpnp_backend_library BEFORE INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/include ${CMAKE_CURRENT_SOURCE_DIR}/src) +target_link_libraries(dpnp_backend_library INTERFACE ${_trgt}) -install(TARGETS dpnp_backend_c - PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE - ) +if (DPNP_BACKEND_TESTS) + add_subdirectory(tests) +endif() -# ----------------------------------------------------------------------------------------------- -# Print Global Options -# ----------------------------------------------------------------------------------------------- -message(STATUS "CMAKE_SYSTEM: ${CMAKE_SYSTEM}") -message(STATUS "CMAKE_SYSTEM_VERSION: ${CMAKE_SYSTEM_VERSION}") 
-message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}") -message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}") -message(STATUS "CXX_STANDARD: ${CMAKE_CXX_STANDARD}") -message(STATUS "CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}") -message(STATUS "CMAKE_CXX_COMPILER_VERSION: ${CMAKE_CXX_COMPILER_VERSION}") -message(STATUS "CMAKE_CXX_COMPILER: ${CMAKE_CXX_COMPILER}") -message(STATUS "CMAKE_LINKER: ${CMAKE_LINKER}") -message(STATUS "CMAKE_SOURCE_DIR: ${CMAKE_SOURCE_DIR}") -message(STATUS "DPNP_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}") -message(STATUS "CMAKE_VERBOSE_MAKEFILE: ${CMAKE_VERBOSE_MAKEFILE}") +set(_lib_destination dpnp) +set(_include_destination dpnp/backend/include) +install( + TARGETS ${_trgt} + LIBRARY + DESTINATION ${_lib_destination} + ARCHIVE + DESTINATION ${_lib_destination} + RUNTIME + DESTINATION ${_lib_destination} + PUBLIC_HEADER + DESTINATION ${_include_destination} + ) diff --git a/dpnp/backend/cmake/Modules/FindDPL.cmake b/dpnp/backend/cmake/Modules/FindDPL.cmake deleted file mode 100644 index a5301e6fda97..000000000000 --- a/dpnp/backend/cmake/Modules/FindDPL.cmake +++ /dev/null @@ -1,53 +0,0 @@ -# ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# - Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -# THE POSSIBILITY OF SUCH DAMAGE. -# ***************************************************************************** - -# The following variables are optionally searched for defaults -# DPLROOT: Environment variable to specify custom search place -# ONEAPI_ROOT: Environment variable to specify search place from oneAPI -# -# The following are set after configuration is done: -# DPL_FOUND -# DPL_INCLUDE_DIR - -include(FindPackageHandleStandardArgs) - -set(DPNP_ONEAPI_DPL "$ENV{DPNP_ONEAPI_ROOT}/dpl/latest" CACHE PATH "Folder contains DPL files from ONEAPI_ROOT") - -if(DEFINED ENV{DPLROOT}) - set(DPNP_DPLROOT "$ENV{DPLROOT}" CACHE PATH "Folder contains DPL files from DPLROOT") -endif() - -find_path( - DPL_INCLUDE_DIR oneapi/dpl/algorithm - HINTS ${DPNP_DPLROOT} ${DPNP_ONEAPI_DPL} ENV CONDA_PREFIX ENV PREFIX # search order is important - PATH_SUFFIXES include linux/include - DOC "Path to DPL include files") - -find_package_handle_standard_args(DPL DEFAULT_MSG DPL_INCLUDE_DIR) - -if(DPL_FOUND) - message(STATUS "Found DPL: (include: ${DPL_INCLUDE_DIR})") - # mark_as_advanced(DPNP_DPLROOT DPL_INCLUDE_DIR) -endif() diff --git a/dpnp/backend/cmake/Modules/FindMathLib.cmake b/dpnp/backend/cmake/Modules/FindMathLib.cmake deleted file 
mode 100644 index a5b5e1602e2e..000000000000 --- a/dpnp/backend/cmake/Modules/FindMathLib.cmake +++ /dev/null @@ -1,75 +0,0 @@ -# ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# - Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -# THE POSSIBILITY OF SUCH DAMAGE. 
-# ***************************************************************************** - -# The following variables are optionally searched for defaults -# MKLROOT: Environment variable to specify custom search place -# ONEAPI_ROOT: Environment variable to specify search place from oneAPI -# -# The following are set after configuration is done: -# MATHLIB_FOUND -# MATHLIB_INCLUDE_DIR -# MATHLIB_LIBRARY_DIR - -include(FindPackageHandleStandardArgs) - -set(DPNP_ONEAPI_MKL "$ENV{DPNP_ONEAPI_ROOT}/mkl/latest" CACHE PATH "Folder contains Math Lib files from ONEAPI_ROOT") - -if(DEFINED ENV{MKLROOT}) - set(DPNP_MKLROOT "$ENV{MKLROOT}" CACHE PATH "Folder contains Math Lib files from MKLROOT") -endif() - -if(UNIX) - set(MATHLIB_SYCL_LIB - ${CMAKE_SHARED_LIBRARY_PREFIX}mkl_sycl${CMAKE_SHARED_LIBRARY_SUFFIX} - CACHE PATH "SYCL math lib") -elseif(WIN32) - set(MATHLIB_SYCL_LIB - mkl_sycl_dll${CMAKE_STATIC_LIBRARY_SUFFIX} - CACHE PATH "SYCL math lib") -else() - message(FATAL_ERROR "Unsupported system ${CMAKE_SYSTEM} in MATHLIB_SYCL_LIB selection") -endif() - -find_path( - MATHLIB_INCLUDE_DIR oneapi/mkl.hpp - HINTS ${DPNP_MKLROOT} ${DPNP_ONEAPI_MKL} ENV CONDA_PREFIX ENV PREFIX # search order is important - PATH_SUFFIXES include include - DOC "Path to mathlib include files") - -find_path( - MATHLIB_LIBRARY_DIR ${MATHLIB_SYCL_LIB} - HINTS ${DPNP_MKLROOT} ${DPNP_ONEAPI_MKL} ENV CONDA_PREFIX ENV PREFIX # search order is important - PATH_SUFFIXES lib lib/intel64 - DOC "Path to mathlib library files") - -# TODO implement recurcive searching file (GLOB_RECURSE MY_PATH "/opt/intel/*/mkl.hpp") -# message(STATUS "+++++++++++++:(include: ${MY_PATH})") - -find_package_handle_standard_args(MathLib DEFAULT_MSG MATHLIB_INCLUDE_DIR MATHLIB_LIBRARY_DIR) - -if(MathLib_FOUND) - message(STATUS "Found MathLib: (include: ${MATHLIB_INCLUDE_DIR}, library: ${MATHLIB_LIBRARY_DIR})") - # mark_as_advanced(DPNP_MKLROOT MATHLIB_INCLUDE_DIR MATHLIB_LIBRARY_DIR) -endif() diff --git 
a/dpnp/backend/cmake/Modules/MKLConfig.cmake b/dpnp/backend/cmake/Modules/MKLConfig.cmake new file mode 100644 index 000000000000..07290e8ea331 --- /dev/null +++ b/dpnp/backend/cmake/Modules/MKLConfig.cmake @@ -0,0 +1,851 @@ +#=============================================================================== +# Copyright 2021-2022 Intel Corporation. +# +# This software and the related documents are Intel copyrighted materials, and +# your use of them is governed by the express license under which they were +# provided to you (License). Unless the License provides otherwise, you may not +# use, modify, copy, publish, distribute, disclose or transmit this software or +# the related documents without Intel's prior written permission. +# +# This software and the related documents are provided as is, with no express +# or implied warranties, other than those that are expressly stated in the +# License. +#=============================================================================== + +#=================================================================== +# CMake Config file for Intel(R) oneAPI Math Kernel Library (oneMKL) +#=================================================================== + +#=============================================================================== +# Input parameters +#================= +#------------- +# Main options +#------------- +# MKL_ROOT: oneMKL root directory (May be required for non-standard install locations. Optional otherwise.) 
+# Default: use location from MKLROOT environment variable or /../../../ if MKLROOT is not defined +# MKL_ARCH +# Values: ia32 intel64 +# Default: intel64 +# MKL_LINK +# Values: static, dynamic, sdl +# Default: dynamic +# Exceptions:- DPC++ doesn't support sdl +# MKL_THREADING +# Values: sequential, +# intel_thread (Intel OpenMP), +# gnu_thread (GNU OpenMP), +# pgi_thread (PGI OpenMP), +# tbb_thread +# Default: intel_thread +# Exceptions:- DPC++ defaults to tbb, PGI compiler on Windows defaults to pgi_thread +# MKL_INTERFACE (for MKL_ARCH=intel64 only) +# Values: lp64, ilp64 +# GNU or INTEL interface will be selected based on Compiler. +# Default: ilp64 +# MKL_MPI +# Values: intelmpi, mpich, openmpi, msmpi, mshpc +# Default: intelmpi +#----------------------------------- +# Special options (OFF by default) +#----------------------------------- +# ENABLE_BLAS95: Enables BLAS Fortran95 API +# ENABLE_LAPACK95: Enables LAPACK Fortran95 API +# ENABLE_BLACS: Enables cluster BLAS library +# ENABLE_CDFT: Enables cluster DFT library +# ENABLE_CPARDISO: Enables cluster PARDISO functionality +# ENABLE_SCALAPACK: Enables cluster LAPACK library +# ENABLE_OMP_OFFLOAD: Enables OpenMP Offload functionality +# +#================== +# Output parameters +#================== +# MKL_ROOT +# oneMKL root directory. +# MKL_INCLUDE +# Use of target_include_directories() is recommended. +# INTERFACE_INCLUDE_DIRECTORIES property is set on mkl_core and mkl_rt libraries. +# Alternatively, this variable can be used directly (not recommended as per Modern CMake) +# MKL_ENV +# Provides all environment variables based on input parameters. +# Currently useful for mkl_rt linking and BLACS on Windows. +# Must be set as an ENVIRONMENT property. +# Example: +# add_test(NAME mytest COMMAND myexe) +# if(MKL_ENV) +# set_tests_properties(mytest PROPERTIES ENVIRONMENT "${MKL_ENV}") +# endif() +# +# MKL:: +# IMPORTED targets to link MKL libraries individually or when using a custom link-line. 
+# mkl_core and mkl_rt have INTERFACE_* properties set to them. +# Please refer to Intel(R) oneMKL Link Line Advisor for help with linking. +# +# Below INTERFACE targets provide full link-lines for direct use. +# Example: +# target_link_options( PUBLIC $) +# +# MKL::MKL +# Link line for C and Fortran API +# MKL::MKL_DPCPP +# Link line for DPC++ API +# +# Note: For Device API, library linking is not required. +# Compile options can be added from the INTERFACE_COMPILE_OPTIONS property on MKL::MKL_DPCPP +# Include directories can be added from the INTERFACE_INCLUDE_DIRECTORIES property on MKL::MKL_DPCPP +# +# Note: Output parameters' and targets' availability can change +# based on Input parameters and application project languages. +#=============================================================================== + +function(mkl_message MSG_MODE MSG_TEXT) + if(MSG_MODE STREQUAL "FATAL_ERROR") + message(${MSG_MODE} ${MSG_TEXT}) + else() + if(NOT MKL_FIND_QUIETLY) + message(${MSG_MODE} ${MSG_TEXT}) + endif() + endif() +endfunction() + +if(${CMAKE_VERSION} VERSION_LESS "3.13") + mkl_message(FATAL_ERROR "The minimum supported CMake version is 3.13. 
You are running version ${CMAKE_VERSION}") +endif() + +include_guard() +include(FindPackageHandleStandardArgs) + +if(NOT MKL_LIBRARIES) + +# Set CMake policies for well-defined behavior across CMake versions +cmake_policy(SET CMP0011 NEW) +cmake_policy(SET CMP0057 NEW) + +# Project Languages +get_property(languages GLOBAL PROPERTY ENABLED_LANGUAGES) +list(APPEND MKL_LANGS C CXX Fortran) +foreach(lang ${languages}) + if(${lang} IN_LIST MKL_LANGS) + list(APPEND CURR_LANGS ${lang}) + endif() +endforeach() +list(REMOVE_DUPLICATES CURR_LANGS) + +option(ENABLE_BLAS95 "Enables BLAS Fortran95 API" OFF) +option(ENABLE_LAPACK95 "Enables LAPACK Fortran95 API" OFF) +option(ENABLE_BLACS "Enables cluster BLAS library" OFF) +option(ENABLE_CDFT "Enables cluster DFT library" OFF) +option(ENABLE_CPARDISO "Enables cluster PARDISO functionality" OFF) +option(ENABLE_SCALAPACK "Enables cluster LAPACK library" OFF) +option(ENABLE_OMP_OFFLOAD "Enables OpenMP Offload functionality" OFF) + +# Use MPI if any of these are enabled +if(ENABLE_BLACS OR ENABLE_CDFT OR ENABLE_SCALAPACK OR ENABLE_CPARDISO) + set(USE_MPI ON) +endif() + +# Check Parameters +function(define_param TARGET_PARAM DEFAULT_PARAM SUPPORTED_LIST) + if(NOT DEFINED ${TARGET_PARAM} AND NOT DEFINED ${DEFAULT_PARAM}) + mkl_message(STATUS "${TARGET_PARAM}: Undefined") + elseif(NOT DEFINED ${TARGET_PARAM} AND DEFINED ${DEFAULT_PARAM}) + set(${TARGET_PARAM} "${${DEFAULT_PARAM}}" CACHE STRING "Choose ${TARGET_PARAM} options are: ${${SUPPORTED_LIST}}") + foreach(opt ${${DEFAULT_PARAM}}) + set(STR_LIST "${STR_LIST} ${opt}") + endforeach() + mkl_message(STATUS "${TARGET_PARAM}: None, set to `${STR_LIST}` by default") + elseif(${SUPPORTED_LIST}) + set(ITEM_FOUND 1) + foreach(opt ${${TARGET_PARAM}}) + if(NOT ${opt} IN_LIST ${SUPPORTED_LIST}) + set(ITEM_FOUND 0) + endif() + endforeach() + if(ITEM_FOUND EQUAL 0) + foreach(opt ${${SUPPORTED_LIST}}) + set(STR_LIST "${STR_LIST} ${opt}") + endforeach() + mkl_message(FATAL_ERROR "Invalid 
${TARGET_PARAM} `${${TARGET_PARAM}}`, options are: ${STR_LIST}") + else() + mkl_message(STATUS "${TARGET_PARAM}: ${${TARGET_PARAM}}") + endif() + else() + mkl_message(STATUS "${TARGET_PARAM}: ${${TARGET_PARAM}}") + endif() +endfunction() + +#================ +# Compiler checks +#================ + +if(CMAKE_C_COMPILER) + get_filename_component(C_COMPILER_NAME ${CMAKE_C_COMPILER} NAME) +endif() +if(CMAKE_CXX_COMPILER) + get_filename_component(CXX_COMPILER_NAME ${CMAKE_CXX_COMPILER} NAME) +endif() +if(CMAKE_Fortran_COMPILER) + get_filename_component(Fortran_COMPILER_NAME ${CMAKE_Fortran_COMPILER} NAME) +endif() + +# Determine Compiler Family +if(CXX_COMPILER_NAME STREQUAL "dpcpp" OR CXX_COMPILER_NAME STREQUAL "dpcpp.exe" + OR CXX_COMPILER_NAME STREQUAL "icpx" OR CXX_COMPILER_NAME STREQUAL "icx.exe") + set(DPCPP_COMPILER ON) +endif() +if(C_COMPILER_NAME MATCHES "^clang") + set(CLANG_COMPILER ON) +endif() +if(CMAKE_C_COMPILER_ID STREQUAL "PGI" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI") + set(PGI_COMPILER ON) +elseif(CMAKE_C_COMPILER_ID STREQUAL "Intel" OR CMAKE_Fortran_COMPILER_ID STREQUAL "Intel" + OR CMAKE_C_COMPILER_ID STREQUAL "IntelLLVM" OR CMAKE_Fortran_COMPILER_ID STREQUAL "IntelLLVM") + set(INTEL_COMPILER ON) +else() + if(CMAKE_C_COMPILER_ID STREQUAL "GNU") + set(GNU_C_COMPILER ON) + endif() + if(CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + set(GNU_Fortran_COMPILER ON) + endif() +endif() + +if(USE_MPI AND (C_COMPILER_NAME MATCHES "^mpi" OR Fortran_COMPILER_NAME MATCHES "^mpi")) + set(USE_MPI_SCRIPT ON) +endif() + +#================ + +#================ +# System-specific +#================ + +# Extensions +if(UNIX) + set(LIB_PREFIX "lib") + set(LIB_EXT ".a") + set(DLL_EXT ".so") + if(APPLE) + set(DLL_EXT ".dylib") + endif() + set(LINK_PREFIX "-l") + set(LINK_SUFFIX "") +else() + set(LIB_PREFIX "") + set(LIB_EXT ".lib") + set(DLL_EXT "_dll.lib") + set(LINK_PREFIX "") + set(LINK_SUFFIX ".lib") +endif() + +# Set target system architecture +set(DEFAULT_MKL_ARCH 
intel64) +if(DPCPP_COMPILER OR PGI_COMPILER OR ENABLE_OMP_OFFLOAD OR USE_MPI) + set(MKL_ARCH_LIST intel64) +else() + set(MKL_ARCH_LIST ia32 intel64) +endif() +define_param(MKL_ARCH DEFAULT_MKL_ARCH MKL_ARCH_LIST) + +#================ + +#========== +# Setup MKL +#========== + +# Set MKL_ROOT directory +if(NOT DEFINED MKL_ROOT) + if(DEFINED ENV{MKLROOT}) + set(MKL_ROOT $ENV{MKLROOT}) + else() + get_filename_component(MKL_CMAKE_PATH "${CMAKE_CURRENT_LIST_DIR}" REALPATH) + get_filename_component(MKL_ROOT "${MKL_CMAKE_PATH}/../../../" ABSOLUTE) + mkl_message(STATUS "MKL_ROOT ${MKL_ROOT}") + endif() +endif() +string(REPLACE "\\" "/" MKL_ROOT ${MKL_ROOT}) + +# Define MKL_LINK +set(DEFAULT_MKL_LINK dynamic) +if(DPCPP_COMPILER OR USE_MPI) + set(MKL_LINK_LIST static dynamic) +else() + set(MKL_LINK_LIST static dynamic sdl) +endif() +define_param(MKL_LINK DEFAULT_MKL_LINK MKL_LINK_LIST) + +# Define MKL_INTERFACE +if(MKL_ARCH STREQUAL "intel64") + set(IFACE_TYPE intel) + if(GNU_Fortran_COMPILER) + set(IFACE_TYPE gf) + endif() + if(DPCPP_COMPILER) + if(MKL_INTERFACE) + set(MKL_INTERFACE_FULL intel_${MKL_INTERFACE}) + endif() + set(DEFAULT_MKL_INTERFACE intel_ilp64) + set(MKL_INTERFACE_LIST intel_ilp64) + else() + if(MKL_INTERFACE) + set(MKL_INTERFACE_FULL ${IFACE_TYPE}_${MKL_INTERFACE}) + endif() + set(DEFAULT_MKL_INTERFACE ${IFACE_TYPE}_ilp64) + set(MKL_INTERFACE_LIST ${IFACE_TYPE}_ilp64 ${IFACE_TYPE}_lp64) + endif() + define_param(MKL_INTERFACE_FULL DEFAULT_MKL_INTERFACE MKL_INTERFACE_LIST) +else() + if(WIN32) + set(MKL_INTERFACE_FULL intel_c) + elseif(NOT APPLE) + if(GNU_Fortran_COMPILER) + set(MKL_INTERFACE_FULL gf) + else() + set(MKL_INTERFACE_FULL intel) + endif() + else() + mkl_message(FATAL_ERROR "OSX does not support MKL_ARCH ia32.") + endif() +endif() +if(MKL_INTERFACE_FULL MATCHES "ilp64") + set(MKL_INTERFACE "ilp64") +else() + set(MKL_INTERFACE "lp64") +endif() + +# Define MKL headers +find_path(MKL_H mkl.h + HINTS ${MKL_ROOT} + PATH_SUFFIXES include) +list(APPEND 
MKL_INCLUDE ${MKL_H}) + +# Add pre-built F95 Interface Modules +if(INTEL_COMPILER AND (ENABLE_BLAS95 OR ENABLE_LAPACK95)) + if(MKL_ARCH STREQUAL "intel64") + list(APPEND MKL_INCLUDE "${MKL_ROOT}/include/${MKL_ARCH}/${MKL_INTERFACE}") + else() + list(APPEND MKL_INCLUDE "${MKL_ROOT}/include/${MKL_ARCH}") + endif() +endif() + +# Define MKL_THREADING +# All APIs support sequential threading +set(MKL_THREADING_LIST "sequential" "intel_thread" "tbb_thread") +set(DEFAULT_MKL_THREADING intel_thread) +# DPC++ API supports TBB threading, but not OpenMP threading +if(DPCPP_COMPILER) + set(DEFAULT_MKL_THREADING tbb_thread) + list(REMOVE_ITEM MKL_THREADING_LIST intel_thread) +# C, Fortran API +elseif(PGI_COMPILER) + # PGI compiler supports PGI OpenMP threading, additionally + list(APPEND MKL_THREADING_LIST pgi_thread) + # PGI compiler does not support TBB threading + list(REMOVE_ITEM MKL_THREADING_LIST tbb_thread) + if(WIN32) + # PGI 19.10 and 20.1 on Windows, do not support Intel OpenMP threading + list(REMOVE_ITEM MKL_THREADING_LIST intel_thread) + set(DEFAULT_MKL_THREADING pgi_thread) + endif() +elseif(GNU_C_COMPILER OR GNU_Fortran_COMPILER OR CLANG_COMPILER) + list(APPEND MKL_THREADING_LIST gnu_thread) +else() + # Intel and Microsoft compilers + # Nothing to do, only for completeness +endif() +define_param(MKL_THREADING DEFAULT_MKL_THREADING MKL_THREADING_LIST) + +# Define MKL_MPI +set(DEFAULT_MKL_MPI intelmpi) +if(UNIX) + if(APPLE) + # Override defaults for OSX + set(DEFAULT_MKL_MPI mpich) + set(MKL_MPI_LIST mpich) + else() + set(MKL_MPI_LIST intelmpi openmpi mpich mpich2) + endif() +else() + # Windows + set(MKL_MPI_LIST intelmpi mshpc msmpi) +endif() +define_param(MKL_MPI DEFAULT_MKL_MPI MKL_MPI_LIST) +# MSMPI is now called MSHPC. MSMPI option exists for backward compatibility. 
+if(MKL_MPI STREQUAL "mshpc") + set(MKL_MPI msmpi) +endif() +find_package_handle_standard_args(MKL REQUIRED_VARS MKL_MPI) + +# Checkpoint - Verify if required options are defined +find_package_handle_standard_args(MKL REQUIRED_VARS MKL_ROOT MKL_ARCH MKL_INCLUDE MKL_LINK MKL_THREADING MKL_INTERFACE_FULL) + +# Provides a list of IMPORTED targets for the project +if(NOT DEFINED MKL_IMPORTED_TARGETS) + set(MKL_IMPORTED_TARGETS "") +endif() + +# Clear temporary variables +set(MKL_C_COPT "") +set(MKL_F_COPT "") +set(MKL_SDL_COPT "") +set(MKL_CXX_COPT "") +set(MKL_DPCPP_COPT "") +set(MKL_DPCPP_LOPT "") +set(MKL_OFFLOAD_COPT "") +set(MKL_OFFLOAD_LOPT "") + +set(MKL_SUPP_LINK "") # Other link options. Usually at the end of the link-line. +set(MKL_LINK_LINE) # For MPI only +set(MKL_ENV_PATH "") # Temporary variable to work with PATH +set(MKL_ENV "") # Exported environment variables + +# Modify PATH variable to make it CMake-friendly +set(OLD_PATH $ENV{PATH}) +string(REPLACE ";" "\;" OLD_PATH "${OLD_PATH}") + +# Compiler options +if(GNU_C_COMPILER OR GNU_Fortran_COMPILER) + if(MKL_ARCH STREQUAL "ia32") + list(APPEND MKL_C_COPT -m32) + list(APPEND MKL_F_COPT -m32) + else() + list(APPEND MKL_C_COPT -m64) + list(APPEND MKL_F_COPT -m64) + endif() +endif() + +# Additonal compiler & linker options +if(CXX_COMPILER_NAME STREQUAL "icpx" OR CXX_COMPILER_NAME STREQUAL "icx.exe") + list(APPEND MKL_DPCPP_COPT "-fsycl") + list(APPEND MKL_DPCPP_LOPT "-fsycl") +endif() +if(DPCPP_COMPILER OR ENABLE_OMP_OFFLOAD) + if(MKL_LINK STREQUAL "static") + list(APPEND MKL_DPCPP_LOPT "-fsycl-device-code-split=per_kernel") + list(APPEND MKL_OFFLOAD_LOPT "-fsycl-device-code-split=per_kernel") + endif() +endif() + +# For OpenMP Offload +if(ENABLE_OMP_OFFLOAD) + if(WIN32) + if(OPENMP_VERSION VERSION_GREATER_EQUAL "5.1") + if("Fortran" IN_LIST CURR_LANGS) + list(APPEND MKL_OFFLOAD_COPT -Qiopenmp -Qopenmp-targets:spir64 -DONEMKL_USE_OPENMP_VERSION=202011) + else() + list(APPEND MKL_OFFLOAD_COPT -Qiopenmp 
-Qopenmp-targets:spir64 -Qopenmp-version:51 -DONEMKL_USE_OPENMP_VERSION=202011) + endif() + else() + list(APPEND MKL_OFFLOAD_COPT -Qiopenmp -Qopenmp-targets:spir64) + endif() + # -MD and -MDd are manually added here because offload functionality uses DPC++ runtime. + if(CMAKE_BUILD_TYPE MATCHES "Debug|DebInfo") + list(APPEND MKL_OFFLOAD_COPT -MDd) + else() + list(APPEND MKL_OFFLOAD_COPT -MD) + endif() + list(APPEND MKL_OFFLOAD_LOPT -Qiopenmp -Qopenmp-targets:spir64 -fsycl) + set(SKIP_LIBPATH ON) + else() + if(OPENMP_VERSION VERSION_GREATER_EQUAL "5.1") + if("Fortran" IN_LIST CURR_LANGS) + list(APPEND MKL_OFFLOAD_COPT -fiopenmp -fopenmp-targets=spir64 -DONEMKL_USE_OPENMP_VERSION=202011) + else() + list(APPEND MKL_OFFLOAD_COPT -fiopenmp -fopenmp-targets=spir64 -fopenmp-version=51 -DONEMKL_USE_OPENMP_VERSION=202011) + endif() + else () + list(APPEND MKL_OFFLOAD_COPT -fiopenmp -fopenmp-targets=spir64) + endif() + list(APPEND MKL_OFFLOAD_LOPT -fiopenmp -fopenmp-targets=spir64 -fsycl) + if(APPLE) + list(APPEND MKL_SUPP_LINK -lc++) + else() + list(APPEND MKL_SUPP_LINK -lstdc++) + endif() + endif() +endif() + +# For selected Interface +if(MKL_INTERFACE_FULL) + if(MKL_ARCH STREQUAL "ia32") + if(GNU_Fortran_COMPILER) + set(MKL_SDL_IFACE_ENV "GNU") + endif() + else() + if(GNU_Fortran_COMPILER) + set(MKL_SDL_IFACE_ENV "GNU,${MKL_INTERFACE}") + else() + set(MKL_SDL_IFACE_ENV "${MKL_INTERFACE}") + endif() + if(MKL_INTERFACE STREQUAL "ilp64") + if("Fortran" IN_LIST CURR_LANGS) + if(INTEL_COMPILER) + if(WIN32) + list(APPEND MKL_F_COPT "-4I8") + else() + list(APPEND MKL_F_COPT "-i8") + endif() + elseif(GNU_Fortran_COMPILER) + list(APPEND MKL_F_COPT "-fdefault-integer-8") + elseif(PGI_COMPILER) + list(APPEND MKL_F_COPT "-i8") + endif() + endif() + list(INSERT MKL_C_COPT 0 "-DMKL_ILP64") + list(INSERT MKL_SDL_COPT 0 "-DMKL_ILP64") + list(INSERT MKL_CXX_COPT 0 "-DMKL_ILP64") + list(INSERT MKL_OFFLOAD_COPT 0 "-DMKL_ILP64") + else() + # lp64 + endif() + endif() + if(MKL_SDL_IFACE_ENV) + 
string(TOUPPER ${MKL_SDL_IFACE_ENV} MKL_SDL_IFACE_ENV) + endif() +endif() # MKL_INTERFACE_FULL + +# All MKL Libraries +if(WIN32 AND CMAKE_BUILD_TYPE MATCHES "Debug|DebInfo") + set(MKL_SYCL mkl_sycld) +else() + set(MKL_SYCL mkl_sycl) +endif() +set(MKL_IFACE_LIB mkl_${MKL_INTERFACE_FULL}) +set(MKL_CORE mkl_core) +if(WIN32 AND CMAKE_BUILD_TYPE MATCHES "Debug|DebInfo" AND MKL_THREADING STREQUAL "tbb_thread") + set(MKL_THREAD mkl_tbb_threadd) +else() + set(MKL_THREAD mkl_${MKL_THREADING}) +endif() +set(MKL_SDL mkl_rt) +if(MKL_ARCH STREQUAL "ia32") + set(MKL_BLAS95 mkl_blas95) + set(MKL_LAPACK95 mkl_lapack95) +else() + set(MKL_BLAS95 mkl_blas95_${MKL_INTERFACE}) + set(MKL_LAPACK95 mkl_lapack95_${MKL_INTERFACE}) +endif() +# BLACS +set(MKL_BLACS mkl_blacs_${MKL_MPI}_${MKL_INTERFACE}) +if(UNIX AND NOT APPLE AND MKL_MPI MATCHES "mpich") + # MPICH is compatible with INTELMPI Wrappers on Linux + set(MKL_BLACS mkl_blacs_intelmpi_${MKL_INTERFACE}) +endif() +if(WIN32) + if(MKL_MPI STREQUAL "msmpi") + if("Fortran" IN_LIST CURR_LANGS) + list(APPEND MKL_SUPP_LINK "msmpifec.lib") + endif() + # MSMPI and MSHPC are supported with the same BLACS library + set(MKL_BLACS mkl_blacs_msmpi_${MKL_INTERFACE}) + if(NOT MKL_LINK STREQUAL "static") + set(MKL_BLACS mkl_blacs_${MKL_INTERFACE}) + set(MKL_BLACS_ENV MSMPI) + endif() + elseif(MKL_MPI STREQUAL "intelmpi" AND NOT MKL_LINK STREQUAL "static") + set(MKL_BLACS mkl_blacs_${MKL_INTERFACE}) + set(MKL_BLACS_ENV INTELMPI) + endif() +endif() +# CDFT & SCALAPACK +set(MKL_CDFT mkl_cdft_core) +set(MKL_SCALAPACK mkl_scalapack_${MKL_INTERFACE}) + + +if (UNIX) + if(NOT APPLE) + if(MKL_LINK STREQUAL "static") + set(START_GROUP "-Wl,--start-group") + set(END_GROUP "-Wl,--end-group") + if(DPCPP_COMPILER OR ENABLE_OMP_OFFLOAD) + set(EXPORT_DYNAMIC "-Wl,-export-dynamic") + endif() + elseif(MKL_LINK STREQUAL "dynamic") + set(MKL_RPATH "-Wl,-rpath=$") + if((GNU_Fortran_COMPILER OR PGI_COMPILER) AND "Fortran" IN_LIST CURR_LANGS) + set(NO_AS_NEEDED 
-Wl,--no-as-needed) + endif() + else() + set(MKL_RPATH "-Wl,-rpath=$") + endif() + endif() +endif() + +# Create a list of requested libraries, based on input options (MKL_LIBRARIES) +# Create full link-line in MKL_LINK_LINE +list(APPEND MKL_LINK_LINE $,${MKL_OFFLOAD_LOPT},> + $,${MKL_DPCPP_LOPT},> ${EXPORT_DYNAMIC} ${NO_AS_NEEDED} ${MKL_RPATH}) +if(ENABLE_BLAS95) + list(APPEND MKL_LIBRARIES ${MKL_BLAS95}) + list(APPEND MKL_LINK_LINE MKL::${MKL_BLAS95}) +endif() +if(ENABLE_LAPACK95) + list(APPEND MKL_LIBRARIES ${MKL_LAPACK95}) + list(APPEND MKL_LINK_LINE MKL::${MKL_LAPACK95}) +endif() +if(ENABLE_SCALAPACK) + list(APPEND MKL_LIBRARIES ${MKL_SCALAPACK}) + list(APPEND MKL_LINK_LINE MKL::${MKL_SCALAPACK}) +endif() +if(DPCPP_COMPILER OR (ENABLE_OMP_OFFLOAD AND NOT MKL_LINK STREQUAL "sdl")) + list(APPEND MKL_LIBRARIES ${MKL_SYCL}) + list(APPEND MKL_LINK_LINE MKL::${MKL_SYCL}) +endif() +list(APPEND MKL_LINK_LINE ${START_GROUP}) +if(ENABLE_CDFT) + list(APPEND MKL_LIBRARIES ${MKL_CDFT}) + list(APPEND MKL_LINK_LINE MKL::${MKL_CDFT}) +endif() +if(MKL_LINK STREQUAL "sdl") + list(APPEND MKL_LIBRARIES ${MKL_SDL}) + list(APPEND MKL_LINK_LINE MKL::${MKL_SDL}) +else() + list(APPEND MKL_LIBRARIES ${MKL_IFACE_LIB} ${MKL_THREAD} ${MKL_CORE}) + list(APPEND MKL_LINK_LINE MKL::${MKL_IFACE_LIB} MKL::${MKL_THREAD} MKL::${MKL_CORE}) +endif() +if(USE_MPI) + list(APPEND MKL_LIBRARIES ${MKL_BLACS}) + list(APPEND MKL_LINK_LINE MKL::${MKL_BLACS}) +endif() +list(APPEND MKL_LINK_LINE ${END_GROUP}) + +# Find all requested libraries +foreach(lib ${MKL_LIBRARIES}) + unset(${lib}_file CACHE) + if(MKL_LINK STREQUAL "static" AND NOT ${lib} STREQUAL ${MKL_SDL}) + find_library(${lib}_file ${LIB_PREFIX}${lib}${LIB_EXT} + PATHS ${MKL_ROOT} + PATH_SUFFIXES "lib" "lib/${MKL_ARCH}") + add_library(MKL::${lib} STATIC IMPORTED) + else() + find_library(${lib}_file NAMES ${LIB_PREFIX}${lib}${DLL_EXT} ${lib} + PATHS ${MKL_ROOT} + PATH_SUFFIXES "lib" "lib/${MKL_ARCH}") + add_library(MKL::${lib} SHARED IMPORTED) + 
endif() + find_package_handle_standard_args(MKL REQUIRED_VARS ${lib}_file) + # CMP0111, implemented in CMake 3.20+ requires a shared library target on Windows + # to be defined with IMPLIB and LOCATION property. + # It also requires a static library target to be defined with LOCATION property. + # Setting the policy to OLD usage, using cmake_policy() does not work as of 3.20.0, hence the if-else below. + if(WIN32 AND NOT MKL_LINK STREQUAL "static") + set_target_properties(MKL::${lib} PROPERTIES IMPORTED_IMPLIB "${${lib}_file}") + # Find corresponding DLL + set(MKL_DLL_GLOB ${lib}.*.dll) + file(GLOB MKL_DLL_FILE "${MKL_ROOT}/redist/${MKL_ARCH}/${MKL_DLL_GLOB}" + "${MKL_ROOT}/../redist/${MKL_ARCH}/${MKL_DLL_GLOB}" + "${MKL_ROOT}/../redist/${MKL_ARCH}/mkl/${MKL_DLL_GLOB}" + "${MKL_ROOT}/bin/${MKL_DLL_GLOB}") + if(NOT ${lib} STREQUAL ${MKL_IFACE_LIB} AND NOT ${lib} STREQUAL ${MKL_BLAS95} AND NOT ${lib} STREQUAL ${MKL_LAPACK95}) # Windows IFACE libs are static only + list(LENGTH MKL_DLL_FILE MKL_DLL_FILE_LEN) + if(MKL_DLL_FILE_LEN) + # in case multiple versions of the same dll are found, select the highest version + list(SORT MKL_DLL_FILE) + list(REVERSE MKL_DLL_FILE) + list(GET MKL_DLL_FILE 0 MKL_DLL_FILE) + + mkl_message(STATUS "Found DLL: ${MKL_DLL_FILE}") + set_target_properties(MKL::${lib} PROPERTIES IMPORTED_LOCATION "${MKL_DLL_FILE}") + else() + mkl_message(FATAL_ERROR "${MKL_DLL_GLOB} not found. MKL_ROOT was '${MKL_ROOT}'. 
MKL_DLL_FILE is '${MKL_DLL_FILE}'") + endif() + endif() + else() + set_target_properties(MKL::${lib} PROPERTIES IMPORTED_LOCATION "${${lib}_file}") + endif() + list(APPEND MKL_IMPORTED_TARGETS MKL::${lib}) +endforeach() + +# Threading selection +if(MKL_THREADING) + if(MKL_THREADING STREQUAL "tbb_thread") + find_package(TBB REQUIRED CONFIG COMPONENTS tbb) + set(MKL_THREAD_LIB $) + set(MKL_SDL_THREAD_ENV "TBB") + get_property(TBB_LIB TARGET TBB::tbb PROPERTY IMPORTED_LOCATION_RELEASE) + get_filename_component(TBB_LIB_DIR ${TBB_LIB} DIRECTORY) + if(UNIX) + if(CMAKE_SKIP_BUILD_RPATH) + set(TBB_LINK "-L${TBB_LIB_DIR} -ltbb") + else() + set(TBB_LINK "-Wl,-rpath,${TBB_LIB_DIR} -L${TBB_LIB_DIR} -ltbb") + endif() + list(APPEND MKL_SUPP_LINK ${TBB_LINK}) + if(APPLE) + list(APPEND MKL_SUPP_LINK -lc++) + else() + list(APPEND MKL_SUPP_LINK -lstdc++) + endif() + endif() + if(WIN32 OR APPLE) + set(MKL_ENV_PATH ${TBB_LIB_DIR}) + endif() + elseif(MKL_THREADING MATCHES "_thread") + if(MKL_THREADING STREQUAL "pgi_thread") + list(APPEND MKL_SUPP_LINK -mp -pgf90libs) + set(MKL_SDL_THREAD_ENV "PGI") + elseif(MKL_THREADING STREQUAL "gnu_thread") + list(APPEND MKL_SUPP_LINK -lgomp) + set(MKL_SDL_THREAD_ENV "GNU") + else() + # intel_thread + if(UNIX) + set(MKL_OMP_LIB iomp5) + set(LIB_EXT ".so") + if(APPLE) + set(LIB_EXT ".dylib") + endif() + else() + set(MKL_OMP_LIB libiomp5md) + endif() + set(MKL_SDL_THREAD_ENV "INTEL") + set(OMP_LIBNAME ${LIB_PREFIX}${MKL_OMP_LIB}${LIB_EXT}) + + find_library(OMP_LIBRARY ${OMP_LIBNAME} + HINTS $ENV{LIB} $ENV{LIBRARY_PATH} $ENV{MKLROOT} ${MKL_ROOT} ${CMPLR_ROOT} + PATH_SUFFIXES "lib" "lib/${MKL_ARCH}" + "lib/${MKL_ARCH}_lin" "lib/${MKL_ARCH}_win" + "linux/compiler/lib/${MKL_ARCH}" + "linux/compiler/lib/${MKL_ARCH}_lin" + "windows/compiler/lib/${MKL_ARCH}" + "windows/compiler/lib/${MKL_ARCH}_win" + "../compiler/lib/${MKL_ARCH}_lin" "../compiler/lib/${MKL_ARCH}_win" + "../compiler/lib/${MKL_ARCH}" "../compiler/lib" + 
"../../compiler/latest/linux/compiler/lib/${MKL_ARCH}" + "../../compiler/latest/linux/compiler/lib/${MKL_ARCH}_lin" + "../../compiler/latest/windows/compiler/lib/${MKL_ARCH}" + "../../compiler/latest/windows/compiler/lib/${MKL_ARCH}_win" + "../../compiler/latest/mac/compiler/lib") + if(WIN32) + set(OMP_DLLNAME ${LIB_PREFIX}${MKL_OMP_LIB}.dll) + find_path(OMP_DLL_DIR ${OMP_DLLNAME} + HINTS $ENV{LIB} $ENV{LIBRARY_PATH} $ENV{MKLROOT} ${MKL_ROOT} ${CMPLR_ROOT} + PATH_SUFFIXES "redist/${MKL_ARCH}" + "redist/${MKL_ARCH}_win" "redist/${MKL_ARCH}_win/compiler" + "../redist/${MKL_ARCH}/compiler" "../compiler/lib" + "../../compiler/latest/windows/redist/${MKL_ARCH}_win" + "../../compiler/latest/windows/redist/${MKL_ARCH}_win/compiler" + "../../compiler/latest/windows/compiler/redist/${MKL_ARCH}_win" + "../../compiler/latest/windows/compiler/redist/${MKL_ARCH}_win/compiler") + find_package_handle_standard_args(MKL REQUIRED_VARS OMP_DLL_DIR) + set(MKL_ENV_PATH "${OMP_DLL_DIR}") + endif() + + if(WIN32 AND SKIP_LIBPATH) + # Only for Intel OpenMP Offload + set(OMP_LINK "libiomp5md.lib") + else() + set(OMP_LINK "${OMP_LIBRARY}") + if(CMAKE_C_COMPILER_ID STREQUAL "PGI" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI") + # Disable PGI OpenMP runtime for correct work of Intel OpenMP runtime + list(APPEND MKL_SUPP_LINK -nomp) + endif() + endif() + find_package_handle_standard_args(MKL REQUIRED_VARS OMP_LIBRARY OMP_LINK) + set(MKL_THREAD_LIB ${OMP_LINK}) + endif() + else() + # Sequential threading + set(MKL_SDL_THREAD_ENV "SEQUENTIAL") + endif() +endif() # MKL_THREADING + +if (UNIX) + list(APPEND MKL_SUPP_LINK -lm -ldl -lpthread) +endif() + +if(DPCPP_COMPILER OR ENABLE_OMP_OFFLOAD) + if(WIN32) + # Detect sycl library version + if(NOT DEFINED SYCL_LIB_VER_CACHE) + set(SYCL_LIB_VER "") + find_library(SYCL_LIB_DIR ${LIB_PREFIX}sycl${LIB_EXT} + HINTS $ENV{LIB} $ENV{CMPLR_ROOT} + PATH_SUFFIXES "windows/lib") + if(NOT SYCL_LIB_DIR) + foreach(ver RANGE 6 99) + find_library(SYCL_LIB_DIR 
${LIB_PREFIX}sycl${ver}${LIB_EXT} + HINTS $ENV{LIB} $ENV{CMPLR_ROOT} + PATH_SUFFIXES "windows/lib") + if(SYCL_LIB_DIR) + set(SYCL_LIB_VER ${ver}) + break() + endif() + endforeach() + endif() + set(SYCL_LIB_VER_CACHE ${SYCL_LIB_VER} CACHE STRING "") + endif() + + if(CMAKE_BUILD_TYPE MATCHES "Debug|DebInfo") + list(APPEND MKL_SUPP_LINK ${LINK_PREFIX}sycl${SYCL_LIB_VER_CACHE}d${LINK_SUFFIX}) + else() + list(APPEND MKL_SUPP_LINK ${LINK_PREFIX}sycl${SYCL_LIB_VER_CACHE}${LINK_SUFFIX}) + endif() + else() + list(APPEND MKL_SUPP_LINK ${LINK_PREFIX}sycl${LINK_SUFFIX}) + endif() + list(APPEND MKL_SUPP_LINK ${LINK_PREFIX}OpenCL${LINK_SUFFIX}) +endif() + +# Setup link types based on input options +set(LINK_TYPES "") + +if(DPCPP_COMPILER) + add_library(MKL::MKL_DPCPP INTERFACE IMPORTED GLOBAL) + target_compile_options(MKL::MKL_DPCPP INTERFACE ${MKL_DPCPP_COPT}) + target_link_libraries(MKL::MKL_DPCPP INTERFACE ${MKL_LINK_LINE} ${MKL_THREAD_LIB} ${MKL_SUPP_LINK}) + list(APPEND LINK_TYPES MKL::MKL_DPCPP) +endif() +# Single target for all C, Fortran link-lines +add_library(MKL::MKL INTERFACE IMPORTED GLOBAL) +target_compile_options(MKL::MKL INTERFACE + $<$,C>:${MKL_C_COPT}> + $<$,Fortran>:${MKL_F_COPT}> + $<$,CXX>:${MKL_CXX_COPT}> + $,${MKL_OFFLOAD_COPT},>) +target_link_libraries(MKL::MKL INTERFACE ${MKL_LINK_LINE} ${MKL_THREAD_LIB} ${MKL_SUPP_LINK}) +list(APPEND LINK_TYPES MKL::MKL) + +foreach(link ${LINK_TYPES}) + # Set properties on all INTERFACE targets + target_include_directories(${link} BEFORE INTERFACE "${MKL_INCLUDE}") + list(APPEND MKL_IMPORTED_TARGETS ${link}) +endforeach(link) # LINK_TYPES + +if(MKL_LINK STREQUAL "sdl") + list(APPEND MKL_ENV "MKL_INTERFACE_LAYER=${MKL_SDL_IFACE_ENV}" "MKL_THREADING_LAYER=${MKL_SDL_THREAD_ENV}") +endif() +if(WIN32 AND NOT MKL_LINK STREQUAL "static") + list(APPEND MKL_ENV "MKL_BLACS_MPI=${MKL_BLACS_ENV}") +endif() + +# Add MKL dynamic libraries if RPATH is not defined on Unix +if(UNIX AND CMAKE_SKIP_BUILD_RPATH) + if(MKL_LINK STREQUAL 
"sdl") + set(MKL_LIB_DIR $) + else() + set(MKL_LIB_DIR $) + endif() + if(APPLE) + list(APPEND MKL_ENV "DYLD_LIBRARY_PATH=${MKL_LIB_DIR}\;$ENV{DYLD_LIBRARY_PATH}") + else() + list(APPEND MKL_ENV "LD_LIBRARY_PATH=${MKL_LIB_DIR}\;$ENV{LD_LIBRARY_PATH}") + endif() +endif() + +# Add MKL dynamic libraries to PATH on Windows +if(WIN32 AND NOT MKL_LINK STREQUAL "static") + get_filename_component(MKL_DLL_DIR ${MKL_DLL_FILE} DIRECTORY) + set(MKL_ENV_PATH "${MKL_DLL_DIR}\;${MKL_ENV_PATH}") +endif() + +if(MKL_ENV_PATH) + list(APPEND MKL_ENV "PATH=${MKL_ENV_PATH}\;${OLD_PATH}") + if(APPLE) + list(APPEND MKL_ENV "DYLD_LIBRARY_PATH=${MKL_ENV_PATH}\:${OLD_PATH}") + endif() +endif() + +unset(MKL_DLL_FILE) + +endif() # MKL_LIBRARIES diff --git a/dpnp/backend/cmake/Modules/README.md b/dpnp/backend/cmake/Modules/README.md new file mode 100644 index 000000000000..02b4c40e0a4d --- /dev/null +++ b/dpnp/backend/cmake/Modules/README.md @@ -0,0 +1,8 @@ +# oneAPI CMake scripts vendored from Intel oneAPI BaseKit 2023.0.0 + +This is done to work around absence of this script in onedpl-devel conda +package. Once it is added, expected 2023.2.0, this vendored package is +to be removed. + +tbb-devel script has been modified to allow it to work correctly in conda +environment. \ No newline at end of file diff --git a/dpnp/backend/cmake/Modules/TBBConfig.cmake b/dpnp/backend/cmake/Modules/TBBConfig.cmake new file mode 100644 index 000000000000..6a3f4f7a43a6 --- /dev/null +++ b/dpnp/backend/cmake/Modules/TBBConfig.cmake @@ -0,0 +1,193 @@ +# Copyright (c) 2017-2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# It defines the following variables:
+#     TBB_<component>_FOUND
+#     TBB_IMPORTED_TARGETS
+#
+# TBBConfigVersion.cmake defines TBB_VERSION
+#
+# Initialize to default values
+if (NOT TBB_IMPORTED_TARGETS)
+    set(TBB_IMPORTED_TARGETS "")
+endif()
+
+if (NOT TBB_FIND_COMPONENTS)
+    set(TBB_FIND_COMPONENTS "tbb;tbbmalloc;tbbmalloc_proxy")
+    foreach (_tbb_component ${TBB_FIND_COMPONENTS})
+        set(TBB_FIND_REQUIRED_${_tbb_component} 1)
+    endforeach()
+endif()
+
+get_filename_component(_tbb_root "${CMAKE_CURRENT_LIST_DIR}" REALPATH)
+get_filename_component(_tbb_root "${_tbb_root}/../../.." ABSOLUTE)
+
+set(TBB_INTERFACE_VERSION )
+
+set(_tbb_bin_version 12)
+set(_tbbmalloc_bin_version 2)
+set(_tbbmalloc_proxy_bin_version 2)
+set(_tbbbind_bin_version 3)
+
+# Add components with internal dependencies: tbbmalloc_proxy -> tbbmalloc
+list(FIND TBB_FIND_COMPONENTS tbbmalloc_proxy _tbbmalloc_proxy_ix)
+if (NOT _tbbmalloc_proxy_ix EQUAL -1)
+    list(APPEND TBB_FIND_COMPONENTS tbbmalloc)
+    list(REMOVE_DUPLICATES TBB_FIND_COMPONENTS)
+    set(TBB_FIND_REQUIRED_tbbmalloc ${TBB_FIND_REQUIRED_tbbmalloc_proxy})
+endif()
+unset(_tbbmalloc_proxy_ix)
+
+if (CMAKE_SIZEOF_VOID_P STREQUAL "8")
+    set(_tbb_subdir intel64/gcc4.8)
+else ()
+    set(_tbb_subdir ia32/gcc4.8)
+endif()
+
+if (UNIX)
+    set(_tbb_lib_ext ".so")
+    set(_tbb_lib_prefix "lib")
+    set(_tbb_lib_dir_conda "lib")
+    set(_bin_version "")
+elseif (WIN32)
+    set(_bin_version "")
+    set(_tbb_lib_prefix "")
+    set(_tbb_lib_ext ".dll")
+    set(_tbb_impllib_ext ".lib")
+    set(_tbb_lib_dir_conda "bin")
+    set(_tbb_impllib_dir_conda "lib")
+else()
+    message(FATAL_ERROR "Unsupported platform. Only Unix and Windows are supported.")
+endif()
+
+foreach (_tbb_component ${TBB_FIND_COMPONENTS})
+    set(TBB_${_tbb_component}_FOUND 0)
+    # find_library/find_file keep their result in the cache; drop it so every component is searched for anew.
+    foreach (_tbb_cached_var _tbb_release_lib _tbb_release_impllib _tbb_debug_lib _tbb_debug_impllib)
+        unset(${_tbb_cached_var} CACHE)
+    endforeach()
+    if(WIN32)
+        unset(_bin_version)
+        if (_tbb_component STREQUAL tbb)
+            set(_bin_version ${_tbb_bin_version})
+        endif()
+    endif()
+
+    if(UNIX)
+        find_library(_tbb_release_lib
+            NAMES ${_tbb_lib_prefix}${_tbb_component}${_bin_version}${_tbb_lib_ext}
+            PATHS ${_tbb_root}
+            HINTS ENV TBB_ROOT_HINT
+            PATH_SUFFIXES "${_tbb_lib_dir_conda}" "lib/${_tbb_subdir}")
+    else()
+        find_file(_tbb_release_lib
+            NAMES ${_tbb_lib_prefix}${_tbb_component}${_bin_version}${_tbb_lib_ext}
+            PATHS ${_tbb_root}
+            HINTS ENV TBB_ROOT_HINT
+            PATH_SUFFIXES "${_tbb_lib_dir_conda}" "lib/${_tbb_subdir}")
+        if (EXISTS "${_tbb_release_lib}")
+            find_library(_tbb_release_impllib
+                NAMES ${_tbb_lib_prefix}${_tbb_component}${_bin_version}${_tbb_impllib_ext}
+                PATHS ${_tbb_root}
+                HINTS ENV TBB_ROOT_HINT
+                PATH_SUFFIXES "${_tbb_impllib_dir_conda}" "lib/${_tbb_subdir}")
+        endif()
+    endif()
+
+    if (NOT TBB_FIND_RELEASE_ONLY)
+        find_library(_tbb_debug_lib
+            NAMES ${_tbb_lib_prefix}${_tbb_component}${_bin_version}_debug${_tbb_lib_ext}
+            PATHS ${_tbb_root}
+            HINTS ENV TBB_ROOT_HINT
+            PATH_SUFFIXES "${_tbb_lib_dir_conda}" "lib/${_tbb_subdir}")
+        if(WIN32 AND EXISTS "${_tbb_debug_lib}")
+            find_library(_tbb_debug_impllib
+                NAMES ${_tbb_lib_prefix}${_tbb_component}${_bin_version}_debug${_tbb_impllib_ext}
+                PATHS ${_tbb_root}
+                HINTS ENV TBB_ROOT_HINT
+                PATH_SUFFIXES "${_tbb_impllib_dir_conda}" "lib/${_tbb_subdir}")
+        endif()
+    endif()
+
+    if (EXISTS "${_tbb_release_lib}" OR EXISTS "${_tbb_debug_lib}")
+        if (NOT TARGET TBB::${_tbb_component})
+            add_library(TBB::${_tbb_component} SHARED IMPORTED)
+
+            find_path(_tbb_include_dir
+                oneapi/tbb.h
+                PATHS ${_tbb_root}
+                PATH_SUFFIXES include
+                HINTS ENV TBB_ROOT_HINT
+            )
+
+            if(WIN32)
+                set_target_properties(
+                    TBB::${_tbb_component} PROPERTIES
+                    INTERFACE_INCLUDE_DIRECTORIES "${_tbb_include_dir}"
+                    INTERFACE_COMPILE_DEFINITIONS "__TBB_NO_IMPLICIT_LINKAGE=1"
+                )
+            else()
+                set_target_properties(
+                    TBB::${_tbb_component} PROPERTIES
+                    INTERFACE_INCLUDE_DIRECTORIES "${_tbb_include_dir}"
+                )
+            endif()
+            unset(_tbb_include_dir)
+
+            if (EXISTS "${_tbb_release_lib}")
+                if(WIN32)
+                    set_target_properties(TBB::${_tbb_component} PROPERTIES
+                        IMPORTED_LOCATION_RELEASE "${_tbb_release_lib}"
+                        IMPORTED_IMPLIB_RELEASE "${_tbb_release_impllib}")
+                else()
+                    set_target_properties(TBB::${_tbb_component} PROPERTIES
+                        IMPORTED_LOCATION_RELEASE "${_tbb_release_lib}")
+                endif()
+                set_property(TARGET TBB::${_tbb_component} APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
+            endif()
+
+            if (EXISTS "${_tbb_debug_lib}")
+                if(WIN32)
+                    set_target_properties(TBB::${_tbb_component} PROPERTIES
+                        IMPORTED_LOCATION_DEBUG "${_tbb_debug_lib}"
+                        IMPORTED_IMPLIB_DEBUG "${_tbb_debug_impllib}"
+                    )
+                else()
+                    set_target_properties(TBB::${_tbb_component} PROPERTIES
+                        IMPORTED_LOCATION_DEBUG "${_tbb_debug_lib}")
+                endif()
+                set_property(TARGET TBB::${_tbb_component} APPEND PROPERTY IMPORTED_CONFIGURATIONS DEBUG)
+            endif()
+
+            # Add internal dependencies for imported targets: TBB::tbbmalloc_proxy -> TBB::tbbmalloc
+            if (_tbb_component STREQUAL tbbmalloc_proxy)
+                set_target_properties(TBB::tbbmalloc_proxy PROPERTIES INTERFACE_LINK_LIBRARIES TBB::tbbmalloc)
+            endif()
+        endif()
+        list(APPEND TBB_IMPORTED_TARGETS TBB::${_tbb_component})
+        set(TBB_${_tbb_component}_FOUND 1)
+    elseif (TBB_FIND_REQUIRED AND TBB_FIND_REQUIRED_${_tbb_component})
+        message(STATUS "Missed required oneTBB component: ${_tbb_component}")
+        if (TBB_FIND_RELEASE_ONLY)
+            message(STATUS " ${_tbb_release_lib} must exist.")
+        else()
+            message(STATUS " one or both of:\n ${_tbb_release_lib}\n ${_tbb_debug_lib}\n files must exist.")
+        endif()
+        set(TBB_FOUND FALSE)
+    endif()
+endforeach()
+list(REMOVE_DUPLICATES TBB_IMPORTED_TARGETS)
+unset(_tbb_release_lib)
+unset(_tbb_debug_lib)
+unset(_tbb_root)
diff --git a/dpnp/backend/cmake/Modules/oneDPLConfig.cmake b/dpnp/backend/cmake/Modules/oneDPLConfig.cmake
new file mode 100755
index 000000000000..6473d20c69f5
--- /dev/null
+++ b/dpnp/backend/cmake/Modules/oneDPLConfig.cmake
@@ -0,0 +1,105 @@
+##===----------------------------------------------------------------------===##
+#
+# Copyright (C) Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# This file incorporates work covered by the following copyright and permission
+# notice:
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+#
+##===----------------------------------------------------------------------===##
+
+# Installation path: <onedpl_root>/lib/cmake/oneDPL/ (the root is three directories above this file)
+get_filename_component(_onedpl_root "${CMAKE_CURRENT_LIST_DIR}" REALPATH)
+get_filename_component(_onedpl_root "${_onedpl_root}/../../../" ABSOLUTE)
+
+if (WIN32)
+    set(_onedpl_headers_subdir windows)
+else()
+    set(_onedpl_headers_subdir linux)
+endif()
+
+# Locate the directory that contains the oneapi/dpl headers.
+find_path(_onedpl_headers
+    NAMES oneapi/dpl
+    PATHS ${_onedpl_root}
+    HINTS ENV DPL_ROOT_HINT
+    PATH_SUFFIXES include ${_onedpl_headers_subdir}/include
+)
+
+# Define the oneDPL INTERFACE target once and pick a parallel backend: tbb, then openmp, then serial.
+if (EXISTS "${_onedpl_headers}")
+    if (NOT TARGET oneDPL)
+        include(CheckCXXCompilerFlag)
+
+        add_library(oneDPL INTERFACE IMPORTED)
+        set_target_properties(oneDPL PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${_onedpl_headers}")
+
+        if (ONEDPL_PAR_BACKEND AND NOT ONEDPL_PAR_BACKEND MATCHES "^(tbb|openmp|serial)$")
+            message(STATUS "oneDPL: ONEDPL_PAR_BACKEND=${ONEDPL_PAR_BACKEND} is requested, but not supported, available backends: tbb, openmp, serial")
+            set(oneDPL_FOUND FALSE)
+            return()
+        endif()
+
+        if (NOT ONEDPL_PAR_BACKEND OR ONEDPL_PAR_BACKEND STREQUAL "tbb")  # Handle oneTBB backend
+            if (NOT TBB_FOUND)
+                find_package(TBB 2021 QUIET COMPONENTS tbb PATHS ${CMAKE_CURRENT_LIST_DIR} NO_DEFAULT_PATH)
+            endif()
+            if (NOT TBB_FOUND AND ONEDPL_PAR_BACKEND STREQUAL "tbb")  # If oneTBB backend is requested explicitly, but not found.
+                message(STATUS "oneDPL: ONEDPL_PAR_BACKEND=${ONEDPL_PAR_BACKEND} requested, but not found")
+                set(oneDPL_FOUND FALSE)
+                return()
+            elseif (TBB_FOUND)
+                set(ONEDPL_PAR_BACKEND tbb)
+                message(STATUS "oneDPL: ONEDPL_PAR_BACKEND=${ONEDPL_PAR_BACKEND}, disable OpenMP backend")
+                set_target_properties(oneDPL PROPERTIES INTERFACE_LINK_LIBRARIES TBB::tbb)
+                set_property(TARGET oneDPL APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS ONEDPL_USE_TBB_BACKEND=1 ONEDPL_USE_OPENMP_BACKEND=0)
+            endif()
+        endif()
+
+        if (NOT ONEDPL_PAR_BACKEND OR ONEDPL_PAR_BACKEND STREQUAL "openmp")  # Handle OpenMP backend
+            if (UNIX)
+                set(_openmp_flag "-fopenmp")
+            else()
+                set(_openmp_flag "-Qopenmp")
+            endif()
+
+            # Some compilers may fail if _openmp_flag is not in CMAKE_REQUIRED_LIBRARIES.
+            set(_onedpl_saved_required_libs ${CMAKE_REQUIRED_LIBRARIES})
+            set(CMAKE_REQUIRED_LIBRARIES ${_openmp_flag})
+            check_cxx_compiler_flag(${_openmp_flag} _openmp_option)
+            set(CMAKE_REQUIRED_LIBRARIES ${_onedpl_saved_required_libs})
+            unset(_onedpl_saved_required_libs)
+
+            if (NOT _openmp_option AND ONEDPL_PAR_BACKEND STREQUAL "openmp")  # If OpenMP backend is requested explicitly, but not supported.
+                message(STATUS "oneDPL: ONEDPL_PAR_BACKEND=${ONEDPL_PAR_BACKEND} requested, but not supported")
+                set(oneDPL_FOUND FALSE)
+                return()
+            elseif (_openmp_option)
+                set(ONEDPL_PAR_BACKEND openmp)
+                message(STATUS "oneDPL: ONEDPL_PAR_BACKEND=${ONEDPL_PAR_BACKEND}, disable oneTBB backend")
+                set_target_properties(oneDPL PROPERTIES INTERFACE_COMPILE_OPTIONS ${_openmp_flag})
+                set_target_properties(oneDPL PROPERTIES INTERFACE_LINK_LIBRARIES ${_openmp_flag})
+                set_property(TARGET oneDPL APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS ONEDPL_USE_TBB_BACKEND=0 ONEDPL_USE_OPENMP_BACKEND=1)
+            endif()
+        endif()
+
+        if (NOT ONEDPL_PAR_BACKEND OR ONEDPL_PAR_BACKEND STREQUAL "serial")
+            set(ONEDPL_PAR_BACKEND serial)
+            message(STATUS "oneDPL: ONEDPL_PAR_BACKEND=${ONEDPL_PAR_BACKEND}, disable oneTBB and OpenMP backends")
+            set_property(TARGET oneDPL APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS ONEDPL_USE_TBB_BACKEND=0 ONEDPL_USE_OPENMP_BACKEND=0)
+        endif()
+
+        check_cxx_compiler_flag("-fsycl" _fsycl_option)
+        if (NOT _fsycl_option)
+            message(STATUS "oneDPL: -fsycl is not supported by current compiler, set ONEDPL_USE_DPCPP_BACKEND=0")
+            set_property(TARGET oneDPL APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS ONEDPL_USE_DPCPP_BACKEND=0)
+        endif()
+    endif()
+else()
+    message(STATUS "oneDPL: headers do not exist ${_onedpl_headers}")
+    set(oneDPL_FOUND FALSE)
+endif()
diff --git a/dpnp/backend/extensions/lapack/CMakeLists.txt b/dpnp/backend/extensions/lapack/CMakeLists.txt
new file mode 100644
index 000000000000..e54de4068c01
--- /dev/null
+++ b/dpnp/backend/extensions/lapack/CMakeLists.txt
@@ -0,0 +1,76 @@
+# *****************************************************************************
+# Copyright (c) 2016-2023, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+# Build the pybind11 extension module that wraps the heevd/syevd LAPACK routines.
+set(python_module_name _lapack_impl)
+pybind11_add_module(${python_module_name} MODULE
+    lapack_py.cpp
+    heevd.cpp
+    syevd.cpp
+)
+
+if (WIN32)
+    if (CMAKE_VERSION VERSION_LESS "3.27")
+        # Work-around: target_link_options inserts the option after the -link option,
+        # causing the linker to ignore it, so it is added to the global link flags instead.
+        set(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -fsycl-device-code-split=per_kernel")
+    endif()
+endif()
+
+set_target_properties(${python_module_name} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+
+target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include)
+target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src)
+
+target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS})
+target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR})
+
+if (WIN32)
+    target_compile_options(${python_module_name} PRIVATE
+        /clang:-fno-approx-func
+        /clang:-fno-finite-math-only
+    )
+else()
+    target_compile_options(${python_module_name} PRIVATE
+        -fno-approx-func
+        -fno-finite-math-only
+    )
+endif()
+
+target_link_options(${python_module_name} PUBLIC -fsycl-device-code-split=per_kernel)
+if (UNIX)
+    # this option is supported on Linux only
+    target_link_options(${python_module_name} PUBLIC -fsycl-link-huge-device-code)
+endif()
+
+if (DPNP_GENERATE_COVERAGE)
+    target_link_options(${python_module_name} PRIVATE -fprofile-instr-generate -fcoverage-mapping)
+endif()
+
+target_link_libraries(${python_module_name} PUBLIC MKL::MKL_DPCPP)
+
+install(TARGETS ${python_module_name}
+    DESTINATION "dpnp/backend/extensions/lapack"
+)
diff --git a/dpnp/backend/extensions/lapack/heevd.cpp b/dpnp/backend/extensions/lapack/heevd.cpp
new file mode 100644
index 000000000000..f99fb94c18ec
--- /dev/null
+++ b/dpnp/backend/extensions/lapack/heevd.cpp
@@ -0,0 +1,252 @@
+//*****************************************************************************
+// Copyright (c) 2023, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//***************************************************************************** + + +#include + +// dpctl tensor headers +#include "utils/memory_overlap.hpp" +#include "utils/type_utils.hpp" + +#include "heevd.hpp" +#include "types_matrix.hpp" + +#include "dpnp_utils.hpp" + + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ + +namespace mkl_lapack = oneapi::mkl::lapack; +namespace py = pybind11; +namespace type_utils = dpctl::tensor::type_utils; + +typedef sycl::event (*heevd_impl_fn_ptr_t)(sycl::queue, + const oneapi::mkl::job, + const oneapi::mkl::uplo, + const std::int64_t, + char*, + char*, + std::vector&, + const std::vector&); + +static heevd_impl_fn_ptr_t heevd_dispatch_table[dpctl_td_ns::num_types][dpctl_td_ns::num_types]; + +template +static sycl::event heevd_impl(sycl::queue exec_q, + const oneapi::mkl::job jobz, + const oneapi::mkl::uplo upper_lower, + const std::int64_t n, + char* in_a, + char* out_w, + std::vector& host_task_events, + const std::vector& depends) +{ + type_utils::validate_type_for_device(exec_q); + type_utils::validate_type_for_device(exec_q); + + T* a = reinterpret_cast(in_a); + RealT* w = reinterpret_cast(out_w); + + const std::int64_t lda = std::max(1UL, n); + const std::int64_t scratchpad_size = mkl_lapack::heevd_scratchpad_size(exec_q, jobz, upper_lower, n, lda); + T* scratchpad = nullptr; + + std::stringstream error_msg; + std::int64_t info = 0; + + sycl::event heevd_event; + try + { + scratchpad = sycl::malloc_device(scratchpad_size, exec_q); + + heevd_event = mkl_lapack::heevd( + exec_q, + jobz, // 'jobz == job::vec' means eigenvalues and eigenvectors are computed. 
+ upper_lower, // 'upper_lower == job::upper' means the upper triangular part of A, or the lower triangular otherwise + n, // The order of the matrix A (0 <= n) + a, // Pointer to A, size (lda, *), where the 2nd dimension, must be at least max(1, n) + // If 'jobz == job::vec', then on exit it will contain the eigenvectors of A + lda, // The leading dimension of a, must be at least max(1, n) + w, // Pointer to array of size at least n, it will contain the eigenvalues of A in ascending order + scratchpad, // Pointer to scratchpad memory to be used by MKL routine for storing intermediate results + scratchpad_size, + depends); + } + catch (mkl_lapack::exception const& e) + { + error_msg << "Unexpected MKL exception caught during heevd() call:\nreason: " << e.what() + << "\ninfo: " << e.info(); + info = e.info(); + } + catch (sycl::exception const& e) + { + error_msg << "Unexpected SYCL exception caught during heevd() call:\n" << e.what(); + info = -1; + } + + if (info != 0) // an unexected error occurs + { + if (scratchpad != nullptr) + { + sycl::free(scratchpad, exec_q); + } + throw std::runtime_error(error_msg.str()); + } + + sycl::event clean_up_event = exec_q.submit([&](sycl::handler& cgh) { + cgh.depends_on(heevd_event); + auto ctx = exec_q.get_context(); + cgh.host_task([ctx, scratchpad]() { sycl::free(scratchpad, ctx); }); + }); + host_task_events.push_back(clean_up_event); + return heevd_event; +} + +std::pair heevd(sycl::queue exec_q, + const std::int8_t jobz, + const std::int8_t upper_lower, + dpctl::tensor::usm_ndarray eig_vecs, + dpctl::tensor::usm_ndarray eig_vals, + const std::vector& depends) +{ + const int eig_vecs_nd = eig_vecs.get_ndim(); + const int eig_vals_nd = eig_vals.get_ndim(); + + if (eig_vecs_nd != 2) + { + throw py::value_error("Unexpected ndim=" + std::to_string(eig_vecs_nd) + + " of an output array with eigenvectors"); + } + else if (eig_vals_nd != 1) + { + throw py::value_error("Unexpected ndim=" + std::to_string(eig_vals_nd) + + " of an 
output array with eigenvalues"); + } + + const py::ssize_t* eig_vecs_shape = eig_vecs.get_shape_raw(); + const py::ssize_t* eig_vals_shape = eig_vals.get_shape_raw(); + + if (eig_vecs_shape[0] != eig_vecs_shape[1]) + { + throw py::value_error("Output array with eigenvectors with be square"); + } + else if (eig_vecs_shape[0] != eig_vals_shape[0]) + { + throw py::value_error("Eigenvectors and eigenvalues have different shapes"); + } + + size_t src_nelems(1); + + for (int i = 0; i < eig_vecs_nd; ++i) + { + src_nelems *= static_cast(eig_vecs_shape[i]); + } + + if (src_nelems == 0) + { + // nothing to do + return std::make_pair(sycl::event(), sycl::event()); + } + + // check compatibility of execution queue and allocation queue + if (!dpctl::utils::queues_are_compatible(exec_q, {eig_vecs, eig_vals})) + { + throw py::value_error("Execution queue is not compatible with allocation queues"); + } + + auto const& overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(eig_vecs, eig_vals)) + { + throw py::value_error("Arrays with eigenvectors and eigenvalues are overlapping segments of memory"); + } + + bool is_eig_vecs_f_contig = eig_vecs.is_f_contiguous(); + bool is_eig_vals_c_contig = eig_vals.is_c_contiguous(); + if (!is_eig_vecs_f_contig) + { + throw py::value_error("An array with input matrix / ouput eigenvectors must be F-contiguous"); + } + else if (!is_eig_vals_c_contig) + { + throw py::value_error("An array with output eigenvalues must be C-contiguous"); + } + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int eig_vecs_type_id = array_types.typenum_to_lookup_id(eig_vecs.get_typenum()); + int eig_vals_type_id = array_types.typenum_to_lookup_id(eig_vals.get_typenum()); + + heevd_impl_fn_ptr_t heevd_fn = heevd_dispatch_table[eig_vecs_type_id][eig_vals_type_id]; + if (heevd_fn == nullptr) + { + throw py::value_error("No heevd implementation defined for a pair of type for eigenvectors and eigenvalues"); + } + + char* eig_vecs_data = eig_vecs.get_data(); 
+ char* eig_vals_data = eig_vals.get_data(); + + const std::int64_t n = eig_vecs_shape[0]; + const oneapi::mkl::job jobz_val = static_cast(jobz); + const oneapi::mkl::uplo uplo_val = static_cast(upper_lower); + + std::vector host_task_events; + sycl::event heevd_ev = + heevd_fn(exec_q, jobz_val, uplo_val, n, eig_vecs_data, eig_vals_data, host_task_events, depends); + + sycl::event args_ev = dpctl::utils::keep_args_alive(exec_q, {eig_vecs, eig_vals}, host_task_events); + return std::make_pair(args_ev, heevd_ev); +} + +template +struct HeevdContigFactory +{ + fnT get() + { + if constexpr (types::HeevdTypePairSupportFactory::is_defined) + { + return heevd_impl; + } + else + { + return nullptr; + } + } +}; + +void init_heevd_dispatch_table(void) +{ + dpctl_td_ns::DispatchTableBuilder contig; + contig.populate_dispatch_table(heevd_dispatch_table); +} +} +} +} +} diff --git a/dpnp/backend/extensions/lapack/heevd.hpp b/dpnp/backend/extensions/lapack/heevd.hpp new file mode 100644 index 000000000000..85696d147f66 --- /dev/null +++ b/dpnp/backend/extensions/lapack/heevd.hpp @@ -0,0 +1,53 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include +#include + +#include + + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ + extern std::pair heevd(sycl::queue exec_q, + const std::int8_t jobz, + const std::int8_t upper_lower, + dpctl::tensor::usm_ndarray eig_vecs, + dpctl::tensor::usm_ndarray eig_vals, + const std::vector& depends); + + extern void init_heevd_dispatch_table(void); +} +} +} +} diff --git a/dpnp/backend/extensions/lapack/lapack_py.cpp b/dpnp/backend/extensions/lapack/lapack_py.cpp new file mode 100644 index 000000000000..eaa3e6873b6a --- /dev/null +++ b/dpnp/backend/extensions/lapack/lapack_py.cpp @@ -0,0 +1,77 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** +// +// This file defines functions of dpnp.backend._lapack_impl extensions +// +//***************************************************************************** + +#include +#include + +#include "heevd.hpp" +#include "syevd.hpp" + +namespace lapack_ext = dpnp::backend::ext::lapack; +namespace py = pybind11; + +// populate dispatch vectors +void init_dispatch_vectors(void) +{ + lapack_ext::init_syevd_dispatch_vector(); +} + +// populate dispatch tables +void init_dispatch_tables(void) +{ + lapack_ext::init_heevd_dispatch_table(); +} + +PYBIND11_MODULE(_lapack_impl, m) +{ + init_dispatch_vectors(); + init_dispatch_tables(); + + m.def("_heevd", + &lapack_ext::heevd, + "Call `heevd` from OneMKL LAPACK library to return " + "the eigenvalues and eigenvectors of a complex Hermitian matrix", + py::arg("sycl_queue"), + py::arg("jobz"), + py::arg("upper_lower"), + py::arg("eig_vecs"), + py::arg("eig_vals"), + py::arg("depends") = py::list()); + + m.def("_syevd", + &lapack_ext::syevd, + "Call `syevd` from OneMKL LAPACK library to return " + "the eigenvalues and eigenvectors of a real symmetric matrix", + py::arg("sycl_queue"), + py::arg("jobz"), + py::arg("upper_lower"), + py::arg("eig_vecs"), + py::arg("eig_vals"), + py::arg("depends") = py::list()); +} diff --git a/dpnp/backend/extensions/lapack/syevd.cpp b/dpnp/backend/extensions/lapack/syevd.cpp new file mode 100644 index 000000000000..d03c2dff372c --- /dev/null +++ b/dpnp/backend/extensions/lapack/syevd.cpp @@ -0,0 +1,256 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + + +#include + +// dpctl tensor headers +#include "utils/memory_overlap.hpp" +#include "utils/type_utils.hpp" + +#include "syevd.hpp" +#include "types_matrix.hpp" + +#include "dpnp_utils.hpp" + + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ + +namespace mkl_lapack = oneapi::mkl::lapack; +namespace py = pybind11; +namespace type_utils = dpctl::tensor::type_utils; + +typedef sycl::event (*syevd_impl_fn_ptr_t)(sycl::queue, + const oneapi::mkl::job, + const oneapi::mkl::uplo, + const std::int64_t, + char*, + char*, + std::vector&, + const std::vector&); + +static syevd_impl_fn_ptr_t syevd_dispatch_vector[dpctl_td_ns::num_types]; + +template +static sycl::event syevd_impl(sycl::queue exec_q, + const oneapi::mkl::job jobz, + const oneapi::mkl::uplo upper_lower, + const std::int64_t n, + char* in_a, + char* out_w, + std::vector& host_task_events, + const std::vector& depends) +{ + type_utils::validate_type_for_device(exec_q); + + T* a = reinterpret_cast(in_a); + T* w = reinterpret_cast(out_w); + + const std::int64_t lda = std::max(1UL, n); + const std::int64_t scratchpad_size = mkl_lapack::syevd_scratchpad_size(exec_q, jobz, upper_lower, n, lda); + T* scratchpad = nullptr; + + std::stringstream error_msg; + std::int64_t info = 0; + + sycl::event syevd_event; + try + { + scratchpad = sycl::malloc_device(scratchpad_size, exec_q); + + syevd_event = mkl_lapack::syevd( + exec_q, + jobz, // 'jobz == job::vec' means eigenvalues and eigenvectors are computed. 
+ upper_lower, // 'upper_lower == job::upper' means the upper triangular part of A, or the lower triangular otherwise + n, // The order of the matrix A (0 <= n) + a, // Pointer to A, size (lda, *), where the 2nd dimension, must be at least max(1, n) + // If 'jobz == job::vec', then on exit it will contain the eigenvectors of A + lda, // The leading dimension of a, must be at least max(1, n) + w, // Pointer to array of size at least n, it will contain the eigenvalues of A in ascending order + scratchpad, // Pointer to scratchpad memory to be used by MKL routine for storing intermediate results + scratchpad_size, + depends); + } + catch (mkl_lapack::exception const& e) + { + error_msg << "Unexpected MKL exception caught during syevd() call:\nreason: " << e.what() + << "\ninfo: " << e.info(); + info = e.info(); + } + catch (sycl::exception const& e) + { + error_msg << "Unexpected SYCL exception caught during syevd() call:\n" << e.what(); + info = -1; + } + + if (info != 0) // an unexected error occurs + { + if (scratchpad != nullptr) + { + sycl::free(scratchpad, exec_q); + } + throw std::runtime_error(error_msg.str()); + } + + sycl::event clean_up_event = exec_q.submit([&](sycl::handler& cgh) { + cgh.depends_on(syevd_event); + auto ctx = exec_q.get_context(); + cgh.host_task([ctx, scratchpad]() { sycl::free(scratchpad, ctx); }); + }); + host_task_events.push_back(clean_up_event); + return syevd_event; +} + +std::pair syevd(sycl::queue exec_q, + const std::int8_t jobz, + const std::int8_t upper_lower, + dpctl::tensor::usm_ndarray eig_vecs, + dpctl::tensor::usm_ndarray eig_vals, + const std::vector& depends) +{ + const int eig_vecs_nd = eig_vecs.get_ndim(); + const int eig_vals_nd = eig_vals.get_ndim(); + + if (eig_vecs_nd != 2) + { + throw py::value_error("Unexpected ndim=" + std::to_string(eig_vecs_nd) + + " of an output array with eigenvectors"); + } + else if (eig_vals_nd != 1) + { + throw py::value_error("Unexpected ndim=" + std::to_string(eig_vals_nd) + + " of an 
output array with eigenvalues"); + } + + const py::ssize_t* eig_vecs_shape = eig_vecs.get_shape_raw(); + const py::ssize_t* eig_vals_shape = eig_vals.get_shape_raw(); + + if (eig_vecs_shape[0] != eig_vecs_shape[1]) + { + throw py::value_error("Output array with eigenvectors with be square"); + } + else if (eig_vecs_shape[0] != eig_vals_shape[0]) + { + throw py::value_error("Eigenvectors and eigenvalues have different shapes"); + } + + size_t src_nelems(1); + + for (int i = 0; i < eig_vecs_nd; ++i) + { + src_nelems *= static_cast(eig_vecs_shape[i]); + } + + if (src_nelems == 0) + { + // nothing to do + return std::make_pair(sycl::event(), sycl::event()); + } + + // check compatibility of execution queue and allocation queue + if (!dpctl::utils::queues_are_compatible(exec_q, {eig_vecs, eig_vals})) + { + throw py::value_error("Execution queue is not compatible with allocation queues"); + } + + auto const& overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(eig_vecs, eig_vals)) + { + throw py::value_error("Arrays with eigenvectors and eigenvalues are overlapping segments of memory"); + } + + bool is_eig_vecs_f_contig = eig_vecs.is_f_contiguous(); + bool is_eig_vals_c_contig = eig_vals.is_c_contiguous(); + if (!is_eig_vecs_f_contig) + { + throw py::value_error("An array with input matrix / ouput eigenvectors must be F-contiguous"); + } + else if (!is_eig_vals_c_contig) + { + throw py::value_error("An array with output eigenvalues must be C-contiguous"); + } + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int eig_vecs_type_id = array_types.typenum_to_lookup_id(eig_vecs.get_typenum()); + int eig_vals_type_id = array_types.typenum_to_lookup_id(eig_vals.get_typenum()); + + if (eig_vecs_type_id != eig_vals_type_id) + { + throw py::value_error("Types of eigenvectors and eigenvalues are missmatched"); + } + + syevd_impl_fn_ptr_t syevd_fn = syevd_dispatch_vector[eig_vecs_type_id]; + if (syevd_fn == nullptr) + { + throw py::value_error("No syevd 
implementation defined for a type of eigenvectors and eigenvalues"); + } + + char* eig_vecs_data = eig_vecs.get_data(); + char* eig_vals_data = eig_vals.get_data(); + + const std::int64_t n = eig_vecs_shape[0]; + const oneapi::mkl::job jobz_val = static_cast(jobz); + const oneapi::mkl::uplo uplo_val = static_cast(upper_lower); + + std::vector host_task_events; + sycl::event syevd_ev = + syevd_fn(exec_q, jobz_val, uplo_val, n, eig_vecs_data, eig_vals_data, host_task_events, depends); + + sycl::event args_ev = dpctl::utils::keep_args_alive(exec_q, {eig_vecs, eig_vals}, host_task_events); + return std::make_pair(args_ev, syevd_ev); +} + +template +struct SyevdContigFactory +{ + fnT get() + { + if constexpr (types::SyevdTypePairSupportFactory::is_defined) + { + return syevd_impl; + } + else + { + return nullptr; + } + } +}; + +void init_syevd_dispatch_vector(void) +{ + dpctl_td_ns::DispatchVectorBuilder contig; + contig.populate_dispatch_vector(syevd_dispatch_vector); +} +} +} +} +} diff --git a/dpnp/backend/extensions/lapack/syevd.hpp b/dpnp/backend/extensions/lapack/syevd.hpp new file mode 100644 index 000000000000..c5f0bc1b1531 --- /dev/null +++ b/dpnp/backend/extensions/lapack/syevd.hpp @@ -0,0 +1,53 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include +#include + +#include + + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ + extern std::pair syevd(sycl::queue exec_q, + const std::int8_t jobz, + const std::int8_t upper_lower, + dpctl::tensor::usm_ndarray eig_vecs, + dpctl::tensor::usm_ndarray eig_vals, + const std::vector& depends = {}); + + extern void init_syevd_dispatch_vector(void); +} +} +} +} diff --git a/dpnp/backend/extensions/lapack/types_matrix.hpp b/dpnp/backend/extensions/lapack/types_matrix.hpp new file mode 100644 index 000000000000..4175873b541f --- /dev/null +++ b/dpnp/backend/extensions/lapack/types_matrix.hpp @@ -0,0 +1,80 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +// dpctl tensor headers +#include "utils/type_dispatch.hpp" + +// dpctl namespace for operations with types +namespace dpctl_td_ns = dpctl::tensor::type_dispatch; + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ +namespace types +{ +/** + * @brief A factory to define pairs of supported types for which + * MKL LAPACK library provides support in oneapi::mkl::lapack::heevd function. + * + * @tparam T Type of array containing input matrix A and an output array with eigenvectors. + * @tparam RealT Type of output array containing eigenvalues of A. 
+ */ +template +struct HeevdTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction, RealT, double>, + dpctl_td_ns::TypePairDefinedEntry, RealT, float>, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; + +/** + * @brief A factory to define pairs of supported types for which + * MKL LAPACK library provides support in oneapi::mkl::lapack::syevd function. + * + * @tparam T Type of array containing input matrix A and an output arrays with eigenvectors and eigenvectors. + */ +template +struct SyevdTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction, + dpctl_td_ns::TypePairDefinedEntry, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; +} +} +} +} +} diff --git a/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp b/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp index 0f6cb5b31deb..beb4fb427de9 100644 --- a/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp +++ b/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp @@ -91,14 +91,14 @@ MACRO_1ARG_1TYPE_OP(dpnp_conjugate_c, std::conj(input_elem), q.submit(kernel_func)) MACRO_1ARG_1TYPE_OP(dpnp_copy_c, input_elem, q.submit(kernel_func)) MACRO_1ARG_1TYPE_OP(dpnp_erf_c, - sycl::erf((double)input_elem), - oneapi::mkl::vm::erf(q, input1_size, input1_data, result)) // no sycl::erf for int and long + dispatch_erf_op(input_elem), + oneapi::mkl::vm::erf(q, input1_size, input1_data, result)) MACRO_1ARG_1TYPE_OP(dpnp_negative_c, -input_elem, q.submit(kernel_func)) MACRO_1ARG_1TYPE_OP(dpnp_recip_c, _DataType(1) / input_elem, q.submit(kernel_func)) // error: no member named 'recip' in namespace 'sycl' MACRO_1ARG_1TYPE_OP(dpnp_sign_c, - sycl::sign((double)input_elem), + dispatch_sign_op(input_elem), q.submit(kernel_func)) // no sycl::sign for int and long MACRO_1ARG_1TYPE_OP(dpnp_square_c, input_elem* input_elem, diff --git a/dpnp/backend/include/dpnp_iface.hpp b/dpnp/backend/include/dpnp_iface.hpp index 348dd8e7bff4..7a80b40a3d2e 100644 --- 
a/dpnp/backend/include/dpnp_iface.hpp +++ b/dpnp/backend/include/dpnp_iface.hpp @@ -1683,57 +1683,6 @@ INP_DLLEXPORT void dpnp_var_c(void* array, size_t naxis, size_t ddof); -/** - * @ingroup BACKEND_API - * @brief Implementation of where function - * - * @param [in] q_ref Reference to SYCL queue. - * @param [out] result_out Output array. - * @param [in] result_size Size of output array. - * @param [in] result_ndim Number of output array dimensions. - * @param [in] result_shape Shape of output array. - * @param [in] result_strides Strides of output array. - * @param [in] condition_in Condition array. - * @param [in] condition_size Size of condition array. - * @param [in] condition_ndim Number of condition array dimensions. - * @param [in] condition_shape Shape of condition array. - * @param [in] condition_strides Strides of condition array. - * @param [in] input1_in First input array. - * @param [in] input1_size Size of first input array. - * @param [in] input1_ndim Number of first input array dimensions. - * @param [in] input1_shape Shape of first input array. - * @param [in] input1_strides Strides of first input array. - * @param [in] input2_in Second input array. - * @param [in] input2_size Size of second input array. - * @param [in] input2_ndim Number of second input array dimensions. - * @param [in] input2_shape Shape of second input array. - * @param [in] input2_strides Strides of second input array. - * @param [in] dep_event_vec_ref Reference to vector of SYCL events. 
- */ -template -INP_DLLEXPORT DPCTLSyclEventRef dpnp_where_c(DPCTLSyclQueueRef q_ref, - void* result_out, - const size_t result_size, - const size_t result_ndim, - const shape_elem_type* result_shape, - const shape_elem_type* result_strides, - const void* condition_in, - const size_t condition_size, - const size_t condition_ndim, - const shape_elem_type* condition_shape, - const shape_elem_type* condition_strides, - const void* input1_in, - const size_t input1_size, - const size_t input1_ndim, - const shape_elem_type* input1_shape, - const shape_elem_type* input1_strides, - const void* input2_in, - const size_t input2_size, - const size_t input2_ndim, - const shape_elem_type* input2_shape, - const shape_elem_type* input2_strides, - const DPCTLEventVectorRef dep_event_vec_ref); - /** * @ingroup BACKEND_API * @brief Implementation of invert function diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp index 197623efe454..3a0dc7d0a526 100644 --- a/dpnp/backend/include/dpnp_iface_fptr.hpp +++ b/dpnp/backend/include/dpnp_iface_fptr.hpp @@ -240,7 +240,6 @@ enum class DPNPFuncName : size_t DPNP_FN_PARTITION, /**< Used in numpy.partition() impl */ DPNP_FN_PARTITION_EXT, /**< Used in numpy.partition() impl, requires extra parameters */ DPNP_FN_PLACE, /**< Used in numpy.place() impl */ - DPNP_FN_PLACE_EXT, /**< Used in numpy.place() impl, requires extra parameters */ DPNP_FN_POWER, /**< Used in numpy.power() impl */ DPNP_FN_POWER_EXT, /**< Used in numpy.power() impl, requires extra parameters */ DPNP_FN_PROD, /**< Used in numpy.prod() impl */ @@ -376,7 +375,6 @@ enum class DPNPFuncName : size_t DPNP_FN_VANDER_EXT, /**< Used in numpy.vander() impl, requires extra parameters */ DPNP_FN_VAR, /**< Used in numpy.var() impl */ DPNP_FN_VAR_EXT, /**< Used in numpy.var() impl, requires extra parameters */ - DPNP_FN_WHERE_EXT, /**< Used in numpy.where() impl, requires extra parameters */ DPNP_FN_ZEROS, /**< Used in numpy.zeros() impl */ 
DPNP_FN_ZEROS_LIKE, /**< Used in numpy.zeros_like() impl */ DPNP_FN_LAST, /**< The latest element of the enumeration */ diff --git a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp index f3d8a4a95ccd..6264d5d7146d 100644 --- a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp @@ -281,8 +281,8 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap) const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ { \ const shape_elem_type* result_strides_data = &dev_strides_data[0]; \ - const shape_elem_type* input1_strides_data = &dev_strides_data[1]; \ - const shape_elem_type* input2_strides_data = &dev_strides_data[2]; \ + const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim]; \ + const shape_elem_type* input2_strides_data = &dev_strides_data[2 * result_ndim]; \ \ size_t input1_id = 0; \ size_t input2_id = 0; \ diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp index 741a945fb099..30310162582f 100644 --- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp @@ -111,7 +111,7 @@ size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ { \ const shape_elem_type* result_strides_data = &dev_strides_data[0]; \ - const shape_elem_type* input1_strides_data = &dev_strides_data[1]; \ + const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim]; \ \ size_t input_id = 0; \ for (size_t i = 0; i < input1_ndim; ++i) \ @@ -138,7 +138,7 @@ else \ { \ auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { \ - size_t output_id = global_id[0]; /*for (size_t i = 0; i < result_size; ++i)*/ \ + size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ { \ const _DataType_output input_elem = input1_data[output_id]; \ result[output_id] = __operation1__; \ @@ -149,16 +149,17 @@ gws, 
kernel_parallel_for_func); \ }; \ \ - if constexpr ((std::is_same<_DataType_input, double>::value || \ - std::is_same<_DataType_input, float>::value) && \ - std::is_same<_DataType_input, _DataType_output>::value) \ - { \ - event = __operation2__; \ - } \ - else \ + if constexpr (both_types_are_same<_DataType_input, _DataType_output, float, double>) \ { \ - event = q.submit(kernel_func); \ + if (q.get_device().has(sycl::aspect::fp64)) \ + { \ + event = __operation2__; \ + \ + event_ref = reinterpret_cast(&event); \ + return DPCTLEvent_Copy(event_ref); \ + } \ } \ + event = q.submit(kernel_func); \ } \ \ event_ref = reinterpret_cast(&event); \ @@ -558,6 +559,37 @@ static void func_map_init_elemwise_1arg_2type(func_map_t& fmap) return; } +template +constexpr T dispatch_erf_op(T elem) +{ + if constexpr (is_any_v) + { + // TODO: need to convert to double when possible + return sycl::erf((float)elem); + } + else + { + return sycl::erf(elem); + } +} + +template +constexpr T dispatch_sign_op(T elem) +{ + if constexpr (is_any_v) + { + if (elem > 0) + return T(1); + if (elem < 0) + return T(-1); + return elem; // elem is 0 + } + else + { + return sycl::sign(elem); + } +} + #define MACRO_1ARG_1TYPE_OP(__name__, __operation1__, __operation2__) \ template \ class __name__##_kernel; \ @@ -635,7 +667,7 @@ static void func_map_init_elemwise_1arg_2type(func_map_t& fmap) size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ { \ const shape_elem_type* result_strides_data = &dev_strides_data[0]; \ - const shape_elem_type* input1_strides_data = &dev_strides_data[1]; \ + const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim]; \ \ size_t input_id = 0; \ for (size_t i = 0; i < input1_ndim; ++i) \ @@ -671,14 +703,17 @@ static void func_map_init_elemwise_1arg_2type(func_map_t& fmap) cgh.parallel_for>(gws, kernel_parallel_for_func); \ }; \ \ - if constexpr (std::is_same<_DataType, double>::value || std::is_same<_DataType, float>::value) \ - { 
\ - event = __operation2__; \ - } \ - else \ + if constexpr (is_any_v<_DataType, float, double>) \ { \ - event = q.submit(kernel_func); \ + if (q.get_device().has(sycl::aspect::fp64)) \ + { \ + event = __operation2__; \ + \ + event_ref = reinterpret_cast(&event); \ + return DPCTLEvent_Copy(event_ref); \ + } \ } \ + event = q.submit(kernel_func); \ } \ \ event_ref = reinterpret_cast(&event); \ @@ -776,8 +811,8 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_COPY_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_copy_c_ext}; fmap[DPNPFuncName::DPNP_FN_COPY_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_copy_c_ext}; fmap[DPNPFuncName::DPNP_FN_COPY_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_copy_c_ext}; - fmap[DPNPFuncName::DPNP_FN_COPY_EXT][eft_C128][eft_C128] = {eft_C128, - (void*)dpnp_copy_c_ext>}; + fmap[DPNPFuncName::DPNP_FN_COPY_EXT][eft_C64][eft_C64] = {eft_C64, (void*)dpnp_copy_c_ext>}; + fmap[DPNPFuncName::DPNP_FN_COPY_EXT][eft_C128][eft_C128] = {eft_C128, (void*)dpnp_copy_c_ext>}; fmap[DPNPFuncName::DPNP_FN_ERF][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_erf_c_default}; fmap[DPNPFuncName::DPNP_FN_ERF][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_erf_c_default}; @@ -848,7 +883,6 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) return; } - #define MACRO_2ARG_3TYPES_OP( \ __name__, __operation__, __vec_operation__, __vec_types__, __mkl_operation__, __mkl_types__) \ template ) \ { \ - event = __mkl_operation__(q, result_size, input1_data, input2_data, result); \ + if (q.get_device().has(sycl::aspect::fp64)) \ + { \ + event = __mkl_operation__(q, result_size, input1_data, input2_data, result); \ + \ + event_ref = reinterpret_cast(&event); \ + return DPCTLEvent_Copy(event_ref); \ + } \ } \ - else if constexpr (none_of_both_types<_DataType_input1, \ - _DataType_input2, \ - std::complex, \ - std::complex>) \ + \ + if constexpr (none_of_both_types<_DataType_input1, \ + _DataType_input2, \ + std::complex, \ + 
std::complex>) \ { \ constexpr size_t lws = 64; \ constexpr unsigned int vec_sz = 8; \ diff --git a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp index 756899b6cc50..ac71f4fbc5f6 100644 --- a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp @@ -546,20 +546,12 @@ void dpnp_place_c(void* arr_in, long* mask_in, void* vals_in, const size_t arr_s vals_size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template void (*dpnp_place_default_c)(void*, long*, void*, const size_t, const size_t) = dpnp_place_c<_DataType>; -template -DPCTLSyclEventRef (*dpnp_place_ext_c)(DPCTLSyclQueueRef, - void*, - long*, - void*, - const size_t, - const size_t, - const DPCTLEventVectorRef) = dpnp_place_c<_DataType>; - template DPCTLSyclEventRef dpnp_put_c(DPCTLSyclQueueRef q_ref, void* array1_in, @@ -1017,11 +1009,6 @@ void func_map_init_indexing_func(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_PLACE][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_place_default_c}; fmap[DPNPFuncName::DPNP_FN_PLACE][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_place_default_c}; - fmap[DPNPFuncName::DPNP_FN_PLACE_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_place_ext_c}; - fmap[DPNPFuncName::DPNP_FN_PLACE_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_place_ext_c}; - fmap[DPNPFuncName::DPNP_FN_PLACE_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_place_ext_c}; - fmap[DPNPFuncName::DPNP_FN_PLACE_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_place_ext_c}; - fmap[DPNPFuncName::DPNP_FN_PUT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_put_default_c}; fmap[DPNPFuncName::DPNP_FN_PUT][eft_LNG][eft_LNG] = {eft_LNG, diff --git a/dpnp/backend/kernels/dpnp_krnl_logic.cpp b/dpnp/backend/kernels/dpnp_krnl_logic.cpp index d1a6767c2adc..78a9a29e99e7 100644 --- a/dpnp/backend/kernels/dpnp_krnl_logic.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_logic.cpp @@ -396,7 +396,7 @@ DPCTLSyclEventRef 
(*dpnp_any_ext_c)(DPCTLSyclQueueRef, const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ { \ const shape_elem_type *result_strides_data = &dev_strides_data[0]; \ - const shape_elem_type *input1_strides_data = &dev_strides_data[1]; \ + const shape_elem_type *input1_strides_data = &dev_strides_data[result_ndim]; \ \ size_t input1_id = 0; \ \ @@ -635,8 +635,8 @@ static void func_map_logic_1arg_1type_helper(func_map_t& fmap) const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ { \ const shape_elem_type *result_strides_data = &dev_strides_data[0]; \ - const shape_elem_type *input1_strides_data = &dev_strides_data[1]; \ - const shape_elem_type *input2_strides_data = &dev_strides_data[2]; \ + const shape_elem_type *input1_strides_data = &dev_strides_data[result_ndim]; \ + const shape_elem_type *input2_strides_data = &dev_strides_data[2 * result_ndim]; \ \ size_t input1_id = 0; \ size_t input2_id = 0; \ diff --git a/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp b/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp index cbcd191fae64..b82cbb49b1a6 100644 --- a/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp @@ -170,10 +170,14 @@ DPCTLSyclEventRef dpnp_elemwise_absolute_c(DPCTLSyclQueueRef q_ref, sycl::vec<_DataType_input, vec_sz> data_vec = sg.load(input_ptrT(&array1[start])); +#if (__SYCL_COMPILER_VERSION < __SYCL_COMPILER_VECTOR_ABS_CHANGED) // sycl::abs() returns unsigned integers only, so explicit casting to signed ones is required using result_absT = typename cl::sycl::detail::make_unsigned<_DataType_output>::type; sycl::vec<_DataType_output, vec_sz> res_vec = dpnp_vec_cast<_DataType_output, result_absT, vec_sz>(sycl::abs(data_vec)); +#else + sycl::vec<_DataType_output, vec_sz> res_vec = sycl::abs(data_vec); +#endif sg.store(result_ptrT(&result[start]), res_vec); } diff --git a/dpnp/backend/kernels/dpnp_krnl_searching.cpp 
b/dpnp/backend/kernels/dpnp_krnl_searching.cpp index fef5f78d15da..9bbb35068134 100644 --- a/dpnp/backend/kernels/dpnp_krnl_searching.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_searching.cpp @@ -27,7 +27,6 @@ #include #include "dpnp_fptr.hpp" -#include "dpnp_iterator.hpp" #include "dpnpc_memory_adapter.hpp" #include "queue_sycl.hpp" @@ -140,258 +139,6 @@ DPCTLSyclEventRef (*dpnp_argmin_ext_c)(DPCTLSyclQueueRef, size_t, const DPCTLEventVectorRef) = dpnp_argmin_c<_DataType, _idx_DataType>; - -template -class dpnp_where_c_broadcast_kernel; - -template -class dpnp_where_c_strides_kernel; - -template -class dpnp_where_c_kernel; - -template -DPCTLSyclEventRef dpnp_where_c(DPCTLSyclQueueRef q_ref, - void* result_out, - const size_t result_size, - const size_t result_ndim, - const shape_elem_type* result_shape, - const shape_elem_type* result_strides, - const void* condition_in, - const size_t condition_size, - const size_t condition_ndim, - const shape_elem_type* condition_shape, - const shape_elem_type* condition_strides, - const void* input1_in, - const size_t input1_size, - const size_t input1_ndim, - const shape_elem_type* input1_shape, - const shape_elem_type* input1_strides, - const void* input2_in, - const size_t input2_size, - const size_t input2_ndim, - const shape_elem_type* input2_shape, - const shape_elem_type* input2_strides, - const DPCTLEventVectorRef dep_event_vec_ref) -{ - /* avoid warning unused variable*/ - (void)dep_event_vec_ref; - - DPCTLSyclEventRef event_ref = nullptr; - - if (!condition_size || !input1_size || !input2_size) - { - return event_ref; - } - - sycl::queue q = *(reinterpret_cast(q_ref)); - - bool* condition_data = static_cast(const_cast(condition_in)); - _DataType_input1* input1_data = static_cast<_DataType_input1*>(const_cast(input1_in)); - _DataType_input2* input2_data = static_cast<_DataType_input2*>(const_cast(input2_in)); - _DataType_output* result = static_cast<_DataType_output*>(result_out); - - bool use_broadcasting = 
!array_equal(input1_shape, input1_ndim, input2_shape, input2_ndim); - use_broadcasting = use_broadcasting || !array_equal(condition_shape, condition_ndim, input1_shape, input1_ndim); - use_broadcasting = use_broadcasting || !array_equal(condition_shape, condition_ndim, input2_shape, input2_ndim); - - shape_elem_type* condition_shape_offsets = new shape_elem_type[condition_ndim]; - - get_shape_offsets_inkernel(condition_shape, condition_ndim, condition_shape_offsets); - bool use_strides = !array_equal(condition_strides, condition_ndim, condition_shape_offsets, condition_ndim); - delete[] condition_shape_offsets; - - shape_elem_type* input1_shape_offsets = new shape_elem_type[input1_ndim]; - - get_shape_offsets_inkernel(input1_shape, input1_ndim, input1_shape_offsets); - use_strides = use_strides || !array_equal(input1_strides, input1_ndim, input1_shape_offsets, input1_ndim); - delete[] input1_shape_offsets; - - shape_elem_type* input2_shape_offsets = new shape_elem_type[input2_ndim]; - - get_shape_offsets_inkernel(input2_shape, input2_ndim, input2_shape_offsets); - use_strides = use_strides || !array_equal(input2_strides, input2_ndim, input2_shape_offsets, input2_ndim); - delete[] input2_shape_offsets; - - sycl::event event; - sycl::range<1> gws(result_size); - - if (use_broadcasting) - { - DPNPC_id* condition_it; - const size_t condition_it_it_size_in_bytes = sizeof(DPNPC_id); - condition_it = reinterpret_cast*>(dpnp_memory_alloc_c(q_ref, condition_it_it_size_in_bytes)); - new (condition_it) DPNPC_id(q_ref, condition_data, condition_shape, condition_strides, condition_ndim); - - condition_it->broadcast_to_shape(result_shape, result_ndim); - - DPNPC_id<_DataType_input1>* input1_it; - const size_t input1_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input1>); - input1_it = reinterpret_cast*>(dpnp_memory_alloc_c(q_ref, input1_it_size_in_bytes)); - new (input1_it) DPNPC_id<_DataType_input1>(q_ref, input1_data, input1_shape, input1_strides, input1_ndim); - - 
input1_it->broadcast_to_shape(result_shape, result_ndim); - - DPNPC_id<_DataType_input2>* input2_it; - const size_t input2_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input2>); - input2_it = reinterpret_cast*>(dpnp_memory_alloc_c(q_ref, input2_it_size_in_bytes)); - new (input2_it) DPNPC_id<_DataType_input2>(q_ref, input2_data, input2_shape, input2_strides, input2_ndim); - - input2_it->broadcast_to_shape(result_shape, result_ndim); - - auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { - const size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ - { - const bool condition = (*condition_it)[i]; - const _DataType_output input1_elem = (*input1_it)[i]; - const _DataType_output input2_elem = (*input2_it)[i]; - result[i] = (condition) ? input1_elem : input2_elem; - } - }; - auto kernel_func = [&](sycl::handler& cgh) { - cgh.parallel_for>( - gws, kernel_parallel_for_func); - }; - - q.submit(kernel_func).wait(); - - condition_it->~DPNPC_id(); - input1_it->~DPNPC_id(); - input2_it->~DPNPC_id(); - - return event_ref; - } - else if (use_strides) - { - if ((result_ndim != condition_ndim) || (result_ndim != input1_ndim) || (result_ndim != input2_ndim)) - { - throw std::runtime_error("Result ndim=" + std::to_string(result_ndim) + - " mismatches with either condition ndim=" + std::to_string(condition_ndim) + - " or input1 ndim=" + std::to_string(input1_ndim) + - " or input2 ndim=" + std::to_string(input2_ndim)); - } - - /* memory transfer optimization, use USM-host for temporary speeds up tranfer to device */ - using usm_host_allocatorT = sycl::usm_allocator; - - size_t strides_size = 4 * result_ndim; - shape_elem_type* dev_strides_data = sycl::malloc_device(strides_size, q); - - /* create host temporary for packed strides managed by shared pointer */ - auto strides_host_packed = - std::vector(strides_size, usm_host_allocatorT(q)); - - /* packed vector is concatenation of result_strides, condition_strides, input1_strides and input2_strides */ - 
std::copy(result_strides, result_strides + result_ndim, strides_host_packed.begin()); - std::copy(condition_strides, condition_strides + result_ndim, strides_host_packed.begin() + result_ndim); - std::copy(input1_strides, input1_strides + result_ndim, strides_host_packed.begin() + 2 * result_ndim); - std::copy(input2_strides, input2_strides + result_ndim, strides_host_packed.begin() + 3 * result_ndim); - - auto copy_strides_ev = - q.copy(strides_host_packed.data(), dev_strides_data, strides_host_packed.size()); - - auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { - const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ - { - const shape_elem_type* result_strides_data = &dev_strides_data[0]; - const shape_elem_type* condition_strides_data = &dev_strides_data[1]; - const shape_elem_type* input1_strides_data = &dev_strides_data[2]; - const shape_elem_type* input2_strides_data = &dev_strides_data[3]; - - size_t condition_id = 0; - size_t input1_id = 0; - size_t input2_id = 0; - - for (size_t i = 0; i < result_ndim; ++i) - { - const size_t output_xyz_id = - get_xyz_id_by_id_inkernel(output_id, result_strides_data, result_ndim, i); - condition_id += output_xyz_id * condition_strides_data[i]; - input1_id += output_xyz_id * input1_strides_data[i]; - input2_id += output_xyz_id * input2_strides_data[i]; - } - - const bool condition = condition_data[condition_id]; - const _DataType_output input1_elem = input1_data[input1_id]; - const _DataType_output input2_elem = input2_data[input2_id]; - result[output_id] = (condition) ? 
input1_elem : input2_elem; - } - }; - auto kernel_func = [&](sycl::handler& cgh) { - cgh.depends_on(copy_strides_ev); - cgh.parallel_for>( - gws, kernel_parallel_for_func); - }; - - q.submit(kernel_func).wait(); - - sycl::free(dev_strides_data, q); - return event_ref; - } - else - { - auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { - const size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ - - const bool condition = condition_data[i]; - const _DataType_output input1_elem = input1_data[i]; - const _DataType_output input2_elem = input2_data[i]; - result[i] = (condition) ? input1_elem : input2_elem; - }; - auto kernel_func = [&](sycl::handler& cgh) { - cgh.parallel_for>( - gws, kernel_parallel_for_func); - }; - event = q.submit(kernel_func); - } - - event_ref = reinterpret_cast(&event); - return DPCTLEvent_Copy(event_ref); - - return event_ref; -} - -template -DPCTLSyclEventRef (*dpnp_where_ext_c)(DPCTLSyclQueueRef, - void*, - const size_t, - const size_t, - const shape_elem_type*, - const shape_elem_type*, - const void*, - const size_t, - const size_t, - const shape_elem_type*, - const shape_elem_type*, - const void*, - const size_t, - const size_t, - const shape_elem_type*, - const shape_elem_type*, - const void*, - const size_t, - const size_t, - const shape_elem_type*, - const shape_elem_type*, - const DPCTLEventVectorRef) = dpnp_where_c<_DataType_output, _DataType_input1, _DataType_input2>; - -template -static void func_map_searching_2arg_3type_core(func_map_t& fmap) -{ - ((fmap[DPNPFuncName::DPNP_FN_WHERE_EXT][FT1][FTs] = - {populate_func_types(), - (void*)dpnp_where_ext_c()>, - func_type_map_t::find_type, - func_type_map_t::find_type>}), - ...); -} - -template -static void func_map_searching_2arg_3type_helper(func_map_t& fmap) -{ - ((func_map_searching_2arg_3type_core(fmap)), ...); -} - void func_map_init_searching(func_map_t& fmap) { fmap[DPNPFuncName::DPNP_FN_ARGMAX][eft_INT][eft_INT] = {eft_INT, 
(void*)dpnp_argmax_default_c}; @@ -430,7 +177,5 @@ void func_map_init_searching(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_ARGMIN_EXT][eft_DBL][eft_INT] = {eft_INT, (void*)dpnp_argmin_ext_c}; fmap[DPNPFuncName::DPNP_FN_ARGMIN_EXT][eft_DBL][eft_LNG] = {eft_LNG, (void*)dpnp_argmin_ext_c}; - func_map_searching_2arg_3type_helper(fmap); - return; } diff --git a/dpnp/backend/kernels/dpnp_krnl_sorting.cpp b/dpnp/backend/kernels/dpnp_krnl_sorting.cpp index 614bb94f0705..01bc26cdf8f0 100644 --- a/dpnp/backend/kernels/dpnp_krnl_sorting.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_sorting.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2020, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -160,6 +160,24 @@ DPCTLSyclEventRef dpnp_partition_c(DPCTLSyclQueueRef q_ref, sycl::queue q = *(reinterpret_cast(q_ref)); + if (ndim == 1) // 1d array with C-contiguous data + { + _DataType* arr = static_cast<_DataType*>(array1_in); + _DataType* result = static_cast<_DataType*>(result1); + + auto policy = oneapi::dpl::execution::make_device_policy>(q); + + // fill the result array with data from input one + q.memcpy(result, arr, size * sizeof(_DataType)).wait(); + + // make a partial sorting such that: + // 1. result[0 <= i < kth] <= result[kth] + // 2. 
result[kth <= i < size] >= result[kth] + // event-blocking call, no need for wait() + std::nth_element(policy, result, result + kth, result + size, dpnp_less_comp()); + return event_ref; + } + DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, size, true); DPNPC_ptr_adapter<_DataType> input2_ptr(q_ref, array2_in, size, true); DPNPC_ptr_adapter<_DataType> result1_ptr(q_ref, result1, size, true, true); @@ -181,7 +199,7 @@ DPCTLSyclEventRef dpnp_partition_c(DPCTLSyclQueueRef q_ref, size_t ind = j - ind_begin; matrix[ind] = arr2[j]; } - std::partial_sort(matrix, matrix + shape_[ndim - 1], matrix + shape_[ndim - 1]); + std::partial_sort(matrix, matrix + shape_[ndim - 1], matrix + shape_[ndim - 1], dpnp_less_comp()); for (size_t j = ind_begin; j < ind_end + 1; ++j) { size_t ind = j - ind_begin; @@ -492,10 +510,13 @@ void func_map_init_sorting(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_PARTITION][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_partition_default_c}; fmap[DPNPFuncName::DPNP_FN_PARTITION][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_partition_default_c}; + fmap[DPNPFuncName::DPNP_FN_PARTITION_EXT][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_partition_ext_c}; fmap[DPNPFuncName::DPNP_FN_PARTITION_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_partition_ext_c}; fmap[DPNPFuncName::DPNP_FN_PARTITION_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_partition_ext_c}; fmap[DPNPFuncName::DPNP_FN_PARTITION_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_partition_ext_c}; fmap[DPNPFuncName::DPNP_FN_PARTITION_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_partition_ext_c}; + fmap[DPNPFuncName::DPNP_FN_PARTITION_EXT][eft_C64][eft_C64] = {eft_C64, (void*)dpnp_partition_ext_c>}; + fmap[DPNPFuncName::DPNP_FN_PARTITION_EXT][eft_C128][eft_C128] = {eft_C128, (void*)dpnp_partition_ext_c>}; fmap[DPNPFuncName::DPNP_FN_SEARCHSORTED][eft_INT][eft_INT] = { eft_INT, (void*)dpnp_searchsorted_default_c}; diff --git a/dpnp/backend/kernels/dpnp_krnl_statistics.cpp 
b/dpnp/backend/kernels/dpnp_krnl_statistics.cpp index abf77ff25eec..eaaf6b72f89f 100644 --- a/dpnp/backend/kernels/dpnp_krnl_statistics.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_statistics.cpp @@ -192,7 +192,7 @@ DPCTLSyclEventRef dpnp_cov_c(DPCTLSyclQueueRef q_ref, nrows, // std::int64_t n, ncols, // std::int64_t k, alpha, // T alpha, - temp, //const T* a, + temp, // const T* a, ncols, // std::int64_t lda, beta, // T beta, result, // T* c, @@ -1384,7 +1384,7 @@ void func_map_init_statistics(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_COV_EXT][eft_INT][eft_INT] = {eft_DBL, (void*)dpnp_cov_ext_c}; fmap[DPNPFuncName::DPNP_FN_COV_EXT][eft_LNG][eft_LNG] = {eft_DBL, (void*)dpnp_cov_ext_c}; - fmap[DPNPFuncName::DPNP_FN_COV_EXT][eft_FLT][eft_FLT] = {eft_DBL, (void*)dpnp_cov_ext_c}; + fmap[DPNPFuncName::DPNP_FN_COV_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_cov_ext_c}; fmap[DPNPFuncName::DPNP_FN_COV_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_cov_ext_c}; fmap[DPNPFuncName::DPNP_FN_MAX][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_max_default_c}; diff --git a/dpnp/backend/src/dpnp_fptr.hpp b/dpnp/backend/src/dpnp_fptr.hpp index d6c48784e6bd..cb33e70185e2 100644 --- a/dpnp/backend/src/dpnp_fptr.hpp +++ b/dpnp/backend/src/dpnp_fptr.hpp @@ -164,7 +164,7 @@ template struct are_same : std::conjunction...> {}; /** - * A template constant to check if type T matces any type from Ts. + * A template constant to check if type T matches any type from Ts. */ template constexpr auto is_any_v = is_any::value; @@ -187,6 +187,59 @@ constexpr auto both_types_are_any_of = std::conjunction_v, is_ template constexpr auto none_of_both_types = !std::disjunction_v, is_any>; + +/** + * @brief If the type _Tp is a reference type, provides the member typedef type which is the type referred to by _Tp + * with its topmost cv-qualifiers removed. Otherwise type is _Tp with its topmost cv-qualifiers removed. 
+ * + * @note std::remove_cvref is only available since c++20 + */ +template +using dpnp_remove_cvref_t = typename std::remove_cv_t>; + + +/** + * @brief "<" comparison with complex types support. + * + * @note return a result of lexicographical "<" comparison for complex types. + */ +class dpnp_less_comp +{ +public: + template + bool operator()(_Xp&& __x, _Yp&& __y) const + { + if constexpr (both_types_are_same, dpnp_remove_cvref_t<_Yp>, std::complex, std::complex>) + { + bool ret = false; + _Xp a = std::forward<_Xp>(__x); + _Yp b = std::forward<_Yp>(__y); + + if (a.real() < b.real()) + { + ret = (a.imag() == a.imag() || b.imag() != b.imag()); + } + else if (a.real() > b.real()) + { + ret = (b.imag() != b.imag() && a.imag() == a.imag()); + } + else if (a.real() == b.real() || (a.real() != a.real() && b.real() != b.real())) + { + ret = (a.imag() < b.imag() || (b.imag() != b.imag() && a.imag() == a.imag())); + } + else + { + ret = (b.real() != b.real()); + } + return ret; + } + else + { + return std::forward<_Xp>(__x) < std::forward<_Yp>(__y); + } + } +}; + /** * FPTR interface initialization functions */ diff --git a/dpnp/backend/src/dpnp_utils.hpp b/dpnp/backend/src/dpnp_utils.hpp index 985d5a61494e..6c1bda90cba7 100644 --- a/dpnp/backend/src/dpnp_utils.hpp +++ b/dpnp/backend/src/dpnp_utils.hpp @@ -40,6 +40,14 @@ (__LIBSYCL_MAJOR_VERSION > major) || (__LIBSYCL_MAJOR_VERSION == major and __LIBSYCL_MINOR_VERSION > minor) || \ (__LIBSYCL_MAJOR_VERSION == major and __LIBSYCL_MINOR_VERSION == minor and __LIBSYCL_PATCH_VERSION >= patch) +/** + * Version of SYCL DPC++ 2023 compiler where a return type of sycl::abs() is changed + * from unsigned integer to signed one of input vector. + */ +#ifndef __SYCL_COMPILER_VECTOR_ABS_CHANGED +#define __SYCL_COMPILER_VECTOR_ABS_CHANGED 20230503L +#endif + /** * Version of SYCL DPC++ 2023 compiler at which transition to SYCL 2020 occurs. 
* Intel(R) oneAPI DPC++ 2022.2.1 compiler has version 20221020L on Linux and diff --git a/dpnp/backend/tests/CMakeLists.txt b/dpnp/backend/tests/CMakeLists.txt index e0a4936d02d3..8729b76a6845 100644 --- a/dpnp/backend/tests/CMakeLists.txt +++ b/dpnp/backend/tests/CMakeLists.txt @@ -51,7 +51,7 @@ add_executable(dpnpc_tests test_random.cpp test_utils.cpp test_utils_iterator.cpp) -target_link_libraries(dpnpc_tests GTest::GTest GTest::Main pthread dpnp_backend_c) +target_link_libraries(dpnpc_tests GTest::GTest GTest::Main pthread dpnp_backend_library) # TODO split add_test(dpnpc_tests dpnpc_tests) diff --git a/dpnp/cmake/copy_existing.cmake b/dpnp/cmake/copy_existing.cmake new file mode 100644 index 000000000000..242dc292578b --- /dev/null +++ b/dpnp/cmake/copy_existing.cmake @@ -0,0 +1,3 @@ +if (EXISTS ${SOURCE_FILE}) + configure_file(${SOURCE_FILE} ${DEST} COPYONLY) +endif() diff --git a/dpnp/dparray.pyx b/dpnp/dparray.pyx index dffbf6f65d15..0cf94759a6ce 100644 --- a/dpnp/dparray.pyx +++ b/dpnp/dparray.pyx @@ -1,4 +1,5 @@ # cython: language_level=3 +# cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** # Copyright (c) 2016-2023, Intel Corporation diff --git a/dpnp/dpnp_algo/CMakeLists.txt b/dpnp/dpnp_algo/CMakeLists.txt new file mode 100644 index 000000000000..abdf9ae0cb7e --- /dev/null +++ b/dpnp/dpnp_algo/CMakeLists.txt @@ -0,0 +1,25 @@ + +set(dpnp_algo_pyx_deps + ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_linearalgebra.pxi + ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_manipulation.pxi + ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_counting.pxi + ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_statistics.pxi + ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_trigonometric.pxi + ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_sorting.pxi + ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_arraycreation.pxi + ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_mathematical.pxi + ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_searching.pxi + 
${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_indexing.pxi + ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_logic.pxi + ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_bitwise.pxi + ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_special.pxi + ) + +build_dpnp_cython_ext_with_backend( + dpnp_algo + ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo.pyx + dpnp/dpnp_algo + ) + +add_custom_target(_dpnp_algo_deps DEPENDS ${dpnp_algo_pyx_deps}) +add_dependencies(dpnp_algo _dpnp_algo_deps) diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index da1efddd3ccc..56195613a338 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -217,7 +217,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_PARTITION DPNP_FN_PARTITION_EXT DPNP_FN_PLACE - DPNP_FN_PLACE_EXT DPNP_FN_POWER DPNP_FN_POWER_EXT DPNP_FN_PROD @@ -355,7 +354,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_VANDER_EXT DPNP_FN_VAR DPNP_FN_VAR_EXT - DPNP_FN_WHERE_EXT DPNP_FN_ZEROS DPNP_FN_ZEROS_LIKE @@ -578,7 +576,6 @@ Searching functions """ cpdef dpnp_descriptor dpnp_argmax(dpnp_descriptor array1) cpdef dpnp_descriptor dpnp_argmin(dpnp_descriptor array1) -cpdef dpnp_descriptor dpnp_where(dpnp_descriptor cond_obj, dpnp_descriptor x_obj, dpnp_descriptor y_obj) """ Trigonometric functions @@ -604,7 +601,7 @@ cpdef dpnp_descriptor dpnp_radians(dpnp_descriptor array1) cpdef dpnp_descriptor dpnp_recip(dpnp_descriptor array1) cpdef dpnp_descriptor dpnp_sin(dpnp_descriptor array1, dpnp_descriptor out) cpdef dpnp_descriptor dpnp_sinh(dpnp_descriptor array1) -cpdef dpnp_descriptor dpnp_sqrt(dpnp_descriptor array1) +cpdef dpnp_descriptor dpnp_sqrt(dpnp_descriptor array1, dpnp_descriptor out) cpdef dpnp_descriptor dpnp_square(dpnp_descriptor array1) cpdef dpnp_descriptor dpnp_tan(dpnp_descriptor array1, dpnp_descriptor out) cpdef dpnp_descriptor dpnp_tanh(dpnp_descriptor array1) diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx 
index 2fa9de34b998..24abd1b4b9e4 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pyx +++ b/dpnp/dpnp_algo/dpnp_algo.pyx @@ -1,4 +1,5 @@ # cython: language_level=3 +# cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** # Copyright (c) 2016-2023, Intel Corporation @@ -60,19 +61,19 @@ __all__ = [ ] -include "dpnp_algo_arraycreation.pyx" -include "dpnp_algo_bitwise.pyx" -include "dpnp_algo_counting.pyx" -include "dpnp_algo_indexing.pyx" -include "dpnp_algo_linearalgebra.pyx" -include "dpnp_algo_logic.pyx" -include "dpnp_algo_manipulation.pyx" -include "dpnp_algo_mathematical.pyx" -include "dpnp_algo_searching.pyx" -include "dpnp_algo_sorting.pyx" -include "dpnp_algo_special.pyx" -include "dpnp_algo_statistics.pyx" -include "dpnp_algo_trigonometric.pyx" +include "dpnp_algo_arraycreation.pxi" +include "dpnp_algo_bitwise.pxi" +include "dpnp_algo_counting.pxi" +include "dpnp_algo_indexing.pxi" +include "dpnp_algo_linearalgebra.pxi" +include "dpnp_algo_logic.pxi" +include "dpnp_algo_manipulation.pxi" +include "dpnp_algo_mathematical.pxi" +include "dpnp_algo_searching.pxi" +include "dpnp_algo_sorting.pxi" +include "dpnp_algo_special.pxi" +include "dpnp_algo_statistics.pxi" +include "dpnp_algo_trigonometric.pxi" ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_astype_t)(c_dpctl.DPCTLSyclQueueRef, @@ -505,8 +506,23 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name, return_type = kernel_data.return_type_no_fp64 func = < fptr_2in_1out_strides_t > kernel_data.ptr_no_fp64 - if out is None: - """ Create result array with type given by FPTR data """ + # check 'out' parameter data + if out is not None: + if out.shape != result_shape: + utils.checker_throw_value_error(func_name, 'out.shape', out.shape, result_shape) + + utils.get_common_usm_allocation(x1_obj, out) # check USM allocation is common + + if out is None or out.is_array_overlapped(x1_obj) or out.is_array_overlapped(x2_obj) or not 
out.match_ctype(return_type): + """ + Create result array with type given by FPTR data. + If 'out' array has another dtype than expected or overlaps a memory from any input array, + we have to create a temporary array and to copy data from the temporary into 'out' array, + once the computation is completed. + Otherwise simultaneously access to the same memory may cause a race condition issue + which will result into undefined behaviour. + """ + is_result_memory_allocated = True result = utils.create_output_descriptor(result_shape, return_type, None, @@ -514,16 +530,9 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name, usm_type=result_usm_type, sycl_queue=result_sycl_queue) else: - result_type = dpnp_DPNPFuncType_to_dtype(< size_t > return_type) - if out.dtype != result_type: - utils.checker_throw_value_error(func_name, 'out.dtype', out.dtype, result_type) - if out.shape != result_shape: - utils.checker_throw_value_error(func_name, 'out.shape', out.shape, result_shape) - + is_result_memory_allocated = False result = out - utils.get_common_usm_allocation(x1_obj, result) # check USM allocation is common - cdef shape_type_c result_strides = utils.strides_to_vector(result.strides, result_shape) result_obj = result.get_array() @@ -554,4 +563,7 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name, with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) - return result + if out is not None and is_result_memory_allocated: + return out.get_result_desc(result) + + return result.get_result_desc() diff --git a/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx b/dpnp/dpnp_algo/dpnp_algo_arraycreation.pxi similarity index 99% rename from dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx rename to dpnp/dpnp_algo/dpnp_algo_arraycreation.pxi index 7b538118b939..3525a42ca0d4 100644 --- a/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_arraycreation.pxi @@ -1,4 +1,5 @@ # cython: 
language_level=3 +# cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** # Copyright (c) 2016-2023, Intel Corporation diff --git a/dpnp/dpnp_algo/dpnp_algo_bitwise.pyx b/dpnp/dpnp_algo/dpnp_algo_bitwise.pxi similarity index 99% rename from dpnp/dpnp_algo/dpnp_algo_bitwise.pyx rename to dpnp/dpnp_algo/dpnp_algo_bitwise.pxi index a8af53b709d1..6c630f791ad1 100644 --- a/dpnp/dpnp_algo/dpnp_algo_bitwise.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_bitwise.pxi @@ -1,4 +1,5 @@ # cython: language_level=3 +# cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** # Copyright (c) 2016-2023, Intel Corporation diff --git a/dpnp/dpnp_algo/dpnp_algo_counting.pyx b/dpnp/dpnp_algo/dpnp_algo_counting.pxi similarity index 96% rename from dpnp/dpnp_algo/dpnp_algo_counting.pyx rename to dpnp/dpnp_algo/dpnp_algo_counting.pxi index 119c0d27b692..ef32d3ed3629 100644 --- a/dpnp/dpnp_algo/dpnp_algo_counting.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_counting.pxi @@ -1,7 +1,8 @@ # cython: language_level=3 +# cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/dpnp_algo/dpnp_algo_indexing.pyx b/dpnp/dpnp_algo/dpnp_algo_indexing.pxi similarity index 91% rename from dpnp/dpnp_algo/dpnp_algo_indexing.pyx rename to dpnp/dpnp_algo/dpnp_algo_indexing.pxi index 4e07c03f24a5..e9dc538393c3 100644 --- a/dpnp/dpnp_algo/dpnp_algo_indexing.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_indexing.pxi @@ -1,4 +1,5 @@ # cython: language_level=3 +# cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** # Copyright (c) 2016-2023, Intel Corporation @@ -40,7 +41,6 @@ __all__ += [ "dpnp_diagonal", "dpnp_fill_diagonal", "dpnp_indices", - "dpnp_place", "dpnp_put", "dpnp_put_along_axis", "dpnp_putmask", @@ -78,13 +78,6 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*custom_indexing_2in_1out_func_ptr_t_)(c_dpct ctypedef c_dpctl.DPCTLSyclEventRef(*custom_indexing_2in_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef, void *, void * , shape_elem_type * , const size_t, const c_dpctl.DPCTLEventVectorRef) -ctypedef c_dpctl.DPCTLSyclEventRef(*custom_indexing_3in_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef, - void * , - void * , - void * , - const size_t, - const size_t, - const c_dpctl.DPCTLEventVectorRef) ctypedef c_dpctl.DPCTLSyclEventRef(*custom_indexing_3in_with_axis_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef, void * , void * , @@ -307,41 +300,6 @@ cpdef object dpnp_indices(dimensions): return dpnp_result -cpdef dpnp_place(dpnp_descriptor arr, object mask, dpnp_descriptor vals): - result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(arr, vals) - - cdef utils.dpnp_descriptor mask_ = utils_py.create_output_descriptor_py((mask.size,), - dpnp.int64, - None, - device=result_sycl_device, - usm_type=result_usm_type, - sycl_queue=result_sycl_queue) - for i in range(mask.size): - if mask.item(i): - mask_.get_pyobj()[i] = 1 - else: - mask_.get_pyobj()[i] = 0 - cdef DPNPFuncType param1_type = 
dpnp_dtype_to_DPNPFuncType(arr.dtype) - - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_PLACE_EXT, param1_type, param1_type) - - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - cdef custom_indexing_3in_func_ptr_t func = kernel_data.ptr - - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, - arr.get_data(), - mask_.get_data(), - vals.get_data(), - arr.size, - vals.size, - NULL) # dep_events_ref - - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) - c_dpctl.DPCTLEvent_Delete(event_ref) - - cpdef dpnp_put(dpnp_descriptor x1, object ind, v): ind_is_list = isinstance(ind, list) diff --git a/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pyx b/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pxi similarity index 99% rename from dpnp/dpnp_algo/dpnp_algo_linearalgebra.pyx rename to dpnp/dpnp_algo/dpnp_algo_linearalgebra.pxi index 91c1da884050..f9eac4ffd35b 100644 --- a/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pxi @@ -1,4 +1,5 @@ # cython: language_level=3 +# cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** # Copyright (c) 2016-2023, Intel Corporation diff --git a/dpnp/dpnp_algo/dpnp_algo_logic.pyx b/dpnp/dpnp_algo/dpnp_algo_logic.pxi similarity index 99% rename from dpnp/dpnp_algo/dpnp_algo_logic.pyx rename to dpnp/dpnp_algo/dpnp_algo_logic.pxi index b6ac36db412b..f84e90b186fc 100644 --- a/dpnp/dpnp_algo/dpnp_algo_logic.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_logic.pxi @@ -1,4 +1,5 @@ # cython: language_level=3 +# cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** # Copyright (c) 2016-2023, Intel Corporation diff --git a/dpnp/dpnp_algo/dpnp_algo_manipulation.pyx b/dpnp/dpnp_algo/dpnp_algo_manipulation.pxi similarity index 94% rename from dpnp/dpnp_algo/dpnp_algo_manipulation.pyx rename to 
dpnp/dpnp_algo/dpnp_algo_manipulation.pxi index 0a51b44e7e52..b9234dbe5ab2 100644 --- a/dpnp/dpnp_algo/dpnp_algo_manipulation.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_manipulation.pxi @@ -1,7 +1,8 @@ # cython: language_level=3 +# cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -42,7 +43,6 @@ __all__ += [ "dpnp_repeat", "dpnp_reshape", "dpnp_transpose", - "dpnp_squeeze", ] @@ -293,23 +293,3 @@ cpdef utils.dpnp_descriptor dpnp_transpose(utils.dpnp_descriptor array1, axes=No c_dpctl.DPCTLEvent_Delete(event_ref) return result - - -cpdef utils.dpnp_descriptor dpnp_squeeze(utils.dpnp_descriptor in_array, axis): - cdef shape_type_c shape_list - if axis is None: - for i in range(in_array.ndim): - if in_array.shape[i] != 1: - shape_list.push_back(in_array.shape[i]) - else: - axis_norm = utils._object_to_tuple(utils.normalize_axis(utils._object_to_tuple(axis), in_array.ndim)) - for i in range(in_array.ndim): - if i in axis_norm: - if in_array.shape[i] != 1: - utils.checker_throw_value_error("dpnp_squeeze", "axis", axis, "axis has size not equal to one") - else: - shape_list.push_back(in_array.shape[i]) - - in_array_obj = in_array.get_array() - - return dpnp_reshape(dpnp_copy(in_array), shape_list) diff --git a/dpnp/dpnp_algo/dpnp_algo_mathematical.pyx b/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi similarity index 99% rename from dpnp/dpnp_algo/dpnp_algo_mathematical.pyx rename to dpnp/dpnp_algo/dpnp_algo_mathematical.pxi index 5d937a7008bb..b5534c7c1a8e 100644 --- a/dpnp/dpnp_algo/dpnp_algo_mathematical.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi @@ -1,7 +1,8 @@ # cython: language_level=3 +# cython: linetrace=True # -*- coding: utf-8 -*- # 
***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -631,6 +632,9 @@ cpdef utils.dpnp_descriptor dpnp_sum(utils.dpnp_descriptor x1, usm_type=x1_obj.usm_type, sycl_queue=x1_obj.sycl_queue) + if x1.size == 0 and axis is None: + return result + result_sycl_queue = result.get_array().sycl_queue cdef c_dpctl.SyclQueue q = result_sycl_queue diff --git a/dpnp/dpnp_algo/dpnp_algo_searching.pyx b/dpnp/dpnp_algo/dpnp_algo_searching.pxi similarity index 50% rename from dpnp/dpnp_algo/dpnp_algo_searching.pyx rename to dpnp/dpnp_algo/dpnp_algo_searching.pxi index 44621b5cca04..46f1c83f42b5 100644 --- a/dpnp/dpnp_algo/dpnp_algo_searching.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_searching.pxi @@ -1,4 +1,5 @@ # cython: language_level=3 +# cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** # Copyright (c) 2016-2023, Intel Corporation @@ -36,8 +37,7 @@ and the rest of the library __all__ += [ "dpnp_argmax", - "dpnp_argmin", - "dpnp_where" + "dpnp_argmin" ] @@ -46,29 +46,6 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*custom_search_1in_1out_func_ptr_t)(c_dpctl.D void * , void * , size_t, const c_dpctl.DPCTLEventVectorRef) -ctypedef c_dpctl.DPCTLSyclEventRef(*where_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef, - void *, - const size_t, - const size_t, - const shape_elem_type * , - const shape_elem_type * , - void *, - const size_t, - const size_t, - const shape_elem_type * , - const shape_elem_type * , - void *, - const size_t, - const size_t, - const shape_elem_type * , - const shape_elem_type * , - void *, - const size_t, - const size_t, - const shape_elem_type * , - const shape_elem_type * , - const c_dpctl.DPCTLEventVectorRef) except + - cpdef utils.dpnp_descriptor dpnp_argmax(utils.dpnp_descriptor in_array1): cdef 
DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(in_array1.dtype) @@ -140,81 +117,3 @@ cpdef utils.dpnp_descriptor dpnp_argmin(utils.dpnp_descriptor in_array1): c_dpctl.DPCTLEvent_Delete(event_ref) return result - - -cpdef utils.dpnp_descriptor dpnp_where(utils.dpnp_descriptor cond_obj, - utils.dpnp_descriptor x_obj, - utils.dpnp_descriptor y_obj): - # Convert object type to C enum DPNPFuncType - cdef DPNPFuncType cond_c_type = dpnp_dtype_to_DPNPFuncType(cond_obj.dtype) - cdef DPNPFuncType x_c_type = dpnp_dtype_to_DPNPFuncType(x_obj.dtype) - cdef DPNPFuncType y_c_type = dpnp_dtype_to_DPNPFuncType(y_obj.dtype) - - # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_WHERE_EXT, x_c_type, y_c_type) - - # Create result array - cdef shape_type_c cond_shape = cond_obj.shape - cdef shape_type_c x_shape = x_obj.shape - cdef shape_type_c y_shape = y_obj.shape - - cdef shape_type_c cond_strides = utils.strides_to_vector(cond_obj.strides, cond_shape) - cdef shape_type_c x_strides = utils.strides_to_vector(x_obj.strides, x_shape) - cdef shape_type_c y_strides = utils.strides_to_vector(y_obj.strides, y_shape) - - cdef shape_type_c cond_x_shape = utils.get_common_shape(cond_shape, x_shape) - cdef shape_type_c cond_y_shape = utils.get_common_shape(cond_shape, y_shape) - cdef shape_type_c result_shape = utils.get_common_shape(cond_x_shape, cond_y_shape) - cdef utils.dpnp_descriptor result - - result_usm_type, result_sycl_queue = utils_py.get_usm_allocations([cond_obj.get_array(), - x_obj.get_array(), - y_obj.get_array()]) - - # get FPTR function and return type - cdef where_func_ptr_t func = < where_func_ptr_t > kernel_data.ptr - cdef DPNPFuncType return_type = kernel_data.return_type - - """ Create result array with type given by FPTR data """ - result = utils.create_output_descriptor(result_shape, - return_type, - None, - device=None, - usm_type=result_usm_type, - sycl_queue=result_sycl_queue) - - cdef shape_type_c result_strides = 
utils.strides_to_vector(result.strides, result_shape) - - result_obj = result.get_array() - - cdef c_dpctl.SyclQueue q = < c_dpctl.SyclQueue > result_obj.sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - """ Call FPTR function """ - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, - result.get_data(), - result.size, - result.ndim, - result_shape.data(), - result_strides.data(), - cond_obj.get_data(), - cond_obj.size, - cond_obj.ndim, - cond_shape.data(), - cond_strides.data(), - x_obj.get_data(), - x_obj.size, - x_obj.ndim, - x_shape.data(), - x_strides.data(), - y_obj.get_data(), - y_obj.size, - y_obj.ndim, - y_shape.data(), - y_strides.data(), - NULL) # dep_events_ref) - - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) - c_dpctl.DPCTLEvent_Delete(event_ref) - - return result diff --git a/dpnp/dpnp_algo/dpnp_algo_sorting.pyx b/dpnp/dpnp_algo/dpnp_algo_sorting.pxi similarity index 99% rename from dpnp/dpnp_algo/dpnp_algo_sorting.pyx rename to dpnp/dpnp_algo/dpnp_algo_sorting.pxi index 9a701dd7c905..4d8b475cce49 100644 --- a/dpnp/dpnp_algo/dpnp_algo_sorting.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_sorting.pxi @@ -1,7 +1,8 @@ # cython: language_level=3 +# cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/dpnp_algo/dpnp_algo_special.pyx b/dpnp/dpnp_algo/dpnp_algo_special.pxi similarity index 96% rename from dpnp/dpnp_algo/dpnp_algo_special.pyx rename to dpnp/dpnp_algo/dpnp_algo_special.pxi index fb6ff0d74cc6..179742613840 100644 --- a/dpnp/dpnp_algo/dpnp_algo_special.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_special.pxi @@ -1,7 +1,8 @@ # cython: language_level=3 +# cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/dpnp_algo/dpnp_algo_statistics.pyx b/dpnp/dpnp_algo/dpnp_algo_statistics.pxi similarity index 99% rename from dpnp/dpnp_algo/dpnp_algo_statistics.pyx rename to dpnp/dpnp_algo/dpnp_algo_statistics.pxi index 5d21dcf8c74b..d2868a8ee042 100644 --- a/dpnp/dpnp_algo/dpnp_algo_statistics.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_statistics.pxi @@ -1,4 +1,5 @@ # cython: language_level=3 +# cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** # Copyright (c) 2016-2023, Intel Corporation @@ -178,7 +179,6 @@ cpdef utils.dpnp_descriptor dpnp_correlate(utils.dpnp_descriptor x1, utils.dpnp_ return result -# supports "double" input only cpdef utils.dpnp_descriptor dpnp_cov(utils.dpnp_descriptor array1): cdef shape_type_c input_shape = array1.shape diff --git a/dpnp/dpnp_algo/dpnp_algo_trigonometric.pyx b/dpnp/dpnp_algo/dpnp_algo_trigonometric.pxi similarity index 96% rename from dpnp/dpnp_algo/dpnp_algo_trigonometric.pyx rename to dpnp/dpnp_algo/dpnp_algo_trigonometric.pxi index bf9c4d5e0ed2..d76103365844 100644 --- a/dpnp/dpnp_algo/dpnp_algo_trigonometric.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_trigonometric.pxi @@ -1,7 +1,8 @@ # cython: 
language_level=3 +# cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -148,8 +149,8 @@ cpdef utils.dpnp_descriptor dpnp_sinh(utils.dpnp_descriptor x1): return call_fptr_1in_1out_strides(DPNP_FN_SINH_EXT, x1) -cpdef utils.dpnp_descriptor dpnp_sqrt(utils.dpnp_descriptor x1): - return call_fptr_1in_1out_strides(DPNP_FN_SQRT_EXT, x1) +cpdef utils.dpnp_descriptor dpnp_sqrt(utils.dpnp_descriptor x1, utils.dpnp_descriptor out): + return call_fptr_1in_1out_strides(DPNP_FN_SQRT_EXT, x1, dtype=None, out=out, where=True, func_name='sqrt') cpdef utils.dpnp_descriptor dpnp_square(utils.dpnp_descriptor x1): diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index f2ccf56ef76b..3a3d4027d787 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -29,6 +29,23 @@ import dpnp + +def _get_unwrapped_index_key(key): + """ + Return a key where each nested instance of DPNP array is unwrapped into USM ndarray + for futher processing in DPCTL advanced indexing functions. + + """ + + if isinstance(key, tuple): + if any(isinstance(x, dpnp_array) for x in key): + # create a new tuple from the input key with unwrapped DPNP arrays + return tuple(x.get_array() if isinstance(x, dpnp_array) else x for x in key) + elif isinstance(key, dpnp_array): + return key.get_array() + return key + + class dpnp_array: """ Multi-dimensional array object. 
@@ -176,8 +193,7 @@ def __ge__(self, other): # '__getattribute__', def __getitem__(self, key): - if isinstance(key, dpnp_array): - key = key.get_array() + key = _get_unwrapped_index_key(key) item = self._array_obj.__getitem__(key) if not isinstance(item, dpt.usm_ndarray): @@ -194,7 +210,10 @@ def __gt__(self, other): return dpnp.greater(self, other) # '__hash__', - # '__iadd__', + + def __iadd__(self, other): + dpnp.add(self, other, out=self) + return self def __iand__(self, other): dpnp.bitwise_and(self, other, out=self) @@ -208,7 +227,10 @@ def __ilshift__(self, other): # '__imatmul__', # '__imod__', - # '__imul__', + + def __imul__(self, other): + dpnp.multiply(self, other, out=self) + return self def __index__(self): return self._array_obj.__index__() @@ -334,8 +356,8 @@ def __rxor__(self, other): # '__setattr__', def __setitem__(self, key, val): - if isinstance(key, dpnp_array): - key = key.get_array() + key = _get_unwrapped_index_key(key) + if isinstance(val, dpnp_array): val = val.get_array() @@ -760,6 +782,8 @@ def item(self, id=None): @property def itemsize(self): """ + Size of one array element in bytes. + """ return self._array_obj.itemsize @@ -785,11 +809,20 @@ def min(self, axis=None, out=None, keepdims=numpy._NoValue, initial=numpy._NoVal return dpnp.min(self, axis, out, keepdims, initial, where) - # 'nbytes', + @property + def nbytes(self): + """ + Total bytes consumed by the elements of the array. + + """ + + return self._array_obj.nbytes @property def ndim(self): """ + Number of array dimensions. + """ return self._array_obj.ndim @@ -799,7 +832,32 @@ def ndim(self): def nonzero(self): return dpnp.nonzero(self) - # 'partition', + def partition(self, kth, axis=-1, kind='introselect', order=None): + """ + Rearranges the elements in the array in such a way that the value of the + element in kth position is in the position it would be in a sorted array. 
+ + All elements smaller than the kth element are moved before this element and + all equal or greater are moved behind it. The ordering of the elements in + the two partitions is undefined. + + Refer to `dpnp.partition` for full documentation. + + See Also + -------- + :obj:`dpnp.partition` : Return a partitioned copy of an array. + + Examples + -------- + >>> import dpnp as np + >>> a = np.array([3, 4, 2, 1]) + >>> a.partition(3) + >>> a + array([1, 2, 3, 4]) + + """ + + self._array_obj = dpnp.partition(self, kth, axis=axis, kind=kind, order=order).get_array() def prod(self, axis=None, dtype=None, out=None, keepdims=False, initial=None, where=True): """ diff --git a/dpnp/dpnp_container.py b/dpnp/dpnp_container.py index 12d28074b8fb..5bd6f460496f 100644 --- a/dpnp/dpnp_container.py +++ b/dpnp/dpnp_container.py @@ -86,22 +86,34 @@ def asarray(x1, usm_type=None, sycl_queue=None): """Converts `x1` to `dpnp_array`.""" - if isinstance(x1, dpnp_array): - x1_obj = x1.get_array() - else: - x1_obj = x1 + dpu.validate_usm_type(usm_type, allow_none=True) - sycl_queue_normalized = dpnp.get_normalized_queue_device(x1_obj, device=device, sycl_queue=sycl_queue) if order is None: order = 'C' """Converts incoming 'x1' object to 'dpnp_array'.""" - array_obj = dpt.asarray(x1_obj, - dtype=dtype, - copy=copy, - order=order, - usm_type=usm_type, - sycl_queue=sycl_queue_normalized) + if isinstance(x1, (list, tuple, range)): + array_obj = dpt.asarray(x1, + dtype=dtype, + copy=copy, + order=order, + device=device, + usm_type=usm_type, + sycl_queue=sycl_queue) + else: + if isinstance(x1, dpnp_array): + x1_obj = x1.get_array() + else: + x1_obj = x1 + + sycl_queue_normalized = dpnp.get_normalized_queue_device(x1_obj, device=device, sycl_queue=sycl_queue) + + array_obj = dpt.asarray(x1_obj, + dtype=dtype, + copy=copy, + order=order, + usm_type=usm_type, + sycl_queue=sycl_queue_normalized) return dpnp_array(array_obj.shape, buffer=array_obj, order=order) diff --git a/dpnp/dpnp_iface.py 
b/dpnp/dpnp_iface.py index 9bf456060ddd..ce3c540539d6 100644 --- a/dpnp/dpnp_iface.py +++ b/dpnp/dpnp_iface.py @@ -67,7 +67,9 @@ "from_dlpack", "get_dpnp_descriptor", "get_include", - "get_normalized_queue_device" + "get_normalized_queue_device", + "get_usm_ndarray", + "is_supported_array_type" ] from dpnp import ( @@ -272,6 +274,10 @@ def get_dpnp_descriptor(ext_obj, if use_origin_backend(): return False + # It's required to keep track of input object if a non-strided copy is going to be created. + # Thus there will be an extra descriptor allocated to refer on original input. + orig_desc = None + # If input object is a scalar, it means it was allocated on host memory. # We need to copy it to USM memory according to compute follows data paradigm. if isscalar(ext_obj): @@ -291,6 +297,7 @@ def get_dpnp_descriptor(ext_obj, ext_obj_offset = 0 if ext_obj.strides != shape_offsets or ext_obj_offset != 0: + orig_desc = dpnp_descriptor(ext_obj) ext_obj = array(ext_obj) # while dpnp functions are based on DPNP_QUEUE @@ -304,7 +311,7 @@ def get_dpnp_descriptor(ext_obj, if not queue_is_default: ext_obj = array(ext_obj, sycl_queue=default_queue) - dpnp_desc = dpnp_descriptor(ext_obj) + dpnp_desc = dpnp_descriptor(ext_obj, orig_desc) if dpnp_desc.is_valid: return dpnp_desc @@ -366,3 +373,53 @@ def get_normalized_queue_device(obj=None, if hasattr(dpt._device, 'normalize_queue_device'): return dpt._device.normalize_queue_device(sycl_queue=sycl_queue, device=device) return sycl_queue + + +def get_usm_ndarray(a): + """ + Return :class:`dpctl.tensor.usm_ndarray` from input array `a`. + + Parameters + ---------- + a : {dpnp_array, usm_ndarray} + Input array of supported type :class:`dpnp.ndarray` + or :class:`dpctl.tensor.usm_ndarray`. + + Returns + ------- + out : usm_ndarray + A dpctl USM ndarray of input array `a`. + + Raises + ------ + TypeError + If input parameter `a` is of unsupported array type. 
+ + """ + + if isinstance(a, dpnp_array): + return a.get_array() + if isinstance(a, dpt.usm_ndarray): + return a + raise TypeError("An array must be any of supported type, but got {}".format(type(a))) + + +def is_supported_array_type(a): + """ + Return ``True`` if an array of either type :class:`dpnp.ndarray` + or :class:`dpctl.tensor.usm_ndarray` type, ``False`` otherwise. + + Parameters + ---------- + a : array + An input array to check the type. + + Returns + ------- + out : bool + ``True`` if type of array `a` is supported array type, + ``False`` otherwise. + + """ + + return isinstance(a, (dpnp_array, dpt.usm_ndarray)) diff --git a/dpnp/dpnp_iface_bitwise.py b/dpnp/dpnp_iface_bitwise.py index 36f37f4282ec..92f33bc6310a 100644 --- a/dpnp/dpnp_iface_bitwise.py +++ b/dpnp/dpnp_iface_bitwise.py @@ -62,7 +62,9 @@ def _check_nd_call(origin_func, dpnp_func, x1, x2, dtype=None, out=None, where=True, **kwargs): """Choose function to call based on input and call chosen fucntion.""" - if where is not True: + if kwargs: + pass + elif where is not True: pass elif dtype is not None: pass @@ -85,7 +87,7 @@ def _check_nd_call(origin_func, dpnp_func, x1, x2, dtype=None, out=None, where=T if out is not None: if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)): raise TypeError("return array must be of supported array type") - out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None else: out_desc = None @@ -273,7 +275,7 @@ def invert(x, if out is not None: if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)): raise TypeError("return array must be of supported array type") - out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None else: out_desc = None return dpnp_invert(x1_desc, out_desc).get_pyobj() diff --git a/dpnp/dpnp_iface_indexing.py b/dpnp/dpnp_iface_indexing.py 
index ad2eb9794f6a..aa9989e5f7dc 100644 --- a/dpnp/dpnp_iface_indexing.py +++ b/dpnp/dpnp_iface_indexing.py @@ -54,6 +54,7 @@ "diag_indices", "diag_indices_from", "diagonal", + "extract", "fill_diagonal", "indices", "nonzero", @@ -232,6 +233,40 @@ def diagonal(x1, offset=0, axis1=0, axis2=1): return call_origin(numpy.diagonal, x1, offset, axis1, axis2) +def extract(condition, x): + """ + Return the elements of an array that satisfy some condition. + For full documentation refer to :obj:`numpy.extract`. + + Returns + ------- + y : dpnp.ndarray + Rank 1 array of values from `x` where `condition` is True. + + Limitations + ----------- + Parameters `condition` and `x` are supported either as + :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`. + Parameter `x` must be the same shape as `condition`. + Otherwise the function will be executed sequentially on CPU. + """ + + check_input_type = lambda x: isinstance(x, (dpnp_array, dpt.usm_ndarray)) + if check_input_type(condition) and check_input_type(x): + if condition.shape != x.shape: + pass + else: + dpt_condition = ( + condition.get_array() + if isinstance(condition, dpnp_array) + else condition + ) + dpt_array = x.get_array() if isinstance(x, dpnp_array) else x + return dpnp_array._create_from_usm_ndarray(dpt.extract(dpt_condition, dpt_array)) + + return call_origin(numpy.extract, condition, x) + + def fill_diagonal(x1, val, wrap=False): """ Fill the main diagonal of the given array of any dimensionality. @@ -296,7 +331,7 @@ def nonzero(x, /): ------- y : tuple[dpnp.ndarray] Indices of elements that are non-zero. - + Limitations ----------- Parameters `x` is supported as either :class:`dpnp.ndarray` @@ -342,24 +377,26 @@ def nonzero(x, /): return call_origin(numpy.nonzero, x) -def place(x1, mask, vals): +def place(x, mask, vals, /): """ Change elements of an array based on conditional and input values. For full documentation refer to :obj:`numpy.place`. 
Limitations ----------- - Input arrays ``arr`` and ``mask`` are supported as :obj:`dpnp.ndarray`. - Parameter ``vals`` is supported as 1-D sequence. + Parameters `x`, `mask` and `vals` are supported either as + :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`. + Otherwise the function will be executed sequentially on CPU. """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) - mask_desc = dpnp.get_dpnp_descriptor(mask, copy_when_nondefault_queue=False) - vals_desc = dpnp.get_dpnp_descriptor(vals, copy_when_nondefault_queue=False) - if x1_desc and mask_desc and vals_desc: - return dpnp_place(x1_desc, mask, vals_desc) + check_input_type = lambda x: isinstance(x, (dpnp_array, dpt.usm_ndarray)) + if check_input_type(x) and check_input_type(mask) and check_input_type(vals): + dpt_array = x.get_array() if isinstance(x, dpnp_array) else x + dpt_mask = mask.get_array() if isinstance(mask, dpnp_array) else mask + dpt_vals = vals.get_array() if isinstance(vals, dpnp_array) else vals + return dpt.place(dpt_array, dpt_mask, dpt_vals) - return call_origin(numpy.place, x1, mask, vals, dpnp_inplace=True) + return call_origin(numpy.place, x, mask, vals, dpnp_inplace=True) def put(x1, ind, v, mode='raise'): diff --git a/dpnp/dpnp_iface_linearalgebra.py b/dpnp/dpnp_iface_linearalgebra.py index a989f745c0a1..2a643fc8469b 100644 --- a/dpnp/dpnp_iface_linearalgebra.py +++ b/dpnp/dpnp_iface_linearalgebra.py @@ -114,7 +114,7 @@ def dot(x1, x2, out=None, **kwargs): if out is not None: if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)): raise TypeError("return array must be of supported array type") - out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None else: out_desc = None return dpnp_dot(x1_desc, x2_desc, out=out_desc).get_pyobj() diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py index 567661bdb57f..b317a0a9a11a 
100644 --- a/dpnp/dpnp_iface_manipulation.py +++ b/dpnp/dpnp_iface_manipulation.py @@ -388,17 +388,24 @@ def hstack(tup): return call_origin(numpy.hstack, tup_new) -def moveaxis(x1, source, destination): +def moveaxis(x, source, destination): """ Move axes of an array to new positions. Other axes remain in their original order. For full documentation refer to :obj:`numpy.moveaxis`. + Returns + ------- + out : dpnp.ndarray + Array with moved axes. + The returned array will have the same data and + the same USM allocation type as `x`. + Limitations ----------- - Input array ``x1`` is supported as :obj:`dpnp.ndarray`. + Parameters `x` is supported as either :class:`dpnp.ndarray` + or :class:`dpctl.tensor.usm_ndarray`. Otherwise the function will be executed sequentially on CPU. - Sizes of normalized input arrays are supported to be equal. Input array data types are limited by supported DPNP :ref:`Data types`. See Also @@ -417,30 +424,11 @@ def moveaxis(x1, source, destination): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) - if x1_desc: - source_norm = normalize_axis(source, x1_desc.ndim) - destination_norm = normalize_axis(destination, x1_desc.ndim) - - if len(source_norm) != len(destination_norm): - pass - else: - # 'do nothing' pattern for transpose() with no elements in 'source' - input_permute = [] - for i in range(x1_desc.ndim): - if i not in source_norm: - input_permute.append(i) - - # insert moving axes into proper positions - for destination_id, source_id in sorted(zip(destination_norm, source_norm)): - # if destination_id in input_permute: - # pytest tests/third_party/cupy/manipulation_tests/test_transpose.py::TestTranspose::test_moveaxis_invalid5_3 - # checker_throw_value_error("swapaxes", "source_id exists", source_id, input_permute) - input_permute.insert(destination_id, source_id) - - return transpose(x1_desc.get_pyobj(), axes=input_permute) + if isinstance(x, dpnp_array) or isinstance(x, dpt.usm_ndarray): + dpt_array = 
x.get_array() if isinstance(x, dpnp_array) else x + return dpnp_array._create_from_usm_ndarray(dpt.moveaxis(dpt_array, source, destination)) - return call_origin(numpy.moveaxis, x1, source, destination) + return call_origin(numpy.moveaxis, x, source, destination) def ravel(x1, order='C'): @@ -583,12 +571,28 @@ def rollaxis(x1, axis, start=0): return call_origin(numpy.rollaxis, x1, axis, start) -def squeeze(x1, axis=None): +def squeeze(x, /, axis=None): """ - Remove single-dimensional entries from the shape of an array. + Removes singleton dimensions (axes) from array `x`. For full documentation refer to :obj:`numpy.squeeze`. + Returns + ------- + out : dpnp.ndarray + Output array is a view, if possible, + and a copy otherwise, but with all or a subset of the + dimensions of length 1 removed. Output has the same data + type as the input, is allocated on the same device as the + input and has the same USM allocation type as the input + array `x`. + + Limitations + ----------- + Parameters `x` is supported as either :class:`dpnp.ndarray` + or :class:`dpctl.tensor.usm_ndarray`. + Otherwise the function will be executed sequentially on CPU. + Examples -------- >>> import dpnp as np @@ -602,26 +606,17 @@ def squeeze(x1, axis=None): >>> np.squeeze(x, axis=1).shape Traceback (most recent call last): ... - ValueError: cannot select an axis to squeeze out which has size not equal to one + ValueError: Cannot select an axis to squeeze out which has size not equal to one. 
>>> np.squeeze(x, axis=2).shape (1, 3) - >>> x = np.array([[1234]]) - >>> x.shape - (1, 1) - >>> np.squeeze(x) - array(1234) # 0d array - >>> np.squeeze(x).shape - () - >>> np.squeeze(x)[()] - 1234 """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) - if x1_desc: - return dpnp_squeeze(x1_desc, axis).get_pyobj() + if isinstance(x, dpnp_array) or isinstance(x, dpt.usm_ndarray): + dpt_array = x.get_array() if isinstance(x, dpnp_array) else x + return dpnp_array._create_from_usm_ndarray(dpt.squeeze(dpt_array, axis)) - return call_origin(numpy.squeeze, x1, axis) + return call_origin(numpy.squeeze, x, axis) def stack(arrays, axis=0, out=None): diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 08de8b2ba5a8..98dcc71d31af 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -95,6 +95,41 @@ ] +def _check_nd_call(origin_func, dpnp_func, x1, x2, out=None, where=True, dtype=None, subok=True, **kwargs): + """Choose function to call based on input and call chosen fucntion.""" + + if kwargs: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) + + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + if x1_desc and x2_desc: + if out is not None: + if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)): + raise TypeError("return array must be of supported array type") + out_desc = 
dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None + else: + out_desc = None + + return dpnp_func(x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where).get_pyobj() + + return call_origin(origin_func, x1, x2, dtype=dtype, out=out, where=where, **kwargs) + + def abs(*args, **kwargs): """ Calculate the absolute value element-wise. @@ -200,7 +235,7 @@ def add(x1, ----------- Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time. - Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. + Parameters `where`, `dtype` and `subok` are supported with their default values. Keyword arguments ``kwargs`` are currently unsupported. Otherwise the function will be executed sequentially on CPU. Input array data types are limited by supported DPNP :ref:`Data types`. @@ -216,29 +251,7 @@ def add(x1, """ - if out is not None: - pass - elif where is not True: - pass - elif dtype is not None: - pass - elif subok is not True: - pass - elif dpnp.isscalar(x1) and dpnp.isscalar(x2): - # at least either x1 or x2 has to be an array - pass - else: - # get USM type and queue to copy scalar from the host memory into a USM allocation - usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, alloc_queue=queue) - if x1_desc and x2_desc: - return dpnp_add(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj() - - return call_origin(numpy.add, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) + return _check_nd_call(numpy.add, dpnp_add, x1, x2, out=out, where=where, dtype=dtype, 
subok=subok, **kwargs) def around(x1, decimals=0, out=None): @@ -1142,7 +1155,7 @@ def multiply(x1, ----------- Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time. - Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. + Parameters `where`, `dtype` and `subok` are supported with their default values. Keyword arguments ``kwargs`` are currently unsupported. Otherwise the functions will be executed sequentially on CPU. Input array data types are limited by supported DPNP :ref:`Data types`. @@ -1157,29 +1170,7 @@ def multiply(x1, """ - if out is not None: - pass - elif where is not True: - pass - elif dtype is not None: - pass - elif subok is not True: - pass - elif dpnp.isscalar(x1) and dpnp.isscalar(x2): - # at least either x1 or x2 has to be an array - pass - else: - # get USM type and queue to copy scalar from the host memory into a USM allocation - usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, alloc_queue=queue) - if x1_desc and x2_desc: - return dpnp_multiply(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj() - - return call_origin(numpy.multiply, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) + return _check_nd_call(numpy.multiply, dpnp_multiply, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) def nancumprod(x1, **kwargs): @@ -1397,34 +1388,7 @@ def power(x1, """ - if where is not True: - pass - elif dtype is not None: - pass - elif subok is not True: - pass - elif dpnp.isscalar(x1) and dpnp.isscalar(x2): - # at least either 
x1 or x2 has to be an array - pass - else: - # get USM type and queue to copy scalar from the host memory into a USM allocation - usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, alloc_queue=queue) - if x1_desc and x2_desc: - if out is not None: - if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)): - raise TypeError("return array must be of supported array type") - out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) - else: - out_desc = None - - return dpnp_power(x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where).get_pyobj() - - return call_origin(numpy.power, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) + return _check_nd_call(numpy.power, dpnp_power, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) def prod(x1, axis=None, dtype=None, out=None, keepdims=False, initial=None, where=True): @@ -1665,10 +1629,16 @@ def sum(x1, axis=None, dtype=None, out=None, keepdims=False, initial=None, where if where is not True: pass else: + if dpnp.isscalar(out): + raise TypeError("output must be an array") out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None result_obj = dpnp_sum(x1_desc, axis, dtype, out_desc, keepdims, initial, where).get_pyobj() result = dpnp.convert_single_elem_array_to_scalar(result_obj, keepdims) + if x1_desc.size == 0 and axis is None: + result = dpnp.zeros_like(result) + if out is not None: + out[...] 
= result return result return call_origin(numpy.sum, x1, axis=axis, dtype=dtype, out=out, keepdims=keepdims, initial=initial, where=where) diff --git a/dpnp/dpnp_iface_searching.py b/dpnp/dpnp_iface_searching.py index a0b17f4845ef..0a72ecae0f80 100644 --- a/dpnp/dpnp_iface_searching.py +++ b/dpnp/dpnp_iface_searching.py @@ -44,7 +44,10 @@ from dpnp.dpnp_utils import * import dpnp +from dpnp.dpnp_array import dpnp_array + import numpy +import dpctl.tensor as dpt __all__ = [ @@ -181,7 +184,7 @@ def where(condition, x=None, y=None, /): Return elements chosen from `x` or `y` depending on `condition`. When only `condition` is provided, this function is a shorthand for - :obj:`dpnp.nonzero(condition)`. + :obj:`dpnp.nonzero(condition)`. For full documentation refer to :obj:`numpy.where`. @@ -193,12 +196,13 @@ def where(condition, x=None, y=None, /): Limitations ----------- - Parameters `condition`, `x` and `y` are supported as either scalar, :class:`dpnp.ndarray` + Parameter `condition` is supported as either :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`. + Parameters `x` and `y` are supported as either scalar, :class:`dpnp.ndarray` + or :class:`dpctl.tensor.usm_ndarray` Otherwise the function will be executed sequentially on CPU. - Data type of `condition` parameter is limited by :obj:`dpnp.bool`. Input array data types of `x` and `y` are limited by supported DPNP :ref:`Data types`. - + See Also -------- :obj:`nonzero` : The function that is called when `x` and `y`are omitted. 
@@ -220,18 +224,17 @@ def where(condition, x=None, y=None, /): elif missing == 2: return dpnp.nonzero(condition) elif missing == 0: - # get USM type and queue to copy scalar from the host memory into a USM allocation - usm_type, queue = get_usm_allocations([condition, x, y]) - - c_desc = dpnp.get_dpnp_descriptor(condition, copy_when_strides=False, copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, alloc_queue=queue) - x_desc = dpnp.get_dpnp_descriptor(x, copy_when_strides=False, copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, alloc_queue=queue) - y_desc = dpnp.get_dpnp_descriptor(y, copy_when_strides=False, copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, alloc_queue=queue) - if c_desc and x_desc and y_desc: - if c_desc.dtype != dpnp.bool: - raise TypeError("condition must be a boolean array") - return dpnp_where(c_desc, x_desc, y_desc).get_pyobj() + check_input_type = lambda x: isinstance(x, (dpnp_array, dpt.usm_ndarray)) + if check_input_type(condition): + if numpy.isscalar(x) or numpy.isscalar(y): + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([condition, x, y]) + x = dpt.asarray(x, usm_type=usm_type, sycl_queue=queue) if numpy.isscalar(x) else x + y = dpt.asarray(y, usm_type=usm_type, sycl_queue=queue) if numpy.isscalar(y) else y + if check_input_type(x) and check_input_type(y): + dpt_condition = condition.get_array() if isinstance(condition, dpnp_array) else condition + dpt_x = x.get_array() if isinstance(x, dpnp_array) else x + dpt_y = y.get_array() if isinstance(y, dpnp_array) else y + return dpnp_array._create_from_usm_ndarray(dpt.where(dpt_condition, dpt_x, dpt_y)) return call_origin(numpy.where, condition, x, y) diff --git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py index ab92f8cc6251..966a72142695 100644 --- a/dpnp/dpnp_iface_statistics.py +++ b/dpnp/dpnp_iface_statistics.py @@ -41,9 +41,10 @@ import numpy - +import 
dpctl.tensor as dpt from dpnp.dpnp_algo import * from dpnp.dpnp_utils import * +from dpnp.dpnp_array import dpnp_array import dpnp @@ -237,7 +238,8 @@ def correlate(x1, x2, mode='valid'): def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None): - """ + """cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None): + Estimate a covariance matrix, given data and weights. For full documentation refer to :obj:`numpy.cov`. @@ -245,10 +247,9 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights= Limitations ----------- Input array ``m`` is supported as :obj:`dpnp.ndarray`. - Dimension of input array ``m`` is limited by ``m.ndim > 2``. + Dimension of input array ``m`` is limited by ``m.ndim <= 2``. Size and shape of input arrays are supported to be equal. Prameters ``y`` is supported only with default value ``None``. - Prameters ``rowvar`` is supported only with default value ``True``. Prameters ``bias`` is supported only with default value ``False``. Prameters ``ddof`` is supported only with default value ``None``. Prameters ``fweights`` is supported only with default value ``None``. @@ -256,7 +257,7 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights= Otherwise the function will be executed sequentially on CPU. Input array data types are limited by supported DPNP :ref:`Data types`. - .. seealso:: :obj:`dpnp.corrcoef` normalized covariance matrix. + .. see also:: :obj:`dpnp.corrcoef` normalized covariance matrix. 
Examples -------- @@ -273,27 +274,30 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights= [1.0, -1.0, -1.0, 1.0] """ - - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) - if x1_desc: - if x1_desc.ndim > 2: - pass - elif y is not None: - pass - elif not rowvar: - pass - elif bias: - pass - elif ddof is not None: - pass - elif fweights is not None: - pass - elif aweights is not None: - pass - else: - if x1_desc.dtype != dpnp.float64: - x1_desc = dpnp.get_dpnp_descriptor(dpnp.astype(x1, dpnp.float64), copy_when_nondefault_queue=False) - + if not isinstance(x1, (dpnp_array, dpt.usm_ndarray)): + pass + elif x1.ndim > 2: + pass + elif y is not None: + pass + elif bias: + pass + elif ddof is not None: + pass + elif fweights is not None: + pass + elif aweights is not None: + pass + else: + if not rowvar and x1.shape[0] != 1: + x1 = x1.get_array() if isinstance(x1, dpnp_array) else x1 + x1 = dpnp_array._create_from_usm_ndarray(x1.mT) + + if not x1.dtype in (dpnp.float32, dpnp.float64): + x1 = dpnp.astype(x1, dpnp.default_float_type(sycl_queue=x1.sycl_queue)) + + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) + if x1_desc: return dpnp_cov(x1_desc).get_pyobj() return call_origin(numpy.cov, x1, y, rowvar, bias, ddof, fweights, aweights) diff --git a/dpnp/dpnp_iface_trigonometric.py b/dpnp/dpnp_iface_trigonometric.py index 098dd19648f3..473401071643 100644 --- a/dpnp/dpnp_iface_trigonometric.py +++ b/dpnp/dpnp_iface_trigonometric.py @@ -41,6 +41,7 @@ import numpy +import dpctl.tensor as dpt from dpnp.dpnp_algo import * from dpnp.dpnp_utils import * @@ -906,7 +907,7 @@ def sinh(x1): return call_origin(numpy.sinh, x1, **kwargs) -def sqrt(x1): +def sqrt(x1, /, out = None, **kwargs): """ Return the positive square-root of an array, element-wise. @@ -914,8 +915,11 @@ def sqrt(x1): Limitations ----------- - Input array is supported as :obj:`dpnp.ndarray`. 
+ Input array is supported as either :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`. + Parameter `out` is supported as class:`dpnp.ndarray`, class:`dpctl.tensor.usm_ndarray` or + with default value ``None``. Otherwise the function will be executed sequentially on CPU. + Keyword arguments ``kwargs`` are currently unsupported. Input array data types are limited by supported DPNP :ref:`Data types`. Examples @@ -928,11 +932,23 @@ def sqrt(x1): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) + x1_desc = ( + dpnp.get_dpnp_descriptor( + x1, copy_when_strides=False, copy_when_nondefault_queue=False + ) + if not kwargs + else None + ) if x1_desc: - return dpnp_sqrt(x1_desc).get_pyobj() + if out is not None: + if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)): + raise TypeError("return array must be of supported array type") + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) or None + else: + out_desc = None + return dpnp_sqrt(x1_desc, out=out_desc).get_pyobj() - return call_origin(numpy.sqrt, x1) + return call_origin(numpy.sqrt, x1, out=out, **kwargs) def square(x1): diff --git a/dpnp/dpnp_utils/CMakeLists.txt b/dpnp/dpnp_utils/CMakeLists.txt new file mode 100644 index 000000000000..75d403f74cfe --- /dev/null +++ b/dpnp/dpnp_utils/CMakeLists.txt @@ -0,0 +1,7 @@ +# Building dpnp_algo_utils Cython extension + +build_dpnp_cython_ext_with_backend( + dpnp_algo_utils + ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_utils.pyx + dpnp/dpnp_utils + ) diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pxd b/dpnp/dpnp_utils/dpnp_algo_utils.pxd index db7127319bb0..0015e8d12c02 100644 --- a/dpnp/dpnp_utils/dpnp_algo_utils.pxd +++ b/dpnp/dpnp_utils/dpnp_algo_utils.pxd @@ -116,11 +116,13 @@ cdef class dpnp_descriptor: cdef public: # TODO remove "public" as python accessible attribute object origin_pyobj + dpnp_descriptor origin_desc dict descriptor Py_ssize_t dpnp_descriptor_data_size cpp_bool 
dpnp_descriptor_is_scalar cdef void * get_data(self) + cdef cpp_bool match_ctype(self, DPNPFuncType ctype) cdef shape_type_c get_common_shape(shape_type_c input1_shape, shape_type_c input2_shape) except * diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pyx b/dpnp/dpnp_utils/dpnp_algo_utils.pyx index a94381788764..9c421c187b55 100644 --- a/dpnp/dpnp_utils/dpnp_algo_utils.pyx +++ b/dpnp/dpnp_utils/dpnp_algo_utils.pyx @@ -1,4 +1,5 @@ # cython: language_level=3 +# cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** # Copyright (c) 2016-2023, Intel Corporation @@ -35,6 +36,8 @@ import numpy import dpctl import dpctl.utils as dpu +import dpctl.tensor._copy_utils as dpt_cu +import dpctl.tensor._tensor_impl as dpt_ti import dpnp.config as config import dpnp.dpnp_container as dpnp_container @@ -257,7 +260,7 @@ def get_usm_allocations(objects): if not isinstance(objects, (list, tuple)): raise TypeError("Expected a list or a tuple, got {}".format(type(objects))) - + if len(objects) == 0: return (None, None) return (_get_coerced_usm_type(objects), _get_common_allocation_queue(objects)) @@ -660,9 +663,10 @@ cdef tuple get_common_usm_allocation(dpnp_descriptor x1, dpnp_descriptor x2): cdef class dpnp_descriptor: - def __init__(self, obj): + def __init__(self, obj, dpnp_descriptor orig_desc=None): """ Initialze variables """ self.origin_pyobj = None + self.origin_desc = None self.descriptor = None self.dpnp_descriptor_data_size = 0 self.dpnp_descriptor_is_scalar = True @@ -681,6 +685,10 @@ cdef class dpnp_descriptor: self.origin_pyobj = obj + """ Keep track of a descriptor with original data """ + if orig_desc is not None and orig_desc.is_valid: + self.origin_desc = orig_desc + """ array size calculation """ cdef Py_ssize_t shape_it = 0 self.dpnp_descriptor_data_size = 1 @@ -740,6 +748,14 @@ cdef class dpnp_descriptor: def is_scalar(self): return self.dpnp_descriptor_is_scalar + @property + def 
is_temporary(self): + """ + Non-none descriptor of original data means the current descriptor + holds a temporary allocated data. + """ + return self.origin_desc is not None + @property def data(self): if self.is_valid: @@ -771,6 +787,15 @@ cdef class dpnp_descriptor: return interface_dict + def _copy_array_from(self, other_desc): + """ + Fill array data with usm_ndarray of the same shape from other DPNP descriptor + """ + if not isinstance(other_desc, dpnp_descriptor): + raise TypeError("expected dpnp_descriptor, got {}".format(type(other_desc))) + + dpt_cu._copy_same_shape(self.get_array(), other_desc.get_array()) + def get_pyobj(self): return self.origin_pyobj @@ -784,6 +809,29 @@ cdef class dpnp_descriptor: "expected either dpctl.tensor.usm_ndarray or dpnp.dpnp_array.dpnp_array, got {}" "".format(type(self.origin_pyobj))) + def get_result_desc(self, result_desc=None): + """ + Copy the result data into an original array + """ + if self.is_temporary: + # Original descriptor is not None, so copy the array data into it and return + from_desc = self if result_desc is None else result_desc + self.origin_desc._copy_array_from(from_desc) + return self.origin_desc + elif result_desc is not None: + # A temporary result descriptor was allocated, needs to copy data back into 'out' descriptor + self._copy_array_from(result_desc) + return self + + def is_array_overlapped(self, other_desc): + """ + Check if usm_ndarray overlaps an array from other DPNP descriptor + """ + if not isinstance(other_desc, dpnp_descriptor): + raise TypeError("expected dpnp_descriptor, got {}".format(type(other_desc))) + + return dpt_ti._array_overlap(self.get_array(), other_desc.get_array()) + cdef void * get_data(self): cdef Py_ssize_t item_size = 0 cdef Py_ssize_t elem_offset = 0 @@ -798,6 +846,9 @@ cdef class dpnp_descriptor: return < void * > val + cdef cpp_bool match_ctype(self, DPNPFuncType ctype): + return self.dtype == dpnp_DPNPFuncType_to_dtype(< size_t > ctype) + def __bool__(self): 
return self.is_valid diff --git a/dpnp/fft/CMakeLists.txt b/dpnp/fft/CMakeLists.txt new file mode 100644 index 000000000000..3b6146a2a854 --- /dev/null +++ b/dpnp/fft/CMakeLists.txt @@ -0,0 +1,7 @@ +# Building dpnp_algo_fft Cython extension + +build_dpnp_cython_ext_with_backend( + dpnp_algo_fft + ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_fft.pyx + dpnp/fft + ) diff --git a/dpnp/fft/dpnp_algo_fft.pyx b/dpnp/fft/dpnp_algo_fft.pyx index 393c744d4f36..7f8d1161074f 100644 --- a/dpnp/fft/dpnp_algo_fft.pyx +++ b/dpnp/fft/dpnp_algo_fft.pyx @@ -1,7 +1,8 @@ # cython: language_level=3 +# cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2022, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/linalg/CMakeLists.txt b/dpnp/linalg/CMakeLists.txt new file mode 100644 index 000000000000..a04d5f3b64e9 --- /dev/null +++ b/dpnp/linalg/CMakeLists.txt @@ -0,0 +1,7 @@ +# Building dpnp_algo_linalg Cython extension + +build_dpnp_cython_ext_with_backend( + dpnp_algo_linalg + ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_linalg.pyx + dpnp/linalg + ) diff --git a/dpnp/linalg/dpnp_algo_linalg.pyx b/dpnp/linalg/dpnp_algo_linalg.pyx index ddcaf6774999..232a70cafb3e 100644 --- a/dpnp/linalg/dpnp_algo_linalg.pyx +++ b/dpnp/linalg/dpnp_algo_linalg.pyx @@ -1,4 +1,5 @@ # cython: language_level=3 +# cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** # Copyright (c) 2016-2023, Intel Corporation @@ -87,7 +88,7 @@ cpdef utils.dpnp_descriptor dpnp_cholesky(utils.dpnp_descriptor input_): # ceate result array with type given by FPTR data cdef utils.dpnp_descriptor result = utils.create_output_descriptor(input_.shape, - kernel_data.return_type, + kernel_data.return_type, None, device=input_obj.sycl_device, 
usm_type=input_obj.usm_type, diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index 6e6f55db8f92..e2e962585786 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -41,6 +41,10 @@ import dpnp +from .dpnp_utils_linalg import ( + dpnp_eigh +) + import numpy from dpnp.dpnp_utils import * @@ -53,6 +57,7 @@ "cond", "det", "eig", + "eigh", "eigvals", "inv", "matrix_power", @@ -172,6 +177,68 @@ def eig(x1): return call_origin(numpy.linalg.eig, x1) +def eigh(a, UPLO='L'): + """ + Return the eigenvalues and eigenvectors of a complex Hermitian + (conjugate symmetric) or a real symmetric matrix. + + Returns two objects, a 1-D array containing the eigenvalues of `a`, and + a 2-D square array or matrix (depending on the input type) of the + corresponding eigenvectors (in columns). + + For full documentation refer to :obj:`numpy.linalg.eigh`. + + Returns + ------- + w : (..., M) dpnp.ndarray + The eigenvalues in ascending order, each repeated according to + its multiplicity. + v : (..., M, M) dpnp.ndarray + The column ``v[:, i]`` is the normalized eigenvector corresponding + to the eigenvalue ``w[i]``. + + Limitations + ----------- + Parameter `a` is supported as :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`. + Input array data types are limited by supported DPNP :ref:`Data types`. + + See Also + -------- + :obj:`dpnp.eig` : eigenvalues and right eigenvectors for non-symmetric arrays. + :obj:`dpnp.eigvals` : eigenvalues of non-symmetric arrays. + + Examples + -------- + >>> import dpnp as dp + >>> a = dp.array([[1, -2j], [2j, 5]]) + >>> a + array([[ 1.+0.j, -0.-2.j], + [ 0.+2.j, 5.+0.j]]) + >>> w, v = dp.linalg.eigh(a) + >>> w; v + array([0.17157288, 5.82842712]), + array([[-0.92387953-0.j , -0.38268343+0.j ], # may vary + [ 0. +0.38268343j, 0. 
-0.92387953j]])) + + """ + + if UPLO not in ('L', 'U'): + raise ValueError("UPLO argument must be 'L' or 'U'") + + if not dpnp.is_supported_array_type(a): + raise TypeError("An array must be any of supported type, but got {}".format(type(a))) + + if a.ndim < 2: + raise ValueError("%d-dimensional array given. Array must be " + "at least two-dimensional" % a.ndim) + + m, n = a.shape[-2:] + if m != n: + raise ValueError("Last 2 dimensions of the array must be square") + + return dpnp_eigh(a, UPLO=UPLO) + + def eigvals(input): """ Compute the eigenvalues of a general matrix. diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py new file mode 100644 index 000000000000..b7218b75d817 --- /dev/null +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -0,0 +1,131 @@ +# cython: language_level=3 +# distutils: language = c++ +# -*- coding: utf-8 -*- +# ***************************************************************************** +# Copyright (c) 2023, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + + +import dpnp +import dpnp.backend.extensions.lapack._lapack_impl as li + +import dpctl.tensor._tensor_impl as ti + +__all__ = [ + "dpnp_eigh" +] + +_jobz = {'N': 0, 'V': 1} +_upper_lower = {'U': 0, 'L': 1} + + +def dpnp_eigh(a, UPLO): + """ + Return the eigenvalues and eigenvectors of a complex Hermitian + (conjugate symmetric) or a real symmetric matrix. + + The main calculation is done by calling an extention function + for LAPACK library of OneMKL. Depending on input type of `a` array, + it will be either ``heevd`` (for complex types) or ``syevd`` (for others). 
+ + """ + + a_usm_type = a.usm_type + a_sycl_queue = a.sycl_queue + a_order = 'C' if a.flags.c_contiguous else 'F' + a_usm_arr = dpnp.get_usm_ndarray(a) + + # 'V' means both eigenvectors and eigenvalues will be calculated + jobz = _jobz['V'] + uplo = _upper_lower[UPLO] + + # get resulting type of arrays with eigenvalues and eigenvectors + a_dtype = a.dtype + lapack_func = "_syevd" + if dpnp.issubdtype(a_dtype, dpnp.complexfloating): + lapack_func = "_heevd" + v_type = a_dtype + w_type = dpnp.float64 if a_dtype == dpnp.complex128 else dpnp.float32 + elif dpnp.issubdtype(a_dtype, dpnp.floating): + v_type = w_type = a_dtype + elif a_sycl_queue.sycl_device.has_aspect_fp64: + v_type = w_type = dpnp.float64 + else: + v_type = w_type = dpnp.float32 + + if a.ndim > 2: + w = dpnp.empty(a.shape[:-1], dtype=w_type, usm_type=a_usm_type, sycl_queue=a_sycl_queue) + + # need to loop over the 1st dimension to get eigenvalues and eigenvectors of 3d matrix A + op_count = a.shape[0] + if op_count == 0: + return w, dpnp.empty_like(a, dtype=v_type) + + eig_vecs = [None] * op_count + ht_copy_ev = [None] * op_count + ht_lapack_ev = [None] * op_count + for i in range(op_count): + # oneMKL LAPACK assumes fortran-like array as input, so + # allocate a memory with 'F' order for dpnp array of eigenvectors + eig_vecs[i] = dpnp.empty_like(a[i], order='F', dtype=v_type) + + # use DPCTL tensor function to fill the array of eigenvectors with content of input array + ht_copy_ev[i], copy_ev = ti._copy_usm_ndarray_into_usm_ndarray(src=a_usm_arr[i], dst=eig_vecs[i].get_array(), sycl_queue=a_sycl_queue) + + # call LAPACK extension function to get eigenvalues and eigenvectors of a portion of matrix A + ht_lapack_ev[i], _ = getattr(li, lapack_func)(a_sycl_queue, jobz, uplo, eig_vecs[i].get_array(), w[i].get_array(), depends=[copy_ev]) + + for i in range(op_count): + ht_lapack_ev[i].wait() + ht_copy_ev[i].wait() + + # combine the list of eigenvectors into a single array + v = dpnp.array(eig_vecs, 
order=a_order) + return w, v + else: + # oneMKL LAPACK assumes fortran-like array as input, so + # allocate a memory with 'F' order for dpnp array of eigenvectors + v = dpnp.empty_like(a, order='F', dtype=v_type) + + # use DPCTL tensor function to fill the array of eigenvectors with content of input array + ht_copy_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray(src=a_usm_arr, dst=v.get_array(), sycl_queue=a_sycl_queue) + + # allocate a memory for dpnp array of eigenvalues + w = dpnp.empty(a.shape[:-1], dtype=w_type, usm_type=a_usm_type, sycl_queue=a_sycl_queue) + + # call LAPACK extension function to get eigenvalues and eigenvectors of matrix A + ht_lapack_ev, lapack_ev = getattr(li, lapack_func)(a_sycl_queue, jobz, uplo, v.get_array(), w.get_array(), depends=[copy_ev]) + + if a_order != 'F': + # need to align order of eigenvectors with one of input matrix A + out_v = dpnp.empty_like(v, order=a_order) + ht_copy_out_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray(src=v.get_array(), dst=out_v.get_array(), sycl_queue=a_sycl_queue, depends=[lapack_ev]) + ht_copy_out_ev.wait() + else: + out_v = v + + ht_lapack_ev.wait() + ht_copy_ev.wait() + + return w, out_v diff --git a/dpnp/random/CMakeLists.txt b/dpnp/random/CMakeLists.txt new file mode 100644 index 000000000000..b7204a9d5c74 --- /dev/null +++ b/dpnp/random/CMakeLists.txt @@ -0,0 +1,7 @@ +# Building dpnp_algo_random Cython extension + +build_dpnp_cython_ext_with_backend( + dpnp_algo_random + ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_random.pyx + dpnp/random + ) diff --git a/dpnp/random/dpnp_algo_random.pyx b/dpnp/random/dpnp_algo_random.pyx index 314906cee6d1..504e365405b4 100644 --- a/dpnp/random/dpnp_algo_random.pyx +++ b/dpnp/random/dpnp_algo_random.pyx @@ -1,4 +1,5 @@ # cython: language_level=3 +# cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** # Copyright (c) 2016-2023, Intel Corporation @@ -143,7 +144,7 @@ ctypedef 
c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_laplace_c_1out_t)(c_dpctl.DPCT const double, const size_t, const c_dpctl.DPCTLEventVectorRef) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_logistic_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, void * , +ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_logistic_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, void * , const double, const double, const size_t, @@ -514,7 +515,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_binomial(int ntrial, double p, size): with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) - + return result @@ -831,7 +832,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_logistic(double loc, double scale, size): cdef fptr_dpnp_rng_logistic_c_1out_t func = < fptr_dpnp_rng_logistic_c_1out_t > kernel_data.ptr # call FPTR function cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), loc, scale, result.size, NULL) - + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) @@ -1015,7 +1016,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_negative_binomial(double a, double p, size) func = kernel_data.ptr # call FPTR function event_ref = func(q_ref, result.get_data(), a, p, result.size, NULL) - + with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) @@ -1204,7 +1205,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_rayleigh(double scale, size): q = result_sycl_queue q_ref = q.get_queue_ref() - + func = kernel_data.ptr # call FPTR function event_ref = func(q_ref, result.get_data(), scale, result.size, NULL) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000000..8f5c236ed288 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,34 @@ +[tool.coverage.run] +plugins = [ + "Cython.Coverage" +] +branch = true +source = [ + "dpnp", +] +omit = [ + "tests/*", + "dpnp/_version.py", +] + +[tool.coverage.report] +omit = [ + "tests/*", + "dpnp/_version.py", +] + +[tool.pytest.ini.options] +minversion = 
"6.0" +norecursedirs= [ + ".*", "*.egg*", "build", "dist", "conda-recipe", +] +addopts = [ + "--junitxml=junit.xml", + "--ignore setup.py", + "--ignore run_test.py", + "--cov-report term-missing", + "--tb native", + "--strict", + "--durations=20", + "-q -ra", +] diff --git a/scripts/azure-pipelines.yml b/scripts/azure-pipelines.yml index 3a9921464473..206a6fe4d53e 100644 --- a/scripts/azure-pipelines.yml +++ b/scripts/azure-pipelines.yml @@ -40,7 +40,7 @@ jobs: echo ========================= CI ENV ========================================== . ./scripts/set_ci_env.sh echo ========================= build DPNP ====================================== - ./0.build.sh + python scripts/build_locally.py echo ========================= run valgrind ==================================== export PYTHONMALLOC=malloc valgrind --show-leak-kinds=definite --log-file=/tmp/valgrind-output \ diff --git a/scripts/build_locally.py b/scripts/build_locally.py new file mode 100644 index 000000000000..6d008851e7e1 --- /dev/null +++ b/scripts/build_locally.py @@ -0,0 +1,187 @@ +# -*- coding: utf-8 -*- +# ***************************************************************************** +# Copyright (c) 2016-2023, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + +import os +import subprocess +import sys +import dpctl + + +def run( + use_oneapi=True, + build_type="Release", + c_compiler=None, + cxx_compiler=None, + compiler_root=None, + cmake_executable=None, + verbose=False, + cmake_opts="", +): + build_system = None + + if "linux" in sys.platform: + build_system = "Ninja" + elif sys.platform in ["win32", "cygwin"]: + build_system = "Ninja" + else: + assert False, sys.platform + " not supported" + + setup_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + cmake_args = [ + sys.executable, + "setup.py", + "develop", + ] + if cmake_executable: + cmake_args += [ + "--cmake-executable=" + cmake_executable, + ] + dpctl_module_path = os.path.join(dpctl.get_include(), "..", "resources", "cmake") + cmake_args += [ + "--build-type=" + build_type, + "--generator=" + build_system, + "--", + "-DCMAKE_C_COMPILER:PATH=" + c_compiler, + "-DCMAKE_CXX_COMPILER:PATH=" + cxx_compiler, + "-DDPCTL_MODULE_PATH:PATH=" + dpctl_module_path, + ] + if verbose: + cmake_args += [ + "-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON", + ] + if cmake_opts: + cmake_args += cmake_opts.split() + if use_oneapi: + if "DPL_ROOT" in os.environ: + os.environ["DPL_ROOT_HINT"] = os.environ["DPL_ROOT"] + subprocess.check_call( + cmake_args, shell=False, cwd=setup_dir, env=os.environ + ) + + +if __name__ == "__main__": + import argparse + + parser = 
argparse.ArgumentParser( + description="Driver to build dpnp for in-place installation" + ) + driver = parser.add_argument_group(title="Coverage driver arguments") + driver.add_argument("--c-compiler", help="Name of C compiler", default=None) + driver.add_argument( + "--cxx-compiler", help="Name of C++ compiler", default=None + ) + driver.add_argument( + "--oneapi", + help="Set if using one-API installation", + dest="oneapi", + action="store_true", + ) + driver.add_argument( + "--debug", + default="Release", + const="Debug", + action="store_const", + help="Set the compilation mode to debugging", + ) + driver.add_argument( + "--compiler-root", + type=str, + help="Path to compiler home directory", + default=None, + ) + driver.add_argument( + "--cmake-executable", + type=str, + help="Path to cmake executable", + default=None, + ) + driver.add_argument( + "--verbose", + help="Build using vebose makefile mode", + dest="verbose", + action="store_true", + ) + driver.add_argument( + "--cmake-opts", + help="Channels through additional cmake options", + dest="cmake_opts", + default="", + type=str, + ) + args = parser.parse_args() + + args_to_validate = [ + "c_compiler", + "cxx_compiler", + "compiler_root", + ] + + if args.oneapi or ( + args.c_compiler is None + and args.cxx_compiler is None + and args.compiler_root is None + ): + args.c_compiler = "icx" + args.cxx_compiler = "icpx" if "linux" in sys.platform else "icx" + args.compiler_root = None + else: + cr = args.compiler_root + if isinstance(cr, str) and os.path.exists(cr): + if args.c_compiler is None: + args.c_compiler = "icx" + if args.cxx_compiler is None: + args.cxx_compiler = "icpx" if "linux" in sys.platform else "icx" + else: + raise RuntimeError( + "Option 'compiler-root' must be provided when " + "using non-default DPC++ layout." 
+ ) + args_to_validate = [ + "c_compiler", + "cxx_compiler", + ] + for p in args_to_validate: + arg = getattr(args, p) + assert isinstance(arg, str) + if not os.path.exists(arg): + arg2 = os.path.join(cr, arg) + if os.path.exists(arg2): + arg = arg2 + setattr(args, p, arg) + if not os.path.exists(arg): + opt_name = p.replace("_", "-") + raise RuntimeError(f"Option {opt_name} value {arg} must exist.") + + run( + use_oneapi=args.oneapi, + build_type=args.debug, + c_compiler=args.c_compiler, + cxx_compiler=args.cxx_compiler, + compiler_root=args.compiler_root, + cmake_executable=args.cmake_executable, + verbose=args.verbose, + cmake_opts=args.cmake_opts, + ) diff --git a/scripts/gen_coverage.py b/scripts/gen_coverage.py new file mode 100644 index 000000000000..9fb2d86d0df0 --- /dev/null +++ b/scripts/gen_coverage.py @@ -0,0 +1,145 @@ +import os +import subprocess +import sys + +def run( + c_compiler=None, + cxx_compiler=None, + bin_llvm=None, + pytest_opts = "", +): + + IS_LIN = False + + if "linux" in sys.platform: + IS_LIN = True + elif sys.platform in ["win32", "cygwin"]: + pass + else: + assert False, sys.platform + " not supported" + + if not IS_LIN: + raise RuntimeError( + "This scripts only supports coverage collection on Linux" + ) + + setup_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + dpctl_cmake_dir = subprocess.check_output([sys.executable, "-m", "dpctl", "--cmakedir"]) + + cmake_args = [ + sys.executable, + "setup.py", + "develop", + "-G=Ninja", + "--", + "-DCMAKE_C_COMPILER:PATH=" + c_compiler, + "-DCMAKE_CXX_COMPILER:PATH=" + cxx_compiler, + "-DDPCTL_MODULE_PATH=" + dpctl_cmake_dir.decode().rstrip(), + "-DCMAKE_VERBOSE_MAKEFILE=ON", + "-DDPNP_GENERATE_COVERAGE=ON", + ] + + env = None + if bin_llvm: + env = { + "PATH": ":".join((os.environ.get("PATH", ""), bin_llvm)), + "LLVM_TOOLS_HOME": bin_llvm, + } + env.update({k: v for k, v in os.environ.items() if k != "PATH"}) + + + subprocess.check_call(cmake_args, shell=False, 
cwd=setup_dir, env=env) + + env["LLVM_PROFILE_FILE"] = "dpnp_pytest.profraw" + subprocess.check_call( + [ + "pytest", + "-q", + "-ra", + "--disable-warnings", + "--cov-config", + "pyproject.toml", + "--cov", + "dpnp", + "--cov-report", + "term-missing", + "--pyargs", + "tests", + "-vv", + *pytest_opts.split(), + ], + cwd=setup_dir, + shell=False, + env=env, + ) + + def find_objects(): + import os + + objects = [] + dpnp_path = os.getcwd() + search_path = os.path.join(dpnp_path, "dpnp") + files = os.listdir(search_path) + for file in files: + if file.endswith("_c.so"): + objects.extend(["-object", os.path.join(search_path, file)]) + return objects + + objects = find_objects() + instr_profile_fn = "dpnp_pytest.profdata" + # generate instrumentation profile data + subprocess.check_call( + [ + os.path.join(bin_llvm, "llvm-profdata"), + "merge", + "-sparse", + env["LLVM_PROFILE_FILE"], + "-o", + instr_profile_fn, + ] + ) + + # export lcov + with open("dpnp_pytest.lcov", "w") as fh: + subprocess.check_call( + [ + os.path.join(bin_llvm, "llvm-cov"), + "export", + "-format=lcov", + "-ignore-filename-regex=/tmp/icpx*", + "-instr-profile=" + instr_profile_fn, + ] + + objects, + stdout=fh, + ) + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser( + description="Driver to build dpnp and generate coverage" + ) + driver = parser.add_argument_group(title="Coverage driver arguments") + driver.add_argument( + "--pytest-opts", + help="Channels through additional pytest options", + dest="pytest_opts", + default="", + type=str, + ) + + args = parser.parse_args() + + c_compiler = "icx" + cxx_compiler = "icpx" + icx_path = subprocess.check_output(["which", "icx"]) + bin_dir = os.path.dirname(os.path.dirname(icx_path)) + bin_llvm = os.path.join(bin_dir.decode("utf-8"), "bin-llvm") + + + run( + c_compiler=c_compiler, + cxx_compiler=cxx_compiler, + bin_llvm=bin_llvm, + pytest_opts = args.pytest_opts, + ) diff --git a/scripts/install_cmake_lin.sh 
b/scripts/install_cmake_lin.sh index 6a8f7c7b3006..966a22c617be 100755 --- a/scripts/install_cmake_lin.sh +++ b/scripts/install_cmake_lin.sh @@ -4,13 +4,13 @@ THEDIR=$(dirname $(readlink -e ${BASH_SOURCE[0]})) echo ========================= install cmake ================================== curl --output cmake_webimage.tar.gz \ - --url https://cmake.org/files/v3.19/cmake-3.19.2-Linux-x86_64.tar.gz \ + --url https://github.com/Kitware/CMake/releases/download/v3.26.2/cmake-3.26.2-linux-x86_64.tar.gz \ --retry 5 --retry-delay 5 tar -xzf cmake_webimage.tar.gz rm -f cmake_webimage.tar.gz -export PATH=`pwd`/cmake-3.19.2-Linux-x86_64/bin:$PATH +export PATH=`pwd`/cmake-3.26.2-linux-x86_64/bin:$PATH which cmake cmake --version diff --git a/scripts/install_system_deps.sh b/scripts/install_system_deps.sh index ce7104ed1a31..14ac03bb5a35 100755 --- a/scripts/install_system_deps.sh +++ b/scripts/install_system_deps.sh @@ -58,7 +58,8 @@ update-alternatives --get-selections echo ========================= install Intel OneAPI =========================== sudo aptitude install -y intel-oneapi-mkl \ intel-oneapi-mkl-devel \ - intel-oneapi-compiler-dpcpp-cpp + intel-oneapi-compiler-dpcpp-cpp \ + intel-tbb #intel-oneapi-python diff --git a/setup.py b/setup.py index b7bfba6c28ba..608dd8dd1c71 100644 --- a/setup.py +++ b/setup.py @@ -1,63 +1,6 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# - Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. 
-# - Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -# THE POSSIBILITY OF SUCH DAMAGE. -# ***************************************************************************** - -""" NumPy is the fundamental package for array computing with Python. 
- -It provides: - -- a powerful N-dimensional array object -- sophisticated (broadcasting) functions -- tools for integrating C/C++ and Fortran code -- useful linear algebra, Fourier transform, and random number capabilities -- and much more - -""" - -import importlib.machinery as imm # Python 3 is required -import sys +from skbuild import setup import os -import dpctl -import numpy - -from setuptools import setup, Extension -from Cython.Build import cythonize -from Cython.Compiler import Options as cython_options - -from utils.command_style import source_style -from utils.command_clean import source_clean -from utils.command_build_clib import custom_build_clib, dpnp_backend_c_description, _project_backend_dir, _sdl_cflags, _project_extra_link_args, IS_WIN -from utils.command_build_cmake_clib import custom_build_cmake_clib - - -""" -Python version check -""" -if sys.version_info[:2] < (3, 6): - raise RuntimeError("DPNP: Python version >= 3.6 required.") +import importlib.machinery as imm """ @@ -67,160 +10,22 @@ version_mod = imm.SourceFileLoader('version', os.path.join(thefile_path, 'dpnp', 'version.py')).load_module() __version__ = version_mod.__version__ - -""" -Set project auxilary data like readme and licence files -""" -with open('README.md') as f: - __readme_file__ = f.read() - -CLASSIFIERS = """\ -Development Status :: 4 - Beta -Intended Audience :: Science/Research -Intended Audience :: Developers -License :: OSI Approved -Programming Language :: C -Programming Language :: Python -Programming Language :: Python :: 3 -Programming Language :: Python :: 3.6 -Programming Language :: Python :: 3.7 -Programming Language :: Python :: 3.8 -Programming Language :: Python :: Implementation :: CPython -Topic :: Software Development -Topic :: Scientific/Engineering -Operating System :: Microsoft :: Windows -Operating System :: POSIX -Operating System :: Unix -Operating System :: MacOS -""" - -""" -Extra defined commands for the build system - ->$ python ./setup.py 
--help-commands - ->$ python ./setup.py style ->$ python ./setup.py style -a ->$ python ./setup.py clean - -TODO: spell check, valgrind, code coverage -""" - -# TODO: refactor/fix -# on Win we need a specific build_clib definition to prevent using cmake during build_ext execution -if IS_WIN: - dpnp_build_commands = {'style': source_style, - 'build_clib_setuptools': custom_build_clib, - 'build_clib': custom_build_clib, - 'clean': source_clean - } -else: - dpnp_build_commands = {'style': source_style, - 'build_clib_setuptools': custom_build_clib, - 'build_clib': custom_build_cmake_clib, - 'clean': source_clean - } - -if IS_WIN: - ''' - This variable controls setuptools execution on windows - to avoid automatically search and confirm workability of the compiler - If not set, error "Microsoft Visual C++ 14.0 or greater is required." appiars - ''' - os.environ["DISTUTILS_USE_SDK"] = "1" - -""" -The project modules description -""" -kwargs_common = { - "include_dirs": [numpy.get_include(), dpctl.get_include()] + _project_backend_dir, - "library_dirs": [os.path.dirname(dpctl.get_include()),], - "libraries": ["DPCTLSyclInterface"], - "extra_compile_args": _sdl_cflags, - "extra_link_args": _project_extra_link_args, - "define_macros": [("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")], - "language": "c++" -} - -dpnp_algo = Extension( - name="dpnp.dpnp_algo.dpnp_algo", - sources=[os.path.join("dpnp", "dpnp_algo", "dpnp_algo.pyx")], - **kwargs_common) - -dpnp_dparray = Extension( - name="dpnp.dparray", - sources=[os.path.join("dpnp", "dparray.pyx")], - **kwargs_common) - -dpnp_random = Extension( - name="dpnp.random.dpnp_algo_random", - sources=[os.path.join("dpnp", "random", "dpnp_algo_random.pyx")], - **kwargs_common) - -dpnp_linalg = Extension( - name="dpnp.linalg.dpnp_algo_linalg", - sources=[os.path.join("dpnp", "linalg", "dpnp_algo_linalg.pyx")], - **kwargs_common) - -dpnp_fft = Extension( - name="dpnp.fft.dpnp_algo_fft", - sources=[os.path.join("dpnp", "fft", 
"dpnp_algo_fft.pyx")], - **kwargs_common) - -dpnp_utils = Extension( - name="dpnp.dpnp_utils.dpnp_algo_utils", - sources=[os.path.join("dpnp", "dpnp_utils", "dpnp_algo_utils.pyx")], - **kwargs_common) - -cython_options.docstrings = True -cython_options.warning_errors = True - -dpnp_cython_mods = cythonize([dpnp_algo, dpnp_dparray, dpnp_random, dpnp_utils, dpnp_linalg, dpnp_fft], - compiler_directives={"language_level": sys.version_info[0], - "warn.unused": False, - "warn.unused_result": False, - "warn.maybe_uninitialized": False, - "warn.undeclared": False, - "boundscheck": True, - "linetrace": True - }, - gdb_debug=False, - build_dir="build_cython", - annotate=False, - quiet=False) - -setup(name="dpnp", - version=__version__, - description="NumPy-like API accelerated with SYCL", - long_description=__readme_file__, - long_description_content_type="text/markdown", - author="Intel Corporation", - maintainer="Intel Corp.", - maintainer_email="scripting@intel.com", - url="https://intelpython.github.io/dpnp/", - download_url="https://github.com/IntelPython/dpnp", - license='BSD', - classifiers=[_f for _f in CLASSIFIERS.split('\n') if _f], - keywords="sycl numpy python3 intel mkl oneapi gpu dpcpp pstl", - platforms=["Linux", "Windows"], - test_suite="pytest", - python_requires=">=3.6", - install_requires=["numpy>=1.15"], - setup_requires=["numpy>=1.15"], - tests_require=["numpy>=1.15"], - ext_modules=dpnp_cython_mods, - cmdclass=dpnp_build_commands, - packages=['dpnp', - 'dpnp.dpnp_algo', - 'dpnp.dpnp_utils', - 'dpnp.fft', - 'dpnp.linalg', - 'dpnp.random' - ], - package_data={'dpnp': ['libdpnp_backend_c.so', 'dpnp_backend_c.lib', 'dpnp_backend_c.dll']}, - include_package_data=True, - - # this is needed for 'build' command to automatically call 'build_clib' - # it attach the library to all extensions (it is not needed) - libraries=dpnp_backend_c_description - ) +setup( + name="dpnp", + version=__version__, + description="", + long_description="", + 
long_description_content_type="text/markdown", + license="Apache 2.0", + author="Intel Corporation", + url="https://github.com/IntelPython/dpnp", + packages=['dpnp', + 'dpnp.dpnp_algo', + 'dpnp.dpnp_utils', + 'dpnp.fft', + 'dpnp.linalg', + 'dpnp.random' + ], + package_data={'dpnp': ['libdpnp_backend_c.so', 'dpnp_backend_c.lib', 'dpnp_backend_c.dll']}, + include_package_data=True, +) diff --git a/tests/__init__.py b/tests/__init__.py index e4085539a910..3efd595c0a04 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,4 +1,5 @@ from tests.third_party.cupy import testing as cupy_testing +from .helper import has_support_aspect64 import dpnp import numpy @@ -17,6 +18,8 @@ def _shaped_arange(shape, xp=dpnp, dtype=dpnp.float64, order='C'): + if dtype is dpnp.float64: + dtype = dpnp.float32 if not has_support_aspect64() else dtype res = xp.array(orig_shaped_arange(shape, xp=numpy, dtype=dtype, order=order), dtype=dtype) return res diff --git a/tests/helper.py b/tests/helper.py index 1e97615fb3de..de75251305c8 100644 --- a/tests/helper.py +++ b/tests/helper.py @@ -91,3 +91,12 @@ def is_win_platform(): Return True if a test is runing on Windows OS, False otherwise. """ return platform.startswith('win') + + +def has_support_aspect64(device=None): + """ + Return True if the device supports 64-bit precision floating point operations, + False otherwise. 
+ """ + dev = dpctl.select_default_device() if device is None else device + return dev.has_aspect_fp64 diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index ecc5bd5e999a..d4d77828b61a 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -1,5 +1,10 @@ tests/test_histograms.py::TestHistogram::test_density +tests/test_random.py::TestDistributionsMultivariateNormal::test_moments +tests/test_random.py::TestDistributionsMultivariateNormal::test_output_shape_check +tests/test_random.py::TestDistributionsMultivariateNormal::test_seed +tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.vstack([x, x]).T] + tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19] tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-trapz-data19] tests/test_sycl_queue.py::test_broadcasting[opencl:gpu:0-floor_divide-data12-data22] @@ -186,13 +191,17 @@ tests/test_linalg.py::test_svd[(3,4)-float64] tests/test_linalg.py::test_svd[(5,3)-float64] tests/test_linalg.py::test_svd[(16,16)-float64] +tests/test_logic.py::test_allclose[int32] +tests/test_logic.py::test_allclose[int64] +tests/test_logic.py::test_allclose[float32] +tests/test_logic.py::test_allclose[float64] +tests/test_logic.py::test_allclose[None] + tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.asarray([[i, i] for i in x])] tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: (dpnp.asarray([(i, i) for i in x], [("a", int), ("b", int)]).view(dpnp.recarray))] tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.asarray([(i, i) for i in x], [("a", object), ("b", dpnp.int32)])]] tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.asarray(x).astype(dpnp.int8)] -tests/test_sort.py::test_partition[[[1, 0], [3, 0]]-float32-1] - tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestAngle::test_angle 
tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestRealImag::test_imag tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestRealImag::test_imag_inplace @@ -210,6 +219,11 @@ tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestRealImag::tes tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestRealImag::test_real_setter_zero_dim tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestRealImag::test_real_zero_dim tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestScalarConversion::test_scalar_conversion +tests/third_party/cupy/core_tests/test_ndarray_conversion.py::TestNdarrayToBytes_param_0_{shape=()}::test_item +tests/third_party/cupy/core_tests/test_ndarray_conversion.py::TestNdarrayToBytes_param_1_{shape=(1,)}::test_item +tests/third_party/cupy/core_tests/test_ndarray_conversion.py::TestNdarrayToBytes_param_2_{shape=(2, 3)}::test_item +tests/third_party/cupy/core_tests/test_ndarray_conversion.py::TestNdarrayToBytes_param_3_{order='C', shape=(2, 3)}::test_item +tests/third_party/cupy/core_tests/test_ndarray_conversion.py::TestNdarrayToBytes_param_4_{order='F', shape=(2, 3)}::test_item tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestArrayCopyAndView::test_astype tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestArrayCopyAndView::test_astype_type tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestArrayCopyAndView::test_astype_strides @@ -233,11 +247,16 @@ tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestArrayCopyAn tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestArrayCopyAndView::test_view_non_contiguous_raise tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestNumPyArrayCopyView_param_0_{src_order='C'}::test_isinstance_numpy_view_copy_f 
tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py::TestNumPyArrayCopyView_param_1_{src_order='F'}::test_isinstance_numpy_view_copy_f +tests/third_party/cupy/core_tests/test_ndarray_math.py::TestRoundHalfway_param_0_{decimals=-3}::test_round_halfway_float +tests/third_party/cupy/core_tests/test_ndarray_math.py::TestRoundHalfway_param_1_{decimals=-2}::test_round_halfway_float +tests/third_party/cupy/core_tests/test_ndarray_math.py::TestRoundHalfway_param_2_{decimals=-1}::test_round_halfway_float +tests/third_party/cupy/core_tests/test_ndarray_math.py::TestRoundHalfway_param_3_{decimals=0}::test_round_halfway_float tests/third_party/cupy/core_tests/test_ndarray_math.py::TestRoundHalfway_param_0_{decimals=-3}::test_round_halfway_uint tests/third_party/cupy/core_tests/test_ndarray_math.py::TestRoundHalfway_param_1_{decimals=-2}::test_round_halfway_uint tests/third_party/cupy/core_tests/test_ndarray_math.py::TestRoundHalfway_param_2_{decimals=-1}::test_round_halfway_uint tests/third_party/cupy/core_tests/test_ndarray_math.py::TestRoundHalfway_param_3_{decimals=0}::test_round_halfway_uint + tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestArrayReduction::test_min_nan tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestArrayReduction::test_ptp_all tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestArrayReduction::test_ptp_all_keepdims @@ -248,6 +267,8 @@ tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestArrayReduction: tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestArrayReduction::test_ptp_multiple_axes tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestArrayReduction::test_ptp_multiple_axes_keepdims tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestArrayReduction::test_ptp_nan +tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestArrayReduction::test_ptp_nan_imag 
+tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestArrayReduction::test_ptp_nan_real tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestCubReduction_param_0_{order='C', shape=(10,)}::test_cub_max tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestCubReduction_param_0_{order='C', shape=(10,)}::test_cub_min @@ -385,6 +406,11 @@ tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asco tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asfortranarray_cuda_array_zero_dim tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asfortranarray_cuda_array_zero_dim_dtype +tests/third_party/cupy/creation_tests/test_matrix.py::TestMatrix::test_diag_construction_from_list +tests/third_party/cupy/creation_tests/test_matrix.py::TestMatrix::test_diag_construction_from_tuple +tests/third_party/cupy/creation_tests/test_matrix.py::TestMatrix::test_diag_extraction_from_nested_list +tests/third_party/cupy/creation_tests/test_matrix.py::TestMatrix::test_diag_extraction_from_nested_tuple + tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid0 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid1 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid2 @@ -456,21 +482,18 @@ tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_compr tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_compress_empty_1dim_no_axis tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_compress_no_axis tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_compress_no_bool -tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_extract 
-tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_extract_empty_1dim -tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_extract_no_bool -tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_extract_shape_mismatch -tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_extract_size_mismatch -tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_extract_size_mismatch2 tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_take_index_range_overflow tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_1D_choicelist tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_choicelist_condlist_broadcast +tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_complex tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_default +tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_default_complex tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_default_scalar tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_empty_lists tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_length_error tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_odd_shaped_broadcastable +tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_odd_shaped_broadcastable_complex tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_odd_shaped_non_broadcastable tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_type_error_condlist tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_diagonal @@ -514,12 +537,6 @@ 
tests/third_party/cupy/indexing_tests/test_insert.py::TestFillDiagonal_param_5_{ tests/third_party/cupy/indexing_tests/test_insert.py::TestFillDiagonal_param_6_{shape=(3, 3), val=(2, 2), wrap=True}::test_columnar_slice tests/third_party/cupy/indexing_tests/test_insert.py::TestFillDiagonal_param_7_{shape=(3, 3), val=(2, 2), wrap=False}::test_columnar_slice tests/third_party/cupy/indexing_tests/test_insert.py::TestFillDiagonal_param_9_{shape=(2, 2, 2), val=1, wrap=False}::test_1darray -tests/third_party/cupy/indexing_tests/test_insert.py::TestPlaceRaises_param_0_{shape=(7,)}::test_place_empty_value_error -tests/third_party/cupy/indexing_tests/test_insert.py::TestPlaceRaises_param_0_{shape=(7,)}::test_place_shape_unmatch_error -tests/third_party/cupy/indexing_tests/test_insert.py::TestPlaceRaises_param_1_{shape=(2, 3)}::test_place_empty_value_error -tests/third_party/cupy/indexing_tests/test_insert.py::TestPlaceRaises_param_1_{shape=(2, 3)}::test_place_shape_unmatch_error -tests/third_party/cupy/indexing_tests/test_insert.py::TestPlaceRaises_param_2_{shape=(4, 3, 2)}::test_place_empty_value_error -tests/third_party/cupy/indexing_tests/test_insert.py::TestPlaceRaises_param_2_{shape=(4, 3, 2)}::test_place_shape_unmatch_error tests/third_party/cupy/indexing_tests/test_insert.py::TestPutmaskDifferentDtypes::test_putmask_differnt_dtypes_raises tests/third_party/cupy/indexing_tests/test_insert.py::TestPutmask::test_putmask_non_equal_shape_raises tests/third_party/cupy/indexing_tests/test_iterate.py::TestFlatiter::test_next @@ -604,7 +621,9 @@ tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_8_{opt tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_9_{opt='optimal', subscript='acdf,jbje,gihb,hfac,gfac,gifabc,hfac'}::test_einsum tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumUnaryOperationWithScalar::test_scalar_float 
tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumUnaryOperationWithScalar::test_scalar_int +tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test_dim_mismatch3 tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test_invalid_sub1 +tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test_too_many_dims3 tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_invlarge tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_large tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_of_two @@ -613,7 +632,14 @@ tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_multidim_ tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_tensordot_zero_dim tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_dot_with_out_f_contiguous tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_multidim_vdot +tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot +tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot_with_int_axes +tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot_with_list_axes +tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_reversed_vdot +tests/third_party/cupy/logic_tests/test_comparison.py::TestAllclose::test_allclose_array_scalar +tests/third_party/cupy/logic_tests/test_comparison.py::TestAllclose::test_allclose_finite tests/third_party/cupy/logic_tests/test_comparison.py::TestAllclose::test_allclose_infinite +tests/third_party/cupy/logic_tests/test_comparison.py::TestAllclose::test_allclose_infinite_equal_nan tests/third_party/cupy/logic_tests/test_comparison.py::TestAllclose::test_allclose_min_int 
tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_broadcast_not_allowed tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_diff_dtypes_is_equal @@ -644,15 +670,6 @@ tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_8_{s tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_9_{shapes=[(0, 1, 1, 3), (2, 1, 0, 0, 3)]}::test_broadcast tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_9_{shapes=[(0, 1, 1, 3), (2, 1, 0, 0, 3)]}::test_broadcast_arrays -tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_int_axis_failure1 -tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_int_axis_failure2 -tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_scalar_failure1 -tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_scalar_failure2 -tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_scalar_failure3 -tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_scalar_failure4 -tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_tuple_axis_failure1 -tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_tuple_axis_failure2 -tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_tuple_axis_failure3 tests/third_party/cupy/manipulation_tests/test_dims.py::TestInvalidBroadcast_param_0_{shapes=[(3,), (2,)]}::test_invalid_broadcast tests/third_party/cupy/manipulation_tests/test_dims.py::TestInvalidBroadcast_param_0_{shapes=[(3,), (2,)]}::test_invalid_broadcast_arrays tests/third_party/cupy/manipulation_tests/test_dims.py::TestInvalidBroadcast_param_1_{shapes=[(3, 2), (2, 3)]}::test_invalid_broadcast @@ -724,8 +741,6 @@ tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_2_{reps 
tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_3_{reps=(0, 1)}::test_array_tile tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_4_{reps=(2, 3)}::test_array_tile tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_5_{reps=(2, 3, 4, 5)}::test_array_tile -tests/third_party/cupy/manipulation_tests/test_transpose.py::TestTranspose::test_moveaxis_invalid5_2 -tests/third_party/cupy/manipulation_tests/test_transpose.py::TestTranspose::test_moveaxis_invalid5_3 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_455_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int32), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_457_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int32), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='fmod', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_459_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int32), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='remainder', use_dtype=False}::test_binary @@ -757,6 +772,7 @@ tests/third_party/cupy/math_tests/test_floating.py::TestFloating::test_ldexp tests/third_party/cupy/math_tests/test_floating.py::TestFloating::test_nextafter_combination tests/third_party/cupy/math_tests/test_floating.py::TestFloating::test_nextafter_float tests/third_party/cupy/math_tests/test_floating.py::TestFloating::test_signbit +tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_absolute_negative tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_clip1 tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_clip2 tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_clip3 @@ -774,6 +790,7 @@ 
tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_nan_to_num_inf_na tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_nan_to_num_nan tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_nan_to_num_negative tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_nan_to_num_negative_for_old_numpy +tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_sign_negative tests/third_party/cupy/math_tests/test_rounding.py::TestRoundBorder_param_0_{value=(14, -1)}::test_around_negative2 tests/third_party/cupy/math_tests/test_rounding.py::TestRoundBorder_param_0_{value=(14, -1)}::test_around_positive2 tests/third_party/cupy/math_tests/test_rounding.py::TestRoundBorder_param_1_{value=(15, -1)}::test_around_negative2 @@ -806,9 +823,14 @@ tests/third_party/cupy/math_tests/test_rounding.py::TestRoundExtreme_param_5_{de tests/third_party/cupy/math_tests/test_rounding.py::TestRoundExtreme_param_5_{decimals=99}::test_round_small tests/third_party/cupy/math_tests/test_rounding.py::TestRoundExtreme_param_6_{decimals=100}::test_round_large tests/third_party/cupy/math_tests/test_rounding.py::TestRoundExtreme_param_6_{decimals=100}::test_round_small +tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_around +tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_ceil tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_fix +tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_floor tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_rint tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_rint_negative +tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_round_ +tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_trunc tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_all tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_all2 
tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_all_keepdims @@ -835,6 +857,7 @@ tests/third_party/cupy/math_tests/test_sumprod.py::TestDiff::test_diff_2dim_with tests/third_party/cupy/math_tests/test_sumprod.py::TestCumprod::test_cumprod_arraylike tests/third_party/cupy/math_tests/test_sumprod.py::TestCumprod::test_cumprod_huge_array tests/third_party/cupy/math_tests/test_sumprod.py::TestCumprod::test_cumprod_numpy_array +tests/third_party/cupy/math_tests/test_sumprod.py::TestCumprod::test_cumprod_out_noncontiguous tests/third_party/cupy/math_tests/test_sumprod.py::TestCumsum_param_0_{axis=0}::test_cumsum_arraylike tests/third_party/cupy/math_tests/test_sumprod.py::TestCumsum_param_0_{axis=0}::test_cumsum_numpy_array tests/third_party/cupy/math_tests/test_sumprod.py::TestCumsum_param_1_{axis=1}::test_cumsum_arraylike @@ -877,6 +900,40 @@ tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsExpo tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsExponential_param_2_{scale_shape=(), shape=None}::test_exponential tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsGeometric_param_0_{p_shape=(), shape=(4, 3, 2)}::test_geometric tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsGeometric_param_1_{p_shape=(), shape=(3, 2)}::test_geometric +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsGeometric_param_2_{p_shape=(3, 2), shape=(4, 3, 2)}::test_geometric +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsGeometric_param_3_{p_shape=(3, 2), shape=(3, 2)}::test_geometric +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_0_{nbad_shape=(), ngood_shape=(), nsample_dtype=int32, nsample_shape=(), shape=(4, 3, 2)}::test_hypergeometric +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_1_{nbad_shape=(), 
ngood_shape=(), nsample_dtype=int32, nsample_shape=(), shape=(3, 2)}::test_hypergeometric +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_2_{nbad_shape=(), ngood_shape=(), nsample_dtype=int32, nsample_shape=(3, 2), shape=(4, 3, 2)}::test_hypergeometric +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_3_{nbad_shape=(), ngood_shape=(), nsample_dtype=int32, nsample_shape=(3, 2), shape=(3, 2)}::test_hypergeometric +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_4_{nbad_shape=(), ngood_shape=(), nsample_dtype=int64, nsample_shape=(), shape=(4, 3, 2)}::test_hypergeometric +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_5_{nbad_shape=(), ngood_shape=(), nsample_dtype=int64, nsample_shape=(), shape=(3, 2)}::test_hypergeometric +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_6_{nbad_shape=(), ngood_shape=(), nsample_dtype=int64, nsample_shape=(3, 2), shape=(4, 3, 2)}::test_hypergeometric +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_7_{nbad_shape=(), ngood_shape=(), nsample_dtype=int64, nsample_shape=(3, 2), shape=(3, 2)}::test_hypergeometric +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_8_{nbad_shape=(), ngood_shape=(3, 2), nsample_dtype=int32, nsample_shape=(), shape=(4, 3, 2)}::test_hypergeometric +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_9_{nbad_shape=(), ngood_shape=(3, 2), nsample_dtype=int32, nsample_shape=(), shape=(3, 2)}::test_hypergeometric +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_10_{nbad_shape=(), ngood_shape=(3, 2), nsample_dtype=int32, nsample_shape=(3, 2), shape=(4, 3, 2)}::test_hypergeometric 
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_11_{nbad_shape=(), ngood_shape=(3, 2), nsample_dtype=int32, nsample_shape=(3, 2), shape=(3, 2)}::test_hypergeometric +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_12_{nbad_shape=(), ngood_shape=(3, 2), nsample_dtype=int64, nsample_shape=(), shape=(4, 3, 2)}::test_hypergeometric +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_13_{nbad_shape=(), ngood_shape=(3, 2), nsample_dtype=int64, nsample_shape=(), shape=(3, 2)}::test_hypergeometric +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_14_{nbad_shape=(), ngood_shape=(3, 2), nsample_dtype=int64, nsample_shape=(3, 2), shape=(4, 3, 2)}::test_hypergeometric +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_15_{nbad_shape=(), ngood_shape=(3, 2), nsample_dtype=int64, nsample_shape=(3, 2), shape=(3, 2)}::test_hypergeometric +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_16_{nbad_shape=(3, 2), ngood_shape=(), nsample_dtype=int32, nsample_shape=(), shape=(4, 3, 2)}::test_hypergeometric +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_17_{nbad_shape=(3, 2), ngood_shape=(), nsample_dtype=int32, nsample_shape=(), shape=(3, 2)}::test_hypergeometric +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_18_{nbad_shape=(3, 2), ngood_shape=(), nsample_dtype=int32, nsample_shape=(3, 2), shape=(4, 3, 2)}::test_hypergeometric +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_19_{nbad_shape=(3, 2), ngood_shape=(), nsample_dtype=int32, nsample_shape=(3, 2), shape=(3, 2)}::test_hypergeometric 
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_20_{nbad_shape=(3, 2), ngood_shape=(), nsample_dtype=int64, nsample_shape=(), shape=(4, 3, 2)}::test_hypergeometric +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_21_{nbad_shape=(3, 2), ngood_shape=(), nsample_dtype=int64, nsample_shape=(), shape=(3, 2)}::test_hypergeometric +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_22_{nbad_shape=(3, 2), ngood_shape=(), nsample_dtype=int64, nsample_shape=(3, 2), shape=(4, 3, 2)}::test_hypergeometric +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_23_{nbad_shape=(3, 2), ngood_shape=(), nsample_dtype=int64, nsample_shape=(3, 2), shape=(3, 2)}::test_hypergeometric +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_24_{nbad_shape=(3, 2), ngood_shape=(3, 2), nsample_dtype=int32, nsample_shape=(), shape=(4, 3, 2)}::test_hypergeometric +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_25_{nbad_shape=(3, 2), ngood_shape=(3, 2), nsample_dtype=int32, nsample_shape=(), shape=(3, 2)}::test_hypergeometric +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_26_{nbad_shape=(3, 2), ngood_shape=(3, 2), nsample_dtype=int32, nsample_shape=(3, 2), shape=(4, 3, 2)}::test_hypergeometric +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_27_{nbad_shape=(3, 2), ngood_shape=(3, 2), nsample_dtype=int32, nsample_shape=(3, 2), shape=(3, 2)}::test_hypergeometric +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_28_{nbad_shape=(3, 2), ngood_shape=(3, 2), nsample_dtype=int64, nsample_shape=(), shape=(4, 3, 2)}::test_hypergeometric 
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_29_{nbad_shape=(3, 2), ngood_shape=(3, 2), nsample_dtype=int64, nsample_shape=(), shape=(3, 2)}::test_hypergeometric +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_30_{nbad_shape=(3, 2), ngood_shape=(3, 2), nsample_dtype=int64, nsample_shape=(3, 2), shape=(4, 3, 2)}::test_hypergeometric +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsHyperGeometric_param_31_{nbad_shape=(3, 2), ngood_shape=(3, 2), nsample_dtype=int64, nsample_shape=(3, 2), shape=(3, 2)}::test_hypergeometric tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsGumbel_param_0_{loc_shape=(), scale_shape=(), shape=(4, 3, 2)}::test_gumbel tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsGumbel_param_1_{loc_shape=(), scale_shape=(), shape=(3, 2)}::test_gumbel tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsGumbel_param_2_{loc_shape=(), scale_shape=(3, 2), shape=(4, 3, 2)}::test_gumbel @@ -932,6 +989,10 @@ tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsNonc tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsNoncentralF_param_8_{dfden_shape=(3, 2), dfnum_shape=(), nonc_shape=(), shape=(4, 3, 2)}::test_noncentral_f tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsNoncentralF_param_9_{dfden_shape=(3, 2), dfnum_shape=(), nonc_shape=(), shape=(3, 2)}::test_noncentral_f +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsPoisson_param_0_{lam_shape=(), shape=(4, 3, 2)}::test_poisson +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsPoisson_param_1_{lam_shape=(), shape=(3, 2)}::test_poisson +tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsPoisson_param_2_{lam_shape=(3, 2), shape=(4, 3, 2)}::test_poisson 
+tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsPoisson_param_3_{lam_shape=(3, 2), shape=(3, 2)}::test_poisson tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsPower_param_0_{a_shape=(), shape=(4, 3, 2)}::test_power tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsPower_param_0_{a_shape=(), shape=(4, 3, 2)}::test_power_for_negative_a tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsPower_param_1_{a_shape=(), shape=(3, 2)}::test_power @@ -1088,10 +1149,28 @@ tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_1_{ext tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_1_{external=True}::test_argpartition_one_dim tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_1_{external=True}::test_argpartition_sequence_kth tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_1_{external=True}::test_argpartition_zero_dim +tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_axis +tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_invalid_axis1 +tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_invalid_axis2 +tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_invalid_negative_axis1 +tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_invalid_negative_axis2 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_multi_dim +tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_negative_axis +tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_none_axis 
+tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_zero_dim_axis +tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_zero_dim_invalid_axis tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_nan1 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_nan2 +tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_axis +tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_invalid_axis1 +tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_invalid_axis2 +tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_invalid_negative_axis1 +tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_invalid_negative_axis2 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_multi_dim +tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_negative_axis +tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_none_axis +tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_zero_dim_axis +tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_zero_dim_invalid_axis tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_nan1 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_nan2 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_non_contiguous @@ -1099,18 +1178,7 @@ 
tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_original_array_not_modified_multi_dim tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_original_array_not_modified_one_dim tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_zero_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10} -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10} -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10} -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10} -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10} -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10} -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000} -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000} -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000} -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000} -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000} -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000} + tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_F_order tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_lexsort_dtype tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_lexsort_three_or_more_dim @@ -1120,30 +1188,7 @@ 
tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_nan3 tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_view tests/third_party/cupy/sorting_tests/test_sort.py::TestMsort::test_msort_multi_dim tests/third_party/cupy/sorting_tests/test_sort.py::TestMsort::test_msort_one_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_multi_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_negative_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_negative_kth -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_non_contiguous -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_one_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_sequence_kth -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_zero_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_multi_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_negative_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_negative_kth -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, 
length=20000}::test_partition_non_contiguous -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_one_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_sequence_kth -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_zero_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_multi_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_negative_kth -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_one_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_zero_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_multi_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_negative_kth -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_one_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_zero_dim + tests/third_party/cupy/sorting_tests/test_sort.py::TestSort_complex::test_sort_complex_1dim tests/third_party/cupy/sorting_tests/test_sort.py::TestSort_complex::test_sort_complex_nan tests/third_party/cupy/sorting_tests/test_sort.py::TestSort_complex::test_sort_complex_ndim @@ -1237,6 +1282,8 @@ tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_b tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram 
tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_array_bins tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_bins_not_ordered +tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_complex_weights +tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_complex_weights_uneven_bins tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_density tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_empty tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_float_weights @@ -1257,6 +1304,7 @@ tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_h tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_same_value tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_weights_mismatch +tests/third_party/cupy/statistics_tests/test_meanvar.py::TestMeanVar::test_external_mean_axis tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMeanAdditional::test_nanmean_all_nan tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMeanAdditional::test_nanmean_float16 tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMeanAdditional::test_nanmean_huge diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index d3864a05b7e6..f18d39cd9f48 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -53,15 +53,6 @@ tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19] tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-trapz-data19] tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_take_no_axis -tests/third_party/cupy/indexing_tests/test_insert.py::TestPlace_param_3_{n_vals=1, shape=(7,)}::test_place 
-tests/third_party/cupy/indexing_tests/test_insert.py::TestPlace_param_4_{n_vals=1, shape=(2, 3)}::test_place -tests/third_party/cupy/indexing_tests/test_insert.py::TestPlace_param_5_{n_vals=1, shape=(4, 3, 2)}::test_place -tests/third_party/cupy/indexing_tests/test_insert.py::TestPlace_param_6_{n_vals=3, shape=(7,)}::test_place -tests/third_party/cupy/indexing_tests/test_insert.py::TestPlace_param_7_{n_vals=3, shape=(2, 3)}::test_place -tests/third_party/cupy/indexing_tests/test_insert.py::TestPlace_param_8_{n_vals=3, shape=(4, 3, 2)}::test_place -tests/third_party/cupy/indexing_tests/test_insert.py::TestPlace_param_9_{n_vals=15, shape=(7,)}::test_place -tests/third_party/cupy/indexing_tests/test_insert.py::TestPlace_param_10_{n_vals=15, shape=(2, 3)}::test_place -tests/third_party/cupy/indexing_tests/test_insert.py::TestPlace_param_11_{n_vals=15, shape=(4, 3, 2)}::test_place tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndices_param_0_{n=2, ndim=2}::test_diag_indices tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndices_param_1_{n=2, ndim=3}::test_diag_indices tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndices_param_2_{n=2, ndim=1}::test_diag_indices @@ -266,28 +257,9 @@ tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsMult tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsMultivariateNormal_param_2_{d=4, shape=(4, 3, 2)}::test_normal tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsMultivariateNormal_param_3_{d=4, shape=(3, 2)}::test_normal -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_invalid_axis1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_invalid_axis2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_invalid_kth 
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_invalid_negative_axis1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_invalid_negative_axis2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_invalid_negative_kth -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_invalid_axis1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_invalid_axis2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_invalid_kth -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_invalid_negative_axis1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_invalid_negative_axis2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_invalid_negative_kth -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_negative_axis tests/third_party/cupy/statistics_tests/test_correlation.py::TestCov::test_cov_empty tests/third_party/cupy/statistics_tests/test_meanvar.py::TestMeanVar::test_external_mean_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_negative_axis 
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_none_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_sequence_kth - tests/third_party/intel/test_zero_copy_test1.py::test_dpnp_interaction_with_dpctl_memory tests/test_arraymanipulation.py::TestHstack::test_generator tests/test_arraymanipulation.py::TestVstack::test_generator @@ -358,7 +330,6 @@ tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [3, 4]]-int32] tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: (dpnp.asarray([(i, i) for i in x], [("a", int), ("b", int)]).view(dpnp.recarray))] tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.asarray([(i, i) for i in x], [("a", object), ("b", dpnp.int32)])]] tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.asarray(x).astype(dpnp.int8)] -tests/test_sort.py::test_partition[[[1, 0], [3, 0]]-float32-1] tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestAngle::test_angle tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestRealImag::test_imag @@ -414,6 +385,8 @@ tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestArrayReduction: tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestArrayReduction::test_ptp_multiple_axes tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestArrayReduction::test_ptp_multiple_axes_keepdims tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestArrayReduction::test_ptp_nan +tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestArrayReduction::test_ptp_nan_imag +tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestArrayReduction::test_ptp_nan_real tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestCubReduction_param_0_{order='C', shape=(10,)}::test_cub_max 
tests/third_party/cupy/core_tests/test_ndarray_reduction.py::TestCubReduction_param_0_{order='C', shape=(10,)}::test_cub_min @@ -650,21 +623,18 @@ tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_compr tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_compress_empty_1dim_no_axis tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_compress_no_axis tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_compress_no_bool -tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_extract -tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_extract_empty_1dim -tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_extract_no_bool -tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_extract_shape_mismatch -tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_extract_size_mismatch -tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_extract_size_mismatch2 tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_take_index_range_overflow tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_1D_choicelist tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_choicelist_condlist_broadcast +tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_complex tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_default +tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_default_complex tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_default_scalar tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_empty_lists 
tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_length_error tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_odd_shaped_broadcastable +tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_odd_shaped_broadcastable_complex tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_odd_shaped_non_broadcastable tests/third_party/cupy/indexing_tests/test_indexing.py::TestSelect::test_select_type_error_condlist tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_diagonal @@ -708,12 +678,6 @@ tests/third_party/cupy/indexing_tests/test_insert.py::TestFillDiagonal_param_5_{ tests/third_party/cupy/indexing_tests/test_insert.py::TestFillDiagonal_param_6_{shape=(3, 3), val=(2, 2), wrap=True}::test_columnar_slice tests/third_party/cupy/indexing_tests/test_insert.py::TestFillDiagonal_param_7_{shape=(3, 3), val=(2, 2), wrap=False}::test_columnar_slice tests/third_party/cupy/indexing_tests/test_insert.py::TestFillDiagonal_param_9_{shape=(2, 2, 2), val=1, wrap=False}::test_1darray -tests/third_party/cupy/indexing_tests/test_insert.py::TestPlaceRaises_param_0_{shape=(7,)}::test_place_empty_value_error -tests/third_party/cupy/indexing_tests/test_insert.py::TestPlaceRaises_param_0_{shape=(7,)}::test_place_shape_unmatch_error -tests/third_party/cupy/indexing_tests/test_insert.py::TestPlaceRaises_param_1_{shape=(2, 3)}::test_place_empty_value_error -tests/third_party/cupy/indexing_tests/test_insert.py::TestPlaceRaises_param_1_{shape=(2, 3)}::test_place_shape_unmatch_error -tests/third_party/cupy/indexing_tests/test_insert.py::TestPlaceRaises_param_2_{shape=(4, 3, 2)}::test_place_empty_value_error -tests/third_party/cupy/indexing_tests/test_insert.py::TestPlaceRaises_param_2_{shape=(4, 3, 2)}::test_place_shape_unmatch_error tests/third_party/cupy/indexing_tests/test_insert.py::TestPutmaskDifferentDtypes::test_putmask_differnt_dtypes_raises 
tests/third_party/cupy/indexing_tests/test_insert.py::TestPutmask::test_putmask_non_equal_shape_raises tests/third_party/cupy/indexing_tests/test_iterate.py::TestFlatiter::test_next @@ -849,15 +813,6 @@ tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_8_{s tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_9_{shapes=[(0, 1, 1, 3), (2, 1, 0, 0, 3)]}::test_broadcast tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_9_{shapes=[(0, 1, 1, 3), (2, 1, 0, 0, 3)]}::test_broadcast_arrays -tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_int_axis_failure1 -tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_int_axis_failure2 -tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_scalar_failure1 -tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_scalar_failure2 -tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_scalar_failure3 -tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_scalar_failure4 -tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_tuple_axis_failure1 -tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_tuple_axis_failure2 -tests/third_party/cupy/manipulation_tests/test_dims.py::TestDims::test_squeeze_tuple_axis_failure3 tests/third_party/cupy/manipulation_tests/test_dims.py::TestInvalidBroadcast_param_0_{shapes=[(3,), (2,)]}::test_invalid_broadcast tests/third_party/cupy/manipulation_tests/test_dims.py::TestInvalidBroadcast_param_0_{shapes=[(3,), (2,)]}::test_invalid_broadcast_arrays tests/third_party/cupy/manipulation_tests/test_dims.py::TestInvalidBroadcast_param_1_{shapes=[(3, 2), (2, 3)]}::test_invalid_broadcast @@ -929,8 +884,6 @@ tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_2_{reps 
tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_3_{reps=(0, 1)}::test_array_tile tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_4_{reps=(2, 3)}::test_array_tile tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_5_{reps=(2, 3, 4, 5)}::test_array_tile -tests/third_party/cupy/manipulation_tests/test_transpose.py::TestTranspose::test_moveaxis_invalid5_2 -tests/third_party/cupy/manipulation_tests/test_transpose.py::TestTranspose::test_moveaxis_invalid5_3 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_279_{arg1=array([[1., 2., 3.], [4., 5., 6.]], dtype=float32), arg2=array([[0., 1., 2.], [3., 4., 5.]], dtype=float32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_287_{arg1=array([[1., 2., 3.], [4., 5., 6.]], dtype=float32), arg2=array([[0., 1., 2.], [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary @@ -1013,9 +966,14 @@ tests/third_party/cupy/math_tests/test_rounding.py::TestRoundExtreme_param_5_{de tests/third_party/cupy/math_tests/test_rounding.py::TestRoundExtreme_param_5_{decimals=99}::test_round_small tests/third_party/cupy/math_tests/test_rounding.py::TestRoundExtreme_param_6_{decimals=100}::test_round_large tests/third_party/cupy/math_tests/test_rounding.py::TestRoundExtreme_param_6_{decimals=100}::test_round_small +tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_around +tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_ceil tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_fix +tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_floor tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_rint tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_rint_negative 
+tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_round_ +tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_trunc tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_all_transposed tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_all_transposed2 tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_sum_axes @@ -1311,18 +1269,7 @@ tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_original_array_not_modified_multi_dim tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_original_array_not_modified_one_dim tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_zero_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10} -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10} -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10} -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10} -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10} -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10} -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000} -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000} -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000} -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000} 
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000} -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000} + tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_F_order tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_lexsort_dtype tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_lexsort_three_or_more_dim @@ -1332,32 +1279,7 @@ tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_nan3 tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_view tests/third_party/cupy/sorting_tests/test_sort.py::TestMsort::test_msort_multi_dim tests/third_party/cupy/sorting_tests/test_sort.py::TestMsort::test_msort_one_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_multi_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_negative_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_negative_kth -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_non_contiguous -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_one_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_sequence_kth -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_zero_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_axis 
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_multi_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_negative_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_negative_kth -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_non_contiguous -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_one_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_sequence_kth -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_zero_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_multi_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_negative_kth -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_non_contiguous -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_one_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_zero_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_multi_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_negative_kth -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_non_contiguous 
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_one_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_zero_dim + tests/third_party/cupy/sorting_tests/test_sort.py::TestSort_complex::test_sort_complex_1dim tests/third_party/cupy/sorting_tests/test_sort.py::TestSort_complex::test_sort_complex_nan tests/third_party/cupy/sorting_tests/test_sort.py::TestSort_complex::test_sort_complex_ndim @@ -1451,6 +1373,8 @@ tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_b tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_array_bins tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_bins_not_ordered +tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_complex_weights +tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_complex_weights_uneven_bins tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_density tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_empty tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_float_weights diff --git a/tests/test_indexing.py b/tests/test_indexing.py index 1a40777afac8..022d42ca1b0f 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -1,13 +1,75 @@ import pytest +from .helper import get_all_dtypes + import dpnp import numpy from numpy.testing import ( - assert_array_equal + assert_, + assert_array_equal, + assert_equal ) +class TestIndexing: + def test_ellipsis_index(self): + a = dpnp.array([[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]) + assert_(a[...] 
is not a) + assert_equal(a[...], a) + + # test that slicing with ellipsis doesn't skip an arbitrary number of dimensions + assert_equal(a[0, ...], a[0]) + assert_equal(a[0, ...], a[0,:]) + assert_equal(a[..., 0], a[:, 0]) + + # test that slicing with ellipsis always results in an array + assert_equal(a[0, ..., 1], dpnp.array(2)) + + # assignment with `(Ellipsis,)` on 0-d arrays + b = dpnp.array(1) + b[(Ellipsis,)] = 2 + assert_equal(b, 2) + + def test_boolean_indexing_list(self): + a = dpnp.array([1, 2, 3]) + b = dpnp.array([True, False, True]) + + assert_equal(a[b], [1, 3]) + assert_equal(a[None, b], [[1, 3]]) + + def test_indexing_array_weird_strides(self): + np_x = numpy.ones(10) + dp_x = dpnp.ones(10) + + np_ind = numpy.arange(10)[:, None, None, None] + np_ind = numpy.broadcast_to(np_ind, (10, 55, 4, 4)) + + dp_ind = dpnp.arange(10)[:, None, None, None] + dp_ind = dpnp.broadcast_to(dp_ind, (10, 55, 4, 4)) + + # single advanced index case + assert_array_equal(dp_x[dp_ind], np_x[np_ind]) + + np_x2 = numpy.ones((10, 2)) + dp_x2 = dpnp.ones((10, 2)) + + np_zind = numpy.zeros(4, dtype=np_ind.dtype) + dp_zind = dpnp.zeros(4, dtype=dp_ind.dtype) + + # higher dimensional advanced index + assert_array_equal(dp_x2[dp_ind, dp_zind], np_x2[np_ind, np_zind]) + + def test_indexing_array_negative_strides(self): + arr = dpnp.zeros((4, 4))[::-1, ::-1] + + slices = (slice(None), dpnp.array([0, 1, 2, 3])) + arr[slices] = 10 + assert_array_equal(arr, 10.) 
+ + @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_choose(): a = numpy.r_[:4] @@ -53,6 +115,18 @@ def test_diagonal(array, offset): assert_array_equal(expected, result) +@pytest.mark.parametrize("arr_dtype", get_all_dtypes()) +@pytest.mark.parametrize("cond_dtype", get_all_dtypes()) +def test_extract_1d(arr_dtype, cond_dtype): + a = numpy.array([-2, -1, 0, 1, 2, 3], dtype=arr_dtype) + ia = dpnp.array(a) + cond = numpy.array([1, -1, 2, 0, -2, 3], dtype=cond_dtype) + icond = dpnp.array(cond) + expected = numpy.extract(cond, a) + result = dpnp.extract(icond, ia) + assert_array_equal(expected, result) + + @pytest.mark.parametrize("val", [-1, 0, 1], ids=['-1', '0', '1']) @@ -113,7 +187,6 @@ def test_nonzero(array): assert_array_equal(expected, result) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize("vals", [[100, 200], (100, 200)], @@ -138,12 +211,12 @@ def test_place1(arr, mask, vals): ia = dpnp.array(a) m = numpy.array(mask) im = dpnp.array(m) + iv = dpnp.array(vals) numpy.place(a, m, vals) - dpnp.place(ia, im, vals) + dpnp.place(ia, im, iv) assert_array_equal(a, ia) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize("vals", [[100, 200], [100, 200, 300, 400, 500, 600], @@ -162,12 +235,12 @@ def test_place2(arr, mask, vals): ia = dpnp.array(a) m = numpy.array(mask) im = dpnp.array(m) + iv = dpnp.array(vals) numpy.place(a, m, vals) - dpnp.place(ia, im, vals) + dpnp.place(ia, im, iv) assert_array_equal(a, ia) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize("vals", [[100, 200], [100, 200, 300, 400, 500, 600], @@ -187,8 +260,9 @@ def test_place3(arr, mask, vals): ia = dpnp.array(a) m = numpy.array(mask) im = dpnp.array(m) + iv = dpnp.array(vals) numpy.place(a, m, vals) - dpnp.place(ia, im, vals) + dpnp.place(ia, im, iv) assert_array_equal(a, ia) @@ -507,3 +581,22 @@ def test_triu_indices_from(array, k): result = dpnp.triu_indices_from(ia, k) expected = 
numpy.triu_indices_from(a, k) assert_array_equal(expected, result) + + +@pytest.mark.parametrize("cond_dtype", get_all_dtypes()) +@pytest.mark.parametrize("scalar_dtype", get_all_dtypes(no_none=True)) +def test_where_with_scalars(cond_dtype, scalar_dtype): + a = numpy.array([-1, 0, 1, 0], dtype=cond_dtype) + ia = dpnp.array(a) + + result = dpnp.where(ia, scalar_dtype(1), scalar_dtype(0)) + expected = numpy.where(a, scalar_dtype(1), scalar_dtype(0)) + assert_array_equal(expected, result) + + result = dpnp.where(ia, ia*2, scalar_dtype(0)) + expected = numpy.where(a, a*2, scalar_dtype(0)) + assert_array_equal(expected, result) + + result = dpnp.where(ia, scalar_dtype(1), dpnp.array(0)) + expected = numpy.where(a, scalar_dtype(1), numpy.array(0)) + assert_array_equal(expected, result) diff --git a/tests/test_linalg.py b/tests/test_linalg.py index d9784a41558f..d90ac8bf9c6c 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -1,5 +1,8 @@ import pytest -from .helper import get_all_dtypes +from .helper import ( + get_all_dtypes, + get_complex_dtypes +) import dpnp as inp @@ -33,12 +36,12 @@ def vvsort(val, vec, size, xp): unravel_imax = numpy.unravel_index(imax, val.shape) # swap elements in val array - temp = xp.array(val[unravel_i], dtype=vec.dtype, **val_kwargs) + temp = xp.array(val[unravel_i], dtype=val.dtype, **val_kwargs) val[unravel_i] = val[unravel_imax] val[unravel_imax] = temp # swap corresponding columns in vec matrix - temp = xp.array(vec[:, i], dtype=val.dtype, **vec_kwargs) + temp = xp.array(vec[:, i], dtype=vec.dtype, **vec_kwargs) vec[:, i] = vec[:, imax] vec[:, imax] = temp @@ -126,11 +129,49 @@ def test_eig_arange(type, size): assert (dpnp_vec.dtype == np_vec.dtype) assert (dpnp_val.shape == np_val.shape) assert (dpnp_vec.shape == np_vec.shape) + assert (dpnp_val.usm_type == dpnp_symm.usm_type) + assert (dpnp_vec.usm_type == dpnp_symm.usm_type) assert_allclose(dpnp_val, np_val, rtol=1e-05, atol=1e-05) assert_allclose(dpnp_vec, np_vec, 
rtol=1e-05, atol=1e-05) +@pytest.mark.usefixtures("allow_fall_back_on_numpy") +@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_none=True)) +@pytest.mark.parametrize("size", [2, 4, 8]) +def test_eigh_arange(type, size): + a = numpy.arange(size * size, dtype=type).reshape((size, size)) + symm_orig = numpy.tril(a) + numpy.tril(a, -1).T + numpy.diag(numpy.full((size,), size * size, dtype=type)) + symm = symm_orig + dpnp_symm_orig = inp.array(symm) + dpnp_symm = dpnp_symm_orig + + dpnp_val, dpnp_vec = inp.linalg.eigh(dpnp_symm) + np_val, np_vec = numpy.linalg.eigh(symm) + + # DPNP sort val/vec by abs value + vvsort(dpnp_val, dpnp_vec, size, inp) + + # NP sort val/vec by abs value + vvsort(np_val, np_vec, size, numpy) + + # NP change sign of vectors + for i in range(np_vec.shape[1]): + if (np_vec[0, i] * dpnp_vec[0, i]).asnumpy() < 0: + np_vec[:, i] = -np_vec[:, i] + + assert_array_equal(symm_orig, symm) + assert_array_equal(dpnp_symm_orig, dpnp_symm) + + assert (dpnp_val.shape == np_val.shape) + assert (dpnp_vec.shape == np_vec.shape) + assert (dpnp_val.usm_type == dpnp_symm.usm_type) + assert (dpnp_vec.usm_type == dpnp_symm.usm_type) + + assert_allclose(dpnp_val, np_val, rtol=1e-05, atol=1e-04) + assert_allclose(dpnp_vec, np_vec, rtol=1e-05, atol=1e-04) + + @pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) def test_eigvals(type): if dpctl.get_current_device_type() != dpctl.device_type.gpu: diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py index e58e129c03b3..5f0d73b23b7b 100644 --- a/tests/test_mathematical.py +++ b/tests/test_mathematical.py @@ -1,6 +1,7 @@ import pytest from .helper import ( get_all_dtypes, + get_float_complex_dtypes, is_cpu_device, is_win_platform ) @@ -634,34 +635,232 @@ def test_invalid_shape(self, shape): dpnp.trunc(dp_array, out=dp_out) -class TestPower: +class TestAdd: + @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) + def test_add(self, dtype): + array1_data = 
numpy.arange(10) + array2_data = numpy.arange(5, 15) + out = numpy.empty(10, dtype=dtype) + + # DPNP + dp_array1 = dpnp.array(array1_data, dtype=dtype) + dp_array2 = dpnp.array(array2_data, dtype=dtype) + dp_out = dpnp.array(out, dtype=dtype) + result = dpnp.add(dp_array1, dp_array2, out=dp_out) + + # original + np_array1 = numpy.array(array1_data, dtype=dtype) + np_array2 = numpy.array(array2_data, dtype=dtype) + expected = numpy.add(np_array1, np_array2, out=out) + + assert_allclose(expected, result) + assert_allclose(out, dp_out) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) + def test_out_dtypes(self, dtype): + size = 2 if dtype == dpnp.bool else 10 + + np_array1 = numpy.arange(size, 2 * size, dtype=dtype) + np_array2 = numpy.arange(size, dtype=dtype) + np_out = numpy.empty(size, dtype=numpy.complex64) + expected = numpy.add(np_array1, np_array2, out=np_out) + + dp_array1 = dpnp.arange(size, 2 * size, dtype=dtype) + dp_array2 = dpnp.arange(size, dtype=dtype) + dp_out = dpnp.empty(size, dtype=dpnp.complex64) + result = dpnp.add(dp_array1, dp_array2, out=dp_out) + + assert_array_equal(expected, result) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) + def test_out_overlap(self, dtype): + size = 1 if dtype == dpnp.bool else 15 + + np_a = numpy.arange(2 * size, dtype=dtype) + expected = numpy.add(np_a[size::], np_a[::2], out=np_a[:size:]) + + dp_a = dpnp.arange(2 * size, dtype=dtype) + result = dpnp.add(dp_a[size::], dp_a[::2], out=dp_a[:size:]) + + assert_allclose(expected, result) + assert_allclose(dp_a, np_a) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_none=True)) + def test_inplace_strided_out(self, dtype): + size = 21 + + np_a = numpy.arange(size, dtype=dtype) + np_a[::3] += 4 + + dp_a = dpnp.arange(size, dtype=dtype) + dp_a[::3] += 4 + + assert_allclose(dp_a, np_a) + + @pytest.mark.parametrize("shape", + [(0,), (15, ), (2, 2)], + ids=['(0,)', '(15, )', '(2,2)']) + def test_invalid_shape(self, 
shape): + dp_array1 = dpnp.arange(10, dtype=dpnp.float64) + dp_array2 = dpnp.arange(5, 15, dtype=dpnp.float64) + dp_out = dpnp.empty(shape, dtype=dpnp.float64) + + with pytest.raises(ValueError): + dpnp.add(dp_array1, dp_array2, out=dp_out) + + @pytest.mark.parametrize("out", + [4, (), [], (3, 7), [2, 4]], + ids=['4', '()', '[]', '(3, 7)', '[2, 4]']) + def test_invalid_out(self, out): + a = dpnp.arange(10) + + assert_raises(TypeError, dpnp.add, a, 2, out) + assert_raises(TypeError, numpy.add, a.asnumpy(), 2, out) - def test_power(self): + +class TestMultiply: + @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) + def test_multiply(self, dtype): array1_data = numpy.arange(10) array2_data = numpy.arange(5, 15) - out = numpy.empty(10, dtype=numpy.float64) + out = numpy.empty(10, dtype=dtype) # DPNP - dp_array1 = dpnp.array(array1_data, dtype=dpnp.float64) - dp_array2 = dpnp.array(array2_data, dtype=dpnp.float64) - dp_out = dpnp.array(out, dtype=dpnp.float64) + dp_array1 = dpnp.array(array1_data, dtype=dtype) + dp_array2 = dpnp.array(array2_data, dtype=dtype) + dp_out = dpnp.array(out, dtype=dtype) + result = dpnp.multiply(dp_array1, dp_array2, out=dp_out) + + # original + np_array1 = numpy.array(array1_data, dtype=dtype) + np_array2 = numpy.array(array2_data, dtype=dtype) + expected = numpy.multiply(np_array1, np_array2, out=out) + + assert_allclose(expected, result) + assert_allclose(out, dp_out) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) + def test_out_dtypes(self, dtype): + size = 2 if dtype == dpnp.bool else 10 + + np_array1 = numpy.arange(size, 2 * size, dtype=dtype) + np_array2 = numpy.arange(size, dtype=dtype) + np_out = numpy.empty(size, dtype=numpy.complex64) + expected = numpy.multiply(np_array1, np_array2, out=np_out) + + dp_array1 = dpnp.arange(size, 2 * size, dtype=dtype) + dp_array2 = dpnp.arange(size, dtype=dtype) + dp_out = dpnp.empty(size, dtype=dpnp.complex64) + result = dpnp.multiply(dp_array1, dp_array2, 
out=dp_out) + + assert_array_equal(expected, result) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) + def test_out_overlap(self, dtype): + size = 1 if dtype == dpnp.bool else 15 + + np_a = numpy.arange(2 * size, dtype=dtype) + expected = numpy.multiply(np_a[size::], np_a[::2], out=np_a[:size:]) + + dp_a = dpnp.arange(2 * size, dtype=dtype) + result = dpnp.multiply(dp_a[size::], dp_a[::2], out=dp_a[:size:]) + + assert_allclose(expected, result) + assert_allclose(dp_a, np_a) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_none=True)) + def test_inplace_strided_out(self, dtype): + size = 21 + + np_a = numpy.arange(size, dtype=dtype) + np_a[::3] *= 4 + + dp_a = dpnp.arange(size, dtype=dtype) + dp_a[::3] *= 4 + + assert_allclose(dp_a, np_a) + + @pytest.mark.parametrize("shape", + [(0,), (15, ), (2, 2)], + ids=['(0,)', '(15, )', '(2,2)']) + def test_invalid_shape(self, shape): + dp_array1 = dpnp.arange(10, dtype=dpnp.float64) + dp_array2 = dpnp.arange(5, 15, dtype=dpnp.float64) + dp_out = dpnp.empty(shape, dtype=dpnp.float64) + + with pytest.raises(ValueError): + dpnp.multiply(dp_array1, dp_array2, out=dp_out) + + @pytest.mark.parametrize("out", + [4, (), [], (3, 7), [2, 4]], + ids=['4', '()', '[]', '(3, 7)', '[2, 4]']) + def test_invalid_out(self, out): + a = dpnp.arange(10) + + assert_raises(TypeError, dpnp.multiply, a, 2, out) + assert_raises(TypeError, numpy.multiply, a.asnumpy(), 2, out) + + +class TestPower: + @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) + def test_power(self, dtype): + array1_data = numpy.arange(10) + array2_data = numpy.arange(5, 15) + out = numpy.empty(10, dtype=dtype) + + # DPNP + dp_array1 = dpnp.array(array1_data, dtype=dtype) + dp_array2 = dpnp.array(array2_data, dtype=dtype) + dp_out = dpnp.array(out, dtype=dtype) result = dpnp.power(dp_array1, dp_array2, out=dp_out) # original - np_array1 = numpy.array(array1_data, dtype=numpy.float64) - np_array2 = numpy.array(array2_data, 
dtype=numpy.float64) + np_array1 = numpy.array(array1_data, dtype=dtype) + np_array2 = numpy.array(array2_data, dtype=dtype) expected = numpy.power(np_array1, np_array2, out=out) - assert_array_equal(expected, result) + assert_allclose(expected, result) @pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True, no_none=True)) - def test_invalid_dtype(self, dtype): - dp_array1 = dpnp.arange(10, dtype=dpnp.complex64) - dp_array2 = dpnp.arange(5, 15, dtype=dpnp.complex64) - dp_out = dpnp.empty(10, dtype=dtype) + def test_out_dtypes(self, dtype): + size = 2 if dtype == dpnp.bool else 5 - with pytest.raises(ValueError): - dpnp.power(dp_array1, dp_array2, out=dp_out) + np_array1 = numpy.arange(size, 2 * size, dtype=dtype) + np_array2 = numpy.arange(size, dtype=dtype) + np_out = numpy.empty(size, dtype=numpy.complex64) + expected = numpy.power(np_array1, np_array2, out=np_out) + + dp_array1 = dpnp.arange(size, 2 * size, dtype=dtype) + dp_array2 = dpnp.arange(size, dtype=dtype) + dp_out = dpnp.empty(size, dtype=dpnp.complex64) + result = dpnp.power(dp_array1, dp_array2, out=dp_out) + + assert_array_equal(expected, result) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True)) + def test_out_overlap(self, dtype): + size = 5 + + np_a = numpy.arange(2 * size, dtype=dtype) + expected = numpy.power(np_a[size::], np_a[::2], out=np_a[:size:]) + + dp_a = dpnp.arange(2 * size, dtype=dtype) + result = dpnp.power(dp_a[size::], dp_a[::2], out=dp_a[:size:]) + + assert_allclose(expected, result) + assert_allclose(dp_a, np_a) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True)) + def test_inplace_strided_out(self, dtype): + size = 5 + + np_a = numpy.arange(2 * size, dtype=dtype) + np_a[::3] **= 3 + + dp_a = dpnp.arange(2 * size, dtype=dtype) + dp_a[::3] **= 3 + + assert_allclose(dp_a, np_a) @pytest.mark.parametrize("shape", [(0,), (15, ), (2, 2)], @@ -724,3 +923,35 @@ def 
test_float_to_inf(self): dpnp_res = dpnp.array(a) ** dpnp.array(b) assert_allclose(numpy_res, dpnp_res.asnumpy()) + + +@pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True, no_bool=True)) +@pytest.mark.parametrize("axis", [None, 0, 1, 2, 3]) +def test_sum_empty(dtype, axis): + a = numpy.empty((1, 2, 0, 4), dtype=dtype) + numpy_res = a.sum(axis=axis) + dpnp_res = dpnp.array(a).sum(axis=axis) + assert_array_equal(numpy_res, dpnp_res.asnumpy()) + + +@pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True, no_bool=True)) +def test_sum_empty_out(dtype): + a = dpnp.empty((1, 2, 0, 4), dtype=dtype) + out = dpnp.ones(()) + res = a.sum(out=out) + assert_array_equal(out.asnumpy(), res.asnumpy()) + assert_array_equal(out.asnumpy(), numpy.array(0, dtype=dtype)) + + +@pytest.mark.parametrize("shape", [(), (1, 2, 3), (1, 0, 2), (10), (3, 3, 3), (5, 5), (0, 6)]) +@pytest.mark.parametrize("dtype_in", get_all_dtypes(no_complex=True, no_bool=True)) +@pytest.mark.parametrize("dtype_out", get_all_dtypes(no_complex=True, no_bool=True)) +def test_sum(shape, dtype_in, dtype_out): + a_np = numpy.ones(shape, dtype=dtype_in) + a = dpnp.ones(shape, dtype=dtype_in) + axes = [None, 0, 1, 2] + for axis in axes: + if axis is None or axis < a.ndim: + numpy_res = a_np.sum(axis=axis, dtype=dtype_out) + dpnp_res = a.sum(axis=axis, dtype=dtype_out) + assert_array_equal(numpy_res, dpnp_res.asnumpy()) diff --git a/tests/test_random_state.py b/tests/test_random_state.py index 5ce1b759879b..e65e26b4a8ea 100644 --- a/tests/test_random_state.py +++ b/tests/test_random_state.py @@ -57,15 +57,15 @@ def test_distr(self, dtype, usm_type): # default dtype depends on fp64 support by the device dtype = get_default_floating() if dtype is None else dtype - desired = numpy.array([[0.428205496031286, -0.55383273779227 ], - [2.027017795643378, 4.318888073163015], - [2.69080893259102, -1.047967253719708]], dtype=dtype) + expected = numpy.array([[0.428205496031286, -0.55383273779227 ], + 
[2.027017795643378, 4.318888073163015], + [2.69080893259102, -1.047967253719708]], dtype=dtype) # TODO: discuss with opneMKL: there is a difference between CPU and GPU # generated samples since 9 digit while precision=15 for float64 # precision = numpy.finfo(dtype=dtype).precision precision = 8 if dtype == dpnp.float64 else numpy.finfo(dtype=dtype).precision - assert_array_almost_equal(dpnp_data.asnumpy(), desired, decimal=precision) + assert_array_almost_equal(dpnp_data.asnumpy(), expected, decimal=precision) # check if compute follows data isn't broken assert_cfd(dpnp_data, sycl_queue, usm_type) @@ -162,11 +162,11 @@ def test_fallback(self, loc, scale): # dpnp accepts only scalar as low and/or high, in other cases it will be a fallback to numpy actual = data.asnumpy() - desired = numpy.random.RandomState(seed).normal(loc=loc, scale=scale, size=size) + expected = numpy.random.RandomState(seed).normal(loc=loc, scale=scale, size=size) dtype = get_default_floating() precision = numpy.finfo(dtype=dtype).precision - assert_array_almost_equal(actual, desired, decimal=precision) + assert_array_almost_equal(actual, expected, decimal=precision) # check if compute follows data isn't broken assert_cfd(data, sycl_queue) @@ -200,22 +200,22 @@ def test_distr(self, usm_type): dtype = get_default_floating() data = RandomState(seed, sycl_queue=sycl_queue).rand(3, 2, usm_type=usm_type) - desired = numpy.array([[0.7592552667483687, 0.5937560645397753], - [0.257010098779574 , 0.749422621447593 ], - [0.6316644293256104, 0.7411410815548152]], dtype=dtype) + expected = numpy.array([[0.7592552667483687, 0.5937560645397753], + [0.257010098779574 , 0.749422621447593 ], + [0.6316644293256104, 0.7411410815548152]], dtype=dtype) precision = numpy.finfo(dtype=numpy.float64).precision - assert_array_almost_equal(data.asnumpy(), desired, decimal=precision) + assert_array_almost_equal(data.asnumpy(), expected, decimal=precision) assert_cfd(data, sycl_queue, usm_type) # call with the same seed has 
to draw the same values data = RandomState(seed, sycl_queue=sycl_queue).rand(3, 2, usm_type=usm_type) - assert_array_almost_equal(data.asnumpy(), desired, decimal=precision) + assert_array_almost_equal(data.asnumpy(), expected, decimal=precision) assert_cfd(data, sycl_queue, usm_type) - # call with omitted dimensions has to draw the first element from desired + # call with omitted dimensions has to draw the first element from expected data = RandomState(seed, sycl_queue=sycl_queue).rand(usm_type=usm_type) - assert_array_almost_equal(data.asnumpy(), desired[0, 0], decimal=precision) + assert_array_almost_equal(data.asnumpy(), expected[0, 0], decimal=precision) assert_cfd(data, sycl_queue, usm_type) # rand() is an alias on random_sample(), map arguments @@ -276,10 +276,10 @@ def test_distr(self, dtype, usm_type): size=(3, 2), dtype=dtype, usm_type=usm_type) - desired = numpy.array([[4, 1], - [5, 3], - [5, 7]], dtype=numpy.int32) - assert_array_equal(data.asnumpy(), desired) + expected = numpy.array([[4, 1], + [5, 3], + [5, 7]], dtype=numpy.int32) + assert_array_equal(data.asnumpy(), expected) assert_cfd(data, sycl_queue, usm_type) # call with the same seed has to draw the same values @@ -288,15 +288,15 @@ def test_distr(self, dtype, usm_type): size=(3, 2), dtype=dtype, usm_type=usm_type) - assert_array_equal(data.asnumpy(), desired) + assert_array_equal(data.asnumpy(), expected) assert_cfd(data, sycl_queue, usm_type) - # call with omitted dimensions has to draw the first element from desired + # call with omitted dimensions has to draw the first element from expected data = RandomState(seed, sycl_queue=sycl_queue).randint(low=low, high=high, dtype=dtype, usm_type=usm_type) - assert_array_equal(data.asnumpy(), desired[0, 0]) + assert_array_equal(data.asnumpy(), expected[0, 0]) assert_cfd(data, sycl_queue, usm_type) # rand() is an alias on random_sample(), map arguments @@ -311,15 +311,15 @@ def test_distr(self, dtype, usm_type): def test_float_bounds(self): actual = 
RandomState(365852).randint(low=0.6, high=6.789102534, size=(7,)).asnumpy() - desired = numpy.array([4, 4, 3, 3, 1, 0, 3], dtype=numpy.int32) - assert_array_equal(actual, desired) + expected = numpy.array([4, 4, 3, 3, 1, 0, 3], dtype=numpy.int32) + assert_array_equal(actual, expected) def test_negative_bounds(self): actual = RandomState(5143).randint(low=-15.74, high=-3, size=(2, 7)).asnumpy() - desired = numpy.array([[-9, -12, -4, -12, -5, -13, -9], - [-4, -6, -13, -9, -9, -6, -15]], dtype=numpy.int32) - assert_array_equal(actual, desired) + expected = numpy.array([[-9, -12, -4, -12, -5, -13, -9], + [-4, -6, -13, -9, -9, -6, -15]], dtype=numpy.int32) + assert_array_equal(actual, expected) def test_negative_interval(self): @@ -417,8 +417,8 @@ def test_bounds_fallback(self, low, high): # dpnp accepts only scalar as low and/or high, in other cases it will be a fallback to numpy actual = RandomState(seed).randint(low=low, high=high, size=size).asnumpy() - desired = numpy.random.RandomState(seed).randint(low=low, high=high, size=size) - assert_equal(actual, desired) + expected = numpy.random.RandomState(seed).randint(low=low, high=high, size=size) + assert_equal(actual, expected) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -436,8 +436,8 @@ def test_dtype_fallback(self, dtype): # dtype must be int or dpnp.int32, in other cases it will be a fallback to numpy actual = RandomState(seed).randint(low=low, high=high, size=size, dtype=dtype).asnumpy() - desired = numpy.random.RandomState(seed).randint(low=low, high=high, size=size, dtype=dtype) - assert_equal(actual, desired) + expected = numpy.random.RandomState(seed).randint(low=low, high=high, size=size, dtype=dtype) + assert_equal(actual, expected) assert_raises(TypeError, RandomState().randint, dtype=dtype) @@ -459,7 +459,7 @@ def test_distr(self, usm_type): dtype = get_default_floating() data = RandomState(seed, sycl_queue=sycl_queue).randn(3, 2, usm_type=usm_type) - desired = 
numpy.array([[-0.862485623762009, 1.169492612490272], + expected = numpy.array([[-0.862485623762009, 1.169492612490272], [-0.405876118480338, 0.939006537666719], [-0.615075625641019, 0.555260469834381]], dtype=dtype) @@ -467,16 +467,15 @@ def test_distr(self, usm_type): # generated samples since 9 digit while precision=15 for float64 # precision = numpy.finfo(dtype=numpy.float64).precision precision = numpy.finfo(dtype=numpy.float32).precision - assert_array_almost_equal(data.asnumpy(), desired, decimal=precision) + assert_array_almost_equal(data.asnumpy(), expected, decimal=precision) # call with the same seed has to draw the same values data = RandomState(seed, sycl_queue=sycl_queue).randn(3, 2, usm_type=usm_type) - assert_array_almost_equal(data.asnumpy(), desired, decimal=precision) + assert_array_almost_equal(data.asnumpy(), expected, decimal=precision) - # TODO: discuss with oneMKL: return 0.0 instead of the 1st element - # call with omitted dimensions has to draw the first element from desired - # actual = dpnp.asnumpy(RandomState(seed).randn(usm_type=usm_type)) - # assert_array_almost_equal(actual, desired[0, 0], decimal=precision) + # call with omitted dimensions has to draw the first element from expected + actual = dpnp.asnumpy(RandomState(seed).randn(usm_type=usm_type)) + assert_array_almost_equal(actual, expected[0, 0], decimal=precision) # randn() is an alias on standard_normal(), map arguments with mock.patch('dpnp.random.RandomState.standard_normal') as m: @@ -611,21 +610,24 @@ def test_distr(self, usm_type): dtype = get_default_floating() data = RandomState(seed, sycl_queue=sycl_queue).standard_normal(size=(4, 2), usm_type=usm_type) - desired = numpy.array([[0.112455902594571, -0.249919829443642], - [0.702423540827815, 1.548132130318456], - [0.947364919775284, -0.432257289195464], - [0.736848611436872, 1.557284323302839]], dtype=dtype) + expected = numpy.array([[0.112455902594571, -0.249919829443642], + [0.702423540827815, 1.548132130318456], + 
[0.947364919775284, -0.432257289195464], + [0.736848611436872, 1.557284323302839]], dtype=dtype) # TODO: discuss with opneMKL: there is a difference between CPU and GPU # generated samples since 9 digit while precision=15 for float64 # precision = numpy.finfo(dtype=numpy.float64).precision precision = numpy.finfo(dtype=numpy.float32).precision - assert_array_almost_equal(data.asnumpy(), desired, decimal=precision) + assert_array_almost_equal(data.asnumpy(), expected, decimal=precision) - # TODO: discuss with oneMKL: return 0.0 instead of the 1st element - # call with omitted dimensions has to draw the first element from desired - # actual = dpnp.asnumpy(RandomState(seed).standard_normal(usm_type=usm_type)) - # assert_array_almost_equal(actual, desired[0, 0], decimal=precision) + # call with the same seed has to draw the same values + data = RandomState(seed, sycl_queue=sycl_queue).standard_normal(size=(4, 2), usm_type=usm_type) + assert_array_almost_equal(data.asnumpy(), expected, decimal=precision) + + # call with omitted dimensions has to draw the first element from expected + actual = dpnp.asnumpy(RandomState(seed).standard_normal(usm_type=usm_type)) + assert_array_almost_equal(actual, expected[0, 0], decimal=precision) # random_sample() is an alias on uniform(), map arguments with mock.patch('dpnp.random.RandomState.normal') as m: @@ -668,17 +670,17 @@ def test_distr(self, usm_type): dtype = get_default_floating() data = RandomState(seed, sycl_queue=sycl_queue).random_sample(size=(4, 2), usm_type=usm_type) - desired = numpy.array([[0.1887628440745175, 0.2763057765550911], - [0.3973943444434553, 0.2975987731479108], - [0.4144027342554182, 0.2636592474300414], - [0.6129623607266694, 0.2596735346596688]], dtype=dtype) + expected = numpy.array([[0.1887628440745175, 0.2763057765550911], + [0.3973943444434553, 0.2975987731479108], + [0.4144027342554182, 0.2636592474300414], + [0.6129623607266694, 0.2596735346596688]], dtype=dtype) precision = 
numpy.finfo(dtype=dtype).precision - assert_array_almost_equal(data.asnumpy(), desired, decimal=precision) + assert_array_almost_equal(data.asnumpy(), expected, decimal=precision) - # call with omitted dimensions has to draw the first element from desired + # call with omitted dimensions has to draw the first element from expected data = RandomState(seed, sycl_queue=sycl_queue).random_sample(usm_type=usm_type) - assert_array_almost_equal(data.asnumpy(), desired[0, 0], decimal=precision) + assert_array_almost_equal(data.asnumpy(), expected[0, 0], decimal=precision) # random_sample() is an alias on uniform(), map arguments with mock.patch('dpnp.random.RandomState.uniform') as m: @@ -745,15 +747,15 @@ def test_distr(self, bounds, dtype, usm_type): # default dtype depends on fp64 support by the device dtype = get_default_floating() if dtype is None else dtype if dtype != dpnp.int32: - desired = numpy.array([[4.023770128630567, 8.87456423597643 ], - [2.888630247435067, 4.823004481580574], - [2.030351535445079, 4.533497077834326]]) - assert_array_almost_equal(actual, desired, decimal=numpy.finfo(dtype=dtype).precision) + expected = numpy.array([[4.023770128630567, 8.87456423597643 ], + [2.888630247435067, 4.823004481580574], + [2.030351535445079, 4.533497077834326]]) + assert_array_almost_equal(actual, expected, decimal=numpy.finfo(dtype=dtype).precision) else: - desired = numpy.array([[3, 8], + expected = numpy.array([[3, 8], [2, 4], [1, 4]]) - assert_array_equal(actual, desired) + assert_array_equal(actual, expected) # check if compute follows data isn't broken assert_cfd(dpnp_data, sycl_queue, usm_type) @@ -782,12 +784,12 @@ def test_low_high_equal(self, dtype, usm_type): # default dtype depends on fp64 support by the device dtype = get_default_floating() if dtype is None else dtype - desired = numpy.full(shape=shape, fill_value=low, dtype=dtype) + expected = numpy.full(shape=shape, fill_value=low, dtype=dtype) if dtype == dpnp.int32: - assert_array_equal(actual, 
desired) + assert_array_equal(actual, expected) else: - assert_array_almost_equal(actual, desired, decimal=numpy.finfo(dtype=dtype).precision) + assert_array_almost_equal(actual, expected, decimal=numpy.finfo(dtype=dtype).precision) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -822,11 +824,11 @@ def test_fallback(self, low, high): # dpnp accepts only scalar as low and/or high, in other cases it will be a fallback to numpy actual = data.asnumpy() - desired = numpy.random.RandomState(seed).uniform(low=low, high=high, size=size) + expected = numpy.random.RandomState(seed).uniform(low=low, high=high, size=size) dtype = get_default_floating() precision = numpy.finfo(dtype=dtype).precision - assert_array_almost_equal(actual, desired, decimal=precision) + assert_array_almost_equal(actual, expected, decimal=precision) # check if compute follows data isn't broken assert_cfd(data, sycl_queue) diff --git a/tests/test_sort.py b/tests/test_sort.py index aa633c0c3ad9..cfcef2c3db05 100644 --- a/tests/test_sort.py +++ b/tests/test_sort.py @@ -1,16 +1,18 @@ import pytest +from .helper import get_all_dtypes import dpnp import numpy +from numpy.testing import ( + assert_array_equal +) @pytest.mark.parametrize("kth", [0, 1], ids=['0', '1']) -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) @pytest.mark.parametrize("array", [[3, 4, 2, 1], [[1, 0], [3, 0]], @@ -25,11 +27,14 @@ '[[[1, -3], [3, 0]], [[5, 2], [0, 1]], [[1, 0], [0, 1]]]', '[[[[8, 2], [3, 0]], [[5, 2], [0, 1]]], [[[1, 3], [3, 1]], [[5, 2], [0, 1]]]]']) def test_partition(array, dtype, kth): - a = numpy.array(array, dtype) - ia = dpnp.array(array, dtype) - expected = numpy.partition(a, kth) - result = dpnp.partition(ia, kth) - numpy.testing.assert_array_equal(expected, result) + a = dpnp.array(array, dtype) + p = dpnp.partition(a, kth) + + # TODO: remove once 
dpnp.less_equal() support complex types + p = p.asnumpy() + + assert (p[..., 0:kth] <= p[..., kth:kth + 1]).all() + assert (p[..., kth:kth + 1] <= p[..., kth + 1:]).all() @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -77,4 +82,4 @@ def test_searchsorted(array, dtype, v_, side): iv = dpnp.array(v_, dtype) expected = numpy.searchsorted(a, v, side=side) result = dpnp.searchsorted(ia, iv, side=side) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) diff --git a/tests/test_statistics.py b/tests/test_statistics.py index 04a765a73bce..d9879244f098 100644 --- a/tests/test_statistics.py +++ b/tests/test_statistics.py @@ -1,5 +1,5 @@ import pytest - +from .helper import get_all_dtypes import dpnp import numpy @@ -114,3 +114,18 @@ def test_bincount_weights(self, array, weights): expected = numpy.bincount(np_a, weights=weights) result = dpnp.bincount(dpnp_a, weights=weights) numpy.testing.assert_array_equal(expected, result) + +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_none=True, no_complex=True)) +def test_cov_rowvar(dtype): + a = dpnp.array([[0, 2], [1, 1], [2, 0]], dtype=dtype) + b = numpy.array([[0, 2], [1, 1], [2, 0]], dtype=dtype) + numpy.testing.assert_array_equal(dpnp.cov(a.T), dpnp.cov(a,rowvar=False)) + numpy.testing.assert_array_equal(numpy.cov(b,rowvar=False), dpnp.cov(a,rowvar=False)) + +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_none=True, no_complex=True)) +def test_cov_1D_rowvar(dtype): + a = dpnp.array([[0, 1, 2]], dtype=dtype) + b = numpy.array([[0, 1, 2]], dtype=dtype) + numpy.testing.assert_array_equal(numpy.cov(b,rowvar=False), dpnp.cov(a,rowvar=False)) + + diff --git a/tests/test_strides.py b/tests/test_strides.py index e56e9befeee4..84449db23d61 100644 --- a/tests/test_strides.py +++ b/tests/test_strides.py @@ -1,6 +1,6 @@ import math import pytest -from .helper import get_all_dtypes +from .helper import get_all_dtypes, is_cpu_device import dpnp @@ -68,19 
+68,16 @@ def test_strides_1arg(func_name, dtype, shape): [(10,)], ids=["(10,)"]) def test_strides_erf(dtype, shape): - a = numpy.arange(numpy.prod(shape), dtype=dtype).reshape(shape) + a = dpnp.reshape(dpnp.linspace(-1, 1, num=numpy.prod(shape), dtype=dtype), shape) b = a[::2] - dpa = dpnp.reshape(dpnp.arange(numpy.prod(shape), dtype=dtype), shape) - dpb = dpa[::2] - - result = dpnp.erf(dpb) + result = dpnp.erf(b) - expected = numpy.empty_like(b) + expected = numpy.empty_like(b.asnumpy()) for idx, val in enumerate(b): expected[idx] = math.erf(val) - assert_allclose(result, expected) + assert_allclose(result, expected, rtol=1e-06) @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) @@ -214,3 +211,102 @@ def test_strides_true_devide(dtype, shape): expected = numpy.fmod(a, b) assert_allclose(result, expected) + + +@pytest.mark.parametrize("func_name", + ["add", "multiply", "power"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) +def test_strided_out_2args(func_name, dtype): + np_out = numpy.ones((5, 3, 2))[::3] + np_a = numpy.arange(numpy.prod(np_out.shape), dtype=dtype).reshape(np_out.shape) + np_b = numpy.full(np_out.shape, fill_value=0.7, dtype=dtype) + + dp_out = dpnp.ones((5, 3, 2))[::3] + dp_a = dpnp.array(np_a) + dp_b = dpnp.array(np_b) + + np_res = _getattr(numpy, func_name)(np_a, np_b, out=np_out) + dp_res = _getattr(dpnp, func_name)(dp_a, dp_b, out=dp_out) + + assert_allclose(dp_res.asnumpy(), np_res) + assert_allclose(dp_out.asnumpy(), np_out) + + +@pytest.mark.parametrize("func_name", + ["add", "multiply", "power"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) +def test_strided_in_out_2args(func_name, dtype): + sh = (3, 4, 2) + prod = numpy.prod(sh) + + np_out = numpy.ones(sh, dtype=dtype)[::2] + np_a = numpy.arange(prod, dtype=dtype).reshape(sh)[::2] + np_b = numpy.full(sh, fill_value=0.7, dtype=dtype)[::2].T + + dp_out = dpnp.ones(sh, dtype=dtype)[::2] 
+ dp_a = dpnp.arange(prod, dtype=dtype).reshape(sh)[::2] + dp_b = dpnp.full(sh, fill_value=0.7, dtype=dtype)[::2].T + + np_res = _getattr(numpy, func_name)(np_a, np_b, out=np_out) + dp_res = _getattr(dpnp, func_name)(dp_a, dp_b, out=dp_out) + + assert_allclose(dp_res.asnumpy(), np_res, rtol=1e-06) + assert_allclose(dp_out.asnumpy(), np_out, rtol=1e-06) + + +@pytest.mark.parametrize("func_name", + ["add", "multiply", "power"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) +def test_strided_in_out_2args_diff_out_dtype(func_name, dtype): + sh = (3, 3, 2) + prod = numpy.prod(sh) + + np_out = numpy.ones(sh, dtype=numpy.complex64)[::2] + np_a = numpy.arange(prod, dtype=dtype).reshape(sh)[::2].T + np_b = numpy.full(sh, fill_value=0.7, dtype=dtype)[::2] + + dp_out = dpnp.ones(sh, dtype=dpnp.complex64)[::2] + dp_a = dpnp.arange(prod, dtype=dtype).reshape(sh)[::2].T + dp_b = dpnp.full(sh, fill_value=0.7, dtype=dtype)[::2] + + np_res = _getattr(numpy, func_name)(np_a, np_b, out=np_out) + dp_res = _getattr(dpnp, func_name)(dp_a, dp_b, out=dp_out) + + assert_allclose(dp_res.asnumpy(), np_res, rtol=1e-06) + assert_allclose(dp_out.asnumpy(), np_out, rtol=1e-06) + + +@pytest.mark.parametrize("func_name", + ["add", "multiply", "power"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True)) +def test_strided_in_2args_overlap(func_name, dtype): + size = 5 + + np_a = numpy.arange(2 * size, dtype=dtype) + dp_a = dpnp.arange(2 * size, dtype=dtype) + + np_res = _getattr(numpy, func_name)(np_a[size::], np_a[::2], out=np_a[:size:]) + dp_res = _getattr(dpnp, func_name)(dp_a[size::], dp_a[::2], out=dp_a[:size:]) + + assert_allclose(dp_res.asnumpy(), np_res, rtol=1e-06) + assert_allclose(dp_a.asnumpy(), np_a, rtol=1e-06) + + +@pytest.mark.parametrize("func_name", + ["add", "multiply", "power"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True)) +def 
test_strided_in_out_2args_overlap(func_name, dtype): + sh = (4, 3, 2) + prod = numpy.prod(sh) + + np_a = numpy.arange(prod, dtype=dtype).reshape(sh) + np_b = numpy.full(np_a[::2].shape, fill_value=0.7, dtype=dtype) + + dp_a = dpnp.arange(prod, dtype=dtype).reshape(sh) + dp_b = dpnp.full(dp_a[::2].shape, fill_value=0.7, dtype=dtype) + + np_res = _getattr(numpy, func_name)(np_a[::2], np_b, out=np_a[1::2]) + dp_res = _getattr(dpnp, func_name)(dp_a[::2], dp_b, out=dp_a[1::2]) + + assert_allclose(dp_res.asnumpy(), np_res, rtol=1e-06) + assert_allclose(dp_a.asnumpy(), np_a, rtol=1e-06) diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index a523c46465bf..e5e53646e1a1 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -9,6 +9,7 @@ import numpy from numpy.testing import ( + assert_allclose, assert_array_equal, assert_raises ) @@ -218,7 +219,7 @@ def test_array_creation_cross_device(func, args, kwargs, device_x, device_y): dpnp_kwargs = dict(kwargs) dpnp_kwargs['device'] = device_y - + y = getattr(dpnp, func)(*dpnp_args, **dpnp_kwargs) numpy.testing.assert_allclose(y_orig, y) @@ -279,6 +280,8 @@ def test_meshgrid(device_x, device_y): [1., 2.]), pytest.param("sign", [-5., 4.5]), + pytest.param("sqrt", + [1., 3., 9.]), pytest.param("sum", [1., 2.]), pytest.param("trapz", @@ -297,7 +300,7 @@ def test_1in_1out(func, data, device): x = dpnp.array(data, device=device) result = getattr(dpnp, func)(x) - assert_array_equal(result, expected) + assert_allclose(result, expected) expected_queue = x.get_array().sycl_queue result_queue = result.get_array().sycl_queue @@ -529,6 +532,33 @@ def test_random_state(func, args, kwargs, device, usm_type): assert_sycl_queue_equal(res_array.sycl_queue, sycl_queue) +@pytest.mark.usefixtures("allow_fall_back_on_numpy") +@pytest.mark.parametrize( + "func,data", + [ + pytest.param("sqrt", + [0., 1., 2., 3., 4., 5., 6., 7., 8.]), + ], +) +@pytest.mark.parametrize("device", + valid_devices, + ids=[device.filter_string 
for device in valid_devices]) +def test_out_1in_1out(func, data, device): + x_orig = numpy.array(data) + np_out = getattr(numpy, func)(x_orig) + expected = numpy.empty_like(np_out) + getattr(numpy, func)(x_orig, out=expected) + + x = dpnp.array(data, device=device) + dp_out = getattr(dpnp, func)(x) + result = dpnp.empty_like(dp_out) + getattr(dpnp, func)(x, out=result) + + assert_allclose(result, expected) + + assert_sycl_queue_equal(result.sycl_queue, x.sycl_queue) + + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize( "func,data1,data2", @@ -574,7 +604,7 @@ def test_random_state(func, args, kwargs, device, usm_type): @pytest.mark.parametrize("device", valid_devices, ids=[device.filter_string for device in valid_devices]) -def test_out(func, data1, data2, device): +def test_out_2in_1out(func, data1, data2, device): x1_orig = numpy.array(data1) x2_orig = numpy.array(data2) np_out = getattr(numpy, func)(x1_orig, x2_orig) @@ -739,6 +769,38 @@ def test_eig(device): assert_sycl_queue_equal(dpnp_vec_queue, expected_queue) +@pytest.mark.usefixtures("allow_fall_back_on_numpy") +@pytest.mark.parametrize("device", + valid_devices, + ids=[device.filter_string for device in valid_devices]) +def test_eigh(device): + size = 4 + a = numpy.arange(size * size, dtype=numpy.float64).reshape((size, size)) + symm_orig = numpy.tril(a) + numpy.tril(a, -1).T + numpy.diag(numpy.full((size,), size * size, dtype=numpy.float64)) + numpy_data = symm_orig + dpnp_symm_orig = dpnp.array(numpy_data, device=device) + dpnp_data = dpnp_symm_orig + + dpnp_val, dpnp_vec = dpnp.linalg.eigh(dpnp_data) + numpy_val, numpy_vec = numpy.linalg.eigh(numpy_data) + + assert_allclose(dpnp_val, numpy_val, rtol=1e-05, atol=1e-05) + assert_allclose(dpnp_vec, numpy_vec, rtol=1e-05, atol=1e-05) + + assert (dpnp_val.dtype == numpy_val.dtype) + assert (dpnp_vec.dtype == numpy_vec.dtype) + assert (dpnp_val.shape == numpy_val.shape) + assert (dpnp_vec.shape == numpy_vec.shape) + + 
expected_queue = dpnp_data.get_array().sycl_queue + dpnp_val_queue = dpnp_val.get_array().sycl_queue + dpnp_vec_queue = dpnp_vec.get_array().sycl_queue + + # compare queue and device + assert_sycl_queue_equal(dpnp_val_queue, expected_queue) + assert_sycl_queue_equal(dpnp_vec_queue, expected_queue) + + @pytest.mark.parametrize("device", valid_devices, ids=[device.filter_string for device in valid_devices]) @@ -945,3 +1007,15 @@ def test_broadcast_to(device): x = dpnp.arange(5, device=device) y = dpnp.broadcast_to(x, (3, 5)) assert_sycl_queue_equal(x.sycl_queue, y.sycl_queue) + + +@pytest.mark.parametrize("device_x", + valid_devices, + ids=[device.filter_string for device in valid_devices]) +@pytest.mark.parametrize("device_y", + valid_devices, + ids=[device.filter_string for device in valid_devices]) +def test_asarray(device_x, device_y): + x = dpnp.array([1, 2, 3], device=device_x) + y = dpnp.asarray([x, x, x], device=device_y) + assert_sycl_queue_equal(y.sycl_queue, x.to_device(device_y).sycl_queue) diff --git a/tests/test_umath.py b/tests/test_umath.py index 6122b253ca37..7b5c4b762d88 100644 --- a/tests/test_umath.py +++ b/tests/test_umath.py @@ -1,6 +1,15 @@ import pytest +from .helper import ( + get_all_dtypes, + get_float_dtypes +) import numpy +from numpy.testing import ( + assert_allclose, + assert_array_equal +) + import dpnp # full list of umaths @@ -71,7 +80,7 @@ def test_umaths(test_cases): # DPNP result = getattr(dpnp, umath)(*iargs) - numpy.testing.assert_allclose(result, expected, rtol=1e-6) + assert_allclose(result, expected, rtol=1e-6) class TestSin: @@ -89,7 +98,7 @@ def test_sin_ordinary(self): np_array = numpy.array(array_data, dtype=numpy.float64) expected = numpy.sin(np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], @@ -129,7 +138,7 @@ def test_cos(self): np_array = numpy.array(array_data, 
dtype=numpy.float64) expected = numpy.cos(np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], @@ -169,7 +178,7 @@ def test_log(self): np_array = numpy.array(array_data, dtype=numpy.float64) expected = numpy.log(np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], @@ -209,7 +218,7 @@ def test_exp(self): np_array = numpy.array(array_data, dtype=numpy.float64) expected = numpy.exp(np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], @@ -249,7 +258,7 @@ def test_arcsin(self): np_array = numpy.array(array_data, dtype=numpy.float64) expected = numpy.arcsin(np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], @@ -289,7 +298,7 @@ def test_arctan(self): np_array = numpy.array(array_data, dtype=numpy.float64) expected = numpy.arctan(np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], @@ -329,7 +338,7 @@ def test_tan(self): np_array = numpy.array(array_data, dtype=numpy.float64) expected = numpy.tan(np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], @@ -355,7 +364,6 @@ def test_invalid_shape(self, shape): class TestArctan2: - def test_arctan2(self): array_data = numpy.arange(10) out = numpy.empty(10, dtype=numpy.float64) @@ -369,26 +377,79 @@ def test_arctan2(self): np_array = 
numpy.array(array_data, dtype=numpy.float64) expected = numpy.arctan2(np_array, np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True)) + def test_out_dtypes(self, dtype): + size = 2 if dtype == dpnp.bool else 10 + + np_array = numpy.arange(size, dtype=dtype) + np_out = numpy.empty(size, dtype=numpy.complex64) + expected = numpy.arctan2(np_array, np_array, out=np_out) + + dp_array = dpnp.arange(size, dtype=dtype) + dp_out = dpnp.empty(size, dtype=dpnp.complex64) + result = dpnp.arctan2(dp_array, dp_array, out=dp_out) + + assert_allclose(expected, result) + + @pytest.mark.parametrize("shape", + [(0,), (15, ), (2, 2)], + ids=['(0,)', '(15, )', '(2,2)']) + def test_invalid_shape(self, shape): + + dp_array = dpnp.arange(10, dtype=dpnp.float64) + dp_out = dpnp.empty(shape, dtype=dpnp.float64) + + with pytest.raises(ValueError): + dpnp.arctan2(dp_array, dp_array, out=dp_out) + + +class TestSqrt: + @pytest.mark.parametrize("dtype", get_float_dtypes()) + def test_sqrt_ordinary(self, dtype): + array_data = numpy.arange(10) + out = numpy.empty(10, dtype=dtype) + + # DPNP + dp_array = dpnp.array(array_data, dtype=dtype) + dp_out = dpnp.array(out, dtype=dtype) + result = dpnp.sqrt(dp_array, out=dp_out) + + # original + np_array = numpy.array(array_data, dtype=dtype) + expected = numpy.sqrt(np_array, out=out) + + numpy.testing.assert_allclose(expected, result) + numpy.testing.assert_allclose(out, dp_out) @pytest.mark.parametrize("dtype", - [numpy.float32, numpy.int64, numpy.int32], - ids=['numpy.float32', 'numpy.int64', 'numpy.int32']) + [numpy.int64, numpy.int32], + ids=['numpy.int64', 'numpy.int32']) def test_invalid_dtype(self, dtype): - dp_array = dpnp.arange(10, dtype=dpnp.float64) + dp_array = dpnp.arange(10, dtype=dpnp.float32) dp_out = dpnp.empty(10, dtype=dtype) with pytest.raises(ValueError): - dpnp.arctan2(dp_array, 
dp_array, out=dp_out) + dpnp.sqrt(dp_array, out=dp_out) @pytest.mark.parametrize("shape", [(0,), (15, ), (2, 2)], ids=['(0,)', '(15, )', '(2,2)']) def test_invalid_shape(self, shape): - dp_array = dpnp.arange(10, dtype=dpnp.float64) - dp_out = dpnp.empty(shape, dtype=dpnp.float64) + dp_array = dpnp.arange(10, dtype=dpnp.float32) + dp_out = dpnp.empty(shape, dtype=dpnp.float32) with pytest.raises(ValueError): - dpnp.arctan2(dp_array, dp_array, out=dp_out) + dpnp.sqrt(dp_array, out=dp_out) + + @pytest.mark.parametrize("out", + [4, (), [], (3, 7), [2, 4]], + ids=['4', '()', '[]', '(3, 7)', '[2, 4]']) + def test_invalid_out(self, out): + a = dpnp.arange(10) + + numpy.testing.assert_raises(TypeError, dpnp.sqrt, a, out) + numpy.testing.assert_raises(TypeError, numpy.sqrt, a.asnumpy(), out) diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index 817bdee66a57..df8575197b38 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -18,6 +18,8 @@ def test_coerced_usm_types_sum(usm_type_x, usm_type_y): y = dp.arange(1000, usm_type = usm_type_y) z = 1.3 + x + y + 2 + z += x + z += 7.4 assert x.usm_type == usm_type_x assert y.usm_type == usm_type_y @@ -31,6 +33,8 @@ def test_coerced_usm_types_mul(usm_type_x, usm_type_y): y = dp.arange(10, usm_type = usm_type_y) z = 3 * x * y * 1.5 + z *= x + z *= 4.8 assert x.usm_type == usm_type_x assert y.usm_type == usm_type_y @@ -174,6 +178,23 @@ def test_meshgrid(usm_type_x, usm_type_y): assert z[1].usm_type == usm_type_y +@pytest.mark.parametrize( + "func,data", + [ + pytest.param( + "sqrt", + [1.0, 3.0, 9.0], + ), + ], +) +@pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types) +def test_1in_1out(func, data, usm_type): + x = dp.array(data, usm_type=usm_type) + res = getattr(dp, func)(x) + assert x.usm_type == usm_type + assert res.usm_type == usm_type + + @pytest.mark.parametrize( "func,data1,data2", [ diff --git a/tests/third_party/cupy/creation_tests/test_ranges.py 
b/tests/third_party/cupy/creation_tests/test_ranges.py index ac94297354f0..11e1d7f96048 100644 --- a/tests/third_party/cupy/creation_tests/test_ranges.py +++ b/tests/third_party/cupy/creation_tests/test_ranges.py @@ -192,7 +192,7 @@ def test_linspace_array_start_stop_axis1(self, xp, dtype_range, dtype_out): @testing.with_requires('numpy>=1.16') @testing.for_complex_dtypes() - @testing.numpy_cupy_array_equal() + @testing.numpy_cupy_allclose() def test_linspace_complex_start_stop(self, xp, dtype): start = xp.array([0, 120], dtype=dtype) stop = xp.array([100, 0], dtype=dtype) diff --git a/tests/third_party/cupy/indexing_tests/test_indexing.py b/tests/third_party/cupy/indexing_tests/test_indexing.py index 1cdab954bbe8..e8292831b747 100644 --- a/tests/third_party/cupy/indexing_tests/test_indexing.py +++ b/tests/third_party/cupy/indexing_tests/test_indexing.py @@ -166,6 +166,7 @@ def test_extract_no_bool(self, xp, dtype): b = xp.array([[1, 0, 1], [0, 1, 0], [1, 0, 1]], dtype=dtype) return xp.extract(b, a) + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.numpy_cupy_array_equal() def test_extract_shape_mismatch(self, xp): a = testing.shaped_arange((2, 3), xp) @@ -174,6 +175,7 @@ def test_extract_shape_mismatch(self, xp): [True, False]]) return xp.extract(b, a) + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.numpy_cupy_array_equal() def test_extract_size_mismatch(self, xp): a = testing.shaped_arange((3, 3), xp) @@ -181,6 +183,7 @@ def test_extract_size_mismatch(self, xp): [False, True, False]]) return xp.extract(b, a) + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.numpy_cupy_array_equal() def test_extract_size_mismatch2(self, xp): a = testing.shaped_arange((3, 3), xp) @@ -188,6 +191,7 @@ def test_extract_size_mismatch2(self, xp): [False, True, False, True]]) return xp.extract(b, a) + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.numpy_cupy_array_equal() def test_extract_empty_1dim(self, xp): a = 
testing.shaped_arange((3, 3), xp) diff --git a/tests/third_party/cupy/indexing_tests/test_insert.py b/tests/third_party/cupy/indexing_tests/test_insert.py index fdcc5357e19e..809c44b66dae 100644 --- a/tests/third_party/cupy/indexing_tests/test_insert.py +++ b/tests/third_party/cupy/indexing_tests/test_insert.py @@ -39,6 +39,7 @@ class TestPlaceRaises(unittest.TestCase): # https://github.com/numpy/numpy/pull/5821 @testing.with_requires('numpy>=1.10') @testing.for_all_dtypes() + @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_place_empty_value_error(self, dtype): for xp in (numpy, cupy): a = testing.shaped_arange(self.shape, xp, dtype) diff --git a/tests/third_party/cupy/linalg_tests/test_eigenvalue.py b/tests/third_party/cupy/linalg_tests/test_eigenvalue.py new file mode 100644 index 000000000000..fe577e32b285 --- /dev/null +++ b/tests/third_party/cupy/linalg_tests/test_eigenvalue.py @@ -0,0 +1,199 @@ +import unittest + +import numpy +import pytest + +import dpnp as cupy +from tests.third_party.cupy import testing + + +def _get_hermitian(xp, a, UPLO): + # TODO: fix this, currently dpnp.transpose() doesn't support complex types + # and no dpnp_array.swapaxes() + a = _wrap_as_numpy_array(xp, a) + _xp = numpy + + if UPLO == 'U': + _a = _xp.triu(a) + _xp.triu(a, k=1).swapaxes(-2, -1).conj() + else: + _a = _xp.tril(a) + _xp.tril(a, k=-1).swapaxes(-2, -1).conj() + return xp.array(_a) + +# TODO: remove once all required functionality is supported +def _wrap_as_numpy_array(xp, a): + return a.asnumpy() if xp is cupy else a + +@testing.parameterize(*testing.product({ + 'UPLO': ['U', 'L'], +})) +class TestEigenvalue(unittest.TestCase): + + @testing.for_all_dtypes() + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4, contiguous_check=False) + def test_eigh(self, xp, dtype): + if xp == numpy and dtype == numpy.float16: + # NumPy's eigh does not support float16 + _dtype = 'f' + else: + _dtype = dtype + if numpy.dtype(_dtype).kind == 'c': + a = xp.array([[1, 2j, 3], 
[4j, 5, 6j], [7, 8j, 9]], _dtype) + else: + a = xp.array([[1, 0, 3], [0, 5, 0], [7, 0, 9]], _dtype) + w, v = xp.linalg.eigh(a, UPLO=self.UPLO) + + # Changed the verification method to check if Av and vw match, since + # the eigenvectors of eigh() with CUDA 11.6 are mathematically correct + # but may not match NumPy. + A = _get_hermitian(xp, a, self.UPLO) + if _dtype == numpy.float16: + tol = 1e-3 + else: + tol = 1e-5 + + # TODO: remove _wrap_as_numpy_array() once @ support complex types + testing.assert_allclose(_wrap_as_numpy_array(xp, A) @ _wrap_as_numpy_array(xp, v), + _wrap_as_numpy_array(xp, v) @ numpy.diag(_wrap_as_numpy_array(xp, w)), + atol=tol, rtol=tol) + + # Check if v @ vt is an identity matrix + testing.assert_allclose(_wrap_as_numpy_array(xp, v) @ _wrap_as_numpy_array(xp, v).swapaxes(-2, -1).conj(), + numpy.identity(_wrap_as_numpy_array(xp, A).shape[-1], _dtype), atol=tol, + rtol=tol) + if xp == numpy and dtype == numpy.float16: + w = w.astype('e') + return w + + @testing.for_all_dtypes(no_bool=True, no_float16=True, no_complex=True) + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4, contiguous_check=False) + def test_eigh_batched(self, xp, dtype): + a = xp.array([[[1, 0, 3], [0, 5, 0], [7, 0, 9]], + [[3, 0, 3], [0, 7, 0], [7, 0, 11]]], dtype) + w, v = xp.linalg.eigh(a, UPLO=self.UPLO) + + # NumPy, cuSOLVER, rocSOLVER all sort in ascending order, + # so w's should be directly comparable. However, both cuSOLVER + # and rocSOLVER pick a different convention for constructing + # eigenvectors, so v's are not directly comparible and we verify + # them through the eigen equation A*v=w*v. 
+ A = _get_hermitian(xp, a, self.UPLO) + for i in range(a.shape[0]): + testing.assert_allclose( + A[i].dot(v[i]), w[i]*v[i], rtol=1e-5, atol=1e-5) + return w + + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4, contiguous_check=False) + def test_eigh_complex_batched(self, xp, dtype): + print() + a = xp.array([[[1, 2j, 3], [4j, 5, 6j], [7, 8j, 9]], + [[0, 2j, 3], [4j, 4, 6j], [7, 8j, 8]]], dtype) + w, v = xp.linalg.eigh(a, UPLO=self.UPLO) + + # NumPy, cuSOLVER, rocSOLVER all sort in ascending order, + # so w's should be directly comparable. However, both cuSOLVER + # and rocSOLVER pick a different convention for constructing + # eigenvectors, so v's are not directly comparible and we verify + # them through the eigen equation A*v=w*v. + A = _get_hermitian(xp, a, self.UPLO) + + # TODO: remove _wrap_as_numpy_array() once dpnp.dot() support complex types + A = _wrap_as_numpy_array(xp, A) + v = _wrap_as_numpy_array(xp, v) + w = _wrap_as_numpy_array(xp, w) + + for i in range(a.shape[0]): + testing.assert_allclose( + A[i].dot(v[i]), w[i]*v[i], rtol=1e-5, atol=1e-5) + return w + + @pytest.mark.skip("No support of dpnp.eigvalsh()") + @testing.for_all_dtypes(no_float16=True, no_complex=True) + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_eigvalsh(self, xp, dtype): + a = xp.array([[1, 0, 3], [0, 5, 0], [7, 0, 9]], dtype) + w = xp.linalg.eigvalsh(a, UPLO=self.UPLO) + # NumPy, cuSOLVER, rocSOLVER all sort in ascending order, + # so they should be directly comparable + return w + + @pytest.mark.skip("No support of dpnp.eigvalsh()") + @testing.for_all_dtypes(no_float16=True, no_complex=True) + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_eigvalsh_batched(self, xp, dtype): + a = xp.array([[[1, 0, 3], [0, 5, 0], [7, 0, 9]], + [[3, 0, 3], [0, 7, 0], [7, 0, 11]]], dtype) + w = xp.linalg.eigvalsh(a, UPLO=self.UPLO) + # NumPy, cuSOLVER, rocSOLVER all sort in ascending order, + # so they should be directly comparable + 
return w + + @pytest.mark.skip("No support of dpnp.eigvalsh()") + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_eigvalsh_complex(self, xp, dtype): + a = xp.array([[1, 2j, 3], [4j, 5, 6j], [7, 8j, 9]], dtype) + w = xp.linalg.eigvalsh(a, UPLO=self.UPLO) + # NumPy, cuSOLVER, rocSOLVER all sort in ascending order, + # so they should be directly comparable + return w + + @pytest.mark.skip("No support of dpnp.eigvalsh()") + @testing.for_complex_dtypes() + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_eigvalsh_complex_batched(self, xp, dtype): + a = xp.array([[[1, 2j, 3], [4j, 5, 6j], [7, 8j, 9]], + [[0, 2j, 3], [4j, 4, 6j], [7, 8j, 8]]], dtype) + w = xp.linalg.eigvalsh(a, UPLO=self.UPLO) + # NumPy, cuSOLVER, rocSOLVER all sort in ascending order, + # so they should be directly comparable + return w + + +@testing.parameterize(*testing.product({ + 'UPLO': ['U', 'L'], + 'shape': [(0, 0), + (2, 0, 0), + (0, 3, 3)] +})) +class TestEigenvalueEmpty(unittest.TestCase): + + @testing.for_dtypes('ifdFD') + @testing.numpy_cupy_allclose() + def test_eigh(self, xp, dtype): + a = xp.empty(self.shape, dtype=dtype) + assert a.size == 0 + return xp.linalg.eigh(a, UPLO=self.UPLO) + + @pytest.mark.skip("No support of dpnp.eigvalsh()") + @testing.for_dtypes('ifdFD') + @testing.numpy_cupy_allclose() + def test_eigvalsh(self, xp, dtype): + a = xp.empty(self.shape, dtype) + assert a.size == 0 + return xp.linalg.eigvalsh(a, UPLO=self.UPLO) + + +@testing.parameterize(*testing.product({ + 'UPLO': ['U', 'L'], + 'shape': [(), + (3,), + (2, 3), + (4, 0), + (2, 2, 3), + (0, 2, 3)] +})) +class TestEigenvalueInvalid(unittest.TestCase): + + def test_eigh_shape_error(self): + for xp in (numpy, cupy): + a = xp.zeros(self.shape) + with pytest.raises((numpy.linalg.LinAlgError, ValueError)): + xp.linalg.eigh(a, self.UPLO) + + @pytest.mark.skip("No support of dpnp.eigvalsh()") + def test_eigvalsh_shape_error(self): + for xp in (numpy, cupy): + a = 
xp.zeros(self.shape) + with pytest.raises((numpy.linalg.LinAlgError, ValueError)): + xp.linalg.eigvalsh(a, self.UPLO) diff --git a/tests/third_party/cupy/manipulation_tests/test_transpose.py b/tests/third_party/cupy/manipulation_tests/test_transpose.py index ed06f050f777..91e8fe9a2cdb 100644 --- a/tests/third_party/cupy/manipulation_tests/test_transpose.py +++ b/tests/third_party/cupy/manipulation_tests/test_transpose.py @@ -41,14 +41,12 @@ def test_moveaxis6(self, xp): return xp.moveaxis(a, [0, 2, 1], [3, 4, 0]) # dim is too large - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_moveaxis_invalid1_1(self): for xp in (numpy, cupy): a = testing.shaped_arange((2, 3, 4), xp) with pytest.raises(numpy.AxisError): xp.moveaxis(a, [0, 1], [1, 3]) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_moveaxis_invalid1_2(self): for xp in (numpy, cupy): a = testing.shaped_arange((2, 3, 4), xp) @@ -56,14 +54,12 @@ def test_moveaxis_invalid1_2(self): xp.moveaxis(a, [0, 1], [1, 3]) # dim is too small - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_moveaxis_invalid2_1(self): for xp in (numpy, cupy): a = testing.shaped_arange((2, 3, 4), xp) with pytest.raises(numpy.AxisError): xp.moveaxis(a, [0, -4], [1, 2]) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_moveaxis_invalid2_2(self): for xp in (numpy, cupy): a = testing.shaped_arange((2, 3, 4), xp) @@ -71,7 +67,6 @@ def test_moveaxis_invalid2_2(self): xp.moveaxis(a, [0, -4], [1, 2]) # len(source) != len(destination) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_moveaxis_invalid3(self): for xp in (numpy, cupy): a = testing.shaped_arange((2, 3, 4), xp) @@ -79,7 +74,6 @@ def test_moveaxis_invalid3(self): xp.moveaxis(a, [0, 1, 2], [1, 2]) # len(source) != len(destination) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_moveaxis_invalid4(self): for xp in (numpy, cupy): a = testing.shaped_arange((2, 3, 4), xp) @@ -87,7 +81,6 @@ def 
test_moveaxis_invalid4(self): xp.moveaxis(a, [0, 1], [1, 2, 0]) # Use the same axis twice - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_moveaxis_invalid5_1(self): for xp in (numpy, cupy): a = testing.shaped_arange((2, 3, 4), xp) diff --git a/tests/third_party/cupy/sorting_tests/test_search.py b/tests/third_party/cupy/sorting_tests/test_search.py index 17751aed75c0..b0531244a2fb 100644 --- a/tests/third_party/cupy/sorting_tests/test_search.py +++ b/tests/third_party/cupy/sorting_tests/test_search.py @@ -262,7 +262,6 @@ def test_argminmax_dtype(self, in_dtype, result_dtype): {'cond_shape': (2, 3, 4), 'x_shape': (2, 3, 4), 'y_shape': (3, 4)}, {'cond_shape': (3, 4), 'x_shape': (2, 3, 4), 'y_shape': (4,)}, ) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.gpu class TestWhereTwoArrays(unittest.TestCase): @@ -274,8 +273,6 @@ def test_where_two_arrays(self, xp, cond_type, x_type, y_type): # Almost all values of a matrix `shaped_random` makes are not zero. # To make a sparse matrix, we need multiply `m`. 
cond = testing.shaped_random(self.cond_shape, xp, cond_type) * m - if xp is cupy: - cond = cond.astype(cupy.bool) x = testing.shaped_random(self.x_shape, xp, x_type, seed=0) y = testing.shaped_random(self.y_shape, xp, y_type, seed=1) return xp.where(cond, x, y) diff --git a/tests/third_party/cupy/sorting_tests/test_sort.py b/tests/third_party/cupy/sorting_tests/test_sort.py index f26d13cf537b..eb3cc8a1a803 100644 --- a/tests/third_party/cupy/sorting_tests/test_sort.py +++ b/tests/third_party/cupy/sorting_tests/test_sort.py @@ -469,7 +469,6 @@ def test_sort_complex_nan(self, xp, dtype): 'length': [10, 20000], })) @pytest.mark.usefixtures("allow_fall_back_on_numpy") -@testing.gpu class TestPartition(unittest.TestCase): def partition(self, a, kth, axis=-1): @@ -495,18 +494,19 @@ def test_partition_one_dim(self, xp, dtype): a = testing.shaped_random((self.length,), xp, dtype) kth = 2 x = self.partition(a, kth) - self.assertTrue(xp.all(x[0:kth] <= x[kth:kth + 1])) - self.assertTrue(xp.all(x[kth:kth + 1] <= x[kth + 1:])) + assert xp.all(x[0:kth] <= x[kth:kth + 1]) + assert xp.all(x[kth:kth + 1] <= x[kth + 1:]) return x[kth] + @pytest.mark.skip("multidimensional case doesn't work properly") @testing.for_all_dtypes() @testing.numpy_cupy_array_equal() def test_partition_multi_dim(self, xp, dtype): a = testing.shaped_random((10, 10, self.length), xp, dtype) kth = 2 x = self.partition(a, kth) - self.assertTrue(xp.all(x[:, :, 0:kth] <= x[:, :, kth:kth + 1])) - self.assertTrue(xp.all(x[:, :, kth:kth + 1] <= x[:, :, kth + 1:])) + assert xp.all(x[:, :, 0:kth] <= x[:, :, kth:kth + 1]) + assert xp.all(x[:, :, kth:kth + 1] <= x[:, :, kth + 1:]) return x[:, :, kth:kth + 1] # Test non-contiguous array @@ -515,16 +515,10 @@ def test_partition_multi_dim(self, xp, dtype): def test_partition_non_contiguous(self, xp): a = testing.shaped_random((self.length,), xp)[::-1] kth = 2 - if not self.external: - if xp is cupy: - with self.assertRaises(NotImplementedError): - return self.partition(a, 
kth) - return 0 # dummy - else: - x = self.partition(a, kth) - self.assertTrue(xp.all(x[0:kth] <= x[kth:kth + 1])) - self.assertTrue(xp.all(x[kth:kth + 1] <= x[kth + 1:])) - return x[kth] + x = self.partition(a, kth) + assert xp.all(x[0:kth] <= x[kth:kth + 1]) + assert xp.all(x[kth:kth + 1] <= x[kth + 1:]) + return x[kth] # Test kth diff --git a/tests/third_party/cupy/statistics_tests/test_correlation.py b/tests/third_party/cupy/statistics_tests/test_correlation.py index 3c68a998b5ad..b726951373af 100644 --- a/tests/third_party/cupy/statistics_tests/test_correlation.py +++ b/tests/third_party/cupy/statistics_tests/test_correlation.py @@ -48,7 +48,7 @@ def generate_input(self, a_shape, y_shape, xp, dtype): return a, y @testing.for_all_dtypes() - @testing.numpy_cupy_allclose() + @testing.numpy_cupy_allclose(type_check=False) def check(self, a_shape, y_shape=None, rowvar=True, bias=False, ddof=None, xp=None, dtype=None): a, y = self.generate_input(a_shape, y_shape, xp, dtype) diff --git a/tests/third_party/cupy/testing/helper.py b/tests/third_party/cupy/testing/helper.py index 5f9864dadc59..af8f6e545b29 100644 --- a/tests/third_party/cupy/testing/helper.py +++ b/tests/third_party/cupy/testing/helper.py @@ -15,6 +15,7 @@ # from dpnp.core import internal from tests.third_party.cupy.testing import array from tests.third_party.cupy.testing import parameterized +from dpctl import select_default_device # import dpnp # import dpnp.scipy.sparse @@ -199,6 +200,16 @@ def _contains_signed_and_unsigned(kw): any(d in vs for d in _float_dtypes + _signed_dtypes) +def _wraps_partial(wrapped, *names): + # Only `wrapped` function have args of `names`. 
+ def decorator(impl): + impl = functools.wraps(wrapped)(impl) + impl.__signature__ = inspect.signature( + functools.partial(wrapped, **{name: None for name in names})) + return impl + return decorator + + def _make_decorator(check_func, name, type_check, accept_error, sp_name=None, scipy_name=None): assert isinstance(name, str) @@ -639,24 +650,36 @@ def for_dtypes(dtypes, name='dtype'): argument. """ def decorator(impl): - @functools.wraps(impl) - def test_func(self, *args, **kw): + @_wraps_partial(impl, name) + def test_func(*args, **kw): for dtype in dtypes: try: kw[name] = numpy.dtype(dtype).type - impl(self, *args, **kw) + impl(*args, **kw) except unittest.SkipTest as e: - pass # print(f"Function decorator(): skipped: name={name} dtype={dtype} error={e}") + print('skipped: {} = {} ({})'.format(name, dtype, e)) except Exception: - # print(f"Function decorator(): name={name} dtype={dtype}") + print(name, 'is', dtype) raise return test_func return decorator +def _get_supported_float_dtypes(): + if select_default_device().has_aspect_fp64: + return (numpy.float64, numpy.float32) + else: + return (numpy.float32,) + +def _get_supported_complex_dtypes(): + if select_default_device().has_aspect_fp64: + return (numpy.complex128, numpy.complex64) + else: + return (numpy.complex64,) + -_complex_dtypes = () -_regular_float_dtypes = (numpy.float64, numpy.float32) +_complex_dtypes = _get_supported_complex_dtypes() +_regular_float_dtypes = _get_supported_float_dtypes() _float_dtypes = _regular_float_dtypes _signed_dtypes = () _unsigned_dtypes = tuple(numpy.dtype(i).type for i in 'BHILQ') @@ -667,7 +690,7 @@ def test_func(self, *args, **kw): def _make_all_dtypes(no_float16, no_bool, no_complex): - return (numpy.float64, numpy.float32, numpy.int64, numpy.int32) + return (numpy.int64, numpy.int32) + _get_supported_float_dtypes() # if no_float16: # dtypes = _regular_float_dtypes # else: diff --git a/utils/command_build_clib.py b/utils/command_build_clib.py deleted file mode 
100644 index d16bab3aec4a..000000000000 --- a/utils/command_build_clib.py +++ /dev/null @@ -1,427 +0,0 @@ -# -*- coding: utf-8 -*- -# ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# - Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -# THE POSSIBILITY OF SUCH DAMAGE. -# ***************************************************************************** - -"""Module to customize build_clib command -Originally, 'build_clib' command produce static C library only. 
-This modification add: - - build shared C library - - copy this library to the project tree - - a check if source needs to be rebuilt based on time stamp - - a check if librayr needs to be rebuilt based on time stamp -""" - -import os -import sys - -from ctypes.util import find_library as find_shared_lib -from setuptools.command import build_clib -from distutils import log -from distutils.dep_util import newer_group -from distutils.file_util import copy_file - -from utils.dpnp_build_utils import find_cmplr, find_dpl, find_mathlib, find_python_env - -IS_WIN = False -IS_MAC = False -IS_LIN = False - -if 'linux' in sys.platform: - IS_LIN = True -elif sys.platform == 'darwin': - IS_MAC = True -elif sys.platform in ['win32', 'cygwin']: - IS_WIN = True -else: - raise EnvironmentError("DPNP: " + sys.platform + " not supported") - -""" -Set compiler for the project -""" -# default variables (for Linux) -_project_compiler = "icpx" -_project_linker = "icpx" -_project_cmplr_flag_sycl_devel = ["-fsycl-device-code-split=per_kernel", "-fno-approx-func", "-fno-finite-math-only"] -_project_cmplr_flag_sycl = ["-fsycl"] -_project_cmplr_flag_stdcpp_static = [] # This brakes TBB ["-static-libstdc++", "-static-libgcc"] -_project_cmplr_flag_compatibility = ["-Wl,--enable-new-dtags"] -_project_cmplr_flag_lib = ["-shared"] -_project_cmplr_flag_release_build = ["-O3", "-DNDEBUG", "-fPIC"] -_project_cmplr_flag_debug_build = ["-g", "-O1", "-W", "-Wextra", "-Wshadow", "-Wall", "-Wstrict-prototypes", "-fPIC"] -_project_cmplr_flag_default_build = [] -_project_cmplr_macro = [] -_project_force_build = False -_project_sycl_queue_control_macro = [("DPNP_LOCAL_QUEUE", "1")] -_project_rpath = ["$ORIGIN", os.path.join("$ORIGIN", "..")] -_dpctrl_include = [] -_dpctrl_libpath = [] -_dpctrl_lib = [] -_sdl_cflags = ["-fstack-protector-strong", - "-fPIC", "-D_FORTIFY_SOURCE=2", - "-Wformat", - "-Wformat-security", - "-fno-strict-overflow", - "-fno-delete-null-pointer-checks"] -_sdl_ldflags = 
["-Wl,-z,noexecstack,-z,relro,-z,now"] - -# TODO remove when it will be fixed on TBB side. Details: -# In GCC versions 9 and 10 the application that uses Parallel STL algorithms may fail to compile due to incompatible -# interface changes between earlier versions of Intel TBB and oneTBB. Disable support for Parallel STL algorithms -# by defining PSTL_USE_PARALLEL_POLICIES (in GCC 9), _GLIBCXX_USE_TBB_PAR_BACKEND (in GCC 10) macro to zero -# before inclusion of the first standard header file in each translation unit. -_project_cmplr_macro += [("PSTL_USE_PARALLEL_POLICIES", "0"), ("_GLIBCXX_USE_TBB_PAR_BACKEND", "0")] - -# disable PSTL predefined policies objects (global queues, prevent fail on Windows) -_project_cmplr_macro += [("ONEDPL_USE_PREDEFINED_POLICIES", "0")] - -try: - """ - Detect external SYCL queue handling library - """ - import dpctl - - _dpctrl_include += [dpctl.get_include()] - # _dpctrl_libpath = for package build + for local build - _dpctrl_libpath = ["$ORIGIN/../dpctl"] + [os.path.join(dpctl.get_include(), '..')] - _dpctrl_lib = ["DPCTLSyclInterface"] -except ImportError: - """ - Set local SYCL queue handler - """ - _project_cmplr_macro += _project_sycl_queue_control_macro - -# other OS specific -if IS_WIN: - _project_compiler = "dpcpp" - _project_linker = "lld-link" - _project_cmplr_flag_sycl = [] - _project_cmplr_flag_stdcpp_static = [] - _project_cmplr_flag_compatibility = [] - _project_cmplr_flag_lib = ["/DLL"] - _project_cmplr_flag_release_build += _project_cmplr_flag_sycl_devel - _project_cmplr_macro += [("_WIN", "1")] - _project_rpath = [] - # TODO this flag creates unexpected behavior during compilation, need to be fixed - # _sdl_cflags = ["-GS"] - _sdl_cflags = [] - _sdl_ldflags = ["-NXCompat", "-DynamicBase"] - -""" -Get the project build type -""" -__dpnp_debug__ = os.environ.get('DPNP_DEBUG', None) -if __dpnp_debug__ is not None: - """ - Debug configuration - """ - _project_cmplr_flag_default_build = _project_cmplr_flag_debug_build 
-else: - """ - Release configuration - """ - _project_cmplr_flag_sycl += _project_cmplr_flag_sycl_devel - _project_cmplr_flag_default_build = _project_cmplr_flag_release_build - -""" -Get the math library environemnt -""" -_project_cmplr_macro += [("MKL_ILP64", "1")] # using 64bit integers in MKL interface (long) -if IS_LIN: - _mathlibs = ["mkl_sycl", "mkl_intel_ilp64", "mkl_sequential", - "mkl_core", "sycl", "OpenCL", "pthread", "m", "dl"] -elif IS_WIN: - _sycl_lib = None - for lib in {"sycl", "sycl6", "sycl7"}: - if find_shared_lib(lib): - _sycl_lib = lib - if not _sycl_lib: - raise EnvironmentError("DPNP: sycl library is not found") - - _mathlibs = ["mkl_sycl_dll", "mkl_intel_ilp64_dll", "mkl_tbb_thread_dll", "mkl_core_dll", _sycl_lib, "OpenCL", "tbb"] - -""" -Final set of arguments for extentions -""" -_project_extra_link_args = _project_cmplr_flag_compatibility + _project_cmplr_flag_stdcpp_static + \ - ["-Wl,-rpath," + x for x in _project_rpath] + _sdl_ldflags -_project_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..") -_project_backend_dir = [os.path.join(_project_dir, "dpnp", "backend", "include"), - os.path.join(_project_dir, "dpnp", "backend", "src") # not a public headers location - ] - -dpnp_backend_c_description = [ - ["dpnp_backend_c", - { - "sources": [ - "dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp", - "dpnp/backend/kernels/dpnp_krnl_bitwise.cpp", - "dpnp/backend/kernels/dpnp_krnl_common.cpp", - "dpnp/backend/kernels/dpnp_krnl_elemwise.cpp", - "dpnp/backend/kernels/dpnp_krnl_fft.cpp", - "dpnp/backend/kernels/dpnp_krnl_indexing.cpp", - "dpnp/backend/kernels/dpnp_krnl_linalg.cpp", - "dpnp/backend/kernels/dpnp_krnl_logic.cpp", - "dpnp/backend/kernels/dpnp_krnl_manipulation.cpp", - "dpnp/backend/kernels/dpnp_krnl_mathematical.cpp", - "dpnp/backend/kernels/dpnp_krnl_random.cpp", - "dpnp/backend/kernels/dpnp_krnl_reduction.cpp", - "dpnp/backend/kernels/dpnp_krnl_searching.cpp", - "dpnp/backend/kernels/dpnp_krnl_sorting.cpp", - 
"dpnp/backend/kernels/dpnp_krnl_statistics.cpp", - "dpnp/backend/src/dpnp_iface_fptr.cpp", - "dpnp/backend/src/memory_sycl.cpp", - "dpnp/backend/src/constants.cpp", - "dpnp/backend/src/queue_sycl.cpp", - "dpnp/backend/src/verbose.cpp", - "dpnp/backend/src/dpnp_random_state.cpp" - ], - } - ] -] - - -def _compiler_compile(self, sources, - output_dir=None, macros=None, include_dirs=None, debug=0, - extra_preargs=None, extra_postargs=None, depends=None): - - if not self.initialized: - self.initialize() - compile_info = self._setup_compile(output_dir, macros, include_dirs, - sources, depends, extra_postargs) - macros, objects, extra_postargs, pp_opts, build = compile_info - - compile_opts = extra_preargs or [] - compile_opts.append('/c') - if debug: - compile_opts.extend(self.compile_options_debug) - else: - compile_opts.extend(self.compile_options) - - add_cpp_opts = False - - for obj in objects: - try: - src, ext = build[obj] - except KeyError: - continue - if debug: - # pass the full pathname to MSVC in debug mode, - # this allows the debugger to find the source file - # without asking the user to browse for it - src = os.path.abspath(src) - - # Anaconda/conda-forge customisation, we want our pdbs to be - # relocatable: - # https://developercommunity.visualstudio.com/comments/623156/view.html - d1trimfile_opts = [] - # if 'SRC_DIR' in os.environ: - # d1trimfile_opts.append("/d1trimfile:" + os.environ['SRC_DIR']) - - if ext in self._c_extensions: - input_opt = "/Tc" + src - elif ext in self._cpp_extensions: - input_opt = "/Tp" + src - add_cpp_opts = True - elif ext in self._rc_extensions: - # compile .RC to .RES file - input_opt = src - output_opt = "/fo" + obj - try: - self.spawn([self.rc] + pp_opts + [output_opt, input_opt]) - except DistutilsExecError as msg: - raise CompileError(msg) - continue - elif ext in self._mc_extensions: - # Compile .MC to .RC file to .RES file. 
- # * '-h dir' specifies the directory for the - # generated include file - # * '-r dir' specifies the target directory of the - # generated RC file and the binary message resource - # it includes - # - # For now (since there are no options to change this), - # we use the source-directory for the include file and - # the build directory for the RC file and message - # resources. This works at least for win32all. - h_dir = os.path.dirname(src) - rc_dir = os.path.dirname(obj) - try: - # first compile .MC to .RC and .H file - self.spawn([self.mc, '-h', h_dir, '-r', rc_dir, src]) - base, _ = os.path.splitext(os.path.basename(src)) - rc_file = os.path.join(rc_dir, base + '.rc') - # then compile .RC to .RES file - self.spawn([self.rc, "/fo" + obj, rc_file]) - - except DistutilsExecError as msg: - raise CompileError(msg) - continue - else: - # how to handle this file? - raise CompileError("Don't know how to compile {} to {}" - .format(src, obj)) - - args = [self.cc] + compile_opts + pp_opts + d1trimfile_opts - if add_cpp_opts: - args.append('/EHsc') - args.append(input_opt) - args.append("/Fo" + obj) - args.extend(extra_postargs) - - try: - self.spawn(args) - except DistutilsExecError as msg: - raise CompileError(msg) - - return objects - - -class custom_build_clib(build_clib.build_clib): - - def build_libraries(self, libraries): - """ - This function is overloaded to the original function in build_clib.py file - """ - - for (lib_name, build_info) in libraries: - c_library_name = self.compiler.library_filename(lib_name, lib_type='shared') - c_library_filename = os.path.join(self.build_clib, c_library_name) - dest_filename = "dpnp" # TODO need to fix destination directory - - sources = build_info.get('sources') - if sources is None or not isinstance(sources, (list, tuple)): - err_msg = f"in 'libraries' option (library '{lib_name}')," - err_msg += f" 'sources' must be present and must be a list of source filenames" - raise DistutilsSetupError(err_msg) - - sources = 
list(sources) - - log.info(f"DPNP: building {lib_name} library") - - """ - Get the compiler environemnt - """ - _cmplr_include, _cmplr_libpath = find_cmplr(verbose=True) - _mathlib_include, _mathlib_path = find_mathlib(verbose=True) - # _, _omp_libpath = find_omp(verbose=True) - _dpl_include, _ = find_dpl(verbose=True) - _py_env_include, _py_env_lib = find_python_env(verbose=True) - - macros = _project_cmplr_macro - include_dirs = _cmplr_include + _dpl_include + _mathlib_include + _project_backend_dir + _dpctrl_include + _py_env_include - libraries = _mathlibs + _dpctrl_lib - library_dirs = _mathlib_path + _dpctrl_libpath + _py_env_lib # + _omp_libpath - runtime_library_dirs = _project_rpath + _dpctrl_libpath - extra_preargs = _project_cmplr_flag_sycl + _sdl_cflags - extra_link_postargs = _project_cmplr_flag_lib - extra_link_preargs = _project_cmplr_flag_compatibility + _sdl_ldflags - force_build = _project_force_build - compiler = [_project_compiler] - linker = [_project_linker] - default_flags = _project_cmplr_flag_default_build - language = "c++" - - # set compiler and options - self.compiler.compiler_so = compiler + default_flags - self.compiler.compiler = self.compiler.compiler_so - self.compiler.compiler_cxx = self.compiler.compiler_so - self.compiler.linker_so = linker + default_flags - self.compiler.linker_exe = self.compiler.linker_so - - os.environ["CC"] = _project_compiler - - objects = [] - """ - Build object files from sources - """ - if IS_WIN: - self.compiler.compile = _compiler_compile - - for source_it in sources: - obj_file_list = self.compiler.object_filenames([source_it], strip_dir=0, output_dir=self.build_temp) - obj_file = "".join(obj_file_list) # convert from list to file name - - newer_than_obj = newer_group([source_it], obj_file, missing="newer") - if force_build or newer_than_obj: - if IS_WIN: - obj_file_list = self.compiler.compile(self.compiler, - [source_it], - output_dir=self.build_temp, - macros=macros, - include_dirs=include_dirs, - 
extra_preargs=extra_preargs, - debug=self.debug) - else: - obj_file_list = self.compiler.compile([source_it], - output_dir=self.build_temp, - macros=macros, - include_dirs=include_dirs, - extra_preargs=extra_preargs, - debug=self.debug) - objects.extend(obj_file_list) - else: - objects.append(obj_file) - - """ - Build library file from objects - """ - newer_than_lib = newer_group(objects, c_library_filename, missing="newer") - if force_build or newer_than_lib: - # TODO very brute way, need to refactor - if IS_WIN: - link_command = " ".join(compiler) - link_command += " " + " ".join(default_flags) - link_command += " " + " ".join(objects) # specify *.obj files - link_command += " /link" # start linker options - link_command += " " + " ".join(extra_link_preargs) - link_command += " " + ".lib ".join(libraries) + ".lib" # libraries - link_command += " /LIBPATH:" + " /LIBPATH:".join(library_dirs) - link_command += " /OUT:" + c_library_filename # output file name - link_command += " " + " ".join(extra_link_postargs) - print(link_command) - os.system(link_command) - else: - self.compiler.link_shared_lib(objects, - lib_name, - output_dir=self.build_clib, - libraries=libraries, - library_dirs=library_dirs, - runtime_library_dirs=runtime_library_dirs, - extra_preargs=extra_preargs + extra_link_preargs, - extra_postargs=extra_link_postargs, - debug=self.debug, - build_temp=self.build_temp, - target_lang=language) - - """ - Copy library to the destination path - """ - copy_file(c_library_filename, dest_filename, verbose=self.verbose, dry_run=self.dry_run) - # TODO very brute way, need to refactor - if c_library_filename.endswith(".dll"): - copy_file(c_library_filename.replace(".dll", ".lib"), - dest_filename, verbose=self.verbose, dry_run=self.dry_run) - - log.info(f"DPNP: building {lib_name} library finished") diff --git a/utils/command_build_cmake_clib.py b/utils/command_build_cmake_clib.py deleted file mode 100644 index 0ef0e240bb1e..000000000000 --- 
a/utils/command_build_cmake_clib.py +++ /dev/null @@ -1,134 +0,0 @@ -# -*- coding: utf-8 -*- -# ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# - Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -# THE POSSIBILITY OF SUCH DAMAGE. 
-# ***************************************************************************** - -""" -Module to call cmake based procedure by build_cmake_clib command -""" - -import os -import sys -import pathlib -from setuptools.command import build_clib -from distutils import log - - -""" -Detect platform -""" -IS_WIN = False -IS_MAC = False -IS_LIN = False - -if 'linux' in sys.platform: - IS_LIN = True -elif sys.platform == 'darwin': - IS_MAC = True -elif sys.platform in ['win32', 'cygwin']: - IS_WIN = True -else: - raise EnvironmentError("DPNP cmake builder: " + sys.platform + " not supported") - - -""" -Detect external SYCL queue manager -""" -_dpctrl_include_dir = "No_sycl_queue_mgr_include_dir" -_dpctrl_library_dir = "No_sycl_queue_mgr_library_dir" -_dpctrl_exists = "OFF" -try: - """ - Detect external SYCL queue handling library - """ - import dpctl - - _dpctrl_include_dir = str(os.path.abspath(dpctl.get_include())) - _dpctrl_library_dir = str(os.path.abspath(os.path.join(dpctl.get_include(), ".."))) - _dpctrl_exists = "ON" -except ImportError: - """ - Set local SYCL queue handler set by default in CmakeList.txt - """ - pass - -""" -Detect enabling DPNP backend tests -""" -_dpnp_backend_tests_enable = os.environ.get('DPNP_BACKEND_TESTS_ENABLE', None) - - -""" -CmakeList.txt based build_clib -""" - - -class custom_build_cmake_clib(build_clib.build_clib): - def run(self): - root_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), "..")) - log.info(f"Project directory is: {root_dir}") - - backend_directory = os.path.join(root_dir, "dpnp", "backend") - install_directory = os.path.join(root_dir, "dpnp") - - build_temp = pathlib.Path(self.build_temp) - build_temp.mkdir(parents=True, exist_ok=True) - abs_build_temp_path = str(os.path.abspath(build_temp)) - log.info(f"build directory is: {abs_build_temp_path}") - - config = "Debug" if self.debug else "Release" - - cmake_generator = str() - enable_tests = "OFF" - - if IS_WIN: - cmake_generator = "-GNinja" - if 
_dpnp_backend_tests_enable is not None: - enable_tests = "ON" - - cmake_args = [ - cmake_generator, - "-S" + backend_directory, - "-B" + abs_build_temp_path, - "-DCMAKE_BUILD_TYPE=" + config, - "-DDPNP_INSTALL_PREFIX=" + install_directory.replace(os.sep, "/"), # adjust to cmake requirenments - "-DDPNP_INSTALL_STRUCTURED=OFF", - # "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + install_directory, - "-DDPNP_SYCL_QUEUE_MGR_ENABLE:BOOL=" + _dpctrl_exists, - "-DDPNP_QUEUEMGR_INCLUDE_DIR=" + _dpctrl_include_dir, - "-DDPNP_QUEUEMGR_LIB_DIR=" + _dpctrl_library_dir, - "-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON", - "-DDPNP_BACKEND_TESTS:BOOL=" + enable_tests - ] - - # didn't find how to add it inside cmake, that is why this is here - import multiprocessing - cpu_count = multiprocessing.cpu_count() - # possible that jobs count must be +-1 against CPUs count - jobs = "-j" + str(cpu_count) - - self.spawn(["cmake"] + cmake_args + [backend_directory]) - if not self.dry_run: - self.spawn(["cmake", "--build", abs_build_temp_path, jobs]) - self.spawn(["cmake", "--install", abs_build_temp_path]) diff --git a/utils/command_clean.py b/utils/command_clean.py deleted file mode 100644 index 785340aa4023..000000000000 --- a/utils/command_clean.py +++ /dev/null @@ -1,84 +0,0 @@ -# -*- coding: utf-8 -*- -# ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# - Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -# THE POSSIBILITY OF SUCH DAMAGE. -# ***************************************************************************** - -import os -from setuptools import Command -from fnmatch import fnmatch -from shutil import rmtree -from distutils import log - - -class source_clean(Command): - """ - Command to clean all generated files in the project - - Usage: - To run the command: python ./setup.py clean - """ - - description = "Clean up the project source tree" - - CLEAN_ROOTDIRS = ['build', 'build_cython', 'cython_debug', 'Intel_NumPy.egg-info', 'doc/_build', 'CMakeFiles'] - CLEAN_DIRS = ['__pycache__'] - CLEAN_FILES = ['*.so', '*.pyc', '*.pyd', '*.dll', '*.lib', 'CMakeCache.txt'] - - user_options = [] - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) - log.info(f"DPNP: cleaning in {root_dir}") - - # removing dirs from root_dir - for dir_mask in self.CLEAN_ROOTDIRS: - rdir = os.path.join(root_dir, dir_mask) - if os.path.isdir(rdir): - log.info(f"rm {rdir}") - rmtree(rdir) - - for (dirpath, dirnames, filenames) in os.walk(root_dir): - # removing subdirs - for dir in dirnames: - for dir_mask in self.CLEAN_DIRS: - 
if fnmatch(dir, dir_mask): - rdir = os.path.join(dirpath, dir) - log.info(f"rm {rdir}") - rmtree(rdir) - - # removing files - for file in filenames: - for file_mask in self.CLEAN_FILES: - if fnmatch(file, file_mask): - rfile = os.path.join(dirpath, file) - log.info(f"rm {rfile}") - os.remove(rfile) - - log.info(f"DPNP: cleaning finished") diff --git a/utils/command_style.py b/utils/command_style.py deleted file mode 100644 index 4d5a48ed5683..000000000000 --- a/utils/command_style.py +++ /dev/null @@ -1,155 +0,0 @@ -# -*- coding: utf-8 -*- -# ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# - Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -# THE POSSIBILITY OF SUCH DAMAGE. 
-# ***************************************************************************** - -import os -from setuptools import Command - - -class source_style(Command): - """ - Command to check and adjust code style - - Usage: - To check style: python ./setup.py style - To fix style: python ./setup.py style -a - - """ - - user_options = [ - ('apply', 'a', 'Apply codestyle changes to sources.') - ] - description = "Code style check and apply (with -a)" - boolean_options = [] - - _result_marker = "Result:" - _project_directory_excluded = ['build', '.git'] - - _c_formatter = 'clang-format' - _c_formatter_install_msg = 'pip install clang' - _c_formatter_command_line = [_c_formatter, '-style=file'] - _c_file_extensions = ['.h', '.c', '.hpp', '.cpp'] - - _py_checker = 'pycodestyle' - _py_formatter = 'autopep8' - _py_formatter_install_msg = 'pip install --upgrade autopep8\npip install --upgrade pycodestyle' - _py_checker_command_line = [_py_checker] - _py_formatter_command_line = [ - _py_formatter, - '--in-place'] - _py_file_extensions = ['.py', '.pyx', '.pxd', '.pxi'] - - def _get_file_list(self, path, search_extentions): - """ Return file list to be adjusted or checked - - path - is the project base path - search_extentions - list of strings with files extension to search recurcivly - """ - files = [] - exluded_directories_full_path = [os.path.join( - path, excluded_dir) for excluded_dir in self._project_directory_excluded] - - # r=root, d=directories, f = files - for r, d, f in os.walk(path): - # match exclude pattern in current directory - found = False - for excluded_dir in exluded_directories_full_path: - if r.find(excluded_dir) >= 0: - found = True - - if found: - continue - - for file in f: - filename, extention = os.path.splitext(file) - if extention in search_extentions: - files.append(os.path.join(r, file)) - - return files - - def initialize_options(self): - self.apply = 0 - - def finalize_options(self): - pass - - def run(self): - root_dir = 
os.path.join(os.path.dirname(__file__), "..") - print("Project directory is: %s" % root_dir) - - if self.apply: - self._c_formatter_command_line += ['-i'] - else: - self._c_formatter_command_line += ['-output-replacements-xml'] - - import subprocess - - bad_style_file_names = [] - - # C files handling - c_files = self._get_file_list(root_dir, self._c_file_extensions) - try: - for f in c_files: - command_output = subprocess.Popen( - self._c_formatter_command_line + [f], stdout=subprocess.PIPE) - command_cout, command_cerr = command_output.communicate() - if not self.apply: - if command_cout.find(b' 0: - bad_style_file_names.append(f) - except BaseException as original_error: - print("%s is not installed.\nPlease use: %s" % - (self._c_formatter, self._c_formatter_install_msg)) - print("Original error message is:\n", original_error) - exit(1) - - # Python files handling - py_files = self._get_file_list(root_dir, self._py_file_extensions) - try: - for f in py_files: - if not self.apply: - command_output = subprocess.Popen( - self._py_checker_command_line + [f]) - returncode = command_output.wait() - if returncode != 0: - bad_style_file_names.append(f) - else: - command_output = subprocess.Popen( - self._py_formatter_command_line + [f]) - command_output.wait() - except BaseException as original_error: - print("%s is not installed.\nPlease use: %s" % - (self._py_formatter, self._py_formatter_install_msg)) - print("Original error message is:\n", original_error) - exit(1) - - if bad_style_file_names: - print("Following files style need to be adjusted:") - for line in bad_style_file_names: - print(line) - print("%s Style check failed" % self._result_marker) - exit(1) - else: - print("%s Style check passed" % self._result_marker) - exit(0) diff --git a/utils/dpnp_build_utils.py b/utils/dpnp_build_utils.py deleted file mode 100644 index 2ccf211587d3..000000000000 --- a/utils/dpnp_build_utils.py +++ /dev/null @@ -1,397 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# 
***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# - Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -# THE POSSIBILITY OF SUCH DAMAGE. -# ***************************************************************************** - -import os -import sys - - -IS_CONDA_BUILD = os.environ.get("CONDA_BUILD") == "1" - - -def find_library(var_name, rel_header_paths, rel_lib_paths, - rel_include_path="include", rel_libdir_path="lib", verbose=False): - """ - Find specified libraries/headers in the directory from the environment variable. 
- - Parameters - ---------- - var_name : str - the name of the environment variable - rel_header_paths : list(str) - relative paths to required headers - rel_lib_paths : list(str) - relative paths to required libraries - rel_include_path : str - relative path to the include directory - rel_libdir_path : str - relative path to the library directory - verbose : bool - to print paths to include and library directories - - Returns - ------- - tuple(list(str), list(str)) - path to include directory, path to library directory - """ - root_dir = os.getenv(var_name) - if root_dir is None: - return [], [] - - include_find = os.path.join(root_dir, rel_include_path) - libpath_find = os.path.join(root_dir, rel_libdir_path) - required_headers = [os.path.join(include_find, rel_path) for rel_path in rel_header_paths] - required_libs = [os.path.join(libpath_find, rel_path) for rel_path in rel_lib_paths] - - for required_file in required_headers + required_libs: - if not os.path.exists(required_file): - return [], [] - - if verbose: - msg_template = "DPNP: using ${} based library. include={}, libpath={}" - print(msg_template.format(var_name, include_find, libpath_find)) - - return [include_find], [libpath_find] - - -def find_cmplr(verbose=False): - """ - Find compiler. 
- - Parameters - ---------- - verbose : bool - to print paths to include and library directories - - Returns - ------- - tuple(list(str), list(str)) - path to include directory, path to library directory - """ - - rel_header_paths = rel_lib_paths = [] - - # try to find library in specified directory from $DPCPPROOT - if 'linux' in sys.platform: - rel_include_path = os.path.join('linux', 'include') - rel_libdir_path = os.path.join('linux', 'lib') - elif sys.platform in ['win32', 'cygwin']: - rel_include_path = os.path.join('windows', 'include') - rel_libdir_path = os.path.join('windows', 'lib') - else: - raise EnvironmentError("DPNP: " + sys.platform + " not supported") - - cmplr_include, cmplr_libpath = find_library("DPCPPROOT", rel_header_paths, rel_lib_paths, - rel_include_path=rel_include_path, - rel_libdir_path=rel_libdir_path, - verbose=verbose) - - # try to find library in specified directory from $ONEAPI_ROOT - if not cmplr_include or not cmplr_libpath: - if sys.platform in ['linux']: - rel_include_path = os.path.join('compiler', 'latest', 'linux', 'include') - rel_libdir_path = os.path.join('compiler', 'latest', 'linux', 'lib') - elif sys.platform in ['win32', 'cygwin']: - rel_include_path = os.path.join('compiler', 'latest', 'windows', 'include') - rel_libdir_path = os.path.join('compiler', 'latest', 'windows', 'lib') - else: - raise EnvironmentError("DPNP: " + sys.platform + " not supported") - - cmplr_include, cmplr_libpath = find_library("ONEAPI_ROOT", rel_header_paths, rel_lib_paths, - rel_include_path=rel_include_path, - rel_libdir_path=rel_libdir_path, - verbose=verbose) - - # try to find in Python environment - if not cmplr_include or not cmplr_libpath: - if sys.platform in ['linux']: - rel_include_path = os.path.join('include') - rel_libdir_path = os.path.join('lib') - elif sys.platform in ['win32', 'cygwin']: - rel_include_path = os.path.join('Library', 'include') - rel_libdir_path = os.path.join('Library', 'lib') - else: - raise 
EnvironmentError("DPNP: " + sys.platform + " not supported") - - conda_root_var = "PREFIX" if IS_CONDA_BUILD else "CONDA_PREFIX" - - cmplr_include, cmplr_libpath = find_library(conda_root_var, rel_header_paths, rel_lib_paths, - rel_include_path=rel_include_path, - rel_libdir_path=rel_libdir_path, - verbose=verbose) - - if not cmplr_include or not cmplr_libpath: - raise EnvironmentError("DPNP: Unable to find compiler") - - return cmplr_include, cmplr_libpath - - -def find_dpl(verbose=False): - """ - Find DPL. - - Parameters - ---------- - verbose : bool - to print paths to include and library directories - - Returns - ------- - tuple(list(str), list(str)) - path to include directory, path to library directory - """ - - rel_header_paths = [os.path.join("oneapi", "dpl", "algorithm")] - rel_lib_paths = [] - rel_libdir_path = "" - - # try to find library in specified directory from $DPLROOT like a repository - rel_include_path = os.path.join('include') - - dpl_include, dpl_libpath = find_library("DPLROOT", rel_header_paths, rel_lib_paths, - rel_include_path=rel_include_path, - rel_libdir_path=rel_libdir_path, - verbose=verbose) - - # try to find library in specified directory from $DPLROOT - if not dpl_include or not dpl_libpath: - if 'linux' in sys.platform: - rel_include_path = os.path.join('linux', 'include') - elif sys.platform in ['win32', 'cygwin']: - rel_include_path = os.path.join('windows', 'include') - else: - raise EnvironmentError("DPNP: " + sys.platform + " not supported") - - dpl_include, dpl_libpath = find_library("DPLROOT", rel_header_paths, rel_lib_paths, - rel_include_path=rel_include_path, - rel_libdir_path=rel_libdir_path, - verbose=verbose) - - # try to find library in specified directory from $ONEAPI_ROOT - if not dpl_include or not dpl_libpath: - if sys.platform in ['linux']: - rel_include_path = os.path.join('dpl', 'latest', 'linux', 'include') - elif sys.platform in ['win32', 'cygwin']: - rel_include_path = os.path.join('dpl', 'latest', 
'windows', 'include') - else: - raise EnvironmentError("DPNP: " + sys.platform + " not supported") - - dpl_include, dpl_libpath = find_library("ONEAPI_ROOT", rel_header_paths, rel_lib_paths, - rel_include_path=rel_include_path, - rel_libdir_path=rel_libdir_path, - verbose=verbose) - - # try to find in Python environment - if not dpl_include or not dpl_libpath: - if sys.platform in ['linux']: - rel_include_path = os.path.join('include') - elif sys.platform in ['win32', 'cygwin']: - rel_include_path = os.path.join('Library', 'include') - else: - raise EnvironmentError("DPNP: " + sys.platform + " not supported") - - conda_root_var = "PREFIX" if IS_CONDA_BUILD else "CONDA_PREFIX" - - dpl_include, dpl_libpath = find_library(conda_root_var, rel_header_paths, rel_lib_paths, - rel_include_path=rel_include_path, - rel_libdir_path=rel_libdir_path, - verbose=verbose) - - if not dpl_include or not dpl_libpath: - raise EnvironmentError("DPNP: Unable to find DPL") - - return dpl_include, dpl_libpath - - -def find_mathlib(verbose=False): - """ - Find mathlib. 
- - Parameters - ---------- - verbose : bool - to print paths to include and library directories - - Returns - ------- - tuple(list(str), list(str)) - path to include directory, path to library directory - """ - - if sys.platform in ['linux']: - rel_header_paths = [os.path.join("oneapi", "mkl.hpp")] - rel_lib_paths = ["libmkl_sycl.so"] - elif sys.platform in ['win32', 'cygwin']: - rel_header_paths = [os.path.join("oneapi", "mkl.hpp")] - rel_lib_paths = ["mkl_sycl_dll.lib"] - else: - raise EnvironmentError("DPNP: " + sys.platform + " not supported") - - # try to find library in specified directory from $MKLROOT - if sys.platform in ['linux']: - rel_include_path = os.path.join('linux', 'include') - rel_libdir_path = os.path.join('linux', 'lib') - elif sys.platform in ['win32', 'cygwin']: - rel_include_path = os.path.join('windows', 'include') - rel_libdir_path = os.path.join('windows', 'lib') - else: - raise EnvironmentError("DPNP: " + sys.platform + " not supported") - - mathlib_include, mathlib_path = find_library("MKLROOT", rel_header_paths, rel_lib_paths, - rel_include_path=rel_include_path, - rel_libdir_path=rel_libdir_path, - verbose=verbose) - - # try to find library in specified directory from $ONEAPI_ROOT - if not mathlib_include or not mathlib_path: - if sys.platform in ['linux']: - rel_include_path = os.path.join('mkl', 'latest', 'linux', 'include') - rel_libdir_path = os.path.join('mkl', 'latest', 'linux', 'lib') - elif sys.platform in ['win32', 'cygwin']: - rel_include_path = os.path.join('mkl', 'latest', 'windows', 'include') - rel_libdir_path = os.path.join('mkl', 'latest', 'windows', 'lib') - else: - raise EnvironmentError("DPNP: " + sys.platform + " not supported") - - mathlib_include, mathlib_path = find_library("ONEAPI_ROOT", rel_header_paths, rel_lib_paths, - rel_include_path=rel_include_path, - rel_libdir_path=rel_libdir_path, - verbose=verbose) - - # try to find in Python environment - if not mathlib_include or not mathlib_path: - if 
sys.platform in ['linux']: - rel_include_path = os.path.join('include') - rel_libdir_path = os.path.join('lib') - elif sys.platform in ['win32', 'cygwin']: - rel_include_path = os.path.join('Library', 'include') - rel_libdir_path = os.path.join('Library', 'lib') - else: - raise EnvironmentError("DPNP: " + sys.platform + " not supported") - - conda_root_var = "PREFIX" if IS_CONDA_BUILD else "CONDA_PREFIX" - - mathlib_include, mathlib_path = find_library(conda_root_var, rel_header_paths, rel_lib_paths, - rel_include_path=rel_include_path, - rel_libdir_path=rel_libdir_path, - verbose=verbose) - - if not mathlib_include or not mathlib_path: - raise EnvironmentError("DPNP: Unable to find math library") - - return mathlib_include, mathlib_path - - -def _find_omp_in_dpcpp_root(verbose=False): - """ - Find omp in dpcpp root using $DPCPPROOT. - - Parameters - ---------- - verbose : bool - to print paths to include and library directories - - Returns - ------- - tuple(list(str), list(str)) - path to include directory, path to library directory - """ - rel_header_paths = rel_lib_paths = [] - - if 'linux' in sys.platform: - rel_include_path = os.path.join('linux', 'compiler', 'include') - rel_libdir_path = os.path.join('linux', 'compiler', 'lib', 'intel64') - elif sys.platform in ['win32', 'cygwin']: - rel_include_path = os.path.join('windows', 'compiler', 'include') - rel_libdir_path = os.path.join('windows', 'compiler', 'lib', 'intel64_win') - else: - rel_include_path, rel_libdir_path = 'include', 'lib' - - return find_library("DPCPPROOT", rel_header_paths, rel_lib_paths, - rel_include_path=rel_include_path, rel_libdir_path=rel_libdir_path, verbose=verbose) - - -def find_omp(verbose=False): - """ - Find omp in environment. 
- - Parameters - ---------- - verbose : bool - to print paths to include and library directories - - Returns - ------- - tuple(list(str), list(str)) - path to include directory, path to library directory - """ - omp_include, omp_libpath = _find_omp_in_dpcpp_root(verbose=verbose) - - if not omp_include or not omp_libpath: - raise EnvironmentError(f"DPNP: Unable to find omp. Please install Intel OneAPI environment") - - return omp_include, omp_libpath - - -def find_python_env(verbose=False): - """ - Find Python environment. - - Parameters - ---------- - verbose : bool - to print paths to include and library directories - - Returns - ------- - tuple(list(str), list(str)) - path to include directory, path to library directory - """ - - rel_header_paths = rel_lib_paths = [] - - if sys.platform in ['linux']: - rel_include_path = os.path.join('include') - rel_libdir_path = os.path.join('lib') - elif sys.platform in ['win32', 'cygwin']: - rel_include_path = os.path.join('Library', 'include') - rel_libdir_path = os.path.join('Library', 'lib') - else: - raise EnvironmentError("DPNP: " + sys.platform + " not supported") - - conda_root_var = "PREFIX" if IS_CONDA_BUILD else "CONDA_PREFIX" - - env_include, env_path = find_library(conda_root_var, rel_header_paths, rel_lib_paths, - rel_include_path=rel_include_path, - rel_libdir_path=rel_libdir_path, - verbose=verbose) - - env_include += [os.path.join(os.getenv(conda_root_var), 'include')] - - if not env_include or not env_path: - raise EnvironmentError(f"DPNP: Unable to find Python environment paths") - - return env_include, env_path diff --git a/utils/dpnp_coverage.py b/utils/dpnp_coverage.py deleted file mode 100644 index 5959fe81b863..000000000000 --- a/utils/dpnp_coverage.py +++ /dev/null @@ -1,154 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation -# All rights reserved. 
-# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# - Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -# THE POSSIBILITY OF SUCH DAMAGE. 
-# ***************************************************************************** - -import os -import inspect - -name_dict = {} -module_names_set = dict() -extra_modules = ["fft", "linalg", "random", "char"] -sep = ":" - -col0_width = 4 -col1_width = 40 -col2_width = 60 - - -def print_header_line(): - print(f"{'='*col0_width}", end=sep) - print(f"{'='*col1_width}", end=sep) - for mod_name in module_names_set.keys(): - print(f"{'='*col2_width}", end=sep) - print() - - -def print_header(): - print_header_line() - - print("#".center(col0_width), end=sep) - print("Name".center(col1_width), end=sep) - for mod_name in module_names_set.keys(): - print(mod_name.center(col2_width), end=sep) - print() - - print_header_line() - - -def print_footer(): - print_header_line() - - print("".center(col0_width), end=sep) - print("".center(col1_width), end=sep) - for mod_name, mod_sym_count in module_names_set.items(): - count_str = mod_name + " total " + str(mod_sym_count) - print(count_str.rjust(col2_width), end=sep) - print() - - print_header_line() - - -def add_symbol(item_name, module_name, item_val): - if item_name not in name_dict.keys(): - name_dict[item_name] = dict() - if not name_dict[item_name].get(module_name, False): - name_dict[item_name][module_name] = str(item_val) - - if module_name not in module_names_set.keys(): - module_names_set[module_name] = 0 - else: - module_names_set[module_name] += 1 -# else: -# print(f"item_name={item_name}, {name_dict[item_name][module_name]} replaced with {str(item_val)}") - - -def fill_data(module_name, module_obj, parent_module_name=""): - for item_name_raw, item_val in inspect.getmembers(module_obj): - if (item_name_raw[0] == "_"): - continue - - item_name = os.path.join(parent_module_name, item_name_raw) - if getattr(item_val, '__call__', False): - str_item = item_val - try: - str_item = inspect.signature(item_val) - except ValueError: - pass - add_symbol(item_name, module_name, str_item) - elif inspect.ismodule(item_val): - if 
item_name in extra_modules: - fill_data(module_name, item_val, parent_module_name=item_name) - else: - print(f"IGNORED: {module_name}: module: {item_name}") -# elif isinstance(item_val, (tuple, list, float, int)): -# add_symbol(item_name, module_name, item_val) -# elif isinstance(item_val, str): -# add_symbol(item_name, module_name, item_val.replace('\n', '').strip()) -# else: -# add_symbol(item_name, module_name, type(item_val)) -# print(f"Symbol {item_name} unrecognized. Symbol: {item_val}, type: {type(item_val)}") - - -def print_data(): - print_header() - - symbol_id = 0 - for symbol_name, symbol_values in sorted(name_dict.items()): - print(f"{symbol_id:<{col0_width}}", end=sep) - symbol_id += 1 - print(f"{symbol_name:{col1_width}}", end=sep) - - for mod_name in module_names_set.keys(): - val = symbol_values.get(mod_name, "") - val_prn = str(val)[0:col2_width - 1] - print(f"{val_prn:{col2_width}}", end=sep) - - print() - - print_footer() - - -if __name__ == '__main__': - - try: - import dpnp - fill_data("DPNP", dpnp) - except ImportError: - print("No DPNP module loaded") - - try: - import numpy - fill_data("NumPy", numpy) - except ImportError: - print("No NumPy module loaded") - - try: - import cupy - fill_data("cuPy", cupy) - except ImportError: - print("No cuPy module loaded") - - print_data()