From 24df7f88290ec0c23692513495276535d812e38d Mon Sep 17 00:00:00 2001 From: mattip Date: Thu, 30 Oct 2025 22:28:51 +0200 Subject: [PATCH 01/15] update OpenBLAS version to v0.3.30-322-gef6f9762 --- .github/workflows/posix.yml | 2 +- .github/workflows/windows-arm.yml | 2 +- .github/workflows/windows.yml | 2 +- .travis.yml | 2 +- pyproject.toml | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/posix.yml b/.github/workflows/posix.yml index 111508c..8a01758 100644 --- a/.github/workflows/posix.yml +++ b/.github/workflows/posix.yml @@ -10,7 +10,7 @@ on: - cron: '0 0 * * 0' env: - OPENBLAS_COMMIT: "v0.3.30" + OPENBLAS_COMMIT: "v0.3.30-322-gef6f9762" MACOSX_DEPLOYMENT_TARGET: 10.9 jobs: diff --git a/.github/workflows/windows-arm.yml b/.github/workflows/windows-arm.yml index e0e5181..f6b688c 100644 --- a/.github/workflows/windows-arm.yml +++ b/.github/workflows/windows-arm.yml @@ -7,7 +7,7 @@ on: branches: [ main ] env: - OPENBLAS_COMMIT: "v0.3.30" + OPENBLAS_COMMIT: "v0.3.30-322-gef6f9762" OPENBLAS_ROOT: "c:\\opt" # Preserve working directory for calls into bash # Without this, invoking bash will cd to the home directory diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 69f70ae..44aae26 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -8,7 +8,7 @@ on: workflow_dispatch: null env: - OPENBLAS_COMMIT: "v0.3.30" + OPENBLAS_COMMIT: "v0.3.30-322-gef6f9762" OPENBLAS_ROOT: "c:\\opt" # Preserve working directory for calls into bash # Without this, invoking bash will cd to the home directory diff --git a/.travis.yml b/.travis.yml index 327d9eb..9c2a478 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,7 +2,7 @@ env: global: # The archive that gets built has name from ``git describe`` on this # commit. 
- - OPENBLAS_COMMIT: "v0.3.30" + - OPENBLAS_COMMIT: "v0.3.30-322-gef6f9762" dist: jammy services: docker diff --git a/pyproject.toml b/pyproject.toml index f8a83ed..055063a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,8 +8,8 @@ build-backend = "setuptools.build_meta" [project] name = "scipy-openblas64" -# v0.3.30 -version = "0.3.30.0.7" +# v0.3.30-322-gef6f9762 +version = "0.3.30.322.0" requires-python = ">=3.7" description = "Provides OpenBLAS for python packaging" readme = "README.md" From 3c0376e56d54798c297586cd8e674570c2c802e2 Mon Sep 17 00:00:00 2001 From: mattip Date: Thu, 30 Oct 2025 22:35:48 +0200 Subject: [PATCH 02/15] remove unneeded patch --- OpenBLAS | 2 +- patches/0001-backout-PR-4741.patch | 61 ------------------------------ 2 files changed, 1 insertion(+), 62 deletions(-) delete mode 100644 patches/0001-backout-PR-4741.patch diff --git a/OpenBLAS b/OpenBLAS index b5456c1..ef6f976 160000 --- a/OpenBLAS +++ b/OpenBLAS @@ -1 +1 @@ -Subproject commit b5456c1b41ea88d4e0041778aa8ec09ee2a111a0 +Subproject commit ef6f97624ba9a560f64e470ec339f469865fac33 diff --git a/patches/0001-backout-PR-4741.patch b/patches/0001-backout-PR-4741.patch deleted file mode 100644 index fa08d49..0000000 --- a/patches/0001-backout-PR-4741.patch +++ /dev/null @@ -1,61 +0,0 @@ -From fe2f02876134dcfdd75860c622226b6913eef93e Mon Sep 17 00:00:00 2001 -From: mattip -Date: Wed, 22 Oct 2025 14:40:01 +0300 -Subject: [PATCH] backout PR 4741 - ---- - driver/level3/level3_thread.c | 24 ++++++++++++------------ - 1 file changed, 12 insertions(+), 12 deletions(-) - -diff --git a/driver/level3/level3_thread.c b/driver/level3/level3_thread.c -index 22f27975b..0832db58b 100644 ---- a/driver/level3/level3_thread.c -+++ b/driver/level3/level3_thread.c -@@ -588,8 +588,8 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG - InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock); - #else - static pthread_mutex_t level3_lock = PTHREAD_MUTEX_INITIALIZER; -- static 
pthread_cond_t level3_wakeup = PTHREAD_COND_INITIALIZER; -- volatile static BLASLONG CPU_AVAILABLE = MAX_CPU_NUMBER; -+ // static pthread_cond_t level3_wakeup = PTHREAD_COND_INITIALIZER; -+ // volatile static BLASLONG CPU_AVAILABLE = MAX_CPU_NUMBER; - #endif - - blas_arg_t newarg; -@@ -659,12 +659,12 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG - EnterCriticalSection((PCRITICAL_SECTION)&level3_lock); - #else - pthread_mutex_lock(&level3_lock); -- while(CPU_AVAILABLE < nthreads) { -- pthread_cond_wait(&level3_wakeup, &level3_lock); -- } -- CPU_AVAILABLE -= nthreads; -- WMB; -- pthread_mutex_unlock(&level3_lock); -+ // while(CPU_AVAILABLE < nthreads) { -+ // pthread_cond_wait(&level3_wakeup, &level3_lock); -+ // } -+ // CPU_AVAILABLE -= nthreads; -+ // WMB; -+ // pthread_mutex_unlock(&level3_lock); - #endif - - #ifdef USE_ALLOC_HEAP -@@ -816,10 +816,10 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG - #elif defined(OS_WINDOWS) - LeaveCriticalSection((PCRITICAL_SECTION)&level3_lock); - #else -- pthread_mutex_lock(&level3_lock); -- CPU_AVAILABLE += nthreads; -- WMB; -- pthread_cond_signal(&level3_wakeup); -+ // pthread_mutex_lock(&level3_lock); -+ // CPU_AVAILABLE += nthreads; -+ // WMB; -+ // pthread_cond_signal(&level3_wakeup); - pthread_mutex_unlock(&level3_lock); - #endif - --- -2.43.0 - From 6abf69db9f50b17f213f934a54b4917b9310d965 Mon Sep 17 00:00:00 2001 From: mattip Date: Fri, 31 Oct 2025 00:41:34 +0200 Subject: [PATCH 03/15] fixes for windows and win-arm64 --- .github/workflows/windows.yml | 2 +- tools/build_steps_win_arm64.bat | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 44aae26..66d327d 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -131,7 +131,7 @@ jobs: cat tools/LICENSE_win32.txt >> LICENSE.txt python -m pip wheel -w dist -vv . 
# move the mis-named scipy_openblas64-none-any.whl to a platform-specific name - for f in dist/*.whl; do mv $f "${f/%any.whl/$WHEEL_PLAT.whl}"; done + for f in dist/*any*.whl; do mv $f "${f/%any.whl/$WHEEL_PLAT.whl}"; done - name: Set up different Python uses: actions/setup-python@v5 diff --git a/tools/build_steps_win_arm64.bat b/tools/build_steps_win_arm64.bat index 5da4153..96e6a9a 100755 --- a/tools/build_steps_win_arm64.bat +++ b/tools/build_steps_win_arm64.bat @@ -97,6 +97,7 @@ for /f "usebackq tokens=*" %%i in (`"C:\Program Files (x86)\Microsoft Visual Stu PATH=C:\Program Files\LLVM\bin;%PATH% :: Run CMake and Ninja build +set CFLAGS="-Wno-reserved-macro-identifier -Wno-unsafe-buffer-usage" cmake .. -G Ninja -DCMAKE_BUILD_TYPE=Release -DUSE_THREADS=1 -DNUM_THREADS=24 -DTARGET=ARMV8 -DBUILD_SHARED_LIBS=ON -DARCH=arm64 ^ -DBINARY=%build_bits% -DCMAKE_SYSTEM_PROCESSOR=ARM64 -DCMAKE_C_COMPILER=clang-cl ^ -DCMAKE_Fortran_COMPILER=flang-new -DSYMBOLPREFIX="scipy_" -DLIBNAMEPREFIX="scipy_" %interface_flags% From b836842a00568698e0af800e08c7d1265dc81e23 Mon Sep 17 00:00:00 2001 From: mattip Date: Fri, 31 Oct 2025 08:31:52 +0200 Subject: [PATCH 04/15] fixes for windows and win-arm64 --- .github/workflows/windows.yml | 4 +++- tools/build_steps_win_arm64.bat | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 66d327d..66cc98f 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -131,7 +131,9 @@ jobs: cat tools/LICENSE_win32.txt >> LICENSE.txt python -m pip wheel -w dist -vv . 
# move the mis-named scipy_openblas64-none-any.whl to a platform-specific name - for f in dist/*any*.whl; do mv $f "${f/%any.whl/$WHEEL_PLAT.whl}"; done + if [ -e dist/*any*.whl ]; then + for f in dist/*.whl; do mv $f "${f/%any.whl/$WHEEL_PLAT.whl}"; done + fi - name: Set up different Python uses: actions/setup-python@v5 diff --git a/tools/build_steps_win_arm64.bat b/tools/build_steps_win_arm64.bat index 96e6a9a..b4206d5 100755 --- a/tools/build_steps_win_arm64.bat +++ b/tools/build_steps_win_arm64.bat @@ -97,7 +97,7 @@ for /f "usebackq tokens=*" %%i in (`"C:\Program Files (x86)\Microsoft Visual Stu PATH=C:\Program Files\LLVM\bin;%PATH% :: Run CMake and Ninja build -set CFLAGS="-Wno-reserved-macro-identifier -Wno-unsafe-buffer-usage" +set CFLAGS=-Wno-reserved-macro-identifier -Wno-unsafe-buffer-usage cmake .. -G Ninja -DCMAKE_BUILD_TYPE=Release -DUSE_THREADS=1 -DNUM_THREADS=24 -DTARGET=ARMV8 -DBUILD_SHARED_LIBS=ON -DARCH=arm64 ^ -DBINARY=%build_bits% -DCMAKE_SYSTEM_PROCESSOR=ARM64 -DCMAKE_C_COMPILER=clang-cl ^ -DCMAKE_Fortran_COMPILER=flang-new -DSYMBOLPREFIX="scipy_" -DLIBNAMEPREFIX="scipy_" %interface_flags% From d869e1080df9c86673a93962830a1d598856709f Mon Sep 17 00:00:00 2001 From: mattip Date: Fri, 31 Oct 2025 10:45:16 +0200 Subject: [PATCH 05/15] make win-arm64 build more like upstream --- .github/workflows/windows-arm.yml | 16 +++++++++++----- .github/workflows/windows.yml | 2 +- tools/build_steps_win_arm64.bat | 23 ++++++++++++++++++----- 3 files changed, 30 insertions(+), 11 deletions(-) diff --git a/.github/workflows/windows-arm.yml b/.github/workflows/windows-arm.yml index f6b688c..36f7d31 100644 --- a/.github/workflows/windows-arm.yml +++ b/.github/workflows/windows-arm.yml @@ -48,14 +48,20 @@ jobs: - name: Download and install LLVM installer run: | - Invoke-WebRequest https://github.com/llvm/llvm-project/releases/download/llvmorg-19.1.5/LLVM-19.1.5-woa64.exe -UseBasicParsing -OutFile LLVM-woa64.exe + Invoke-WebRequest 
https://github.com/llvm/llvm-project/releases/download/llvmorg-20.1.8/LLVM-20.1.8-woa64.exe -UseBasicParsing -OutFile LLVM-woa64.exe Start-Process -FilePath ".\LLVM-woa64.exe" -ArgumentList "/S" -Wait echo "C:\Program Files\LLVM\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - - name: Update CMake for WoA - run: | - pip install cmake - get-command cmake + - name: Install CMake and Ninja for Win-ARM64 + shell: pwsh + run: | + Invoke-WebRequest https://github.com/Kitware/CMake/releases/download/v3.29.4/cmake-3.29.4-windows-arm64.msi -OutFile cmake-arm64.msi + Start-Process msiexec.exe -ArgumentList "/i cmake-arm64.msi /quiet /norestart" -Wait + echo "C:\Program Files\CMake\bin" >> $env:GITHUB_PATH + + Invoke-WebRequest https://github.com/ninja-build/ninja/releases/download/v1.13.1/ninja-winarm64.zip -OutFile ninja-winarm64.zip + Expand-Archive ninja-winarm64.zip -DestinationPath ninja + Copy-Item ninja\ninja.exe -Destination "C:\Windows\System32" - name: Set env variables run: | diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 66cc98f..a5f03d1 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -131,7 +131,7 @@ jobs: cat tools/LICENSE_win32.txt >> LICENSE.txt python -m pip wheel -w dist -vv . # move the mis-named scipy_openblas64-none-any.whl to a platform-specific name - if [ -e dist/*any*.whl ]; then + if [[ -e dist/*any*.whl ]]; then for f in dist/*.whl; do mv $f "${f/%any.whl/$WHEEL_PLAT.whl}"; done fi diff --git a/tools/build_steps_win_arm64.bat b/tools/build_steps_win_arm64.bat index b4206d5..0c93df8 100755 --- a/tools/build_steps_win_arm64.bat +++ b/tools/build_steps_win_arm64.bat @@ -91,19 +91,32 @@ mkdir build || exit /b 1 & cd build || exit /b 1 echo Setting up ARM64 Developer Command Prompt and running CMake... 
:: Initialize VS ARM64 environment -for /f "usebackq tokens=*" %%i in (`"C:\Program Files (x86)\Microsoft Visual Studio\Installer\vswhere.exe" -latest -property installationPath`) do call "%%i\VC\Auxiliary\Build\vcvarsall.bat" arm64 +CALL "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsarm64.bat" :: Prefer LLVM flang PATH=C:\Program Files\LLVM\bin;%PATH% :: Run CMake and Ninja build + set CFLAGS=-Wno-reserved-macro-identifier -Wno-unsafe-buffer-usage -cmake .. -G Ninja -DCMAKE_BUILD_TYPE=Release -DUSE_THREADS=1 -DNUM_THREADS=24 -DTARGET=ARMV8 -DBUILD_SHARED_LIBS=ON -DARCH=arm64 ^ --DBINARY=%build_bits% -DCMAKE_SYSTEM_PROCESSOR=ARM64 -DCMAKE_C_COMPILER=clang-cl ^ --DCMAKE_Fortran_COMPILER=flang-new -DSYMBOLPREFIX="scipy_" -DLIBNAMEPREFIX="scipy_" %interface_flags% +cmake .. -G Ninja ^ + -DCMAKE_BUILD_TYPE=Release ^ + -DTARGET=ARMV8 ^ + -DBINARY=%build_bits% ^ + -DCMAKE_C_COMPILER=clang-cl ^ + -DCMAKE_Fortran_COMPILER=flang-new ^ + -DBUILD_SHARED_LIBS=ON ^ + -DCMAKE_SYSTEM_PROCESSOR=arm64 ^ + -DCMAKE_SYSTEM_NAME=Windows ^ + -DSYMBOLPREFIX="scipy_" ^ + -DLIBNAMEPREFIX="scipy_" ^ + -DUSE_THREADS=1 ^ + -DNUM_THREADS=24 ^ + %interface_flags% + if errorlevel 1 exit /b 1 -ninja +ninja -j 16 if errorlevel 1 exit /b 1 echo Build complete. Returning to Batch. From c438b6470f8f9574b9a8b6bdbb800fe1443c8966 Mon Sep 17 00:00:00 2001 From: mattip Date: Fri, 31 Oct 2025 10:48:24 +0200 Subject: [PATCH 06/15] do not upload openblas.zip artifact, no need anymore --- .github/workflows/windows-arm.yml | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/.github/workflows/windows-arm.yml b/.github/workflows/windows-arm.yml index 36f7d31..ce04793 100644 --- a/.github/workflows/windows-arm.yml +++ b/.github/workflows/windows-arm.yml @@ -37,15 +37,6 @@ jobs: python-version: 3.12 architecture: arm64 - - name: Setup visual studio - uses: microsoft/setup-msbuild@v2 - - - name: Download, install 7zip. 
- run: | - Invoke-WebRequest https://www.7-zip.org/a/7z2409-arm64.exe -UseBasicParsing -OutFile 7z_arm.exe - Start-Process -FilePath ".\7z_arm.exe" -ArgumentList "/S" -Wait - echo "C:\Program Files\7-Zip" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - - name: Download and install LLVM installer run: | Invoke-WebRequest https://github.com/llvm/llvm-project/releases/download/llvmorg-20.1.8/LLVM-20.1.8-woa64.exe -UseBasicParsing -OutFile LLVM-woa64.exe @@ -58,7 +49,7 @@ jobs: Invoke-WebRequest https://github.com/Kitware/CMake/releases/download/v3.29.4/cmake-3.29.4-windows-arm64.msi -OutFile cmake-arm64.msi Start-Process msiexec.exe -ArgumentList "/i cmake-arm64.msi /quiet /norestart" -Wait echo "C:\Program Files\CMake\bin" >> $env:GITHUB_PATH - + Invoke-WebRequest https://github.com/ninja-build/ninja/releases/download/v1.13.1/ninja-winarm64.zip -OutFile ninja-winarm64.zip Expand-Archive ninja-winarm64.zip -DestinationPath ninja Copy-Item ninja\ninja.exe -Destination "C:\Windows\System32" @@ -76,12 +67,6 @@ jobs: git submodule update --init --recursive .\tools\build_steps_win_arm64.bat 64 ${env:INTERFACE_BITS} - - name: Pack - run: | - cd local - cp -r "scipy_openblas${env:INTERFACE_BITS}" $env:INTERFACE_BITS - 7z a ../builds/openblas-${env:PLAT}-${env:INTERFACE64}.zip -tzip $env:INTERFACE_BITS - - name: Test ${{ matrix.INTERFACE_BITS }}-bit interface wheel run: | python -m pip install --no-index --find-links dist scipy_openblas${env:INTERFACE_BITS} @@ -93,11 +78,6 @@ jobs: name: wheels-${{ env.PLAT }}-${{ env.INTERFACE64 }} path: dist/scipy_openblas*.whl - - uses: actions/upload-artifact@v4.3.0 - with: - name: openblas-${{ env.PLAT }}-${{ env.INTERFACE64 }} - path: builds/openblas*.zip - - name: Install Anaconda client run: | # Rust installation needed for rpds-py. 
From bb821ac2350707e01cba69a1263391589971bdb6 Mon Sep 17 00:00:00 2001 From: mattip Date: Fri, 31 Oct 2025 11:06:49 +0200 Subject: [PATCH 07/15] typos --- .github/workflows/windows-arm.yml | 18 +++++++++--------- .github/workflows/windows.yml | 6 +++--- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/windows-arm.yml b/.github/workflows/windows-arm.yml index ce04793..78f1298 100644 --- a/.github/workflows/windows-arm.yml +++ b/.github/workflows/windows-arm.yml @@ -44,15 +44,15 @@ jobs: echo "C:\Program Files\LLVM\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - name: Install CMake and Ninja for Win-ARM64 - shell: pwsh - run: | - Invoke-WebRequest https://github.com/Kitware/CMake/releases/download/v3.29.4/cmake-3.29.4-windows-arm64.msi -OutFile cmake-arm64.msi - Start-Process msiexec.exe -ArgumentList "/i cmake-arm64.msi /quiet /norestart" -Wait - echo "C:\Program Files\CMake\bin" >> $env:GITHUB_PATH - - Invoke-WebRequest https://github.com/ninja-build/ninja/releases/download/v1.13.1/ninja-winarm64.zip -OutFile ninja-winarm64.zip - Expand-Archive ninja-winarm64.zip -DestinationPath ninja - Copy-Item ninja\ninja.exe -Destination "C:\Windows\System32" + shell: pwsh + run: | + Invoke-WebRequest https://github.com/Kitware/CMake/releases/download/v3.29.4/cmake-3.29.4-windows-arm64.msi -OutFile cmake-arm64.msi + Start-Process msiexec.exe -ArgumentList "/i cmake-arm64.msi /quiet /norestart" -Wait + echo "C:\Program Files\CMake\bin" >> $env:GITHUB_PATH + + Invoke-WebRequest https://github.com/ninja-build/ninja/releases/download/v1.13.1/ninja-winarm64.zip -OutFile ninja-winarm64.zip + Expand-Archive ninja-winarm64.zip -DestinationPath ninja + Copy-Item ninja\ninja.exe -Destination "C:\Windows\System32" - name: Set env variables run: | diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index a5f03d1..e0440e4 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -131,9 
+131,9 @@ jobs: cat tools/LICENSE_win32.txt >> LICENSE.txt python -m pip wheel -w dist -vv . # move the mis-named scipy_openblas64-none-any.whl to a platform-specific name - if [[ -e dist/*any*.whl ]]; then - for f in dist/*.whl; do mv $f "${f/%any.whl/$WHEEL_PLAT.whl}"; done - fi + if [[ -e dist/*any*.whl ]]; then + for f in dist/*.whl; do mv $f "${f/%any.whl/$WHEEL_PLAT.whl}"; done + fi - name: Set up different Python uses: actions/setup-python@v5 From 2101a16c78e824b8c5c2d7d7afcc3738aa4c7f1d Mon Sep 17 00:00:00 2001 From: mattip Date: Sun, 2 Nov 2025 08:20:20 +0200 Subject: [PATCH 08/15] add upstream patch to fix win-arm64 --- ...ort-for-HFLOAT16-in-Windows-symbol-r.patch | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 patches/0001-Fix-missing-support-for-HFLOAT16-in-Windows-symbol-r.patch diff --git a/patches/0001-Fix-missing-support-for-HFLOAT16-in-Windows-symbol-r.patch b/patches/0001-Fix-missing-support-for-HFLOAT16-in-Windows-symbol-r.patch new file mode 100644 index 0000000..4ba2f68 --- /dev/null +++ b/patches/0001-Fix-missing-support-for-HFLOAT16-in-Windows-symbol-r.patch @@ -0,0 +1,54 @@ +From 358c582ef9dde59f960773a5dfde13af17aa9af5 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Sat, 1 Nov 2025 19:27:56 +0100 +Subject: [PATCH] Fix missing support for HFLOAT16 in Windows symbol + renaming/dll generation + +--- + CMakeLists.txt | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 5895cf060..96c2a4364 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -498,6 +498,11 @@ if (BUILD_SHARED_LIBS OR DELETE_STATIC_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFFI + else () + set (BBF16 0) + endif() ++ if (${BUILD_HFLOAT16}) ++ set (BHF16 1) ++ else () ++ set (BHF16 0) ++ endif() + if (${BUILD_SINGLE}) + set (BS 1) + else () +@@ -533,7 +538,7 @@ endif() + #if (USE_PERL) + message(STATUS "adding postbuild instruction to rename syms") + add_custom_command(TARGET 
${OpenBLAS_LIBNAME}_static POST_BUILD +- COMMAND perl ${PROJECT_SOURCE_DIR}/exports/gensymbol.pl "win2k" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" "${BLD}" "${BBF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/renamesyms.def ++ COMMAND perl ${PROJECT_SOURCE_DIR}/exports/gensymbol.pl "win2k" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" "${BLD}" "${BBF16}" "${BHF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/renamesyms.def + COMMAND ${CMAKE_C_COMPILER} ${CMAKE_C_FLAGS} -I${PROJECT_SOURCE_DIR} -I${PROJECT_BINARY_DIR} -c -o ${PROJECT_BINARY_DIR}/dllinit.o ${PROJECT_SOURCE_DIR}/exports/dllinit.c + COMMAND lld-link -nodefaultlib:libcmt -defaultlib:${CRTLIB} ${CMAKE_LINKER_FLAGS} -errorlimit:0 -def:${PROJECT_BINARY_DIR}/renamesyms.def ${PROJECT_BINARY_DIR}/dllinit.o $ -wholearchive:$ -dll -out:$/${OpenBLAS_LIBNAME}.dll -implib:$/${OpenBLAS_LIBNAME}.dll.a ${PDBOPT} + #COMMAND lld-link -nodefaultlib:libcmt -defaultlib:msvcrt ${CMAKE_LINKER_FLAGS} -errorlimit:0 -def:${PROJECT_BINARY_DIR}/renamesyms.def ${PROJECT_BINARY_DIR}/dllinit.o $ -wholearchive:$ -dll -out:$/${OpenBLAS_LIBNAME}.dll -implib:$/${OpenBLAS_LIBNAME}.dll.a +@@ -543,13 +548,13 @@ message(STATUS "adding postbuild instruction to rename syms") + else () + if (NOT USE_PERL) + add_custom_command(TARGET ${OpenBLAS_LIBNAME}_shared POST_BUILD +- COMMAND sh ${PROJECT_SOURCE_DIR}/exports/gensymbol "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BLD}" "${BBF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/objcopy.def ++ COMMAND sh ${PROJECT_SOURCE_DIR}/exports/gensymbol "objcopy" "${ARCH}" 
"${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BLD}" "${BBF16}" "${BHF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/objcopy.def + COMMAND objcopy --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/${OpenBLAS_LIBNAME}.so + COMMENT "renaming symbols" + ) + else() + add_custom_command(TARGET ${OpenBLAS_LIBNAME}_shared POST_BUILD +- COMMAND perl ${PROJECT_SOURCE_DIR}/exports/gensymbol.pl "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BLD}" "${BBF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/objcopy.def ++ COMMAND perl ${PROJECT_SOURCE_DIR}/exports/gensymbol.pl "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BLD}" "${BBF16}" "${BHF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/objcopy.def + COMMAND objcopy --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/lib${OpenBLAS_LIBNAME}.so + COMMENT "renaming symbols" + ) +-- +2.43.0 + From d650a3e8569f11ae9f40442b39bf68f722deed45 Mon Sep 17 00:00:00 2001 From: mattip Date: Sun, 2 Nov 2025 08:57:49 +0200 Subject: [PATCH 09/15] apply patches on win-arm64, filter more warnings --- tools/build_steps_win_arm64.bat | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/build_steps_win_arm64.bat b/tools/build_steps_win_arm64.bat index 0c93df8..9aaf02b 100755 --- a/tools/build_steps_win_arm64.bat +++ b/tools/build_steps_win_arm64.bat @@ -77,6 +77,10 @@ if errorlevel 1 ( exit /b 2 ) +:: Patch +for /r %%f in (..\patches\*) do git am %%f +if errorlevel 1 exit /b 1 + :: Set suffixed-ILP64 flags if "%if_bits%"=="64" ( set 
"interface_flags=-DINTERFACE64=1 -DSYMBOLSUFFIX=64_" @@ -98,7 +102,7 @@ PATH=C:\Program Files\LLVM\bin;%PATH% :: Run CMake and Ninja build -set CFLAGS=-Wno-reserved-macro-identifier -Wno-unsafe-buffer-usage +set CFLAGS=-Wno-reserved-macro-identifier -Wno-unsafe-buffer-usage -Wno-unused-macros -Wno-sign-conversion -Wno-reserved-identifier cmake .. -G Ninja ^ -DCMAKE_BUILD_TYPE=Release ^ -DTARGET=ARMV8 ^ From 0caac580971075a13a4bbf7e022cca0c8840dd25 Mon Sep 17 00:00:00 2001 From: mattip Date: Sun, 2 Nov 2025 09:19:39 +0200 Subject: [PATCH 10/15] use 'git apply' to avoid committing --- tools/build_steps_win_arm64.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/build_steps_win_arm64.bat b/tools/build_steps_win_arm64.bat index 9aaf02b..f53d957 100755 --- a/tools/build_steps_win_arm64.bat +++ b/tools/build_steps_win_arm64.bat @@ -78,7 +78,7 @@ if errorlevel 1 ( ) :: Patch -for /r %%f in (..\patches\*) do git am %%f +for /r %%f in (..\patches\*) do git apply %%f if errorlevel 1 exit /b 1 :: Set suffixed-ILP64 flags From 26edc67cc3438f0a45859eabced7bdd949181148 Mon Sep 17 00:00:00 2001 From: mattip Date: Sun, 2 Nov 2025 14:10:41 +0200 Subject: [PATCH 11/15] limit aarch64 kernels to avoid SVE and up --- tools/build_steps.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/build_steps.sh b/tools/build_steps.sh index 0d77b93..cbcca13 100644 --- a/tools/build_steps.sh +++ b/tools/build_steps.sh @@ -171,6 +171,8 @@ EOF Linux-aarch64) local bitness=64 local target="ARMV8" + # manylinux2014 image uses gcc-10, which miscompiles ARMV8SVE and up + local dynamic_list="ARMV8 CORTEXA53 NEOVERSEN1 NEOVERSEN2 VORTEX" ;; Darwin-arm64) local bitness=64 From 8adc4737b0fe202b420dfaf1504e09208a913267 Mon Sep 17 00:00:00 2001 From: mattip Date: Sun, 2 Nov 2025 14:26:32 +0200 Subject: [PATCH 12/15] limit aarch64 kernels to ARMV8 only --- tools/build_steps.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/build_steps.sh 
b/tools/build_steps.sh index cbcca13..5f07ea1 100644 --- a/tools/build_steps.sh +++ b/tools/build_steps.sh @@ -172,7 +172,8 @@ EOF local bitness=64 local target="ARMV8" # manylinux2014 image uses gcc-10, which miscompiles ARMV8SVE and up - local dynamic_list="ARMV8 CORTEXA53 NEOVERSEN1 NEOVERSEN2 VORTEX" + # local dynamic_list="ARMV8 CORTEXA53 NEOVERSEN1 NEOVERSEN2 VORTEX" + local dynamic_list="ARMV8" ;; Darwin-arm64) local bitness=64 From b5ba22c1248d8c2722c129a3b029cd3e555ca379 Mon Sep 17 00:00:00 2001 From: mattip Date: Sun, 2 Nov 2025 22:21:51 +0200 Subject: [PATCH 13/15] add manylinux_2_28 wheels for aarch64, use older dynamic_list for manylinux2014 ones --- .github/workflows/posix.yml | 2 ++ tools/build_steps.sh | 8 ++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/posix.yml b/.github/workflows/posix.yml index 8a01758..7650cf1 100644 --- a/.github/workflows/posix.yml +++ b/.github/workflows/posix.yml @@ -39,6 +39,8 @@ jobs: - { os: ubuntu-24.04-arm, PLAT: aarch64, INTERFACE64: '0', MB_ML_VER: '2014', MB_ML_LIBC: manylinux} - { os: ubuntu-24.04-arm, PLAT: aarch64, INTERFACE64: '1', MB_ML_VER: '2014', MB_ML_LIBC: manylinux} + - { os: ubuntu-24.04-arm, PLAT: aarch64, INTERFACE64: '0', MB_ML_VER: '_2_28', MB_ML_LIBC: manylinux} + - { os: ubuntu-24.04-arm, PLAT: aarch64, INTERFACE64: '1', MB_ML_VER: '_2_28', MB_ML_LIBC: manylinux} - { os: ubuntu-24.04-arm, PLAT: aarch64, INTERFACE64: '0', MB_ML_VER: '_1_2', MB_ML_LIBC: musllinux} - { os: ubuntu-24.04-arm, PLAT: aarch64, INTERFACE64: '1', MB_ML_VER: '_1_2', MB_ML_LIBC: musllinux} diff --git a/tools/build_steps.sh b/tools/build_steps.sh index 5f07ea1..90e9131 100644 --- a/tools/build_steps.sh +++ b/tools/build_steps.sh @@ -172,8 +172,12 @@ EOF local bitness=64 local target="ARMV8" # manylinux2014 image uses gcc-10, which miscompiles ARMV8SVE and up - # local dynamic_list="ARMV8 CORTEXA53 NEOVERSEN1 NEOVERSEN2 VORTEX" - local dynamic_list="ARMV8" + if [ "$MB_ML_VER" == "2014" ]; 
then + echo setting DYNAMIC_LIST for manylinux2014 to ARMV8 only + local dynamic_list="ARMV8" + else + local dynamic_list="ARMV8 CORTEXA53 NEOVERSEN1 NEOVERSEN2 VORTEX A64FX ARM9SME" + fi ;; Darwin-arm64) local bitness=64 From 65cb1b32118a85af13d6a5bae745477a479ab70f Mon Sep 17 00:00:00 2001 From: mattip Date: Sun, 2 Nov 2025 22:40:54 +0200 Subject: [PATCH 14/15] do not specify dynamic list for aarch64 builds except for manylinux2014 --- tools/build_steps.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/build_steps.sh b/tools/build_steps.sh index 90e9131..6aacb01 100644 --- a/tools/build_steps.sh +++ b/tools/build_steps.sh @@ -175,8 +175,6 @@ EOF if [ "$MB_ML_VER" == "2014" ]; then echo setting DYNAMIC_LIST for manylinux2014 to ARMV8 only local dynamic_list="ARMV8" - else - local dynamic_list="ARMV8 CORTEXA53 NEOVERSEN1 NEOVERSEN2 VORTEX A64FX ARM9SME" fi ;; Darwin-arm64) From 9193baa58ff963f65b0091319d32b003d592c9b6 Mon Sep 17 00:00:00 2001 From: mattip Date: Mon, 3 Nov 2025 08:54:07 +0200 Subject: [PATCH 15/15] remove patch already applied upstream --- .../remove-lock-around-thread-shutdown.patch | 35 ------------------- 1 file changed, 35 deletions(-) delete mode 100644 patches/remove-lock-around-thread-shutdown.patch diff --git a/patches/remove-lock-around-thread-shutdown.patch b/patches/remove-lock-around-thread-shutdown.patch deleted file mode 100644 index ce66797..0000000 --- a/patches/remove-lock-around-thread-shutdown.patch +++ /dev/null @@ -1,35 +0,0 @@ -From ef6f97624ba9a560f64e470ec339f469865fac33 Mon Sep 17 00:00:00 2001 -From: Martin Kroeker -Date: Thu, 30 Oct 2025 11:12:47 -0700 -Subject: [PATCH 5/6] [WIP,Testing] remove the lock around the thread shutdown - function again (#5479) - -* remove the lock around the thread shutdown function - server is locked already here ---- - driver/others/blas_server.c | 3 --- - 1 file changed, 3 deletions(-) - -diff --git a/driver/others/blas_server.c b/driver/others/blas_server.c -index 
3d89803a6..4a3182354 100644 ---- a/driver/others/blas_server.c -+++ b/driver/others/blas_server.c -@@ -984,8 +984,6 @@ int BLASFUNC(blas_thread_shutdown)(void){ - - int i; - -- LOCK_COMMAND(&server_lock); -- - //Free buffers allocated for threads - for(i=0; i