diff --git a/.github/workflows/posix.yml b/.github/workflows/posix.yml index 111508c..7650cf1 100644 --- a/.github/workflows/posix.yml +++ b/.github/workflows/posix.yml @@ -10,7 +10,7 @@ on: - cron: '0 0 * * 0' env: - OPENBLAS_COMMIT: "v0.3.30" + OPENBLAS_COMMIT: "v0.3.30-322-gef6f9762" MACOSX_DEPLOYMENT_TARGET: 10.9 jobs: @@ -39,6 +39,8 @@ jobs: - { os: ubuntu-24.04-arm, PLAT: aarch64, INTERFACE64: '0', MB_ML_VER: '2014', MB_ML_LIBC: manylinux} - { os: ubuntu-24.04-arm, PLAT: aarch64, INTERFACE64: '1', MB_ML_VER: '2014', MB_ML_LIBC: manylinux} + - { os: ubuntu-24.04-arm, PLAT: aarch64, INTERFACE64: '0', MB_ML_VER: '_2_28', MB_ML_LIBC: manylinux} + - { os: ubuntu-24.04-arm, PLAT: aarch64, INTERFACE64: '1', MB_ML_VER: '_2_28', MB_ML_LIBC: manylinux} - { os: ubuntu-24.04-arm, PLAT: aarch64, INTERFACE64: '0', MB_ML_VER: '_1_2', MB_ML_LIBC: musllinux} - { os: ubuntu-24.04-arm, PLAT: aarch64, INTERFACE64: '1', MB_ML_VER: '_1_2', MB_ML_LIBC: musllinux} diff --git a/.github/workflows/windows-arm.yml b/.github/workflows/windows-arm.yml index e0e5181..78f1298 100644 --- a/.github/workflows/windows-arm.yml +++ b/.github/workflows/windows-arm.yml @@ -7,7 +7,7 @@ on: branches: [ main ] env: - OPENBLAS_COMMIT: "v0.3.30" + OPENBLAS_COMMIT: "v0.3.30-322-gef6f9762" OPENBLAS_ROOT: "c:\\opt" # Preserve working directory for calls into bash # Without this, invoking bash will cd to the home directory @@ -37,25 +37,22 @@ jobs: python-version: 3.12 architecture: arm64 - - name: Setup visual studio - uses: microsoft/setup-msbuild@v2 - - - name: Download, install 7zip. 
- run: | - Invoke-WebRequest https://www.7-zip.org/a/7z2409-arm64.exe -UseBasicParsing -OutFile 7z_arm.exe - Start-Process -FilePath ".\7z_arm.exe" -ArgumentList "/S" -Wait - echo "C:\Program Files\7-Zip" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - - name: Download and install LLVM installer run: | - Invoke-WebRequest https://github.com/llvm/llvm-project/releases/download/llvmorg-19.1.5/LLVM-19.1.5-woa64.exe -UseBasicParsing -OutFile LLVM-woa64.exe + Invoke-WebRequest https://github.com/llvm/llvm-project/releases/download/llvmorg-20.1.8/LLVM-20.1.8-woa64.exe -UseBasicParsing -OutFile LLVM-woa64.exe Start-Process -FilePath ".\LLVM-woa64.exe" -ArgumentList "/S" -Wait echo "C:\Program Files\LLVM\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - - name: Update CMake for WoA + - name: Install CMake and Ninja for Win-ARM64 + shell: pwsh run: | - pip install cmake - get-command cmake + Invoke-WebRequest https://github.com/Kitware/CMake/releases/download/v3.29.4/cmake-3.29.4-windows-arm64.msi -OutFile cmake-arm64.msi + Start-Process msiexec.exe -ArgumentList "/i cmake-arm64.msi /quiet /norestart" -Wait + echo "C:\Program Files\CMake\bin" >> $env:GITHUB_PATH + + Invoke-WebRequest https://github.com/ninja-build/ninja/releases/download/v1.13.1/ninja-winarm64.zip -OutFile ninja-winarm64.zip + Expand-Archive ninja-winarm64.zip -DestinationPath ninja + Copy-Item ninja\ninja.exe -Destination "C:\Windows\System32" - name: Set env variables run: | @@ -70,12 +67,6 @@ jobs: git submodule update --init --recursive .\tools\build_steps_win_arm64.bat 64 ${env:INTERFACE_BITS} - - name: Pack - run: | - cd local - cp -r "scipy_openblas${env:INTERFACE_BITS}" $env:INTERFACE_BITS - 7z a ../builds/openblas-${env:PLAT}-${env:INTERFACE64}.zip -tzip $env:INTERFACE_BITS - - name: Test ${{ matrix.INTERFACE_BITS }}-bit interface wheel run: | python -m pip install --no-index --find-links dist scipy_openblas${env:INTERFACE_BITS} @@ -87,11 +78,6 @@ jobs: name: 
wheels-${{ env.PLAT }}-${{ env.INTERFACE64 }} path: dist/scipy_openblas*.whl - - uses: actions/upload-artifact@v4.3.0 - with: - name: openblas-${{ env.PLAT }}-${{ env.INTERFACE64 }} - path: builds/openblas*.zip - - name: Install Anaconda client run: | # Rust installation needed for rpds-py. diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 69f70ae..e0440e4 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -8,7 +8,7 @@ on: workflow_dispatch: null env: - OPENBLAS_COMMIT: "v0.3.30" + OPENBLAS_COMMIT: "v0.3.30-322-gef6f9762" OPENBLAS_ROOT: "c:\\opt" # Preserve working directory for calls into bash # Without this, invoking bash will cd to the home directory @@ -131,7 +131,9 @@ jobs: cat tools/LICENSE_win32.txt >> LICENSE.txt python -m pip wheel -w dist -vv . # move the mis-named scipy_openblas64-none-any.whl to a platform-specific name - for f in dist/*.whl; do mv $f "${f/%any.whl/$WHEEL_PLAT.whl}"; done + if compgen -G "dist/*any.whl" > /dev/null; then  # [[ -e glob ]] never expands the glob; compgen -G does + for f in dist/*any.whl; do mv "$f" "${f/%any.whl/$WHEEL_PLAT.whl}"; done + fi - name: Set up different Python uses: actions/setup-python@v5 diff --git a/.travis.yml b/.travis.yml index 327d9eb..9c2a478 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,7 +2,7 @@ env: global: # The archive that gets built has name from ``git describe`` on this # commit. 
- - OPENBLAS_COMMIT: "v0.3.30" + - OPENBLAS_COMMIT: "v0.3.30-322-gef6f9762" dist: jammy services: docker diff --git a/OpenBLAS b/OpenBLAS index b5456c1..ef6f976 160000 --- a/OpenBLAS +++ b/OpenBLAS @@ -1 +1 @@ -Subproject commit b5456c1b41ea88d4e0041778aa8ec09ee2a111a0 +Subproject commit ef6f97624ba9a560f64e470ec339f469865fac33 diff --git a/patches/0001-Fix-missing-support-for-HFLOAT16-in-Windows-symbol-r.patch b/patches/0001-Fix-missing-support-for-HFLOAT16-in-Windows-symbol-r.patch new file mode 100644 index 0000000..4ba2f68 --- /dev/null +++ b/patches/0001-Fix-missing-support-for-HFLOAT16-in-Windows-symbol-r.patch @@ -0,0 +1,54 @@ +From 358c582ef9dde59f960773a5dfde13af17aa9af5 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Sat, 1 Nov 2025 19:27:56 +0100 +Subject: [PATCH] Fix missing support for HFLOAT16 in Windows symbol + renaming/dll generation + +--- + CMakeLists.txt | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 5895cf060..96c2a4364 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -498,6 +498,11 @@ if (BUILD_SHARED_LIBS OR DELETE_STATIC_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFFI + else () + set (BBF16 0) + endif() ++ if (${BUILD_HFLOAT16}) ++ set (BHF16 1) ++ else () ++ set (BHF16 0) ++ endif() + if (${BUILD_SINGLE}) + set (BS 1) + else () +@@ -533,7 +538,7 @@ endif() + #if (USE_PERL) + message(STATUS "adding postbuild instruction to rename syms") + add_custom_command(TARGET ${OpenBLAS_LIBNAME}_static POST_BUILD +- COMMAND perl ${PROJECT_SOURCE_DIR}/exports/gensymbol.pl "win2k" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" "${BLD}" "${BBF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/renamesyms.def ++ COMMAND perl ${PROJECT_SOURCE_DIR}/exports/gensymbol.pl "win2k" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" 
"${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" "${BLD}" "${BBF16}" "${BHF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/renamesyms.def + COMMAND ${CMAKE_C_COMPILER} ${CMAKE_C_FLAGS} -I${PROJECT_SOURCE_DIR} -I${PROJECT_BINARY_DIR} -c -o ${PROJECT_BINARY_DIR}/dllinit.o ${PROJECT_SOURCE_DIR}/exports/dllinit.c + COMMAND lld-link -nodefaultlib:libcmt -defaultlib:${CRTLIB} ${CMAKE_LINKER_FLAGS} -errorlimit:0 -def:${PROJECT_BINARY_DIR}/renamesyms.def ${PROJECT_BINARY_DIR}/dllinit.o $ -wholearchive:$ -dll -out:$/${OpenBLAS_LIBNAME}.dll -implib:$/${OpenBLAS_LIBNAME}.dll.a ${PDBOPT} + #COMMAND lld-link -nodefaultlib:libcmt -defaultlib:msvcrt ${CMAKE_LINKER_FLAGS} -errorlimit:0 -def:${PROJECT_BINARY_DIR}/renamesyms.def ${PROJECT_BINARY_DIR}/dllinit.o $ -wholearchive:$ -dll -out:$/${OpenBLAS_LIBNAME}.dll -implib:$/${OpenBLAS_LIBNAME}.dll.a +@@ -543,13 +548,13 @@ message(STATUS "adding postbuild instruction to rename syms") + else () + if (NOT USE_PERL) + add_custom_command(TARGET ${OpenBLAS_LIBNAME}_shared POST_BUILD +- COMMAND sh ${PROJECT_SOURCE_DIR}/exports/gensymbol "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BLD}" "${BBF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/objcopy.def ++ COMMAND sh ${PROJECT_SOURCE_DIR}/exports/gensymbol "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BLD}" "${BBF16}" "${BHF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/objcopy.def + COMMAND objcopy --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/${OpenBLAS_LIBNAME}.so + COMMENT "renaming symbols" + ) + else() + add_custom_command(TARGET ${OpenBLAS_LIBNAME}_shared POST_BUILD +- COMMAND perl 
${PROJECT_SOURCE_DIR}/exports/gensymbol.pl "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BLD}" "${BBF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/objcopy.def ++ COMMAND perl ${PROJECT_SOURCE_DIR}/exports/gensymbol.pl "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BLD}" "${BBF16}" "${BHF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/objcopy.def + COMMAND objcopy --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/lib${OpenBLAS_LIBNAME}.so + COMMENT "renaming symbols" + ) +-- +2.43.0 + diff --git a/patches/0001-backout-PR-4741.patch b/patches/0001-backout-PR-4741.patch deleted file mode 100644 index fa08d49..0000000 --- a/patches/0001-backout-PR-4741.patch +++ /dev/null @@ -1,61 +0,0 @@ -From fe2f02876134dcfdd75860c622226b6913eef93e Mon Sep 17 00:00:00 2001 -From: mattip -Date: Wed, 22 Oct 2025 14:40:01 +0300 -Subject: [PATCH] backout PR 4741 - ---- - driver/level3/level3_thread.c | 24 ++++++++++++------------ - 1 file changed, 12 insertions(+), 12 deletions(-) - -diff --git a/driver/level3/level3_thread.c b/driver/level3/level3_thread.c -index 22f27975b..0832db58b 100644 ---- a/driver/level3/level3_thread.c -+++ b/driver/level3/level3_thread.c -@@ -588,8 +588,8 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG - InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock); - #else - static pthread_mutex_t level3_lock = PTHREAD_MUTEX_INITIALIZER; -- static pthread_cond_t level3_wakeup = PTHREAD_COND_INITIALIZER; -- volatile static BLASLONG CPU_AVAILABLE = MAX_CPU_NUMBER; -+ // static pthread_cond_t level3_wakeup = PTHREAD_COND_INITIALIZER; -+ // volatile static BLASLONG CPU_AVAILABLE = MAX_CPU_NUMBER; - #endif - - 
blas_arg_t newarg; -@@ -659,12 +659,12 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG - EnterCriticalSection((PCRITICAL_SECTION)&level3_lock); - #else - pthread_mutex_lock(&level3_lock); -- while(CPU_AVAILABLE < nthreads) { -- pthread_cond_wait(&level3_wakeup, &level3_lock); -- } -- CPU_AVAILABLE -= nthreads; -- WMB; -- pthread_mutex_unlock(&level3_lock); -+ // while(CPU_AVAILABLE < nthreads) { -+ // pthread_cond_wait(&level3_wakeup, &level3_lock); -+ // } -+ // CPU_AVAILABLE -= nthreads; -+ // WMB; -+ // pthread_mutex_unlock(&level3_lock); - #endif - - #ifdef USE_ALLOC_HEAP -@@ -816,10 +816,10 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG - #elif defined(OS_WINDOWS) - LeaveCriticalSection((PCRITICAL_SECTION)&level3_lock); - #else -- pthread_mutex_lock(&level3_lock); -- CPU_AVAILABLE += nthreads; -- WMB; -- pthread_cond_signal(&level3_wakeup); -+ // pthread_mutex_lock(&level3_lock); -+ // CPU_AVAILABLE += nthreads; -+ // WMB; -+ // pthread_cond_signal(&level3_wakeup); - pthread_mutex_unlock(&level3_lock); - #endif - --- -2.43.0 - diff --git a/patches/remove-lock-around-thread-shutdown.patch b/patches/remove-lock-around-thread-shutdown.patch deleted file mode 100644 index ce66797..0000000 --- a/patches/remove-lock-around-thread-shutdown.patch +++ /dev/null @@ -1,35 +0,0 @@ -From ef6f97624ba9a560f64e470ec339f469865fac33 Mon Sep 17 00:00:00 2001 -From: Martin Kroeker -Date: Thu, 30 Oct 2025 11:12:47 -0700 -Subject: [PATCH 5/6] [WIP,Testing] remove the lock around the thread shutdown - function again (#5479) - -* remove the lock around the thread shutdown function - server is locked already here ---- - driver/others/blas_server.c | 3 --- - 1 file changed, 3 deletions(-) - -diff --git a/driver/others/blas_server.c b/driver/others/blas_server.c -index 3d89803a6..4a3182354 100644 ---- a/driver/others/blas_server.c -+++ b/driver/others/blas_server.c -@@ -984,8 +984,6 @@ int BLASFUNC(blas_thread_shutdown)(void){ - - int i; 
- -- LOCK_COMMAND(&server_lock); -- - //Free buffers allocated for threads - for(i=0; i