From 485cba2cd1c8317fb80dad456f3ef0de21688d75 Mon Sep 17 00:00:00 2001 From: "weslley.spereira" Date: Mon, 30 Aug 2021 14:20:04 -0600 Subject: [PATCH 01/13] Adds more fixes to the documentation of CROTG and ZROTG --- BLAS/SRC/crotg.f90 | 4 ++-- BLAS/SRC/zrotg.f90 | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/BLAS/SRC/crotg.f90 b/BLAS/SRC/crotg.f90 index 7806140668..dc2c0a2066 100644 --- a/BLAS/SRC/crotg.f90 +++ b/BLAS/SRC/crotg.f90 @@ -28,8 +28,8 @@ !> r = sgn(a)*sqrt(|a|**2 + |b|**2) !> c = a / r !> s = b / r -!> the same as in CROTG when |a| > |b|. When |b| >= |a|, the -!> sign of c and s will be different from those computed by CROTG +!> the same as in SROTG when |a| > |b|. When |b| >= |a|, the +!> sign of c and s will be different from those computed by SROTG !> if the signs of a and b are not the same. !> !> \endverbatim diff --git a/BLAS/SRC/zrotg.f90 b/BLAS/SRC/zrotg.f90 index 288e5c7ef5..2806713a16 100644 --- a/BLAS/SRC/zrotg.f90 +++ b/BLAS/SRC/zrotg.f90 @@ -28,8 +28,8 @@ !> r = sgn(a)*sqrt(|a|**2 + |b|**2) !> c = a / r !> s = b / r -!> the same as in ZROTG when |a| > |b|. When |b| >= |a|, the -!> sign of c and s will be different from those computed by ZROTG +!> the same as in DROTG when |a| > |b|. When |b| >= |a|, the +!> sign of c and s will be different from those computed by DROTG !> if the signs of a and b are not the same. !> !> \endverbatim From c657043a2b00626d4785ca7e1afa5058716aa16e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Villemot?= Date: Wed, 1 Sep 2021 14:15:24 +0200 Subject: [PATCH 02/13] make.inc.gfortran: define a default value for DOCSDIR --- INSTALL/make.inc.gfortran | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/INSTALL/make.inc.gfortran b/INSTALL/make.inc.gfortran index e84f1a4d37..faa3301383 100644 --- a/INSTALL/make.inc.gfortran +++ b/INSTALL/make.inc.gfortran @@ -79,3 +79,8 @@ CBLASLIB = $(TOPSRCDIR)/libcblas.a LAPACKLIB = $(TOPSRCDIR)/liblapack.a TMGLIB = $(TOPSRCDIR)/libtmglib.a LAPACKELIB = $(TOPSRCDIR)/liblapacke.a + +# DOCUMENTATION DIRECTORY +# If you generate html pages (make html), documentation will be placed in $(DOCSDIR)/explore-html +# If you generate man pages (make man), documentation will be placed in $(DOCSDIR)/man +DOCSDIR = $(TOPSRCDIR)/DOCS From e6411157cb4a3c568105088d99c3974f29d43c5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Villemot?= Date: Wed, 1 Sep 2021 14:43:21 +0200 Subject: [PATCH 03/13] Fix CBLAS xerbla following hidden strlen argument changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commits 2d9fbdeecca906d7abb89e9cc16b02ed5286298e and 628a2095c24021ab07904e036389ebc4feb3e67a changed the function prototypes in the cblas_f77.h. However, the F77_xerbla case was not treated in the same way as the others. In particular, the consequence is now that the xerbla function provided by the CBLAS/src/xerbla.c file is never called at runtime (because it is identified as “F77_xerbla” in the symbol table, instead of simply “xerbla”, since F77_xerbla is *not* a macro). This patch restores the symmetry between xerbla and the other functions. --- CBLAS/include/cblas_f77.h | 6 +++++- CBLAS/src/xerbla.c | 11 +++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/CBLAS/include/cblas_f77.h b/CBLAS/include/cblas_f77.h index 9c87428ace..e4ae8125e2 100644 --- a/CBLAS/include/cblas_f77.h +++ b/CBLAS/include/cblas_f77.h @@ -527,7 +527,11 @@ extern "C" { #endif -void F77_xerbla(FCHAR, void *); +#ifdef BLAS_FORTRAN_STRLEN_END + #define F77_xerbla(...) F77_xerbla_base(__VA_ARGS__, 1) +#else + #define F77_xerbla(...) F77_xerbla_base(__VA_ARGS__) +#endif void F77_xerbla_base(FCHAR, void * #ifdef BLAS_FORTRAN_STRLEN_END , size_t diff --git a/CBLAS/src/xerbla.c b/CBLAS/src/xerbla.c index 1857e92e84..fbe4415042 100644 --- a/CBLAS/src/xerbla.c +++ b/CBLAS/src/xerbla.c @@ -10,13 +10,16 @@ void #ifdef HAS_ATTRIBUTE_WEAK_SUPPORT __attribute__((weak)) #endif -F77_xerbla +F77_xerbla_base #ifdef F77_CHAR -(F77_CHAR F77_srname, void *vinfo) +(F77_CHAR F77_srname, void *vinfo #else -(char *srname, void *vinfo) +(char *srname, void *vinfo #endif - +#ifdef BLAS_FORTRAN_STRLEN_END +, size_t len +#endif +) { #ifdef F77_CHAR char *srname; From f06a027f144963d687adb84e8a3fc6287f4cf078 Mon Sep 17 00:00:00 2001 From: Igor Zhuravlov Date: Mon, 6 Sep 2021 15:39:29 +1000 Subject: [PATCH 04/13] unify comments: expression for residual This resolves discussion in PR #582. --- TESTING/LIN/ctbt02.f | 8 ++++---- TESTING/LIN/ctpt02.f | 7 ++++--- TESTING/LIN/ctrt02.f | 8 ++++---- TESTING/LIN/dtbt02.f | 8 ++++---- TESTING/LIN/dtpt02.f | 7 ++++--- TESTING/LIN/dtrt02.f | 8 ++++---- TESTING/LIN/stbt02.f | 8 ++++---- TESTING/LIN/stpt02.f | 7 ++++--- TESTING/LIN/strt02.f | 8 ++++---- TESTING/LIN/ztbt02.f | 8 ++++---- TESTING/LIN/ztpt02.f | 7 ++++--- TESTING/LIN/ztrt02.f | 8 ++++---- 12 files changed, 48 insertions(+), 44 deletions(-) diff --git a/TESTING/LIN/ctbt02.f b/TESTING/LIN/ctbt02.f index 8c6a24b51e..bf58aa1da1 100644 --- a/TESTING/LIN/ctbt02.f +++ b/TESTING/LIN/ctbt02.f @@ -30,10 +30,10 @@ *> *> CTBT02 computes the residual for the computed solution to a *> triangular system of linear equations op(A)*X = B, when A is a -*> triangular band matrix. The test ratio is the maximum over the -*> number of right hand sides of -*> norm(op(A)*X - B) / ( norm(op(A)) * norm(X) * EPS ), -*> where op(A) = A, A**T, or A**H, and EPS is the machine epsilon. +*> triangular band matrix. The test ratio is the maximum over +*> norm(b - op(A)*x) / ( ||op(A)||_1 * norm(x) * EPS ), +*> where op(A) = A, A**T, or A**H, b is the column of B, x is the +*> solution vector, and EPS is the machine epsilon. *> \endverbatim * * Arguments: diff --git a/TESTING/LIN/ctpt02.f b/TESTING/LIN/ctpt02.f index 12d8870c9d..9c32d4206a 100644 --- a/TESTING/LIN/ctpt02.f +++ b/TESTING/LIN/ctpt02.f @@ -30,9 +30,10 @@ *> CTPT02 computes the residual for the computed solution to a *> triangular system of linear equations op(A)*X = B, when the *> triangular matrix A is stored in packed format. The test ratio is -*> the maximum over the number of right hand sides of -*> norm(op(A)*X - B) / ( norm(op(A)) * norm(X) * EPS ), -*> where op(A) = A, A**T, or A**H, and EPS is the machine epsilon. +*> the maximum over +*> norm(b - op(A)*x) / ( ||op(A)||_1 * norm(x) * EPS ), +*> where op(A) = A, A**T, or A**H, b is the column of B, x is the +*> solution vector, and EPS is the machine epsilon. *> \endverbatim * * Arguments: diff --git a/TESTING/LIN/ctrt02.f b/TESTING/LIN/ctrt02.f index 1addc5881c..72f44af4c7 100644 --- a/TESTING/LIN/ctrt02.f +++ b/TESTING/LIN/ctrt02.f @@ -30,10 +30,10 @@ *> *> CTRT02 computes the residual for the computed solution to a *> triangular system of linear equations op(A)*X = B, where A is a -*> triangular matrix. The test ratio is the maximum over the number of -*> right hand sides of -*> norm(op(A)*X - B) / ( norm(op(A)) * norm(X) * EPS ), -*> where op(A) = A, A**T, or A**H, and EPS is the machine epsilon. +*> triangular matrix. The test ratio is the maximum over +*> norm(b - op(A)*x) / ( ||op(A)||_1 * norm(x) * EPS ), +*> where op(A) = A, A**T, or A**H, b is the column of B, x is the +*> solution vector, and EPS is the machine epsilon. *> \endverbatim * * Arguments: diff --git a/TESTING/LIN/dtbt02.f b/TESTING/LIN/dtbt02.f index 5f5192a003..347fbcb9c8 100644 --- a/TESTING/LIN/dtbt02.f +++ b/TESTING/LIN/dtbt02.f @@ -29,10 +29,10 @@ *> *> DTBT02 computes the residual for the computed solution to a *> triangular system of linear equations op(A)*X = B, when A is a -*> triangular band matrix. The test ratio is the maximum over the -*> number of right hand sides of -*> norm(op(A)*X - B) / ( norm(op(A)) * norm(X) * EPS ), -*> where op(A) = A, A**T, or A**H and EPS is the machine epsilon. +*> triangular band matrix. The test ratio is the maximum over +*> norm(b - op(A)*x) / ( ||op(A)||_1 * norm(x) * EPS ), +*> where op(A) = A or A**T, b is the column of B, x is the solution +*> vector, and EPS is the machine epsilon. *> The norm used is the 1-norm. *> \endverbatim * diff --git a/TESTING/LIN/dtpt02.f b/TESTING/LIN/dtpt02.f index 9506a1d195..8b0b4a73d4 100644 --- a/TESTING/LIN/dtpt02.f +++ b/TESTING/LIN/dtpt02.f @@ -29,9 +29,10 @@ *> DTPT02 computes the residual for the computed solution to a *> triangular system of linear equations op(A)*X = B, when the *> triangular matrix A is stored in packed format. The test ratio is -*> the maximum over the number of right hand sides of -*> norm(op(A)*X - B) / ( norm(op(A)) * norm(X) * EPS ), -*> where op(A) = A or A**T, and EPS is the machine epsilon. +*> the maximum over +*> norm(b - op(A)*x) / ( ||op(A)||_1 * norm(x) * EPS ), +*> where op(A) = A or A**T, b is the column of B, x is the solution +*> vector, and EPS is the machine epsilon. *> The norm used is the 1-norm. *> \endverbatim * diff --git a/TESTING/LIN/dtrt02.f b/TESTING/LIN/dtrt02.f index a7c95e136d..6d22f688ba 100644 --- a/TESTING/LIN/dtrt02.f +++ b/TESTING/LIN/dtrt02.f @@ -29,10 +29,10 @@ *> *> DTRT02 computes the residual for the computed solution to a *> triangular system of linear equations op(A)*X = B, where A is a -*> triangular matrix. The test ratio is the maximum over the number of -*> right hand sides of -*> norm(op(A)*X - B) / ( norm(op(A)) * norm(X) * EPS ), -*> where op(A) = A or A**T, and EPS is the machine epsilon. +*> triangular matrix. The test ratio is the maximum over +*> norm(b - op(A)*x) / ( ||op(A)||_1 * norm(x) * EPS ), +*> where op(A) = A or A**T, b is the column of B, x is the solution +*> vector, and EPS is the machine epsilon. *> The norm used is the 1-norm. *> \endverbatim * diff --git a/TESTING/LIN/stbt02.f b/TESTING/LIN/stbt02.f index 31fbf9262a..f95b5d5f10 100644 --- a/TESTING/LIN/stbt02.f +++ b/TESTING/LIN/stbt02.f @@ -29,10 +29,10 @@ *> *> STBT02 computes the residual for the computed solution to a *> triangular system of linear equations op(A)*X = B, when A is a -*> triangular band matrix. The test ratio is the maximum over the -*> number of right hand sides of -*> norm(op(A)*X - B) / ( norm(op(A)) * norm(X) * EPS ), -*> where op(A) = A or A**T, and EPS is the machine epsilon. +*> triangular band matrix. The test ratio is the maximum over +*> norm(b - op(A)*x) / ( ||op(A)||_1 * norm(x) * EPS ), +*> where op(A) = A or A**T, b is the column of B, x is the solution +*> vector, and EPS is the machine epsilon. *> The norm used is the 1-norm. *> \endverbatim * diff --git a/TESTING/LIN/stpt02.f b/TESTING/LIN/stpt02.f index 72936dd044..f81fa29b99 100644 --- a/TESTING/LIN/stpt02.f +++ b/TESTING/LIN/stpt02.f @@ -29,9 +29,10 @@ *> STPT02 computes the residual for the computed solution to a *> triangular system of linear equations op(A)*X = B, when the *> triangular matrix A is stored in packed format. The test ratio is -*> the maximum over the number of right hand sides of -*> norm(op(A)*X - B) / ( norm(op(A)) * norm(X) * EPS ), -*> where op(A) = A or A**T, and EPS is the machine epsilon. +*> the maximum over +*> norm(b - op(A)*x) / ( ||op(A)||_1 * norm(x) * EPS ), +*> where op(A) = A or A**T, b is the column of B, x is the solution +*> vector, and EPS is the machine epsilon. *> The norm used is the 1-norm. *> \endverbatim * diff --git a/TESTING/LIN/strt02.f b/TESTING/LIN/strt02.f index df91501ded..26a95510a9 100644 --- a/TESTING/LIN/strt02.f +++ b/TESTING/LIN/strt02.f @@ -29,10 +29,10 @@ *> *> STRT02 computes the residual for the computed solution to a *> triangular system of linear equations op(A)*X = B, where A is a -*> triangular matrix. The test ratio is the maximum over the number of -*> right hand sides of -*> norm(op(A)*X - B) / ( norm(op(A)) * norm(X) * EPS ), -*> where op(A) = A or A**T, and EPS is the machine epsilon. +*> triangular matrix. The test ratio is the maximum over +*> norm(b - op(A)*x) / ( ||op(A)||_1 * norm(x) * EPS ), +*> where op(A) = A or A**T, b is the column of B, x is the solution +*> vector, and EPS is the machine epsilon. *> The norm used is the 1-norm. *> \endverbatim * diff --git a/TESTING/LIN/ztbt02.f b/TESTING/LIN/ztbt02.f index 3c9f119569..20917d9f7e 100644 --- a/TESTING/LIN/ztbt02.f +++ b/TESTING/LIN/ztbt02.f @@ -30,10 +30,10 @@ *> *> ZTBT02 computes the residual for the computed solution to a *> triangular system of linear equations op(A)*X = B, when A is a -*> triangular band matrix. The test ratio is the maximum over the -*> number of right hand sides of -*> norm(op(A)*X - B) / ( norm(op(A)) * norm(X) * EPS ), -*> where op(A) = A, A**T, or A**H, and EPS is the machine epsilon. +*> triangular band matrix. The test ratio is the maximum over +*> norm(b - op(A)*x) / ( ||op(A)||_1 * norm(x) * EPS ), +*> where op(A) = A, A**T, or A**H, b is the column of B, x is the +*> solution vector, and EPS is the machine epsilon. *> \endverbatim * * Arguments: diff --git a/TESTING/LIN/ztpt02.f b/TESTING/LIN/ztpt02.f index 8555053d14..d6b9dabafa 100644 --- a/TESTING/LIN/ztpt02.f +++ b/TESTING/LIN/ztpt02.f @@ -30,9 +30,10 @@ *> ZTPT02 computes the residual for the computed solution to a *> triangular system of linear equations op(A)*X = B, when the *> triangular matrix A is stored in packed format. The test ratio is -*> the maximum over the number of right hand sides of -*> norm(op(A)*X - B) / ( norm(op(A)) * norm(X) * EPS ), -*> where op(A) = A, A**T, or A**H, and EPS is the machine epsilon. +*> the maximum over +*> norm(b - op(A)*x) / ( ||op(A)||_1 * norm(x) * EPS ), +*> where op(A) = A, A**T, or A**H, b is the column of B, x is the +*> solution vector, and EPS is the machine epsilon. *> \endverbatim * * Arguments: diff --git a/TESTING/LIN/ztrt02.f b/TESTING/LIN/ztrt02.f index 47e9f3463c..69f04c9dbc 100644 --- a/TESTING/LIN/ztrt02.f +++ b/TESTING/LIN/ztrt02.f @@ -30,10 +30,10 @@ *> *> ZTRT02 computes the residual for the computed solution to a *> triangular system of linear equations op(A)*X = B, where A is a -*> triangular matrix. The test ratio is the maximum over the number of -*> right hand sides of -*> norm(op(A)*X - B) / ( norm(op(A)) * norm(X) * EPS ), -*> where op(A) = A, A**T, or A**H, and EPS is the machine epsilon. +*> triangular matrix. The test ratio is the maximum over +*> norm(b - op(A)*x) / ( ||op(A)||_1 * norm(x) * EPS ), +*> where op(A) = A, A**T, or A**H, b is the column of B, x is the +*> solution vector, and EPS is the machine epsilon. *> \endverbatim * * Arguments: From fb14d49b3a10ae24af762ef7d3873de87b84a78c Mon Sep 17 00:00:00 2001 From: Igor Zhuravlov Date: Mon, 6 Sep 2021 15:42:57 +1000 Subject: [PATCH 05/13] fix typo in SRC/?la_??rfsx_extended.f comment --- SRC/cla_gbrfsx_extended.f | 2 +- SRC/cla_gerfsx_extended.f | 2 +- SRC/cla_herfsx_extended.f | 2 +- SRC/cla_porfsx_extended.f | 2 +- SRC/cla_syrfsx_extended.f | 2 +- SRC/dla_gbrfsx_extended.f | 2 +- SRC/dla_gerfsx_extended.f | 2 +- SRC/dla_porfsx_extended.f | 2 +- SRC/dla_syrfsx_extended.f | 2 +- SRC/sla_gbrfsx_extended.f | 2 +- SRC/sla_gerfsx_extended.f | 2 +- SRC/sla_porfsx_extended.f | 2 +- SRC/sla_syrfsx_extended.f | 2 +- SRC/zla_gbrfsx_extended.f | 2 +- SRC/zla_gerfsx_extended.f | 2 +- SRC/zla_herfsx_extended.f | 2 +- SRC/zla_porfsx_extended.f | 2 +- SRC/zla_syrfsx_extended.f | 2 +- 18 files changed, 18 insertions(+), 18 deletions(-) diff --git a/SRC/cla_gbrfsx_extended.f b/SRC/cla_gbrfsx_extended.f index 612598eb1c..a54be9c5fd 100644 --- a/SRC/cla_gbrfsx_extended.f +++ b/SRC/cla_gbrfsx_extended.f @@ -54,7 +54,7 @@ *> In addition to normwise error bound, the code provides maximum *> componentwise error bound if possible. See comments for ERR_BNDS_NORM *> and ERR_BNDS_COMP for details of the error bounds. Note that this -*> subroutine is only resonsible for setting the second fields of +*> subroutine is only responsible for setting the second fields of *> ERR_BNDS_NORM and ERR_BNDS_COMP. *> \endverbatim * diff --git a/SRC/cla_gerfsx_extended.f b/SRC/cla_gerfsx_extended.f index 03beefe354..4b10311017 100644 --- a/SRC/cla_gerfsx_extended.f +++ b/SRC/cla_gerfsx_extended.f @@ -54,7 +54,7 @@ *> In addition to normwise error bound, the code provides maximum *> componentwise error bound if possible. See comments for ERRS_N *> and ERRS_C for details of the error bounds. Note that this -*> subroutine is only resonsible for setting the second fields of +*> subroutine is only responsible for setting the second fields of *> ERRS_N and ERRS_C. *> \endverbatim * diff --git a/SRC/cla_herfsx_extended.f b/SRC/cla_herfsx_extended.f index 96ff805e91..6d007ef580 100644 --- a/SRC/cla_herfsx_extended.f +++ b/SRC/cla_herfsx_extended.f @@ -55,7 +55,7 @@ *> In addition to normwise error bound, the code provides maximum *> componentwise error bound if possible. See comments for ERR_BNDS_NORM *> and ERR_BNDS_COMP for details of the error bounds. Note that this -*> subroutine is only resonsible for setting the second fields of +*> subroutine is only responsible for setting the second fields of *> ERR_BNDS_NORM and ERR_BNDS_COMP. *> \endverbatim * diff --git a/SRC/cla_porfsx_extended.f b/SRC/cla_porfsx_extended.f index 49c5324c53..9ced9b1b93 100644 --- a/SRC/cla_porfsx_extended.f +++ b/SRC/cla_porfsx_extended.f @@ -54,7 +54,7 @@ *> In addition to normwise error bound, the code provides maximum *> componentwise error bound if possible. See comments for ERR_BNDS_NORM *> and ERR_BNDS_COMP for details of the error bounds. Note that this -*> subroutine is only resonsible for setting the second fields of +*> subroutine is only responsible for setting the second fields of *> ERR_BNDS_NORM and ERR_BNDS_COMP. *> \endverbatim * diff --git a/SRC/cla_syrfsx_extended.f b/SRC/cla_syrfsx_extended.f index 498759ebfe..4fe538a989 100644 --- a/SRC/cla_syrfsx_extended.f +++ b/SRC/cla_syrfsx_extended.f @@ -55,7 +55,7 @@ *> In addition to normwise error bound, the code provides maximum *> componentwise error bound if possible. See comments for ERR_BNDS_NORM *> and ERR_BNDS_COMP for details of the error bounds. Note that this -*> subroutine is only resonsible for setting the second fields of +*> subroutine is only responsible for setting the second fields of *> ERR_BNDS_NORM and ERR_BNDS_COMP. *> \endverbatim * diff --git a/SRC/dla_gbrfsx_extended.f b/SRC/dla_gbrfsx_extended.f index ecb0b9f09f..5454b1bfcd 100644 --- a/SRC/dla_gbrfsx_extended.f +++ b/SRC/dla_gbrfsx_extended.f @@ -55,7 +55,7 @@ *> In addition to normwise error bound, the code provides maximum *> componentwise error bound if possible. See comments for ERR_BNDS_NORM *> and ERR_BNDS_COMP for details of the error bounds. Note that this -*> subroutine is only resonsible for setting the second fields of +*> subroutine is only responsible for setting the second fields of *> ERR_BNDS_NORM and ERR_BNDS_COMP. *> \endverbatim * diff --git a/SRC/dla_gerfsx_extended.f b/SRC/dla_gerfsx_extended.f index cff131b8bf..92b0d76d44 100644 --- a/SRC/dla_gerfsx_extended.f +++ b/SRC/dla_gerfsx_extended.f @@ -53,7 +53,7 @@ *> In addition to normwise error bound, the code provides maximum *> componentwise error bound if possible. See comments for ERRS_N *> and ERRS_C for details of the error bounds. Note that this -*> subroutine is only resonsible for setting the second fields of +*> subroutine is only responsible for setting the second fields of *> ERRS_N and ERRS_C. *> \endverbatim * diff --git a/SRC/dla_porfsx_extended.f b/SRC/dla_porfsx_extended.f index cabf631fd3..5c8850fef2 100644 --- a/SRC/dla_porfsx_extended.f +++ b/SRC/dla_porfsx_extended.f @@ -54,7 +54,7 @@ *> In addition to normwise error bound, the code provides maximum *> componentwise error bound if possible. See comments for ERR_BNDS_NORM *> and ERR_BNDS_COMP for details of the error bounds. Note that this -*> subroutine is only resonsible for setting the second fields of +*> subroutine is only responsible for setting the second fields of *> ERR_BNDS_NORM and ERR_BNDS_COMP. *> \endverbatim * diff --git a/SRC/dla_syrfsx_extended.f b/SRC/dla_syrfsx_extended.f index 4ccac897aa..e1cde6fc2b 100644 --- a/SRC/dla_syrfsx_extended.f +++ b/SRC/dla_syrfsx_extended.f @@ -56,7 +56,7 @@ *> In addition to normwise error bound, the code provides maximum *> componentwise error bound if possible. See comments for ERR_BNDS_NORM *> and ERR_BNDS_COMP for details of the error bounds. Note that this -*> subroutine is only resonsible for setting the second fields of +*> subroutine is only responsible for setting the second fields of *> ERR_BNDS_NORM and ERR_BNDS_COMP. *> \endverbatim * diff --git a/SRC/sla_gbrfsx_extended.f b/SRC/sla_gbrfsx_extended.f index 036f76b3b2..499d6bf900 100644 --- a/SRC/sla_gbrfsx_extended.f +++ b/SRC/sla_gbrfsx_extended.f @@ -54,7 +54,7 @@ *> In addition to normwise error bound, the code provides maximum *> componentwise error bound if possible. See comments for ERR_BNDS_NORM *> and ERR_BNDS_COMP for details of the error bounds. Note that this -*> subroutine is only resonsible for setting the second fields of +*> subroutine is only responsible for setting the second fields of *> ERR_BNDS_NORM and ERR_BNDS_COMP. *> \endverbatim * diff --git a/SRC/sla_gerfsx_extended.f b/SRC/sla_gerfsx_extended.f index 5149b33937..de05d8eb41 100644 --- a/SRC/sla_gerfsx_extended.f +++ b/SRC/sla_gerfsx_extended.f @@ -54,7 +54,7 @@ *> In addition to normwise error bound, the code provides maximum *> componentwise error bound if possible. See comments for ERRS_N *> and ERRS_C for details of the error bounds. Note that this -*> subroutine is only resonsible for setting the second fields of +*> subroutine is only responsible for setting the second fields of *> ERRS_N and ERRS_C. *> \endverbatim * diff --git a/SRC/sla_porfsx_extended.f b/SRC/sla_porfsx_extended.f index bb67bf4531..ada4cad21e 100644 --- a/SRC/sla_porfsx_extended.f +++ b/SRC/sla_porfsx_extended.f @@ -54,7 +54,7 @@ *> In addition to normwise error bound, the code provides maximum *> componentwise error bound if possible. See comments for ERR_BNDS_NORM *> and ERR_BNDS_COMP for details of the error bounds. Note that this -*> subroutine is only resonsible for setting the second fields of +*> subroutine is only responsible for setting the second fields of *> ERR_BNDS_NORM and ERR_BNDS_COMP. *> \endverbatim * diff --git a/SRC/sla_syrfsx_extended.f b/SRC/sla_syrfsx_extended.f index 9bd878c255..d5096be024 100644 --- a/SRC/sla_syrfsx_extended.f +++ b/SRC/sla_syrfsx_extended.f @@ -56,7 +56,7 @@ *> In addition to normwise error bound, the code provides maximum *> componentwise error bound if possible. See comments for ERR_BNDS_NORM *> and ERR_BNDS_COMP for details of the error bounds. Note that this -*> subroutine is only resonsible for setting the second fields of +*> subroutine is only responsible for setting the second fields of *> ERR_BNDS_NORM and ERR_BNDS_COMP. *> \endverbatim * diff --git a/SRC/zla_gbrfsx_extended.f b/SRC/zla_gbrfsx_extended.f index d8185ec073..fe4d635b12 100644 --- a/SRC/zla_gbrfsx_extended.f +++ b/SRC/zla_gbrfsx_extended.f @@ -54,7 +54,7 @@ *> In addition to normwise error bound, the code provides maximum *> componentwise error bound if possible. See comments for ERR_BNDS_NORM *> and ERR_BNDS_COMP for details of the error bounds. Note that this -*> subroutine is only resonsible for setting the second fields of +*> subroutine is only responsible for setting the second fields of *> ERR_BNDS_NORM and ERR_BNDS_COMP. *> \endverbatim * diff --git a/SRC/zla_gerfsx_extended.f b/SRC/zla_gerfsx_extended.f index 355def45cf..9d618f2942 100644 --- a/SRC/zla_gerfsx_extended.f +++ b/SRC/zla_gerfsx_extended.f @@ -53,7 +53,7 @@ *> In addition to normwise error bound, the code provides maximum *> componentwise error bound if possible. See comments for ERRS_N *> and ERRS_C for details of the error bounds. Note that this -*> subroutine is only resonsible for setting the second fields of +*> subroutine is only responsible for setting the second fields of *> ERRS_N and ERRS_C. *> \endverbatim * diff --git a/SRC/zla_herfsx_extended.f b/SRC/zla_herfsx_extended.f index 2c8c778af7..a55dd94318 100644 --- a/SRC/zla_herfsx_extended.f +++ b/SRC/zla_herfsx_extended.f @@ -55,7 +55,7 @@ *> In addition to normwise error bound, the code provides maximum *> componentwise error bound if possible. See comments for ERR_BNDS_NORM *> and ERR_BNDS_COMP for details of the error bounds. Note that this -*> subroutine is only resonsible for setting the second fields of +*> subroutine is only responsible for setting the second fields of *> ERR_BNDS_NORM and ERR_BNDS_COMP. *> \endverbatim * diff --git a/SRC/zla_porfsx_extended.f b/SRC/zla_porfsx_extended.f index e2265c56f5..12e05e049e 100644 --- a/SRC/zla_porfsx_extended.f +++ b/SRC/zla_porfsx_extended.f @@ -54,7 +54,7 @@ *> In addition to normwise error bound, the code provides maximum *> componentwise error bound if possible. See comments for ERR_BNDS_NORM *> and ERR_BNDS_COMP for details of the error bounds. Note that this -*> subroutine is only resonsible for setting the second fields of +*> subroutine is only responsible for setting the second fields of *> ERR_BNDS_NORM and ERR_BNDS_COMP. *> \endverbatim * diff --git a/SRC/zla_syrfsx_extended.f b/SRC/zla_syrfsx_extended.f index bdb7c05c3b..d6c241499a 100644 --- a/SRC/zla_syrfsx_extended.f +++ b/SRC/zla_syrfsx_extended.f @@ -55,7 +55,7 @@ *> In addition to normwise error bound, the code provides maximum *> componentwise error bound if possible. See comments for ERR_BNDS_NORM *> and ERR_BNDS_COMP for details of the error bounds. Note that this -*> subroutine is only resonsible for setting the second fields of +*> subroutine is only responsible for setting the second fields of *> ERR_BNDS_NORM and ERR_BNDS_COMP. *> \endverbatim * From 78db64ab53d02a310b6a3fe9127a71c827962d7e Mon Sep 17 00:00:00 2001 From: Igor Zhuravlov Date: Mon, 6 Sep 2021 15:47:45 +1000 Subject: [PATCH 06/13] fix typo in TESTING/EIG/?chkst2stg.f comment --- TESTING/EIG/cchkst2stg.f | 2 +- TESTING/EIG/dchkst2stg.f | 2 +- TESTING/EIG/schkst2stg.f | 2 +- TESTING/EIG/zchkst2stg.f | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/TESTING/EIG/cchkst2stg.f b/TESTING/EIG/cchkst2stg.f index d2eaea4360..e0bf1a1ee3 100644 --- a/TESTING/EIG/cchkst2stg.f +++ b/TESTING/EIG/cchkst2stg.f @@ -1991,7 +1991,7 @@ SUBROUTINE CCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, END IF END IF * -* Do Test 34 +* Do Test 37 * TEMP1 = ZERO TEMP2 = ZERO diff --git a/TESTING/EIG/dchkst2stg.f b/TESTING/EIG/dchkst2stg.f index 90eeec33dc..869f49ac63 100644 --- a/TESTING/EIG/dchkst2stg.f +++ b/TESTING/EIG/dchkst2stg.f @@ -1973,7 +1973,7 @@ SUBROUTINE DCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, END IF END IF * -* Do Test 34 +* Do Test 37 * TEMP1 = ZERO TEMP2 = ZERO diff --git a/TESTING/EIG/schkst2stg.f b/TESTING/EIG/schkst2stg.f index 62b93c08f8..11806b4b93 100644 --- a/TESTING/EIG/schkst2stg.f +++ b/TESTING/EIG/schkst2stg.f @@ -1973,7 +1973,7 @@ SUBROUTINE SCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, END IF END IF * -* Do Test 34 +* Do Test 37 * TEMP1 = ZERO TEMP2 = ZERO diff --git a/TESTING/EIG/zchkst2stg.f b/TESTING/EIG/zchkst2stg.f index 1e2dd624ca..492a3afa2a 100644 --- a/TESTING/EIG/zchkst2stg.f +++ b/TESTING/EIG/zchkst2stg.f @@ -1991,7 +1991,7 @@ SUBROUTINE ZCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, END IF END IF * -* Do Test 34 +* Do Test 37 * TEMP1 = ZERO TEMP2 = ZERO From 7165ff7108b91a3590793869bd23dbff9dc06f6a Mon Sep 17 00:00:00 2001 From: Igor Zhuravlov Date: Mon, 6 Sep 2021 15:50:13 +1000 Subject: [PATCH 07/13] fix typo in TESTING/EIG/?chkst2stg.f comment --- TESTING/EIG/cchkst2stg.f | 6 ------ TESTING/EIG/dchkst2stg.f | 5 ----- TESTING/EIG/schkst2stg.f | 5 ----- TESTING/EIG/zchkst2stg.f | 6 ------ 4 files changed, 22 deletions(-) diff --git a/TESTING/EIG/cchkst2stg.f b/TESTING/EIG/cchkst2stg.f index e0bf1a1ee3..e4deb8ac85 100644 --- a/TESTING/EIG/cchkst2stg.f +++ b/TESTING/EIG/cchkst2stg.f @@ -1072,7 +1072,6 @@ SUBROUTINE CCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, END IF END IF * -* * Do Tests 3 and 4 which are similar to 11 and 12 but with the * D1 computed using the standard 1-stage reduction as reference * @@ -1735,7 +1734,6 @@ SUBROUTINE CCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, END IF END IF * -* * Do test 28 * TEMP2 = TWO*( TWO*N-ONE )*ULP* @@ -1792,7 +1790,6 @@ SUBROUTINE CCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, * * Do Tests 29 and 30 * -* * Call CSTEMR to compute D2, do tests. * * Compute D2 @@ -1832,7 +1829,6 @@ SUBROUTINE CCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, RESULT( 31 ) = TEMP2 / MAX( UNFL, $ ULP*MAX( TEMP1, TEMP2 ) ) * -* * Call CSTEMR(V,V) to compute D1 and Z, do tests. * * Compute D1 and Z @@ -1934,7 +1930,6 @@ SUBROUTINE CCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, RESULT( 34 ) = ZERO END IF * -* * Call CSTEMR(V,A) to compute D1 and Z, do tests. * * Compute D1 and Z @@ -2010,7 +2005,6 @@ SUBROUTINE CCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, * * End of Loop -- Check for RESULT(j) > THRESH * -* * Print out tests which fail. * DO 290 JR = 1, NTEST diff --git a/TESTING/EIG/dchkst2stg.f b/TESTING/EIG/dchkst2stg.f index 869f49ac63..2c98b802d3 100644 --- a/TESTING/EIG/dchkst2stg.f +++ b/TESTING/EIG/dchkst2stg.f @@ -1055,7 +1055,6 @@ SUBROUTINE DCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, END IF END IF * -* * Do Tests 3 and 4 which are similar to 11 and 12 but with the * D1 computed using the standard 1-stage reduction as reference * @@ -1715,7 +1714,6 @@ SUBROUTINE DCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, END IF END IF * -* * Do test 28 * TEMP2 = TWO*( TWO*N-ONE )*ULP* @@ -1814,7 +1812,6 @@ SUBROUTINE DCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, RESULT( 31 ) = TEMP2 / MAX( UNFL, $ ULP*MAX( TEMP1, TEMP2 ) ) * -* * Call DSTEMR(V,V) to compute D1 and Z, do tests. * * Compute D1 and Z @@ -1916,7 +1913,6 @@ SUBROUTINE DCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, RESULT( 34 ) = ZERO END IF * -* * Call DSTEMR(V,A) to compute D1 and Z, do tests. * * Compute D1 and Z @@ -1992,7 +1988,6 @@ SUBROUTINE DCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, * * End of Loop -- Check for RESULT(j) > THRESH * -* * Print out tests which fail. * DO 290 JR = 1, NTEST diff --git a/TESTING/EIG/schkst2stg.f b/TESTING/EIG/schkst2stg.f index 11806b4b93..ac5a3fc393 100644 --- a/TESTING/EIG/schkst2stg.f +++ b/TESTING/EIG/schkst2stg.f @@ -1055,7 +1055,6 @@ SUBROUTINE SCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, END IF END IF * -* * Do Tests 3 and 4 which are similar to 11 and 12 but with the * D1 computed using the standard 1-stage reduction as reference * @@ -1715,7 +1714,6 @@ SUBROUTINE SCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, END IF END IF * -* * Do test 28 * TEMP2 = TWO*( TWO*N-ONE )*ULP* @@ -1814,7 +1812,6 @@ SUBROUTINE SCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, RESULT( 31 ) = TEMP2 / MAX( UNFL, $ ULP*MAX( TEMP1, TEMP2 ) ) * -* * Call SSTEMR(V,V) to compute D1 and Z, do tests. * * Compute D1 and Z @@ -1916,7 +1913,6 @@ SUBROUTINE SCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, RESULT( 34 ) = ZERO END IF * -* * Call SSTEMR(V,A) to compute D1 and Z, do tests. * * Compute D1 and Z @@ -1992,7 +1988,6 @@ SUBROUTINE SCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, * * End of Loop -- Check for RESULT(j) > THRESH * -* * Print out tests which fail. * DO 290 JR = 1, NTEST diff --git a/TESTING/EIG/zchkst2stg.f b/TESTING/EIG/zchkst2stg.f index 492a3afa2a..b1ef808166 100644 --- a/TESTING/EIG/zchkst2stg.f +++ b/TESTING/EIG/zchkst2stg.f @@ -1072,7 +1072,6 @@ SUBROUTINE ZCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, END IF END IF * -* * Do Tests 3 and 4 which are similar to 11 and 12 but with the * D1 computed using the standard 1-stage reduction as reference * @@ -1735,7 +1734,6 @@ SUBROUTINE ZCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, END IF END IF * -* * Do test 28 * TEMP2 = TWO*( TWO*N-ONE )*ULP* @@ -1792,7 +1790,6 @@ SUBROUTINE ZCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, * * Do Tests 29 and 30 * -* * Call ZSTEMR to compute D2, do tests. * * Compute D2 @@ -1832,7 +1829,6 @@ SUBROUTINE ZCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, RESULT( 31 ) = TEMP2 / MAX( UNFL, $ ULP*MAX( TEMP1, TEMP2 ) ) * -* * Call ZSTEMR(V,V) to compute D1 and Z, do tests. * * Compute D1 and Z @@ -1934,7 +1930,6 @@ SUBROUTINE ZCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, RESULT( 34 ) = ZERO END IF * -* * Call ZSTEMR(V,A) to compute D1 and Z, do tests. * * Compute D1 and Z @@ -2010,7 +2005,6 @@ SUBROUTINE ZCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, * * End of Loop -- Check for RESULT(j) > THRESH * -* * Print out tests which fail. * DO 290 JR = 1, NTEST From d2781152e1067815e9206ad53446e1c376d3c661 Mon Sep 17 00:00:00 2001 From: Quellyn Snead Date: Wed, 15 Sep 2021 11:14:42 -0600 Subject: [PATCH 08/13] Address issues with IBM XL builds (#606). Be more explicit about selecting recursive fortran flags, as not every compiler will correctly reject an incorrect option (e.g., XL). --- CBLAS/CMakeLists.txt | 4 +--- CMakeLists.txt | 36 ++++++++++++++++++++++++------------ INSTALL/make.inc.XLF | 4 ++-- 3 files changed, 27 insertions(+), 17 deletions(-) diff --git a/CBLAS/CMakeLists.txt b/CBLAS/CMakeLists.txt index a64ab0cef4..36211fbc59 100644 --- a/CBLAS/CMakeLists.txt +++ b/CBLAS/CMakeLists.txt @@ -11,9 +11,7 @@ FortranCInterface_HEADER(${LAPACK_BINARY_DIR}/include/cblas_mangling.h MACRO_NAMESPACE "F77_" SYMBOL_NAMESPACE "F77_") if(NOT FortranCInterface_GLOBAL_FOUND OR NOT FortranCInterface_MODULE_FOUND) - message(WARNING "Reverting to pre-defined include/lapacke_mangling.h") - configure_file(include/lapacke_mangling_with_flags.h.in - ${LAPACK_BINARY_DIR}/include/lapacke_mangling.h) + message(WARNING "Reverting to pre-defined include/cblas_mangling.h") configure_file(include/cblas_mangling_with_flags.h.in ${LAPACK_BINARY_DIR}/include/cblas_mangling.h) endif() diff --git a/CMakeLists.txt b/CMakeLists.txt index 07df064d23..f4d0c2239b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -91,28 +91,40 @@ include(PreventInBuildInstalls) # Check if recursive flag exists include(CheckFortranCompilerFlag) -check_fortran_compiler_flag("-recursive" _recursiveFlag) -check_fortran_compiler_flag("-frecursive" _frecursiveFlag) -check_fortran_compiler_flag("-Mrecursive" _MrecursiveFlag) +if(CMAKE_Fortran_COMPILER_ID STREQUAL Flang) + check_fortran_compiler_flag("-Mrecursive" _MrecursiveFlag) +elseif(CMAKE_Fortran_COMPILER_ID STREQUAL GNU) + check_fortran_compiler_flag("-frecursive" _frecursiveFlag) +elseif(CMAKE_Fortran_COMPILER_ID STREQUAL Intel) + check_fortran_compiler_flag("-recursive" _recursiveFlag) +elseif(CMAKE_Fortran_COMPILER_ID STREQUAL XL) + check_fortran_compiler_flag("-qrecur" _qrecurFlag) +endif() # Add recursive flag -if(_recursiveFlag) - string(REGEX MATCH "-recursive" output_test "${CMAKE_Fortran_FLAGS}") +if(_MrecursiveFlag) + string(REGEX MATCH "-Mrecursive" output_test "${CMAKE_Fortran_FLAGS}") if(NOT output_test) - set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -recursive" + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -Mrecursive" CACHE STRING "Recursive flag must be set" FORCE) endif() elseif(_frecursiveFlag) string(REGEX MATCH "-frecursive" output_test "${CMAKE_Fortran_FLAGS}") if(NOT output_test) set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -frecursive" - CACHE STRING "Recursive flag must be set" FORCE) + CACHE STRING "Recursive flag must be set" FORCE) endif() -elseif(_MrecursiveFlag) - string(REGEX MATCH "-Mrecursive" output_test "${CMAKE_Fortran_FLAGS}") +elseif(_recursiveFlag) + string(REGEX MATCH "-recursive" output_test "${CMAKE_Fortran_FLAGS}") if(NOT output_test) - set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -Mrecursive" - CACHE STRING "Recursive flag must be set" FORCE) + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -recursive" + CACHE STRING "Recursive flag must be set" FORCE) + endif() +elseif(_qrecurFlag) + string(REGEX MATCH "-qrecur" output_test "${CMAKE_Fortran_FLAGS}") + if(NOT output_test) + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -qrecur" + CACHE STRING "Recursive flag must be set" FORCE) endif() endif() @@ -121,7 +133,7 @@ if(UNIX) set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fp-model strict") endif() if(CMAKE_Fortran_COMPILER_ID STREQUAL XL) - set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -qnosave -qstrict=none") + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -qnosave -qstrict") endif() # Delete libmtsk in linking sequence for Sun/Oracle Fortran Compiler. # This library is not present in the Sun package SolarisStudio12.3-linux-x86-bin diff --git a/INSTALL/make.inc.XLF b/INSTALL/make.inc.XLF index 64f8b85529..dafc962429 100644 --- a/INSTALL/make.inc.XLF +++ b/INSTALL/make.inc.XLF @@ -14,10 +14,10 @@ CFLAGS = -O3 -qnosave # the compiler options desired when NO OPTIMIZATION is selected. # FC = xlf -FFLAGS = -O3 -qfixed -qnosave +FFLAGS = -O3 -qfixed -qnosave -qrecur # For -O2, add -qstrict=none FFLAGS_DRV = $(FFLAGS) -FFLAGS_NOOPT = -O0 -qfixed -qnosave +FFLAGS_NOOPT = -O0 -qfixed -qnosave -qrecur # Define LDFLAGS to the desired linker options for your machine. # From 26a81d142c6d7c3511a67ba620ee20e6f0604c3c Mon Sep 17 00:00:00 2001 From: Quellyn Snead Date: Tue, 21 Sep 2021 09:40:01 -0600 Subject: [PATCH 09/13] Add an informative fall-through clause in case of unfamiliar compiler (#26117). --- CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index f4d0c2239b..7335d10a3c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -99,6 +99,10 @@ elseif(CMAKE_Fortran_COMPILER_ID STREQUAL Intel) check_fortran_compiler_flag("-recursive" _recursiveFlag) elseif(CMAKE_Fortran_COMPILER_ID STREQUAL XL) check_fortran_compiler_flag("-qrecur" _qrecurFlag) +else() + message(WARNING "Fortran local arrays should be allocated on the stack." + " Please use a compiler which guarantees that feature." + " See https://github.com/Reference-LAPACK/lapack/pull/188 and references therein.") endif() # Add recursive flag From d04553067e61fb75818c900c78403f50489ba9aa Mon Sep 17 00:00:00 2001 From: "Larson, Eric" Date: Fri, 24 Sep 2021 13:03:59 -0700 Subject: [PATCH 10/13] ILP support long's in windows are 4 bytes (MSVS, intel compilers). Use int64_t and int32_t to ensure 8 byte integers for ILP interface. support 8 byte integer flag for intel ifort compiler --- CBLAS/include/cblas.h | 7 +++++-- CBLAS/include/cblas_f77.h | 7 +++++-- CMAKE/CheckLAPACKCompilerFlags.cmake | 13 +++++++++++++ CMakeLists.txt | 2 +- LAPACKE/include/lapacke_config.h | 5 +++-- 5 files changed, 27 insertions(+), 7 deletions(-) diff --git a/CBLAS/include/cblas.h b/CBLAS/include/cblas.h index 8b1cd2a1c4..98ae526cd6 100644 --- a/CBLAS/include/cblas.h +++ b/CBLAS/include/cblas.h @@ -1,6 +1,7 @@ #ifndef CBLAS_H #define CBLAS_H #include +#include #ifdef __cplusplus @@ -15,10 +16,12 @@ extern "C" { /* Assume C declarations for C++ */ /* * Integer type */ +#ifndef CBLAS_INT #ifdef WeirdNEC - #define CBLAS_INT long + #define CBLAS_INT int64_t #else - #define CBLAS_INT int + #define CBLAS_INT int32_t +#endif #endif typedef enum CBLAS_LAYOUT {CblasRowMajor=101, CblasColMajor=102} CBLAS_LAYOUT; diff --git a/CBLAS/include/cblas_f77.h b/CBLAS/include/cblas_f77.h index e4ae8125e2..2df10afedf 100644 --- a/CBLAS/include/cblas_f77.h +++ b/CBLAS/include/cblas_f77.h @@ -10,6 +10,7 @@ #define CBLAS_F77_H #include +#include /* It seems all current Fortran compilers put strlen at end. * Some historical compilers put strlen after the str argument @@ -24,10 +25,12 @@ #define F77_STRLEN(a) (_fcdlen) #endif +#ifndef F77_INT #ifdef WeirdNEC - #define F77_INT long + #define F77_INT int64_t #else - #define F77_INT int + #define F77_INT int32_t +#endif #endif #ifdef F77_CHAR diff --git a/CMAKE/CheckLAPACKCompilerFlags.cmake b/CMAKE/CheckLAPACKCompilerFlags.cmake index d727a15923..609bd4115b 100644 --- a/CMAKE/CheckLAPACKCompilerFlags.cmake +++ b/CMAKE/CheckLAPACKCompilerFlags.cmake @@ -14,6 +14,19 @@ macro( CheckLAPACKCompilerFlags ) set( FPE_EXIT FALSE ) +# FORTRAN ILP default +if ( FORTRAN_ILP ) + if( CMAKE_Fortran_COMPILER_ID STREQUAL "Intel" ) + if ( WIN32 ) + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} /integer-size:64") + else () + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -integer-size 64") + endif() + else() + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fdefault-integer-8") + endif() +endif() + # GNU Fortran if( CMAKE_Fortran_COMPILER_ID STREQUAL "GNU" ) if( "${CMAKE_Fortran_FLAGS}" MATCHES "-ffpe-trap=[izoupd]") diff --git a/CMakeLists.txt b/CMakeLists.txt index 7335d10a3c..379e2347fd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -53,7 +53,7 @@ if(BUILD_INDEX64) set(LAPACKELIB "lapacke64") set(TMGLIB "tmglib64") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DWeirdNEC -DLAPACK_ILP64 -DHAVE_LAPACK_CONFIG_H") - set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fdefault-integer-8") + set(FORTRAN_ILP TRUE) else() set(BLASLIB "blas") set(CBLASLIB "cblas") diff --git a/LAPACKE/include/lapacke_config.h b/LAPACKE/include/lapacke_config.h index 765359163b..4a7d15760c 100644 --- a/LAPACKE/include/lapacke_config.h +++ b/LAPACKE/include/lapacke_config.h @@ -41,12 +41,13 @@ extern "C" { #endif /* __cplusplus */ #include +#include #ifndef lapack_int #if defined(LAPACK_ILP64) -#define lapack_int long +#define lapack_int int64_t #else -#define lapack_int int +#define lapack_int int32_t #endif #endif From 0631b6beaed60ba118b0b027c0f8d35397bf5df0 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Thu, 30 Sep 2021 03:51:23 -0400 Subject: [PATCH 11/13] Fix out of bounds read in slarrv This was originally reported as https://github.com/JuliaLang/julia/issues/42415. I've tracked this down to an our of bounds read on the following line: https://github.com/Reference-LAPACK/lapack/blob/44ecb6a5ff821b1cbb39f8cc2166cb098e060b4d/SRC/slarrv.f#L423 In the crashing example, `M` is `0`, causing `slarrv` to read uninitialized memory from the work array. I believe the `0` for `M` is correct and indeed, the documentation above supports that `M` may be zero: https://github.com/Reference-LAPACK/lapack/blob/44ecb6a5ff821b1cbb39f8cc2166cb098e060b4d/SRC/slarrv.f#L113-L116 I believe it may be sufficient to early-out this function as suggested in this PR. However, I have limited context for the full routine here, so I would appreciate a sanity check. --- SRC/clarrv.f | 2 +- SRC/dlarrv.f | 2 +- SRC/slarrv.f | 2 +- SRC/zlarrv.f | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/SRC/clarrv.f b/SRC/clarrv.f index 1f09e4da6a..42f7107573 100644 --- a/SRC/clarrv.f +++ b/SRC/clarrv.f @@ -348,7 +348,7 @@ SUBROUTINE CLARRV( N, VL, VU, D, L, PIVMIN, * * Quick return if possible * - IF( N.LE.0 ) THEN + IF( (N.LE.0).OR.(M.LE.0) ) THEN RETURN END IF * diff --git a/SRC/dlarrv.f b/SRC/dlarrv.f index b036c1e660..2994303612 100644 --- a/SRC/dlarrv.f +++ b/SRC/dlarrv.f @@ -350,7 +350,7 @@ SUBROUTINE DLARRV( N, VL, VU, D, L, PIVMIN, * * Quick return if possible * - IF( N.LE.0 ) THEN + IF( (N.LE.0).OR.(M.LE.0) ) THEN RETURN END IF * diff --git a/SRC/slarrv.f b/SRC/slarrv.f index 9d72b339a9..95f94fd1bd 100644 --- a/SRC/slarrv.f +++ b/SRC/slarrv.f @@ -350,7 +350,7 @@ SUBROUTINE SLARRV( N, VL, VU, D, L, PIVMIN, * * Quick return if possible * - IF( N.LE.0 ) THEN + IF( (N.LE.0).OR.(M.LE.0) ) THEN RETURN END IF * diff --git a/SRC/zlarrv.f b/SRC/zlarrv.f index 51ec558f53..e4be63e0db 100644 --- a/SRC/zlarrv.f +++ b/SRC/zlarrv.f @@ -348,7 +348,7 @@ SUBROUTINE ZLARRV( N, VL, VU, D, L, PIVMIN, * * Quick return if possible * - IF( N.LE.0 ) THEN + IF( (N.LE.0).OR.(M.LE.0) ) THEN RETURN END IF * From 70b8ee22ad2c0392b48e5b6572cf15fff02b22a7 Mon Sep 17 00:00:00 2001 From: Julien Langou Date: Sat, 9 Oct 2021 14:12:23 -0600 Subject: [PATCH 12/13] fix a typo, thanks to Jim Demmel for letting me know --- BLAS/SRC/crotg.f90 | 2 +- BLAS/SRC/zrotg.f90 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/BLAS/SRC/crotg.f90 b/BLAS/SRC/crotg.f90 index 7806140668..18b7d74a71 100644 --- a/BLAS/SRC/crotg.f90 +++ b/BLAS/SRC/crotg.f90 @@ -11,7 +11,7 @@ ! CROTG constructs a plane rotation ! [ c s ] [ a ] = [ r ] ! [ -conjg(s) c ] [ b ] [ 0 ] -! where c is real, s ic complex, and c**2 + conjg(s)*s = 1. +! where c is real, s is complex, and c**2 + conjg(s)*s = 1. ! !> \par Purpose: ! ============= diff --git a/BLAS/SRC/zrotg.f90 b/BLAS/SRC/zrotg.f90 index 288e5c7ef5..81ae976cc5 100644 --- a/BLAS/SRC/zrotg.f90 +++ b/BLAS/SRC/zrotg.f90 @@ -11,7 +11,7 @@ ! ZROTG constructs a plane rotation ! [ c s ] [ a ] = [ r ] ! [ -conjg(s) c ] [ b ] [ 0 ] -! where c is real, s ic complex, and c**2 + conjg(s)*s = 1. +! where c is real, s is complex, and c**2 + conjg(s)*s = 1. ! !> \par Purpose: ! ============= From 619d92717317ae2ccfb1a4066c46e9f922173ad3 Mon Sep 17 00:00:00 2001 From: "Weslley S. Pereira" Date: Fri, 12 Nov 2021 10:56:09 -0700 Subject: [PATCH 13/13] Fix xGEQRF and xGERQF following thanks to @andreasvarga and @VasileSima4 --- SRC/cgeqrf.f | 18 ++++++++++++------ SRC/cgerqf.f | 8 +++++--- SRC/dgeqrf.f | 18 ++++++++++++------ SRC/dgerqf.f | 8 +++++--- SRC/sgeqrf.f | 20 +++++++++++++------- SRC/sgerqf.f | 11 +++++------ SRC/zgeqrf.f | 18 ++++++++++++------ SRC/zgerqf.f | 8 +++++--- 8 files changed, 69 insertions(+), 40 deletions(-) diff --git a/SRC/cgeqrf.f b/SRC/cgeqrf.f index 4b75edea0d..d71bd5b33b 100644 --- a/SRC/cgeqrf.f +++ b/SRC/cgeqrf.f @@ -95,7 +95,8 @@ *> \param[in] LWORK *> \verbatim *> LWORK is INTEGER -*> The dimension of the array WORK. LWORK >= max(1,N). +*> The dimension of the array WORK. +*> LWORK >= 1, if MIN(M,N) = 0, and LWORK >= N, otherwise. *> For optimum performance LWORK >= N*NB, where NB is *> the optimal blocksize. *> @@ -175,10 +176,9 @@ SUBROUTINE CGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) * * Test the input arguments * + K = MIN( M, N ) INFO = 0 NB = ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) - LWKOPT = N*NB - WORK( 1 ) = LWKOPT LQUERY = ( LWORK.EQ.-1 ) IF( M.LT.0 ) THEN INFO = -1 @@ -186,19 +186,25 @@ SUBROUTINE CGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) INFO = -2 ELSE IF( LDA.LT.MAX( 1, M ) ) THEN INFO = -4 - ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN - INFO = -7 + ELSE IF( .NOT.LQUERY ) THEN + IF( LWORK.LE.0 .OR. ( M.GT.0 .AND. LWORK.LT.MAX( 1, N ) ) ) + $ INFO = -7 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'CGEQRF', -INFO ) RETURN ELSE IF( LQUERY ) THEN + IF( K.EQ.0 ) THEN + LWKOPT = 1 + ELSE + LWKOPT = N*NB + END IF + WORK( 1 ) = LWKOPT RETURN END IF * * Quick return if possible * - K = MIN( M, N ) IF( K.EQ.0 ) THEN WORK( 1 ) = 1 RETURN diff --git a/SRC/cgerqf.f b/SRC/cgerqf.f index 2df1936205..d2247844ce 100644 --- a/SRC/cgerqf.f +++ b/SRC/cgerqf.f @@ -88,7 +88,8 @@ *> \param[in] LWORK *> \verbatim *> LWORK is INTEGER -*> The dimension of the array WORK. LWORK >= max(1,M). +*> The dimension of the array WORK. +*> LWORK >= 1, if MIN(M,N) = 0, and LWORK >= M, otherwise. *> For optimum performance LWORK >= M*NB, where NB is *> the optimal blocksize. *> @@ -188,8 +189,9 @@ SUBROUTINE CGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) END IF WORK( 1 ) = LWKOPT * - IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN - INFO = -7 + IF ( .NOT.LQUERY ) THEN + IF( LWORK.LE.0 .OR. ( N.GT.0 .AND. LWORK.LT.MAX( 1, M ) ) ) + $ INFO = -7 END IF END IF * diff --git a/SRC/dgeqrf.f b/SRC/dgeqrf.f index 5d649f692e..705e939286 100644 --- a/SRC/dgeqrf.f +++ b/SRC/dgeqrf.f @@ -95,7 +95,8 @@ *> \param[in] LWORK *> \verbatim *> LWORK is INTEGER -*> The dimension of the array WORK. LWORK >= max(1,N). +*> The dimension of the array WORK. +*> LWORK >= 1, if MIN(M,N) = 0, and LWORK >= N, otherwise. *> For optimum performance LWORK >= N*NB, where NB is *> the optimal blocksize. *> @@ -175,10 +176,9 @@ SUBROUTINE DGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) * * Test the input arguments * + K = MIN( M, N ) INFO = 0 NB = ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 ) - LWKOPT = N*NB - WORK( 1 ) = LWKOPT LQUERY = ( LWORK.EQ.-1 ) IF( M.LT.0 ) THEN INFO = -1 @@ -186,19 +186,25 @@ SUBROUTINE DGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) INFO = -2 ELSE IF( LDA.LT.MAX( 1, M ) ) THEN INFO = -4 - ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN - INFO = -7 + ELSE IF( .NOT.LQUERY ) THEN + IF( LWORK.LE.0 .OR. ( M.GT.0 .AND. LWORK.LT.MAX( 1, N ) ) ) + $ INFO = -7 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'DGEQRF', -INFO ) RETURN ELSE IF( LQUERY ) THEN + IF( K.EQ.0 ) THEN + LWKOPT = 1 + ELSE + LWKOPT = N*NB + END IF + WORK( 1 ) = LWKOPT RETURN END IF * * Quick return if possible * - K = MIN( M, N ) IF( K.EQ.0 ) THEN WORK( 1 ) = 1 RETURN diff --git a/SRC/dgerqf.f b/SRC/dgerqf.f index 6381a873af..cca9d6367b 100644 --- a/SRC/dgerqf.f +++ b/SRC/dgerqf.f @@ -88,7 +88,8 @@ *> \param[in] LWORK *> \verbatim *> LWORK is INTEGER -*> The dimension of the array WORK. LWORK >= max(1,M). +*> The dimension of the array WORK. +*> LWORK >= 1, if MIN(M,N) = 0, and LWORK >= M, otherwise. *> For optimum performance LWORK >= M*NB, where NB is *> the optimal blocksize. *> @@ -188,8 +189,9 @@ SUBROUTINE DGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) END IF WORK( 1 ) = LWKOPT * - IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN - INFO = -7 + IF ( .NOT.LQUERY ) THEN + IF( LWORK.LE.0 .OR. ( N.GT.0 .AND. LWORK.LT.MAX( 1, M ) ) ) + $ INFO = -7 END IF END IF * diff --git a/SRC/sgeqrf.f b/SRC/sgeqrf.f index 0fc5ba11a9..b24615f7a1 100644 --- a/SRC/sgeqrf.f +++ b/SRC/sgeqrf.f @@ -95,7 +95,8 @@ *> \param[in] LWORK *> \verbatim *> LWORK is INTEGER -*> The dimension of the array WORK. LWORK >= max(1,N). +*> The dimension of the array WORK. +*> LWORK >= 1, if MIN(M,N) = 0, and LWORK >= N, otherwise. *> For optimum performance LWORK >= N*NB, where NB is *> the optimal blocksize. *> @@ -175,10 +176,9 @@ SUBROUTINE SGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) * * Test the input arguments * + K = MIN( M, N ) INFO = 0 NB = ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) - LWKOPT = N*NB - WORK( 1 ) = LWKOPT LQUERY = ( LWORK.EQ.-1 ) IF( M.LT.0 ) THEN INFO = -1 @@ -186,19 +186,25 @@ SUBROUTINE SGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) INFO = -2 ELSE IF( LDA.LT.MAX( 1, M ) ) THEN INFO = -4 - ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN - INFO = -7 + ELSE IF( .NOT.LQUERY ) THEN + IF( LWORK.LE.0 .OR. ( M.GT.0 .AND. LWORK.LT.MAX( 1, N ) ) ) + $ INFO = -7 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'SGEQRF', -INFO ) RETURN ELSE IF( LQUERY ) THEN + IF( K.EQ.0 ) THEN + LWKOPT = 1 + ELSE + LWKOPT = N*NB + END IF + WORK( 1 ) = LWKOPT RETURN END IF * * Quick return if possible -* - K = MIN( M, N ) +* IF( K.EQ.0 ) THEN WORK( 1 ) = 1 RETURN diff --git a/SRC/sgerqf.f b/SRC/sgerqf.f index 24ff42b4c5..037cd5345b 100644 --- a/SRC/sgerqf.f +++ b/SRC/sgerqf.f @@ -88,7 +88,8 @@ *> \param[in] LWORK *> \verbatim *> LWORK is INTEGER -*> The dimension of the array WORK. LWORK >= max(1,M). +*> The dimension of the array WORK. +*> LWORK >= 1, if MIN(M,N) = 0, and LWORK >= M, otherwise. *> For optimum performance LWORK >= M*NB, where NB is *> the optimal blocksize. *> @@ -176,8 +177,6 @@ SUBROUTINE SGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) INFO = -2 ELSE IF( LDA.LT.MAX( 1, M ) ) THEN INFO = -4 - ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN - INFO = -7 END IF * IF( INFO.EQ.0 ) THEN @@ -187,12 +186,12 @@ SUBROUTINE SGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) ELSE NB = ILAENV( 1, 'SGERQF', ' ', M, N, -1, -1 ) LWKOPT = M*NB - WORK( 1 ) = LWKOPT END IF WORK( 1 ) = LWKOPT * - IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN - INFO = -7 + IF ( .NOT.LQUERY ) THEN + IF( LWORK.LE.0 .OR. ( N.GT.0 .AND. LWORK.LT.MAX( 1, M ) ) ) + $ INFO = -7 END IF END IF * diff --git a/SRC/zgeqrf.f b/SRC/zgeqrf.f index dd31483c10..2032bf742b 100644 --- a/SRC/zgeqrf.f +++ b/SRC/zgeqrf.f @@ -95,7 +95,8 @@ *> \param[in] LWORK *> \verbatim *> LWORK is INTEGER -*> The dimension of the array WORK. LWORK >= max(1,N). +*> The dimension of the array WORK. +*> LWORK >= 1, if MIN(M,N) = 0, and LWORK >= N, otherwise. *> For optimum performance LWORK >= N*NB, where NB is *> the optimal blocksize. *> @@ -175,10 +176,9 @@ SUBROUTINE ZGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) * * Test the input arguments * + K = MIN( M, N ) INFO = 0 NB = ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 ) - LWKOPT = N*NB - WORK( 1 ) = LWKOPT LQUERY = ( LWORK.EQ.-1 ) IF( M.LT.0 ) THEN INFO = -1 @@ -186,19 +186,25 @@ SUBROUTINE ZGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) INFO = -2 ELSE IF( LDA.LT.MAX( 1, M ) ) THEN INFO = -4 - ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN - INFO = -7 + ELSE IF( .NOT.LQUERY ) THEN + IF( LWORK.LE.0 .OR. ( M.GT.0 .AND. LWORK.LT.MAX( 1, N ) ) ) + $ INFO = -7 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'ZGEQRF', -INFO ) RETURN ELSE IF( LQUERY ) THEN + IF( K.EQ.0 ) THEN + LWKOPT = 1 + ELSE + LWKOPT = N*NB + END IF + WORK( 1 ) = LWKOPT RETURN END IF * * Quick return if possible * - K = MIN( M, N ) IF( K.EQ.0 ) THEN WORK( 1 ) = 1 RETURN diff --git a/SRC/zgerqf.f b/SRC/zgerqf.f index 22bd5b94ff..26e901390f 100644 --- a/SRC/zgerqf.f +++ b/SRC/zgerqf.f @@ -88,7 +88,8 @@ *> \param[in] LWORK *> \verbatim *> LWORK is INTEGER -*> The dimension of the array WORK. LWORK >= max(1,M). +*> The dimension of the array WORK. +*> LWORK >= 1, if MIN(M,N) = 0, and LWORK >= M, otherwise. *> For optimum performance LWORK >= M*NB, where NB is *> the optimal blocksize. *> @@ -188,8 +189,9 @@ SUBROUTINE ZGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) END IF WORK( 1 ) = LWKOPT * - IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN - INFO = -7 + IF ( .NOT.LQUERY ) THEN + IF( LWORK.LE.0 .OR. ( N.GT.0 .AND. LWORK.LT.MAX( 1, M ) ) ) + $ INFO = -7 END IF END IF *