diff --git a/Changelog.txt b/Changelog.txt index 574f462265..dd186b683b 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -1,4 +1,16 @@ OpenBLAS ChangeLog +==================================================================== +Version 0.2.8 +01-Aug-2013 +common: + * Support Open64 5.0. (#266) + * Add executable stack markings. (#262, Thanks to Sébastien Fabbro) + * Respect user's LDFLAGS (Thanks to Sébastien Fabbro) + +x86/x86-64: + * Roll back bulldozer and piledriver kernels to barcelona kernels (#263) + We will fix the computational error bug in the bulldozer and piledriver kernels. + ==================================================================== Version 0.2.7 20-Jul-2013 diff --git a/Makefile.rule b/Makefile.rule index fb377c3775..a92eb500ac 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -3,7 +3,7 @@ # # This library's version -VERSION = 0.2.7 +VERSION = 0.2.8 # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library diff --git a/Makefile.system b/Makefile.system index b1f9ba514f..727b089606 100644 --- a/Makefile.system +++ b/Makefile.system @@ -324,14 +324,16 @@ ifeq ($(ARCH), x86) DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \ CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO ifneq ($(NO_AVX), 1) -DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER +DYNAMIC_CORE += SANDYBRIDGE +#BULLDOZER PILEDRIVER endif endif ifeq ($(ARCH), x86_64) DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO ifneq ($(NO_AVX), 1) -DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER +DYNAMIC_CORE += SANDYBRIDGE +#BULLDOZER PILEDRIVER endif endif @@ -895,6 +897,7 @@ export CC export FC export BU export FU +export NEED2UNDERSCORES export USE_THREAD export NUM_THREADS export NUM_CORES diff --git a/cpuid.h b/cpuid.h index 2cbbd45390..4311ce95e6 100644 --- a/cpuid.h +++ b/cpuid.h @@ -105,8 +105,8 @@ 
#define CORE_NANO 19 #define CORE_SANDYBRIDGE 20 #define CORE_BOBCAT 21 -#define CORE_BULLDOZER 22 -#define CORE_PILEDRIVER 23 +#define CORE_BULLDOZER CORE_BARCELONA +#define CORE_PILEDRIVER CORE_BARCELONA #define CORE_HASWELL CORE_SANDYBRIDGE #define HAVE_SSE (1 << 0) @@ -198,8 +198,8 @@ typedef struct { #define CPUTYPE_NANO 43 #define CPUTYPE_SANDYBRIDGE 44 #define CPUTYPE_BOBCAT 45 -#define CPUTYPE_BULLDOZER 46 -#define CPUTYPE_PILEDRIVER 47 +#define CPUTYPE_BULLDOZER CPUTYPE_BARCELONA +#define CPUTYPE_PILEDRIVER CPUTYPE_BARCELONA // this define is because BLAS doesn't have haswell specific optimizations yet #define CPUTYPE_HASWELL CPUTYPE_SANDYBRIDGE diff --git a/driver/others/dynamic.c b/driver/others/dynamic.c index 197cc2b2de..bf60efb190 100644 --- a/driver/others/dynamic.c +++ b/driver/others/dynamic.c @@ -63,14 +63,16 @@ extern gotoblas_t gotoblas_BARCELONA; extern gotoblas_t gotoblas_BOBCAT; #ifndef NO_AVX extern gotoblas_t gotoblas_SANDYBRIDGE; -extern gotoblas_t gotoblas_BULLDOZER; -extern gotoblas_t gotoblas_PILEDRIVER; +//extern gotoblas_t gotoblas_BULLDOZER; +//extern gotoblas_t gotoblas_PILEDRIVER; #else //Use NEHALEM kernels for sandy bridge #define gotoblas_SANDYBRIDGE gotoblas_NEHALEM +#endif + #define gotoblas_BULLDOZER gotoblas_BARCELONA #define gotoblas_PILEDRIVER gotoblas_BARCELONA -#endif + //Use sandy bridge kernels for haswell. 
#define gotoblas_HASWELL gotoblas_SANDYBRIDGE diff --git a/exports/Makefile b/exports/Makefile index 9fd93dd93e..64d7e181f5 100644 --- a/exports/Makefile +++ b/exports/Makefile @@ -18,6 +18,10 @@ ifndef NO_LAPACKE NO_LAPACKE = 0 endif +ifndef NEED2UNDERSCORES +NEED2UNDERSCORES=0 +endif + ifeq ($(OSNAME), WINNT) ifeq ($(F_COMPILER), GFORTRAN) EXTRALIB += -lgfortran @@ -94,13 +98,13 @@ libgoto2_shared.dll : ../$(LIBNAME) libgoto2_shared.def -Wl,--out-implib,libgoto2_shared.lib $(FEXTRALIB) libopenblas.def : gensymbol - perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F) + perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F) libgoto2_shared.def : gensymbol - perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F) + perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F) libgoto_hpl.def : gensymbol - perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F) + perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F) $(LIBDYNNAME) : ../$(LIBNAME) osx.def $(CC) $(CFLAGS) -all_load -headerpad_max_install_names -install_name $(CURDIR)/../$(LIBDYNNAME) -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB) @@ -187,23 +191,23 @@ static : ../$(LIBNAME) rm -f goto.$(SUFFIX) linux.def : gensymbol ../Makefile.system ../getarch.c - perl ./gensymbol linux $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F) + perl ./gensymbol linux $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F) osx.def : gensymbol ../Makefile.system ../getarch.c - perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F) + perl ./gensymbol osx $(ARCH) $(BU) 
$(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F) aix.def : gensymbol ../Makefile.system ../getarch.c - perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F) + perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F) symbol.S : gensymbol - perl ./gensymbol win2kasm noarch dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > symbol.S + perl ./gensymbol win2kasm noarch dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > symbol.S test : linktest.c $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK. rm -f linktest linktest.c : gensymbol ../Makefile.system ../getarch.c - perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > linktest.c + perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > linktest.c clean :: @rm -f *.def *.dylib __.SYMDEF* diff --git a/exports/gensymbol b/exports/gensymbol index 0a9729a6c0..7076412918 100644 --- a/exports/gensymbol +++ b/exports/gensymbol @@ -114,8 +114,8 @@ # ALLAUX -- Auxiliary routines called from all precisions # already provided by @blasobjs: xerbla, lsame - ilaenv, ieeeck, lsamen, xerbla_array, iparmq, - ilaprec, ilatrans, ilauplo, iladiag, chla_transtype, + ilaenv, ieeeck, lsamen, iparmq, + ilaprec, ilatrans, ilauplo, iladiag, ilaver, slamch, slamc3, # SCLAUX -- Auxiliary routines called from both REAL and COMPLEX. @@ -2672,12 +2672,25 @@ #LAPACKE_zlagsy_work, ); +#These function may need 2 underscores. 
+@lapack_embeded_underscore_objs=(xerbla_array, chla_transtype,); + if ($ARGV[5] == 1) { #NO_LAPACK=1 @underscore_objs = (@blasobjs, @misc_underscore_objs); } elsif (-d "../lapack-3.1.1" || -d "../lapack-3.4.0" || -d "../lapack-3.4.1" || -d "../lapack-3.4.2" || -d "../lapack-netlib") { - @underscore_objs = (@blasobjs, @lapackobjs, @lapackobjs2, @misc_underscore_objs); + + if ($ARGV[7] == 0){ + # NEED2UNDERSCORES=0 + # Don't need 2 underscores + @underscore_objs = (@blasobjs, @lapackobjs, @lapackobjs2, @misc_underscore_objs, @lapack_embeded_underscore_objs); + }else{ + # Need 2 underscores + @underscore_objs = (@blasobjs, @lapackobjs, @lapackobjs2, @misc_underscore_objs); + @need_2underscore_objs = (@lapack_embeded_underscore_objs); + }; + } else { @underscore_objs = (@blasobjs, @lapackobjs, @misc_underscore_objs); } @@ -2729,6 +2742,10 @@ if ($ARGV[0] eq "linux"){ print $objs, $bu, "\n"; } + foreach $objs (@need_2underscore_objs) { + print $objs, $bu, $bu, "\n"; + } + # if ($ARGV[4] == 0) { foreach $objs (@no_underscore_objs) { print $objs, "\n"; @@ -2750,6 +2767,10 @@ if ($ARGV[0] eq "osx"){ print "_", $objs, $bu, "\n"; } + foreach $objs (@need_2underscore_objs) { + print "_", $objs, $bu, $bu, "\n"; + } + # if ($ARGV[4] == 0) { foreach $objs (@no_underscore_objs) { print "_", $objs, "\n"; @@ -2767,6 +2788,10 @@ if ($ARGV[0] eq "aix"){ print $objs, $bu, "\n"; } + foreach $objs (@need_2underscore_objs) { + print $objs, $bu, $bu, "\n"; + } + # if ($ARGV[4] == 0) { foreach $objs (@no_underscore_objs) { print $objs, "\n"; @@ -2791,6 +2816,17 @@ if ($ARGV[0] eq "win2k"){ print "\t$uppercase=$objs", "_ \@", $count, "\n"; $count ++; } + + foreach $objs (@need_2underscore_objs) { + $uppercase = $objs; + $uppercase =~ tr/[a-z]/[A-Z]/; + print "\t$objs=$objs","__ \@", $count, "\n"; + $count ++; + print "\t",$objs, "__=$objs","__ \@", $count, "\n"; + $count ++; + print "\t$uppercase=$objs", "__ \@", $count, "\n"; + $count ++; + } #for misc_common_objs foreach $objs 
(@misc_common_objs) { @@ -2852,6 +2888,18 @@ if ($ARGV[0] eq "microsoft"){ print "\t$uppercase\_ = $objs","_\n"; $count ++; } + + foreach $objs (@need_2underscore_objs) { + $uppercase = $objs; + $uppercase =~ tr/[a-z]/[A-Z]/; + print "\t$objs=$objs","__ \@", $count, "\n"; + $count ++; + print "\t",$objs, "__=$objs","__ \@", $count, "\n"; + $count ++; + print "\t$uppercase=$objs", "__ \@", $count, "\n"; + $count ++; + } + exit(0); } @@ -2868,6 +2916,16 @@ if ($ARGV[0] eq "win2kasm"){ print "_", $uppercase, "_:\n"; print "\tjmp\t_", $objs, "_\n"; } + + foreach $objs (@need_2underscore_objs) { + $uppercase = $objs; + $uppercase =~ tr/[a-z]/[A-Z]/; + print "\t.align 16\n"; + print "\t.globl _", $uppercase, "__\n"; + print "_", $uppercase, "__:\n"; + print "\tjmp\t_", $objs, "__\n"; + } + exit(0); } @@ -2880,6 +2938,11 @@ if ($ARGV[0] eq "linktest"){ foreach $objs (@underscore_objs) { print $objs, $bu, "();\n" if $objs ne "xerbla"; } + + foreach $objs (@need_2underscore_objs) { + print $objs, $bu, $bu, "();\n"; + } + # if ($ARGV[4] == 0) { foreach $objs (@no_underscore_objs) { print $objs, "();\n"; diff --git a/f_check b/f_check index d7c0b23283..86f1fa689b 100644 --- a/f_check +++ b/f_check @@ -114,6 +114,12 @@ if ($compiler eq "") { $vendor = IBM; $openmp = "-openmp"; } + + # for embeded underscore name, e.g. zho_ge, it may append 2 underscores. + $data = `$compiler -O2 -S ftest3.f > /dev/null 2>&1 && cat ftest3.s && rm -f ftest3.s`; + if ($data =~ /zho_ge__/) { + $need2bu = 1; + } } if ($vendor eq "") { @@ -245,6 +251,8 @@ if ($link ne "") { $link =~ s/\-rpath\s+/\-rpath\@/g; + $link =~ s/\-rpath-link\s+/\-rpath-link\@/g; + @flags = split(/[\s\,\n]/, $link); # remove leading and trailing quotes from each flag. @flags = map {s/^['"]|['"]$//g; $_} @flags; @@ -265,7 +273,15 @@ if ($link ne "") { $linker_L .= "-Wl,". $flags . 
" "; } - if ($flags =~ /^\-rpath/) { + if ($flags =~ /^\-rpath\@/) { + $flags =~ s/\@/\,/g; + if ($vendor eq "PGI") { + $flags =~ s/lib$/libso/; + } + $linker_L .= "-Wl,". $flags . " " ; + } + + if ($flags =~ /^\-rpath-link\@/) { $flags =~ s/\@/\,/g; if ($vendor eq "PGI") { $flags =~ s/lib$/libso/; @@ -309,6 +325,9 @@ print MAKEFILE "NOFORTRAN=1\n" if $nofortran == 1; print CONFFILE "#define BUNDERSCORE\t$bu\n" if $bu ne ""; print CONFFILE "#define NEEDBUNDERSCORE\t1\n" if $bu ne ""; +print CONFFILE "#define NEED2UNDERSCORES\t1\n" if $need2bu ne ""; + +print MAKEFILE "NEED2UNDERSCORES=1\n" if $need2bu ne ""; if (($linker_l ne "") || ($linker_a ne "")) { print MAKEFILE "FEXTRALIB=$linker_L $linker_l $linker_a\n"; diff --git a/ftest3.f b/ftest3.f new file mode 100644 index 0000000000..8f2cd332f5 --- /dev/null +++ b/ftest3.f @@ -0,0 +1,6 @@ + double complex function zho_ge() + + zho_ge = (0.0d0,0.0d0) + + return + end diff --git a/getarch.c b/getarch.c index 3ffda62446..ec9bb75a84 100644 --- a/getarch.c +++ b/getarch.c @@ -354,7 +354,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define CORENAME "OPTERON" #endif -#if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL) +#if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL) || defined (FORCE_PILEDRIVER) || defined (FORCE_BULLDOZER) #define FORCE #define FORCE_INTEL #define ARCHITECTURE "X86" @@ -384,7 +384,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define CORENAME "BOBCAT" #endif -#if defined (FORCE_BULLDOZER) +#if 0 #define FORCE #define FORCE_INTEL #define ARCHITECTURE "X86" @@ -400,7 +400,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define CORENAME "BULLDOZER" #endif -#if defined (FORCE_PILEDRIVER) +#if 0 #define FORCE #define FORCE_INTEL #define ARCHITECTURE "X86" diff --git a/getarch_2nd.c b/getarch_2nd.c index fc800cfacd..0b140bba4c 100644 --- a/getarch_2nd.c +++ b/getarch_2nd.c @@ -8,7 +8,7 @@ int main(int argc, char **argv) { - if ( (argc <= 1) || (argc >= 2) && (*argv[1] == '0')) { + if ( (argc <= 1) || ((argc >= 2) && (*argv[1] == '0'))) { printf("SGEMM_UNROLL_M=%d\n", SGEMM_DEFAULT_UNROLL_M); printf("SGEMM_UNROLL_N=%d\n", SGEMM_DEFAULT_UNROLL_N); printf("DGEMM_UNROLL_M=%d\n", DGEMM_DEFAULT_UNROLL_M); diff --git a/interface/trtri.c b/interface/trtri.c index 0564bc1830..007dbd7faf 100644 --- a/interface/trtri.c +++ b/interface/trtri.c @@ -60,7 +60,7 @@ static blasint (*trtri_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT * }; #endif -extern void dtrtri_lapack_(char *UPLO, char *DIAG, int *N, double *a, int *ldA, int *Info); +extern void BLASFUNC(dtrtrilapack)(char *UPLO, char *DIAG, int *N, double *a, int *ldA, int *Info); int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){ @@ -137,7 +137,10 @@ int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *In // double trtri_U single thread error // call dtrtri from lapack for a walk around. 
if(uplo==0){ - dtrtri_lapack_(UPLO, DIAG, N, a, ldA, Info); + BLASFUNC(dtrtrilapack)(UPLO, DIAG, N, a, ldA, Info); +#ifndef PPC440 + blas_memory_free(buffer); +#endif return 0; } #endif diff --git a/lapack/trtri/dtrtri_lapack.f b/lapack/trtri/dtrtri_lapack.f index 31a880f764..8e9a081705 100644 --- a/lapack/trtri/dtrtri_lapack.f +++ b/lapack/trtri/dtrtri_lapack.f @@ -107,7 +107,7 @@ *> \ingroup doubleOTHERcomputational * * ===================================================================== - SUBROUTINE DTRTRI_LAPACK( UPLO, DIAG, N, A, LDA, INFO ) + SUBROUTINE DTRTRILAPACK( UPLO, DIAG, N, A, LDA, INFO ) * * -- LAPACK computational routine (version 3.4.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, --