arm64: add Qualcomm Oryon (ORYON) as a build target

What the hunks below do:
  * Makefile.arm64: for CORE=ORYON, pass -mcpu=oryon-1 when GCCVERSIONGTEQ15 or
    ISCLANG is set, otherwise -march=armv8.7-a -mtune=neoverse-n1. The Fortran
    flags follow the C flags unless F_COMPILER is NAG.
  * cmake/cc.cmake: for CORE=ORYON without DYNAMIC_ARCH, always pass
    -march=armv8.7-a; add -mtune=oryon-1 for GCC_VERSION >= 15.0, or
    -mtune=neoverse-n1 for GCC_VERSION >= 9.4.
  * Makefile.system, cmake/arch.cmake, TargetList.txt: list ORYON among the
    ARM64 DYNAMIC_CORE entries and in the target list.
  * cpuid_arm64.c: add CPU_ORYON (26) and its name strings, match implementer
    0x51 / part 0x001 in detect(), map the registry string
    "Snapdragon(R) X Elite" to CPU_ORYON instead of CPU_NEOVERSEN1, and print
    the ORYON cache/TLB parameters in get_cpuconfig().
  * getarch.c: add the FORCE_ORYON configuration.
  * cmake/prebuild.cmake: write the same cache/TLB parameters and the GEMM
    unroll factors for TCORE=ORYON.
  * common_arm64.h: ORYON uses BUFFER_SIZE (32 << 22).
  * param.h: ORYON takes the SGEMM_DEFAULT_P 512 / DGEMM_DEFAULT_P 256 branch.
  * kernel/arm64/KERNEL.ORYON: new file that includes KERNEL.NEOVERSEN1.

Review fixes folded into this revision:
  * getarch.c: ARCHCONFIG passed -DDTB_SIZE=4096 twice and never defined
    ITB_SIZE. The second occurrence is now -DITB_SIZE=4096, matching the
    ITB_SIZE 4096 emitted by cpuid_arm64.c and cmake/prebuild.cmake.
  * cmake/prebuild.cmake: DTB_DEFAULT_ENTRIES was 48 while cpuid_arm64.c and
    getarch.c both use 224 for ORYON. It is now 224.

NOTE(review): line breaks, leading indentation and blank context lines were
reconstructed from the hunk line counts; in-line spacing is kept exactly as
received. Confirm whitespace against the target tree (or apply with
--ignore-whitespace). The index lines are unchanged, so the post-image blob
ids for getarch.c and cmake/prebuild.cmake do not describe the corrected
content.

diff --git a/Makefile.arm64 b/Makefile.arm64
index b98933b77a..00871f6a08 100644
--- a/Makefile.arm64
+++ b/Makefile.arm64
@@ -224,6 +224,21 @@ endif
 endif
 endif
 
+# Detect Qualcomm Oryon.
+ifeq ($(CORE), ORYON)
+ifeq (1, $(filter 1,$(GCCVERSIONGTEQ15) $(ISCLANG)))
+CCOMMON_OPT += -mcpu=oryon-1
+ifneq ($(F_COMPILER), NAG)
+FCOMMON_OPT += -mcpu=oryon-1
+endif
+else
+CCOMMON_OPT += -march=armv8.7-a -mtune=neoverse-n1
+ifneq ($(F_COMPILER), NAG)
+FCOMMON_OPT += -march=armv8.7-a -mtune=neoverse-n1
+endif
+endif
+endif
+
 # Detect Ampere AmpereOne(ampere1,ampere1a) processors.
 ifeq ($(CORE), AMPERE1)
 ifeq (1, $(filter 1,$(GCCVERSIONGTEQ12) $(ISCLANG)))
diff --git a/Makefile.system b/Makefile.system
index 6241006a81..9b52848a0f 100644
--- a/Makefile.system
+++ b/Makefile.system
@@ -713,6 +713,7 @@ DYNAMIC_CORE = ARMV8
 DYNAMIC_CORE += CORTEXA53
 DYNAMIC_CORE += CORTEXA57
 DYNAMIC_CORE += NEOVERSEN1
+DYNAMIC_CORE += ORYON
 ifneq ($(NO_SVE), 1)
 DYNAMIC_CORE += NEOVERSEV1
 DYNAMIC_CORE += NEOVERSEN2
diff --git a/TargetList.txt b/TargetList.txt
index b890c1440d..071f8189ab 100644
--- a/TargetList.txt
+++ b/TargetList.txt
@@ -115,6 +115,7 @@ A64FX
 ARMV8SVE
 ARMV9SME
 FT2000
+ORYON
 
 9.System Z:
 ZARCH_GENERIC
diff --git a/cmake/arch.cmake b/cmake/arch.cmake
index f5e901a316..33894e9bda 100644
--- a/cmake/arch.cmake
+++ b/cmake/arch.cmake
@@ -33,7 +33,7 @@ endif ()
 
 if (DYNAMIC_ARCH)
   if (ARM64)
-    set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 THUNDERX3T110)
+    set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 THUNDERX3T110 ORYON)
     if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
       if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER_EQUAL 10) # SVE ACLE supported in GCC >= 10
         set(DYNAMIC_CORE ${DYNAMIC_CORE} NEOVERSEV1 NEOVERSEN2 ARMV8SVE A64FX)
diff --git a/cmake/cc.cmake b/cmake/cc.cmake
index ccd7fc5bda..2610860bb0 100644
--- a/cmake/cc.cmake
+++ b/cmake/cc.cmake
@@ -285,6 +285,18 @@ if (${CORE} STREQUAL NEOVERSEN1)
   endif ()
 endif ()
 
+if (${CORE} STREQUAL ORYON)
+  if (NOT DYNAMIC_ARCH)
+    if (${GCC_VERSION} VERSION_GREATER 15.0 OR ${GCC_VERSION} VERSION_EQUAL 15.0)
+      set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.7-a -mtune=oryon-1")
+    elseif (${GCC_VERSION} VERSION_GREATER 9.4 OR ${GCC_VERSION} VERSION_EQUAL 9.4)
+      set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.7-a -mtune=neoverse-n1")
+    else ()
+      set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.7-a")
+    endif ()
+  endif ()
+endif ()
+
 if (${CORE} STREQUAL AMPEREONE)
   if (NOT DYNAMIC_ARCH)
     if (${CMAKE_C_COMPILER_ID} STREQUAL "NVHPC")
diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake
index 9320af9224..8a65f413a2 100644
--- a/cmake/prebuild.cmake
+++ b/cmake/prebuild.cmake
@@ -992,6 +992,37 @@ endif ()
       set(ZGEMM_UNROLL_M 4)
       set(ZGEMM_UNROLL_N 4)
       set(SYMV_P 16)
+    elseif ("${TCORE}" STREQUAL "ORYON")
+      file(APPEND ${TARGET_CONF_TEMP}
+        "#define L1_CODE_SIZE\t196608\n"
+        "#define L1_CODE_LINESIZE\t64\n"
+        "#define L1_CODE_ASSOCIATIVE\t6\n"
+        "#define L1_DATA_SIZE\t98304\n"
+        "#define L1_DATA_LINESIZE\t64\n"
+        "#define L1_DATA_ASSOCIATIVE\t6\n"
+        "#define L2_SIZE\t12582912\n\n"
+        "#define L2_LINESIZE\t32\n"
+        "#define L2_ASSOCIATIVE\t12\n"
+        "#define ITB_SIZE\t4096\n"
+        "#define ITB_DEFAULT_ENTRIES\t256\n"
+        "#define ITB_ASSOCIATIVE\t8\n"
+        "#define DTB_ASSOCIATIVE\t7\n"
+        "#define DTB_DEFAULT_ENTRIES\t224\n"
+        "#define DTB_SIZE\t4096\n"
+        "#define HAVE_VFPV4\n"
+        "#define HAVE_VFPV3\n"
+        "#define HAVE_VFP\n"
+        "#define HAVE_NEON\n"
+        "#define ARMV8\n")
+      set(SGEMM_UNROLL_M 16)
+      set(SGEMM_UNROLL_N 4)
+      set(DGEMM_UNROLL_M 8)
+      set(DGEMM_UNROLL_N 4)
+      set(CGEMM_UNROLL_M 8)
+      set(CGEMM_UNROLL_N 4)
+      set(ZGEMM_UNROLL_M 4)
+      set(ZGEMM_UNROLL_N 4)
+      set(SYMV_P 16)
     elseif ("${TCORE}" STREQUAL "NEOVERSEN1")
       file(APPEND ${TARGET_CONF_TEMP}
         "#define L1_CODE_SIZE\t65536\n"
diff --git a/common_arm64.h b/common_arm64.h
index 5856898a2b..a6f80c35bc 100644
--- a/common_arm64.h
+++ b/common_arm64.h
@@ -175,7 +175,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
 #define HUGE_PAGESIZE ( 4 << 20)
 
 #ifndef BUFFERSIZE
-#if defined(NEOVERSEN1) || defined(NEOVERSEN2) || defined(NEOVERSEV1) || defined(A64FX) || defined(ARMV8SVE) || defined(ARMV9SME)
+#if defined(NEOVERSEN1) || defined(NEOVERSEN2) || defined(NEOVERSEV1) || defined(A64FX) || defined(ORYON) || defined(ARMV8SVE) || defined(ARMV9SME)
 #define BUFFER_SIZE (32 << 22)
 #else
 #define BUFFER_SIZE (32 << 20)
diff --git a/cpuid_arm64.c b/cpuid_arm64.c
index 6af3c499fb..c68d79618c 100644
--- a/cpuid_arm64.c
+++ b/cpuid_arm64.c
@@ -71,6 +71,7 @@ size_t length64=sizeof(value64);
 #define CPU_CORTEXA710 21
 // Qualcomm
 #define CPU_FALKOR 6
+#define CPU_ORYON 26
 // Cavium
 #define CPU_THUNDERX 7
 #define CPU_THUNDERX2T99 8
@@ -113,7 +114,8 @@ static char *cpuname[] = {
   "FT2000",
   "CORTEXA76",
   "NEOVERSEV2",
-  "AMPERE1"
+  "AMPERE1",
+  "ORYON"
 };
 
 static char *cpuname_lower[] = {
@@ -143,7 +145,7 @@ static char *cpuname_lower[] = {
   "cortexa76",
   "neoversev2",
   "ampere1",
-  "ampere1a"
+  "oryon"
 };
 
 static int cpulowperf=0;
@@ -325,6 +327,8 @@ int detect(void)
     // Qualcomm
     else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00"))
         return CPU_FALKOR;
+    else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0x001"))
+        return CPU_ORYON;
     // Cavium
     else if (strstr(cpu_implementer, "0x43") && strstr(cpu_part, "0x0a1"))
         return CPU_THUNDERX;
@@ -418,7 +422,7 @@ int detect(void)
     if (errcode != ERROR_SUCCESS) wprintf(L"Error reading cpuname from registry:%x\n",errcode);
     //wprintf(stderr,L"%s\n",(PWSTR)valstring);
     RegCloseKey(reghandle);
-    if (strstr(valstring, "Snapdragon(R) X Elite")) return CPU_NEOVERSEN1;
+    if (strstr(valstring, "Snapdragon(R) X Elite")) return CPU_ORYON;
     if (strstr(valstring, "Ampere(R) Altra")) return CPU_NEOVERSEN1;
     if (strstr(valstring, "Snapdragon (TM) 8cx Gen 3")) return CPU_CORTEXX1;
     if (strstr(valstring, "Snapdragon Compute Platform")) return CPU_CORTEXX1;
@@ -765,6 +769,24 @@ void get_cpuconfig(void)
         printf("#define DTB_DEFAULT_ENTRIES 64\n");
         printf("#define DTB_SIZE 4096\n");
         break;
+    case CPU_ORYON:
+        printf("#define ORYON\n");
+        printf("#define L1_CODE_SIZE 196608\n");
+        printf("#define L1_CODE_LINESIZE 64\n");
+        printf("#define L1_CODE_ASSOCIATIVE 6\n");
+        printf("#define L1_DATA_SIZE 98304\n");
+        printf("#define L1_DATA_LINESIZE 64\n");
+        printf("#define L1_DATA_ASSOCIATIVE 6\n");
+        printf("#define L2_SIZE 12582912\n");
+        printf("#define L2_LINESIZE 32\n");
+        printf("#define L2_ASSOCIATIVE 12\n");
+        printf("#define ITB_SIZE 4096\n");
+        printf("#define ITB_ASSOCIATIVE 8\n");
+        printf("#define ITB_DEFAULT_ENTRIES 256\n");
+        printf("#define DTB_DEFAULT_ENTRIES 224\n");
+        printf("#define DTB_ASSOCIATIVE 7\n");
+        printf("#define DTB_SIZE 4096\n");
+        break;
   }
   get_cpucount();
 }
diff --git a/getarch.c b/getarch.c
index 2f0bccfaf7..d7f67a4afe 100644
--- a/getarch.c
+++ b/getarch.c
@@ -1449,6 +1449,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define CORENAME "CORTEXA710"
 #endif
 
+#ifdef FORCE_ORYON
+#define FORCE
+#define ARCHITECTURE "ARM64"
+#define SUBARCHITECTURE "ORYON"
+#define SUBDIRNAME "arm64"
+#define ARCHCONFIG "-DORYON " \
+       "-DL1_CODE_SIZE=196608 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=6 " \
+       "-DL1_DATA_SIZE=98304 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=6 " \
+       "-DL2_SIZE=12582912 -DL2_LINESIZE=32 -DL2_ASSOCIATIVE=12 " \
+       "-DDTB_DEFAULT_ENTRIES=224 -DDTB_ASSOCIATIVE=7 -DDTB_SIZE=4096 " \
+       "-DITB_DEFAULT_ENTRIES=256 -DITB_ASSOCIATIVE=8 -DITB_SIZE=4096 " \
+       "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8 " \
+       "-march=armv8.7-a -mtune=oryon-1"
+#define LIBNAME "oryon"
+#define CORENAME "ORYON"
+#endif
+
 #ifdef FORCE_NEOVERSEN1
 #define FORCE
 #define ARCHITECTURE "ARM64"
@@ -1465,6 +1482,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define CORENAME "NEOVERSEN1"
 #endif
 
+
 #ifdef FORCE_NEOVERSEV1
 #define FORCE
 #define ARCHITECTURE "ARM64"
@@ -1481,7 +1499,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define CORENAME "NEOVERSEV1"
 #endif
 
-
 #ifdef FORCE_NEOVERSEN2
 #define FORCE
 #define ARCHITECTURE "ARM64"
diff --git a/kernel/arm64/KERNEL.ORYON b/kernel/arm64/KERNEL.ORYON
new file mode 100644
index 0000000000..46a34469c3
--- /dev/null
+++ b/kernel/arm64/KERNEL.ORYON
@@ -0,0 +1 @@
+include $(KERNELDIR)/KERNEL.NEOVERSEN1
diff --git a/param.h b/param.h
index 8e598d8a01..eec2430ae2 100644
--- a/param.h
+++ b/param.h
@@ -3392,7 +3392,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 /*FIXME: this should be using the cache size, but there is currently no easy way to query that on ARM. So if getarch counted more than 8 cores we simply assume the host
   is a big desktop or server with abundant cache rather than a phone or embedded device */
-#if NUM_CORES > 8 || defined(TSV110) || defined(EMAG8180) || defined(VORTEX)|| defined(CORTEXX1)
+#if NUM_CORES > 8 || defined(TSV110) || defined(EMAG8180) || defined(VORTEX)|| defined(CORTEXX1) || defined(ORYON)
   #define SGEMM_DEFAULT_P 512
   #define DGEMM_DEFAULT_P 256
   #define CGEMM_DEFAULT_P 256