Merge xmrig v6.15.3 into master

MoneroOcean · Nov 1, 2021 · 449982a
2 parents f60350e + ae8459b
commit 449982a
Show file tree

Hide file tree

Showing 37 changed files with 3,113 additions and 2,740 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,9 @@
+# v6.15.3
+- [#2614](https://github.com/xmrig/xmrig/pull/2614) OpenCL fixes for non-AMD platforms.
+- [#2623](https://github.com/xmrig/xmrig/pull/2623) Fixed compiling without kawpow.
+- [#2636](https://github.com/xmrig/xmrig/pull/2636) [#2639](https://github.com/xmrig/xmrig/pull/2639) AstroBWT speedup (up to +35%).
+- [#2646](https://github.com/xmrig/xmrig/pull/2646) Fixed MSVC compilation error.
+
 # v6.15.2
 - [#2606](https://github.com/xmrig/xmrig/pull/2606) Fixed: AstroBWT auto-config ignored `max-threads-hint`.
 - Fixed possible crash on Windows (regression in v6.15.1).

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -19,6 +19,8 @@ option(WITH_MSR             "Enable MSR mod & 1st-gen Ryzen fix" ON)
 option(WITH_ENV_VARS        "Enable environment variables support in config file" ON)
 option(WITH_EMBEDDED_CONFIG "Enable internal embedded JSON config" OFF)
 option(WITH_OPENCL          "Enable OpenCL backend" ON)
+set(WITH_OPENCL_VERSION 200 CACHE STRING "Target OpenCL version")
+set_property(CACHE WITH_OPENCL_VERSION PROPERTY STRINGS 120 200 210 220)
 option(WITH_CUDA            "Enable CUDA backend" ON)
 option(WITH_NVML            "Enable NVML (NVIDIA Management Library) support (only if CUDA backend enabled)" ON)
 option(WITH_ADL             "Enable ADL (AMD Display Library) or sysfs support (only if OpenCL backend enabled)" ON)

diff --git a/src/3rdparty/getopt/getopt.h b/src/3rdparty/getopt/getopt.h
@@ -109,11 +109,7 @@ char    *optarg;		/* argument associated with option */
 extern char __declspec(dllimport) *__progname;
 #endif
 
-#ifdef __CYGWIN__
 static char EMSG[] = "";
-#else
-#define	EMSG		""
-#endif
 
 static int getopt_internal(int, char * const *, const char *,
 			   const struct option *, int *, int);

diff --git a/src/backend/opencl/cl/cn/blake256.cl b/src/backend/opencl/cl/cn/blake256.cl
@@ -27,7 +27,7 @@
 *
 * @author   djm34
 */
-__constant static const int sigma[16][16] = {
+__constant STATIC const int sigma[16][16] = {
 		{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
 		{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
 		{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
@@ -47,21 +47,21 @@ __constant static const int sigma[16][16] = {
 };
 
 
-__constant static const sph_u32  c_IV256[8] = {
+__constant STATIC const sph_u32  c_IV256[8] = {
 	0x6A09E667, 0xBB67AE85,
 	0x3C6EF372, 0xA54FF53A,
 	0x510E527F, 0x9B05688C,
 	0x1F83D9AB, 0x5BE0CD19
 };
 
 /* Second part (64-80) msg never change, store it */
-__constant static const sph_u32  c_Padding[16] = {
+__constant STATIC const sph_u32  c_Padding[16] = {
 	0, 0, 0, 0,
 	0x80000000, 0, 0, 0,
 	0, 0, 0, 0,
 	0, 1, 0, 640,
 };
-__constant static const sph_u32  c_u256[16] = {
+__constant STATIC const sph_u32  c_u256[16] = {
 	0x243F6A88, 0x85A308D3,
 	0x13198A2E, 0x03707344,
 	0xA4093822, 0x299F31D0,

diff --git a/src/backend/opencl/cl/cn/cryptonight.cl b/src/backend/opencl/cl/cn/cryptonight.cl
@@ -22,6 +22,15 @@
  *   along with this program. If not, see <http://www.gnu.org/licenses/>.
  */
 
+#ifdef STATIC
+#   undef STATIC
+#endif
+#ifdef cl_amd_media_ops
+#   define STATIC static
+#else
+#   define STATIC
+#endif
+
 /* For Mesa clover support */
 #ifdef cl_clang_storage_class_specifiers
 #   pragma OPENCL EXTENSION cl_clang_storage_class_specifiers : enable
@@ -39,7 +48,7 @@
 #include "keccak.cl"
 
 
-#if defined(__NV_CL_C_VERSION) && STRIDED_INDEX != 0
+#if (defined(__NV_CL_C_VERSION) || defined(__APPLE__)) && STRIDED_INDEX != 0
 #   undef STRIDED_INDEX
 #   define STRIDED_INDEX 0
 #endif
@@ -755,7 +764,7 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u
 
 __kernel void Skein(__global ulong *states, __global uint *BranchBuf, __global uint *output, ulong Target, uint Threads)
 {
-    const uint idx = get_global_id(0) - get_global_offset(0);
+    const uint idx = getIdx();
 
     // do not use early return here
     if(idx < BranchBuf[Threads]) {
@@ -800,9 +809,9 @@ __kernel void Skein(__global ulong *states, __global uint *BranchBuf, __global u
         // Note that comparison is equivalent to subtraction - we can't just compare 8 32-bit values
         // and expect an accurate result for target > 32-bit without implementing carries
         if (p.s3 <= Target) {
-            ulong outIdx = atomic_inc(output + 0xFF);
+            const uint outIdx = atomic_inc(output + 0xFF);
             if (outIdx < 0xFF) {
-                output[outIdx] = BranchBuf[idx] + (uint) get_global_offset(0);
+                ((__global uint *)output)[outIdx] = BranchBuf[idx] + (uint)get_global_offset(0);
             }
         }
     }
@@ -838,7 +847,7 @@ __kernel void Skein(__global ulong *states, __global uint *BranchBuf, __global u
 
 __kernel void JH(__global ulong *states, __global uint *BranchBuf, __global uint *output, ulong Target, uint Threads)
 {
-    const uint idx = get_global_id(0) - get_global_offset(0);
+    const uint idx = getIdx();
 
     // do not use early return here
     if (idx < BranchBuf[Threads]) {
@@ -872,9 +881,9 @@ __kernel void JH(__global ulong *states, __global uint *BranchBuf, __global uint
         // Note that comparison is equivalent to subtraction - we can't just compare 8 32-bit values
         // and expect an accurate result for target > 32-bit without implementing carries
         if (h7l <= Target) {
-            ulong outIdx = atomic_inc(output + 0xFF);
+            const uint outIdx = atomic_inc(output + 0xFF);
             if (outIdx < 0xFF) {
-                output[outIdx] = BranchBuf[idx] + (uint) get_global_offset(0);
+                ((__global uint *)output)[outIdx] = BranchBuf[idx] + (uint)get_global_offset(0);
             }
         }
     }
@@ -886,7 +895,7 @@ __kernel void JH(__global ulong *states, __global uint *BranchBuf, __global uint
 
 __kernel void Blake(__global ulong *states, __global uint *BranchBuf, __global uint *output, ulong Target, uint Threads)
 {
-    const uint idx = get_global_id(0) - get_global_offset(0);
+    const uint idx = getIdx();
 
     // do not use early return here
     if (idx < BranchBuf[Threads]) {
@@ -973,9 +982,9 @@ __kernel void Blake(__global ulong *states, __global uint *BranchBuf, __global u
         // and expect an accurate result for target > 32-bit without implementing carries
         uint2 t = (uint2)(h[6],h[7]);
         if (as_ulong(t) <= Target) {
-            ulong outIdx = atomic_inc(output + 0xFF);
+            const uint outIdx = atomic_inc(output + 0xFF);
             if (outIdx < 0xFF) {
-                output[outIdx] = BranchBuf[idx] + (uint) get_global_offset(0);
+                ((__global uint *)output)[outIdx] = BranchBuf[idx] + (uint)get_global_offset(0);
             }
         }
     }
@@ -987,7 +996,7 @@ __kernel void Blake(__global ulong *states, __global uint *BranchBuf, __global u
 
 __kernel void Groestl(__global ulong *states, __global uint *BranchBuf, __global uint *output, ulong Target, uint Threads)
 {
-    const uint idx = get_global_id(0) - get_global_offset(0);
+    const uint idx = getIdx();
 
     // do not use early return here
     if (idx < BranchBuf[Threads]) {
@@ -1073,9 +1082,9 @@ __kernel void Groestl(__global ulong *states, __global uint *BranchBuf, __global
         // Note that comparison is equivalent to subtraction - we can't just compare 8 32-bit values
         // and expect an accurate result for target > 32-bit without implementing carries
         if (State[7] <= Target) {
-            ulong outIdx = atomic_inc(output + 0xFF);
+            const uint outIdx = atomic_inc(output + 0xFF);
             if (outIdx < 0xFF) {
-                output[outIdx] = BranchBuf[idx] + (uint) get_global_offset(0);
+                ((__global uint *)output)[outIdx] = BranchBuf[idx] + (uint)get_global_offset(0);
             }
         }
     }