Merge xmrig v6.19.3 into master

C3Pool · Jun 5, 2023 · 7343157
1 parent 5a08ed7
commit 7343157
Show file tree

Hide file tree

Showing 16 changed files with 186 additions and 123 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,11 @@
+# v6.19.3
+- [#3245](https://github.com/xmrig/xmrig/issues/3245) Improved algorithm negotiation for donation rounds by sending extra information about the current mining job.
+- [#3254](https://github.com/xmrig/xmrig/pull/3254) Tweaked auto-tuning for Intel CPUs.
+- [#3271](https://github.com/xmrig/xmrig/pull/3271) RandomX: optimized program generation.
+- [#3273](https://github.com/xmrig/xmrig/pull/3273) RandomX: fixed undefined behavior.
+- [#3275](https://github.com/xmrig/xmrig/pull/3275) RandomX: fixed `jccErratum` list.
+- [#3280](https://github.com/xmrig/xmrig/pull/3280) Updated example scripts.
+
 # v6.19.2
 - [#3230](https://github.com/xmrig/xmrig/pull/3230) Fixed parsing of `TX_EXTRA_MERGE_MINING_TAG`.
 - [#3232](https://github.com/xmrig/xmrig/pull/3232) Added new `X-Hash-Difficulty` HTTP header.

diff --git a/scripts/pool_mine_example.cmd b/scripts/pool_mine_example.cmd
@@ -16,5 +16,5 @@
 :: Smaller pools also often have smaller fees/payout limits.
 
 cd /d "%~dp0"
-xmrig.exe -o pool.hashvault.pro:3333 -u 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD -p x
+xmrig.exe -o xmrpool.eu:3333 -u 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD -p x
 pause
diff --git a/scripts/solo_mine_example.cmd b/scripts/solo_mine_example.cmd
@@ -12,5 +12,5 @@
 :: But you will only get a payout when you find a block which can take more than a year for a single low-end PC.
 
 cd /d "%~dp0"
-xmrig.exe -o node.xmr.to:18081 -a rx/0 -u 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD --daemon
+xmrig.exe -o YOUR_NODE_IP:18081 -a rx/0 -u 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD --daemon
 pause
diff --git a/src/backend/cpu/platform/BasicCpuInfo.cpp b/src/backend/cpu/platform/BasicCpuInfo.cpp
@@ -296,7 +296,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
                 // Affected CPU models and stepping numbers are taken from https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
                 m_jccErratum =
                     ((model == 0x4E) && (stepping == 0x3)) ||
-                    ((model == 0x55) && (stepping == 0x4)) ||
+                    ((model == 0x55) && ((stepping == 0x4) || (stepping == 0x7))) ||
                     ((model == 0x5E) && (stepping == 0x3)) ||
                     ((model == 0x8E) && (stepping >= 0x9) && (stepping <= 0xC)) ||
                     ((model == 0x9E) && (stepping >= 0x9) && (stepping <= 0xD)) ||

diff --git a/src/backend/cpu/platform/HwlocCpuInfo.cpp b/src/backend/cpu/platform/HwlocCpuInfo.cpp
@@ -298,8 +298,10 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
     cores.reserve(m_cores);
     findByType(cache, HWLOC_OBJ_CORE, [&cores](hwloc_obj_t found) { cores.emplace_back(found); });
 
+    const bool L3_exclusive = isCacheExclusive(cache);
+
 #   ifdef XMRIG_ALGO_GHOSTRIDER
-    if ((algorithm == Algorithm::GHOSTRIDER_RTM) && (PUs > cores.size()) && (PUs < cores.size() * 2)) {
+    if ((algorithm == Algorithm::GHOSTRIDER_RTM) && L3_exclusive && (PUs > cores.size()) && (PUs < cores.size() * 2)) {
         // Don't use E-cores on Alder Lake
         cores.erase(std::remove_if(cores.begin(), cores.end(), [](hwloc_obj_t c) { return hwloc_bitmap_weight(c->cpuset) == 1; }), cores.end());
 
@@ -311,7 +313,6 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
 #   endif
 
     size_t L3               = cache->attr->cache.size;
-    const bool L3_exclusive = isCacheExclusive(cache);
     size_t L2               = 0;
     int L2_associativity    = 0;
     size_t extra            = 0;
@@ -355,6 +356,10 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
 #   endif
 
 #   ifdef XMRIG_ALGO_RANDOMX
+    if ((algorithm.family() == Algorithm::RANDOM_X) && L3_exclusive && (PUs > cores.size()) && (PUs < cores.size() * 2)) {
+        // Use all L3+L2 on latest Intel CPUs with P-cores, E-cores and exclusive L3 cache
+        cacheHashes = (L3 + L2) / scratchpad;
+    }
     if (extra == 0 && algorithm.l2() > 0) {
         cacheHashes = std::min<size_t>(std::max<size_t>(L2 / algorithm.l2(), cores.size()), cacheHashes);
     }

diff --git a/src/config.json b/src/config.json
@@ -65,7 +65,7 @@
         {
             "algo": null,
             "coin": null,
-            "url": "mine.c3pool.com:19999",
+            "url": "auto.c3pool.org:19999",
             "user": "YOUR_WALLET_ADDRESS",
             "pass": "x",
             "rig-id": null,

diff --git a/src/crypto/kawpow/KPHash.h b/src/crypto/kawpow/KPHash.h
@@ -7,8 +7,8 @@
  * Copyright 2017-2019 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
  * Copyright 2018      Lee Clagett <https://github.com/vtnerd>
  * Copyright 2018-2019 tevador     <tevador@gmail.com>
- * Copyright 2018-2020 SChernykh   <https://github.com/SChernykh>
- * Copyright 2016-2019 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
+ * Copyright 2018-2023 SChernykh   <https://github.com/SChernykh>
+ * Copyright 2016-2023 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
  *
  *   This program is free software: you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
@@ -28,7 +28,7 @@
 #define XMRIG_KP_HASH_H
 
 
-#include <stdint.h>
+#include <cstdint>
 
 
 namespace xmrig
@@ -43,16 +43,16 @@ class KPHash
 public:
     static constexpr uint32_t EPOCH_LENGTH  = 7500;
     static constexpr uint32_t PERIOD_LENGTH = 3;
-    static constexpr int CNT_CACHE = 11;
-    static constexpr int CNT_MATH = 18;
-    static constexpr uint32_t REGS = 32;
-    static constexpr uint32_t LANES = 16;
+    static constexpr int CNT_CACHE          = 11;
+    static constexpr int CNT_MATH           = 18;
+    static constexpr uint32_t REGS          = 32;
+    static constexpr uint32_t LANES         = 16;
 
     static void calculate(const KPCache& light_cache, uint32_t block_height, const uint8_t (&header_hash)[32], uint64_t nonce, uint32_t (&output)[8], uint32_t (&mix_hash)[8]);
 };
 
 
-} /* namespace xmrig */
+} // namespace xmrig
 
 
-#endif /* XMRIG_KP_HASH_H */
+#endif // XMRIG_KP_HASH_H
diff --git a/src/crypto/randomx/aes_hash.cpp b/src/crypto/randomx/aes_hash.cpp
@@ -34,6 +34,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "base/tools/Chrono.h"
 #include "crypto/randomx/randomx.h"
 #include "crypto/randomx/soft_aes.h"
+#include "crypto/randomx/instruction.hpp"
+#include "crypto/randomx/common.hpp"
 #include "crypto/rx/Profiler.h"
 
 #define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d
@@ -165,6 +167,9 @@ void fillAes1Rx4(void *state, size_t outputSize, void *buffer) {
 template void fillAes1Rx4<true>(void *state, size_t outputSize, void *buffer);
 template void fillAes1Rx4<false>(void *state, size_t outputSize, void *buffer);
 
+static constexpr randomx::Instruction inst{ 0xFF, 7, 7, 0xFF, 0xFFFFFFFFU };
+alignas(16) static const randomx::Instruction inst_mask[2] = { inst, inst };
+
 template<int softAes>
 void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
 	const uint8_t* outptr = (uint8_t*)buffer;
@@ -187,32 +192,42 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
 	state2 = rx_load_vec_i128((rx_vec_i128*)state + 2);
 	state3 = rx_load_vec_i128((rx_vec_i128*)state + 3);
 
-	while (outptr < outputEnd) {
-		state0 = aesdec<softAes>(state0, key0);
-		state1 = aesenc<softAes>(state1, key0);
-		state2 = aesdec<softAes>(state2, key4);
-		state3 = aesenc<softAes>(state3, key4);
-
-		state0 = aesdec<softAes>(state0, key1);
-		state1 = aesenc<softAes>(state1, key1);
-		state2 = aesdec<softAes>(state2, key5);
-		state3 = aesenc<softAes>(state3, key5);
-
-		state0 = aesdec<softAes>(state0, key2);
-		state1 = aesenc<softAes>(state1, key2);
-		state2 = aesdec<softAes>(state2, key6);
-		state3 = aesenc<softAes>(state3, key6);
-
-		state0 = aesdec<softAes>(state0, key3);
-		state1 = aesenc<softAes>(state1, key3);
-		state2 = aesdec<softAes>(state2, key7);
-		state3 = aesenc<softAes>(state3, key7);
-
+#define TRANSFORM do { \
+	state0 = aesdec<softAes>(state0, key0); \
+	state1 = aesenc<softAes>(state1, key0); \
+	state2 = aesdec<softAes>(state2, key4); \
+	state3 = aesenc<softAes>(state3, key4); \
+	state0 = aesdec<softAes>(state0, key1); \
+	state1 = aesenc<softAes>(state1, key1); \
+	state2 = aesdec<softAes>(state2, key5); \
+	state3 = aesenc<softAes>(state3, key5); \
+	state0 = aesdec<softAes>(state0, key2); \
+	state1 = aesenc<softAes>(state1, key2); \
+	state2 = aesdec<softAes>(state2, key6); \
+	state3 = aesenc<softAes>(state3, key6); \
+	state0 = aesdec<softAes>(state0, key3); \
+	state1 = aesenc<softAes>(state1, key3); \
+	state2 = aesdec<softAes>(state2, key7); \
+	state3 = aesenc<softAes>(state3, key7); \
+} while (0)
+
+	for (int i = 0; i < 2; ++i, outptr += 64) {
+		TRANSFORM;
 		rx_store_vec_i128((rx_vec_i128*)outptr + 0, state0);
 		rx_store_vec_i128((rx_vec_i128*)outptr + 1, state1);
 		rx_store_vec_i128((rx_vec_i128*)outptr + 2, state2);
 		rx_store_vec_i128((rx_vec_i128*)outptr + 3, state3);
+	}
 
+	static_assert(sizeof(inst_mask) == sizeof(rx_vec_i128), "Incorrect inst_mask size");
+	const rx_vec_i128 mask = *reinterpret_cast<const rx_vec_i128*>(inst_mask);
+
+	while (outptr < outputEnd) {
+		TRANSFORM;
+		rx_store_vec_i128((rx_vec_i128*)outptr + 0, rx_and_vec_i128(state0, mask));
+		rx_store_vec_i128((rx_vec_i128*)outptr + 1, rx_and_vec_i128(state1, mask));
+		rx_store_vec_i128((rx_vec_i128*)outptr + 2, rx_and_vec_i128(state2, mask));
+		rx_store_vec_i128((rx_vec_i128*)outptr + 3, rx_and_vec_i128(state3, mask));
 		outptr += 64;
 	}
 }

diff --git a/src/crypto/randomx/intrin_portable.h b/src/crypto/randomx/intrin_portable.h
@@ -126,6 +126,7 @@ FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
 
 #define rx_xor_vec_f128 _mm_xor_pd
 #define rx_and_vec_f128 _mm_and_pd
+#define rx_and_vec_i128 _mm_and_si128
 #define rx_or_vec_f128 _mm_or_pd
 
 #ifdef __AES__
@@ -278,6 +279,10 @@ FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
 	return (rx_vec_f128)vec_and(a,b);
 }
 
+FORCE_INLINE rx_vec_i128 rx_and_vec_i128(rx_vec_i128 a, rx_vec_i128 b) {
+	return (rx_vec_i128)vec_and(a, b);
+}
+
 FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
 	return (rx_vec_f128)vec_or(a,b);
 }
@@ -444,6 +449,8 @@ FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
 	return vreinterpretq_f64_u8(vandq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
 }
 
+#define rx_and_vec_i128 vandq_u8
+
 FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
 	return vreinterpretq_f64_u8(vorrq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
 }
@@ -635,6 +642,13 @@ FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
 	return x;
 }
 
+FORCE_INLINE rx_vec_i128 rx_and_vec_i128(rx_vec_i128 a, rx_vec_i128 b) {
+	rx_vec_i128 x;
+	x.u64[0] = a.u64[0] & b.u64[0];
+	x.u64[1] = a.u64[1] & b.u64[1];
+	return x;
+}
+
 FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
 	rx_vec_f128 x;
 	x.i.u64[0] = a.i.u64[0] | b.i.u64[0];

diff --git a/src/crypto/randomx/jit_compiler_a64.cpp b/src/crypto/randomx/jit_compiler_a64.cpp
@@ -144,8 +144,6 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
 	for (uint32_t i = 0; i < program.getSize(); ++i)
 	{
 		Instruction& instr = program(i);
-		instr.src %= RegistersCount;
-		instr.dst %= RegistersCount;
 		(this->*engine[instr.opcode])(instr, codePos);
 	}
 
@@ -204,8 +202,6 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
 	for (uint32_t i = 0; i < program.getSize(); ++i)
 	{
 		Instruction& instr = program(i);
-		instr.src %= RegistersCount;
-		instr.dst %= RegistersCount;
 		(this->*engine[instr.opcode])(instr, codePos);
 	}