Skip to content

Commit

Permalink
v3.9.4
Browse files Browse the repository at this point in the history
  • Loading branch information
JayDDee committed Jun 18, 2019
1 parent 71d6b97 commit d6e8d7a
Show file tree
Hide file tree
Showing 75 changed files with 1,794 additions and 785 deletions.
3 changes: 0 additions & 3 deletions INSTALL_LINUX
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,6 @@ openssl 1.1.0e or higher. Add one of the following, depending on the
compiler version, to CFLAGS:
"-march=native" or "-march=znver1" or "-msha".

Due to poor AVX2 performance on Ryzen users should add -DRYZEN_ to CFLAGS
to override multiway AVX2 on algos with sha256, and use SHA instead.

Additional instructions for static compilation can be found here:
https://lxadm.com/Static_compilation_of_cpuminer
Static builds should only be considered in a homogeneous HW and SW environment.
Expand Down
5 changes: 4 additions & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ cpuminer_SOURCES = \
algo/lyra2/lyra2h-4way.c \
algo/lyra2/allium-4way.c \
algo/lyra2/allium.c \
algo/lyra2/phi2-4way.c \
algo/lyra2/phi2.c \
algo/m7m.c \
algo/neoscrypt/neoscrypt.c \
Expand All @@ -147,6 +148,9 @@ cpuminer_SOURCES = \
algo/quark/anime-gate.c \
algo/quark/anime.c \
algo/quark/anime-4way.c \
algo/quark/hmq1725-gate.c \
algo/quark/hmq1725-4way.c \
algo/quark/hmq1725.c \
algo/qubit/qubit-gate.c \
algo/qubit/qubit.c \
algo/qubit/qubit-2way.c \
Expand Down Expand Up @@ -257,7 +261,6 @@ cpuminer_SOURCES = \
algo/x17/xevan-gate.c \
algo/x17/xevan.c \
algo/x17/xevan-4way.c \
algo/x17/hmq1725.c \
algo/x17/sonoa-gate.c \
algo/x17/sonoa-4way.c \
algo/x17/sonoa.c \
Expand Down
2 changes: 1 addition & 1 deletion README.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ cpuminer-sse2.exe "-msse2" Core2, Nehalem
cpuminer-aes-sse42.exe "-march=westmere" Westmere
cpuminer-avx.exe "-march=corei7-avx" Sandy-Ivybridge
cpuminer-avx2.exe "-march=core-avx2" Haswell, Sky-Kaby-Coffeelake
cpuminer-zen "-march=znver1 -DRYZEN_" Ryzen
cpuminer-zen "-march=znver1" AMD Ryzen, Threadripper

If you like this software feel free to donate:

Expand Down
10 changes: 9 additions & 1 deletion RELEASE_NOTES
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,17 @@ supported.
Change Log
----------

v3.9.4

Faster AVX2 for lyra2v3, quark, anime.
Fixed skein AVX2 regression (invalid shares since v3.9.0); skein is also faster.
Faster skein2 with 4way AVX2 enabled.
Automatic SHA override on Ryzen CPUs, no need for -DRYZEN_ compile flag.
Ongoing restructuring.

v3.9.3.1

Skippped v3.9.3 due to misidentification of v3.9.2.5 as v3.9.3.
Skipped v3.9.3 due to misidentification of v3.9.2.5 as v3.9.3.
Fixed x16r algo 25% invalid share reject rate. The bug may have also
affected other algos.

Expand Down
8 changes: 4 additions & 4 deletions algo/argon2/argon2d/argon2d/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ int allocate_memory(const argon2_context *context, uint8_t **memory,
void free_memory(const argon2_context *context, uint8_t *memory,
size_t num, size_t size) {
size_t memory_size = num*size;
clear_internal_memory(memory, memory_size);
// clear_internal_memory(memory, memory_size);
if (context->free_cbk) {
(context->free_cbk)(memory, memory_size);
} else {
Expand All @@ -137,7 +137,7 @@ void NOT_OPTIMIZED secure_wipe_memory(void *v, size_t n) {
int FLAG_clear_internal_memory = 0;
void clear_internal_memory(void *v, size_t n) {
if (FLAG_clear_internal_memory && v) {
secure_wipe_memory(v, n);
// secure_wipe_memory(v, n);
}
}

Expand Down Expand Up @@ -559,7 +559,7 @@ void initial_hash(uint8_t *blockhash, argon2_context *context,
context->pwdlen);

if (context->flags & ARGON2_FLAG_CLEAR_PASSWORD) {
secure_wipe_memory(context->pwd, context->pwdlen);
// secure_wipe_memory(context->pwd, context->pwdlen);
context->pwdlen = 0;
}
}
Expand All @@ -580,7 +580,7 @@ void initial_hash(uint8_t *blockhash, argon2_context *context,
context->secretlen);

if (context->flags & ARGON2_FLAG_CLEAR_SECRET) {
secure_wipe_memory(context->secret, context->secretlen);
// secure_wipe_memory(context->secret, context->secretlen);
context->secretlen = 0;
}
}
Expand Down
57 changes: 32 additions & 25 deletions algo/bmw/bmw256-hash-4way.c
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,8 @@ bmw32_4way(bmw_4way_small_context *sc, const void *data, size_t len)
}
}
sc->ptr = ptr;


if ( h1 != sc->H )
memcpy_128( sc->H, h1, 16 );
}
Expand Down Expand Up @@ -571,6 +573,7 @@ bmw32_4way_close(bmw_4way_small_context *sc, unsigned ub, unsigned n,

for ( u = 0; u < 16; u ++ )
buf[u] = h2[u];

compress_small( buf, (__m128i*)final_s, h1 );

for (u = 0, v = 16 - out_size_w32; u < out_size_w32; u ++, v ++)
Expand Down Expand Up @@ -1041,22 +1044,22 @@ static const __m256i final_s8[16] =

void bmw256_8way_init( bmw256_8way_context *ctx )
{
ctx->H[ 0] = _mm256_set1_epi64x( IV256[ 0] );
ctx->H[ 1] = _mm256_set1_epi64x( IV256[ 1] );
ctx->H[ 2] = _mm256_set1_epi64x( IV256[ 2] );
ctx->H[ 3] = _mm256_set1_epi64x( IV256[ 3] );
ctx->H[ 4] = _mm256_set1_epi64x( IV256[ 4] );
ctx->H[ 5] = _mm256_set1_epi64x( IV256[ 5] );
ctx->H[ 6] = _mm256_set1_epi64x( IV256[ 6] );
ctx->H[ 7] = _mm256_set1_epi64x( IV256[ 7] );
ctx->H[ 8] = _mm256_set1_epi64x( IV256[ 8] );
ctx->H[ 9] = _mm256_set1_epi64x( IV256[ 9] );
ctx->H[10] = _mm256_set1_epi64x( IV256[10] );
ctx->H[11] = _mm256_set1_epi64x( IV256[11] );
ctx->H[12] = _mm256_set1_epi64x( IV256[12] );
ctx->H[13] = _mm256_set1_epi64x( IV256[13] );
ctx->H[14] = _mm256_set1_epi64x( IV256[14] );
ctx->H[15] = _mm256_set1_epi64x( IV256[15] );
ctx->H[ 0] = _mm256_set1_epi32( IV256[ 0] );
ctx->H[ 1] = _mm256_set1_epi32( IV256[ 1] );
ctx->H[ 2] = _mm256_set1_epi32( IV256[ 2] );
ctx->H[ 3] = _mm256_set1_epi32( IV256[ 3] );
ctx->H[ 4] = _mm256_set1_epi32( IV256[ 4] );
ctx->H[ 5] = _mm256_set1_epi32( IV256[ 5] );
ctx->H[ 6] = _mm256_set1_epi32( IV256[ 6] );
ctx->H[ 7] = _mm256_set1_epi32( IV256[ 7] );
ctx->H[ 8] = _mm256_set1_epi32( IV256[ 8] );
ctx->H[ 9] = _mm256_set1_epi32( IV256[ 9] );
ctx->H[10] = _mm256_set1_epi32( IV256[10] );
ctx->H[11] = _mm256_set1_epi32( IV256[11] );
ctx->H[12] = _mm256_set1_epi32( IV256[12] );
ctx->H[13] = _mm256_set1_epi32( IV256[13] );
ctx->H[14] = _mm256_set1_epi32( IV256[14] );
ctx->H[15] = _mm256_set1_epi32( IV256[15] );
ctx->ptr = 0;
ctx->bit_count = 0;

Expand All @@ -1076,14 +1079,15 @@ void bmw256_8way( bmw256_8way_context *ctx, const void *data, size_t len )
ptr = ctx->ptr;
h1 = ctx->H;
h2 = htmp;

while ( len > 0 )
{
size_t clen;
clen = buf_size - ptr;
if ( clen > len )
clen = len;
memcpy_256( buf + (ptr>>3), vdata, clen >> 3 );
vdata = vdata + (clen>>3);
memcpy_256( buf + (ptr>>2), vdata, clen >> 2 );
vdata = vdata + (clen>>2);
len -= clen;
ptr += clen;
if ( ptr == buf_size )
Expand All @@ -1097,6 +1101,7 @@ void bmw256_8way( bmw256_8way_context *ctx, const void *data, size_t len )
}
}
ctx->ptr = ptr;

if ( h1 != ctx->H )
memcpy_256( ctx->H, h1, 16 );
}
Expand All @@ -1106,24 +1111,26 @@ void bmw256_8way_close( bmw256_8way_context *ctx, void *dst )
__m256i *buf;
__m256i h1[16], h2[16], *h;
size_t ptr, u, v;
// unsigned z;
const int buf_size = 64; // bytes of one lane, compatible with len

buf = ctx->buf;
ptr = ctx->ptr;
buf[ ptr>>3 ] = _mm256_set1_epi32( 0x80 );
ptr += 8;
buf[ ptr>>2 ] = _mm256_set1_epi32( 0x80 );
ptr += 4;
h = ctx->H;

if ( ptr > (buf_size - 8) )
if ( ptr > (buf_size - 4) )
{
memset_zero_256( buf + (ptr>>3), (buf_size - ptr) >> 3 );
memset_zero_256( buf + (ptr>>2), (buf_size - ptr) >> 2 );
compress_small_8way( buf, h, h1 );
ptr = 0;
h = h1;
}
memset_zero_256( buf + (ptr>>3), (buf_size - 8 - ptr) >> 3 );
buf[ (buf_size - 8) >> 3 ] = _mm256_set1_epi64x( ctx->bit_count );
memset_zero_256( buf + (ptr>>2), (buf_size - 8 - ptr) >> 2 );
buf[ (buf_size - 8) >> 2 ] = _mm256_set1_epi32( ctx->bit_count );
buf[ (buf_size - 4) >> 2 ] = m256_zero;


compress_small_8way( buf, h, h2 );

for ( u = 0; u < 16; u ++ )
Expand Down
16 changes: 13 additions & 3 deletions algo/lyra2/lyra2-gate.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@ bool lyra2rev3_thread_init()

int size = (int64_t)ROW_LEN_BYTES * 4; // nRows;
l2v3_wholeMatrix = _mm_malloc( size, 64 );
#if defined (LYRA2REV3_4WAY)
#if defined (LYRA2REV3_8WAY)
init_lyra2rev3_8way_ctx();;
#elif defined (LYRA2REV3_4WAY)
init_lyra2rev3_4way_ctx();;
#else
init_lyra2rev3_ctx();
Expand All @@ -57,7 +59,10 @@ bool lyra2rev3_thread_init()

bool register_lyra2rev3_algo( algo_gate_t* gate )
{
#if defined (LYRA2REV3_4WAY)
#if defined (LYRA2REV3_8WAY)
gate->scanhash = (void*)&scanhash_lyra2rev3_8way;
gate->hash = (void*)&lyra2rev3_8way_hash;
#elif defined (LYRA2REV3_4WAY)
gate->scanhash = (void*)&scanhash_lyra2rev3_4way;
gate->hash = (void*)&lyra2rev3_4way_hash;
#else
Expand Down Expand Up @@ -203,13 +208,18 @@ void phi2_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )

bool register_phi2_algo( algo_gate_t* gate )
{
init_phi2_ctx();
// init_phi2_ctx();
gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT;
gate->get_work_data_size = (void*)&phi2_get_work_data_size;
gate->decode_extra_data = (void*)&phi2_decode_extra_data;
gate->build_extraheader = (void*)&phi2_build_extraheader;
gate->set_target = (void*)&alt_set_target;
gate->get_max64 = (void*)&get_max64_0xffffLL;
#if defined(PHI2_4WAY)
gate->scanhash = (void*)&scanhash_phi2_4way;
#else
init_phi2_ctx();
gate->scanhash = (void*)&scanhash_phi2;
#endif
return true;
}
26 changes: 24 additions & 2 deletions algo/lyra2/lyra2-gate.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
#include <stdint.h>
#include "lyra2.h"

//#if defined(__AVX2__)
#if defined(__AVX2__)
#define LYRA2REV3_8WAY
#endif

#if defined(__SSE2__)
#define LYRA2REV3_4WAY
Expand All @@ -14,8 +16,14 @@
extern __thread uint64_t* l2v3_wholeMatrix;

bool register_lyra2rev3_algo( algo_gate_t* gate );
#if defined(LYRA2REV3_8WAY)

void lyra2rev3_8way_hash( void *state, const void *input );
int scanhash_lyra2rev3_8way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
bool init_lyra2rev3_8way_ctx();

#if defined(LYRA2REV3_4WAY)
#elif defined(LYRA2REV3_4WAY)

void lyra2rev3_4way_hash( void *state, const void *input );
int scanhash_lyra2rev3_4way( int thr_id, struct work *work, uint32_t max_nonce,
Expand Down Expand Up @@ -142,15 +150,29 @@ bool init_allium_ctx();

/////////////////////////////////////////

#if defined(__AVX2__) && defined(__AES__)
// #define PHI2_4WAY
#endif

bool phi2_has_roots;

bool register_phi2_algo( algo_gate_t* gate );
#if defined(PHI2_4WAY)

void phi2_hash_4way( void *state, const void *input );
int scanhash_phi2_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
//void init_phi2_ctx();

#else

void phi2_hash( void *state, const void *input );
int scanhash_phi2( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr );
void init_phi2_ctx();

#endif

#endif // LYRA2_GATE_H__


Loading

0 comments on commit d6e8d7a

Please sign in to comment.