Skip to content

Commit

Permalink
v3.9.2.5
Browse files Browse the repository at this point in the history
  • Loading branch information
JayDDee committed Jun 13, 2019
1 parent 7fec680 commit b233137
Show file tree
Hide file tree
Showing 70 changed files with 4,410 additions and 4,357 deletions.
5 changes: 5 additions & 0 deletions RELEASE_NOTES
Expand Up @@ -38,6 +38,11 @@ supported.
Change Log
----------

v3.9.2.5

Fixed two regressions: hodl AES detection and x16r invalid shares with AVX2.
More restructuring.

v3.9.2.4

Yet another affinity fix. Hopefully the last one.
Expand Down
3 changes: 1 addition & 2 deletions algo-gate-api.h
Expand Up @@ -2,8 +2,7 @@
#include <stdbool.h>
#include <stdint.h>
#include "miner.h"
#include "avxdefs.h"
#include "interleave.h"
#include "simd-utils.h"

/////////////////////////////
////
Expand Down
2 changes: 1 addition & 1 deletion algo/blake/blake-hash-4way.h
Expand Up @@ -45,7 +45,7 @@ extern "C"{

#include <stddef.h>
#include "algo/sha/sph_types.h"
#include "avxdefs.h"
#include "simd-utils.h"

#define SPH_SIZE_blake256 256

Expand Down
2 changes: 1 addition & 1 deletion algo/blake/blake2s-hash-4way.h
Expand Up @@ -16,7 +16,7 @@

#if defined(__SSE4_2__)

#include "avxdefs.h"
#include "simd-utils.h"

#include <stddef.h>
#include <stdint.h>
Expand Down
2 changes: 1 addition & 1 deletion algo/bmw/bmw-hash-4way.h
Expand Up @@ -43,7 +43,7 @@ extern "C"{
#include <stddef.h>

#include "algo/sha/sph_types.h"
#include "avxdefs.h"
#include "simd-utils.h"

#define SPH_SIZE_bmw256 256

Expand Down
2 changes: 1 addition & 1 deletion algo/cubehash/cube-hash-2way.h
Expand Up @@ -4,7 +4,7 @@
#if defined(__AVX2__)

#include <stdint.h>
#include "avxdefs.h"
#include "simd-utils.h"

// 2x128, 2 way parallel SSE2

Expand Down
2 changes: 1 addition & 1 deletion algo/cubehash/cubehash_sse2.c
Expand Up @@ -13,7 +13,7 @@
#include <stdbool.h>
#include <unistd.h>
#include <memory.h>
#include "avxdefs.h"
#include "simd-utils.h"
#include <stdio.h>

// The result of hashing 10 rounds of initial data which is params and
Expand Down
2 changes: 1 addition & 1 deletion algo/groestl/aes_ni/hash-groestl.c
Expand Up @@ -12,7 +12,7 @@
#include <memory.h>
#include "hash-groestl.h"
#include "miner.h"
#include "avxdefs.h"
#include "simd-utils.h"

#ifndef NO_AES_NI

Expand Down
2 changes: 1 addition & 1 deletion algo/groestl/aes_ni/hash-groestl256.c
Expand Up @@ -9,7 +9,7 @@
#include <memory.h>
#include "hash-groestl256.h"
#include "miner.h"
#include "avxdefs.h"
#include "simd-utils.h"

#ifndef NO_AES_NI

Expand Down
2 changes: 1 addition & 1 deletion algo/hamsi/hamsi-hash-4way.h
Expand Up @@ -40,7 +40,7 @@

#if defined (__AVX2__)

#include "avxdefs.h"
#include "simd-utils.h"

#ifdef __cplusplus
extern "C"{
Expand Down
2 changes: 1 addition & 1 deletion algo/haval/haval-hash-4way.h
Expand Up @@ -69,7 +69,7 @@ extern "C"{

#include <stddef.h>
#include "algo/sha/sph_types.h"
#include "avxdefs.h"
#include "simd-utils.h"

#define SPH_SIZE_haval256_5 256

Expand Down
2 changes: 1 addition & 1 deletion algo/hodl/hodl-gate.c
Expand Up @@ -156,7 +156,7 @@ int hodl_scanhash( int thr_id, struct work* work, uint32_t max_nonce,

bool register_hodl_algo( algo_gate_t* gate )
{
#if defined(__AES__)
#if !defined(__AES__)
applog( LOG_ERR, "Only CPUs with AES are supported, use legacy version.");
return false;
#endif
Expand Down
2 changes: 1 addition & 1 deletion algo/jh/jh-hash-4way.h
Expand Up @@ -44,7 +44,7 @@ extern "C"{

#include <stddef.h>
#include "algo/sha/sph_types.h"
#include "avxdefs.h"
#include "simd-utils.h"

#define SPH_SIZE_jh256 256

Expand Down
2 changes: 1 addition & 1 deletion algo/keccak/keccak-hash-4way.h
Expand Up @@ -44,7 +44,7 @@ extern "C"{

#include <stddef.h>
#include "algo/sha/sph_types.h"
#include "avxdefs.h"
#include "simd-utils.h"

#define SPH_SIZE_keccak256 256

Expand Down
2 changes: 1 addition & 1 deletion algo/luffa/luffa-hash-2way.c
Expand Up @@ -24,7 +24,7 @@

#if defined(__AVX2__)

#include "avxdefs.h"
#include "simd-utils.h"

#define MASK _mm256_set_epi32( 0UL, 0UL, 0UL, 0xffffffffUL, \
0UL, 0UL, 0UL, 0xffffffffUL )
Expand Down
2 changes: 1 addition & 1 deletion algo/luffa/luffa-hash-2way.h
Expand Up @@ -24,7 +24,7 @@

#include <immintrin.h>
#include "algo/sha/sha3-defs.h"
#include "avxdefs.h"
#include "simd-utils.h"

/* The length of digests*/
#define DIGEST_BIT_LEN_224 224
Expand Down
2 changes: 1 addition & 1 deletion algo/luffa/luffa_for_sse2.c
Expand Up @@ -20,7 +20,7 @@

#include <string.h>
#include <emmintrin.h>
#include "avxdefs.h"
#include "simd-utils.h"
#include "luffa_for_sse2.h"

#define MULT2(a0,a1) do \
Expand Down
35 changes: 13 additions & 22 deletions algo/lyra2/allium-4way.c
Expand Up @@ -44,10 +44,11 @@ void allium_4way_hash( void *state, const void *input )
blake256_4way( &ctx.blake, input + (64<<2), 16 );
blake256_4way_close( &ctx.blake, vhash32 );

mm256_reinterleave_4x64( vhash64, vhash32, 256 );
mm256_rintrlv_4x32_4x64( vhash64, vhash32, 256 );
keccak256_4way( &ctx.keccak, vhash64, 32 );
keccak256_4way_close( &ctx.keccak, vhash64 );
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );

mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );

LYRA2RE( hash0, 32, hash0, 32, hash0, 32, 1, 8, 8 );
LYRA2RE( hash1, 32, hash1, 32, hash1, 32, 1, 8, 8 );
Expand All @@ -67,34 +68,30 @@ void allium_4way_hash( void *state, const void *input )
LYRA2RE( hash2, 32, hash2, 32, hash2, 32, 1, 8, 8 );
LYRA2RE( hash3, 32, hash3, 32, hash3, 32, 1, 8, 8 );

mm256_interleave_4x64( vhash64, hash0, hash1, hash2, hash3, 256 );
mm256_intrlv_4x64( vhash64, hash0, hash1, hash2, hash3, 256 );

skein256_4way( &ctx.skein, vhash64, 32 );
skein256_4way_close( &ctx.skein, vhash64 );
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );

update_and_final_groestl256( &ctx.groestl, hash0, hash0, 256 );
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );

update_and_final_groestl256( &ctx.groestl, state, hash0, 256 );
memcpy( &ctx.groestl, &allium_4way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, hash1, hash1, 256 );
update_and_final_groestl256( &ctx.groestl, state+32, hash1, 256 );
memcpy( &ctx.groestl, &allium_4way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, hash2, hash2, 256 );
update_and_final_groestl256( &ctx.groestl, state+64, hash2, 256 );
memcpy( &ctx.groestl, &allium_4way_ctx.groestl,
sizeof(hashState_groestl256) );
update_and_final_groestl256( &ctx.groestl, hash3, hash3, 256 );

memcpy( state, hash0, 32 );
memcpy( state+32, hash1, 32 );
memcpy( state+64, hash2, 32 );
memcpy( state+96, hash3, 32 );
update_and_final_groestl256( &ctx.groestl, state+96, hash3, 256 );
}

int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t _ALIGN(64) edata[20];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
Expand All @@ -106,13 +103,7 @@ int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce,
if ( opt_benchmark )
( (uint32_t*)ptarget )[7] = 0x0000ff;

casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );

mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
mm128_bswap_intrlv80_4x32( vdata, pdata );
blake256_4way_init( &allium_4way_ctx.blake );
blake256_4way( &allium_4way_ctx.blake, vdata, 64 );

Expand All @@ -124,7 +115,7 @@ int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce,

for ( int lane = 0; lane < 4; lane++ ) if ( (hash+(lane<<3))[7] <= Htarg )
{
if ( fulltest( hash+(lane<<3), ptarget ) )
if ( fulltest( hash+(lane<<3), ptarget ) && !opt_benchmark )
{
pdata[19] = n + lane;
submit_solution( work, hash+(lane<<3), mythr, lane );
Expand Down
4 changes: 3 additions & 1 deletion algo/lyra2/lyra2-gate.h
Expand Up @@ -5,7 +5,9 @@
#include <stdint.h>
#include "lyra2.h"

#if defined(__AVX2__)
//#if defined(__AVX2__)

#if defined(__SSE2__)
#define LYRA2REV3_4WAY
#endif

Expand Down
2 changes: 1 addition & 1 deletion algo/lyra2/lyra2.c
Expand Up @@ -566,7 +566,7 @@ int LYRA2RE( void *K, uint64_t kLen, const void *pwd, const uint64_t pwdlen,

#if defined(__AVX2__)
memset_zero_256( (__m256i*)wholeMatrix, i>>5 );
#elif defined(__SSE4_2__)
#elif defined(__SSE2__)
memset_zero_128( (__m128i*)wholeMatrix, i>>4 );
#else
memset( wholeMatrix, 0, i );
Expand Down
50 changes: 18 additions & 32 deletions algo/lyra2/lyra2h-4way.c
Expand Up @@ -36,67 +36,53 @@ void lyra2h_4way_hash( void *state, const void *input )
blake256_4way( &ctx_blake, input + (64*4), 16 );
blake256_4way_close( &ctx_blake, vhash );

mm128_deinterleave_4x32( hash0, hash1, hash2, hash3, vhash, 256 );

LYRA2Z( lyra2h_4way_matrix, hash0, 32, hash0, 32, hash0, 32, 16, 16, 16 );
LYRA2Z( lyra2h_4way_matrix, hash1, 32, hash1, 32, hash1, 32, 16, 16, 16 );
LYRA2Z( lyra2h_4way_matrix, hash2, 32, hash2, 32, hash2, 32, 16, 16, 16 );
LYRA2Z( lyra2h_4way_matrix, hash3, 32, hash3, 32, hash3, 32, 16, 16, 16 );

memcpy( state, hash0, 32 );
memcpy( state+32, hash1, 32 );
memcpy( state+64, hash2, 32 );
memcpy( state+96, hash3, 32 );
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 256 );

LYRA2Z( lyra2h_4way_matrix, state, 32, hash0, 32, hash0, 32,
16, 16, 16 );
LYRA2Z( lyra2h_4way_matrix, state+32, 32, hash1, 32, hash1,
32, 16, 16, 16 );
LYRA2Z( lyra2h_4way_matrix, state+64, 32, hash2, 32, hash2,
32, 16, 16, 16 );
LYRA2Z( lyra2h_4way_matrix, state+96, 32, hash3, 32, hash3,
32, 16, 16, 16 );
}

int scanhash_lyra2h_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8*4] __attribute__ ((aligned (64)));
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
uint32_t _ALIGN(64) edata[20];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t n = first_nonce;
uint32_t *nonces = work->nonces;
int num_found = 0;
uint32_t *noncep= vdata + 76; // 19*4
__m128i *noncev = (__m128i*)vdata + 19; // aligned
/* int */ thr_id = mythr->id; // thr_id arg is deprecated

if ( opt_benchmark )
ptarget[7] = 0x0000ff;

for ( int i=0; i < 20; i++ )
be32enc( &edata[i], pdata[i] );

mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 );

mm128_bswap_intrlv80_4x32( vdata, pdata );
lyra2h_4way_midstate( vdata );

do {
be32enc( noncep, n );
be32enc( noncep+1, n+1 );
be32enc( noncep+2, n+2 );
be32enc( noncep+3, n+3 );

be32enc( &edata[19], n );
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
lyra2h_4way_hash( hash, vdata );

for ( int i = 0; i < 4; i++ )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget )
&& !opt_benchmark )
{
pdata[19] = n+i;
nonces[ num_found++ ] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
submit_solution( work, hash+(i<<3), mythr, i );
}
n += 4;
} while ( (num_found == 0) && (n < max_nonce-4)
&& !work_restart[thr_id].restart);
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);

*hashes_done = n - first_nonce + 1;
return num_found;
return 0;
}

#endif
Expand Down
2 changes: 1 addition & 1 deletion algo/lyra2/lyra2re.c
Expand Up @@ -6,7 +6,7 @@
#include "algo/keccak/sph_keccak.h"
#include "lyra2.h"
#include "algo-gate-api.h"
#include "avxdefs.h"
#include "simd-utils.h"
#if defined(__AES__)
#include "algo/groestl/aes_ni/hash-groestl256.h"
#endif
Expand Down

0 comments on commit b233137

Please sign in to comment.