Skip to content

Commit

Permalink
v3.9.2.4
Browse files Browse the repository at this point in the history
  • Loading branch information
JayDDee committed Jun 8, 2019
1 parent 1b0a5aa commit 7fec680
Show file tree
Hide file tree
Showing 15 changed files with 172 additions and 128 deletions.
4 changes: 4 additions & 0 deletions RELEASE_NOTES
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ supported.
Change Log
----------

v3.9.2.4

Yet another affinity fix. Hopefully the last one.

v3.9.2.3

Another cpu-affinity fix.
Expand Down
1 change: 1 addition & 0 deletions algo-gate-api.c
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,7 @@ void get_algo_alias( char** algo_or_alias )
#undef ALIAS
#undef PROPER

// only for parallel when there are lanes.
bool submit_solution( struct work *work, void *hash,
struct thr_info *thr, int lane )
{
Expand Down
2 changes: 2 additions & 0 deletions algo/fugue/sph_fugue.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ extern "C"{
#pragma warning (disable: 4146)
#endif

#define SPH_FUGUE_NOCOPY 1

static const sph_u32 IV224[] = {
SPH_C32(0xf4c9120d), SPH_C32(0x6286f757), SPH_C32(0xee39e01c),
SPH_C32(0xe074e3cb), SPH_C32(0xa1127c62), SPH_C32(0x9a43d215),
Expand Down
8 changes: 1 addition & 7 deletions algo/lyra2/allium-4way.c
Original file line number Diff line number Diff line change
Expand Up @@ -127,13 +127,7 @@ int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce,
if ( fulltest( hash+(lane<<3), ptarget ) )
{
pdata[19] = n + lane;
work_set_target_ratio( work, hash+(lane<<3) );
if ( submit_work( mythr, work ) )
applog( LOG_NOTICE, "Share %d submitted by thread %d, lane %d.",
accepted_share_count + rejected_share_count + 1,
thr_id, lane );
else
applog( LOG_WARNING, "Failed to submit share." );
submit_solution( work, hash+(lane<<3), mythr, lane );
}
}
n += 4;
Expand Down
8 changes: 6 additions & 2 deletions algo/lyra2/lyra2-gate.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,15 @@
// Convert algos that don't yet do so to use dynamic allocation.
// Alloc huge pages globally. If ok each thread will create a pointer to
// its chunk. If fail each thread will use _mm_alloc for itself.
// BLOCK_LEN_BYTES is 768.

#define LYRA2REV3_NROWS 4
#define LYRA2REV3_NCOLS 4
//#define LYRA2REV3_MATRIX_SIZE ((BLOCK_LEN_BYTES)*(LYRA2REV3_NCOLS)* \
// (LYRA2REV3_NROWS)*8)
/*
#define LYRA2REV3_MATRIX_SIZE ((BLOCK_LEN_BYTES)*(LYRA2REV3_NCOLS)* \
(LYRA2REV3_NROWS)*8)
*/

#define LYRA2REV3_MATRIX_SIZE ((BLOCK_LEN_BYTES)<<4)

__thread uint64_t* l2v3_wholeMatrix;
Expand Down
2 changes: 1 addition & 1 deletion algo/lyra2/lyra2.c
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ int LYRA2REV3( uint64_t* wholeMatrix, void *K, uint64_t kLen, const void *pwd,
//Tries to allocate enough space for the whole memory matrix

const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols;
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
// const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
const int64_t BLOCK_LEN = BLOCK_LEN_BLAKE2_SAFE_INT64;
/*
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols;
Expand Down
8 changes: 1 addition & 7 deletions algo/lyra2/lyra2rev3-4way.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,7 @@ int scanhash_lyra2rev3_4way( int thr_id, struct work *work, uint32_t max_nonce,
if ( fulltest( lane_hash, ptarget ) )
{
pdata[19] = n + lane;
work_set_target_ratio( work, lane_hash );
if ( submit_work( mythr, work ) )
applog( LOG_NOTICE, "Share %d submitted by thread %d, lane %d.",
accepted_share_count + rejected_share_count + 1,
thr_id, lane );
else
applog( LOG_WARNING, "Failed to submit share." );
submit_solution( work, lane_hash, mythr, lane );
}
}
n += 4;
Expand Down
8 changes: 1 addition & 7 deletions algo/lyra2/lyra2z-4way.c
Original file line number Diff line number Diff line change
Expand Up @@ -194,13 +194,7 @@ int scanhash_lyra2z_8way( int thr_id, struct work *work, uint32_t max_nonce,
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
{
pdata[19] = n+i;
work_set_target_ratio( work, hash+(i<<3) );
if ( submit_work( mythr, work ) )
applog( LOG_NOTICE, "Share %d submitted by thread %d, lane %d.",
accepted_share_count + rejected_share_count + 1,
thr_id, i );
else
applog( LOG_WARNING, "Failed to submit share." );
submit_solution( work, hash+(i<<3), mythr, i );
}
n += 8;
} while ( (n < max_nonce-8) && !work_restart[thr_id].restart);
Expand Down
63 changes: 33 additions & 30 deletions algo/lyra2/lyra2z330.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,38 +18,41 @@ void lyra2z330_hash(void *state, const void *input, uint32_t height)
int scanhash_lyra2z330( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
uint32_t hash[8] __attribute__ ((aligned (64)));
uint32_t endiandata[20] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t nonce = first_nonce;
uint32_t hash[8] __attribute__ ((aligned (64)));
uint32_t endiandata[20] __attribute__ ((aligned (64)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t nonce = first_nonce;
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
if (opt_benchmark)
ptarget[7] = 0x0000ff;

for (int i=0; i < 19; i++) {
be32enc(&endiandata[i], pdata[i]);
}

do {
be32enc(&endiandata[19], nonce);
lyra2z330_hash( hash, endiandata, work->height );

if (hash[7] <= Htarg && fulltest(hash, ptarget)) {
work_set_target_ratio(work, hash);
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce;
return 1;
}
nonce++;

} while (nonce < max_nonce && !work_restart[thr_id].restart);

pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
if (opt_benchmark)
ptarget[7] = 0x0000ff;

for (int i=0; i < 19; i++)
be32enc(&endiandata[i], pdata[i]);

do
{
be32enc(&endiandata[19], nonce);
lyra2z330_hash( hash, endiandata, work->height );
if ( hash[7] <= Htarg && fulltest(hash, ptarget) )
{
work_set_target_ratio(work, hash);
pdata[19] = nonce;
if ( submit_work( mythr, work ) )
applog( LOG_NOTICE, "Share %d submitted by thread %d",
accepted_share_count + rejected_share_count + 1,
mythr->id );
else
applog( LOG_WARNING, "Failed to submit share." );
}
nonce++;
} while (nonce < max_nonce && !work_restart[thr_id].restart);
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
}

void lyra2z330_set_target( struct work* work, double job_diff )
Expand Down
9 changes: 9 additions & 0 deletions algo/sha/sha256_hash_11way.c
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,15 @@ void sha256_11way_round( __m256i *inx, __m256i rx[8], __m64 *iny, __m64 ry[8],
Wy[15] = mm64_bswap_32( iny[15] );
Wz[15] = bswap_32( inz[15] );

Ax = rx[0]; Ay = ry[0]; Az = rz[0];
Bx = rx[1]; By = ry[1]; Bz = rz[1];
Cx = rx[2]; Cy = ry[2]; Cz = rz[2];
Dx = rx[3]; Dy = ry[3]; Dz = rz[3];
Ex = rx[4]; Ey = ry[4]; Ez = rz[4];
Fx = rx[5]; Fy = ry[5]; Fz = rz[5];
Gx = rx[6]; Gy = ry[6]; Gz = rz[6];
Hx = rx[7]; Hy = ry[7]; Hz = rz[7];

SHA2s_11WAY_STEP( Ax, Bx, Cx, Dx, Ex, Fx, Gx, Hx,
Ay, By, Cy, Dy, Ey, Fy, Gy, Hy,
Az, Bz, Cz, Dz, Ez, Fz, Gz, Hz, 0, 0 );
Expand Down
27 changes: 14 additions & 13 deletions algo/sha/sha256t-4way.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,11 @@ int scanhash_sha256t_11way( int thr_id, struct work *work, uint32_t max_nonce,
do
{
*noncex = mm256_bswap_32(
_mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ) );
_mm256_set_epi32( n+7, n+6, n+5, n+4, n+3, n+2, n+1, n ) );
*noncey = mm64_bswap_32( _mm_set_pi32( n+9, n+8 ) );
*noncez = bswap_32( n+10 );

pdata[19] = n;
pdata[19] = n;

sha256t_11way_hash( hashx, hashy, hashz, datax, datay, dataz );

Expand All @@ -102,28 +102,29 @@ int scanhash_sha256t_11way( int thr_id, struct work *work, uint32_t max_nonce,
mm256_extract_lane_8x32( lane_hash, hashx, i, 256 );
if ( fulltest( lane_hash, ptarget ) )
{
pdata[19] = n + i;
pdata[19] = n + i;
submit_solution( work, lane_hash, mythr, i );
}
}
}

hash7 = &(hashy[7<<1]);
hash7 = &(hashy[7<<1]);
for( i = 0; i < 2; i++ ) if ( !(hash7[ 0] & mask ) )

{
mm64_extract_lane_2x32( lane_hash, hashy, i, 256 );
if ( fulltest( lane_hash, ptarget ) )
{
if ( fulltest( lane_hash, ptarget ) )
{
pdata[19] = n + 8 + i;
submit_solution( work, lane_hash, mythr, i+8 );
}
}
}
}

if ( !(hashz[7] & mask ) && fulltest( hashz, ptarget ) )
{
if ( !(hashz[7] & mask ) && fulltest( hashz, ptarget ) )
{
pdata[19] = n+10;
submit_solution( work, hashz, mythr, 10 );
}
n += 11;
}
n += 11;

} while ( (n < max_nonce-12) && !work_restart[thr_id].restart );
break;
Expand Down
62 changes: 52 additions & 10 deletions avxdefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,25 +100,67 @@
#include <stdbool.h>

// First some integer stuff that mirrors the SIMD utilities

#define ror_64( x, c ) (((x)>>(c)) | ((x)<<(64-(c))))
#define rol_64( x, c ) (((x)<<(c)) | ((x)>>(64-(c))))
#define ror_32( x, c ) (((x)>>(c)) | ((x)<<(32-(c))))
#define rol_32( x, c ) (((x)<<(c)) | ((x)>>(32-(c))))
#define bswap_64( x ) __builtin_bswap64(x)
#define bswap_32( x ) __builtin_bswap32(x)
// Scalar rotates for 64, 32, 16 and 8 bit unsigned integers.
//
// x is converted to the unsigned type of the named width before rotating and
// the result has that same type. Both shift counts are reduced modulo the
// operand width, so a count of 0 (or of exactly the width) returns x
// unchanged instead of shifting by the full width, which is undefined
// behaviour in C. Results for counts in the range 1..width-1 are unchanged.
//
// Both x and c are expanded twice, do not pass arguments with side effects.
#define ror_64( x, c ) \
   ( (uint64_t)( ( (uint64_t)(x) >> ( (c) & 63 ) ) \
               | ( (uint64_t)(x) << ( ( 64 - (c) ) & 63 ) ) ) )
#define rol_64( x, c ) \
   ( (uint64_t)( ( (uint64_t)(x) << ( (c) & 63 ) ) \
               | ( (uint64_t)(x) >> ( ( 64 - (c) ) & 63 ) ) ) )
#define ror_32( x, c ) \
   ( (uint32_t)( ( (uint32_t)(x) >> ( (c) & 31 ) ) \
               | ( (uint32_t)(x) << ( ( 32 - (c) ) & 31 ) ) ) )
#define rol_32( x, c ) \
   ( (uint32_t)( ( (uint32_t)(x) << ( (c) & 31 ) ) \
               | ( (uint32_t)(x) >> ( ( 32 - (c) ) & 31 ) ) ) )

// The 16 and 8 bit operands are promoted to int before shifting, the outer
// cast truncates the result back to the operand width.
#define ror_16( x, c ) \
   ( (uint16_t)( ( (uint16_t)(x) >> ( (c) & 15 ) ) \
               | ( (uint16_t)(x) << ( ( 16 - (c) ) & 15 ) ) ) )
#define rol_16( x, c ) \
   ( (uint16_t)( ( (uint16_t)(x) << ( (c) & 15 ) ) \
               | ( (uint16_t)(x) >> ( ( 16 - (c) ) & 15 ) ) ) )
#define ror_8( x, c ) \
   ( (uint8_t) ( ( (uint8_t) (x) >> ( (c) &  7 ) ) \
               | ( (uint8_t) (x) << ( (  8 - (c) ) &  7 ) ) ) )
#define rol_8( x, c ) \
   ( (uint8_t) ( ( (uint8_t) (x) << ( (c) &  7 ) ) \
               | ( (uint8_t) (x) >> ( (  8 - (c) ) &  7 ) ) ) )

// Byte order reversal using the compiler builtins.
#define bswap_64( x ) __builtin_bswap64(x)
#define bswap_32( x ) __builtin_bswap32(x)


// 128 bit integer
//
// Int128 uses two 64 bit GPRs to hold the data. The main benefits are
// for 128 bit arithmetic. Vectors are preferred when 128 bit arith
// is not required. int128 also works better with other integer sizes.
// Vectors benefit from wider registers.
//
// Use typecasting for conversion to/from 128 bit vector:
// __m128i v128 = (__m128i)my_int128;
// __m256i v256 = _mm256_set_m128i( (__m128i)my_int128, (__m128i)my_int128 );
// my_int128 = (uint128_t)_mm256_extracti128_si256( v256, 1 );

#if ( __GNUC__ > 4 ) || ( ( __GNUC__ == 4 ) && ( __GNUC_MINOR__ >= 8 ) )

// Test this before using int128.
#define GCC_INT128 1

// Familiar looking type names
typedef __int128 int128_t;
typedef unsigned __int128 uint128_t;

// All 128 bits set. No real need or use.
#define i128_neg1 ( (uint128_t)(-1LL) )

// Extract selected 64 bit half of 128 bit integer.
// A generic macro with a selector argument can't be encoded as a statement
// function and would require a branch.
#define i128_hi64( x ) ( (uint64_t)( (uint128_t)(x) >> 64 ) )
#define i128_lo64( x ) ( (uint64_t)( (uint128_t)(x) << 64 >> 64 ) )

// Not much need for this but it fills a gap.
// The shift counts are reduced modulo 128 so that a count of 0 returns x
// unchanged rather than shifting by the full width, which is undefined.
// Both x and c are expanded twice, do not pass arguments with side effects.
#define ror_128( x, c ) \
   ( ( (uint128_t)(x) >> ( (c) & 127 ) ) \
   | ( (uint128_t)(x) << ( ( 128 - (c) ) & 127 ) ) )
#define rol_128( x, c ) \
   ( ( (uint128_t)(x) << ( (c) & 127 ) ) \
   | ( (uint128_t)(x) >> ( ( 128 - (c) ) & 127 ) ) )

#endif  // INT128

////////////////////////////////////////////////////////////////
//
// 64 bit MMX vectors.
// 64 bit MMX vectors.
//
// There are rumours MMX will be removed. Although casting with int64
// works there is likely some overhead to move the data to an MMX register
Expand Down
Loading

0 comments on commit 7fec680

Please sign in to comment.