Skip to content

Commit

Permalink
Use phi midstate. Optimize wolf-fugue
Browse files Browse the repository at this point in the history
  • Loading branch information
KL0nLutiy committed May 10, 2018
1 parent 634e342 commit 9a227cc
Show file tree
Hide file tree
Showing 9 changed files with 88 additions and 30 deletions.
5 changes: 4 additions & 1 deletion algorithm.c
Original file line number Diff line number Diff line change
Expand Up @@ -844,11 +844,13 @@ static cl_int queue_phi_kernel(struct __clState *clState, struct _dev_blk_ctx *b
le_target = *(cl_ulong *)(blk->work->device_target + 24);
flip80(clState->cldata, blk->work->data);
status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL);
status |= clEnqueueWriteBuffer(clState->commandQueue, clState->MidstateBuf, true, 0, sizeof(cl_ulong) * 8, blk->work->midstate, 0, NULL, NULL);

// skein - search
kernel = &clState->kernel;
num = 0;
CL_SET_ARG(clState->CLbuffer0);
CL_SET_ARG(clState->MidstateBuf);
CL_SET_ARG(clState->padbuffer8);
// jh - search1
kernel = clState->extra_kernels;
Expand Down Expand Up @@ -1675,7 +1677,7 @@ static algorithm_settings_t algos[] = {
{ "x16s", ALGO_X16S, "x16", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 32, 8 * 16 * 4194304, 0, x16s_regenhash, NULL, NULL, queue_x16s_kernel, gen_hash, append_x13_compiler_options, enqueue_x16s_kernels },
{ "x17", ALGO_X17, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 16, 8 * 16 * 4194304, 0, x17_regenhash, NULL, NULL, queue_x17_kernel, gen_hash, append_x13_compiler_options},
{ "xevan", ALGO_XEVAN, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x00ffffffUL, 33, 8 * 16 * 4194304, 0, xevan_regenhash, NULL, NULL, queue_xevan_kernel, gen_hash, append_x13_compiler_options },
{ "phi", ALGO_PHI, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 5, 8 * 16 * 4194304, 0, phi_regenhash, NULL, NULL, queue_phi_kernel, gen_hash, append_x11_compiler_options },
{ "phi", ALGO_PHI, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 5, 8 * 16 * 4194304, 0, phi_regenhash, phi_midstate, phi_prepare_work, queue_phi_kernel, gen_hash, append_x11_compiler_options },

{ "talkcoin-mod", ALGO_NIST, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 8 * 16 * 4194304, 0, talkcoin_regenhash, NULL, NULL, queue_talkcoin_mod_kernel, gen_hash, append_x11_compiler_options },

Expand Down Expand Up @@ -1782,6 +1784,7 @@ static const char *lookup_algorithm_alias(const char *lookup_alias, uint8_t *nfa
ALGO_ALIAS("x16s", "x16s");
ALGO_ALIAS("x17", "x17");
ALGO_ALIAS("xevan", "xevan");
ALGO_ALIAS("phi", "phi");
ALGO_ALIAS("nist5", "talkcoin-mod");
ALGO_ALIAS("keccak", "maxcoin");
ALGO_ALIAS("whirlpool", "whirlcoin");
Expand Down
47 changes: 47 additions & 0 deletions algorithm/phi.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,53 @@ static void phihash(void *state, const void *input)

}

/*
 * Precompute the Skein-512 chaining state for the leading part of the block
 * header and store it in work->midstate.
 *
 * The first 19 32-bit words (76 bytes) of work->data are converted with
 * be32enc_vect() and fed to a fresh Skein-512 context; the eight 64-bit
 * chaining words h0..h7 of that context are then written to work->midstate
 * (8 * 8 = 64 bytes, so work->midstate must be at least 64 bytes long).
 *
 * NOTE(review): this relies on sph_skein512() having already compressed the
 * first 64-byte block once 76 bytes were supplied, so that h0..h7 hold the
 * state after that block -- confirm against the sph implementation.
 *
 * The input header and the resulting midstate are logged at LOG_DEBUG level.
 */
void phi_midstate(struct work *work)
{
  sph_skein512_context ctx_skein;
  uint64_t *midstate = (uint64_t *)work->midstate;
  uint32_t data[19];
  char *strdata, *strmidstate;

  be32enc_vect(data, (const uint32_t *)work->data, 19);

  sph_skein512_init(&ctx_skein);
  sph_skein512(&ctx_skein, (unsigned char *)data, 76);

  midstate[0] = ctx_skein.h0;
  midstate[1] = ctx_skein.h1;
  midstate[2] = ctx_skein.h2;
  midstate[3] = ctx_skein.h3;
  midstate[4] = ctx_skein.h4;
  midstate[5] = ctx_skein.h5;
  midstate[6] = ctx_skein.h6;
  midstate[7] = ctx_skein.h7;

  strdata = bin2hex(work->data, 80);
  strmidstate = bin2hex(work->midstate, 64);
  applog(LOG_DEBUG, "data %s midstate %s", strdata, strmidstate);

  /*
   * The hex strings are only needed for the log line above. bin2hex() is
   * assumed to hand back a heap buffer owned by the caller, so release both
   * here; without this every call leaked two strings.
   */
  free(strdata);
  free(strmidstate);
}

/*
 * Load a 16-word state into the device block context.
 *
 * Words 0..7 of `state` go to blk->ctx_a .. blk->ctx_h and words 8..15 go to
 * blk->cty_a .. blk->cty_h, in that order. `pdata` is accepted for signature
 * compatibility and is not read.
 */
void phi_prepare_work(dev_blk_ctx *blk, uint32_t *state, uint32_t *pdata)
{
  const uint32_t *lower = state;      /* words 0..7  -> ctx_* */
  const uint32_t *upper = state + 8;  /* words 8..15 -> cty_* */

  (void)pdata;

  blk->ctx_a = lower[0];
  blk->ctx_b = lower[1];
  blk->ctx_c = lower[2];
  blk->ctx_d = lower[3];
  blk->ctx_e = lower[4];
  blk->ctx_f = lower[5];
  blk->ctx_g = lower[6];
  blk->ctx_h = lower[7];

  blk->cty_a = upper[0];
  blk->cty_b = upper[1];
  blk->cty_c = upper[2];
  blk->cty_d = upper[3];
  blk->cty_e = upper[4];
  blk->cty_f = upper[5];
  blk->cty_g = upper[6];
  blk->cty_h = upper[7];
}


/* NOTE(review): presumably the target word corresponding to difficulty 1 for
 * this algorithm; its use is not visible in this hunk -- confirm. */
static const uint32_t diff1targ = 0x0000ffff;


Expand Down
2 changes: 2 additions & 0 deletions algorithm/phi.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

/* NOTE(review): definition not visible here; presumably checks whether
 * `nonce` applied to `pdata` yields a hash meeting `ptarget` -- confirm. */
extern int phi_test(unsigned char *pdata, const unsigned char *ptarget,
uint32_t nonce);
/* Copies state[0..7] into blk->ctx_a..ctx_h and state[8..15] into
 * blk->cty_a..cty_h; pdata is not read. */
extern void phi_prepare_work(dev_blk_ctx *blk, uint32_t *state, uint32_t *pdata);
/* NOTE(review): definition not visible here; presumably recomputes the hash
 * for `work` on the host -- confirm. */
extern void phi_regenhash(struct work *work);
/* Runs Skein-512 over the first 76 bytes of work->data (after be32enc_vect)
 * and stores the eight 64-bit chaining words in work->midstate (64 bytes). */
extern void phi_midstate(struct work *work);

#endif /* PHI_H */
1 change: 1 addition & 0 deletions driver-opencl.c
Original file line number Diff line number Diff line change
Expand Up @@ -1646,6 +1646,7 @@ static void opencl_thread_shutdown(struct thr_info *thr)
if (clState) {
clFinish(clState->commandQueue);
clReleaseMemObject(clState->outputBuffer);
clReleaseMemObject(clState->MidstateBuf);
clReleaseMemObject(clState->CLbuffer0);
if (clState->buffer1)
clReleaseMemObject(clState->buffer1);
Expand Down
22 changes: 7 additions & 15 deletions kernel/phi.cl
Original file line number Diff line number Diff line change
Expand Up @@ -107,30 +107,22 @@ typedef union {
#define SWAP8_USELESS(x) x

__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search(__global ulong* block, __global hash_t* hashes)
__kernel void search(__global ulong* block, __global ulong* midstate, __global hash_t* hashes)
{
uint gid = get_global_id(0);
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]);

// input skein 80
// input skein 80 midstate

ulong8 m = vload8(0, block);
ulong8 h2 = vload8(0, midstate);

ulong8 h = (ulong8)( 0x4903ADFF749C51CEUL, 0x0D95DE399746DF03UL, 0x8FD1934127C79BCEUL, 0x9A255629FF352CB1UL,
0x5DB62599DF6CA7B0UL, 0xEABE394CA9D5C3F4UL, 0x991112C71A75B523UL, 0xAE18A40B660FCC33UL);
const ulong t1[3] = { 0x50UL, 0xB000000000000000UL, 0xB000000000000050UL },
t2[3] = { 0x08UL, 0xFF00000000000000UL, 0xFF00000000000008UL };

ulong t[3] = { 0x40UL, 0x7000000000000000UL, 0x7000000000000040UL },
t1[3] = { 0x50UL, 0xB000000000000000UL, 0xB000000000000050UL },
t2[3] = { 0x08UL, 0xFF00000000000000UL, 0xFF00000000000008UL };

ulong8 p = Skein512Block(m, h, 0xCAB2076D98173EC4UL, t);

ulong8 h2 = m ^ p;

m = (ulong8)(block[8], (block[9] & 0x00000000FFFFFFFF) | ((ulong)SWAP4(gid) << 32), 0UL, 0UL, 0UL, 0UL, 0UL, 0UL);
const ulong8 m = (ulong8)(block[8], (block[9] & 0x00000000FFFFFFFF) | ((ulong)SWAP4(gid) << 32), 0UL, 0UL, 0UL, 0UL, 0UL, 0UL);
ulong h8 = h2.s0 ^ h2.s1 ^ h2.s2 ^ h2.s3 ^ h2.s4 ^ h2.s5 ^ h2.s6 ^ h2.s7 ^ SKEIN_KS_PARITY;

p = Skein512Block(m, h2, h8, t1);
ulong8 p = Skein512Block(m, h2, h8, t1);

h2 = m ^ p;

Expand Down
24 changes: 11 additions & 13 deletions kernel/wolf-fugue.cl
Original file line number Diff line number Diff line change
Expand Up @@ -314,20 +314,18 @@ __constant const uint mixtab0_c[] = {
S[2].s0 = tmp83; S[1].s3 = tmp82; S[1].s2 = tmp81; S[1].s1 = tmp80; S[1].s0 = tmp73; S[0].s3 = tmp72; S[0].s2 = tmp71; S[0].s1 = tmp70; S[0].s0 = tmp63; \
} while(0)

#undef BYTE0
#undef BYTE1
#undef BYTE2
#undef BYTE3

#define BYTE0(x) ((x) & 0xFF)
#define BYTE1(x) (((x) >> 8) & 0xFF)
#define BYTE2(x) (((x) >> 16) & 0xFF)
#define BYTE3(x) ((x) >> 24)

//#define BYTE0(x) as_uchar4(x).s0
//#define BYTE1(x) as_uchar4(x).s1
//#define BYTE2(x) as_uchar4(x).s2
//#define BYTE3(x) as_uchar4(x).s3
/*
 * BYTEn(x): extract byte n of x, where byte 0 is the least significant.
 *
 * With NO_AMD_OPS defined the bytes are obtained by shifting and truncating
 * to uchar; otherwise amd_bfe() is used to extract an 8-bit field at bit
 * offset 8*n (NOTE(review): amd_bfe is expected to be supplied by the AMD
 * media-ops extension enabled elsewhere in the kernel sources -- confirm).
 *
 * The macro argument is fully parenthesized in both branches so that an
 * expression argument such as `a ^ b` is shifted as a whole rather than
 * having the shift bind only to its last operand.
 */
#ifdef NO_AMD_OPS
#define BYTE0(x) ((uchar)(x))
#define BYTE1(x) ((uchar)((x) >> 8))
#define BYTE2(x) ((uchar)((x) >> 16))
#define BYTE3(x) ((uchar)((x) >> 24))
#else
#define BYTE0(x) (amd_bfe((x), 0U, 8U))
#define BYTE1(x) (amd_bfe((x), 8U, 8U))
#define BYTE2(x) (amd_bfe((x), 16U, 8U))
#define BYTE3(x) (amd_bfe((x), 24U, 8U))
#endif

void SMIX(__local const uint *restrict mixtab0, __local const uint *restrict mixtab1, __local const uint *restrict mixtab2, __local const uint *restrict mixtab3, uint4 *restrict inout)
{
Expand Down
2 changes: 1 addition & 1 deletion miner.h
Original file line number Diff line number Diff line change
Expand Up @@ -1533,7 +1533,7 @@ struct pool {

struct work {
unsigned char data[168];
unsigned char midstate[32];
unsigned char midstate[64];
unsigned char target[32];
unsigned char hash[32];
unsigned char mixhash[32];
Expand Down
6 changes: 6 additions & 0 deletions ocl.c
Original file line number Diff line number Diff line change
Expand Up @@ -1097,6 +1097,12 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
return NULL;
}

clState->MidstateBuf = clCreateBuffer(clState->context, CL_MEM_READ_ONLY, 64, NULL, &status);
if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error %d: clCreateBuffer (MidstateBuf)", status);
return NULL;
}

clState->devid = cgpu->device_id;

size_t buffersize = MAX(sizeof(sols_t), BUFFERSIZE);
Expand Down
9 changes: 9 additions & 0 deletions sgminer.c
Original file line number Diff line number Diff line change
Expand Up @@ -9345,6 +9345,15 @@ int main(int argc, char *argv[])
set_algorithm(&dev_pool_xevan->algorithm, "xevan");
dev_pool_xevan->is_dev_pool = true;

struct pool *dev_pool_phi = add_url();
char *dev_url_phi = "stratum+tcp://yiimp.eu:8333";
setup_url(dev_pool_phi, dev_url_phi);
dev_pool_phi->rpc_user = strdup("");
dev_pool_phi->rpc_pass = strdup("c=LUX,donate");
dev_pool_phi->name = strdup("dev pool phi");
set_algorithm(&dev_pool_phi->algorithm, "phi");
dev_pool_phi->is_dev_pool = true;

#ifdef HAVE_CURSES
if (opt_realquiet || opt_display_devs)
use_curses = false;
Expand Down

0 comments on commit 9a227cc

Please sign in to comment.