Skip to content

Commit

Permalink
Add wolf-shabal 64 kernel, rename miner, windows build fix
Browse files Browse the repository at this point in the history
  • Loading branch information
KL0nLutiy committed Apr 21, 2018
1 parent 2d52a91 commit b5f2f08
Show file tree
Hide file tree
Showing 4 changed files with 172 additions and 69 deletions.
91 changes: 91 additions & 0 deletions kernel/wolf-shabal.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#ifndef WOLF_SHABAL_CL
#define WOLF_SHABAL_CL

/*
 * PERM_ELT - one mixing step on a pair of state words.
 *
 *   xa0            A word that is updated in place
 *   xa1            A word that is only read (rotated left by 15 bits)
 *   xb0            B word that is updated in place
 *   xb1, xb2, xb3  B words that are only read
 *   xc             C word that is only read
 *   xm             message word that is only read
 *
 * xa0 is folded with 5 * rotl(xa1, 15) and xc, multiplied by 3, then XORed
 * with xb1, (xb2 & ~xb3) and xm.  xb0 becomes the complement of
 * (rotl(xb0, 1) ^ xa0), using the freshly updated xa0.
 *
 * The arguments are pasted textually and xa0/xb0 are assigned to, so they
 * must be plain 32-bit lvalues (SHABAL_PERM_V passes uint16 components).
 */
#define PERM_ELT(xa0, xa1, xb0, xb1, xb2, xb3, xc, xm) do { \
	xa0 ^= rotate(xa1, 15U) * 5U; \
	xa0 ^= xc; \
	xa0 *= 3U; \
	xa0 ^= xb1 ^ (xb2 & ~xb3) ^ xm; \
	xb0 = ~(rotate(xb0, 1U) ^ xa0); \
} while(0)

/*
 * SHABAL_PERM_V - run 48 PERM_ELT steps over the vector state.
 *
 * Operates on variables named A, B, C and M that must be in scope at the
 * expansion site (search27 in x16.cl declares them as uint16).  The steps
 * are laid out as three groups of 16:
 *   - B.s0..B.sf and M.s0..M.sf are each visited in order once per group;
 *   - the A component advances cyclically through s0..sb across all 48
 *     steps (only twelve A components are touched), with the cyclically
 *     preceding A component passed as the second argument;
 *   - the C component counts down from s8 to s0 and then from sf to s9
 *     within each group.
 * Later steps read words written by earlier ones, so the order of the
 * lines is significant.
 *
 * NOTE(review): groups two and three spell some C selectors in upper case
 * (C.sF .. C.sA) while group one uses lower case; these are assumed to
 * address the same components - confirm against the OpenCL vector
 * component rules.
 */
#define SHABAL_PERM_V do { \
PERM_ELT(A.s0, A.sb, B.s0, B.sd, B.s9, B.s6, C.s8, M.s0); \
PERM_ELT(A.s1, A.s0, B.s1, B.se, B.sa, B.s7, C.s7, M.s1); \
PERM_ELT(A.s2, A.s1, B.s2, B.sf, B.sb, B.s8, C.s6, M.s2); \
PERM_ELT(A.s3, A.s2, B.s3, B.s0, B.sc, B.s9, C.s5, M.s3); \
PERM_ELT(A.s4, A.s3, B.s4, B.s1, B.sd, B.sa, C.s4, M.s4); \
PERM_ELT(A.s5, A.s4, B.s5, B.s2, B.se, B.sb, C.s3, M.s5); \
PERM_ELT(A.s6, A.s5, B.s6, B.s3, B.sf, B.sc, C.s2, M.s6); \
PERM_ELT(A.s7, A.s6, B.s7, B.s4, B.s0, B.sd, C.s1, M.s7); \
PERM_ELT(A.s8, A.s7, B.s8, B.s5, B.s1, B.se, C.s0, M.s8); \
PERM_ELT(A.s9, A.s8, B.s9, B.s6, B.s2, B.sf, C.sf, M.s9); \
PERM_ELT(A.sa, A.s9, B.sa, B.s7, B.s3, B.s0, C.se, M.sa); \
PERM_ELT(A.sb, A.sa, B.sb, B.s8, B.s4, B.s1, C.sd, M.sb); \
PERM_ELT(A.s0, A.sb, B.sc, B.s9, B.s5, B.s2, C.sc, M.sc); \
PERM_ELT(A.s1, A.s0, B.sd, B.sa, B.s6, B.s3, C.sb, M.sd); \
PERM_ELT(A.s2, A.s1, B.se, B.sb, B.s7, B.s4, C.sa, M.se); \
PERM_ELT(A.s3, A.s2, B.sf, B.sc, B.s8, B.s5, C.s9, M.sf); \
\
PERM_ELT(A.s4, A.s3, B.s0, B.sd, B.s9, B.s6, C.s8, M.s0); \
PERM_ELT(A.s5, A.s4, B.s1, B.se, B.sa, B.s7, C.s7, M.s1); \
PERM_ELT(A.s6, A.s5, B.s2, B.sf, B.sb, B.s8, C.s6, M.s2); \
PERM_ELT(A.s7, A.s6, B.s3, B.s0, B.sc, B.s9, C.s5, M.s3); \
PERM_ELT(A.s8, A.s7, B.s4, B.s1, B.sd, B.sa, C.s4, M.s4); \
PERM_ELT(A.s9, A.s8, B.s5, B.s2, B.se, B.sb, C.s3, M.s5); \
PERM_ELT(A.sa, A.s9, B.s6, B.s3, B.sf, B.sc, C.s2, M.s6); \
PERM_ELT(A.sb, A.sa, B.s7, B.s4, B.s0, B.sd, C.s1, M.s7); \
PERM_ELT(A.s0, A.sb, B.s8, B.s5, B.s1, B.se, C.s0, M.s8); \
PERM_ELT(A.s1, A.s0, B.s9, B.s6, B.s2, B.sf, C.sF, M.s9); \
PERM_ELT(A.s2, A.s1, B.sa, B.s7, B.s3, B.s0, C.sE, M.sa); \
PERM_ELT(A.s3, A.s2, B.sb, B.s8, B.s4, B.s1, C.sD, M.sb); \
PERM_ELT(A.s4, A.s3, B.sc, B.s9, B.s5, B.s2, C.sC, M.sc); \
PERM_ELT(A.s5, A.s4, B.sd, B.sa, B.s6, B.s3, C.sB, M.sd); \
PERM_ELT(A.s6, A.s5, B.se, B.sb, B.s7, B.s4, C.sA, M.se); \
PERM_ELT(A.s7, A.s6, B.sf, B.sc, B.s8, B.s5, C.s9, M.sf); \
\
PERM_ELT(A.s8, A.s7, B.s0, B.sd, B.s9, B.s6, C.s8, M.s0); \
PERM_ELT(A.s9, A.s8, B.s1, B.se, B.sa, B.s7, C.s7, M.s1); \
PERM_ELT(A.sa, A.s9, B.s2, B.sf, B.sb, B.s8, C.s6, M.s2); \
PERM_ELT(A.sb, A.sa, B.s3, B.s0, B.sc, B.s9, C.s5, M.s3); \
PERM_ELT(A.s0, A.sb, B.s4, B.s1, B.sd, B.sa, C.s4, M.s4); \
PERM_ELT(A.s1, A.s0, B.s5, B.s2, B.se, B.sb, C.s3, M.s5); \
PERM_ELT(A.s2, A.s1, B.s6, B.s3, B.sf, B.sc, C.s2, M.s6); \
PERM_ELT(A.s3, A.s2, B.s7, B.s4, B.s0, B.sd, C.s1, M.s7); \
PERM_ELT(A.s4, A.s3, B.s8, B.s5, B.s1, B.se, C.s0, M.s8); \
PERM_ELT(A.s5, A.s4, B.s9, B.s6, B.s2, B.sf, C.sF, M.s9); \
PERM_ELT(A.s6, A.s5, B.sa, B.s7, B.s3, B.s0, C.sE, M.sa); \
PERM_ELT(A.s7, A.s6, B.sb, B.s8, B.s4, B.s1, C.sD, M.sb); \
PERM_ELT(A.s8, A.s7, B.sc, B.s9, B.s5, B.s2, C.sC, M.sc); \
PERM_ELT(A.s9, A.s8, B.sd, B.sa, B.s6, B.s3, C.sB, M.sd); \
PERM_ELT(A.sa, A.s9, B.se, B.sb, B.s7, B.s4, C.sA, M.se); \
PERM_ELT(A.sb, A.sa, B.sf, B.sc, B.s8, B.s5, C.s9, M.sf); \
} while(0)


/*
 * SWAP_BC_V - exchange the B and C state vectors.
 *
 * Expects variables named B and C of type uint16 to be in scope at the
 * expansion site.
 *
 * The macro is defined exactly once.  It was previously defined twice with
 * different bodies and no #undef in between; the second definition (the one
 * that took effect) was an unbraced three-statement XOR swap, which
 * misbehaves under an unbraced if/else and would zero both operands if B
 * and C ever named the same object.  A temporary-based swap wrapped in
 * do { } while(0) expands to a single statement and has neither problem.
 */
#define SWAP_BC_V do { \
	uint16 swap_bc_tmp_ = B; \
	B = C; \
	C = swap_bc_tmp_; \
} while(0)

/*
 * Twelve 32-bit initial words for the A state, stored in __constant memory.
 * The search27 kernel in x16.cl copies entries 0..11 into A.s0..A.sb before
 * the first message block is processed.
 */
__constant static const uint A_init_512_wolf[] = {
0x20728DFDU, 0x46C0BD53U, 0xE782B699U, 0x55304632U,
0x71B4EF90U, 0x0EA9E82CU, 0xDBB930F1U, 0xFAD06B8BU,
0xBE0CAE40U, 0x8BD14410U, 0x76D2ADACU, 0x28ACAB7FU
};

/*
 * Sixteen 32-bit initial words, named for the B state, stored in __constant
 * memory.
 *
 * NOTE(review): the search27 kernel in x16.cl loads B with
 * vload16(0, B_init_512) rather than from this table - confirm whether this
 * array is referenced anywhere or was meant to be used there.
 */
__constant static const uint B_init_512_wolf[] = {
0xC1099CB7U, 0x07B385F3U, 0xE7442C26U, 0xCC8AD640U,
0xEB6F56C7U, 0x1EA81AA9U, 0x73B9D314U, 0x1DE85D08U,
0x48910A5AU, 0x893B22DBU, 0xC5A0DF44U, 0xBBC4324EU,
0x72D2F240U, 0x75941D99U, 0x6D8BDE82U, 0xA1A7502BU
};

/*
 * Sixteen 32-bit initial words, named for the C state, stored in __constant
 * memory.
 *
 * NOTE(review): the search27 kernel in x16.cl loads C with
 * vload16(0, C_init_512) rather than from this table - confirm whether this
 * array is referenced anywhere or was meant to be used there.
 */
__constant static const uint C_init_512_wolf[] = {
0xD9BF68D1U, 0x58BAD750U, 0x56028CB2U, 0x8134F359U,
0xB5D469D8U, 0x941A8CC2U, 0x418B2A6EU, 0x04052780U,
0x7F07D787U, 0x5194358FU, 0x3C60D665U, 0xBE97D79AU,
0x950C3434U, 0xAED9A06DU, 0x2537DC8DU, 0x7CDB5969U
};

#endif
136 changes: 74 additions & 62 deletions kernel/x16.cl
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ ulong FAST_ROTL64_HI(const uint2 x, const uint y) { return(as_ulong(amd_bitalign
#include "hamsi.cl"
#include "fugue.cl"
#include "shabal.cl"
#include "wolf-shabal.cl"
#include "whirlpool.cl"
#include "wolf-sha512.cl"

Expand Down Expand Up @@ -2349,69 +2350,80 @@ __kernel void search27(__global hash_t* hashes)
uint offset = get_global_offset(0);
__global hash_t *hash = &(hashes[gid-offset]);

// shabal
sph_u32 A00 = A_init_512[0], A01 = A_init_512[1], A02 = A_init_512[2], A03 = A_init_512[3], A04 = A_init_512[4], A05 = A_init_512[5], A06 = A_init_512[6], A07 = A_init_512[7],
A08 = A_init_512[8], A09 = A_init_512[9], A0A = A_init_512[10], A0B = A_init_512[11];
sph_u32 B0 = B_init_512[0], B1 = B_init_512[1], B2 = B_init_512[2], B3 = B_init_512[3], B4 = B_init_512[4], B5 = B_init_512[5], B6 = B_init_512[6], B7 = B_init_512[7],
B8 = B_init_512[8], B9 = B_init_512[9], BA = B_init_512[10], BB = B_init_512[11], BC = B_init_512[12], BD = B_init_512[13], BE = B_init_512[14], BF = B_init_512[15];
sph_u32 C0 = C_init_512[0], C1 = C_init_512[1], C2 = C_init_512[2], C3 = C_init_512[3], C4 = C_init_512[4], C5 = C_init_512[5], C6 = C_init_512[6], C7 = C_init_512[7],
C8 = C_init_512[8], C9 = C_init_512[9], CA = C_init_512[10], CB = C_init_512[11], CC = C_init_512[12], CD = C_init_512[13], CE = C_init_512[14], CF = C_init_512[15];
sph_u32 M0, M1, M2, M3, M4, M5, M6, M7, M8, M9, MA, MB, MC, MD, ME, MF;
sph_u32 Wlow = 1, Whigh = 0;

M0 = hash->h4[0];
M1 = hash->h4[1];
M2 = hash->h4[2];
M3 = hash->h4[3];
M4 = hash->h4[4];
M5 = hash->h4[5];
M6 = hash->h4[6];
M7 = hash->h4[7];
M8 = hash->h4[8];
M9 = hash->h4[9];
MA = hash->h4[10];
MB = hash->h4[11];
MC = hash->h4[12];
MD = hash->h4[13];
ME = hash->h4[14];
MF = hash->h4[15];

INPUT_BLOCK_ADD;
XOR_W;
APPLY_P;
INPUT_BLOCK_SUB;
SWAP_BC;
INCR_W;

M0 = 0x80;
M1 = M2 = M3 = M4 = M5 = M6 = M7 = M8 = M9 = MA = MB = MC = MD = ME = MF = 0;

INPUT_BLOCK_ADD;
XOR_W;
APPLY_P;
const uint idx = get_global_id(0) - get_global_offset(0);

for (unsigned i = 0; i < 3; i ++) {
SWAP_BC;
XOR_W;
APPLY_P;
}

hash->h4[0] = B0;
hash->h4[1] = B1;
hash->h4[2] = B2;
hash->h4[3] = B3;
hash->h4[4] = B4;
hash->h4[5] = B5;
hash->h4[6] = B6;
hash->h4[7] = B7;
hash->h4[8] = B8;
hash->h4[9] = B9;
hash->h4[10] = BA;
hash->h4[11] = BB;
hash->h4[12] = BC;
hash->h4[13] = BD;
hash->h4[14] = BE;
hash->h4[15] = BF;
// shabal
uint16 A, B, C, M;
uint Wlow = 1;

A.s0 = A_init_512_wolf[0];
A.s1 = A_init_512_wolf[1];
A.s2 = A_init_512_wolf[2];
A.s3 = A_init_512_wolf[3];
A.s4 = A_init_512_wolf[4];
A.s5 = A_init_512_wolf[5];
A.s6 = A_init_512_wolf[6];
A.s7 = A_init_512_wolf[7];
A.s8 = A_init_512_wolf[8];
A.s9 = A_init_512_wolf[9];
A.sa = A_init_512_wolf[10];
A.sb = A_init_512_wolf[11];

B = vload16(0, B_init_512);
C = vload16(0, C_init_512);
M = vload16(0, hash->h4);

// INPUT_BLOCK_ADD
B += M;

// XOR_W
//do { A.s0 ^= Wlow; } while(0);
A.s0 ^= Wlow;

// APPLY_P
B = rotate(B, 17U);
SHABAL_PERM_V;

uint16 tmpC1, tmpC2, tmpC3;

tmpC1 = shuffle2(C, (uint16)0, (uint16)(11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 17, 17, 17, 17));
tmpC2 = shuffle2(C, (uint16)0, (uint16)(15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 17, 17, 17, 17));
tmpC3 = shuffle2(C, (uint16)0, (uint16)(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 17, 17, 17, 17));

A += tmpC1 + tmpC2 + tmpC3;

// INPUT_BLOCK_SUB
C -= M;

++Wlow;
M = 0;
M.s0 = 0x80;

#pragma unroll 2
for(int i = 0; i < 4; ++i)
{
SWAP_BC_V;

// INPUT_BLOCK_ADD
if(i == 0) B.s0 += M.s0;

// XOR_W;
A.s0 ^= Wlow;

// APPLY_P
B = rotate(B, 17U);
SHABAL_PERM_V;

if(i == 3) break;

tmpC1 = shuffle2(C, (uint16)0, (uint16)(11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 17, 17, 17, 17));
tmpC2 = shuffle2(C, (uint16)0, (uint16)(15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 17, 17, 17, 17));
tmpC3 = shuffle2(C, (uint16)0, (uint16)(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 17, 17, 17, 17));

A += tmpC1 + tmpC2 + tmpC3;
}

vstore16(B, 0, hash->h4);

barrier(CLK_GLOBAL_MEM_FENCE);
}
Expand Down
2 changes: 1 addition & 1 deletion sgminer.c
Original file line number Diff line number Diff line change
Expand Up @@ -1442,7 +1442,7 @@ char *set_benchmark_sequence(char *arg)
uint i;
for (i = 0; i < strlen(arg); i++) {
if (!( ('0' <= arg[i] <= '9') || ('A' <= arg[i] <= 'F')))
//return sprintf("Invalid hex digit %c", arg[i]);
return (char*) sprintf("Invalid hex digit %c", (const char*) arg[i]);
if (arg[i] >= 'A')
opt_benchmark_seq[i] = arg[i] - 'A' + 10;
else
Expand Down
12 changes: 6 additions & 6 deletions winbuild/dist/include/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,12 @@

#endif

#define VERSION "v5.2.0"
#define PACKAGE_NAME "sgminer"
#define PACKAGE_TARNAME "sgminer"
#define PACKAGE_VERSION "5.2.0"
#define PACKAGE_STRING "sgminer 5.2.0"
#define PACKAGE "sgminer"
#define VERSION "v5.5.6"
#define PACKAGE_NAME "sgminer-gm-kl"
#define PACKAGE_TARNAME "sgminer-gm-kl"
#define PACKAGE_VERSION "5.5.6"
#define PACKAGE_STRING "sgminer-gm-kl 5.5.6"
#define PACKAGE "sgminer-gm-kl"

#define SGMINER_PREFIX ""

Expand Down

0 comments on commit b5f2f08

Please sign in to comment.