Skip to content

Commit

Permalink
ok, I give up. No matter what I do, it doesn't work.
Browse files Browse the repository at this point in the history
  • Loading branch information
KlausT committed Feb 7, 2017
1 parent 74f1752 commit 12af33a
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 26 deletions.
33 changes: 21 additions & 12 deletions pascal/cuda_pascal.cu
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ void pascal_gpu_hash(const uint32_t threads, uint32_t *const result, const uint3
w[30] = w[23] + (ROTR32(0x100, 7) ^ ROTR32(0x100, 18) ^ (0x100 >> 3)) + (ROTR32(w[28], 17) ^ ROTR32(w[28], 19) ^ (w[28] >> 10));
w[31] = 0x100 + w[24] + (ROTR32(w[16], 7) ^ ROTR32(w[16], 18) ^ (w[16] >> 3)) + (ROTR32(w[29], 17) ^ ROTR32(w[29], 19) ^ (w[29] >> 10));
#pragma unroll
for (int i = 32; i < 59; i++)
for (int i = 32; i < 64; i++)
w[i] = w[i - 16] + w[i - 7] + (ROTR32(w[i - 15], 7) ^ ROTR32(w[i - 15], 18) ^ (w[i - 15] >> 3)) + (ROTR32(w[i - 2], 17) ^ ROTR32(w[i - 2], 19) ^ (w[i - 2] >> 10));

d = 0x98c7e2a2U + w[0];
Expand Down Expand Up @@ -273,16 +273,25 @@ void pascal_gpu_hash(const uint32_t threads, uint32_t *const result, const uint3
round(b, c, d, e, f, g, h, a, 55);

round(a, b, c, d, e, f, g, h, 56);
round(h, a, b, c, d, e, f, g, 57);
round(g, h, a, b, c, d, e, f, 58);
round(f, g, h, a, b, c, d, e, 59);
round(e, f, g, h, a, b, c, d, 60);
round(d, e, f, g, h, a, b, c, 61);
round(c, d, e, f, g, h, a, b, 62);
round(b, c, d, e, f, g, h, a, 63);

c += g + rot1(d) + maj(d, e, f) + 0x78a5636fU + w[57];

b += f + rot1(c) + maj(c, d, e) + 0x84c87814U + w[58];

a += e + rot1(b) + maj(b, c, d) + 0x8cc70208U + w[43] + w[52] + (ROTR32(w[44], 7) ^ ROTR32(w[44], 18) ^ (w[44] >> 3)) + (ROTR32(w[57], 17) ^ ROTR32(w[57], 19) ^ (w[57] >> 10));

h += d + rot1(a) + maj(a, b, c) + 0x90befffaU + w[44] + w[53] + (ROTR32(w[45], 7) ^ ROTR32(w[45], 18) ^ (w[45] >> 3)) + (ROTR32(w[58], 17) ^ ROTR32(w[58], 19) ^ (w[58] >> 10));

if (h == 0xa41f32e7)
a += 0x6A09E667;
b += 0xBB67AE85;
c += 0x3C6EF372;
d += 0xA54FF53A;
e += 0x510E527F;
f += 0x9B05688C;
g += 0x1F83D9AB;
h += 0x5BE0CD19;


if (a == 0)
{
uint32_t tmp = atomicCAS(result, 0, nonce);
if (tmp != 0)
Expand All @@ -291,7 +300,7 @@ void pascal_gpu_hash(const uint32_t threads, uint32_t *const result, const uint3
} // nonce loop
} // if thread<threads
}

/*
#define s0(x) (ROTR32(x, 7) ^ ROTR32(x, 18) ^ (x >> 3))
#define s1(x) (ROTR32(x, 17) ^ ROTR32(x, 19) ^ (x >> 10))
__global__ __launch_bounds__(TPB, 2)
Expand Down Expand Up @@ -866,7 +875,7 @@ void pascal_8bytes_gpu_hash(const uint32_t threads, uint32_t *const result, cons
} // nonce loop
} // if thread<threads
}

*/
__host__
void pascal_cpu_hash(int thr_id, uint32_t threads, uint32_t startnonce, uint32_t nonceoffset, uint32_t *ms, uint32_t *h_result)
{
Expand Down
50 changes: 36 additions & 14 deletions pascal/pascal.cu
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,27 @@ void copydata(const uint32_t *data);

#define rrot(x, n) ROTR32(x, n)

/*
 * Check whether a hash meets the target.
 *
 * The eight hash words are paired with the target words in reverse index
 * order: vhash64[0] with ptarget[7], vhash64[1] with ptarget[6], and so on
 * down to vhash64[7] with ptarget[0].  The pairs are compared as the digits
 * of two multi-word numbers, starting with the vhash64[0]/ptarget[7] pair.
 * NOTE(review): this treats vhash64[0] and ptarget[7] as the most
 * significant words, which is assumed from the comparison order -- confirm
 * against the hash and target layout used by the callers.
 *
 * vhash64: hash to test, 8 words.
 * ptarget: target to test against, 8 words.
 *
 * Returns true if the hash is less than or equal to the target,
 * false if it is greater.
 */
bool fulltest_pascal(const uint32_t *vhash64, const uint32_t *ptarget)
{
	for(int i = 0; i < 8; i++)
	{
		const uint32_t hash_word = vhash64[i];
		const uint32_t target_word = ptarget[7 - i];

		if(hash_word > target_word)
		{
			return false;
		}
		/* The first differing word decides the result.  Without this early
		 * exit a hash that is already below the target would still be
		 * rejected when one of its lower-order words happens to be larger
		 * than the matching target word. */
		if(hash_word < target_word)
		{
			return true;
		}
	}
	/* Every word is equal: hash == target is accepted. */
	return true;
}

void pascal_hash(uint32_t *output, const uint32_t *data, uint32_t datasize, uint32_t nonce, const uint32_t *midstate)
{
int i;
Expand Down Expand Up @@ -109,14 +130,14 @@ void pascal_hash(uint32_t *output, const uint32_t *data, uint32_t datasize, uint
b = a;
a = t1 + t2;
}
be32enc(&output[0], a + hc[0]);
be32enc(&output[1], b + hc[1]);
be32enc(&output[2], c + hc[2]);
be32enc(&output[3], d + hc[3]);
be32enc(&output[4], e + hc[4]);
be32enc(&output[5], f + hc[5]);
be32enc(&output[6], g + hc[6]);
be32enc(&output[7], h + hc[7]);
output[0] = a + hc[0];
output[1] = b + hc[1];
output[2] = c + hc[2];
output[3] = d + hc[3];
output[4] = e + hc[4];
output[5] = f + hc[5];
output[6] = g + hc[6];
output[7] = h + hc[7];
}

void pascal_midstate(const uint32_t *data, uint32_t *hc)
Expand Down Expand Up @@ -255,15 +276,16 @@ int scanhash_pascal(int thr_id, uint32_t *pdata, uint32_t datasize,
{
uint32_t vhash64[8] = {0};
pascal_hash(vhash64, pdata, datasize, result[0], ms);
if(!opt_verify || (vhash64[7] == 0 && fulltest(vhash64, ptarget)))

if(!opt_verify || (vhash64[0] == 0 && fulltest_pascal(vhash64, ptarget)))
{
int res = 1;
// check if there was some other ones...
*hashes_done = pdata[datasize / 4 - 1] - first_nonce + throughput;
if(result[1] != 0 && datasize <= 252)
if(result[1] != 0)
{
pascal_hash(vhash64, pdata, datasize, result[1], ms);
if(!opt_verify || (vhash64[7] == 0 && fulltest(vhash64, ptarget)))
if(!opt_verify || (vhash64[0] == 0 && fulltest_pascal(vhash64, ptarget)))
{
pdata[datasize / 4 + 1] = result[1];
res++;
Expand All @@ -272,20 +294,20 @@ int scanhash_pascal(int thr_id, uint32_t *pdata, uint32_t datasize,
}
else
{
if(vhash64[7] > 0)
if(vhash64[0] > 0)
{
applog(LOG_WARNING, "GPU #%d: result for %08x does not validate on CPU!", device_map[thr_id], result[1]);
}
}
}
pdata[datasize / 4 - 1] = swab32(result[0]);
pdata[datasize / 4 - 1] = result[0];
if(opt_benchmark)
applog(LOG_INFO, "GPU #%d Found nounce %08x", device_map[thr_id], result[0]);
return res;
}
else
{
if(vhash64[7] > 0)
if(vhash64[0] > 0)
{
applog(LOG_WARNING, "GPU #%d: result for %08x does not validate on CPU!", device_map[thr_id], result[0]);
}
Expand Down

0 comments on commit 12af33a

Please sign in to comment.