Permalink
Browse files

2GB cudaMalloc limit for 32bit builds

1 parent 31b27c7 commit b4a1b59b6a72be367f51a8d4d568d06f56456d5a @KlausT committed Jun 15, 2016
Showing 19 changed files with 182 additions and 3 deletions.
  1. +10 −0 cuda_nist5.cu
  2. +9 −0 fuguecoin.cpp
  3. +1 −1 lyra2/lyra2REv2.cu
  4. +9 −0 myriadgroestl.cpp
  5. +11 −0 neoscrypt/neoscrypt.cu
  6. +10 −0 pentablake.cu
  7. +11 −2 quark/quarkcoin.cu
  8. +10 −0 qubit/deep.cu
  9. +10 −0 qubit/doom.cu
  10. +11 −0 qubit/qubit.cu
  11. +10 −0 x11/c11.cu
  12. +10 −0 x11/fresh.cu
  13. +10 −0 x11/s3.cu
  14. +10 −0 x11/x11.cu
  15. +10 −0 x13/x13.cu
  16. +10 −0 x15/whirlpool.cu
  17. +10 −0 x15/x14.cu
  18. +10 −0 x15/x15.cu
  19. +10 −0 x17/x17.cu
View
@@ -89,6 +89,16 @@ extern int scanhash_nist5(int thr_id, uint32_t *pdata,
cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
get_cuda_arch(&cuda_arch[thr_id]);
+#if defined WIN32 && !defined _WIN64
+ // 2GB limit for cudaMalloc
+ if(throughput > 0x7fffffffULL / (16 * sizeof(uint32_t)))
+ {
+ applog(LOG_ERR, "intensity too high");
+ mining_has_stopped[thr_id] = true;
+ cudaStreamDestroy(gpustream[thr_id]);
+ proper_exit(2);
+ }
+#endif
// Konstanten kopieren, Speicher belegen
quark_groestl512_cpu_init(thr_id, throughput);
View
@@ -41,6 +41,15 @@ extern int scanhash_fugue256(int thr_id, uint32_t *pdata, uint32_t *ptarget,
static THREAD volatile bool init = false;
if(!init)
{
+#if defined WIN32 && !defined _WIN64
+ // 2GB limit for cudaMalloc
+ if(throughputmax > 0x7fffffffULL / (8 * sizeof(uint32_t)))
+ {
+ applog(LOG_ERR, "intensity too high");
+ mining_has_stopped[thr_id] = true;
+ proper_exit(2);
+ }
+#endif
fugue256_cpu_init(thr_id, throughputmax);
init = true;
}
View
@@ -125,7 +125,7 @@ int scanhash_lyra2v2(int thr_id, uint32_t *pdata,
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
-#if !defined _WIN64
+#if defined WIN32 && !defined _WIN64
// 2GB limit for cudaMalloc
if(throughputmax > 0x7fffffffULL / (16 * 4 * 4 * sizeof(uint64_t)))
{
View
@@ -57,6 +57,15 @@ extern int scanhash_myriad(int thr_id, uint32_t *pdata, uint32_t *ptarget,
{
#if BIG_DEBUG
#else
+#if defined WIN32 && !defined _WIN64
+ // 2GB limit for cudaMalloc
+ if(throughputmax > 0x7fffffffULL / (16 * sizeof(uint32_t)))
+ {
+ applog(LOG_ERR, "intensity too high");
+ mining_has_stopped[thr_id] = true;
+ proper_exit(2);
+ }
+#endif
myriadgroestl_cpu_init(thr_id, throughputmax);
#endif
cudaMallocHost(&h_found, 4 * sizeof(uint32_t));
@@ -88,6 +88,17 @@ int scanhash_neoscrypt(bool stratum, int thr_id, uint32_t *pdata,
CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
CUDA_SAFE_CALL(cudaMallocHost(&foundNonce, 2 * 4));
+#if defined WIN32 && !defined _WIN64
+ // 2GB limit for cudaMalloc
+ if(throughputmax > 0x7fffffffULL / (32 * 128 * sizeof(uint64_t)))
+ {
+ applog(LOG_ERR, "intensity too high");
+ mining_has_stopped[thr_id] = true;
+ cudaStreamDestroy(gpustream[thr_id]);
+ proper_exit(2);
+ }
+#endif
+
neoscrypt_cpu_init_2stream(thr_id, throughputmax);
init = true;
}
View
@@ -457,6 +457,16 @@ extern int scanhash_pentablake(int thr_id, uint32_t *pdata, uint32_t *ptarget,
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
+#if defined WIN32 && !defined _WIN64
+ // 2GB limit for cudaMalloc
+ if(throughputmax > 0x7fffffffULL / 64)
+ {
+ applog(LOG_ERR, "intensity too high");
+ mining_has_stopped[thr_id] = true;
+ cudaStreamDestroy(gpustream[thr_id]);
+ proper_exit(2);
+ }
+#endif
CUDA_SAFE_CALL(cudaMalloc(&d_hash, 64 * throughputmax));
CUDA_SAFE_CALL(cudaMallocHost(&h_resNounce[thr_id], 2*sizeof(uint32_t)));
CUDA_SAFE_CALL(cudaMalloc(&d_resNounce[thr_id], 2*sizeof(uint32_t)));
View
@@ -154,6 +154,17 @@ extern int scanhash_quark(int thr_id, uint32_t *pdata,
get_cuda_arch(&cuda_arch[thr_id]);
// }
+#if defined WIN32 && !defined _WIN64
+ // 2GB limit for cudaMalloc
+ if(throughputmax > 0x7fffffffULL / (16 * sizeof(uint32_t)))
+ {
+ applog(LOG_ERR, "intensity too high");
+ mining_has_stopped[thr_id] = true;
+ cudaStreamDestroy(gpustream[thr_id]);
+ proper_exit(2);
+ }
+#endif
+
// Konstanten kopieren, Speicher belegen
CUDA_SAFE_CALL(cudaMalloc(&d_hash, 16 * sizeof(uint32_t) * throughputmax));
CUDA_SAFE_CALL(cudaMallocHost(&foundnonces, 4 * 4));
@@ -166,8 +177,6 @@ extern int scanhash_quark(int thr_id, uint32_t *pdata,
CUDA_SAFE_CALL(cudaMalloc(&d_branch2Nonces, sizeof(uint32_t)*noncebuffersize2));
CUDA_SAFE_CALL(cudaMalloc(&d_branch3Nonces, sizeof(uint32_t)*noncebuffersize));
quark_blake512_cpu_init(thr_id);
- quark_groestl512_cpu_init(thr_id, throughputmax);
- quark_bmw512_cpu_init(thr_id, throughputmax);
quark_compactTest_cpu_init(thr_id, throughputmax);
quark_keccak512_cpu_init(thr_id);
quark_jh512_cpu_init(thr_id);
View
@@ -72,6 +72,16 @@ extern int scanhash_deep(int thr_id, uint32_t *pdata,
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
+#if defined WIN32 && !defined _WIN64
+ // 2GB limit for cudaMalloc
+ if(throughputmax > 0x7fffffffULL / (16 * sizeof(uint32_t)))
+ {
+ applog(LOG_ERR, "intensity too high");
+ mining_has_stopped[thr_id] = true;
+ cudaStreamDestroy(gpustream[thr_id]);
+ proper_exit(2);
+ }
+#endif
CUDA_SAFE_CALL(cudaMalloc(&d_hash, 16 * sizeof(uint32_t) * throughputmax));
View
@@ -51,6 +51,16 @@ extern int scanhash_doom(int thr_id, uint32_t *pdata,
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
+#if defined WIN32 && !defined _WIN64
+ // 2GB limit for cudaMalloc
+ if(throughputmax > 0x7fffffffULL / (16 * sizeof(uint32_t)))
+ {
+ applog(LOG_ERR, "intensity too high");
+ mining_has_stopped[thr_id] = true;
+ cudaStreamDestroy(gpustream[thr_id]);
+ proper_exit(2);
+ }
+#endif
CUDA_SAFE_CALL(cudaMalloc(&d_hash, 16 * sizeof(uint32_t) * throughputmax));
View
@@ -124,6 +124,17 @@ extern int scanhash_qubit(int thr_id, uint32_t *pdata,
cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
+#if defined WIN32 && !defined _WIN64
+ // 2GB limit for cudaMalloc
+ if(throughputmax > 0x7fffffffULL / (64 * sizeof(uint4)))
+ {
+ applog(LOG_ERR, "intensity too high");
+ mining_has_stopped[thr_id] = true;
+ cudaStreamDestroy(gpustream[thr_id]);
+ proper_exit(2);
+ }
+#endif
+
qubit_luffa512_cpu_init(thr_id, throughputmax);
x11_simd512_cpu_init(thr_id, throughputmax);
x11_echo512_cpu_init(thr_id, throughputmax);
View
@@ -150,6 +150,16 @@ int scanhash_c11(int thr_id, uint32_t *pdata,
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
+#if defined WIN32 && !defined _WIN64
+ // 2GB limit for cudaMalloc
+ if(throughputmax > 0x7fffffffULL / (64 * sizeof(uint4)))
+ {
+ applog(LOG_ERR, "intensity too high");
+ mining_has_stopped[thr_id] = true;
+ cudaStreamDestroy(gpustream[thr_id]);
+ proper_exit(2);
+ }
+#endif
x11_echo512_cpu_init(thr_id, throughputmax);
if(x11_simd512_cpu_init(thr_id, throughputmax) != 0)
View
@@ -89,6 +89,16 @@ extern int scanhash_fresh(int thr_id, uint32_t *pdata,
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
+#if defined WIN32 && !defined _WIN64
+ // 2GB limit for cudaMalloc
+ if(throughputmax > 0x7fffffffULL / (64 * sizeof(uint4)))
+ {
+ applog(LOG_ERR, "intensity too high");
+ mining_has_stopped[thr_id] = true;
+ cudaStreamDestroy(gpustream[thr_id]);
+ proper_exit(2);
+ }
+#endif
x11_simd512_cpu_init(thr_id, throughputmax);
x11_echo512_cpu_init(thr_id, throughputmax);
View
@@ -79,6 +79,16 @@ extern int scanhash_s3(int thr_id, uint32_t *pdata,
cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
get_cuda_arch(&cuda_arch[thr_id]);
+#if defined WIN32 && !defined _WIN64
+ // 2GB limit for cudaMalloc
+ if(throughputmax > 0x7fffffffULL / (64 * sizeof(uint4)))
+ {
+ applog(LOG_ERR, "intensity too high");
+ mining_has_stopped[thr_id] = true;
+ cudaStreamDestroy(gpustream[thr_id]);
+ proper_exit(2);
+ }
+#endif
x11_simd512_cpu_init(thr_id, throughputmax);
quark_skein512_cpu_init(thr_id);
View
@@ -161,6 +161,16 @@ extern int scanhash_x11(int thr_id, uint32_t *pdata,
cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
get_cuda_arch(&cuda_arch[thr_id]);
+#if defined WIN32 && !defined _WIN64
+ // 2GB limit for cudaMalloc
+ if(throughputmax > 0x7fffffffULL / (64 * sizeof(uint4)))
+ {
+ applog(LOG_ERR, "intensity too high");
+ mining_has_stopped[thr_id] = true;
+ cudaStreamDestroy(gpustream[thr_id]);
+ proper_exit(2);
+ }
+#endif
quark_groestl512_cpu_init(thr_id, throughputmax);
quark_bmw512_cpu_init(thr_id, throughputmax);
x11_echo512_cpu_init(thr_id, throughputmax);
View
@@ -174,6 +174,16 @@ extern int scanhash_x13(int thr_id, uint32_t *pdata,
cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
get_cuda_arch(&cuda_arch[thr_id]);
+#if defined WIN32 && !defined _WIN64
+ // 2GB limit for cudaMalloc
+ if(throughputmax > 0x7fffffffULL / (64 * sizeof(uint4)))
+ {
+ applog(LOG_ERR, "intensity too high");
+ mining_has_stopped[thr_id] = true;
+ cudaStreamDestroy(gpustream[thr_id]);
+ proper_exit(2);
+ }
+#endif
quark_groestl512_cpu_init(thr_id, throughputmax);
quark_bmw512_cpu_init(thr_id, throughputmax);
View
@@ -67,6 +67,16 @@ extern int scanhash_whc(int thr_id, uint32_t *pdata,
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
+#if defined WIN32 && !defined _WIN64
+ // 2GB limit for cudaMalloc
+ if(throughputmax > 0x7fffffffULL / (16 * sizeof(uint32_t)))
+ {
+ applog(LOG_ERR, "intensity too high");
+ mining_has_stopped[thr_id] = true;
+ cudaStreamDestroy(gpustream[thr_id]);
+ proper_exit(2);
+ }
+#endif
CUDA_SAFE_CALL(cudaMalloc(&d_hash, 16 * sizeof(uint32_t) * throughputmax));
x15_whirlpool_cpu_init(thr_id, throughputmax, 1 /* old whirlpool */);
View
@@ -173,6 +173,16 @@ extern int scanhash_x14(int thr_id, uint32_t *pdata,
cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
get_cuda_arch(&cuda_arch[thr_id]);
+#if defined WIN32 && !defined _WIN64
+ // 2GB limit for cudaMalloc
+ if(throughputmax > 0x7fffffffULL / (64 * sizeof(uint4)))
+ {
+ applog(LOG_ERR, "intensity too high");
+ mining_has_stopped[thr_id] = true;
+ cudaStreamDestroy(gpustream[thr_id]);
+ proper_exit(2);
+ }
+#endif
quark_groestl512_cpu_init(thr_id, throughputmax);
quark_skein512_cpu_init(thr_id);
View
@@ -184,6 +184,16 @@ extern int scanhash_x15(int thr_id, uint32_t *pdata,
cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
get_cuda_arch(&cuda_arch[thr_id]);
+#if defined WIN32 && !defined _WIN64
+ // 2GB limit for cudaMalloc
+ if(throughputmax > 0x7fffffffULL / (64 * sizeof(uint4)))
+ {
+ applog(LOG_ERR, "intensity too high");
+ mining_has_stopped[thr_id] = true;
+ cudaStreamDestroy(gpustream[thr_id]);
+ proper_exit(2);
+ }
+#endif
quark_groestl512_cpu_init(thr_id, throughputmax);
quark_skein512_cpu_init(thr_id);
View
@@ -203,6 +203,16 @@ extern int scanhash_x17(int thr_id, uint32_t *pdata,
cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
get_cuda_arch(&cuda_arch[thr_id]);
+#if defined WIN32 && !defined _WIN64
+ // 2GB limit for cudaMalloc
+ if(throughputmax > 0x7fffffffULL / (64 * sizeof(uint4)))
+ {
+ applog(LOG_ERR, "intensity too high");
+ mining_has_stopped[thr_id] = true;
+ cudaStreamDestroy(gpustream[thr_id]);
+ proper_exit(2);
+ }
+#endif
quark_groestl512_cpu_init(thr_id, throughputmax);
quark_skein512_cpu_init(thr_id);

0 comments on commit b4a1b59

Please sign in to comment.