Merge pull request #167 from AstroAccelerateOrg/cc_error_checking

Cc error checking
AstroAccelerateOrg · Mar 5, 2019 · 17e4ba5 · 17e4ba5
2 parents 55ab8cc + c84f3ee
commit 17e4ba5
Show file tree

Hide file tree

Showing 28 changed files with 880 additions and 308 deletions.
diff --git a/include/aa_device_MSD.hpp b/include/aa_device_MSD.hpp
@@ -3,8 +3,6 @@
 
 #include <vector>
 
-#include <helper_cuda.h>
-
 #include "aa_device_MSD_Configuration.hpp"
 #include "aa_device_MSD_shared_kernel_functions.hpp"
 #include "aa_device_MSD_normal_kernel.hpp"

diff --git a/include/aa_device_SPS_long.hpp b/include/aa_device_SPS_long.hpp
@@ -5,8 +5,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 
-#include <helper_cuda.h>
-
 #include "aa_params.hpp"
 #include "aa_device_BC_plan.hpp"
 #include "aa_device_SPS_long_kernel.hpp"

diff --git a/include/aa_device_peak_find.hpp b/include/aa_device_peak_find.hpp
@@ -5,7 +5,6 @@
 
 #include <vector>
 #include <npp.h>
-#include <helper_cuda.h>
 
 #include "aa_params.hpp"
 #include "aa_device_peak_find_kernel.hpp"

diff --git a/include/aa_fdas_host.hpp b/include/aa_fdas_host.hpp
@@ -17,7 +17,6 @@
 #include "aa_params.hpp"
 #include "aa_fdas_test_parameters.hpp"
 //#include <helper_functions.h>
-#include <helper_cuda.h>
 #include <curand.h>
 #include <libgen.h>
 //#include <random> // C++11 to use normal distribution

diff --git a/include/aa_permitted_pipelines_1.hpp b/include/aa_permitted_pipelines_1.hpp
@@ -3,7 +3,6 @@
 
 #include <cuda.h>
 #include <cuda_runtime.h>
-#include <helper_cuda.h>
 
 #include <stdio.h>
 #include <fstream>
@@ -335,33 +334,33 @@ namespace astroaccelerate {
 
       const int *ndms = m_ddtr_strategy.ndms_data();
 
-      checkCudaErrors(cudaGetLastError());
+      //checkCudaErrors(cudaGetLastError());
       load_data(-1, inBin.data(), d_input, &m_input_buffer[(long int) ( inc * nchans )], t_processed[0][t], maxshift, nchans, dmshifts);
-      checkCudaErrors(cudaGetLastError());
+      //checkCudaErrors(cudaGetLastError());
 
       if(zero_dm_type == aa_pipeline::component_option::zero_dm) {
 	zero_dm(d_input, nchans, t_processed[0][t]+maxshift, nbits);
       }
 
-      checkCudaErrors(cudaGetLastError());
+      //checkCudaErrors(cudaGetLastError());
 
 
       if(zero_dm_type == aa_pipeline::component_option::zero_dm_with_outliers) {
 	zero_dm_outliers(d_input, nchans, t_processed[0][t]+maxshift);
       }
 
-      checkCudaErrors(cudaGetLastError());
+      //checkCudaErrors(cudaGetLastError());
 
       corner_turn(d_input, d_output, nchans, t_processed[0][t] + maxshift);
 
-      checkCudaErrors(cudaGetLastError());
+      //checkCudaErrors(cudaGetLastError());
 
       if(enable_old_rfi) {
 	printf("\nPerforming old GPU rfi...");
 	rfi_gpu(d_input, nchans, t_processed[0][t]+maxshift);
       }
 
-      checkCudaErrors(cudaGetLastError());
+      //checkCudaErrors(cudaGetLastError());
 
       int oldBin = 1;
       for(size_t dm_range = 0; dm_range < range; dm_range++) {
@@ -371,19 +370,19 @@ namespace astroaccelerate {
 	maxshift = maxshift_original / inBin[dm_range];
 
 	cudaDeviceSynchronize();
-	checkCudaErrors(cudaGetLastError());
+	//checkCudaErrors(cudaGetLastError());
 
 	load_data(dm_range, inBin.data(), d_input, &m_input_buffer[(long int) ( inc * nchans )], t_processed[dm_range][t], maxshift, nchans, dmshifts);
 
-	checkCudaErrors(cudaGetLastError());
+	//checkCudaErrors(cudaGetLastError());
 
 
 	if (inBin[dm_range] > oldBin) {
 	  bin_gpu(d_input, d_output, nchans, t_processed[dm_range - 1][t] + maxshift * inBin[dm_range]);
 	  ( tsamp ) = ( tsamp ) * 2.0f;
 	}
 
-	checkCudaErrors(cudaGetLastError());
+	//checkCudaErrors(cudaGetLastError());
 
 	dedisperse(dm_range, t_processed[dm_range][t], inBin.data(), dmshifts, d_input, d_output, nchans, &tsamp, dm_low.data(), dm_step.data(), ndms, nbits, failsafe);
 
@@ -392,7 +391,7 @@ namespace astroaccelerate {
 	    save_data_offset(d_output, k * t_processed[dm_range][t], m_output_buffer[dm_range][k], inc / inBin[dm_range], sizeof(float) * t_processed[dm_range][t]);
 	  }
 	}
-	checkCudaErrors(cudaGetLastError());
+	//checkCudaErrors(cudaGetLastError());
 	oldBin = inBin[dm_range];
       }
 

diff --git a/include/aa_permitted_pipelines_2.hpp b/include/aa_permitted_pipelines_2.hpp
@@ -3,7 +3,6 @@
 
 #include <cuda.h>
 #include <cuda_runtime.h>
-#include <helper_cuda.h>
 
 #include <stdio.h>
 #include "aa_pipeline.hpp"
@@ -187,7 +186,10 @@ namespace astroaccelerate {
       printf("\n\n\n%d\n\n\n", time_samps);
       size_t gpu_inputsize = (size_t) time_samps * (size_t) nchans * sizeof(unsigned short);
 
-      checkCudaErrors( cudaMalloc((void **) d_input, gpu_inputsize) );
+      cudaError_t e = cudaMalloc((void **) d_input, gpu_inputsize);
+      if(e != cudaSuccess) {
+	LOG(log_level::error, "Could not allocate_memory_gpu cudaMalloc in aa_permitted_pipelines_2.hpp (" + std::string(cudaGetErrorString(e)) + ")");
+      }
 
       size_t gpu_outputsize = 0;
       if (nchans < max_ndms) {
@@ -197,7 +199,10 @@ namespace astroaccelerate {
 	gpu_outputsize = (size_t)time_samps * (size_t)nchans * sizeof(float);
       }
 
-      checkCudaErrors( cudaMalloc((void **) d_output, gpu_outputsize) );
+      e = cudaMalloc((void **) d_output, gpu_outputsize);
+      if(e != cudaSuccess) {
+	LOG(log_level::error, "Could not allocate_memory_gpu cudaMalloc in aa_permitted_pipelines_2.hpp (" + std::string(cudaGetErrorString(e)) + ")");
+      }
       cudaMemset(*d_output, 0, gpu_outputsize);
     }
 
@@ -206,9 +211,23 @@ namespace astroaccelerate {
      */
     void allocate_memory_MSD(float **const d_MSD_workarea, unsigned short **d_MSD_output_taps, float **const d_MSD_interpolated,
 			     const unsigned long int &MSD_maxtimesamples, const size_t &MSD_profile_size) {
-      checkCudaErrors(cudaMalloc((void **) d_MSD_workarea,        MSD_maxtimesamples*5.5*sizeof(float)));
-      checkCudaErrors(cudaMalloc((void **) &(*d_MSD_output_taps), sizeof(ushort)*2*MSD_maxtimesamples));
-      checkCudaErrors(cudaMalloc((void **) d_MSD_interpolated,    sizeof(float)*MSD_profile_size));
+      cudaError_t e = cudaMalloc((void **) d_MSD_workarea,        MSD_maxtimesamples*5.5*sizeof(float));
+
+      if(e != cudaSuccess) {
+	LOG(log_level::error, "Could not allocate_memory_MSD cudaMalloc in aa_permitted_pipelines_2.hpp (" + std::string(cudaGetErrorString(e)) + ")");
+      }
+
+      e = cudaMalloc((void **) &(*d_MSD_output_taps), sizeof(ushort)*2*MSD_maxtimesamples);
+
+      if(e != cudaSuccess) {
+	LOG(log_level::error, "Could not allocate_memory_MSD cudaMalloc in aa_permitted_pipelines_2.hpp (" + std::string(cudaGetErrorString(e)) + ")");
+      }
+
+      e = cudaMalloc((void **) d_MSD_interpolated,    sizeof(float)*MSD_profile_size);
+
+      if(e != cudaSuccess) {
+	LOG(log_level::error, "Could not allocate_memory_MSD cudaMalloc in aa_permitted_pipelines_2.hpp (" + std::string(cudaGetErrorString(e)) + ")");
+      }
     }
 
     /**
@@ -351,32 +370,32 @@ namespace astroaccelerate {
 
       const int *ndms = m_ddtr_strategy.ndms_data();
 
-      checkCudaErrors(cudaGetLastError());
+      //checkCudaErrors(cudaGetLastError());
       load_data(-1, inBin.data(), d_input, &m_input_buffer[(long int) ( inc * nchans )], t_processed[0][t], maxshift, nchans, dmshifts);
-      checkCudaErrors(cudaGetLastError());
+      //checkCudaErrors(cudaGetLastError());
 
       if(zero_dm_type == aa_pipeline::component_option::zero_dm) {
 	zero_dm(d_input, nchans, t_processed[0][t]+maxshift, nbits);
       }
 
-      checkCudaErrors(cudaGetLastError());
+      //checkCudaErrors(cudaGetLastError());
 
       if(zero_dm_type == aa_pipeline::component_option::zero_dm_with_outliers) {
 	zero_dm_outliers(d_input, nchans, t_processed[0][t]+maxshift);
       }
 
-      checkCudaErrors(cudaGetLastError());
+      //checkCudaErrors(cudaGetLastError());
 
       corner_turn(d_input, d_output, nchans, t_processed[0][t] + maxshift);
 
-      checkCudaErrors(cudaGetLastError());
+      //checkCudaErrors(cudaGetLastError());
 
       if(enable_old_rfi) {
 	printf("\nPerforming old GPU rfi...");
 	rfi_gpu(d_input, nchans, t_processed[0][t]+maxshift);
       }
 
-      checkCudaErrors(cudaGetLastError());
+      //checkCudaErrors(cudaGetLastError());
 
       int oldBin = 1;
 
@@ -393,23 +412,23 @@ namespace astroaccelerate {
 	maxshift = maxshift_original / inBin[dm_range];
 
 	cudaDeviceSynchronize();
-	checkCudaErrors(cudaGetLastError());
+	//checkCudaErrors(cudaGetLastError());
 
 	load_data(dm_range, inBin.data(), d_input, &m_input_buffer[(long int) ( inc * nchans )], t_processed[dm_range][t], maxshift, nchans, dmshifts);
 
-	checkCudaErrors(cudaGetLastError());
+	//checkCudaErrors(cudaGetLastError());
 
 
 	if (inBin[dm_range] > oldBin) {
 	  bin_gpu(d_input, d_output, nchans, t_processed[dm_range - 1][t] + maxshift * inBin[dm_range]);
 	  ( tsamp ) = ( tsamp ) * 2.0f;
 	}
 
-	checkCudaErrors(cudaGetLastError());
+	//checkCudaErrors(cudaGetLastError());
 
 	dedisperse(dm_range, t_processed[dm_range][t], inBin.data(), dmshifts, d_input, d_output, nchans, &tsamp, dm_low.data(), dm_step.data(), ndms, nbits, failsafe);
 
-	checkCudaErrors(cudaGetLastError());
+	//checkCudaErrors(cudaGetLastError());
 
 	if(dump_to_user) {
 	  for (int k = 0; k < ndms[dm_range]; k++) {

diff --git a/include/aa_permitted_pipelines_3.hpp b/include/aa_permitted_pipelines_3.hpp
@@ -3,7 +3,6 @@
 
 #include <cuda.h>
 #include <cuda_runtime.h>
-#include <helper_cuda.h>
 
 #include <stdio.h>
 #include "aa_pipeline.hpp"
@@ -188,7 +187,10 @@ namespace astroaccelerate {
       printf("\n\n\n%d\n\n\n", time_samps);
       size_t gpu_inputsize = (size_t) time_samps * (size_t) nchans * sizeof(unsigned short);
 
-      checkCudaErrors( cudaMalloc((void **) d_input, gpu_inputsize) );
+      cudaError_t e = cudaMalloc((void **) d_input, gpu_inputsize);
+      if(e != cudaSuccess) {
+	LOG(log_level::error, "Could not allocate_memory_gpu cudaMalloc in aa_permitted_pipelines_3.hpp (" + std::string(cudaGetErrorString(e)) + ")");
+      }
 
       size_t gpu_outputsize = 0;
       if (nchans < max_ndms) {
@@ -198,7 +200,11 @@ namespace astroaccelerate {
 	gpu_outputsize = (size_t)time_samps * (size_t)nchans * sizeof(float);
       }
 
-      checkCudaErrors( cudaMalloc((void **) d_output, gpu_outputsize) );
+      e = cudaMalloc((void **) d_output, gpu_outputsize);
+      if(e != cudaSuccess) {
+	LOG(log_level::error, "Could not allocate_memory_gpu cudaMalloc in aa_permitted_pipelines_3.hpp (" + std::string(cudaGetErrorString(e)) + ")");
+      }
+
       cudaMemset(*d_output, 0, gpu_outputsize);
     }
 
@@ -207,11 +213,25 @@ namespace astroaccelerate {
      */
     void allocate_memory_MSD(float **const d_MSD_workarea, unsigned short **const d_MSD_output_taps, float **const d_MSD_interpolated,
 			     const unsigned long int &MSD_maxtimesamples, const size_t &MSD_profile_size) {
-      checkCudaErrors(cudaMalloc((void **) d_MSD_workarea,        MSD_maxtimesamples*5.5*sizeof(float)));
-      checkCudaErrors(cudaMalloc((void **) &(*d_MSD_output_taps), sizeof(ushort)*2*MSD_maxtimesamples));
-      checkCudaErrors(cudaMalloc((void **) d_MSD_interpolated,    sizeof(float)*MSD_profile_size));
+      cudaError_t e = cudaMalloc((void **) d_MSD_workarea,        MSD_maxtimesamples*5.5*sizeof(float));
+
+      if(e != cudaSuccess) {
+	LOG(log_level::error, "Could not allocate_memory_MSD cudaMalloc in aa_permitted_pipelines_3.hpp (" + std::string(cudaGetErrorString(e)) + ")");
+      }
+
+      e = cudaMalloc((void **) &(*d_MSD_output_taps), sizeof(ushort)*2*MSD_maxtimesamples);
+
+      if(e != cudaSuccess) {
+	LOG(log_level::error, "Could not allocate_memory_MSD cudaMalloc in aa_permitted_pipelines_3.hpp (" + std::string(cudaGetErrorString(e)) + ")");
+      }
+
+      e = cudaMalloc((void **) d_MSD_interpolated,    sizeof(float)*MSD_profile_size);
+
+      if(e != cudaSuccess) {
+	LOG(log_level::error, "Could not allocate_memory_MSD cudaMalloc in aa_permitted_pipelines_3.hpp (" + std::string(cudaGetErrorString(e)) + ")");
+      }
     }
-
+    
     /**
      * \brief Allocate a 3D array that is an output buffer that stores dedispersed array data.
      * \details This array is used by periodicity.
@@ -349,32 +369,32 @@ namespace astroaccelerate {
 
       const int *ndms = m_ddtr_strategy.ndms_data();
 
-      checkCudaErrors(cudaGetLastError());
+      //checkCudaErrors(cudaGetLastError());
       load_data(-1, inBin.data(), d_input, &m_input_buffer[(long int) ( inc * nchans )], t_processed[0][t], maxshift, nchans, dmshifts);
-      checkCudaErrors(cudaGetLastError());
+      //checkCudaErrors(cudaGetLastError());
 
       if(zero_dm_type == aa_pipeline::component_option::zero_dm) {
 	zero_dm(d_input, nchans, t_processed[0][t]+maxshift, nbits);
       }
 
-      checkCudaErrors(cudaGetLastError());
+      //checkCudaErrors(cudaGetLastError());
 
       if(zero_dm_type == aa_pipeline::component_option::zero_dm_with_outliers) {
 	zero_dm_outliers(d_input, nchans, t_processed[0][t]+maxshift);
       }
 
-      checkCudaErrors(cudaGetLastError());
+      //checkCudaErrors(cudaGetLastError());
 
       corner_turn(d_input, d_output, nchans, t_processed[0][t] + maxshift);
 
-      checkCudaErrors(cudaGetLastError());
+      //checkCudaErrors(cudaGetLastError());
 
       if(enable_old_rfi) {
 	printf("\nPerforming old GPU rfi...");
 	rfi_gpu(d_input, nchans, t_processed[0][t]+maxshift);
       }
 
-      checkCudaErrors(cudaGetLastError());
+      //checkCudaErrors(cudaGetLastError());
 
       int oldBin = 1;
       for(size_t dm_range = 0; dm_range < range; dm_range++) {
@@ -384,23 +404,23 @@ namespace astroaccelerate {
 	maxshift = maxshift_original / inBin[dm_range];
 
 	cudaDeviceSynchronize();
-	checkCudaErrors(cudaGetLastError());
+	//checkCudaErrors(cudaGetLastError());
 
 	load_data(dm_range, inBin.data(), d_input, &m_input_buffer[(long int) ( inc * nchans )], t_processed[dm_range][t], maxshift, nchans, dmshifts);
 
-	checkCudaErrors(cudaGetLastError());
+	//checkCudaErrors(cudaGetLastError());
 
 
 	if (inBin[dm_range] > oldBin) {
 	  bin_gpu(d_input, d_output, nchans, t_processed[dm_range - 1][t] + maxshift * inBin[dm_range]);
 	  ( tsamp ) = ( tsamp ) * 2.0f;
 	}
 
-	checkCudaErrors(cudaGetLastError());
+	//checkCudaErrors(cudaGetLastError());
 
 	dedisperse(dm_range, t_processed[dm_range][t], inBin.data(), dmshifts, d_input, d_output, nchans, &tsamp, dm_low.data(), dm_step.data(), ndms, nbits, failsafe);
 
-	checkCudaErrors(cudaGetLastError());
+	//checkCudaErrors(cudaGetLastError());
 
 	for (int k = 0; k < ndms[dm_range]; k++) {
 	  save_data_offset(d_output, k * t_processed[dm_range][t], m_output_buffer[dm_range][k], inc / inBin[dm_range], sizeof(float) * t_processed[dm_range][t]);