Skip to content

Commit

Permalink
Merge pull request #167 from AstroAccelerateOrg/cc_error_checking
Browse files Browse the repository at this point in the history
Cc error checking
  • Loading branch information
KAdamek committed Mar 5, 2019
2 parents 55ab8cc + c84f3ee commit 17e4ba5
Show file tree
Hide file tree
Showing 28 changed files with 880 additions and 308 deletions.
2 changes: 0 additions & 2 deletions include/aa_device_MSD.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@

#include <vector>

#include <helper_cuda.h>

#include "aa_device_MSD_Configuration.hpp"
#include "aa_device_MSD_shared_kernel_functions.hpp"
#include "aa_device_MSD_normal_kernel.hpp"
Expand Down
2 changes: 0 additions & 2 deletions include/aa_device_SPS_long.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
#include <stdio.h>
#include <stdlib.h>

#include <helper_cuda.h>

#include "aa_params.hpp"
#include "aa_device_BC_plan.hpp"
#include "aa_device_SPS_long_kernel.hpp"
Expand Down
1 change: 0 additions & 1 deletion include/aa_device_peak_find.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

#include <vector>
#include <npp.h>
#include <helper_cuda.h>

#include "aa_params.hpp"
#include "aa_device_peak_find_kernel.hpp"
Expand Down
1 change: 0 additions & 1 deletion include/aa_fdas_host.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
#include "aa_params.hpp"
#include "aa_fdas_test_parameters.hpp"
//#include <helper_functions.h>
#include <helper_cuda.h>
#include <curand.h>
#include <libgen.h>
//#include <random> // C++11 to use normal distribution
Expand Down
21 changes: 10 additions & 11 deletions include/aa_permitted_pipelines_1.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

#include <cuda.h>
#include <cuda_runtime.h>
#include <helper_cuda.h>

#include <stdio.h>
#include <fstream>
Expand Down Expand Up @@ -335,33 +334,33 @@ namespace astroaccelerate {

const int *ndms = m_ddtr_strategy.ndms_data();

checkCudaErrors(cudaGetLastError());
//checkCudaErrors(cudaGetLastError());
load_data(-1, inBin.data(), d_input, &m_input_buffer[(long int) ( inc * nchans )], t_processed[0][t], maxshift, nchans, dmshifts);
checkCudaErrors(cudaGetLastError());
//checkCudaErrors(cudaGetLastError());

if(zero_dm_type == aa_pipeline::component_option::zero_dm) {
zero_dm(d_input, nchans, t_processed[0][t]+maxshift, nbits);
}

checkCudaErrors(cudaGetLastError());
//checkCudaErrors(cudaGetLastError());


if(zero_dm_type == aa_pipeline::component_option::zero_dm_with_outliers) {
zero_dm_outliers(d_input, nchans, t_processed[0][t]+maxshift);
}

checkCudaErrors(cudaGetLastError());
//checkCudaErrors(cudaGetLastError());

corner_turn(d_input, d_output, nchans, t_processed[0][t] + maxshift);

checkCudaErrors(cudaGetLastError());
//checkCudaErrors(cudaGetLastError());

if(enable_old_rfi) {
printf("\nPerforming old GPU rfi...");
rfi_gpu(d_input, nchans, t_processed[0][t]+maxshift);
}

checkCudaErrors(cudaGetLastError());
//checkCudaErrors(cudaGetLastError());

int oldBin = 1;
for(size_t dm_range = 0; dm_range < range; dm_range++) {
Expand All @@ -371,19 +370,19 @@ namespace astroaccelerate {
maxshift = maxshift_original / inBin[dm_range];

cudaDeviceSynchronize();
checkCudaErrors(cudaGetLastError());
//checkCudaErrors(cudaGetLastError());

load_data(dm_range, inBin.data(), d_input, &m_input_buffer[(long int) ( inc * nchans )], t_processed[dm_range][t], maxshift, nchans, dmshifts);

checkCudaErrors(cudaGetLastError());
//checkCudaErrors(cudaGetLastError());


if (inBin[dm_range] > oldBin) {
bin_gpu(d_input, d_output, nchans, t_processed[dm_range - 1][t] + maxshift * inBin[dm_range]);
( tsamp ) = ( tsamp ) * 2.0f;
}

checkCudaErrors(cudaGetLastError());
//checkCudaErrors(cudaGetLastError());

dedisperse(dm_range, t_processed[dm_range][t], inBin.data(), dmshifts, d_input, d_output, nchans, &tsamp, dm_low.data(), dm_step.data(), ndms, nbits, failsafe);

Expand All @@ -392,7 +391,7 @@ namespace astroaccelerate {
save_data_offset(d_output, k * t_processed[dm_range][t], m_output_buffer[dm_range][k], inc / inBin[dm_range], sizeof(float) * t_processed[dm_range][t]);
}
}
checkCudaErrors(cudaGetLastError());
//checkCudaErrors(cudaGetLastError());
oldBin = inBin[dm_range];
}

Expand Down
51 changes: 35 additions & 16 deletions include/aa_permitted_pipelines_2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

#include <cuda.h>
#include <cuda_runtime.h>
#include <helper_cuda.h>

#include <stdio.h>
#include "aa_pipeline.hpp"
Expand Down Expand Up @@ -187,7 +186,10 @@ namespace astroaccelerate {
printf("\n\n\n%d\n\n\n", time_samps);
size_t gpu_inputsize = (size_t) time_samps * (size_t) nchans * sizeof(unsigned short);

checkCudaErrors( cudaMalloc((void **) d_input, gpu_inputsize) );
cudaError_t e = cudaMalloc((void **) d_input, gpu_inputsize);
if(e != cudaSuccess) {
LOG(log_level::error, "Could not allocate_memory_gpu cudaMalloc in aa_permitted_pipelines_2.hpp (" + std::string(cudaGetErrorString(e)) + ")");
}

size_t gpu_outputsize = 0;
if (nchans < max_ndms) {
Expand All @@ -197,7 +199,10 @@ namespace astroaccelerate {
gpu_outputsize = (size_t)time_samps * (size_t)nchans * sizeof(float);
}

checkCudaErrors( cudaMalloc((void **) d_output, gpu_outputsize) );
e = cudaMalloc((void **) d_output, gpu_outputsize);
if(e != cudaSuccess) {
LOG(log_level::error, "Could not allocate_memory_gpu cudaMalloc in aa_permitted_pipelines_2.hpp (" + std::string(cudaGetErrorString(e)) + ")");
}
cudaMemset(*d_output, 0, gpu_outputsize);
}

Expand All @@ -206,9 +211,23 @@ namespace astroaccelerate {
*/
void allocate_memory_MSD(float **const d_MSD_workarea, unsigned short **d_MSD_output_taps, float **const d_MSD_interpolated,
const unsigned long int &MSD_maxtimesamples, const size_t &MSD_profile_size) {
checkCudaErrors(cudaMalloc((void **) d_MSD_workarea, MSD_maxtimesamples*5.5*sizeof(float)));
checkCudaErrors(cudaMalloc((void **) &(*d_MSD_output_taps), sizeof(ushort)*2*MSD_maxtimesamples));
checkCudaErrors(cudaMalloc((void **) d_MSD_interpolated, sizeof(float)*MSD_profile_size));
cudaError_t e = cudaMalloc((void **) d_MSD_workarea, MSD_maxtimesamples*5.5*sizeof(float));

if(e != cudaSuccess) {
LOG(log_level::error, "Could not allocate_memory_MSD cudaMalloc in aa_permitted_pipelines_2.hpp (" + std::string(cudaGetErrorString(e)) + ")");
}

e = cudaMalloc((void **) &(*d_MSD_output_taps), sizeof(ushort)*2*MSD_maxtimesamples);

if(e != cudaSuccess) {
LOG(log_level::error, "Could not allocate_memory_MSD cudaMalloc in aa_permitted_pipelines_2.hpp (" + std::string(cudaGetErrorString(e)) + ")");
}

e = cudaMalloc((void **) d_MSD_interpolated, sizeof(float)*MSD_profile_size);

if(e != cudaSuccess) {
LOG(log_level::error, "Could not allocate_memory_MSD cudaMalloc in aa_permitted_pipelines_2.hpp (" + std::string(cudaGetErrorString(e)) + ")");
}
}

/**
Expand Down Expand Up @@ -351,32 +370,32 @@ namespace astroaccelerate {

const int *ndms = m_ddtr_strategy.ndms_data();

checkCudaErrors(cudaGetLastError());
//checkCudaErrors(cudaGetLastError());
load_data(-1, inBin.data(), d_input, &m_input_buffer[(long int) ( inc * nchans )], t_processed[0][t], maxshift, nchans, dmshifts);
checkCudaErrors(cudaGetLastError());
//checkCudaErrors(cudaGetLastError());

if(zero_dm_type == aa_pipeline::component_option::zero_dm) {
zero_dm(d_input, nchans, t_processed[0][t]+maxshift, nbits);
}

checkCudaErrors(cudaGetLastError());
//checkCudaErrors(cudaGetLastError());

if(zero_dm_type == aa_pipeline::component_option::zero_dm_with_outliers) {
zero_dm_outliers(d_input, nchans, t_processed[0][t]+maxshift);
}

checkCudaErrors(cudaGetLastError());
//checkCudaErrors(cudaGetLastError());

corner_turn(d_input, d_output, nchans, t_processed[0][t] + maxshift);

checkCudaErrors(cudaGetLastError());
//checkCudaErrors(cudaGetLastError());

if(enable_old_rfi) {
printf("\nPerforming old GPU rfi...");
rfi_gpu(d_input, nchans, t_processed[0][t]+maxshift);
}

checkCudaErrors(cudaGetLastError());
//checkCudaErrors(cudaGetLastError());

int oldBin = 1;

Expand All @@ -393,23 +412,23 @@ namespace astroaccelerate {
maxshift = maxshift_original / inBin[dm_range];

cudaDeviceSynchronize();
checkCudaErrors(cudaGetLastError());
//checkCudaErrors(cudaGetLastError());

load_data(dm_range, inBin.data(), d_input, &m_input_buffer[(long int) ( inc * nchans )], t_processed[dm_range][t], maxshift, nchans, dmshifts);

checkCudaErrors(cudaGetLastError());
//checkCudaErrors(cudaGetLastError());


if (inBin[dm_range] > oldBin) {
bin_gpu(d_input, d_output, nchans, t_processed[dm_range - 1][t] + maxshift * inBin[dm_range]);
( tsamp ) = ( tsamp ) * 2.0f;
}

checkCudaErrors(cudaGetLastError());
//checkCudaErrors(cudaGetLastError());

dedisperse(dm_range, t_processed[dm_range][t], inBin.data(), dmshifts, d_input, d_output, nchans, &tsamp, dm_low.data(), dm_step.data(), ndms, nbits, failsafe);

checkCudaErrors(cudaGetLastError());
//checkCudaErrors(cudaGetLastError());

if(dump_to_user) {
for (int k = 0; k < ndms[dm_range]; k++) {
Expand Down
54 changes: 37 additions & 17 deletions include/aa_permitted_pipelines_3.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

#include <cuda.h>
#include <cuda_runtime.h>
#include <helper_cuda.h>

#include <stdio.h>
#include "aa_pipeline.hpp"
Expand Down Expand Up @@ -188,7 +187,10 @@ namespace astroaccelerate {
printf("\n\n\n%d\n\n\n", time_samps);
size_t gpu_inputsize = (size_t) time_samps * (size_t) nchans * sizeof(unsigned short);

checkCudaErrors( cudaMalloc((void **) d_input, gpu_inputsize) );
cudaError_t e = cudaMalloc((void **) d_input, gpu_inputsize);
if(e != cudaSuccess) {
LOG(log_level::error, "Could not allocate_memory_gpu cudaMalloc in aa_permitted_pipelines_3.hpp (" + std::string(cudaGetErrorString(e)) + ")");
}

size_t gpu_outputsize = 0;
if (nchans < max_ndms) {
Expand All @@ -198,7 +200,11 @@ namespace astroaccelerate {
gpu_outputsize = (size_t)time_samps * (size_t)nchans * sizeof(float);
}

checkCudaErrors( cudaMalloc((void **) d_output, gpu_outputsize) );
e = cudaMalloc((void **) d_output, gpu_outputsize);
if(e != cudaSuccess) {
LOG(log_level::error, "Could not allocate_memory_gpu cudaMalloc in aa_permitted_pipelines_3.hpp (" + std::string(cudaGetErrorString(e)) + ")");
}

cudaMemset(*d_output, 0, gpu_outputsize);
}

Expand All @@ -207,11 +213,25 @@ namespace astroaccelerate {
*/
void allocate_memory_MSD(float **const d_MSD_workarea, unsigned short **const d_MSD_output_taps, float **const d_MSD_interpolated,
const unsigned long int &MSD_maxtimesamples, const size_t &MSD_profile_size) {
checkCudaErrors(cudaMalloc((void **) d_MSD_workarea, MSD_maxtimesamples*5.5*sizeof(float)));
checkCudaErrors(cudaMalloc((void **) &(*d_MSD_output_taps), sizeof(ushort)*2*MSD_maxtimesamples));
checkCudaErrors(cudaMalloc((void **) d_MSD_interpolated, sizeof(float)*MSD_profile_size));
cudaError_t e = cudaMalloc((void **) d_MSD_workarea, MSD_maxtimesamples*5.5*sizeof(float));

if(e != cudaSuccess) {
LOG(log_level::error, "Could not allocate_memory_MSD cudaMalloc in aa_permitted_pipelines_3.hpp (" + std::string(cudaGetErrorString(e)) + ")");
}

e = cudaMalloc((void **) &(*d_MSD_output_taps), sizeof(ushort)*2*MSD_maxtimesamples);

if(e != cudaSuccess) {
LOG(log_level::error, "Could not allocate_memory_MSD cudaMalloc in aa_permitted_pipelines_3.hpp (" + std::string(cudaGetErrorString(e)) + ")");
}

e = cudaMalloc((void **) d_MSD_interpolated, sizeof(float)*MSD_profile_size);

if(e != cudaSuccess) {
LOG(log_level::error, "Could not allocate_memory_MSD cudaMalloc in aa_permitted_pipelines_3.hpp (" + std::string(cudaGetErrorString(e)) + ")");
}
}

/**
* \brief Allocate a 3D array that is an output buffer that stores dedispersed array data.
* \details This array is used by periodicity.
Expand Down Expand Up @@ -349,32 +369,32 @@ namespace astroaccelerate {

const int *ndms = m_ddtr_strategy.ndms_data();

checkCudaErrors(cudaGetLastError());
//checkCudaErrors(cudaGetLastError());
load_data(-1, inBin.data(), d_input, &m_input_buffer[(long int) ( inc * nchans )], t_processed[0][t], maxshift, nchans, dmshifts);
checkCudaErrors(cudaGetLastError());
//checkCudaErrors(cudaGetLastError());

if(zero_dm_type == aa_pipeline::component_option::zero_dm) {
zero_dm(d_input, nchans, t_processed[0][t]+maxshift, nbits);
}

checkCudaErrors(cudaGetLastError());
//checkCudaErrors(cudaGetLastError());

if(zero_dm_type == aa_pipeline::component_option::zero_dm_with_outliers) {
zero_dm_outliers(d_input, nchans, t_processed[0][t]+maxshift);
}

checkCudaErrors(cudaGetLastError());
//checkCudaErrors(cudaGetLastError());

corner_turn(d_input, d_output, nchans, t_processed[0][t] + maxshift);

checkCudaErrors(cudaGetLastError());
//checkCudaErrors(cudaGetLastError());

if(enable_old_rfi) {
printf("\nPerforming old GPU rfi...");
rfi_gpu(d_input, nchans, t_processed[0][t]+maxshift);
}

checkCudaErrors(cudaGetLastError());
//checkCudaErrors(cudaGetLastError());

int oldBin = 1;
for(size_t dm_range = 0; dm_range < range; dm_range++) {
Expand All @@ -384,23 +404,23 @@ namespace astroaccelerate {
maxshift = maxshift_original / inBin[dm_range];

cudaDeviceSynchronize();
checkCudaErrors(cudaGetLastError());
//checkCudaErrors(cudaGetLastError());

load_data(dm_range, inBin.data(), d_input, &m_input_buffer[(long int) ( inc * nchans )], t_processed[dm_range][t], maxshift, nchans, dmshifts);

checkCudaErrors(cudaGetLastError());
//checkCudaErrors(cudaGetLastError());


if (inBin[dm_range] > oldBin) {
bin_gpu(d_input, d_output, nchans, t_processed[dm_range - 1][t] + maxshift * inBin[dm_range]);
( tsamp ) = ( tsamp ) * 2.0f;
}

checkCudaErrors(cudaGetLastError());
//checkCudaErrors(cudaGetLastError());

dedisperse(dm_range, t_processed[dm_range][t], inBin.data(), dmshifts, d_input, d_output, nchans, &tsamp, dm_low.data(), dm_step.data(), ndms, nbits, failsafe);

checkCudaErrors(cudaGetLastError());
//checkCudaErrors(cudaGetLastError());

for (int k = 0; k < ndms[dm_range]; k++) {
save_data_offset(d_output, k * t_processed[dm_range][t], m_output_buffer[dm_range][k], inc / inBin[dm_range], sizeof(float) * t_processed[dm_range][t]);
Expand Down
Loading

0 comments on commit 17e4ba5

Please sign in to comment.