Skip to content

Commit

Permalink
Merge pull request #246 from AstroAccelerateOrg/ka_copy_to_host_fix
Browse files (browse the repository at this point in the history)
Ka copy to host fix
  • Loading branch information
jan2nov committed Nov 9, 2020
2 parents caf9a15 + b8a4868 commit 410d05d
Show file tree
Hide file tree
Showing 75 changed files with 4,679 additions and 1,310 deletions.
24 changes: 19 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,22 +53,23 @@ set(CUDA_LINK_LIBRARIES_KEYWORD PUBLIC)
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS})

list(APPEND CUDA_NVCC_FLAGS --ptxas-options= -Xptxas -dlcm=cg -use_fast_math)
list(APPEND CUDA_NVCC_FLAGS --use_fast_math)
list(APPEND CUDA_NVCC_FLAGS -g;)
list(APPEND CUDA_NVCC_FLAGS -Xptxas -O3 -std=c++11;)
list(APPEND CUDA_NVCC_FLAGS -lineinfo;)
list(APPEND CUDA_NVCC_FLAGS -Xcompiler;-O3;)
list(APPEND CUDA_NVCC_FLAGS -Xcompiler;-fopenmp;)
list(APPEND CUDA_NVCC_FLAGS -Xcompiler;-lm;)
list(APPEND CUDA_NVCC_FLAGS -Xcompiler;-Wall;)
list(APPEND CMAKE_CXX_FLAGS "-std=c++11 -O3 -lm -Wall -Wpedantic -Wextra")
list(APPEND CMAKE_CXX_FLAGS "-std=c++11 -O3 -lm -Wall -Wpedantic -Wextra -fopenmp")

if(NOT DEFINED CUDA_ARCH)
set(CUDA_ARCH "ALL")
message("-- INFO: Setting CUDA_ARCH to ALL.")
message("-- INFO: The target CUDA architecture can be specified using:")
message("-- INFO: -DCUDA_ARCH=\"<arch>\"")
message("-- INFO: where <arch> is one or more of:")
message("-- INFO: 3.5, 3.7, 5.0, 5.2, 6.0, 6.1, 6.2, 7.0 or ALL.")
message("-- INFO: 3.5, 3.7, 5.0, 5.2, 6.0, 6.1, 6.2, 7.0, 7.5, 8.0, 8.6 or ALL.")
message("-- INFO: Separate multiple architectures with semicolons.")
endif()

Expand All @@ -84,8 +85,11 @@ foreach(ARCH ${CUDA_ARCH})
list(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_61,code=sm_61)
list(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_62,code=sm_62)
list(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_70,code=sm_70)
set(ASTRO_ACCELERATE_CUDA_ARCH_VERSION "35,37,50,52,60,61,62,70")
set(ASTRO_ACCELERATE_CUDA_SM_VERSION "35,37,50,52,60,61,62,70")
list(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_75,code=sm_75)
list(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_80,code=sm_80)
list(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_86,code=sm_86)
set(ASTRO_ACCELERATE_CUDA_ARCH_VERSION "35,37,50,52,60,61,62,70,75,80,86")
set(ASTRO_ACCELERATE_CUDA_SM_VERSION "35,37,50,52,60,61,62,70,75,80,86")
elseif(ARCH MATCHES 3.5)
message("-- INFO: Building CUDA device code for architecture 3.5")
list(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_35,code=sm_35)
Expand Down Expand Up @@ -131,6 +135,16 @@ foreach(ARCH ${CUDA_ARCH})
list(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_75,code=sm_75)
set(ASTRO_ACCELERATE_CUDA_ARCH_VERSION "75")
set(ASTRO_ACCELERATE_CUDA_SM_VERSION "75")
elseif(ARCH MATCHES 8.0)
message("-- INFO: Building CUDA device code for architecture 8.0")
list(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_80,code=sm_80)
set(ASTRO_ACCELERATE_CUDA_ARCH_VERSION "80")
set(ASTRO_ACCELERATE_CUDA_SM_VERSION "80")
elseif(ARCH MATCHES 8.6)
message("-- INFO: Building CUDA device code for architecture 8.6")
list(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_86,code=sm_86)
set(ASTRO_ACCELERATE_CUDA_ARCH_VERSION "86")
set(ASTRO_ACCELERATE_CUDA_SM_VERSION "86")
else()
message(FATAL_ERROR "-- CUDA_ARCH ${ARCH} not recognised or not defined")
endif()
Expand Down
35 changes: 28 additions & 7 deletions cmake/py_astro_accelerate.py.in
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ class aa_py_pipeline():
return lib.aa_py_get_ddtr_nRanges(self.m_obj)

def ddtr_ndms(self):
# lib.aa_py_ndms.argtypes = [ctypes.c_void_p]
lib.aa_py_get_ndms_array.argtypes = [ctypes.c_void_p]
lib.aa_py_get_ndms_array.restype = ctypes.POINTER(ctypes.c_int)
return lib.aa_py_get_ndms_array(self.m_obj)

Expand Down Expand Up @@ -488,13 +488,21 @@ class aa_py_pipeline():
return self.m_status_code_c_int.value

def get_candidates(self):
lib.aa_py_spd_nCandidates.argtypes = [ctypes.c_void_p]
lib.aa_py_spd_nCandidates.restype = ctypes.c_size_t
lib.aa_py_h_dm.argtypes = [ctypes.c_void_p]
lib.aa_py_h_dm.restype = ctypes.POINTER(ctypes.c_uint)
lib.aa_py_h_ts.argtypes = [ctypes.c_void_p]
lib.aa_py_h_ts.restype = ctypes.POINTER(ctypes.c_uint)
lib.aa_py_h_snr.argtypes = [ctypes.c_void_p]
lib.aa_py_h_snr.restype = PFLOAT
lib.aa_py_h_width.argtypes = [ctypes.c_void_p]
lib.aa_py_h_width.restype = ctypes.POINTER(ctypes.c_uint)
lib.aa_py_current_range.argtypes = [ctypes.c_void_p]
lib.aa_py_current_range.restype = ctypes.c_int
lib.aa_py_current_time_chunk.argtypes = [ctypes.c_void_p]
lib.aa_py_current_time_chunk.restype = ctypes.c_int
lib.aa_py_current_inc.argtypes = [ctypes.c_void_p]
lib.aa_py_current_inc.restype = ctypes.c_long
# time_sample = []
if self.m_status_code_c_int.value==1:
Expand All @@ -512,19 +520,21 @@ class aa_py_pipeline():

## \brief Returns a pointer to the dedispersed output_buffer in the library. #
def get_buffer(self):
#lib.my_class_get_buffer.argtypes = [ctypes.c_void_p]
lib.aa_py_buffer.argtypes = [ctypes.c_void_p]
lib.aa_py_buffer.restype = PPPFLOAT
lib.aa_py_current_inc.restype = ctypes.c_long
lib.aa_py_total_computed_samples.argtypes = [ctypes.c_void_p]
lib.aa_py_total_computed_samples.restype = ctypes.c_int
ddtr_output_pointer = lib.aa_py_buffer(self.m_obj)
tprocessed = lib.aa_py_total_computed_samples(self.m_obj)
return tprocessed, ddtr_output_pointer

def dm_low(self, pos: int):
lib.aa_py_dm_low.argtypes = [ctypes.c_int]
lib.aa_py_dm_low.argtypes = [ctypes.c_void_p, ctypes.c_int]
lib.aa_py_dm_low.restype = ctypes.c_int
return lib.aa_py_dm_low(self.m_obj, pos)

def cleanUp(self):
lib.aa_py_cleanup.argtypes = [ctypes.c_void_p]
lib.aa_py_cleanup.restype = ctypes.c_bool
api_return_value = lib.aa_py_cleanup(self.m_obj)
return ctypes.c_bool(api_return_value).value
Expand Down Expand Up @@ -575,24 +585,35 @@ class SPD():
file_cand.close()

def scale(metadata, pipeline, ddtr_plan, tprocessed, nCandidates, dm, time_samples, snr, width, current_range, current_tchunk):
print("Python -- Scaling candidates ... ")
scale_dm = []
scale_time_sample = []
scale_time = []
scale_width = []
scale_snr = []
dm_idx = []
ts_idx = []
n_dmtrials = 0
list_ndms = pipeline.ddtr_ndms()
dm_low = pipeline.dm_low(current_range)
for i in range(0, current_range):
n_dmtrials = n_dmtrials + list_ndms[i]
for i in range(0, nCandidates):
dm_low = pipeline.dm_low(current_range)
list_ndms = pipeline.ddtr_ndms()
scale_dm.append(dm[i]*(ddtr_plan.m_dm[current_range].m_step) + dm_low)
scale_time_sample.append(time_samples[i]*ddtr_plan.m_dm[current_range].m_inBin + tprocessed)
scale_time.append(time_samples[i]*metadata.m_tsamp*ddtr_plan.m_dm[current_range].m_inBin + tprocessed*metadata.m_tsamp)
scale_width.append(width[i]*ddtr_plan.m_dm[current_range].m_inBin)
scale_snr.append(snr[i])
return scale_dm, scale_snr, scale_time_sample, scale_time, scale_width
dm_idx.append(dm[i] + n_dmtrials)
ts_idx.append(time_samples[i])
print(" ... done")
return scale_dm, scale_snr, scale_time_sample, scale_time, scale_width, dm_idx, ts_idx



def write_maximum(dm, snr, time, ts, width):
print("Python -- writing candidates to file ...")
index = np.argmax(snr)
print("#DM \t SNR \t TIME \t SAMPLE \t WIDTH")
print('{:.2f}'.format(dm[index]), "\t", '{:.2f}'.format(snr[index]), "\t", '{:.2f}'.format(time[index]), "\t", ts[index], "\t", width[index])
print(" ... done")
20 changes: 10 additions & 10 deletions examples/src/dedispersion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,26 +34,26 @@ int main(int argc, char *argv[]) {
aa_sigproc_input filterbank_datafile(argv[1]);
aa_filterbank_metadata metadata = filterbank_datafile.read_metadata();
filterbank_datafile.read_signal();
aa_device_info& device_info = aa_device_info::instance();
aa_device_info::CARD_ID selected_card_number = 0;
aa_device_info::aa_card_info selected_card_info;
device_info.init_card(selected_card_number, selected_card_info);

//Select desired device and initialize it by creating aa_device_info
int device = 0;
aa_device_info selected_device(device);

//-------------- Configure pipeline. Select components and their options
aa_pipeline::pipeline pipeline_components;
pipeline_components.insert(aa_pipeline::component::dedispersion); // pipeline must always contain dedispersion step
//pipeline_components.insert(aa_pipeline::component::analysis); //optional
//pipeline_components.insert(aa_pipeline::component::periodicity); // optional
//pipeline_components.insert(aa_pipeline::component::fdas); // optional
//pipeline_components.insert(aa_pipeline::component::analysis); //optional
//pipeline_components.insert(aa_pipeline::component::periodicity); // optional
//pipeline_components.insert(aa_pipeline::component::fdas); // optional

aa_pipeline::pipeline_option pipeline_options;
pipeline_options.insert(aa_pipeline::component_option::zero_dm);
aa_pipeline::pipeline_option pipeline_options;
pipeline_options.insert(aa_pipeline::component_option::zero_dm);
//insert option to copy the DDTR output data from GPU memory to the host memory
//do not insert this option if the output is not needed
pipeline_options.insert(aa_pipeline::component_option::copy_ddtr_data_to_host);
//--------------<

aa_pipeline_api<unsigned short> pipeline_runner(pipeline_components, pipeline_options, metadata, filterbank_datafile.input_buffer().data(), selected_card_info);
aa_pipeline_api<unsigned short> pipeline_runner(pipeline_components, pipeline_options, metadata, filterbank_datafile.input_buffer().data(), selected_device);
pipeline_runner.bind(ddtr_plan);

if (pipeline_runner.ready()) {
Expand Down
8 changes: 3 additions & 5 deletions examples/src/dedispersion_and_analysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,8 @@ int main(int argc, const char *argv[]) {
aa_filterbank_metadata metadata = filterbank_datafile.read_metadata();
filterbank_datafile.read_signal();

aa_device_info& device_info = aa_device_info::instance();
aa_device_info::CARD_ID selected_card_number = 0;
aa_device_info::aa_card_info selected_card_info;
device_info.init_card(selected_card_number, selected_card_info);
int device = 0;
aa_device_info selected_device(device);

//-------------- Configure pipeline. Select components and their options
aa_pipeline::pipeline pipeline_components;
Expand All @@ -84,7 +82,7 @@ int main(int argc, const char *argv[]) {
const bool enable_MSD_outlier_rejection = true;
aa_analysis_plan::selectable_candidate_algorithm candidate_algorithm = aa_analysis_plan::selectable_candidate_algorithm::peak_find;

aa_pipeline_api<unsigned short> pipeline_runner(pipeline_components, pipeline_options, metadata, filterbank_datafile.input_buffer().data(), selected_card_info);
aa_pipeline_api<unsigned short> pipeline_runner(pipeline_components, pipeline_options, metadata, filterbank_datafile.input_buffer().data(), selected_device);

pipeline_runner.bind(ddtr_plan);

Expand Down
28 changes: 6 additions & 22 deletions examples/src/fake_signal_periodic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,29 +38,13 @@ int main() {
aa_filterbank_metadata metadata(tstart, tsamp, nbits, nsamples, fch1, foff, nchans);

// Init the GPU card
aa_device_info& device_info = aa_device_info::instance();
if(device_info.check_for_devices()) {
LOG(log_level::notice, "Checked for devices.");
}
else {
LOG(log_level::error, "Could not find any devices.");
}

aa_device_info::CARD_ID selected_card = 0;
aa_device_info::aa_card_info selected_card_info;
if(device_info.init_card(selected_card, selected_card_info)) {
LOG(log_level::notice, "init_card complete. Selected card " + std::to_string(selected_card) + ".");
}
else {
LOG(log_level::error, "init_card incomplete.")
}

aa_device_info::print_card_info(selected_card_info);
int device = 0;
aa_device_info selected_device(device);

const size_t free_memory = selected_card_info.free_memory; // Free memory on the GPU in bytes
const size_t free_memory = selected_device.free_memory(); // Free memory on the GPU in bytes
bool enable_analysis = true;

aa_ddtr_strategy strategy(ddtr_plan, metadata, free_memory, enable_analysis);
aa_ddtr_strategy strategy(ddtr_plan, metadata, free_memory, enable_analysis, &selected_device);

if(!(strategy.ready())) {
std::cout << "There was an error" << std::endl;
Expand All @@ -73,7 +57,7 @@ int main() {
const aa_analysis_plan::selectable_candidate_algorithm algo = aa_analysis_plan::selectable_candidate_algorithm::peak_find;

aa_analysis_plan analysis_plan(strategy, sigma_cutoff, sigma_constant, max_boxcar_width_in_sec, algo, false);
aa_analysis_strategy analysis_strategy(analysis_plan);
aa_analysis_strategy analysis_strategy(analysis_plan, &selected_device);

if(!(analysis_strategy.ready())) {
std::cout << "ERROR: analysis_strategy not ready." << std::endl;
Expand Down Expand Up @@ -122,7 +106,7 @@ int main() {

aa_pipeline::pipeline_option pipeline_options;
pipeline_options.insert(aa_pipeline::component_option::copy_ddtr_data_to_host);
aa_pipeline_api<unsigned short> runner(pipeline_components, pipeline_options, metadata, input_data.data(), selected_card_info);
aa_pipeline_api<unsigned short> runner(pipeline_components, pipeline_options, metadata, input_data.data(), selected_device);
runner.bind(ddtr_plan);
runner.bind(analysis_plan);
runner.bind(periodicity_plan);
Expand Down
26 changes: 5 additions & 21 deletions examples/src/fake_signal_single.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,24 +17,8 @@ using namespace astroaccelerate;

int main() {
//----------------------- Init the GPU card
aa_device_info& device_info = aa_device_info::instance();
if(device_info.check_for_devices()) {
LOG(log_level::notice, "Checked for devices.");
}
else {
LOG(log_level::error, "Could not find any devices.");
}

aa_device_info::CARD_ID selected_card = 0;
aa_device_info::aa_card_info selected_card_info;
if(device_info.init_card(selected_card, selected_card_info)) {
LOG(log_level::notice, "init_card complete. Selected card " + std::to_string(selected_card) + ".");
}
else {
LOG(log_level::error, "init_card incomplete.")
}

aa_device_info::print_card_info(selected_card_info);
int device = 0;
aa_device_info selected_device(device);
//-------------------------------------------

//-------- Define user DM plan
Expand Down Expand Up @@ -65,10 +49,10 @@ int main() {
// setting the metadata for running fake generator
aa_fake_signal_metadata f_meta(dm_position, signal_start, func_width, sigma);

const size_t free_memory = selected_card_info.free_memory; // Free memory on the GPU in bytes
const size_t free_memory = selected_device.free_memory(); // Free memory on the GPU in bytes
bool enable_analysis = false;

aa_ddtr_strategy strategy(ddtr_plan, metadata, free_memory, enable_analysis);
aa_ddtr_strategy strategy(ddtr_plan, metadata, free_memory, enable_analysis, &selected_device);
if(!(strategy.ready())) {
std::cout << "There was an error" << std::endl;
return 0;
Expand All @@ -94,7 +78,7 @@ int main() {
//do not insert this option if the output is not needed
pipeline_options.insert(aa_pipeline::component_option::copy_ddtr_data_to_host);

aa_pipeline_api<unsigned short> runner(pipeline_components, pipeline_options, metadata, input_data.data(), selected_card_info);
aa_pipeline_api<unsigned short> runner(pipeline_components, pipeline_options, metadata, input_data.data(), selected_device);
runner.bind(ddtr_plan);

if (runner.ready()) {
Expand Down
10 changes: 6 additions & 4 deletions examples/src/periodicity.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ int main() {

aa_filterbank_metadata metadata(tstart, tsamp, nbits, nsamples, fch1, foff, nchans);

const size_t free_memory = 2147483648; // Free memory on the GPU in bytes
int device = 0;
aa_device_info selected_device(device);

//-------------- Configure pipeline. Select components and their options
aa_pipeline::pipeline pipeline_components;
Expand All @@ -52,7 +53,7 @@ int main() {
//--------------<

bool enable_analysis = true; // The strategy will be optimised to run just dedispersion
aa_ddtr_strategy ddtr_strategy(ddtr_plan, metadata, free_memory, enable_analysis);
aa_ddtr_strategy ddtr_strategy(ddtr_plan, metadata, selected_device.free_memory(), enable_analysis, &selected_device);

if(!(ddtr_strategy.ready())) {
std::cout << "ERROR: ddtr_strategy not ready." << std::endl;
Expand All @@ -72,7 +73,7 @@ int main() {
const aa_analysis_plan::selectable_candidate_algorithm algo = aa_analysis_plan::selectable_candidate_algorithm::peak_find;

aa_analysis_plan analysis_plan(ddtr_strategy, sigma_cutoff, sigma_constant, max_boxcar_width_in_sec, algo, enable_MSD_outlier_rejection);
aa_analysis_strategy analysis_strategy(analysis_plan);
aa_analysis_strategy analysis_strategy(analysis_plan, &selected_device);

if(!(analysis_strategy.ready())) {
std::cout << "ERROR: analysis_strategy not ready." << std::endl;
Expand All @@ -95,9 +96,10 @@ int main() {

//-------------- Create empty strategy object for unused components
aa_fdas_strategy empty_fdas_strategy;
aa_jerk_strategy empty_jerk_strategy;
//--------------<

aa_permitted_pipelines_generic pipeline_runner(pipeline_components, pipeline_options, ddtr_strategy, analysis_strategy, periodicity_strategy, empty_fdas_strategy, false, false, false, false, false, input_data.data());
aa_permitted_pipelines_generic pipeline_runner(pipeline_components, pipeline_options, ddtr_strategy, analysis_strategy, periodicity_strategy, empty_fdas_strategy, empty_jerk_strategy, false, false, false, false, false, input_data.data());
// aa_permitted_pipelines_3<aa_pipeline::component_option::zero_dm, false> runner(ddtr_strategy, analysis_strategy, periodicity_strategy, input_data.data());
if(pipeline_runner.setup()) {
while(pipeline_runner.next()) {
Expand Down
Loading

0 comments on commit 410d05d

Please sign in to comment.