Merge pull request #246 from AstroAccelerateOrg/ka_copy_to_host_fix

Ka copy to host fix
AstroAccelerateOrg · Nov 9, 2020 · 410d05d
2 parents caf9a15 + b8a4868
commit 410d05d
Show file tree

Hide file tree

Showing 75 changed files with 4,679 additions and 1,310 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -53,22 +53,23 @@ set(CUDA_LINK_LIBRARIES_KEYWORD PUBLIC)
 set(CUDA_PROPAGATE_HOST_FLAGS OFF)
 set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS})
 
-list(APPEND CUDA_NVCC_FLAGS --ptxas-options= -Xptxas -dlcm=cg -use_fast_math)
+list(APPEND CUDA_NVCC_FLAGS --use_fast_math)
 list(APPEND CUDA_NVCC_FLAGS -g;)
 list(APPEND CUDA_NVCC_FLAGS -Xptxas -O3 -std=c++11;)
 list(APPEND CUDA_NVCC_FLAGS -lineinfo;)
 list(APPEND CUDA_NVCC_FLAGS -Xcompiler;-O3;)
+list(APPEND CUDA_NVCC_FLAGS -Xcompiler;-fopenmp;)
 list(APPEND CUDA_NVCC_FLAGS -Xcompiler;-lm;)
 list(APPEND CUDA_NVCC_FLAGS -Xcompiler;-Wall;)
-list(APPEND CMAKE_CXX_FLAGS "-std=c++11 -O3 -lm -Wall -Wpedantic -Wextra")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -O3 -lm -Wall -Wpedantic -Wextra -fopenmp") # space-joined on purpose: CMAKE_CXX_FLAGS is a command-line string, list(APPEND) would insert ';' after any pre-existing flags
 
 if(NOT DEFINED CUDA_ARCH)  
 	set(CUDA_ARCH "ALL")
 	message("-- INFO: Setting CUDA_ARCH to ALL.")
 	message("-- INFO: The target CUDA architecture can be specified using:")
 	message("-- INFO:   -DCUDA_ARCH=\"<arch>\"")
 	message("-- INFO: where <arch> is one or more of:")
-	message("-- INFO:   3.5, 3.7, 5.0, 5.2, 6.0, 6.1, 6.2, 7.0 or ALL.")
+	message("-- INFO:   3.5, 3.7, 5.0, 5.2, 6.0, 6.1, 6.2, 7.0, 7.5, 8.0, 8.6 or ALL.") # keep in sync with the architectures accepted by the foreach(ARCH ...) chain
 	message("-- INFO: Separate multiple architectures with semicolons.")
 endif()
 
@@ -84,8 +85,11 @@ foreach(ARCH ${CUDA_ARCH})
 	   	list(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_61,code=sm_61)
 	   	list(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_62,code=sm_62)
 		list(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_70,code=sm_70)
-		set(ASTRO_ACCELERATE_CUDA_ARCH_VERSION "35,37,50,52,60,61,62,70")
-		set(ASTRO_ACCELERATE_CUDA_SM_VERSION "35,37,50,52,60,61,62,70")
+		list(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_75,code=sm_75)
+		list(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_80,code=sm_80)
+		list(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_86,code=sm_86)
+		set(ASTRO_ACCELERATE_CUDA_ARCH_VERSION "35,37,50,52,60,61,62,70,75,80,86")
+		set(ASTRO_ACCELERATE_CUDA_SM_VERSION "35,37,50,52,60,61,62,70,75,80,86")
 	elseif(ARCH MATCHES 3.5)
 		message("-- INFO: Building CUDA device code for architecture 3.5")
 		list(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_35,code=sm_35)
@@ -131,6 +135,16 @@ foreach(ARCH ${CUDA_ARCH})
 	        list(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_75,code=sm_75)
 		set(ASTRO_ACCELERATE_CUDA_ARCH_VERSION "75")
 		set(ASTRO_ACCELERATE_CUDA_SM_VERSION "75")
+        elseif(ARCH MATCHES 8.0)
+                message("-- INFO: Building CUDA device code for architecture 8.0")
+                list(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_80,code=sm_80)
+                set(ASTRO_ACCELERATE_CUDA_ARCH_VERSION "80")
+                set(ASTRO_ACCELERATE_CUDA_SM_VERSION "80")
+        elseif(ARCH MATCHES 8.6)
+                message("-- INFO: Building CUDA device code for architecture 8.6")
+                list(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_86,code=sm_86)
+                set(ASTRO_ACCELERATE_CUDA_ARCH_VERSION "86")
+                set(ASTRO_ACCELERATE_CUDA_SM_VERSION "86")
 	else()
 	message(FATAL_ERROR "-- CUDA_ARCH ${ARCH} not recognised or not defined")
 	endif()

diff --git a/cmake/py_astro_accelerate.py.in b/cmake/py_astro_accelerate.py.in
@@ -433,7 +433,7 @@ class aa_py_pipeline():
         return lib.aa_py_get_ddtr_nRanges(self.m_obj)
 
     def ddtr_ndms(self):
-#        lib.aa_py_ndms.argtypes = [ctypes.c_void_p]
+        lib.aa_py_get_ndms_array.argtypes = [ctypes.c_void_p]
         lib.aa_py_get_ndms_array.restype = ctypes.POINTER(ctypes.c_int)
         return lib.aa_py_get_ndms_array(self.m_obj)
 
@@ -488,13 +488,21 @@ class aa_py_pipeline():
         return self.m_status_code_c_int.value
 
     def get_candidates(self):
+        lib.aa_py_spd_nCandidates.argtypes = [ctypes.c_void_p]
         lib.aa_py_spd_nCandidates.restype = ctypes.c_size_t
+        lib.aa_py_h_dm.argtypes = [ctypes.c_void_p]
         lib.aa_py_h_dm.restype = ctypes.POINTER(ctypes.c_uint)
+        lib.aa_py_h_ts.argtypes = [ctypes.c_void_p]
         lib.aa_py_h_ts.restype = ctypes.POINTER(ctypes.c_uint) 
+        lib.aa_py_h_snr.argtypes = [ctypes.c_void_p]
         lib.aa_py_h_snr.restype = PFLOAT
+        lib.aa_py_h_width.argtypes = [ctypes.c_void_p]
         lib.aa_py_h_width.restype = ctypes.POINTER(ctypes.c_uint)
+        lib.aa_py_current_range.argtypes = [ctypes.c_void_p]
         lib.aa_py_current_range.restype = ctypes.c_int
+        lib.aa_py_current_time_chunk.argtypes = [ctypes.c_void_p]
         lib.aa_py_current_time_chunk.restype = ctypes.c_int
+        lib.aa_py_current_inc.argtypes = [ctypes.c_void_p]
         lib.aa_py_current_inc.restype = ctypes.c_long
 #        time_sample = []
         if self.m_status_code_c_int.value==1:
@@ -512,19 +520,21 @@ class aa_py_pipeline():
 
     ## \brief Returns a pointer to the dedispersed output_buffer in the library. #
     def get_buffer(self):
-        #lib.my_class_get_buffer.argtypes = [ctypes.c_void_p]
+        lib.aa_py_buffer.argtypes = [ctypes.c_void_p]
         lib.aa_py_buffer.restype = PPPFLOAT
-        lib.aa_py_current_inc.restype = ctypes.c_long
+        lib.aa_py_total_computed_samples.argtypes = [ctypes.c_void_p]
+        lib.aa_py_total_computed_samples.restype = ctypes.c_int
         ddtr_output_pointer = lib.aa_py_buffer(self.m_obj)
         tprocessed = lib.aa_py_total_computed_samples(self.m_obj)
         return tprocessed, ddtr_output_pointer
 
     def dm_low(self, pos: int):
-        lib.aa_py_dm_low.argtypes = [ctypes.c_int]
+        lib.aa_py_dm_low.argtypes = [ctypes.c_void_p, ctypes.c_int]
         lib.aa_py_dm_low.restype = ctypes.c_int
         return lib.aa_py_dm_low(self.m_obj, pos)
 
     def cleanUp(self):
+        lib.aa_py_cleanup.argtypes = [ctypes.c_void_p]
         lib.aa_py_cleanup.restype = ctypes.c_bool
         api_return_value = lib.aa_py_cleanup(self.m_obj)
         return ctypes.c_bool(api_return_value).value
@@ -575,24 +585,35 @@ class SPD():
         file_cand.close()
 
     def scale(metadata, pipeline, ddtr_plan, tprocessed, nCandidates, dm, time_samples, snr, width, current_range, current_tchunk):
+        print("Python -- Scaling candidates ... ")
         scale_dm = []
         scale_time_sample = []
         scale_time = []
         scale_width = []
         scale_snr = []
+        dm_idx = []
+        ts_idx = []
+        n_dmtrials = 0
+        list_ndms = pipeline.ddtr_ndms()
+        dm_low = pipeline.dm_low(current_range)
+        for i in range(0, current_range):
+            n_dmtrials = n_dmtrials + list_ndms[i]
         for i in range(0, nCandidates):
-            dm_low = pipeline.dm_low(current_range)
-            list_ndms = pipeline.ddtr_ndms()
             scale_dm.append(dm[i]*(ddtr_plan.m_dm[current_range].m_step) + dm_low)
             scale_time_sample.append(time_samples[i]*ddtr_plan.m_dm[current_range].m_inBin + tprocessed)
             scale_time.append(time_samples[i]*metadata.m_tsamp*ddtr_plan.m_dm[current_range].m_inBin + tprocessed*metadata.m_tsamp)
             scale_width.append(width[i]*ddtr_plan.m_dm[current_range].m_inBin)
             scale_snr.append(snr[i])
-        return scale_dm, scale_snr, scale_time_sample, scale_time, scale_width
+            dm_idx.append(dm[i] + n_dmtrials)
+            ts_idx.append(time_samples[i])
+        print(" ... done")
+        return scale_dm, scale_snr, scale_time_sample, scale_time, scale_width, dm_idx, ts_idx
 
 
 
     def write_maximum(dm, snr, time, ts, width):
+        print("Python -- printing maximum-SNR candidate ...")  # output goes to stdout only; nothing is written to a file here
         index = np.argmax(snr)
         print("#DM \t SNR \t TIME \t SAMPLE \t WIDTH")
         print('{:.2f}'.format(dm[index]), "\t", '{:.2f}'.format(snr[index]), "\t", '{:.2f}'.format(time[index]), "\t", ts[index], "\t", width[index])
+        print(" ... done")
diff --git a/examples/src/dedispersion.cpp b/examples/src/dedispersion.cpp
@@ -34,26 +34,26 @@ int main(int argc, char *argv[]) {
 	aa_sigproc_input filterbank_datafile(argv[1]);
 	aa_filterbank_metadata metadata = filterbank_datafile.read_metadata();
 	filterbank_datafile.read_signal();
-	aa_device_info& device_info = aa_device_info::instance();
-	aa_device_info::CARD_ID selected_card_number = 0;
-	aa_device_info::aa_card_info selected_card_info; 
-        device_info.init_card(selected_card_number, selected_card_info);
+
+	//Select desired device and initialize it by creating aa_device_info
+	int device = 0;
+	aa_device_info selected_device(device);
 
 	//-------------- Configure pipeline. Select components and their options
 	aa_pipeline::pipeline pipeline_components;
 	pipeline_components.insert(aa_pipeline::component::dedispersion); // pipeline must always contain dedispersion step
-        //pipeline_components.insert(aa_pipeline::component::analysis); //optional
-        //pipeline_components.insert(aa_pipeline::component::periodicity); // optional
-        //pipeline_components.insert(aa_pipeline::component::fdas); // optional
+	//pipeline_components.insert(aa_pipeline::component::analysis); //optional
+	//pipeline_components.insert(aa_pipeline::component::periodicity); // optional
+	//pipeline_components.insert(aa_pipeline::component::fdas); // optional
 
-        aa_pipeline::pipeline_option pipeline_options;
-        pipeline_options.insert(aa_pipeline::component_option::zero_dm);
+	aa_pipeline::pipeline_option pipeline_options;
+	pipeline_options.insert(aa_pipeline::component_option::zero_dm);
 	//insert option to copy the DDTR output data from GPU memory to the host memory
 	//do not insert this option if the output is not needed
 	pipeline_options.insert(aa_pipeline::component_option::copy_ddtr_data_to_host);
 	//--------------<
 
-	aa_pipeline_api<unsigned short> pipeline_runner(pipeline_components, pipeline_options, metadata, filterbank_datafile.input_buffer().data(), selected_card_info);
+	aa_pipeline_api<unsigned short> pipeline_runner(pipeline_components, pipeline_options, metadata, filterbank_datafile.input_buffer().data(), selected_device);
 	pipeline_runner.bind(ddtr_plan);
 
         if (pipeline_runner.ready()) {

diff --git a/examples/src/dedispersion_and_analysis.cpp b/examples/src/dedispersion_and_analysis.cpp
@@ -61,10 +61,8 @@ int main(int argc, const char *argv[]) {
 	aa_filterbank_metadata metadata = filterbank_datafile.read_metadata();
 	filterbank_datafile.read_signal();
 
-	aa_device_info& device_info = aa_device_info::instance();
-	aa_device_info::CARD_ID selected_card_number = 0;
-	aa_device_info::aa_card_info selected_card_info; 
-        device_info.init_card(selected_card_number, selected_card_info);
+	int device = 0;
+	aa_device_info selected_device(device);
 
 	//-------------- Configure pipeline. Select components and their options
 	aa_pipeline::pipeline pipeline_components;
@@ -84,7 +82,7 @@ int main(int argc, const char *argv[]) {
 	const bool  enable_MSD_outlier_rejection = true;
 	aa_analysis_plan::selectable_candidate_algorithm candidate_algorithm = aa_analysis_plan::selectable_candidate_algorithm::peak_find;
 
-	aa_pipeline_api<unsigned short> pipeline_runner(pipeline_components, pipeline_options, metadata, filterbank_datafile.input_buffer().data(), selected_card_info);
+	aa_pipeline_api<unsigned short> pipeline_runner(pipeline_components, pipeline_options, metadata, filterbank_datafile.input_buffer().data(), selected_device);
 
 	pipeline_runner.bind(ddtr_plan);
 

diff --git a/examples/src/fake_signal_periodic.cpp b/examples/src/fake_signal_periodic.cpp
@@ -38,29 +38,13 @@ int main() {
   aa_filterbank_metadata metadata(tstart, tsamp, nbits, nsamples, fch1, foff, nchans);
 
   // Init the GPU card
-  aa_device_info& device_info = aa_device_info::instance();
-  if(device_info.check_for_devices()) {
-    LOG(log_level::notice, "Checked for devices.");
-  }
-  else {
-    LOG(log_level::error, "Could not find any devices.");
-  }
-
-  aa_device_info::CARD_ID selected_card = 0;
-  aa_device_info::aa_card_info selected_card_info;
-  if(device_info.init_card(selected_card, selected_card_info)) {
-    LOG(log_level::notice, "init_card complete. Selected card " + std::to_string(selected_card) + ".");
-  }
-  else {
-    LOG(log_level::error, "init_card incomplete.")
-  }
-
-  aa_device_info::print_card_info(selected_card_info);
+  int device = 0;
+  aa_device_info selected_device(device);
 
-  const size_t free_memory = selected_card_info.free_memory; // Free memory on the GPU in bytes
+  const size_t free_memory = selected_device.free_memory(); // Free memory on the GPU in bytes
   bool enable_analysis = true; 
 
-  aa_ddtr_strategy strategy(ddtr_plan, metadata, free_memory, enable_analysis);
+  aa_ddtr_strategy strategy(ddtr_plan, metadata, free_memory, enable_analysis, &selected_device);
 
   if(!(strategy.ready())) {
     std::cout << "There was an error" << std::endl;
@@ -73,7 +57,7 @@ int main() {
   const aa_analysis_plan::selectable_candidate_algorithm algo = aa_analysis_plan::selectable_candidate_algorithm::peak_find;
 
   aa_analysis_plan analysis_plan(strategy, sigma_cutoff, sigma_constant, max_boxcar_width_in_sec, algo, false);
-  aa_analysis_strategy analysis_strategy(analysis_plan);
+  aa_analysis_strategy analysis_strategy(analysis_plan, &selected_device);
 
   if(!(analysis_strategy.ready())) {
     std::cout << "ERROR: analysis_strategy not ready." << std::endl;
@@ -122,7 +106,7 @@ int main() {
 
   aa_pipeline::pipeline_option pipeline_options;
   pipeline_options.insert(aa_pipeline::component_option::copy_ddtr_data_to_host);
-	aa_pipeline_api<unsigned short> runner(pipeline_components, pipeline_options, metadata, input_data.data(), selected_card_info);
+	aa_pipeline_api<unsigned short> runner(pipeline_components, pipeline_options, metadata, input_data.data(), selected_device);
         runner.bind(ddtr_plan);
 	runner.bind(analysis_plan);
 	runner.bind(periodicity_plan);

diff --git a/examples/src/fake_signal_single.cpp b/examples/src/fake_signal_single.cpp
@@ -17,24 +17,8 @@ using namespace astroaccelerate;
 
 int main() {
 	//-----------------------  Init the GPU card
-	aa_device_info& device_info = aa_device_info::instance();
-	if(device_info.check_for_devices()) {
-		LOG(log_level::notice, "Checked for devices.");
-	}
-	else {
-		LOG(log_level::error, "Could not find any devices.");
-	}
-
-	aa_device_info::CARD_ID selected_card = 0;
-	aa_device_info::aa_card_info selected_card_info;
-	if(device_info.init_card(selected_card, selected_card_info)) {
-		LOG(log_level::notice, "init_card complete. Selected card " + std::to_string(selected_card) + ".");
-	}
-	else {
-		LOG(log_level::error, "init_card incomplete.")
-	}
-
-	aa_device_info::print_card_info(selected_card_info);
+	int device = 0;
+	aa_device_info selected_device(device);
 	//-------------------------------------------
 
 	//-------- Define user DM plan 
@@ -65,10 +49,10 @@ int main() {
 	// setting the metadata for running fake generator
 	aa_fake_signal_metadata f_meta(dm_position, signal_start, func_width, sigma);	
 
-	const size_t free_memory = selected_card_info.free_memory; // Free memory on the GPU in bytes
+	const size_t free_memory = selected_device.free_memory(); // Free memory on the GPU in bytes
 	bool enable_analysis = false; 
 
-	aa_ddtr_strategy strategy(ddtr_plan, metadata, free_memory, enable_analysis);
+	aa_ddtr_strategy strategy(ddtr_plan, metadata, free_memory, enable_analysis, &selected_device);
 	if(!(strategy.ready())) {
 		std::cout << "There was an error" << std::endl;
 		return 0;
@@ -94,7 +78,7 @@ int main() {
         //do not insert this option if the output is not needed
         pipeline_options.insert(aa_pipeline::component_option::copy_ddtr_data_to_host);
 
-	aa_pipeline_api<unsigned short> runner(pipeline_components, pipeline_options, metadata, input_data.data(), selected_card_info);
+	aa_pipeline_api<unsigned short> runner(pipeline_components, pipeline_options, metadata, input_data.data(), selected_device);
 	runner.bind(ddtr_plan);
 
         if (runner.ready()) {

diff --git a/examples/src/periodicity.cpp b/examples/src/periodicity.cpp
@@ -38,7 +38,8 @@ int main() {
 
   aa_filterbank_metadata metadata(tstart, tsamp, nbits, nsamples, fch1, foff, nchans);
 
-  const size_t free_memory = 2147483648; // Free memory on the GPU in bytes
+  int device = 0;
+  aa_device_info selected_device(device);
 
   //-------------- Configure pipeline. Select components and their options
   aa_pipeline::pipeline pipeline_components;
@@ -52,7 +53,7 @@ int main() {
   //--------------<
 
   bool enable_analysis = true;       // The strategy will be optimised to run just dedispersion
-  aa_ddtr_strategy ddtr_strategy(ddtr_plan, metadata, free_memory, enable_analysis);
+  aa_ddtr_strategy ddtr_strategy(ddtr_plan, metadata, selected_device.free_memory(), enable_analysis, &selected_device);
 
   if(!(ddtr_strategy.ready())) {
     std::cout << "ERROR: ddtr_strategy not ready." << std::endl;
@@ -72,7 +73,7 @@ int main() {
   const aa_analysis_plan::selectable_candidate_algorithm algo = aa_analysis_plan::selectable_candidate_algorithm::peak_find;
 
   aa_analysis_plan analysis_plan(ddtr_strategy, sigma_cutoff, sigma_constant, max_boxcar_width_in_sec, algo, enable_MSD_outlier_rejection);
-  aa_analysis_strategy analysis_strategy(analysis_plan);
+  aa_analysis_strategy analysis_strategy(analysis_plan, &selected_device);
 
   if(!(analysis_strategy.ready())) {
     std::cout << "ERROR: analysis_strategy not ready." << std::endl;
@@ -95,9 +96,10 @@ int main() {
 
   //-------------- Create empty strategy object for unused components
   aa_fdas_strategy empty_fdas_strategy;
+  aa_jerk_strategy empty_jerk_strategy;
   //--------------<
 
-  aa_permitted_pipelines_generic pipeline_runner(pipeline_components, pipeline_options, ddtr_strategy, analysis_strategy, periodicity_strategy, empty_fdas_strategy, false, false, false, false, false, input_data.data());  
+  aa_permitted_pipelines_generic pipeline_runner(pipeline_components, pipeline_options, ddtr_strategy, analysis_strategy, periodicity_strategy, empty_fdas_strategy, empty_jerk_strategy, false, false, false, false, false, input_data.data());  
 //  aa_permitted_pipelines_3<aa_pipeline::component_option::zero_dm, false> runner(ddtr_strategy, analysis_strategy, periodicity_strategy, input_data.data());
   if(pipeline_runner.setup()) {
     while(pipeline_runner.next()) {