From 2dc48c5612ceba1a82e94a1b2ee3354a1383b13b Mon Sep 17 00:00:00 2001 From: Zoheb Khan Date: Wed, 15 Mar 2023 10:27:16 -0700 Subject: [PATCH] nvjpeg2k sample updates for v0.7 --- .../CMakeLists.txt | 32 +++-- .../nvJPEG2000-Decoder-Pipelined/README.md | 29 ++--- .../nvjpeg2k_dec_pipelined.cpp | 105 +++++++++------ .../nvjpeg2k_dec_pipelined.h | 106 ++++++--------- .../CMakeLists.txt | 31 +++-- .../nvJPEG2000-Decoder-Tile-Partial/README.md | 27 ++-- .../nvj2k_DecodeTilePartial.cpp | 123 ++++++++++-------- .../nvj2k_DecodeTilePartial.h | 92 ++++++------- nvJPEG2000/nvJPEG2000-Decoder/CMakeLists.txt | 31 +++-- nvJPEG2000/nvJPEG2000-Decoder/README.md | 35 +++-- .../nvjpeg2000DecodeSample.cpp | 29 ++--- .../nvjpeg2000DecodeSample.h | 62 +++------ nvJPEG2000/nvJPEG2000-Encoder/CMakeLists.txt | 32 +++-- nvJPEG2000/nvJPEG2000-Encoder/README.md | 26 ++-- 14 files changed, 372 insertions(+), 388 deletions(-) diff --git a/nvJPEG2000/nvJPEG2000-Decoder-Pipelined/CMakeLists.txt b/nvJPEG2000/nvJPEG2000-Decoder-Pipelined/CMakeLists.txt index 3f048b0d..0a5bdd4f 100644 --- a/nvJPEG2000/nvJPEG2000-Decoder-Pipelined/CMakeLists.txt +++ b/nvJPEG2000/nvJPEG2000-Decoder-Pipelined/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (c) 2020 - 2021, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2020 - 2023, NVIDIA CORPORATION. All rights reserved. # # NVIDIA CORPORATION and its licensors retain all intellectual property # and proprietary rights in and to this software, related documentation @@ -8,14 +8,25 @@ # license agreement from NVIDIA CORPORATION is strictly prohibited. # -cmake_minimum_required(VERSION 3.13 FATAL_ERROR) +cmake_minimum_required(VERSION 3.17 FATAL_ERROR) project(nvjpeg2k_sample LANGUAGES CXX CUDA) -if(NOT NVJPEG2K_PATH) -message(FATAL_ERROR, "NVJPEG2K_PATH not set") +find_package(CUDAToolkit REQUIRED) + + +find_library(NVJPEG2K_LIB + NAMES nvjpeg2k + PATHS ${NVJPEG2K_PATH}/lib64/${CUDAToolkit_VERSION_MAJOR} + ${NVJPEG2K_PATH}/lib/${CUDAToolkit_VERSION_MAJOR}) + +if(NOT NVJPEG2K_LIB) + message(FATAL_ERROR, " nvJPEG2000 library not found. Make sure the library is installed. If using the tar.xz/zip packages set -DNVJPEG2K_PATH") +else() + message(INFO, " nvJPEG2000 library found here: ${NVJPEG2K_LIB}") endif() + set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) @@ -29,22 +40,15 @@ include_directories( SYSTEM ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} ) - set(EXAMPLES_DESCRIPTOR_SOURCES "nvjpeg2k_dec_pipelined.cpp") add_executable(nvjpeg2k_dec_pipelined ${EXAMPLES_DESCRIPTOR_SOURCES}) -find_library(NVJPEG2K_LIB - NAMES nvjpeg2k - PATHS ${NVJPEG2K_PATH}/lib64) - -find_library(CUDART_LIB - NAMES cudart - PATHS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) if(UNIX) set(FILESYS -lstdc++fs) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") endif() - -target_link_libraries(nvjpeg2k_dec_pipelined PUBLIC ${NVJPEG2K_LIB} ${CUDART_LIB} ${FILESYS}) + +target_link_libraries(nvjpeg2k_dec_pipelined PUBLIC ${NVJPEG2K_LIB} CUDA::cudart ${FILESYS}) + diff --git a/nvJPEG2000/nvJPEG2000-Decoder-Pipelined/README.md b/nvJPEG2000/nvJPEG2000-Decoder-Pipelined/README.md index 2e46ae27..6c213a48 100644 --- a/nvJPEG2000/nvJPEG2000-Decoder-Pipelined/README.md +++ b/nvJPEG2000/nvJPEG2000-Decoder-Pipelined/README.md @@ -2,7 +2,7 @@ ## Description -This code demonstrates how to pipleline the decoding of multiple JPEG 2000 images nvJPEG2000 library. +This code demonstrates how to pipeline the decoding of multiple JPEG 2000 images using the nvJPEG2000 library. ## Key Concepts @@ -20,31 +20,29 @@ Linux, Windows x86_64 -## CUDA APIs involved +## API Documentation -[NVJPEG2000](https://docs.nvidia.com/cuda/nvjpeg2000/index.html) +[nvJPEG2000](https://docs.nvidia.com/cuda/nvjpeg2000/index.html) -# Building (make) - # Prerequisites -- A Linux system with recent NVIDIA drivers. -- Install the [CUDA 11.0 toolkit](https://developer.nvidia.com/cuda-downloads). -- CMake (3.13 or later) -- nvjpeg2k package +- Recent NVIDIA drivers. +- [CUDA toolkit](https://developer.nvidia.com/cuda-downloads). +- CMake (3.17 or later). +- Install [nvJPEG2000](https://developer.nvidia.com/nvjpeg2000/downloads). -## Build command on Linux +## Build Instructions on Linux ``` $ mkdir build $ $ cd build $ -$ export CUDACXX= nvcc path +$ export CUDACXX=/usr/local/cuda/bin/nvcc $ -$ cmake .. -DCMAKE_BUILD_TYPE=Release -DNVJPEG2K_PATH= nvjpeg2k location -# -# example cmake .. -DCMAKE_BUILD_TYPE=Release -DNVJPEG2K_PATH=~/nvJPEG2kDecodeSample/libnvjpeg_2k +$ cmake .. -DCMAKE_BUILD_TYPE=Release +# Use -DNVJPEG2K_PATH, if nvJPEG2000 is not installed at the default location. +# example cmake .. -DCMAKE_BUILD_TYPE=Release -DNVJPEG2K_PATH=/opt/libnvjpeg_2k # $ make ``` @@ -63,4 +61,5 @@ Usage: ./nvjpeg2k_dec_pipelined -i images_dir [-b batch_size] [-t total_images] warmup_iterations: Run these many batches first without measuring performance output_dir : Write decoded images in BMP/PGM format to this directory -``` \ No newline at end of file +``` + diff --git a/nvJPEG2000/nvJPEG2000-Decoder-Pipelined/nvjpeg2k_dec_pipelined.cpp b/nvJPEG2000/nvJPEG2000-Decoder-Pipelined/nvjpeg2k_dec_pipelined.cpp index a66d2025..374155a9 100644 --- a/nvJPEG2000/nvJPEG2000-Decoder-Pipelined/nvjpeg2k_dec_pipelined.cpp +++ b/nvJPEG2000/nvJPEG2000-Decoder-Pipelined/nvjpeg2k_dec_pipelined.cpp @@ -29,7 +29,7 @@ #include "nvjpeg2k_dec_pipelined.h" int write_image(std::string output_path, std::string filename, const nvjpeg2kImage_t &imgdesc, int width, int height, - uint32_t num_components, uint8_t precision, bool verbose) + uint32_t num_components, uint8_t precision, uint8_t sgn, bool verbose) { // Get the file name, without extension. // This will be used to rename the output file. @@ -42,28 +42,30 @@ int write_image(std::string output_path, std::string filename, const nvjpeg2kIma sFileName = (std::string::npos == position) ? sFileName : sFileName.substr(0, position); - int err = EXIT_SUCCESS; - - // For single component image output as PGM channel + int err = EXIT_FAILURE; + // For single component image output as PGM if (num_components == 1) { std::string fname(output_path + separator + sFileName + ".pgm"); if (imgdesc.pixel_type == NVJPEG2K_UINT8) { err = writePGM(fname.c_str(), (unsigned char *)imgdesc.pixel_data[0], - imgdesc.pitch_in_bytes[0], width, height, precision); + imgdesc.pitch_in_bytes[0], width, height, precision, sgn); } else if (imgdesc.pixel_type == NVJPEG2K_UINT16) { err = writePGM(fname.c_str(), (unsigned short *)imgdesc.pixel_data[0], - imgdesc.pitch_in_bytes[0], width, height, precision); + imgdesc.pitch_in_bytes[0], width, height, precision, sgn); + } + else if(imgdesc.pixel_type == NVJPEG2K_INT16) + { + err = writePGM(fname.c_str(), (short *)imgdesc.pixel_data[0], + imgdesc.pitch_in_bytes[0], width, height, precision, sgn); } - if (err) { std::cout << "Cannot write output file: " << fname << std::endl; } - } else if (num_components == 3 || num_components == 4) { @@ -95,7 +97,7 @@ int write_image(std::string output_path, std::string filename, const nvjpeg2kIma } else { - std::cout << "only 1 and 3 channel outputs supported\n"; + std::cout << "num channels not supported"< &file_len, - std::vector &ibuf, - std::vector &isz, + std::vector &ibuf, FileNames ¤t_names, decode_params_t ¶ms, double& parse_time) { @@ -112,12 +113,13 @@ int prepare_buffers(FileData &file_data, std::vector &file_len, nvjpeg2kImageInfo_t image_info; nvjpeg2kImageComponentInfo_t image_comp_info[NUM_COMPONENTS]; parse_time = 0; - for (uint32_t i = 0; i < file_data.size(); i++) + for (int i = 0; i < params.batch_size; i++) { - double time = Wtime(); + auto io_start = perfclock::now(); CHECK_NVJPEG2K(nvjpeg2kStreamParse(params.nvjpeg2k_handle, (unsigned char*)file_data[i].data(), file_len[i], 0, 0, params.jpeg2k_streams[i])); - parse_time += Wtime() - time; + auto io_end = perfclock::now(); + parse_time += std::chrono::duration_cast(io_end-io_start).count(); CHECK_NVJPEG2K(nvjpeg2kStreamGetImageInfo(params.jpeg2k_streams[i], &image_info)); @@ -129,40 +131,66 @@ int prepare_buffers(FileData &file_data, std::vector &file_len, for (uint32_t c = 0; c < image_info.num_components; c++) { CHECK_NVJPEG2K(nvjpeg2kStreamGetImageComponentInfo(params.jpeg2k_streams[i], &image_comp_info[c], c)); - if( image_comp_info[0].precision > MAX_PRECISION) - { - std::cout<<"Precision > "<< MAX_PRECISION<<"not supported by this sample"< "<< MAX_PRECISION<<" not supported by this sample"< isz[i].comp_sz[c]) + if (sz > ibuf[i].comp_sz[c]) { if (ibuf[i].component[c]) { CHECK_CUDA(cudaFree(ibuf[i].component[c])); } CHECK_CUDA(cudaMalloc((void**)&ibuf[i].component[c], sz)); - isz[i].comp_sz[c] = sz; + ibuf[i].comp_sz[c] = sz; } } } return EXIT_SUCCESS; } +int free_buffers(std::vector &ibuf) +{ + for(auto& buf: ibuf) + { + for(int c = 0; c < NUM_COMPONENTS; c++) + { + if(buf.component[0]) + { + CHECK_CUDA(cudaFree(buf.component[0])); + } + buf.component[0] = nullptr; + buf.comp_sz[0] = 0; + buf.pitch_in_bytes[0] = 0; + } + buf.num_comps = 0; + } + return EXIT_SUCCESS; +} -int decode_images(FileNames ¤t_names, std::vector &out, +int decode_images(FileNames ¤t_names, std::vector &out, decode_params_t ¶ms, double &time) { cudaEvent_t startEvent = NULL, stopEvent = NULL; @@ -179,23 +207,17 @@ int decode_images(FileNames ¤t_names, std::vector &out nvjpeg2kDecodeParams_t decode_params; CHECK_NVJPEG2K(nvjpeg2kDecodeParamsCreate(&decode_params)); -#if (NVJPEG2K_VER_MAJOR == 0 && NVJPEG2K_VER_MINOR >= 3) - // set RGB output for the entire batch + // set RGB output for the entire batch, applies only to images with 420/422 subsampling CHECK_NVJPEG2K(nvjpeg2kDecodeParamsSetRGBOutput(decode_params, 1)); -#endif std::vector nvjpeg2k_output; nvjpeg2k_output.resize(params.batch_size); for( int i = 0; i < params.batch_size; i++) { -#ifdef USE8BITOUTPUT - nvjpeg2k_output[i].pixel_type = NVJPEG2K_UINT8; -#else - nvjpeg2k_output[i].pixel_type = NVJPEG2K_UINT16; -#endif + nvjpeg2k_output[i].pixel_type = out[i].pixel_type; nvjpeg2k_output[i].num_components = out[i].num_comps; - nvjpeg2k_output[i].pixel_data = (void**)out[i].component; + nvjpeg2k_output[i].pixel_data = out[i].component; nvjpeg2k_output[i].pitch_in_bytes = out[i].pitch_in_bytes; } @@ -208,13 +230,10 @@ int decode_images(FileNames ¤t_names, std::vector &out // make sure that the previous stage are done CHECK_CUDA(cudaEventSynchronize(pipeline_events[buffer_index])); } -#if (NVJPEG2K_VER_MAJOR == 0 && NVJPEG2K_VER_MINOR >= 3) + CHECK_NVJPEG2K(nvjpeg2kDecodeImage(params.nvjpeg2k_handle, params.nvjpeg2k_decode_states[buffer_index], params.jpeg2k_streams[i], decode_params, &nvjpeg2k_output[i], params.stream[buffer_index])); -#else - CHECK_NVJPEG2K(nvjpeg2kDecode(params.nvjpeg2k_handle, params.nvjpeg2k_decode_states[buffer_index], - params.jpeg2k_streams[i], &nvjpeg2k_output[i], params.stream[buffer_index])); -#endif + CHECK_CUDA(cudaEventRecord(pipeline_events[buffer_index], params.stream[buffer_index])) buffer_index++; @@ -243,7 +262,8 @@ int decode_images(FileNames ¤t_names, std::vector &out CHECK_NVJPEG2K(nvjpeg2kStreamGetImageComponentInfo(params.jpeg2k_streams[i], &comp_info, 0)); write_image(params.output_dir, current_names[i], nvjpeg2k_output[i], image_info.image_width, - image_info.image_height, image_info.num_components, comp_info.precision, params.verbose); + image_info.image_height, image_info.num_components, comp_info.precision, + comp_info.sgn, params.verbose); } } @@ -268,9 +288,7 @@ double process_images(FileNames &image_names, decode_params_t ¶ms, // we wrap over image files to process total_images of files FileNames::iterator file_iter = image_names.begin(); // output buffers - std::vector iout(params.batch_size); - // output buffer - std::vector isz(params.batch_size); + std::vector iout(params.batch_size); // stream for decoding for (int p =0; p < PIPELINE_STAGES; p++) @@ -288,7 +306,7 @@ double process_images(FileNames &image_names, decode_params_t ¶ms, file_len, current_names, params.verbose)) return EXIT_FAILURE; double parsetime = 0; - if (prepare_buffers(file_data, file_len, iout, isz, + if (prepare_buffers(file_data, file_len, iout, current_names, params, parsetime)) return EXIT_FAILURE; @@ -311,6 +329,9 @@ double process_images(FileNames &image_names, decode_params_t ¶ms, CHECK_CUDA(cudaStreamDestroy(params.stream[p])); } + if(free_buffers(iout)) + EXIT_FAILURE; + return EXIT_SUCCESS; } diff --git a/nvJPEG2000/nvJPEG2000-Decoder-Pipelined/nvjpeg2k_dec_pipelined.h b/nvJPEG2000/nvJPEG2000-Decoder-Pipelined/nvjpeg2k_dec_pipelined.h index 8e6bd33f..d931e107 100644 --- a/nvJPEG2000/nvJPEG2000-Decoder-Pipelined/nvjpeg2k_dec_pipelined.h +++ b/nvJPEG2000/nvJPEG2000-Decoder-Pipelined/nvjpeg2k_dec_pipelined.h @@ -32,6 +32,7 @@ #include #include #include +#include #include // strcmpi @@ -73,37 +74,32 @@ namespace fs = std::experimental::filesystem::v1; } \ } +typedef std::chrono::high_resolution_clock perfclock; + constexpr int PIPELINE_STAGES = 10; constexpr int NUM_COMPONENTS = 4; - -//#define USE8BITOUTPUT - -#ifdef USE8BITOUTPUT -constexpr int MAX_PRECISION = 8; -typedef struct -{ - uint16_t num_comps; - unsigned char *component[NUM_COMPONENTS]; - size_t pitch_in_bytes[NUM_COMPONENTS]; -} nvjpeg2ksample_img; - -#else constexpr int MAX_PRECISION = 16; -typedef struct -{ - uint16_t num_comps; - unsigned short *component[NUM_COMPONENTS]; - size_t pitch_in_bytes[NUM_COMPONENTS]; -} nvjpeg2ksample_img; -#endif - - - -typedef struct +typedef struct nvjpeg2kImageSample { - size_t comp_sz[NUM_COMPONENTS]; -} nvjpeg2ksample_img_sz; + nvjpeg2kImageSample(): + pixel_type(NVJPEG2K_UINT8), + num_comps(0) + { + for( int c = 0; c < NUM_COMPONENTS; c++) + { + component[c] = nullptr; + pitch_in_bytes[c] = 0; + comp_sz[c] = 0; + } + } + + void *component[NUM_COMPONENTS]; + size_t pitch_in_bytes[NUM_COMPONENTS]; + size_t comp_sz[NUM_COMPONENTS]; + nvjpeg2kImageType_t pixel_type; + uint32_t num_comps; +} nvjpeg2kImageSample_t; int dev_malloc(void **p, size_t s) { return (int)cudaMalloc(p, s); } @@ -187,7 +183,6 @@ int read_next_batch(FileNames &image_names, int batch_size, continue; } raw_len[counter] = file_size; - current_names[counter] = *cur_iter; counter++; @@ -196,40 +191,6 @@ int read_next_batch(FileNames &image_names, int batch_size, return EXIT_SUCCESS; } -double Wtime(void) -{ -#if defined(_WIN32) - LARGE_INTEGER t; - static double oofreq; - static int checkedForHighResTimer; - static BOOL hasHighResTimer; - - if (!checkedForHighResTimer) - { - hasHighResTimer = QueryPerformanceFrequency(&t); - oofreq = 1.0 / (double)t.QuadPart; - checkedForHighResTimer = 1; - } - if (hasHighResTimer) - { - QueryPerformanceCounter(&t); - return (double)t.QuadPart * oofreq; - } - else - { - return (double)GetTickCount() / 1000.0; - } -#else - struct timespec tp; - int rv = clock_gettime(CLOCK_MONOTONIC, &tp); - - if (rv) - return 0; - - return tp.tv_nsec / 1.0E+9 + (double)tp.tv_sec; - -#endif -} // ***************************************************************************** // reading input directory to file list // ----------------------------------------------------------------------------- @@ -338,13 +299,13 @@ int getInputDir(std::string &input_dir, const char *executable_path) // write PGM, input - single channel, device template -int writePGM(const char *filename, const D *pSrc, size_t nSrcStep, int nWidth, int nHeight, uint8_t precision) +int writePGM(const char *filename, const D *pSrc, size_t nSrcStep, int nWidth, int nHeight, uint8_t precision, uint8_t sgn) { std::ofstream rOutputStream(filename, std::fstream::binary); if (!rOutputStream) { std::cerr << "Cannot open output file: " << filename << std::endl; - return 1; + return EXIT_FAILURE; } std::vector img(nHeight * (nSrcStep / sizeof(D))); D *hpSrc = img.data(); @@ -364,13 +325,26 @@ int writePGM(const char *filename, const D *pSrc, size_t nSrcStep, int nWidth, i const D *pEndColumn = pRow + nWidth; for (; pRow < pEndColumn; ++pRow) { - if (precision == 8) + if (precision <= 8) { rOutputStream << static_cast(*pRow); } - else + else if (precision <= 16) { - rOutputStream << static_cast((*pRow) >> 8) << static_cast((*pRow) & 0xff); + int pix_val = *pRow; + if(sgn) + { + pix_val += (1 << (precision - 1)); + if (pix_val > 65535) + { + pix_val = 65535; + } + else if (pix_val < 0) + { + pix_val = 0; + } + } + rOutputStream << static_cast((pix_val) >> 8) << static_cast((pix_val) & 0xff); } } } diff --git a/nvJPEG2000/nvJPEG2000-Decoder-Tile-Partial/CMakeLists.txt b/nvJPEG2000/nvJPEG2000-Decoder-Tile-Partial/CMakeLists.txt index 6ec1dfba..06318daf 100644 --- a/nvJPEG2000/nvJPEG2000-Decoder-Tile-Partial/CMakeLists.txt +++ b/nvJPEG2000/nvJPEG2000-Decoder-Tile-Partial/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2021 - 2023, NVIDIA CORPORATION. All rights reserved. # # NVIDIA CORPORATION and its licensors retain all intellectual property # and proprietary rights in and to this software, related documentation @@ -8,14 +8,25 @@ # license agreement from NVIDIA CORPORATION is strictly prohibited. # -cmake_minimum_required(VERSION 3.13 FATAL_ERROR) +cmake_minimum_required(VERSION 3.17 FATAL_ERROR) project(nvj2k_decode_tile_partial LANGUAGES CXX CUDA) -if(NOT NVJPEG2K_PATH) -message(FATAL_ERROR, "NVJPEG2K_PATH not set") +find_package(CUDAToolkit REQUIRED) + + +find_library(NVJPEG2K_LIB + NAMES nvjpeg2k + PATHS ${NVJPEG2K_PATH}/lib64/${CUDAToolkit_VERSION_MAJOR} + ${NVJPEG2K_PATH}/lib/${CUDAToolkit_VERSION_MAJOR}) + +if(NOT NVJPEG2K_LIB) + message(FATAL_ERROR, " nvJPEG2000 library not found. Make sure the library is installed. If using the tar.xz/zip packages set -DNVJPEG2K_PATH") +else() + message(INFO, " nvJPEG2000 library found here: ${NVJPEG2K_LIB}") endif() + set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) @@ -30,22 +41,14 @@ include_directories( ) -SET(EXAMPLES_DESCRIPTOR_SOURCES "nvj2k_DecodeTilePartial.cpp") +set(EXAMPLES_DESCRIPTOR_SOURCES "nvj2k_DecodeTilePartial.cpp") add_executable(nvj2k_decode_tile_partial ${EXAMPLES_DESCRIPTOR_SOURCES}) -find_library(NVJPEG2K_LIB - NAMES nvjpeg2k - PATHS ${NVJPEG2K_PATH}/lib64) - -find_library(CUDART_LIB - NAMES cudart - PATHS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) - if(UNIX) set(FILESYS -lstdc++fs) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") endif() -target_link_libraries(nvj2k_decode_tile_partial PUBLIC ${NVJPEG2K_LIB} ${CUDART_LIB} ${FILESYS}) +target_link_libraries(nvj2k_decode_tile_partial PUBLIC ${NVJPEG2K_LIB} CUDA::cudart ${FILESYS}) diff --git a/nvJPEG2000/nvJPEG2000-Decoder-Tile-Partial/README.md b/nvJPEG2000/nvJPEG2000-Decoder-Tile-Partial/README.md index 91eaaa26..9ac578e6 100644 --- a/nvJPEG2000/nvJPEG2000-Decoder-Tile-Partial/README.md +++ b/nvJPEG2000/nvJPEG2000-Decoder-Tile-Partial/README.md @@ -1,4 +1,4 @@ -# JPEG2000 Partial Image decoding Example using nvJPEG 2000 Library +# JPEG 2000 Partial Image decoding Example using nvJPEG2000 Library ## Description @@ -20,31 +20,29 @@ Linux, Windows x86_64 -## CUDA APIs involved +## API Documentation -[NVJPEG2000](https://docs.nvidia.com/cuda/nvjpeg2000/index.html) +[nvJPEG2000](https://docs.nvidia.com/cuda/nvjpeg2000/index.html) -# Building (make) - # Prerequisites -- A Linux system with recent NVIDIA drivers. -- Install the [CUDA 11.0 toolkit](https://developer.nvidia.com/cuda-downloads). -- CMake (3.13 or later) -- nvjpeg2k package +- Recent NVIDIA drivers. +- [CUDA toolkit](https://developer.nvidia.com/cuda-downloads). +- CMake (3.17 or later). +- Install [nvJPEG2000](https://developer.nvidia.com/nvjpeg2000/downloads). -## Build command on Linux +## Build Instructions on Linux ``` $ mkdir build $ $ cd build $ -$ export CUDACXX=nvcc +$ export CUDACXX=/usr/local/cuda/bin/nvcc $ -$ cmake .. -DCMAKE_BUILD_TYPE=Release -DNVJPEG2K_PATH= nvjpeg2k location -# -# example cmake .. -DCMAKE_BUILD_TYPE=Release -DNVJPEG2K_PATH=~/nvJPEG2kDecodeSample/libnvjpeg_2k +$ cmake .. -DCMAKE_BUILD_TYPE=Release +# Use -DNVJPEG2K_PATH, if nvJPEG2000 is not installed at the default location. +# example cmake .. -DCMAKE_BUILD_TYPE=Release -DNVJPEG2K_PATH=/opt/libnvjpeg_2k # $ make ``` @@ -65,3 +63,4 @@ Usage: ./nvj2k_decode_tile_partial -i images_dir [-b batch_size] [-t total_image -da x0,y0,x1,y1 : Decode Area of Interest. The coordinates are relative to the image origin ``` + diff --git a/nvJPEG2000/nvJPEG2000-Decoder-Tile-Partial/nvj2k_DecodeTilePartial.cpp b/nvJPEG2000/nvJPEG2000-Decoder-Tile-Partial/nvj2k_DecodeTilePartial.cpp index 8dde1a56..b757e4ca 100644 --- a/nvJPEG2000/nvJPEG2000-Decoder-Tile-Partial/nvj2k_DecodeTilePartial.cpp +++ b/nvJPEG2000/nvJPEG2000-Decoder-Tile-Partial/nvj2k_DecodeTilePartial.cpp @@ -30,7 +30,7 @@ template int write_image(std::string output_path, std::string filename, const T &imgdesc, int width, int height, - uint32_t num_components, uint8_t precision, bool verbose) + uint32_t num_components, uint8_t precision, uint8_t sgn, bool verbose) { // Get the file name, without extension. // This will be used to rename the output file. @@ -52,19 +52,22 @@ int write_image(std::string output_path, std::string filename, const T &imgdesc, if (imgdesc.pixel_type == NVJPEG2K_UINT8) { err = writePGM(fname.c_str(), (unsigned char *)imgdesc.pixel_data[0], - imgdesc.pitch_in_bytes[0], width, height, precision); + imgdesc.pitch_in_bytes[0], width, height, precision, sgn); } else if (imgdesc.pixel_type == NVJPEG2K_UINT16) { err = writePGM(fname.c_str(), (unsigned short *)imgdesc.pixel_data[0], - imgdesc.pitch_in_bytes[0], width, height, precision); + imgdesc.pitch_in_bytes[0], width, height, precision, sgn); + } + else if(imgdesc.pixel_type == NVJPEG2K_INT16) + { + err = writePGM(fname.c_str(), (short *)imgdesc.pixel_data[0], + imgdesc.pitch_in_bytes[0], width, height, precision, sgn); } - if (err) { std::cout << "Cannot write output file: " << fname << std::endl; } - } else if (num_components == 3 || num_components == 4) { @@ -96,7 +99,7 @@ int write_image(std::string output_path, std::string filename, const T &imgdesc, } else { - std::cout << "only 1 and 3 channel outputs supported\n"; + std::cout << "num channels not supported"< &file_len, - std::vector &img_width, std::vector &img_height, - std::vector &ibuf, - std::vector &isz, + std::vector &ibuf, FileNames ¤t_names, decode_params_t ¶ms, double& parse_time) { @@ -114,12 +115,13 @@ int prepare_buffers(FileData &file_data, std::vector &file_len, nvjpeg2kImageInfo_t image_info; nvjpeg2kImageComponentInfo_t image_comp_info[NUM_COMPONENTS]; parse_time = 0; - for (uint32_t i = 0; i < file_data.size(); i++) + for (int i = 0; i < params.batch_size; i++) { - double time = Wtime(); + auto io_start = perfclock::now(); CHECK_NVJPEG2K(nvjpeg2kStreamParse(params.nvjpeg2k_handle, (unsigned char*)file_data[i].data(), file_len[i], 0, 0, params.jpeg2k_streams[i])); - parse_time += Wtime() - time; + auto io_end = perfclock::now(); + parse_time += std::chrono::duration_cast(io_end-io_start).count(); CHECK_NVJPEG2K(nvjpeg2kStreamGetImageInfo(params.jpeg2k_streams[i], &image_info)); @@ -131,43 +133,61 @@ int prepare_buffers(FileData &file_data, std::vector &file_len, for (uint32_t c = 0; c < image_info.num_components; c++) { CHECK_NVJPEG2K(nvjpeg2kStreamGetImageComponentInfo(params.jpeg2k_streams[i], &image_comp_info[c], c)); - - if( image_comp_info[0].precision > MAX_PRECISION) - { - std::cout<<"Precision > "<< MAX_PRECISION<<" not supported by this sample"< "<< MAX_PRECISION<<" not supported by this sample"< isz[i].comp_sz[c]) + if (sz > ibuf[i].comp_sz[c]) { if (ibuf[i].pixel_data[c]) { CHECK_CUDA(cudaFree(ibuf[i].pixel_data[c])); } CHECK_CUDA(cudaMalloc((void**)&ibuf[i].pixel_data[c], sz)); - isz[i].comp_sz[c] = sz; + ibuf[i].comp_sz[c] = sz; + } + } + } + return EXIT_SUCCESS; +} + +int free_buffers(std::vector &ibuf) +{ + for(auto& buf: ibuf) + { + for(int c = 0; c < NUM_COMPONENTS; c++) + { + if(buf.pixel_data[c]) + { + CHECK_CUDA(cudaFree(buf.pixel_data[c])); } + buf.pixel_data[c] = nullptr; + buf.comp_sz[c] = 0; + buf.pitch_in_bytes[c] = 0; } + buf.num_comps = 0; } return EXIT_SUCCESS; } @@ -211,7 +231,7 @@ void determine_tiles_to_decode(const nvjpeg2kImageInfo_t& image_info, decode_par } } -int decode_images_partial(FileNames ¤t_names, std::vector &out, +int decode_images_partial(FileNames ¤t_names, std::vector &out, decode_params_t ¶ms, double &time) { cudaEvent_t startEvent = NULL, stopEvent = NULL; @@ -229,10 +249,9 @@ int decode_images_partial(FileNames ¤t_names, std::vector= 3) // set RGB output for the entire batch CHECK_NVJPEG2K(nvjpeg2kDecodeParamsSetRGBOutput(decode_params, 1)); -#endif + CHECK_CUDA(cudaEventRecord(startEvent, params.stream[0])); int buffer_index = 0; for(int batch_id = 0; batch_id < params.batch_size; batch_id++) @@ -249,16 +268,16 @@ int decode_images_partial(FileNames ¤t_names, std::vector &out, +int decode_images(FileNames ¤t_names, std::vector &out, decode_params_t ¶ms, double &time) { cudaEvent_t startEvent = NULL, stopEvent = NULL; @@ -345,10 +364,8 @@ int decode_images(FileNames ¤t_names, std::vector &out nvjpeg2kDecodeParams_t decode_params; CHECK_NVJPEG2K(nvjpeg2kDecodeParamsCreate(&decode_params)); -#if (NVJPEG2K_VER_MAJOR == 0 && NVJPEG2K_VER_MINOR >= 3) // set RGB output for the entire batch CHECK_NVJPEG2K(nvjpeg2kDecodeParamsSetRGBOutput(decode_params, 1)); -#endif CHECK_CUDA(cudaEventRecord(startEvent, params.stream[0])); int buffer_index = 0; @@ -364,14 +381,14 @@ int decode_images(FileNames ¤t_names, std::vector &out { // make sure that the previous stage are done CHECK_CUDA(cudaEventSynchronize(pipeline_events[buffer_index])); - nvjpeg2kImage16u_t tile_decode_out; + nvjpeg2kImageSample tile_decode_out; tile_decode_out.num_comps = out[batch_id].num_comps; tile_decode_out.pixel_type = out[batch_id].pixel_type; - uint32_t bytes_per_comp = sizeof(*tile_decode_out.pixel_data[0]); + uint32_t bytes_per_comp = tile_decode_out.pixel_type == NVJPEG2K_UINT8 ? 1 : 2; for(uint32_t c = 0; c < out[batch_id].num_comps; c++) { - size_t pitch_in_pixels = out[batch_id].pitch_in_bytes[c]/bytes_per_comp; - tile_decode_out.pixel_data[c] = out[batch_id].pixel_data[c] + tile_y0 * pitch_in_pixels + (tile_x0); + size_t pitch_in_bytes = out[batch_id].pitch_in_bytes[c]; + tile_decode_out.pixel_data[c] = (uint8_t*)(out[batch_id].pixel_data[c]) + tile_y0 * pitch_in_bytes + (tile_x0 * bytes_per_comp); tile_decode_out.pitch_in_bytes[c] = out[batch_id].pitch_in_bytes[c]; } // make sure that the previous stage are done before reusing @@ -419,7 +436,7 @@ int decode_images(FileNames ¤t_names, std::vector &out write_image(params.output_dir, current_names[i], out[i], image_info.image_width, image_info.image_height, image_info.num_components, comp_info.precision, - params.verbose); + comp_info.sgn, params.verbose); } } @@ -441,14 +458,10 @@ double process_images(FileNames &image_names, decode_params_t ¶ms, FileData file_data(params.batch_size); std::vector file_len(params.batch_size); FileNames current_names(params.batch_size); - std::vector widths(params.batch_size); - std::vector heights(params.batch_size); // we wrap over image files to process total_images of files FileNames::iterator file_iter = image_names.begin(); // output buffers - std::vector iout(params.batch_size); - // output buffer - std::vector isz(params.batch_size); + std::vector iout(params.batch_size); // stream for decoding for (int p =0; p < PIPELINE_STAGES; p++) @@ -466,7 +479,7 @@ double process_images(FileNames &image_names, decode_params_t ¶ms, file_len, current_names, params.verbose)) return EXIT_FAILURE; double parsetime = 0; - if (prepare_buffers(file_data, file_len, widths, heights, iout, isz, + if (prepare_buffers(file_data, file_len, iout, current_names, params, parsetime)) return EXIT_FAILURE; @@ -500,6 +513,9 @@ double process_images(FileNames &image_names, decode_params_t ¶ms, CHECK_CUDA(cudaStreamDestroy(params.stream[p])); } + if(free_buffers(iout)) + EXIT_FAILURE; + return EXIT_SUCCESS; } @@ -636,7 +652,6 @@ int main(int argc, const char *argv[]) { return EXIT_SUCCESS; } - } if( params.verbose) diff --git a/nvJPEG2000/nvJPEG2000-Decoder-Tile-Partial/nvj2k_DecodeTilePartial.h b/nvJPEG2000/nvJPEG2000-Decoder-Tile-Partial/nvj2k_DecodeTilePartial.h index d3da1be0..a59df23b 100644 --- a/nvJPEG2000/nvJPEG2000-Decoder-Tile-Partial/nvj2k_DecodeTilePartial.h +++ b/nvJPEG2000/nvJPEG2000-Decoder-Tile-Partial/nvj2k_DecodeTilePartial.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2021 - 2023, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -32,6 +32,7 @@ #include #include #include +#include #include // strcmpi @@ -41,7 +42,6 @@ const std::string separator = "\\"; namespace fs = std::filesystem; #else -#include // timings #include const std::string separator = "/"; namespace fs = std::experimental::filesystem::v1; @@ -73,23 +73,32 @@ namespace fs = std::experimental::filesystem::v1; } \ } +typedef std::chrono::high_resolution_clock perfclock; + constexpr int PIPELINE_STAGES = 10; constexpr int MAX_PRECISION = 16; constexpr int NUM_COMPONENTS = 4; -typedef struct +typedef struct nvjpeg2kImageSample { - unsigned short *pixel_data[NUM_COMPONENTS]; - size_t pitch_in_bytes[NUM_COMPONENTS]; + nvjpeg2kImageSample(): + pixel_type(NVJPEG2K_UINT8), + num_comps(0) + { + for( int c = 0; c < NUM_COMPONENTS; c++) + { + pixel_data[c] = nullptr; + pitch_in_bytes[c] = 0; + comp_sz[c] = 0; + } + } + + void *pixel_data[NUM_COMPONENTS]; + size_t pitch_in_bytes[NUM_COMPONENTS]; + size_t comp_sz[NUM_COMPONENTS]; nvjpeg2kImageType_t pixel_type; uint32_t num_comps; -} nvjpeg2kImage16u_t; - - -typedef struct -{ - size_t comp_sz[NUM_COMPONENTS]; -} nvjpeg2ksample_img_sz; +} nvjpeg2kImageSample_t; int dev_malloc(void **p, size_t s) { return (int)cudaMalloc(p, s); } @@ -179,7 +188,6 @@ int read_next_batch(FileNames &image_names, int batch_size, continue; } raw_len[counter] = file_size; - current_names[counter] = *cur_iter; counter++; @@ -188,40 +196,6 @@ int read_next_batch(FileNames &image_names, int batch_size, return EXIT_SUCCESS; } -double Wtime(void) -{ -#if defined(_WIN32) - LARGE_INTEGER t; - static double oofreq; - static int checkedForHighResTimer; - static BOOL hasHighResTimer; - - if (!checkedForHighResTimer) - { - hasHighResTimer = QueryPerformanceFrequency(&t); - oofreq = 1.0 / (double)t.QuadPart; - checkedForHighResTimer = 1; - } - if (hasHighResTimer) - { - QueryPerformanceCounter(&t); - return (double)t.QuadPart * oofreq; - } - else - { - return (double)GetTickCount() / 1000.0; - } -#else - struct timespec tp; - int rv = clock_gettime(CLOCK_MONOTONIC, &tp); - - if (rv) - return 0; - - return tp.tv_nsec / 1.0E+9 + (double)tp.tv_sec; - -#endif -} // ***************************************************************************** // reading input directory to file list // ----------------------------------------------------------------------------- @@ -249,7 +223,6 @@ int readInput(const std::string &sInputPath, std::vector &filelist) return EXIT_FAILURE; } - return EXIT_SUCCESS; } @@ -331,13 +304,13 @@ int getInputDir(std::string &input_dir, const char *executable_path) // write PGM, input - single channel, device template -int writePGM(const char *filename, const D *pSrc, size_t nSrcStep, int nWidth, int nHeight, uint8_t precision) +int writePGM(const char *filename, const D *pSrc, size_t nSrcStep, int nWidth, int nHeight, uint8_t precision, uint8_t sgn) { std::ofstream rOutputStream(filename, std::fstream::binary); if (!rOutputStream) { std::cerr << "Cannot open output file: " << filename << std::endl; - return 1; + return EXIT_FAILURE; } std::vector img(nHeight * (nSrcStep / sizeof(D))); D *hpSrc = img.data(); @@ -357,13 +330,26 @@ int writePGM(const char *filename, const D *pSrc, size_t nSrcStep, int nWidth, i const D *pEndColumn = pRow + nWidth; for (; pRow < pEndColumn; ++pRow) { - if (precision == 8) + if (precision <= 8) { rOutputStream << static_cast(*pRow); } - else + else if (precision <= 16) { - rOutputStream << static_cast((*pRow) >> 8) << static_cast((*pRow) & 0xff); + int pix_val = *pRow; + if(sgn) + { + pix_val += (1 << (precision - 1)); + if (pix_val > 65535) + { + pix_val = 65535; + } + else if (pix_val < 0) + { + pix_val = 0; + } + } + rOutputStream << static_cast((pix_val) >> 8) << static_cast((pix_val) & 0xff); } } } diff --git a/nvJPEG2000/nvJPEG2000-Decoder/CMakeLists.txt b/nvJPEG2000/nvJPEG2000-Decoder/CMakeLists.txt index 0fd4d7ac..57569a99 100644 --- a/nvJPEG2000/nvJPEG2000-Decoder/CMakeLists.txt +++ b/nvJPEG2000/nvJPEG2000-Decoder/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (c) 2020 - 2021, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2020 - 2023, NVIDIA CORPORATION. All rights reserved. # # NVIDIA CORPORATION and its licensors retain all intellectual property # and proprietary rights in and to this software, related documentation @@ -8,14 +8,25 @@ # license agreement from NVIDIA CORPORATION is strictly prohibited. # -cmake_minimum_required(VERSION 3.13 FATAL_ERROR) +cmake_minimum_required(VERSION 3.17 FATAL_ERROR) project(nvjpeg2000_sample LANGUAGES CXX CUDA) -if(NOT NVJPEG2K_PATH) -message(FATAL_ERROR, "NVJPEG2K_PATH not set") +find_package(CUDAToolkit REQUIRED) + + +find_library(NVJPEG2K_LIB + NAMES nvjpeg2k + PATHS ${NVJPEG2K_PATH}/lib64/${CUDAToolkit_VERSION_MAJOR} + ${NVJPEG2K_PATH}/lib/${CUDAToolkit_VERSION_MAJOR}) + +if(NOT NVJPEG2K_LIB) + message(FATAL_ERROR, " nvJPEG2000 library not found. Make sure the library is installed. If using the tar.xz/zip packages set -DNVJPEG2K_PATH") +else() + message(INFO, " nvJPEG2000 library found here: ${NVJPEG2K_LIB}") endif() + set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) @@ -30,22 +41,14 @@ include_directories( ) -SET(EXAMPLES_DESCRIPTOR_SOURCES "nvjpeg2000DecodeSample.cpp") +set(EXAMPLES_DESCRIPTOR_SOURCES "nvjpeg2000DecodeSample.cpp") add_executable(nvjpeg2000_decode_sample ${EXAMPLES_DESCRIPTOR_SOURCES}) -find_library(NVJPEG2K_LIB - NAMES nvjpeg2k - PATHS ${NVJPEG2K_PATH}/lib64) - -find_library(CUDART_LIB - NAMES cudart - PATHS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) - if(UNIX) set(FILESYS -lstdc++fs) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") endif() -target_link_libraries(nvjpeg2000_decode_sample PUBLIC ${NVJPEG2K_LIB} ${CUDART_LIB} ${FILESYS}) +target_link_libraries(nvjpeg2000_decode_sample PUBLIC ${NVJPEG2K_LIB} CUDA::cudart ${FILESYS}) diff --git a/nvJPEG2000/nvJPEG2000-Decoder/README.md b/nvJPEG2000/nvJPEG2000-Decoder/README.md index 3539953c..833d676c 100644 --- a/nvJPEG2000/nvJPEG2000-Decoder/README.md +++ b/nvJPEG2000/nvJPEG2000-Decoder/README.md @@ -1,12 +1,12 @@ -# JPEG2000 Image decoding Example using nvJPEG 2000 Library +# JPEG2000 Decode example using nvJPEG2000 Library ## Description -This code demonstrates JPEG2000 Image decoding using nvJPEG2000 library. +This code demonstrates JPEG 2000 Image decoding using nvJPEG2000 library. ## Key Concepts -Image Decoding from NVJPEG2000 Library +Image Decoding from nvJPEG2000 Library ## Supported SM Architectures @@ -14,37 +14,35 @@ Image Decoding from NVJPEG2000 Library ## Supported OSes -Linux +Linux, Windows ## Supported CPU Architecture x86_64 -## CUDA APIs involved +## API Documentation -[NVJPEG2000](https://docs.nvidia.com/cuda/nvjpeg2000/index.html) +[nvJPEG2000](https://docs.nvidia.com/cuda/nvjpeg2000/index.html) -# Building (make) - # Prerequisites -- A Linux system with recent NVIDIA drivers. -- Install the [CUDA 11.0 toolkit](https://developer.nvidia.com/cuda-downloads). -- CMake (3.13 or later) -- nvjpeg2k package +- Recent NVIDIA drivers. +- [CUDA toolkit](https://developer.nvidia.com/cuda-downloads). +- CMake (3.17 or later). +- Install [nvJPEG2000](https://developer.nvidia.com/nvjpeg2000/downloads). -## Build command on Linux +## Build Instructions on Linux ``` $ mkdir build $ $ cd build $ -$ export CUDACXX=nvcc +$ export CUDACXX=/usr/local/cuda/bin/nvcc $ -$ cmake .. -DNVJPEG2K_PATH= nvjpeg2k location -# -# example cmake .. -DNVJPEG2K_PATH=~/nvJPEG2kDecodeSample/libnvjpeg_2k +$ cmake .. -DCMAKE_BUILD_TYPE=Release +# Use -DNVJPEG2K_PATH, if nvJPEG2000 is not installed at the default location. +# example cmake .. -DCMAKE_BUILD_TYPE=Release -DNVJPEG2K_PATH=/opt/libnvjpeg_2k # $ make ``` @@ -95,4 +93,5 @@ Avg decoding time per image: 0.0498233 Avg images per sec: 20.0709 Avg decoding time per batch: 0.0498233 -``` \ No newline at end of file +``` + diff --git a/nvJPEG2000/nvJPEG2000-Decoder/nvjpeg2000DecodeSample.cpp b/nvJPEG2000/nvJPEG2000-Decoder/nvjpeg2000DecodeSample.cpp index abb169d7..c7e416d8 100644 --- a/nvJPEG2000/nvJPEG2000-Decoder/nvjpeg2000DecodeSample.cpp +++ b/nvJPEG2000/nvJPEG2000-Decoder/nvjpeg2000DecodeSample.cpp @@ -29,7 +29,7 @@ #include "nvjpeg2000DecodeSample.h" int write_image(std::string output_path, std::string filename, const nvjpeg2kImage_t &imgdesc, int width, int height, - uint32_t num_components, uint8_t precision, bool verbose) + uint32_t num_components, uint8_t precision, uint8_t sgn, bool verbose) { // Get the file name, without extension. // This will be used to rename the output file. @@ -51,19 +51,22 @@ int write_image(std::string output_path, std::string filename, const nvjpeg2kIma if (imgdesc.pixel_type == NVJPEG2K_UINT8) { err = writePGM(fname.c_str(), (unsigned char *)imgdesc.pixel_data[0], - imgdesc.pitch_in_bytes[0], width, height, precision); + imgdesc.pitch_in_bytes[0], width, height, precision, sgn); } else if (imgdesc.pixel_type == NVJPEG2K_UINT16) { err = writePGM(fname.c_str(), (unsigned short *)imgdesc.pixel_data[0], - imgdesc.pitch_in_bytes[0], width, height, precision); + imgdesc.pitch_in_bytes[0], width, height, precision, sgn); + } + else if(imgdesc.pixel_type == NVJPEG2K_INT16) + { + err = writePGM(fname.c_str(), (short *)imgdesc.pixel_data[0], + imgdesc.pitch_in_bytes[0], width, height, precision, sgn); } - if (err) { std::cout << "Cannot write output file: " << fname << std::endl; } - } else if (num_components == 3 || num_components == 4) { @@ -95,7 +98,7 @@ int write_image(std::string output_path, std::string filename, const nvjpeg2kIma } else { - std::cout << "only 1 and 3 channel outputs supported\n"; + std::cout << "num channels not supported"< decode_output_pitch; for( int i =0; i < params.batch_size; i++) { - double parse_time = Wtime(); + auto io_start = perfclock::now(); CHECK_NVJPEG2K(nvjpeg2kStreamParse(params.nvjpeg2k_handle, (unsigned char*)img_data[i].data(), img_len[i], 0, 0, params.jpeg2k_stream)); - parse_time = Wtime() - parse_time; + auto io_end = perfclock::now(); + double parse_time = std::chrono::duration_cast(io_end-io_start).count(); CHECK_NVJPEG2K(nvjpeg2kStreamGetImageInfo(params.jpeg2k_stream, &image_info)); @@ -181,7 +185,7 @@ int decode_images(FileNames ¤t_names, const FileData &img_data, const std: { decode_output_u16.resize(image_info.num_components); output_image.pixel_data = (void **)decode_output_u16.data(); - output_image.pixel_type = NVJPEG2K_UINT16; + output_image.pixel_type = image_comp_info[0].sgn ? NVJPEG2K_INT16 : NVJPEG2K_UINT16; bytes_per_element = 2; } else if (image_comp_info[0].precision == 8) @@ -203,13 +207,8 @@ int decode_images(FileNames ¤t_names, const FileData &img_data, const std: } CHECK_CUDA(cudaEventRecord(startEvent, params.stream)); -#if (NVJPEG2K_VER_MAJOR == 0 && NVJPEG2K_VER_MINOR >= 3) CHECK_NVJPEG2K(nvjpeg2kDecodeImage(params.nvjpeg2k_handle, params.nvjpeg2k_decode_state, params.jpeg2k_stream, decode_params, &output_image, params.stream)); -#else - CHECK_NVJPEG2K(nvjpeg2kDecode(params.nvjpeg2k_handle, params.nvjpeg2k_decode_state, - params.jpeg2k_stream, &output_image, params.stream)); -#endif CHECK_CUDA(cudaEventRecord(stopEvent, params.stream)); CHECK_CUDA(cudaEventSynchronize(stopEvent)); @@ -233,7 +232,7 @@ int decode_images(FileNames ¤t_names, const FileData &img_data, const std: } write_image(params.output_dir, current_names[i], output_image, image_info.image_width, image_info.image_height, image_info.num_components, image_comp_info[0].precision, - params.verbose); + image_comp_info[0].sgn, params.verbose); } if(free_output_buffers(output_image)) diff --git a/nvJPEG2000/nvJPEG2000-Decoder/nvjpeg2000DecodeSample.h b/nvJPEG2000/nvJPEG2000-Decoder/nvjpeg2000DecodeSample.h index 98316e48..cc4ea89b 100644 --- a/nvJPEG2000/nvJPEG2000-Decoder/nvjpeg2000DecodeSample.h +++ b/nvJPEG2000/nvJPEG2000-Decoder/nvjpeg2000DecodeSample.h @@ -32,6 +32,7 @@ #include #include #include +#include #include // strcmpi @@ -41,7 +42,6 @@ const std::string separator = "\\"; namespace fs = std::filesystem; #else -#include // timings #include const std::string separator = "/"; namespace fs = std::experimental::filesystem::v1; @@ -73,6 +73,8 @@ namespace fs = std::experimental::filesystem::v1; } \ } +typedef std::chrono::high_resolution_clock perfclock; + int dev_malloc(void **p, size_t s) { return (int)cudaMalloc(p, s); } int dev_free(void *p) { return (int)cudaFree(p); } @@ -154,7 +156,6 @@ int read_next_batch(FileNames &image_names, int batch_size, continue; } raw_len[counter] = file_size; - current_names[counter] = *cur_iter; counter++; @@ -163,40 +164,6 @@ int read_next_batch(FileNames &image_names, int batch_size, return EXIT_SUCCESS; } -double Wtime(void) -{ -#if defined(_WIN32) - LARGE_INTEGER t; - static double oofreq; - static int checkedForHighResTimer; - static BOOL hasHighResTimer; - - if (!checkedForHighResTimer) - { - hasHighResTimer = QueryPerformanceFrequency(&t); - oofreq = 1.0 / (double)t.QuadPart; - checkedForHighResTimer = 1; - } - if (hasHighResTimer) - { - QueryPerformanceCounter(&t); - return (double)t.QuadPart * oofreq; - } - else - { - return (double)GetTickCount() / 1000.0; - } -#else - struct timespec tp; - int rv = clock_gettime(CLOCK_MONOTONIC, &tp); - - if (rv) - return 0; - - return tp.tv_nsec / 1.0E+9 + (double)tp.tv_sec; - -#endif -} // ***************************************************************************** // reading input directory to file list // ----------------------------------------------------------------------------- @@ -305,13 +272,13 @@ int getInputDir(std::string &input_dir, const char *executable_path) // write PGM, input - single channel, device template -int writePGM(const char *filename, const D *pSrc, size_t nSrcStep, int nWidth, int nHeight, uint8_t precision) +int writePGM(const char *filename, const D *pSrc, size_t nSrcStep, int nWidth, int nHeight, uint8_t precision, uint8_t sgn) { std::ofstream rOutputStream(filename, std::fstream::binary); if (!rOutputStream) { std::cerr << "Cannot open output file: " << filename << std::endl; - return 1; + return EXIT_FAILURE; } std::vector img(nHeight * (nSrcStep / sizeof(D))); D *hpSrc = img.data(); @@ -331,13 +298,26 @@ int writePGM(const char *filename, const D *pSrc, size_t nSrcStep, int nWidth, i const D *pEndColumn = pRow + nWidth; for (; pRow < pEndColumn; ++pRow) { - if (precision == 8) + if (precision <= 8) { rOutputStream << static_cast(*pRow); } - else + else if (precision <= 16) { - rOutputStream << static_cast((*pRow) >> 8) << static_cast((*pRow) & 0xff); + int pix_val = *pRow; + if(sgn) + { + pix_val += (1 << (precision - 1)); + if (pix_val > 65535) + { + pix_val = 65535; + } + else if (pix_val < 0) + { + pix_val = 0; + } + } + rOutputStream << static_cast((pix_val) >> 8) << static_cast((pix_val) & 0xff); } } } diff --git a/nvJPEG2000/nvJPEG2000-Encoder/CMakeLists.txt b/nvJPEG2000/nvJPEG2000-Encoder/CMakeLists.txt index 9d771abc..4dfb14d7 100644 --- a/nvJPEG2000/nvJPEG2000-Encoder/CMakeLists.txt +++ b/nvJPEG2000/nvJPEG2000-Encoder/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2020 - 2023, NVIDIA CORPORATION. All rights reserved. # # NVIDIA CORPORATION and its licensors retain all intellectual property # and proprietary rights in and to this software, related documentation @@ -8,14 +8,25 @@ # license agreement from NVIDIA CORPORATION is strictly prohibited. # -cmake_minimum_required(VERSION 3.13 FATAL_ERROR) +cmake_minimum_required(VERSION 3.17 FATAL_ERROR) project(nvjpeg2k_sample LANGUAGES CXX CUDA) -if(NOT NVJPEG2K_PATH) -message(FATAL_ERROR, "NVJPEG2K_PATH not set") +find_package(CUDAToolkit REQUIRED) + + +find_library(NVJPEG2K_LIB + NAMES nvjpeg2k + PATHS ${NVJPEG2K_PATH}/lib64/${CUDAToolkit_VERSION_MAJOR} + ${NVJPEG2K_PATH}/lib/${CUDAToolkit_VERSION_MAJOR}) + +if(NOT NVJPEG2K_LIB) + message(FATAL_ERROR, " nvJPEG2000 library not found. Make sure the library is installed. If using the tar.xz/zip packages set -DNVJPEG2K_PATH") +else() + message(INFO, " nvJPEG2000 library found here: ${NVJPEG2K_LIB}") endif() + set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) @@ -34,17 +45,10 @@ set(EXAMPLES_DESCRIPTOR_SOURCES "nvjpeg2k_encode.cpp") add_executable(nvjpeg2k_encode ${EXAMPLES_DESCRIPTOR_SOURCES}) -find_library(NVJPEG2K_LIB - NAMES nvjpeg2k - PATHS ${NVJPEG2K_PATH}/lib64) - -find_library(CUDART_LIB - NAMES cudart - PATHS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) - if(UNIX) set(FILESYS -lstdc++fs) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") endif() - -target_link_libraries(nvjpeg2k_encode PUBLIC ${NVJPEG2K_LIB} ${CUDART_LIB} ${FILESYS}) + +target_link_libraries(nvjpeg2k_encode PUBLIC ${NVJPEG2K_LIB} CUDA::cudart ${FILESYS}) + diff --git a/nvJPEG2000/nvJPEG2000-Encoder/README.md b/nvJPEG2000/nvJPEG2000-Encoder/README.md index ee273093..9343dfb1 100644 --- a/nvJPEG2000/nvJPEG2000-Encoder/README.md +++ b/nvJPEG2000/nvJPEG2000-Encoder/README.md @@ -1,4 +1,4 @@ -# JPEG2000 Encode example using nvJPEG2000 Library +# JPEG 2000 Encode example using nvJPEG2000 Library ## Description @@ -20,30 +20,28 @@ Linux, Windows x86_64 -## CUDA APIs involved +## API Documentation -[NVJPEG2000](https://docs.nvidia.com/cuda/nvjpeg2000/index.html) +[nvJPEG2000](https://docs.nvidia.com/cuda/nvjpeg2000/index.html) -# Building (make) - # Prerequisites -- A Linux system with recent NVIDIA drivers. -- Install the [CUDA 11.0 toolkit](https://developer.nvidia.com/cuda-downloads). -- CMake (3.13 or later) -- nvjpeg2k package +- Recent NVIDIA drivers. +- [CUDA toolkit](https://developer.nvidia.com/cuda-downloads). +- CMake (3.17 or later). +- Install [nvJPEG2000](https://developer.nvidia.com/nvjpeg2000/downloads). -## Build command on Linux +## Build Instructions on Linux ``` $ mkdir build $ $ cd build $ -$ export CUDACXX=nvcc +$ export CUDACXX=/usr/local/cuda/bin/nvcc $ -$ cmake .. -DCMAKE_BUILD_TYPE=Release -DNVJPEG2K_PATH= nvjpeg2k location -# +$ cmake .. -DCMAKE_BUILD_TYPE=Release +# Use -DNVJPEG2K_PATH, if nvJPEG2000 is not installed at the default location. # example cmake .. -DCMAKE_BUILD_TYPE=Release -DNVJPEG2K_PATH=/opt/libnvjpeg_2k # $ make @@ -72,7 +70,7 @@ Parameters: ``` -``` + Example: Sample example on GV100, Ubuntu 18.04, CUDA 11.2