-
Notifications
You must be signed in to change notification settings - Fork 22
/
Copy pathutils_api.cpp
117 lines (94 loc) · 3.96 KB
/
utils_api.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
/*******************************************************************************
* Copyright (C) 2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
******************************************************************************/
/*
* Intel® Query Processing Library (Intel® QPL)
* Utility API (public C API)
*/
#include <cstdint>
#include "qpl/qpl.h"
#include "job.hpp"
#include "own_defs.h"
#ifdef __cplusplus
extern "C" {
#endif
// Calculate maximum size needed for compression operations
QPL_FUN(uint32_t, qpl_get_safe_deflate_compression_buffer_size, (uint32_t source_size)) {
// Define constants
const uint8_t BLOCK_HEADER_SIZE = 5U;
const uint8_t FIRST_BLOCK_HEADER_SIZE = 1U;
const uint8_t OUTPUT_ACCUMULATOR_SIZE = 32U;
const uint8_t END_OF_BLOCK_SIZE = 2U;
// Efficient divide by 65535, works for all values of x except for 0xFFFFFFFF
auto div_65535 = [](uint64_t x) -> uint64_t {
const uint8_t BITS_PER_BYTE = 8U;
const uint8_t DIV_ROUND_UP_INCREMENT = 1U;
const uint64_t y = (x >> (BITS_PER_BYTE * 2)) + x + DIV_ROUND_UP_INCREMENT;
return (y >> (BITS_PER_BYTE * 2));
};
// Calculating stored block size, does not include first block header
#ifdef _WIN32
auto stored_block_size = [&div_65535, BLOCK_HEADER_SIZE, FIRST_BLOCK_HEADER_SIZE](uint64_t osize) -> uint64_t {
#else
auto stored_block_size = [&div_65535](uint64_t osize) -> uint64_t {
#endif
const uint32_t ROUND_UP_OFFSET = UINT16_MAX - 1U;
if (osize == 0) return BLOCK_HEADER_SIZE - FIRST_BLOCK_HEADER_SIZE;
const uint64_t num_blks = div_65535(osize + ROUND_UP_OFFSET); // round up
return osize + (num_blks * BLOCK_HEADER_SIZE) - FIRST_BLOCK_HEADER_SIZE;
};
// Estimate the maximum size needed for compression operations
// Add 1 byte for the first block header, 32 bytes for the output accumulator, and 2 bytes for the EOB
uint64_t num_bytes =
stored_block_size(source_size) + FIRST_BLOCK_HEADER_SIZE + OUTPUT_ACCUMULATOR_SIZE + END_OF_BLOCK_SIZE;
// Round up to even number of bytes for BE16 encoding
if (num_bytes % 2 != 0) { num_bytes++; }
// Check if the size is greater than UINT32_MAX since the function returns a uint32_t
if (num_bytes > UINT32_MAX) { return 0U; }
return static_cast<uint32_t>(num_bytes);
}
/**
* @enum qpl_execution_record_t
* @brief Enumeration for types of execution information.
*
* This enumeration defines the various types of execution information that can be queried
* from the Intel® Query Processing Library (Intel® QPL).
*/
typedef enum { // NOLINT(performance-enum-size)
QPL_EXECUTION_INFO_ELAPSED_TIME = 0,
QPL_EXECUTION_INFO_WQ_IDX,
QPL_EXECUTION_INFO_DEVICE_IDX
} qpl_execution_record_t;
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC visibility push(default)
#endif
/**
* @brief Get execution record information for the given job.
*/
QPL_FUN(qpl_status, qpl_get_execution_record,
(const qpl_job* const job_ptr, qpl_execution_record_t info_type, void* info_value)) {
if (job_ptr == nullptr || info_value == nullptr) { return QPL_STS_NULL_PTR_ERR; }
qpl_hw_state* state_ptr = reinterpret_cast<qpl_hw_state*>(qpl::job::get_state(job_ptr));
if (state_ptr == nullptr) { return QPL_STS_LIBRARY_INTERNAL_ERR; }
switch (info_type) {
case QPL_EXECUTION_INFO_ELAPSED_TIME:
*reinterpret_cast<double*>(info_value) = state_ptr->execution_record.elapsed_time_;
break;
case QPL_EXECUTION_INFO_WQ_IDX:
*reinterpret_cast<uint32_t*>(info_value) = state_ptr->execution_record.wq_idx_;
break;
case QPL_EXECUTION_INFO_DEVICE_IDX:
*reinterpret_cast<uint32_t*>(info_value) = state_ptr->execution_record.device_idx_;
break;
default: return QPL_STS_INVALID_PARAM_ERR;
}
return QPL_STS_OK;
}
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC visibility pop
#endif
#ifdef __cplusplus
}
#endif