forked from ClickHouse/ClickHouse
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ColumnVector.h
313 lines (239 loc) · 8.02 KB
/
ColumnVector.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
#pragma once
#include <cmath>
#include <Columns/IColumn.h>
#include <Columns/IColumnImpl.h>
#include <Columns/ColumnVectorHelper.h>
#include <common/unaligned.h>
#include <Core/Field.h>
namespace DB
{
/** Comparison primitives for numeric values.
 * This generic version relies on the built-in relational operators of T and
 * ignores nan_direction_hint entirely. The hint stays in every signature so
 * that all CompareHelper variants can be called the same way; floating-point
 * types need NaN-aware ordering (NaNs must be grouped at one end, otherwise
 * sorting would not work at all).
 */
template <typename T>
struct CompareHelper
{
    /// True when a is strictly smaller than b.
    static bool less(T a, T b, int /*nan_direction_hint*/)
    {
        return a < b;
    }

    /// True when a is strictly larger than b.
    static bool greater(T a, T b, int /*nan_direction_hint*/)
    {
        return a > b;
    }

    /** Three-way comparison: returns 1 if a > b, -1 if a < b, and 0 otherwise.
     * Meaning of nan_direction_hint for NaN-aware variants:
     * - nan_direction_hint == -1 - NaNs are considered less than all numbers;
     * - nan_direction_hint == 1 - NaNs are considered larger than all numbers.
     * Essentially, nan_direction_hint == -1 says the comparison is for sorting in descending order.
     */
    static int compare(T a, T b, int /*nan_direction_hint*/)
    {
        if (a > b)
            return 1;
        if (a < b)
            return -1;
        return 0;
    }
};
/** NaN-aware comparison primitives for floating-point values.
 * nan_direction_hint decides where NaNs are placed relative to ordinary numbers:
 * a negative hint puts NaN below every number, a positive hint puts it above.
 * Two NaNs are never less or greater than one another and compare as equal.
 */
template <typename T>
struct FloatCompareHelper
{
    /// True when a is ordered strictly before b.
    static bool less(T a, T b, int nan_direction_hint)
    {
        const bool a_is_nan = std::isnan(a);
        const bool b_is_nan = std::isnan(b);

        /// Ordinary numbers: plain comparison.
        if (!a_is_nan && !b_is_nan)
            return a < b;

        if (a_is_nan && b_is_nan)
            return false;

        /// Exactly one NaN: a NaN on the left is "less" only if NaNs sort low,
        /// a NaN on the right makes the left side "less" only if NaNs sort high.
        return a_is_nan ? (nan_direction_hint < 0) : (nan_direction_hint > 0);
    }

    /// True when a is ordered strictly after b.
    static bool greater(T a, T b, int nan_direction_hint)
    {
        const bool a_is_nan = std::isnan(a);
        const bool b_is_nan = std::isnan(b);

        /// Ordinary numbers: plain comparison.
        if (!a_is_nan && !b_is_nan)
            return a > b;

        if (a_is_nan && b_is_nan)
            return false;

        /// Exactly one NaN: mirror image of less().
        return a_is_nan ? (nan_direction_hint > 0) : (nan_direction_hint < 0);
    }

    /** Three-way comparison.
     * Returns 0 for two NaNs, nan_direction_hint when only a is NaN and
     * -nan_direction_hint when only b is NaN. Otherwise returns the sign of (a - b).
     */
    static int compare(T a, T b, int nan_direction_hint)
    {
        const bool a_is_nan = std::isnan(a);
        const bool b_is_nan = std::isnan(b);

        if (unlikely(a_is_nan || b_is_nan))
        {
            /// Inside this branch at least one side is NaN, so "different" means exactly one is.
            if (a_is_nan != b_is_nan)
                return a_is_nan ? nan_direction_hint : -nan_direction_hint;

            return 0;
        }

        /// Sign of the difference as -1 / 0 / 1.
        const T diff = a - b;
        return (T(0) < diff) - (diff < T(0));
    }
};
/// Floating-point element types take their comparison routines from FloatCompareHelper.
/// (A struct inherits publicly by default, so no access specifier is needed.)
template <>
struct CompareHelper<Float32> : FloatCompareHelper<Float32>
{
};

template <>
struct CompareHelper<Float64> : FloatCompareHelper<Float64>
{
};
/** A template for columns that store their values in a simple contiguous array (`Container`).
 * The static_assert below requires !IsDecimalNumber<T>.
 */
template <typename T>
class ColumnVector final : public COWHelper<ColumnVectorHelper, ColumnVector<T>>
{
    static_assert(!IsDecimalNumber<T>);

private:
    using Self = ColumnVector;
    friend class COWHelper<ColumnVectorHelper, Self>;

    /// Comparator types; only declared here, defined elsewhere.
    struct less;
    struct greater;

public:
    using value_type = T;
    using Container = PaddedPODArray<value_type>;

private:
    /// Constructors are private; COWHelper is a friend and therefore can reach them.
    ColumnVector() {}
    ColumnVector(const size_t n) : data(n) {}
    ColumnVector(const size_t n, const value_type x) : data(n, x) {}
    ColumnVector(const ColumnVector & src) : data(src.data.begin(), src.data.end()) {}

    /// Sugar constructor.
    ColumnVector(std::initializer_list<T> il) : data{il} {}

public:
    bool isNumeric() const override { return IsNumber<T>; }

    /// Number of elements stored.
    size_t size() const override
    {
        return data.size();
    }

    /// Bytes of the n-th element: sizeof(T) bytes starting at its address.
    StringRef getDataAt(size_t n) const override
    {
        return StringRef(reinterpret_cast<const char *>(&data[n]), sizeof(data[n]));
    }

    /// Appends element n of src. src is downcast to Self without a check.
    void insertFrom(const IColumn & src, size_t n) override
    {
        data.push_back(static_cast<const Self &>(src).getData()[n]);
    }

    /// Appends one value read from pos with unalignedLoad<T>; the length argument is ignored.
    void insertData(const char * pos, size_t /*length*/) override
    {
        data.push_back(unalignedLoad<T>(pos));
    }

    /// Appends a value-initialized T.
    void insertDefault() override
    {
        data.push_back(T());
    }

    /// Appends `length` value-initialized elements in one resize.
    void insertManyDefaults(size_t length) override
    {
        data.resize_fill(data.size() + length, T());
    }

    /// Drops the last n elements. No check is made that n <= size().
    void popBack(size_t n) override
    {
        data.resize_assume_reserved(data.size() - n);
    }

    StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;

    const char * deserializeAndInsertFromArena(const char * pos) override;

    void updateHashWithValue(size_t n, SipHash & hash) const override;

    /// Size of the stored elements in bytes (element count times element size).
    size_t byteSize() const override
    {
        return data.size() * sizeof(data[0]);
    }

    /// Bytes reported by the underlying container as allocated.
    size_t allocatedBytes() const override
    {
        return data.allocated_bytes();
    }

    /// Forwards to Container::protect().
    void protect() override
    {
        data.protect();
    }

    /// Appends a value of the native element type.
    void insertValue(const T value)
    {
        data.push_back(value);
    }

    /// This method implemented in header because it could be possibly devirtualized.
    /// Compares element n of this column with element m of rhs_; rhs_ is downcast to Self without a check.
    int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override
    {
        return CompareHelper<T>::compare(data[n], static_cast<const Self &>(rhs_).data[m], nan_direction_hint);
    }

    void getPermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override;

    /// Forwards to Container::reserve().
    void reserve(size_t n) override
    {
        data.reserve(n);
    }

    const char * getFamilyName() const override;

    MutableColumnPtr cloneResized(size_t size) const override;

    /// Element n converted to a Field.
    Field operator[](size_t n) const override
    {
        return data[n];
    }

    /// Same as operator[], written into res.
    void get(size_t n, Field & res) const override
    {
        res = (*this)[n];
    }

    UInt64 get64(size_t n) const override;

    Float64 getFloat64(size_t n) const override;

    /// Element n converted to UInt64.
    UInt64 getUInt(size_t n) const override
    {
        return UInt64(data[n]);
    }

    /// Element n converted to bool.
    bool getBool(size_t n) const override
    {
        return bool(data[n]);
    }

    /// Element n converted to Int64.
    Int64 getInt(size_t n) const override
    {
        return Int64(data[n]);
    }

    /// Appends the value extracted from x with DB::get<NearestFieldType<T>>.
    void insert(const Field & x) override
    {
        data.push_back(DB::get<NearestFieldType<T>>(x));
    }

    void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;

    ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override;

    ColumnPtr permute(const IColumn::Permutation & perm, size_t limit) const override;

    ColumnPtr index(const IColumn & indexes, size_t limit) const override;

    template <typename Type>
    ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;

    ColumnPtr replicate(const IColumn::Offsets & offsets) const override;

    void getExtremes(Field & min, Field & max) const override;

    /// Forwards to scatterImpl<Self>.
    MutableColumns scatter(IColumn::ColumnIndex num_columns, const IColumn::Selector & selector) const override
    {
        return this->template scatterImpl<Self>(num_columns, selector);
    }

    void gather(ColumnGathererStream & gatherer_stream) override;

    bool canBeInsideNullable() const override { return true; }

    bool isFixedAndContiguous() const override { return true; }
    size_t sizeOfValueIfFixed() const override { return sizeof(T); }

    /// The whole array as raw bytes. The length is byteSize(), a byte count like the one
    /// getDataAt() uses; the element count would cut the view short whenever sizeof(T) > 1.
    StringRef getRawData() const override
    {
        return StringRef(reinterpret_cast<const char *>(data.data()), byteSize());
    }

    /// True when the dynamic type of rhs is exactly ColumnVector<T>.
    bool structureEquals(const IColumn & rhs) const override
    {
        return typeid(rhs) == typeid(ColumnVector<T>);
    }

    /** More efficient methods of manipulation - to manipulate with data directly. */
    Container & getData()
    {
        return data;
    }

    const Container & getData() const
    {
        return data;
    }

    const T & getElement(size_t n) const
    {
        return data[n];
    }

    T & getElement(size_t n)
    {
        return data[n];
    }

protected:
    /// The stored values.
    Container data;
};
/// Builds a new column whose i-th value is data[indexes[i]].
/// A limit of 0 means every index is used; otherwise only the first
/// min(indexes.size(), limit) indexes are used. Index values are not range-checked.
template <typename T>
template <typename Type>
ColumnPtr ColumnVector<T>::indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const
{
    const size_t index_count = indexes.size();
    const size_t rows = (limit == 0) ? index_count : std::min(index_count, limit);

    auto result = this->create(rows);
    typename Self::Container & out = result->getData();

    for (size_t row = 0; row != rows; ++row)
        out[row] = data[indexes[row]];

    return result;
}
}