This repository was archived by the owner on Feb 21, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 232
/
Copy pathfilter.go
1424 lines (1309 loc) · 45.7 KB
/
filter.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright 2022 Molecula Corp. (DBA FeatureBase).
// SPDX-License-Identifier: Apache-2.0
package roaring
import (
"fmt"
"math"
"github.com/pkg/errors"
"github.com/featurebasedb/featurebase/v3/shardwidth"
)
// We want BitmapScanner to be accessible from both the pilosa package, and
// the rbf package. Pilosa imports rbf, so rbf can't import pilosa, but they
// both import roaring, and this package is closely tied to roaring structures
// like Containers and the key/container mapping, so it mostly makes sense for
// this to be here.
//
// Unfortunately, this really needs to be capable of being row-aware, which
// means it needs access to the shard width stuff, which roaring otherwise
// studiously avoids knowing about.
const (
rowExponent = (shardwidth.Exponent - 16) // for instance, 20-16 = 4
rowWidth = 1 << rowExponent // containers per row, for instance 1<<4 = 16
keyMask = (rowWidth - 1) // a mask for offset within the row
rowMask = ^FilterKey(keyMask) // a mask for the row bits, without converting them to a row ID
)
type FilterKey uint64
// FilterResult represents the results of a BitmapFilter considering a
// key, or data. The values are represented as exclusive upper bounds
// on a series of matches followed by a series of rejections. So for
// instance, if called on key 23, the result {YesKey: 23, NoKey: 24}
// indicates that key 23 is a "no" and 24 is unknown and will be the
// next to be Consider()ed. This may seem confusing but it makes the
// math a lot easier to write. It can also report an error, which
// indicates that the entire operation should be stopped with that
// error.
type FilterResult struct {
YesKey FilterKey // The lowest container key this filter is known NOT to match.
NoKey FilterKey // The highest container key after YesKey that this filter is known to not match.
Err error // An error which should terminate processing.
}
// Row() computes the row number of a key.
func (f FilterKey) Row() uint64 {
return uint64(f >> rowExponent)
}
// Add adds an offset to a key.
func (f FilterKey) Add(x uint64) FilterKey {
return f + FilterKey(x)
}
// Sub determines the distance from o to f.
func (f FilterKey) Sub(o FilterKey) uint64 {
return uint64(f - o)
}
// MatchReject just sets Yes and No appropriately.
func (f FilterKey) MatchReject(y, n FilterKey) FilterResult {
return FilterResult{YesKey: y, NoKey: n}
}
func (f FilterKey) MatchOne() FilterResult {
return FilterResult{YesKey: f + 1, NoKey: f + 1}
}
// NeedData() is only really meaningful for ConsiderKey, and indicates
// that a decision can't be made from the key alone.
func (f FilterKey) NeedData() FilterResult {
return FilterResult{}
}
// Fail() reports a fatal error that should terminate processing.
func (f FilterKey) Fail(err error) FilterResult {
return FilterResult{Err: err}
}
// Failf() is just like Errorf, etc
func (f FilterKey) Failf(msg string, args ...interface{}) FilterResult {
return FilterResult{Err: fmt.Errorf(msg, args...)}
}
// MatchRow indicates that the current row matches the filter.
func (f FilterKey) MatchRow() FilterResult {
return FilterResult{YesKey: (f & rowMask) + rowWidth}
}
// MatchOneRejectRow indicates that this item matched but no further
// items in this row can match.
func (f FilterKey) MatchOneRejectRow() FilterResult {
return FilterResult{YesKey: f + 1, NoKey: (f & rowMask) + rowWidth}
}
// Reject rejects this item only.
func (f FilterKey) RejectOne() FilterResult {
return FilterResult{NoKey: f + 1}
}
// Reject rejects N items.
func (f FilterKey) Reject(n uint64) FilterResult {
return FilterResult{NoKey: f.Add(n)}
}
// RejectRow indicates that this entire row is rejected.
func (f FilterKey) RejectRow() FilterResult {
return FilterResult{NoKey: (f & rowMask) + rowWidth}
}
// RejectUntil rejects everything up to the given key.
func (f FilterKey) RejectUntil(until FilterKey) FilterResult {
return FilterResult{NoKey: until}
}
// RejectUntilRow rejects everything until the given row ID.
func (f FilterKey) RejectUntilRow(rowID uint64) FilterResult {
return FilterResult{NoKey: FilterKey(rowID) << rowExponent}
}
// MatchRowUntilRow matches this row, then rejects everything else until
// the given row ID.
func (f FilterKey) MatchRowUntilRow(rowID uint64) FilterResult {
// if rows are 16 wide, "yes" will be 16 minus our current position
// within a row, and "no" will be the distance from the end of our
// current row to the start of rowID, which is also the distance from
// the beginning of our current row to the start of rowID-1.
return FilterResult{
YesKey: (f & rowMask) + rowWidth,
NoKey: FilterKey(rowID) << rowExponent,
}
}
// RejectUntilOffset rejects this container, and any others until the given
// in-row offset.
func (f FilterKey) RejectUntilOffset(offset uint64) FilterResult {
next := (f & rowMask).Add(offset)
if next <= f {
next += rowWidth
}
return FilterResult{NoKey: next}
}
// MatchUntilOffset matches the current container, then skips any other
// containers until the given offset.
func (f FilterKey) MatchOneUntilOffset(offset uint64) FilterResult {
r := f.RejectUntilOffset(offset)
r.YesKey = f + 1
return r
}
// Done indicates that nothing can ever match.
func (f FilterKey) Done() FilterResult {
return FilterResult{
NoKey: ^FilterKey(0),
}
}
// MatchRowAndDone matches this row and nothing after that.
func (f FilterKey) MatchRowAndDone() FilterResult {
return FilterResult{
YesKey: (f & rowMask) + rowWidth,
NoKey: ^FilterKey(0),
}
}
// Match the current container, then skip any others until the same offset
// is reached again.
func (f FilterKey) MatchOneUntilSameOffset() FilterResult {
return f.MatchOneUntilOffset(uint64(f) & keyMask)
}
// BitmapFilter is, given a series of key/data pairs, considered to "match"
// some of those containers. Matching may be dependent on key values and
// cardinalities alone, or on the contents of the container.
//
// The ConsiderData function must not retain the container, or the data
// from the container; if it needs access to that information later, it needs
// to make a copy.
//
// Many filters are, by virtue of how they operate, able to predict their
// results on future keys. To accommodate this, and allow operations to
// avoid processing keys they don't need to process, the result of a filter
// operation can indicate not just whether a given key matches, but whether
// some upcoming keys will, or won't, match. If ConsiderKey yields a non-zero
// number of matches or non-matches for a given key, ConsiderData will not be
// called for that key.
//
// If multiple filters are combined, they are only called if their input is
// needed to determine a value.
type BitmapFilter interface {
ConsiderKey(key FilterKey, n int32) FilterResult
ConsiderData(key FilterKey, data *Container) FilterResult
}
// ContainerWriteback is the type for functions which can feed updated
// containers back to things from filters.
type ContainerWriteback func(key FilterKey, data *Container) error
// A BitmapRewriter is like a bitmap filter, but can modify the bitmap
// it's being called on during the iteration.
//
// After the last container is returned, ConsiderData will be called with
// an unspecified key and a nil container pointer, so the rewriter can
// write any trailing containers it has. A nil container passed to writeback
// implies a delete operation on the container. Writeback should only be
// called with keys greater than any previously given container key, and
// less than or equal to the current key. So for instance, if
// ConsiderData is called with key 3, and then with key 5, the call with
// key 5 may call writeback with key 4, and then key 5, but may not call
// it with keys 3 or lower, or 6 or higher. When the container provided to
// the call is nil, any monotonically increasing keys greater than the
// previous key are allowed. (If there was no previous key, 0 and higher
// are allowed.)
type BitmapRewriter interface {
ConsiderKey(key FilterKey, n int32) FilterResult
RewriteData(key FilterKey, data *Container, writeback ContainerWriteback) FilterResult
}
// BitmapColumnFilter is a BitmapFilter which checks for containers matching
// a given column within a row; thus, only the one container per row which
// matches the column needs to be evaluated, and it's evaluated as matching
// if it contains the relevant bit.
type BitmapColumnFilter struct {
key, offset uint16
}
var _ BitmapFilter = &BitmapColumnFilter{}
func (f *BitmapColumnFilter) ConsiderKey(key FilterKey, n int32) FilterResult {
if uint16(key&keyMask) != f.key {
return key.RejectUntilOffset(uint64(f.key))
}
return key.NeedData()
}
func (f *BitmapColumnFilter) ConsiderData(key FilterKey, data *Container) FilterResult {
if data.Contains(f.offset) {
return key.MatchOneUntilSameOffset()
}
return key.RejectUntilOffset(uint64(f.key))
}
func NewBitmapColumnFilter(col uint64) BitmapFilter {
return &BitmapColumnFilter{key: uint16((col >> 16) & keyMask), offset: uint16(col & 0xFFFF)}
}
// BitmapRowsFilter is a BitmapFilter which checks for containers that are
// in any of a provided list of rows. The row list should be sorted.
type BitmapRowsFilter struct {
rows []uint64
i int
}
func (f *BitmapRowsFilter) ConsiderKey(key FilterKey, n int32) FilterResult {
if f.i == -1 {
return key.Done()
}
if n == 0 {
return key.RejectOne()
}
row := uint64(key) >> rowExponent
for f.rows[f.i] < row {
f.i++
if f.i >= len(f.rows) {
f.i = -1
return key.Done()
}
}
if f.rows[f.i] > row {
return key.RejectUntilRow(f.rows[f.i])
}
// rows[f.i] must be equal, so we should match this row, until the
// next row, if there is a next row.
if f.i+1 < len(f.rows) {
return key.MatchRowUntilRow(f.rows[f.i+1])
}
return key.MatchRowAndDone()
}
func (f *BitmapRowsFilter) ConsiderData(key FilterKey, data *Container) FilterResult {
return key.Fail(errors.New("bitmap rows filter should never consider data"))
}
func NewBitmapRowsFilter(rows []uint64) BitmapFilter {
if len(rows) == 0 {
return &BitmapRowsFilter{rows: rows, i: -1}
}
return &BitmapRowsFilter{rows: rows, i: 0}
}
// BitmapRowsUnion is a BitmapFilter which produces a union of all the
// rows listed in a []uint64.
type BitmapRowsUnion struct {
c []*Container
rows []uint64
i int
}
func (f *BitmapRowsUnion) ConsiderKey(key FilterKey, n int32) FilterResult {
if f.i == -1 {
return key.Done()
}
if n == 0 {
return key.RejectOne()
}
row := uint64(key) >> rowExponent
for f.rows[f.i] < row {
f.i++
if f.i >= len(f.rows) {
f.i = -1
return key.Done()
}
}
if f.rows[f.i] > row {
return key.RejectUntilRow(f.rows[f.i])
}
// If we ran out of rows, we said we were done. If we're
// waiting for a later row, we said to reject until then.
// Therefore, we're on the current row, and need the data because
// we're going to union it.
return key.NeedData()
}
func (f *BitmapRowsUnion) ConsiderData(key FilterKey, data *Container) FilterResult {
idx := key & keyMask
f.c[idx] = f.c[idx].UnionInPlace(data)
// UnionInPlace with nil will reuse the container. We don't want to reuse
// the container, because ApplyFilter will overwrite it.
if f.c[idx] == data {
f.c[idx] = data.Clone()
}
return key.MatchOne()
}
// Yield the bitmap containing our results, adjusted for a particular shard
// if necessary (because we expect the results to correspond to our shard
// ID).
func (f *BitmapRowsUnion) Results(shard uint64) *Bitmap {
b := NewSliceBitmap()
for i, c := range f.c {
// UnionInPlace might not have fixed count
c.Repair()
b.Containers.Put(uint64(i)+shard*rowWidth, c)
}
return b
}
// Reset the internal container buffer. You must use this before reusing a
// filter.
func (f *BitmapRowsUnion) Reset() {
for i := range f.c {
f.c[i] = nil
}
}
// NewBitmapRowsUnion yields a BitmapRowsUnion which can give you the union
// of all containers matching a given row.
func NewBitmapRowsUnion(rows []uint64) *BitmapRowsUnion {
if len(rows) == 0 {
return &BitmapRowsUnion{rows: rows, i: -1, c: make([]*Container, rowWidth)}
}
return &BitmapRowsUnion{rows: rows, i: 0, c: make([]*Container, rowWidth)}
}
// BitmapRowFilterBase is a generic form of a row-aware wrapper; it
// handles making decisions about keys once you tell it a yesKey and noKey
// that it should be using, and makes callbacks per row.
type BitmapRowFilterBase struct {
FilterResult
callback func(row uint64) error
lastRow uint64
}
var _ BitmapFilter = &BitmapRowFilterBase{}
// DetermineByKey decides whether it can produce a meaningful FilterResult
// for a given key. This encapsulates all the logic for row callbacks and
// figuring out when to wrap a row.
func (b *BitmapRowFilterBase) DetermineByKey(key FilterKey) (FilterResult, bool) {
if b.FilterResult.Err != nil {
return b.FilterResult, true
}
row := key.Row()
if b.YesKey <= key && b.NoKey > key {
return key.RejectUntil(b.NoKey), true
}
if b.lastRow == row {
return key.RejectRow(), true
}
// If we got here: Either b.noKey is less than key, or b.yesKey is
// greater than key. If yesKey is greater, we match this row, and
// possibly update to mark that we've said no through to the end
// of this row.
if b.YesKey > key {
b.lastRow = row
if b.callback != nil {
err := b.callback(row)
if err != nil {
return key.Fail(err), true
}
}
res := key.MatchOneRejectRow()
// This is probably unnecessary, but the idea is, since
// we've decided that we're rejecting everything up to the
// end of this row, we want to be sure that a later call
// doesn't produce a different answer.
if b.NoKey < res.NoKey {
b.NoKey = res.NoKey
}
// if our run of yes answers ends before the rejected row
// ends, and our run of no answers extends beyond this row,
// we can reject until then. note that we can't round that
// up to a full row; if our inner filter were a column
// filter, for instance, that only wanted to see the 7th
// key in each row, we would want to reject up to that 7th
// key, but then look at it.
if b.YesKey <= res.NoKey && b.NoKey > res.NoKey {
res.NoKey = b.NoKey
}
return res, true
}
// Both keys are <= key, err is nil, so this is basically a
// NeedData.
return b.FilterResult, false
}
// SetResult is a convenience function so that things embedding this
// can just call this instead of using a long series of dotted names.
// It returns the new result of DetermineByKey after this change.
func (b *BitmapRowFilterBase) SetResult(key FilterKey, result FilterResult) FilterResult {
b.FilterResult = result
result, _ = b.DetermineByKey(key)
return result
}
// Without a sub-filter, we always-succeed; if we get a key that isn't
// already answered by our YesKey/NoKey/lastRow, we will match this key,
// reject the rest of the row, and update our keys accordingly. We'll
// also hit the callback, and return an error from it if appropriate.
func (b *BitmapRowFilterBase) ConsiderKey(key FilterKey, n int32) FilterResult {
var done bool
b.FilterResult, done = b.DetermineByKey(key)
if done {
return b.FilterResult
}
if n == 0 {
return key.RejectOne()
}
b.FilterResult = key.MatchOneRejectRow()
row := key.Row()
b.lastRow = row
if b.callback != nil {
b.Err = b.callback(row)
}
return b.FilterResult
}
// This should probably never be reached?
func (b *BitmapRowFilterBase) ConsiderData(key FilterKey, data *Container) FilterResult {
b.Err = errors.New("base iterator should never consider data")
return b.FilterResult
}
func NewBitmapRowFilterBase(callback func(row uint64) error) *BitmapRowFilterBase {
return &BitmapRowFilterBase{lastRow: ^uint64(0), callback: callback}
}
type BitmapRowLimitFilter struct {
BitmapRowFilterBase
limit uint64
}
var _ BitmapFilter = &BitmapRowLimitFilter{}
// Without a sub-filter, we always-succeed; if we get a key that isn't
// already answered by our YesKey/NoKey/lastRow, we will match the whole
// row.
func (b *BitmapRowLimitFilter) ConsiderKey(key FilterKey, n int32) FilterResult {
var done bool
b.FilterResult, done = b.DetermineByKey(key)
if done {
return b.FilterResult
}
if n == 0 {
return key.RejectOne()
}
if b.limit > 0 {
b.FilterResult = key.MatchRow()
b.limit--
} else {
b.FilterResult = key.Done()
}
return b.FilterResult
}
// This should probably never be reached?
func (b *BitmapRowLimitFilter) ConsiderData(key FilterKey, data *Container) FilterResult {
b.Err = errors.New("limit iterator should never consider data")
return b.FilterResult
}
func NewBitmapRowLimitFilter(limit uint64) *BitmapRowLimitFilter {
return &BitmapRowLimitFilter{BitmapRowFilterBase: *NewBitmapRowFilterBase(nil), limit: limit}
}
// BitmapRowFilterSingleFilter is a row iterator with a single
// filter, which is simpler than one with multiple filters where
// it coincidentally turns out that N==1.
type BitmapRowFilterSingleFilter struct {
BitmapRowFilterBase
filter BitmapFilter
}
func (b *BitmapRowFilterSingleFilter) ConsiderKey(key FilterKey, n int32) FilterResult {
res, done := b.DetermineByKey(key)
if done {
return res
}
return b.SetResult(key, b.filter.ConsiderKey(key, n))
}
func (b *BitmapRowFilterSingleFilter) ConsiderData(key FilterKey, data *Container) FilterResult {
// We already handled any consideration of the key above, in principle.
b.FilterResult = b.filter.ConsiderData(key, data)
if b.FilterResult.Err != nil {
return b.FilterResult
}
res, done := b.DetermineByKey(key)
if done {
return res
}
// We could just return the res, which would say nothing, but I
// think it should be a visible error if that happens.
b.FilterResult.Err = errors.New("inner filter didn't make a decision")
return b.FilterResult
}
func NewBitmapRowFilterSingleFilter(callback func(row uint64) error, filter BitmapFilter) *BitmapRowFilterSingleFilter {
return &BitmapRowFilterSingleFilter{
BitmapRowFilterBase: BitmapRowFilterBase{lastRow: ^uint64(0), callback: callback},
filter: filter,
}
}
// BitmapRowFilterMultiFilter is a BitmapFilter which wraps other bitmap filters,
// calling a callback function once per row whenever it finds a container
// for which all the filters returned true.
type BitmapRowFilterMultiFilter struct {
BitmapRowFilterBase
filters []BitmapFilter
yesKeys, noKeys []FilterKey
toDo []int
}
func (b *BitmapRowFilterMultiFilter) ConsiderKey(key FilterKey, n int32) FilterResult {
res, done := b.DetermineByKey(key)
if done {
return res
}
// highestNo: The highest No value that we have that isn't preceeded
// by a relevant Yes.
highestNo := key
lowestYes := ^FilterKey(0)
// The length of the "no" run after the lowest "yes"
lowestYesNo := FilterKey(0)
b.toDo = b.toDo[:0]
// We scan for any no values that don't have an earlier yes that's
// still greater than this key. If there are any, we can skip to
// the highest such value immediately. We also build a todo list
// of items for which we have neither a yes nor a no answer greater
// than this key.
for i, yk := range b.yesKeys {
if yk > key {
if yk < lowestYes {
lowestYes = yk
lowestYesNo = b.noKeys[i]
}
continue
}
nk := b.noKeys[i]
if nk > highestNo {
highestNo = nk
continue
}
b.toDo = append(b.toDo, i)
}
// We have an unambiguous no, so we can set our internal state to
// be aware that we have a No until then. We can unconditionally
// return the result; it can't be not-done, because we just set
// it to a known done state.
if highestNo > key {
return b.SetResult(key, key.RejectUntil(highestNo))
}
// Everything either has a yes value which is at least as high
// as lowestYes, or is in f.toDo now. Now we call ConsiderKey
// for everything in f.toDo, and accumulate a new list of the
// values still don't know, using the same backing store.
newToDo := b.toDo[:0]
for _, filter := range b.toDo {
result := b.filters[filter].ConsiderKey(key, n)
if result.Err != nil {
return key.Fail(result.Err)
}
yk, nk := result.YesKey, result.NoKey
b.yesKeys[filter], b.noKeys[filter] = yk, nk
if yk > key {
if lowestYes == 0 || yk < lowestYes {
lowestYes = yk
lowestYesNo = nk
}
continue
}
if nk > highestNo {
highestNo = nk
continue
}
newToDo = append(newToDo, filter)
}
// Same logic as before; if we have a highestNo, we don't need more
// information.
if highestNo > key {
return b.SetResult(key, key.RejectUntil(highestNo))
}
b.toDo = newToDo
if len(b.toDo) > 0 {
return key.NeedData()
}
// this shouldn't be possible
if lowestYes <= key {
return key.Failf("got lowest yes %d for key %d, this shouldn't happen", lowestYes, key)
}
// Flag that we have a definite Yes as far as the lowest yes, and a
// definite No after that to the corresponding No.
return b.SetResult(key, key.MatchReject(lowestYes, lowestYesNo))
}
// ConsiderData only gets called in cases where f.toDo had a list of filters
// for which we needed to get data to make a decision. That means that
// everything but the indexes in f.toDo must be a "yes" right now.
func (b *BitmapRowFilterMultiFilter) ConsiderData(key FilterKey, data *Container) FilterResult {
res, done := b.DetermineByKey(key)
if done {
return res
}
highestNo := key
for _, filter := range b.toDo {
result := b.filters[filter].ConsiderData(key, data)
if result.Err != nil {
return key.Fail(result.Err)
}
yk, nk := result.YesKey, result.NoKey
b.yesKeys[filter], b.noKeys[filter] = yk, nk
if yk <= key && nk > highestNo {
highestNo = nk
}
}
if highestNo > key {
return b.SetResult(key, key.RejectUntil(highestNo))
}
// if we got here, either something was buggy, or everything has a yes
// > key.
lowestYes := ^FilterKey(0)
lowestYesNo := key
for i, yk := range b.yesKeys {
if yk < lowestYes {
lowestYes = yk
lowestYesNo = b.noKeys[i]
}
}
// this shouldn't be possible
if lowestYes <= key {
return key.Failf("got lowest yes %d on data for key %d, this shouldn't happen", lowestYes, key)
}
return b.SetResult(key, key.MatchReject(lowestYes, lowestYesNo))
}
// BitmapBitmapFilter builds a list of positions in the bitmap which
// match those in a provided bitmap. It is shard-agnostic; no matter what
// offsets the input bitmap's containers have, it matches them against
// corresponding keys.
type BitmapBitmapFilter struct {
containers []*Container
nextOffsets []uint64
callback func(uint64) error
}
func (b *BitmapBitmapFilter) SetCallback(cb func(uint64) error) {
b.callback = cb
}
func (b *BitmapBitmapFilter) ConsiderKey(key FilterKey, n int32) FilterResult {
pos := key & keyMask
if b.containers[pos] == nil || n == 0 {
return key.RejectUntilOffset(b.nextOffsets[pos])
}
return key.NeedData()
}
func (b *BitmapBitmapFilter) ConsiderData(key FilterKey, data *Container) FilterResult {
pos := key & keyMask
base := uint64(key << 16)
filter := b.containers[pos]
if filter == nil {
return key.RejectUntilOffset(b.nextOffsets[pos])
}
var lastErr error
matched := false
intersectionCallback(data, filter, func(v uint16) {
matched = true
err := b.callback(base + uint64(v))
if err != nil {
lastErr = err
}
})
if lastErr != nil {
return key.Fail(lastErr)
}
if !matched {
return key.RejectUntilOffset(b.nextOffsets[pos])
}
return key.MatchOneUntilOffset(b.nextOffsets[pos])
}
// NewBitmapBitmapFilter creates a filter which can report all the positions
// within a bitmap which are set, and which have positions corresponding to
// the specified columns. It calls the provided callback function on
// each value it finds, terminating early if that returns an error.
//
// The input filter is assumed to represent one "row" of a shard's data,
// which is to say, a range of up to rowWidth consecutive containers starting
// at some multiple of rowWidth. We coerce that to the 0..rowWidth range
// because offset-within-row is what we care about.
func NewBitmapBitmapFilter(filter *Bitmap, callback func(uint64) error) *BitmapBitmapFilter {
b := &BitmapBitmapFilter{
callback: callback,
containers: make([]*Container, rowWidth),
nextOffsets: make([]uint64, rowWidth),
}
iter, _ := filter.Containers.Iterator(0)
last := uint64(0)
count := 0
for iter.Next() {
k, v := iter.Value()
// Coerce container key into the 0-rowWidth range we'll be
// using to compare against containers within each row.
k = k & keyMask
b.containers[k] = v
last = k
count++
}
// if there's only one container, we need to populate everything with
// its position.
if count == 1 {
for i := range b.containers {
b.nextOffsets[i] = last
}
} else {
// Point each container at the offset of the next valid container.
// With sparse bitmaps this will potentially make skipping faster.
for i := range b.containers {
if b.containers[i] != nil {
for int(last) != i {
b.nextOffsets[last] = uint64(i)
last = (last + 1) % rowWidth
}
}
}
}
return b
}
// BitmapRowFilterMultiFilter will call a
func NewBitmapRowFilterMultiFilter(callback func(row uint64) error, filters ...BitmapFilter) BitmapFilter {
return &BitmapRowFilterMultiFilter{
filters: filters,
yesKeys: make([]FilterKey, len(filters)),
noKeys: make([]FilterKey, len(filters)),
BitmapRowFilterBase: BitmapRowFilterBase{
callback: callback,
lastRow: ^uint64(0),
},
}
}
// BitmapRowLister returns a pointer to a slice which it will populate when invoked
// as a bitmap filter.
func NewBitmapRowFilter(callback func(uint64) error, filters ...BitmapFilter) BitmapFilter {
if len(filters) == 0 {
return NewBitmapRowFilterBase(callback)
}
if len(filters) == 1 {
return NewBitmapRowFilterSingleFilter(callback, filters[0])
}
return NewBitmapRowFilterMultiFilter(callback, filters...)
}
// BitmapRangeFilter limits filter operations to a specified range, and
// performs key or data callbacks.
//
// On seeing a key in its range:
// If the key callback is present, and returns true, match the key.
// Otherwise, if a data callback is present, request the data, and in the
// data handler, call the data callback, then match the single key.
// If neither is present, match the entire range at once.
type BitmapRangeFilter struct {
min, max FilterKey
kcb func(FilterKey, int32) (bool, error)
dcb func(FilterKey, *Container) error
}
var _ BitmapFilter = &BitmapRangeFilter{}
func (b *BitmapRangeFilter) ConsiderKey(key FilterKey, n int32) FilterResult {
if key >= b.max {
return key.Done()
}
if key >= b.min {
if b.kcb != nil {
match, err := b.kcb(key, n)
if err != nil {
return key.Fail(err)
}
if match {
return key.MatchOne()
}
}
if b.dcb != nil {
return key.NeedData()
}
return key.MatchReject(b.max, ^FilterKey(0))
}
return key.RejectUntil(b.min)
}
func (b *BitmapRangeFilter) ConsiderData(key FilterKey, data *Container) FilterResult {
err := b.dcb(key, data)
if err != nil {
return key.Fail(err)
}
return key.MatchOne()
}
func NewBitmapRangeFilter(min, max FilterKey, keyCallback func(FilterKey, int32) (bool, error), dataCallback func(FilterKey, *Container) error) *BitmapRangeFilter {
return &BitmapRangeFilter{min: min, max: max, kcb: keyCallback, dcb: dataCallback}
}
// BitmapMutexDupFilter is a filter which identifies cases where the same
// position has a bit set in more than one row.
//
// We keep a slice of the first value seen for every row, with ^0 as the
// default; when that's already set, things get appended to the entries in
// the map. At the end, for each entry in the map, we also add its first
// value to it. Thus, the map holds all the entries, but we're only using
// the map in the (hopefully rarer) cases where there's duplicate values.
//
// The slice is local-coordinates (first column 0), but the map is global
// coordinates (first column is whatever base was).
type BitmapMutexDupFilter struct {
base uint64 // the offset of 0 for this, used to accommodate shard offsets
extra map[uint64][]uint64 // extra values observed
first []uint64 // first values observed
details bool
limit int
done bool // if we have a limit, and we've hit it...
highKey FilterKey // ... we can stop after this many containers.
}
var _ BitmapFilter = &BitmapMutexDupFilter{}
func NewBitmapMutexDupFilter(base uint64, details bool, limit int) *BitmapMutexDupFilter {
filter := &BitmapMutexDupFilter{
base: base,
extra: map[uint64][]uint64{},
first: make([]uint64, 1<<shardwidth.Exponent),
details: details,
limit: limit,
}
if filter.limit == 0 {
// A limit of 0 is not a limit; set limit higher than possible number of
// values we could have.
filter.limit = 2 << shardwidth.Exponent
}
for i := range filter.first {
filter.first[i] = ^uint64(0)
}
return filter
}
func (b *BitmapMutexDupFilter) ConsiderKey(key FilterKey, n int32) FilterResult {
if n > 0 {
return key.NeedData()
}
return key.RejectOne()
}
func (b *BitmapMutexDupFilter) ConsiderData(key FilterKey, data *Container) FilterResult {
value, basePos := uint64(key)>>rowExponent, uint64(key&keyMask)<<16
ContainerCallback(data, func(u uint16) {
pos := basePos + uint64(u)
if b.first[pos] != ^uint64(0) {
if b.details {
b.extra[pos+b.base] = append(b.extra[pos+b.base], value)
} else {
// no details, just annotate that it exists
b.extra[pos+b.base] = []uint64{}
}
} else {
b.first[pos] = value
}
})
if len(b.extra) >= b.limit {
if !b.done {
// we note which container we found the last value we needed in.
// We may still go over the limit, but we won't look at any *more*
// containers in this row.
//
// We can't just abort early because the records we already found
// could have more values.
b.done = true
b.highKey = key & keyMask
return key.RejectRow()
}
if (key & keyMask) >= b.highKey {
return key.RejectRow()
}
}
return key.MatchOne()
}
// Report returns the set of duplicate values identified.
func (b *BitmapMutexDupFilter) Report() map[uint64][]uint64 {
// copy values into extra, and remove them from first, so calling
// Report() again won't cause double-appends. We only have to do
// this if we've been asked for details; otherwise the list of
// known positions is sufficient.
if b.details {
for k, v := range b.extra {
kpos := k % (1 << shardwidth.Exponent)
if b.first[kpos] != ^uint64(0) {
v = append(v, 0)
// prepend so the lowest value goes at the beginning
copy(v[1:], v[:])
v[0] = b.first[kpos]
b.first[kpos] = ^uint64(0)
b.extra[k] = v
}
}
}
return b.extra
}
// BitmapBitmapTrimmer is like BitmapBitmapFilter, but instead of calling
// a callback per bit found in the intersection, it calls a callback with the
// original raw container and the corresponding filter container, and also
// provides the writeback func it got from the bitmap. So for instance, to
// implement a "subtract these bits" function, you would difference-in-place
// the raw container with the filter container, then pass that to the writeback
// function.
//
// It's called a Trimmer because it won't add containers; it won't *add*
// containers. It calls the callback function for every container, whether or
// not it matches the filter; this allows an intersect-like filter to work
// too.
//
// Note, however, that the caller's ContainerWriteback function *may* create
// containers, even though the Trimmer won't have called RewriteData with those
// keys.
type BitmapBitmapTrimmer struct {
containers []*Container
nextOffsets []uint64
callback func(key FilterKey, raw, filter *Container, writeback ContainerWriteback) error
}
var _ BitmapRewriter = &BitmapBitmapTrimmer{}
func (b *BitmapBitmapTrimmer) SetCallback(cb func(FilterKey, *Container, *Container, ContainerWriteback) error) {
b.callback = cb
}
func (b *BitmapBitmapTrimmer) ConsiderKey(key FilterKey, n int32) FilterResult {
pos := key & keyMask
// If a trimmer wants to do something for a key, it *must* have something in
// the filter slot for that key, otherwise we won't call it with the
// corresponding existing data. For the mutex case, this is covered -- every
// bit we have to write implies the corresponding filter bit being set.
//
// Note that, unlike BitmapBitmapFilter, we don't make assumptions about
// what you are *doing* with the filter.
if b.containers[pos] == nil || n == 0 {
return key.RejectUntilOffset(b.nextOffsets[pos])
}
return key.NeedData()
}
func (b *BitmapBitmapTrimmer) RewriteData(key FilterKey, data *Container, writeback ContainerWriteback) FilterResult {
pos := key & keyMask
filter := b.containers[pos]