Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

3 points on 2 segments Xor8Plus variant #11

Closed
wants to merge 3 commits into from
Closed
Changes from 1 commit
Commits
File filter...
Filter file types
Jump to…
Jump to file or symbol
Failed to load files and symbols.

Always

Just for now

Prev

3 points on 2 segments Xor8Plus variant

This Xor8Plus variant trades a slightly higher BitsPerValue (9.17 vs. 9.13 for
the 3-segment layout) for collocating the first two lookups in a single cache line.

# Conflicts:
#	xorfilter.go
  • Loading branch information
funny-falcon committed Dec 26, 2019
commit c7b541fe86297e75073e0d193173feb7a2dd5180
@@ -72,8 +72,8 @@ func (filter *Xor8) Contains(key uint64) bool {
r1 := uint32(rotl64(hash, 21))
r2 := uint32(rotl64(hash, 42))
h0 := reduce(r0, filter.BlockLength)
h1 := reduce(r1, filter.BlockLength) + filter.BlockLength
h2 := reduce(r2, filter.BlockLength) + 2*filter.BlockLength
h1 := h0 ^ (reduce(r1, 63) + 1)
h2 := reduce(r2, filter.BlockLength) + filter.BlockLength
return f == (filter.Fingerprints[h0] ^ filter.Fingerprints[h1] ^
filter.Fingerprints[h2])
}
@@ -87,7 +87,7 @@ func (filter *Xor8) geth0h1h2(k uint64) hashes {
r2 := uint32(rotl64(hash, 42))

answer.h0 = reduce(r0, filter.BlockLength)
answer.h1 = reduce(r1, filter.BlockLength)
answer.h1 = answer.h0 ^ (reduce(r1, 63) + 1)
answer.h2 = reduce(r2, filter.BlockLength)
return answer
}
@@ -97,9 +97,9 @@ func (filter *Xor8) geth0(hash uint64) uint32 {
return reduce(r0, filter.BlockLength)
}

func (filter *Xor8) geth1(hash uint64) uint32 {
func (filter *Xor8) geth1(h0 uint32, hash uint64) uint32 {
r1 := uint32(rotl64(hash, 21))
return reduce(r1, filter.BlockLength)
return h0 ^ (reduce(r1, 63) + 1)
}

func (filter *Xor8) geth2(hash uint64) uint32 {
@@ -111,37 +111,35 @@ func (filter *Xor8) geth2(hash uint64) uint32 {
// The caller is responsible for ensuring that there are no duplicate keys.
func Populate(keys []uint64) *Xor8 {
size := len(keys)
capacity := 32 + uint32(math.Ceil(1.23*float64(size)))
capacity = capacity / 3 * 3 // round it down to a multiple of 3
capacity := 32 + uint32(math.Ceil(1.27*float64(size)))
filter := &Xor8{}
filter.Size = uint32(len(keys))
filter.BlockLength = capacity / 3
filter.BlockLength = capacity / 2
filter.BlockLength = (filter.BlockLength + 63) &^ 63 // round up to 64 bit blocks
capacity = filter.BlockLength * 2
filter.Fingerprints = make([]uint8, capacity, capacity)
var rngcounter uint64 = 1
filter.Seed = splitmix64(&rngcounter)

Q0 := make([]keyindex, filter.BlockLength, filter.BlockLength)
Q1 := make([]keyindex, filter.BlockLength, filter.BlockLength)
Q2 := make([]keyindex, filter.BlockLength, filter.BlockLength)
stack := make([]keyindex, size, size)
sets0 := make([]xorset, filter.BlockLength, filter.BlockLength)
sets1 := make([]xorset, filter.BlockLength, filter.BlockLength)
sets2 := make([]xorset, filter.BlockLength, filter.BlockLength)
for true {
for i := 0; i < size; i++ {
key := keys[i]
hs := filter.geth0h1h2(key)
sets0[hs.h0].xormask ^= hs.h
sets0[hs.h0].count++
sets1[hs.h1].xormask ^= hs.h
sets1[hs.h1].count++
sets2[hs.h2].xormask ^= hs.h
sets2[hs.h2].count++
sets0[hs.h1].xormask ^= hs.h
sets0[hs.h1].count++
sets1[hs.h2].xormask ^= hs.h
sets1[hs.h2].count++
}
// scan for values with a count of one
Q0size := 0
Q1size := 0
Q2size := 0
for i := uint32(0); i < filter.BlockLength; i++ {
if sets0[i].count == 1 {
Q0[Q0size].index = i
@@ -157,15 +155,8 @@ func Populate(keys []uint64) *Xor8 {
Q1size++
}
}
for i := uint32(0); i < filter.BlockLength; i++ {
if sets2[i].count == 1 {
Q2[Q2size].index = i
Q2[Q2size].hash = sets2[i].xormask
Q2size++
}
}
stacksize := 0
for Q0size+Q1size+Q2size > 0 {
for Q0size+Q1size > 0 {
for Q0size > 0 {
Q0size--
keyindexvar := Q0[Q0size]
@@ -174,37 +165,9 @@ func Populate(keys []uint64) *Xor8 {
continue // not actually possible after the initial scan.
}
hash := keyindexvar.hash
h1 := filter.geth1(hash)
h2 := filter.geth2(hash)
stack[stacksize] = keyindexvar
stacksize++
sets1[h1].xormask ^= hash

sets1[h1].count--
if sets1[h1].count == 1 {
Q1[Q1size].index = h1
Q1[Q1size].hash = sets1[h1].xormask
Q1size++
}
sets2[h2].xormask ^= hash
sets2[h2].count--
if sets2[h2].count == 1 {
Q2[Q2size].index = h2
Q2[Q2size].hash = sets2[h2].xormask
Q2size++
}
}
for Q1size > 0 {
Q1size--
keyindexvar := Q1[Q1size]
index := keyindexvar.index
if sets1[index].count == 0 {
continue
}
hash := keyindexvar.hash
h0 := filter.geth0(hash)
h1 := filter.geth1(h0, hash)
h2 := filter.geth2(hash)
keyindexvar.index += filter.BlockLength
stack[stacksize] = keyindexvar
stacksize++
sets0[h0].xormask ^= hash
@@ -214,25 +177,33 @@ func Populate(keys []uint64) *Xor8 {
Q0[Q0size].hash = sets0[h0].xormask
Q0size++
}
sets2[h2].xormask ^= hash
sets2[h2].count--
if sets2[h2].count == 1 {
Q2[Q2size].index = h2
Q2[Q2size].hash = sets2[h2].xormask
Q2size++
sets0[h1].xormask ^= hash
sets0[h1].count--
if sets0[h1].count == 1 {
Q0[Q0size].index = h1
Q0[Q0size].hash = sets0[h1].xormask
Q0size++
}
sets1[h2].xormask ^= hash
sets1[h2].count--
if sets1[h2].count == 1 {
Q1[Q1size].index = h2
Q1[Q1size].hash = sets1[h2].xormask
Q1size++
}
}
if Q0size == 0 && Q2size > 0 {
Q2size--
keyindexvar := Q2[Q2size]
if Q1size > 0 {
Q1size--
keyindexvar := Q1[Q1size]
index := keyindexvar.index
if sets2[index].count == 0 {
if sets1[index].count == 0 {
continue
}
sets1[index].count = 0
hash := keyindexvar.hash
h0 := filter.geth0(hash)
h1 := filter.geth1(hash)
keyindexvar.index += 2 * filter.BlockLength
h1 := filter.geth1(h0, hash)
keyindexvar.index += filter.BlockLength

stack[stacksize] = keyindexvar
stacksize++
@@ -243,12 +214,12 @@ func Populate(keys []uint64) *Xor8 {
Q0[Q0size].hash = sets0[h0].xormask
Q0size++
}
sets1[h1].xormask ^= hash
sets1[h1].count--
if sets1[h1].count == 1 {
Q1[Q1size].index = h1
Q1[Q1size].hash = sets1[h1].xormask
Q1size++
sets0[h1].xormask ^= hash
sets0[h1].count--
if sets0[h1].count == 1 {
Q0[Q0size].index = h1
Q0[Q0size].hash = sets0[h1].xormask
Q0size++
}

}
@@ -265,9 +236,6 @@ func Populate(keys []uint64) *Xor8 {
for i := range sets1 {
sets1[i] = xorset{0, 0}
}
for i := range sets2 {
sets2[i] = xorset{0, 0}
}
filter.Seed = splitmix64(&rngcounter)
}

@@ -277,11 +245,13 @@ func Populate(keys []uint64) *Xor8 {
ki := stack[stacksize]
val := uint8(fingerprint(ki.hash))
if ki.index < filter.BlockLength {
val ^= filter.Fingerprints[filter.geth1(ki.hash)+filter.BlockLength] ^ filter.Fingerprints[filter.geth2(ki.hash)+2*filter.BlockLength]
} else if ki.index < 2*filter.BlockLength {
val ^= filter.Fingerprints[filter.geth0(ki.hash)] ^ filter.Fingerprints[filter.geth2(ki.hash)+2*filter.BlockLength]
h0 := filter.geth0(ki.hash)
h1 := filter.geth1(h0, ki.hash)
h2 := filter.geth2(ki.hash)
val ^= filter.Fingerprints[h0] ^ filter.Fingerprints[h1] ^ filter.Fingerprints[h2+filter.BlockLength]
} else {
val ^= filter.Fingerprints[filter.geth0(ki.hash)] ^ filter.Fingerprints[filter.geth1(ki.hash)+filter.BlockLength]
h0 := filter.geth0(ki.hash)
val ^= filter.Fingerprints[h0] ^ filter.Fingerprints[filter.geth1(h0, ki.hash)]
}
filter.Fingerprints[ki.index] = val
}
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.