-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathhyperminhash_demo.go
74 lines (57 loc) · 1.55 KB
/
hyperminhash_demo.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
package main
import (
"fmt"
"strconv"
"time"
pcgr "github.com/dgryski/go-pcgr"
"github.com/axiomhq/hyperminhash"
)
// estimateError returns the relative error of an estimate: the absolute
// difference between got and exp, divided by exp.
//
// If got equals exp the error is 0, including when both are zero (a plain
// division would yield NaN there). If exp is zero and got is not, the
// result is +Inf, because a relative error against a zero reference is
// unbounded.
func estimateError(got, exp uint64) float64 {
	if got == exp {
		return 0
	}

	// Subtract the smaller from the larger so the unsigned difference
	// cannot wrap around.
	var delta uint64
	if got > exp {
		delta = got - exp
	} else {
		delta = exp - got
	}
	return float64(delta) / float64(exp)
}
// percent returns 100*part/whole, or 0 when whole is 0 so that an empty
// denominator is reported as 0% instead of NaN.
func percent(part, whole float64) float64 {
	if whole == 0 {
		return 0
	}
	return 100 * part / whole
}

// main prints one Markdown table per maximum cardinality k (1000, 10000, …,
// up to but excluding 1000000000). Each table has five rows; every row
// builds two overlapping integer ranges of random size below k, feeds them
// into two sketches and an exact set, and prints the exact sizes next to the
// sketch results for each set, their union and their intersection.
func main() {
	rnd := pcgr.New(time.Now().UnixNano(), 0)

	for k := int64(1000); k < 1000000000; k *= 10 {
		fmt.Println("\n### Max Cardinality", k)
		fmt.Println("\n| Set1 | HLL1 | Set2 | HLL2 | S1 ∪ S2 | HLL1 ∪ HLL2 | S1 ∩ S2 | HLL1 ∩ HLL2 |")
		fmt.Println("|---|---|---|---|---|---|---|---|")

		for j := 1; j <= 5; j++ {
			size1 := rnd.Int63() % k
			size2 := rnd.Int63() % k

			sk1 := hyperminhash.New()
			sk2 := hyperminhash.New()

			// The overlap must be smaller than the smaller of the two sets.
			maxCol := size1
			if maxCol > size2 {
				maxCol = size2
			}

			// A modulo by zero panics, so when either set is empty the
			// overlap is simply left at 0.
			var cols int64
			if maxCol > 0 {
				cols = rnd.Int63() % maxCol
			}

			// set holds every distinct value added to either sketch; its
			// length is the exact size of the union.
			set := make(map[int]struct{})

			// First set: the values 0 .. size1-1.
			for i := 0; i < int(size1); i++ {
				set[i] = struct{}{}
				sk1.Add([]byte(strconv.Itoa(i)))
			}

			// Second set: size2 consecutive values starting cols places
			// before the end of the first range, so exactly cols values
			// are shared between the two sets.
			for i := int(size1 - cols); i < int(size1-cols+size2); i++ {
				set[i] = struct{}{}
				sk2.Add([]byte(strconv.Itoa(i)))
			}

			card1 := sk1.Cardinality()
			card2 := sk2.Cardinality()
			ints1 := sk1.Intersection(sk2)
			// The intersection is taken before Merge, matching the
			// original statement order.
			m := sk1.Merge(sk2)
			mcard := m.Cardinality()

			fmt.Printf("| %d | %d | %d | %d | %d | %d | **%d** (%f%%) | **%d** (%f%%) |\n",
				size1, card1, size2, card2, len(set), mcard,
				cols, percent(float64(cols), float64(len(set))),
				ints1, percent(float64(ints1), float64(mcard)))
		}
		fmt.Println("")
	}
}