/
attack_hll.go
325 lines (259 loc) · 8.96 KB
/
attack_hll.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
package main
import (
"encoding/binary"
"fmt"
"hash"
"math"
"math/rand"
"time"
"./clarkduvall/hyperloglog"
"./spaolacci/murmur3"
)
var n = uint8(8)
var m = 1 << n
var maxValue = int(math.Pow(2, 20)) - 1
var iterations = 50
var defaultB = uint32(1)
var defaultT = uint8(1)
var empty = 0
func main() {
fmt.Printf("For %d iterations:\n", iterations)
//S1 scenario
fmt.Printf("S1 Scenario: (original, final, number of items inserted, number of resets)\n")
var originalEst, finalEst, nItermInserted, nResets, expected int
originalEst, finalEst, nItermInserted, nResets = runAttack("S1", empty, false, defaultB, defaultT)
fmt.Printf("Empty sketch, B = 1: %d, %d, %d, resets: %d\n", originalEst, finalEst, nItermInserted, nResets)
originalEst, finalEst, nItermInserted, nResets = runAttack("S1", empty, false, uint32(m/2), defaultT)
fmt.Printf("Empty sketch, B = %d: %d, %d, %d, resets: %d\n", uint32(m/2), originalEst, finalEst, nItermInserted, nResets)
//S2 scenario
fmt.Printf("S2 Scenario: (original, final, number of items inserted)\n")
originalEst, finalEst, nItermInserted, _ = runAttack("S2", empty, false, defaultB, defaultT)
fmt.Printf("Empty sketch, B = 1: %d, %d, %d\n", originalEst, finalEst, nItermInserted)
kmlogm := []int{}
for t := float64(1); t < 11; t++ {
kmlogm = append(kmlogm, int(math.Pow(2, t-1)*float64(m)*math.Log(float64(m))))
}
for i, val := range kmlogm {
originalEst, finalEst, nItermInserted, _ = runAttack("S2Preload", val, false, defaultB, uint8(i+1))
fmt.Printf("Non-empty sketch, t = %d: %d, %d, %d\n", i+1, originalEst, finalEst, nItermInserted)
}
// S4 scenario
fmt.Printf("S4 Scenario: (original, final, number of items inserted, expected fraction to insert)\n")
originalEst, finalEst, nItermInserted, _ = runAttack("S4", empty, false, defaultB, defaultT)
fmt.Printf("Empty sketch: %d, %d, %d\n", originalEst, finalEst, nItermInserted)
for i, val := range kmlogm {
originalEst, finalEst, nItermInserted, expected = runAttack("S4", val, false, defaultB, defaultT)
fmt.Printf("Non-empty sketch, t = %d: %d, %d, %d, %d\n", i+1, originalEst, finalEst, nItermInserted, expected)
}
//Setup of RT20 (S1 scenario)
fmt.Printf("In the setup of RT20: (original, final, number of items inserted, expected fraction to insert, number of resets)\n")
originalEst, finalEst, nItermInserted, nResets = runAttack("S1", empty, true, defaultB, defaultT)
fmt.Printf("Empty sketch, B = 1: %d, %d, %d, %d\n", originalEst, finalEst, nItermInserted, nResets)
originalEst, finalEst, nItermInserted, nResets = runAttack("S1", empty, true, uint32(m/2), defaultT)
fmt.Printf("Empty sketch, B = %d: %d, %d, %d, %d\n", uint32(m/2), originalEst, finalEst, nItermInserted, nResets)
}
//runAttack is sub-function to be called in main. Does all the setup and function calls.
func runAttack(scenario string, nInitialItems int, RT20 bool, B uint32, T uint8) (int, int, int, int) {
var avgOriginalEst int
var avgFinalEst int
var avgNItermInserted int
var avgNResets int
for i := 0; i < iterations; i++ {
var originalEst int
var finalEst int
var nItermInserted int
var nResets int
rand.Seed(time.Now().UnixNano())
hll, _ := hyperloglog.New(n)
hllHash := murmur3.New32
// Insert initial items
InsertInitialItems(hll, hllHash(), nInitialItems)
originalEst = int(hll.Count())
regCopy := make([]uint8, m)
copy(regCopy, hll.Reg)
switch scenario {
case "S1":
nItermInserted, nResets = AttackS1(hll, m, hllHash(), RT20, B)
case "S2":
nItermInserted = AttackS2(hll, m, hllHash(), RT20, B)
case "S2Preload":
nItermInserted = AttackS2Preload(hll, m, hllHash(), RT20, B, T)
case "S4":
nItermInserted, nResets = AttackS4(hll, m, hllHash(), RT20, B)
default:
println("Not implemented")
}
finalEst = int(hll.Count())
hll.Clear()
avgOriginalEst += originalEst
avgFinalEst += finalEst
avgNItermInserted += nItermInserted
avgNResets += nResets
}
return avgOriginalEst / iterations, avgFinalEst / iterations, avgNItermInserted / iterations, avgNResets / iterations
}
//Inserts nInitialItems random items into the HLL sketch
func InsertInitialItems(hll *hyperloglog.HyperLogLog, hllHash hash.Hash32, nItems int) {
//Shuffle all items and take first nItems
items := rand.Perm(maxValue)[:nItems]
//Insert them
for _, i := range items {
hllHash.Write(itob(i))
hll.Add(hllHash)
hllHash.Reset()
}
}
//Converts int to byte array to write into hash
func itob(i int) []byte {
b := make([]byte, binary.MaxVarintLen32)
binary.PutUvarint(b, uint64(i))
return b
}
func HarmonicMean(reg []uint8) float64 {
mean := float64(0)
for i := 0; i < len(reg); i++ {
mean += 1 / (math.Pow(2, float64(reg[i])))
}
return mean / float64(m)
}
//Generates an int of length 32-n with ci leading 1's and the rest 0's
func GenMask(ci uint8) uint32 {
mask := uint32(0)
for i := uint8(0); i < ci; i++ {
mask = mask << 1
mask += 1
}
for i := ci; i < 32-n; i++ {
mask = mask << 1
}
return mask
}
//Attack in the S1 scenario as described in the paper
//We note that the reset (hll.new), insert (hash.Write - hll.Add - hash.Reset)
//and count (hll.Count) lines are typically perfromed by the attacker via an API.
//We do not implement such API, hence why we are "giving" and using information
//such as the hash to the Attack function, altought, as described in our paper,
//it is not used by the attacker.
func AttackS1(hll *hyperloglog.HyperLogLog, m int, hllHash hash.Hash32, RT20 bool, B uint32) (int, int) {
nResets := 0
items := rand.Perm(maxValue)
if RT20 {
items = items[:250000]
}
switchPoint := uint64(float64(m) * 2.5)
targetEstimate := uint64(float64(m) * math.Log(float64(m)/float64(m-int(B))))
var card uint64
filteredItems := items
//Stop condition: the cardinality at the end of the iteration does not go above targetEstimate.
for card != targetEstimate {
//Reset HLL
hll.Clear()
nResets++
card = hll.Count()
l := len(filteredItems)
id := 0
for id < l {
//Insert item in HLL
hllHash.Write(itob(filteredItems[id]))
hll.Add(hllHash)
hllHash.Reset()
if (hll.Count() <= targetEstimate) || (card == hll.Count() && hll.Count() < switchPoint) {
id++
} else if card != hll.Count() && hll.Count() < switchPoint {
//Discard them
filteredItems = append(filteredItems[:id], filteredItems[id+1:]...)
l--
} else { //hll.Count > switchPoint -> we reset
card = hll.Count()
break
}
card = hll.Count()
}
}
return len(filteredItems), nResets - 1 //-1 to account for first reset of the loop
}
//Attack in the S2 scenario as described in the paper
func AttackS2(hll *hyperloglog.HyperLogLog, m int, hllHash hash.Hash32, RT20 bool, B uint32) int {
inserted := 0
items := rand.Perm(maxValue)
if RT20 {
items = items[:250000]
}
mask := GenMask(1)
emptyBool := hll.Count() == 0
for _, i := range items {
_, err := hllHash.Write(itob(i))
if err != nil {
fmt.Printf("Hash error: err %v\n", err)
continue
}
result := hllHash.Sum32()
bucket := (result & uint32(((1<<8)-1)<<24)) >> 24
if (emptyBool && bucket < B) || (!emptyBool && (result&mask != 0)) {
//There is a leading 1, or in case of empty sketch, to targeted bucket.
hll.Add(hllHash)
inserted++
}
hllHash.Reset()
}
return inserted
}
//Additional attack strategy in the S2 scenario when the adversary has information on the preload
func AttackS2Preload(hll *hyperloglog.HyperLogLog, m int, hllHash hash.Hash32, RT20 bool, B uint32, T uint8) int {
inserted := 0
items := rand.Perm(maxValue)
if RT20 {
items = items[:250000]
}
// 0 < T < 24, in "normal" attack, T == 1
mask := GenMask(T)
emptyBool := hll.Count() == 0
for _, i := range items {
_, err := hllHash.Write(itob(i))
if err != nil {
fmt.Printf("Hash error: err %v\n", err)
continue
}
result := hllHash.Sum32()
bucket := (result & uint32(((1<<8)-1)<<24)) >> 24
if (emptyBool && bucket < B) || (!emptyBool && (result&mask != 0)) {
//There is a leading 1, or in case of empty sketch, to targeted bucket.
hll.Add(hllHash)
inserted++
}
hllHash.Reset()
}
return inserted
}
//Attack in the S4 scenario as described in the paper
func AttackS4(hll *hyperloglog.HyperLogLog, m int, hllHash hash.Hash32, RT20 bool, B uint32) (int, int) {
inserted := 0
items := rand.Perm(maxValue)
if RT20 {
items = items[:250000]
}
expected := int(float64(maxValue) * (1 - HarmonicMean(hll.Reg)))
//First we check if we are attacking an empty sketch.
//If so, we will target only B buckets
emptyBool := hll.Count() == 0
for _, i := range items {
_, err := hllHash.Write(itob(i))
if err != nil {
fmt.Printf("Hash error: err %v\n", err)
continue
}
result := hllHash.Sum32()
bucket := (result & uint32(((1<<8)-1)<<24)) >> 24
ci := hll.Reg[bucket]
mask := GenMask(ci)
if (emptyBool && bucket < B) || (!emptyBool && (result&mask != 0)) {
//We insert in an empty bucket only if it is among the targeted
//in an empty sketch
//There is a 1 in the ci leading bits
hll.Add(hllHash)
inserted++
}
hllHash.Reset()
}
return inserted, expected
}