-
Notifications
You must be signed in to change notification settings - Fork 16
/
leb128.go
290 lines (275 loc) · 8.05 KB
/
leb128.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
// Copyright (c) 2017-2018 The qitmeer developers
package leb128
import (
"math/big"
)
// The code originally copied from https://github.com/filecoin-project/go-leb128
//
// LEB128 or Little Endian Base 128 is a form of variable-length code compression
// used to store an arbitrarily large integer in a small number of bytes.
// see :
// https://en.wikipedia.org/wiki/LEB128.
// see also
// https://en.wikipedia.org/wiki/Variable-length_quantity
//
// Unsigned LEB128
// unsigned number 624485
// MSB ----------------- LSB
// 10011000011101100101 In raw binary
// 010011000011101100101 Padded to a multiple of 7 bits
// 0100110 0001110 1100101 Split into 7-bit groups
// 00100110 10001110 11100101 Add high 1 bits on all but last (most significant) group to form bytes
// 0x26 0x8E 0xE5 In hexadecimal
//
// 0xE5 0x8E 0x26 Output stream (LSB to MSB)
//
// Signed LEB128
// signed number -624485 (0xFFF6789B)
// MSB ------------------ LSB
// 01100111100010011011 In raw two's complement binary
// 101100111100010011011 Sign extended to a multiple of 7 bits
// 1011001 1110001 0011011 Split into 7-bit groups
// 01011001 11110001 10011011 Add high 1 bits on all but last (most significant) group to form bytes
// 0x59 0xF1 0x9B In hexadecimal
//
// → 0x9B 0xF1 0x59 Output stream (LSB to MSB)
//
// TODO revisit & refactor the impl & refactor the current core serialization
//
// Reference:
//
// 1.) Varint define in Git (https://github.com/git/git/blob/master/varint.c)
//
// uintmax_t decode_varint(const unsigned char **bufp)
// {
// const unsigned char *buf = *bufp;
// unsigned char c = *buf++;
// uintmax_t val = c & 127;
// while (c & 128) {
// val += 1;
// if (!val || MSB(val, 7))
// return 0; /* overflow */
// c = *buf++;
// val = (val << 7) + (c & 127);
// }
// *bufp = buf;
// return val;
// }
//
// int encode_varint(uintmax_t value, unsigned char *buf)
// {
// unsigned char varint[16];
// unsigned pos = sizeof(varint) - 1;
// varint[pos] = value & 127;
// while (value >>= 7)
// varint[--pos] = 128 | (--value & 127);
// if (buf)
// memcpy(buf, varint + pos, sizeof(varint) - pos);
// return sizeof(varint) - pos;
// }
//
// 2.) VarInt Define in the Bitcoin Core (https://github.com/bitcoin/bitcoin/blob/master/src/serialize.h)
//
// Variable-length integers: bytes are a MSB base-128 encoding of the number.
// The high bit in each byte signifies whether another digit follows. To make
// sure the encoding is one-to-one, one is subtracted from all but the last digit.
// Thus, the byte sequence a[] with length len, where all but the last byte
// has bit 128 set, encodes the number:
//
// (a[len-1] & 0x7F) + sum(i=1..len-1, 128^i*((a[len-i-1] & 0x7F)+1))
//
// Properties:
// * Very small (0-127: 1 byte, 128-16511: 2 bytes, 16512-2113663: 3 bytes)
// * Every integer has exactly one encoding
// * Encoding does not depend on size of original integer type
// * No redundancy: every (infinite) byte sequence corresponds to a list
// of encoded integers.
//
// 0: [0x00] 256: [0x81 0x00]
// 1: [0x01] 16383: [0xFE 0x7F]
// 127: [0x7F] 16384: [0xFF 0x00]
// 128: [0x80 0x00] 16511: [0xFF 0x7F]
// 255: [0x80 0x7F] 65535: [0x82 0xFE 0x7F]
// 2^32: [0x8E 0xFE 0xFE 0xFF 0x00]
//
// Currently there is no support for signed encodings. The default mode will not
// compile with signed values, and the legacy "nonnegative signed" mode will
// accept signed values, but improperly encode and decode them if they are
// negative. In the future, the DEFAULT mode could be extended to support
// negative numbers in a backwards compatible way, and additional modes could be
// added to support different varint formats (e.g. zigzag encoding).
//
// template<VarIntMode Mode, typename I>
// inline unsigned int GetSizeOfVarInt(I n)
// {
// CheckVarIntMode<Mode, I>();
// int nRet = 0;
// while(true) {
// nRet++;
// if (n <= 0x7F)
// break;
// n = (n >> 7) - 1;
// }
// return nRet;
// }
//
// template<typename Stream, VarIntMode Mode, typename I>
// void WriteVarInt(Stream& os, I n)
// {
// CheckVarIntMode<Mode, I>();
// unsigned char tmp[(sizeof(n)*8+6)/7];
// int len=0;
// while(true) {
// tmp[len] = (n & 0x7F) | (len ? 0x80 : 0x00);
// if (n <= 0x7F)
// break;
// n = (n >> 7) - 1;
// len++;
// }
// do {
// ser_writedata8(os, tmp[len]);
// } while(len--);
// }
//
// template<typename Stream, VarIntMode Mode, typename I>
// I ReadVarInt(Stream& is)
// {
// CheckVarIntMode<Mode, I>();
// I n = 0;
// while(true) {
// unsigned char chData = ser_readdata8(is);
// if (n > (std::numeric_limits<I>::max() >> 7)) {
// throw std::ios_base::failure("ReadVarInt(): size too large");
// }
// n = (n << 7) | (chData & 0x7F);
// if (chData & 0x80) {
// if (n == std::numeric_limits<I>::max()) {
// throw std::ios_base::failure("ReadVarInt(): size too large");
// }
// n++;
// } else {
// return n;
// }
// }
// }
//
// FromUInt64 encodes n with LEB128 and returns the encoded bytes.
func FromUInt64(n uint64) (out []byte) {
more := true
for more {
b := byte(n & 0x7F)
n >>= 7
if n == 0 {
more = false
} else {
b = b | 0x80
}
out = append(out, b)
}
return
}
// TODO, not check the input overflow
// ToUInt64 decodes LEB128-encoded bytes into a uint64.
func ToUInt64(encoded []byte) uint64 {
var result uint64
var shift, i uint
for {
b := encoded[i]
result |= (uint64(0x7F & b)) << shift
if b&0x80 == 0 {
break
}
shift += 7
i++
}
return result
}
// FromBigInt encodes the signed big integer n in two's complement,
// LEB128-encodes it, and returns the encoded bytes.
func FromBigInt(n *big.Int) (out []byte) {
size := n.BitLen()
negative := n.Sign() < 0
if negative {
// big.Int stores integers using sign and magnitude. Returns a copy
// as the code below is destructive.
n = twosComplementBigInt(n)
} else {
// The code below is destructive so make a copy.
n = big.NewInt(0).Set(n)
}
more := true
for more {
bBigInt := big.NewInt(0)
n.DivMod(n, big.NewInt(128), bBigInt) // This does the mask and the shift.
b := uint8(bBigInt.Int64())
// We just logically right-shifted the bits of n so we need to sign extend
// if n is negative (this simulates an arithmetic shift).
if negative {
signExtend(n, size)
}
if (n.Sign() == 0 && b&0x40 == 0) ||
(negative && equalsNegativeOne(n, size) && b&0x40 > 0) {
more = false
} else {
b = b | 0x80
}
out = append(out, b)
}
return
}
// ToBigInt decodes the signed big integer found in the given bytes.
func ToBigInt(encoded []byte) *big.Int {
result := big.NewInt(0)
var shift, i int
var b byte
size := len(encoded) * 8
for {
b = encoded[i]
for bitPos := uint(0); bitPos < 7; bitPos++ {
result.SetBit(result, 7*i+int(bitPos), uint((b>>bitPos)&0x01))
}
shift += 7
if b&0x80 == 0 {
break
}
i++
}
if b&0x40 > 0 {
// Sign extend.
for ; shift < size; shift++ {
result.SetBit(result, shift, 1)
}
result = twosComplementBigInt(result)
result.Neg(result)
}
return result
}
func twosComplementBigInt(n *big.Int) *big.Int {
absValBytes := n.Bytes()
for i, b := range absValBytes {
absValBytes[i] = ^b
}
bitsFlipped := big.NewInt(0).SetBytes(absValBytes)
return bitsFlipped.Add(bitsFlipped, big.NewInt(1))
}
func signExtend(n *big.Int, size int) {
bitPos := size - 7
max := size
if bitPos < 0 {
bitPos = 0
max = 7
}
for ; bitPos < max; bitPos++ {
n.SetBit(n, bitPos, 1)
}
}
// equalsNegativeOne is a poor man's check that n, which
// is encoded in two's complement, is all 1's.
func equalsNegativeOne(n *big.Int, size int) bool {
for i := 0; i < size; i++ {
if !(n.Bit(i) == 1) {
return false
}
}
return true
}