forked from pbnjay/grate
/
cfb.go
363 lines (320 loc) · 11.8 KB
/
cfb.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
// Package cfb implements the Microsoft Compound File Binary File Format.
package cfb
// https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-cfb/53989ce4-7b05-4f8d-829b-d08d6148375b
// Note for myself:
// Storage = Directory
// Stream = File
import (
"bytes"
"encoding/binary"
"errors"
"io"
"io/ioutil"
"log"
"unicode/utf16"
"github.com/ShiHuang-ESec/grate"
)
// fullAssertions enables the optional header sanity checks performed by load:
// class ID, major/minor version, reserved bytes, sector and mini sector
// sizes, and the mini stream cutoff size.
const fullAssertions = true
// Special sector identifiers that can appear where a sector number is
// expected. The upper-case names in the trailing comments are the ones used by
// the MS-CFB specification.
const (
secFree uint32 = 0xFFFFFFFF // FREESECT; load stops scanning the header DIFAT at the first such entry
secEndOfChain uint32 = 0xFFFFFFFE // ENDOFCHAIN; terminates a sector chain
secFAT uint32 = 0xFFFFFFFD // FATSECT
secDIFAT uint32 = 0xFFFFFFFC // DIFSECT
secReserved uint32 = 0xFFFFFFFB
secMaxRegular uint32 = 0xFFFFFFFA // MAXREGSECT
)
// header is the Compound File header, which MUST be at the beginning of the
// file (offset 0). load decodes it with binary.Read in little-endian order, so
// the order and width of the fields below mirror the on-disk layout and must
// not be changed.
type header struct {
Signature uint64 // Identification signature for the compound file structure, and MUST be set to the value 0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1.
ClassID [2]uint64 // Reserved and unused class ID that MUST be set to all zeroes (CLSID_NULL).
MinorVersion uint16 // Version number for nonbreaking changes. This field SHOULD be set to 0x003E if the major version field is either 0x0003 or 0x0004.
MajorVersion uint16 // Version number for breaking changes. This field MUST be set to either 0x0003 (version 3) or 0x0004 (version 4).
ByteOrder uint16 // This field MUST be set to 0xFFFE. This field is a byte order mark for all integer fields, specifying little-endian byte order.
SectorShift uint16 // This field MUST be set to 0x0009, or 0x000c, depending on the Major Version field. This field specifies the sector size of the compound file as a power of 2.
MiniSectorShift uint16 // This field MUST be set to 0x0006. This field specifies the sector size of the Mini Stream as a power of 2. The sector size of the Mini Stream MUST be 64 bytes.
Reserved1 [6]byte // This field MUST be set to all zeroes.
NumDirectorySectors int32 // This integer field contains the count of the number of directory sectors in the compound file.
NumFATSectors int32 // This integer field contains the count of the number of FAT sectors in the compound file.
FirstDirectorySectorLocation uint32 // This integer field contains the starting sector number for the directory stream.
TransactionSignature int32 // This integer field MAY contain a sequence number that is incremented every time the compound file is saved by an implementation that supports file transactions. It MUST be set to all zeroes if file transactions are not implemented.
MiniStreamCutoffSize int32 // This integer field MUST be set to 0x00001000. This field specifies the maximum size of a user-defined data stream that is allocated from the mini FAT and mini stream, and that cutoff is 4,096 bytes. Any user-defined data stream that is greater than or equal to this cutoff size must be allocated as normal sectors from the FAT.
FirstMiniFATSectorLocation uint32 // This integer field contains the starting sector number for the mini FAT.
NumMiniFATSectors int32 // This integer field contains the count of the number of mini FAT sectors in the compound file.
FirstDIFATSectorLocation uint32 // This integer field contains the starting sector number for the DIFAT.
NumDIFATSectors int32 // This integer field contains the count of the number of DIFAT sectors in the compound file.
DIFAT [109]uint32 // This array of 32-bit integer fields contains the first 109 FAT sector locations of the compound file.
}
// objectType is the kind of a directory entry, as stored in
// directory.ObjectType.
type objectType byte
const (
typeUnknown objectType = 0x00 // buildDirs stops reading at the first entry of this type
typeStorage objectType = 0x01 // a storage (directory-like container)
typeStream objectType = 0x02 // a stream (file-like data)
typeRootStorage objectType = 0x05 // the root entry; buildDirs takes the mini stream start and size from it
)
// directory is a single directory entry (128 bytes when encoded). buildDirs
// decodes it with binary.Read in little-endian order, so the order and width
// of the fields must stay exactly as declared.
type directory struct {
Name [32]uint16 // 32 utf16 characters
NameByteLen int16 // length of Name in bytes; String requires an even value of at most 64 and drops the last code unit as the terminator
ObjectType objectType // kind of entry, one of the type* constants
ColorFlag byte // 0=red, 1=black
LeftSiblingID uint32 // stream ids
RightSiblingID uint32
ChildID uint32
ClassID [2]uint64 // GUID
StateBits uint32
CreationTime int64
ModifiedTime int64
StartingSectorLocation int32 // first sector of the entry's data; for the root storage buildDirs records it as the start of the mini stream
StreamSize uint64 // size of the entry's data in bytes; buildDirs keeps only the low 32 bits for major version 3 files
}
// String returns the entry name decoded from UTF-16, without its trailing
// null terminator.
//
// NameByteLen counts bytes and includes the terminator. A negative, odd or
// oversized (more than 64 bytes) length yields "<invalid utf16 string>". A
// length of zero yields the empty string.
func (d *directory) String() string {
	if d.NameByteLen < 0 || (d.NameByteLen&1) == 1 || d.NameByteLen > 64 {
		return "<invalid utf16 string>"
	}
	units := d.Name[:int(d.NameByteLen)/2]
	if len(units) == 0 {
		// Unnamed entry: there is no terminator to strip, and slicing off
		// "the last rune" of nothing would panic.
		return ""
	}
	r16 := utf16.Decode(units)
	// trim off null terminator
	return string(r16[:len(r16)-1])
}
// Document represents a Compound File Binary Format document.
// All fields are populated by load (the directory-related ones through
// buildDirs).
type Document struct {
// the entire file, loaded into memory
data []byte
// pre-parsed info
header *header // decoded file header
dir []*directory // directory entries collected by buildDirs
// lookup tables for all the sectors
fat []uint32 // indexed by sector number; yields the next sector of the chain
minifat []uint32 // indexed by mini sector number; yields the next mini sector of the chain
ministreamstart uint32 // first sector of the mini stream, taken from the root storage entry
ministreamsize uint32 // size of the mini stream in bytes, taken from the root storage entry
}
// load reads the entire compound file from rx into memory, validates the
// header, and builds the FAT, the mini FAT and the directory listing.
//
// It returns grate.ErrNotInFormat when the data does not start with a
// compound file header, and a descriptive error when the header is recognised
// but the file is truncated or its sector tables are inconsistent. Malformed
// input is reported as an error instead of causing a panic.
func (d *Document) load(rx io.ReadSeeker) error {
	var err error
	d.data, err = ioutil.ReadAll(rx)
	if err != nil {
		return err
	}
	br := bytes.NewReader(d.data)

	h := &header{}
	if err = binary.Read(br, binary.LittleEndian, h); err != nil {
		// Too short to hold a header at all.
		return grate.ErrNotInFormat
	}
	if h.Signature != 0xe11ab1a1e011cfd0 {
		return grate.ErrNotInFormat // errors.New("ole2: invalid format")
	}
	if h.ByteOrder != 0xFFFE {
		return grate.ErrNotInFormat //errors.New("ole2: invalid format")
	}
	if fullAssertions {
		if h.ClassID[0] != 0 || h.ClassID[1] != 0 {
			return grate.ErrNotInFormat //errors.New("ole2: invalid CLSID")
		}
		if h.MajorVersion != 3 && h.MajorVersion != 4 {
			return errors.New("ole2: unknown major version")
		}
		if h.MinorVersion != 0x3B && h.MinorVersion != 0x3E {
			log.Printf("WARNING MinorVersion = 0x%02x NOT 0x3E", h.MinorVersion)
			//return errors.New("ole2: unknown minor version")
		}

		for _, v := range h.Reserved1 {
			if v != 0 {
				return errors.New("ole2: reserved section is non-zero")
			}
		}
		if h.MajorVersion == 3 {
			if h.SectorShift != 9 {
				return errors.New("ole2: invalid sector size")
			}
			if h.NumDirectorySectors != 0 {
				return errors.New("ole2: version 3 does not support directory sectors")
			}
		}
		if h.MajorVersion == 4 {
			if h.SectorShift != 12 {
				return errors.New("ole2: invalid sector size")
			}
		}
		if h.MiniSectorShift != 6 {
			return errors.New("ole2: invalid mini sector size")
		}
		if h.MiniStreamCutoffSize != 0x00001000 {
			return errors.New("ole2: invalid mini sector cutoff")
		}
	}
	// Independent of fullAssertions: a sector must hold at least one 4-byte
	// entry, and (sector+1)<<shift must stay within int64 for any uint32
	// sector number.
	if h.SectorShift < 2 || h.SectorShift > 30 {
		return errors.New("ole2: invalid sector size")
	}
	d.header = h

	le := binary.LittleEndian
	errLoad := errors.New("xls/cfb: unable to load file")
	dataLen := int64(len(d.data))
	secSize := int64(1) << h.SectorShift
	numFATentries := 1 << (h.SectorShift - 2)
	// No chain can visit more sectors than the file contains. This bounds the
	// capacity hints below and doubles as cycle protection for chain walks.
	maxSectors := int(dataLen >> h.SectorShift)

	// capHint turns a (possibly bogus) sector count from the header into a
	// safe slice capacity.
	capHint := func(sectors int32) int {
		n := int(sectors)
		if n < 0 {
			n = 0
		}
		if n > maxSectors {
			n = maxSectors
		}
		return numFATentries * n
	}
	d.fat = make([]uint32, 0, capHint(h.NumFATSectors))
	d.minifat = make([]uint32, 0, capHint(h.NumMiniFATSectors))

	// sectorAt returns the complete sector sid, or false when sid is not a
	// regular sector number or the sector is not fully contained in the file.
	sectorAt := func(sid uint32) ([]byte, bool) {
		if sid > secMaxRegular {
			return nil, false
		}
		offs := (int64(sid) + 1) << h.SectorShift
		if offs+secSize > dataLen {
			return nil, false
		}
		return d.data[offs : offs+secSize], true
	}

	// appendFAT appends every entry of FAT sector sid to d.fat.
	fatSectors := 0
	appendFAT := func(sid uint32) error {
		sector, ok := sectorAt(sid)
		if !ok || fatSectors >= maxSectors {
			return errLoad
		}
		fatSectors++
		for ; len(sector) >= 4; sector = sector[4:] {
			d.fat = append(d.fat, le.Uint32(sector))
		}
		return nil
	}

	// step 1: read the DIFAT sector list
	for _, sid := range h.DIFAT {
		if sid == secFree {
			break
		}
		if err := appendFAT(sid); err != nil {
			return err
		}
	}
	if h.NumDIFATSectors > 0 {
		sid := h.FirstDIFATSectorLocation
		// Some writers terminate the DIFAT chain with FREESECT instead of
		// ENDOFCHAIN, so both end the walk.
		for walked := 0; sid != secEndOfChain && sid != secFree; walked++ {
			if walked >= maxSectors {
				return errLoad // chain loops back on itself
			}
			difatSector, ok := sectorAt(sid)
			if !ok {
				return errLoad
			}
			// every entry but the last one names a FAT sector
			for ; len(difatSector) > 4; difatSector = difatSector[4:] {
				fatSID := le.Uint32(difatSector)
				if fatSID == secFree || fatSID == secEndOfChain {
					continue
				}
				if err := appendFAT(fatSID); err != nil {
					return err
				}
			}
			// the last entry chains the next DIFAT sector
			sid = le.Uint32(difatSector)
		}
	}

	// step 2: read the mini FAT
	sid := h.FirstMiniFATSectorLocation
	for walked := 0; sid != secEndOfChain && sid != secFree; walked++ {
		if walked >= maxSectors {
			return errLoad // chain loops back on itself
		}
		sector, ok := sectorAt(sid)
		if !ok {
			return errLoad
		}
		for ; len(sector) >= 4; sector = sector[4:] {
			d.minifat = append(d.minifat, le.Uint32(sector))
		}
		// The mini FAT sectors are chained through the regular FAT. If the
		// FAT has no entry for this sector, keep what has been read so far.
		if int64(sid) >= int64(len(d.fat)) {
			break
		}
		sid = d.fat[sid]
	}

	// step 3: read the Directory Entries
	return d.buildDirs(br)
}
// buildDirs reads the directory entries from br and appends them to d.dir.
//
// The directory is a stream of 128-byte entries that starts at
// header.FirstDirectorySectorLocation and continues from sector to sector
// through d.fat. Every sector of the chain is read, and the number of entries
// per sector is derived from the sector size (4 for 512-byte sectors, 32 for
// 4096-byte sectors).
//
// Reading stops without error at the first entry of type typeUnknown, at the
// end of the chain, or when the last sector is cut short by the end of the
// data. An error is returned when a directory sector lies outside the data or
// the chain loops back on itself.
//
// For the root storage entry the mini stream start sector and size are
// recorded on d.
func (d *Document) buildDirs(br *bytes.Reader) error {
	h := d.header
	le := binary.LittleEndian

	// A directory entry is 128 (1<<7) bytes.
	if h.SectorShift < 7 || h.SectorShift > 30 {
		return errors.New("ole2: invalid sector size")
	}
	entriesPerSector := 1 << (h.SectorShift - 7)
	// No chain can visit more sectors than the data contains; used to detect
	// cyclic chains.
	maxSectors := br.Size() >> h.SectorShift

	// step 2: read the Directory
	sid := h.FirstDirectorySectorLocation
	for walked := int64(0); sid <= secMaxRegular; walked++ {
		offs := (int64(sid) + 1) << h.SectorShift
		if walked >= maxSectors || offs >= br.Size() {
			return errors.New("ole2: corrupt data format")
		}
		if _, err := br.Seek(offs, io.SeekStart); err != nil {
			return err
		}

		for j := 0; j < entriesPerSector; j++ {
			dirent := &directory{}
			if err := binary.Read(br, le, dirent); err != nil {
				// Final sector cut short by the end of the data: keep the
				// entries read so far.
				return nil
			}
			if d.header.MajorVersion == 3 {
				// mask out upper 32bits
				dirent.StreamSize = dirent.StreamSize & 0xFFFFFFFF
			}

			switch dirent.ObjectType {
			case typeRootStorage:
				d.ministreamstart = uint32(dirent.StartingSectorLocation)
				d.ministreamsize = uint32(dirent.StreamSize)
			case typeStorage:
				//log.Println("got a storage? what to do now?")
			case typeStream:
				// stream contents are opened on demand through
				// getStreamReader / getMiniStreamReader
			case typeUnknown:
				return nil
			}
			d.dir = append(d.dir, dirent)
		}

		// The FAT names the next directory sector; without an entry for this
		// sector the chain cannot be followed any further.
		if int64(sid) >= int64(len(d.fat)) {
			break
		}
		sid = d.fat[sid]
	}
	return nil
}
// getStreamReader returns a reader over the size bytes of the stream whose
// sector chain starts at sid, following the chain through d.fat.
//
// The returned SliceReader refers to the file data in place. No stream bytes
// are copied.
//
// It returns "ole2: corrupt data format" when size exceeds the file size, a
// sector lies outside the file, or the FAT has no entry for a sector of the
// chain, and "ole2: incomplete read" when the chain ends before size bytes
// were collected.
func (d *Document) getStreamReader(sid uint32, size uint64) (io.ReadSeeker, error) {
	dataLen := int64(len(d.data))
	if size > uint64(dataLen) {
		// A stream cannot be larger than the file that contains it; this also
		// keeps the allocation below proportional to the file size.
		return nil, errors.New("ole2: corrupt data format")
	}

	// NB streamData is a slice of slices of the raw data, so this is the
	// only allocation - for the (much smaller) list of sector slices
	streamData := make([][]byte, 1+(size>>d.header.SectorShift))

	x := 0
	secSize := int64(1) << d.header.SectorShift
	for sid != secEndOfChain && sid != secFree {
		offs := (int64(sid) + 1) << d.header.SectorShift

		// Only the bytes still owed to the stream have to be present, so a
		// final sector cut short by the end of the file is accepted as long
		// as it covers the rest of the stream.
		need := secSize
		if size < uint64(need) {
			need = int64(size)
		}
		if sid > secMaxRegular || offs+need > dataLen {
			return nil, errors.New("ole2: corrupt data format")
		}
		streamData[x] = d.data[offs : offs+need]
		size -= uint64(need)
		if size == 0 {
			break
		}

		if int64(sid) >= int64(len(d.fat)) {
			return nil, errors.New("ole2: corrupt data format")
		}
		sid = d.fat[sid]
		x++
	}
	if size != 0 {
		return nil, errors.New("ole2: incomplete read")
	}

	return &SliceReader{Data: streamData}, nil
}
// getMiniStreamReader returns a reader over the size bytes of the stream
// whose mini sector chain starts at sid, following the chain through
// d.minifat.
//
// Mini sectors live inside the mini stream, which is itself a chain of
// regular sectors starting at d.ministreamstart and linked through d.fat.
// The returned SliceReader refers to the file data in place. No stream bytes
// are copied.
//
// It returns "ole2: corrupt data format" when size exceeds the file size,
// when a sector or mini sector lies outside the available data, or when a
// chain refers to a table entry that does not exist. It returns
// "ole2: incomplete read" when the mini sector chain ends before size bytes
// were collected.
func (d *Document) getMiniStreamReader(sid uint32, size uint64) (io.ReadSeeker, error) {
	errCorrupt := errors.New("ole2: corrupt data format")

	dataLen := int64(len(d.data))
	if size > uint64(dataLen) {
		// A stream cannot be larger than the file that contains it; this also
		// keeps the allocation below proportional to the file size.
		return nil, errCorrupt
	}
	secShift := d.header.SectorShift
	miniShift := d.header.MiniSectorShift
	secSize := int64(1) << secShift
	miniSecSize := int64(1) << miniShift

	// Collect the regular sectors that make up the mini stream.
	// TODO: move into a separate cache so we don't recalculate it each time
	var container [][]byte
	maxSectors := dataLen >> secShift // a longer chain must contain a cycle
	for fsid := d.ministreamstart; fsid != secEndOfChain && fsid != secFree; {
		offs := (int64(fsid) + 1) << secShift
		if fsid > secMaxRegular || offs >= dataLen || int64(len(container)) >= maxSectors {
			return nil, errCorrupt
		}
		end := offs + secSize
		if end > dataLen {
			// last sector of the file is cut short; expose what exists
			end = dataLen
		}
		container = append(container, d.data[offs:end])

		if int64(fsid) >= int64(len(d.fat)) {
			return nil, errCorrupt
		}
		fsid = d.fat[fsid]
	}

	// NB streamData is a slice of slices of the raw data, so this is the
	// only allocation - for the (much smaller) list of sector slices
	streamData := make([][]byte, 1+(size>>miniShift))

	x := 0
	for sid != secEndOfChain && sid != secFree {
		// locate the mini sector inside the mini stream
		offs := int64(sid) << miniShift
		so, si := offs/secSize, offs%secSize
		if so >= int64(len(container)) {
			return nil, errCorrupt
		}

		need := miniSecSize
		if size < uint64(need) {
			need = int64(size)
		}
		sector := container[so]
		if si+need > int64(len(sector)) {
			return nil, errCorrupt
		}
		streamData[x] = sector[si : si+need]
		size -= uint64(need)
		if size == 0 {
			// all bytes collected; ignore any remainder of the chain
			break
		}

		if int64(sid) >= int64(len(d.minifat)) {
			return nil, errCorrupt
		}
		sid = d.minifat[sid]
		x++
	}
	if size != 0 {
		return nil, errors.New("ole2: incomplete read")
	}

	return &SliceReader{Data: streamData}, nil
}