agl / otc

OpenType Condom

This URL has Read+Write access

Adam Langley (author)
Thu Apr 09 14:55:48 -0700 2009
otc / src / cmap.cc
100644 489 lines (413 sloc) 15.251 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
#include <vector>
 
#include "otc.h"
#include "cmap.h"
#include "maxp.h"
 
struct CMAPSubtableHeader {
  uint16_t platform;
  uint16_t encoding;
  uint32_t offset;
  uint16_t format;
  uint32_t length;
};
 
struct Subtable314Range {
  uint16_t start_range;
  uint16_t end_range;
  int16_t id_delta;
  uint16_t id_range_offset;
  uint32_t id_range_offset_offset;
};
 
// The maximum number of groups in format 12 or 13 subtables. Set so that we'll
// allocate, at most, 8MB of memory when parsing these.
static const unsigned kMaxCMAPGroups = 699050;
 
static bool
parse_314(OpenTypeFile *file, const uint8_t *data, size_t length, uint16_t num_glyphs) {
  Buffer subtable(data, length);
 
  // 3.1.4 subtables are complex and, rather than expanding the whole thing and
  // recompacting it, we valid it and include it verbatim in the ouput.
 
  subtable.Skip(4);
  uint16_t language;
  if (!subtable.ReadU16(&language))
    return failure();
  if (language)
    return failure();
 
  uint16_t segcountx2, search_range, entry_selector, range_shift;
  if (!subtable.ReadU16(&segcountx2) ||
      !subtable.ReadU16(&search_range) ||
      !subtable.ReadU16(&entry_selector) ||
      !subtable.ReadU16(&range_shift)) {
    return failure();
  }
 
  if (segcountx2 & 1 || search_range & 1)
    return failure();
  const uint16_t segcount = segcountx2 >> 1;
  // There must be at least one segment according the spec.
  if (segcount < 1)
    return failure();
 
  // log2segcount is the maximal x s.t. 2^x < segcount
  unsigned log2segcount = 0;
  while (1u << (log2segcount + 1) < segcount)
    log2segcount++;
 
  const uint16_t expected_search_range = 2 * 1 << log2segcount;
  if (expected_search_range != search_range)
    return failure();
 
  if (entry_selector != log2segcount)
    return failure();
 
  const uint16_t expected_range_shift = segcountx2 - search_range;
  if (range_shift != expected_range_shift)
    return failure();
 
  std::vector<Subtable314Range> ranges(segcount);
 
  for (unsigned i = 0; i < segcount; ++i) {
    if (!subtable.ReadU16(&ranges[i].end_range))
      return failure();
  }
 
  uint16_t padding;
  if (!subtable.ReadU16(&padding))
    return failure();
  if (padding)
    return failure();
 
  for (unsigned i = 0; i < segcount; ++i) {
    if (!subtable.ReadU16(&ranges[i].start_range))
      return failure();
  }
  for (unsigned i = 0; i < segcount; ++i) {
    if (!subtable.ReadS16(&ranges[i].id_delta))
      return failure();
  }
  for (unsigned i = 0; i < segcount; ++i) {
    ranges[i].id_range_offset_offset = subtable.offset();
    if (!subtable.ReadU16(&ranges[i].id_range_offset))
      return failure();
    if (ranges[i].id_range_offset & 1)
      return failure();
  }
 
  // ranges must be ascending order, based on the end_code. Ranges may not
  // overlap.
  for (unsigned i = 1; i < segcount; ++i) {
    if (ranges[i].end_range <= ranges[i - 1].end_range)
      return failure();
    if (ranges[i].start_range <= ranges[i - 1].end_range)
      return failure();
  }
 
  // The last range must end at 0xffff
  if (ranges[segcount - 1].end_range != 0xffff)
    return failure();
 
  // A format 4 CMAP subtable is complex. To be safe we simulate a lookup of
  // each code-point defined in the table and make sure that they are all valid
  // glyphs and that we don't access anything out-of-bounds.
  for (unsigned i = 1; i < segcount; ++i) {
    for (unsigned cp = ranges[i].start_range; cp <= ranges[i].end_range; ++cp) {
      const uint16_t code_point = cp;
      if (ranges[i].id_range_offset == 0) {
        // this is explictly allowed to overflow in the spec
        const uint16_t glyph = code_point + ranges[i].id_delta;
        if (glyph >= num_glyphs)
          return failure();
      } else {
        const uint16_t range_delta = code_point - ranges[i].start_range;
        // this might seem odd, but it's true. The offset is relative to the
        // location of the offset value itself.
        const uint32_t glyph_id_offset = ranges[i].id_range_offset_offset +
                                         ranges[i].id_range_offset +
                                         range_delta * 2;
        // We need to be able to access a 16-bit value from this offset
        if (glyph_id_offset + 1 >= length)
          return failure();
        uint16_t glyph;
        memcpy(&glyph, data + glyph_id_offset, 2);
        glyph = ntohs(glyph);
        if (glyph >= num_glyphs)
          return failure();
      }
    }
  }
 
  // We accept the table.
  file->cmap->subtable_314_data = data;
  file->cmap->subtable_314_length = length;
 
  return true;
}
 
static bool
parse_31012(OpenTypeFile *file, const uint8_t *data, size_t length, uint16_t num_glyphs) {
  Buffer subtable(data, length);
 
  // Format 12 tables are simple. We parse these and fully serialise them
  // later.
 
  subtable.Skip(8);
  uint32_t language;
  if (!subtable.ReadU32(&language))
    return failure();
  if (language)
    return failure();
 
  uint32_t num_groups;
  if (!subtable.ReadU32(&num_groups))
    return failure();
  // There are 12 bytes of data per group. In order to keep some sanity, we'll
  // only allow ourselves to allocate 8MB of memory here. That means that
  // we'll allow, at most, 64 * 1024 * 1024 / 12 groups. Note that this is
  // still far in excess of the number of Unicode code-points currently
  // allocated.
  if (num_groups == 0 || num_groups > kMaxCMAPGroups)
    return failure();
 
  std::vector<OpenTypeCMAPSubtableRange> &groups = file->cmap->subtable_31012;
  groups.resize(num_groups);
 
  for (unsigned i = 0; i < num_groups; ++i) {
    if (!subtable.ReadU32(&groups[i].start_range) ||
        !subtable.ReadU32(&groups[i].end_range) ||
        !subtable.ReadU32(&groups[i].start_glyph_id)) {
      return failure();
    }
 
    // We conservatively limit all of the values to 2^30 which is vastly larger
    // than the number of Unicode code-points defined and might protect some
    // parsers from overflows
    if (groups[i].start_range > 0x40000000 ||
        groups[i].end_range > 0x40000000 ||
        groups[i].start_glyph_id > 0x40000000) {
      return failure();
    }
 
    // Also we assert that the glyph value is within range. Because the range
    // limits, above, we don't need to worry about overflow.
    if (groups[i].end_range + groups[i].start_glyph_id > num_glyphs)
      return failure();
  }
 
  // the groups must be sorted by start code and may not overlap
  for (unsigned i = 1; i < num_groups; ++i) {
    if (groups[i].start_range <= groups[i - 1].start_range)
      return failure();
    if (groups[i].start_range <= groups[i - 1].end_range)
      return failure();
  }
 
  return true;
}
 
static bool
parse_31013(OpenTypeFile *file, const uint8_t *data, size_t length, uint16_t num_glyphs) {
  Buffer subtable(data, length);
 
  // Format 13 tables are simple. We parse these and fully serialise them
  // later.
 
  subtable.Skip(8);
  uint16_t language;
  if (!subtable.ReadU16(&language))
    return failure();
  if (language)
    return failure();
 
  uint32_t num_groups;
  if (!subtable.ReadU32(&num_groups))
    return failure();
 
  // We limit the number of groups in the same way as in 3.10.12 tables. See
  // the comment there in
  if (num_groups == 0 || num_groups > kMaxCMAPGroups)
    return failure();
 
  std::vector<OpenTypeCMAPSubtableRange> &groups = file->cmap->subtable_31013;
  groups.resize(num_groups);
 
  for (unsigned i = 0; i < num_groups; ++i) {
    if (!subtable.ReadU32(&groups[i].start_range) ||
        !subtable.ReadU32(&groups[i].end_range) ||
        !subtable.ReadU32(&groups[i].start_glyph_id)) {
      return failure();
    }
 
    // We conservatively limit all of the values to 2^30 which is vastly larger
    // than the number of Unicode code-points defined and might protect some
    // parsers from overflows
    if (groups[i].start_range > 0x40000000 ||
        groups[i].end_range > 0x40000000 ||
        groups[i].start_glyph_id > 0x40000000) {
      return failure();
    }
 
    if (groups[i].start_glyph_id >= num_glyphs)
      return failure();
  }
 
  // the groups must be sorted by start code and may not overlap
  for (unsigned i = 1; i < num_groups; ++i) {
    if (groups[i].start_range <= groups[i - 1].start_range)
      return failure();
    if (groups[i].start_range <= groups[i - 1].end_range)
      return failure();
  }
 
  return true;
}
 
bool
otc_cmap_parse(OpenTypeFile *file, const uint8_t *data, size_t length) {
  Buffer table(data, length);
  file->cmap = new OpenTypeCMAP;
 
  // http://www.microsoft.com/typography/otspec/cmap.htm
  // The charactor map table defines the mapping from code-point to glyph index
  // in the font. We only support Unicode (BMP and non-BMP) formats.
 
  uint16_t version, num_tables;
  if (!table.ReadU16(&version) ||
      !table.ReadU16(&num_tables))
    return failure();
 
  if (version != 0)
    return failure();
 
  std::vector<CMAPSubtableHeader> subtable_headers;
 
  // read the subtable headers
  for (unsigned i = 0; i < num_tables; ++i) {
    CMAPSubtableHeader subt;
 
    if (!table.ReadU16(&subt.platform) ||
        !table.ReadU16(&subt.encoding) ||
        !table.ReadU32(&subt.offset))
      return failure();
 
    subtable_headers.push_back(subt);
  }
 
  const size_t data_offset = table.offset();
 
  // make sure that all the offsets are valid.
  for (unsigned i = 0; i < num_tables; ++i) {
    if (subtable_headers[i].offset > 1024 * 1024 * 1024)
      return failure();
    if (subtable_headers[i].offset < data_offset ||
        subtable_headers[i].offset >= length)
      return failure();
  }
 
  // the format of the table is the first couple of bytes in the table. The
  // length of the table is in a format specific format afterwards.
  for (unsigned i = 0; i < num_tables; ++i) {
    table.set_offset(subtable_headers[i].offset);
    if (!table.ReadU16(&subtable_headers[i].format))
      return failure();
 
    if (subtable_headers[i].format == 4) {
      uint16_t length;
      if (!table.ReadU16(&length))
        return failure();
      subtable_headers[i].length = length;
    } else if (subtable_headers[i].format == 12 ||
               subtable_headers[i].format == 13) {
      table.Skip(2);
      if (!table.ReadU32(&subtable_headers[i].length))
        return failure();
    } else {
      subtable_headers[i].length = 0;
    }
  }
 
  // Now, verify that all the lengths are sane
  for (unsigned i = 0; i < num_tables; ++i) {
    if (!subtable_headers[i].length)
      continue;
    if (subtable_headers[i].length > 1024 * 1024 * 1024)
      return failure();
    // We know that both the offset and length are < 1GB, so the following
    // addition doesn't overflow
    const uint32_t end_byte = subtable_headers[i].offset + subtable_headers[i].length;
    if (end_byte > length)
      return failure();
  }
 
  // we grab the number of glyphs in the file from the maxp table to make sure
  // that the character map isn't referencing anything beyound this range.
  if (!file->maxp)
    return failure();
  const uint16_t num_glyphs = file->maxp->num_glyphs;
 
  // We only support a subset of the possible character map tables. Microsoft
  // 'strongly recommends' that everyone supports the Unicode BMP table with
  // the UCS-4 table for non-BMP glyphs. We'll pass the following subtables:
  // Platform ID Encoding ID Format
  // 3 1 4 (Unicode BMP)
  // 3 10 12 (Unicode UCS-4)
  // 3 10 13 (UCS-4 Fallback mapping)
 
  for (unsigned i = 0; i < num_tables; ++i) {
    if (subtable_headers[i].platform != 3)
      continue;
    if (subtable_headers[i].encoding == 1) {
      if (subtable_headers[i].format == 4) {
        if (!parse_314(file, data + subtable_headers[i].offset, subtable_headers[i].length, num_glyphs))
          return failure();
      }
    } else if (subtable_headers[i].encoding == 10) {
      if (subtable_headers[i].format == 12) {
        if (!parse_31012(file, data + subtable_headers[i].offset, subtable_headers[i].length, num_glyphs))
          return failure();
      } else if (subtable_headers[i].format == 13) {
        if (!parse_31013(file, data + subtable_headers[i].offset, subtable_headers[i].length, num_glyphs))
          return failure();
      }
    }
  }
 
  return true;
}
 
bool
otc_cmap_should_serialise(OpenTypeFile *file) {
  return file->cmap;
}
 
bool
otc_cmap_serialise(OTCStream *out, OpenTypeFile *file) {
  const bool have_314 = file->cmap->subtable_314_data;
  const bool have_31012 = file->cmap->subtable_31012.size();
  const bool have_31013 = file->cmap->subtable_31013.size();
  const unsigned num_subtables = static_cast<unsigned>(have_314) +
                                 static_cast<unsigned>(have_31012) +
                                 static_cast<unsigned>(have_31013);
  const off_t table_start = out->Tell();
 
  if (!out->WriteU16(0) ||
      !out->WriteU16(num_subtables)) {
    return failure();
  }
 
  const off_t record_offset = out->Tell();
  out->Seek(record_offset + num_subtables * 8);
 
  const off_t offset_314 = out->Tell();
  if (have_314) {
    if (!out->Write(file->cmap->subtable_314_data, file->cmap->subtable_314_length))
      return failure();
  }
 
  const off_t offset_31012 = out->Tell();
  if (have_31012) {
    std::vector<OpenTypeCMAPSubtableRange> &groups = file->cmap->subtable_31012;
    const unsigned num_groups = groups.size();
    if (!out->WriteU16(12) ||
        !out->WriteU16(0) ||
        !out->WriteU32(num_groups * 12 + 14) ||
        !out->WriteU32(0) ||
        !out->WriteU32(num_groups)) {
      return failure();
    }
 
    for (unsigned i = 0; i < num_groups; ++i) {
      if (!out->WriteU32(groups[i].start_range) ||
          !out->WriteU32(groups[i].end_range) ||
          !out->WriteU32(groups[i].start_glyph_id)) {
        return failure();
      }
    }
  }
 
  const off_t offset_31013 = out->Tell();
  if (have_31013) {
    std::vector<OpenTypeCMAPSubtableRange> &groups = file->cmap->subtable_31013;
    const unsigned num_groups = groups.size();
    if (!out->WriteU16(13) ||
        !out->WriteU16(0) ||
        !out->WriteU32(num_groups * 12 + 14) ||
        !out->WriteU32(0) ||
        !out->WriteU32(num_groups)) {
      return failure();
    }
 
    for (unsigned i = 0; i < num_groups; ++i) {
      if (!out->WriteU32(groups[i].start_range) ||
          !out->WriteU32(groups[i].end_range) ||
          !out->WriteU32(groups[i].start_glyph_id)) {
        return failure();
      }
    }
  }
 
  const off_t table_end = out->Tell();
 
  // Now seek back and write the table of offsets
  out->Seek(record_offset);
  if (have_314) {
    if (!out->WriteU16(3) ||
        !out->WriteU16(1) ||
        !out->WriteU32(offset_314 - table_start)) {
      return failure();
    }
  }
 
  if (have_31012) {
    if (!out->WriteU16(3) ||
        !out->WriteU16(10) ||
        !out->WriteU32(offset_31012 - table_start)) {
      return failure();
    }
  }
 
  if (have_31013) {
    if (!out->WriteU16(3) ||
        !out->WriteU16(10) ||
        !out->WriteU32(offset_31013 - table_start)) {
      return failure();
    }
  }
 
  out->Seek(table_end);
  return true;
}
 
void
otc_cmap_free(OpenTypeFile *file) {
  delete file->cmap;
}