Skip to content

Commit

Permalink
Remove exclusions to unicode norm table
Browse files Browse the repository at this point in the history
  • Loading branch information
LaurenzV committed Jul 10, 2024
1 parent 7997bbe commit 3f602c6
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 83 deletions.
20 changes: 19 additions & 1 deletion scripts/gen-unicode-norm-table.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
# UnicodeData.txt from the latest released Unicode Character Database.
URL = 'https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt'
FILE_NAME = 'UnicodeData.txt'

# CompositionExclusions.txt is taken from the same released UCD directory as
# UnicodeData.txt. The draft tree tracks a not-yet-released Unicode version,
# which can disagree with the released data the table is built from.
EXCLUSIONS_URL = 'https://www.unicode.org/Public/UCD/latest/ucd/CompositionExclusions.txt'
EXCLUSIONS_FILE_NAME = 'CompositionExclusions.txt'


def hex_to_char_rs(c):
    """Render a hex code point string as a Rust char literal.

    For example, ``'0F43'`` becomes ``'\\u{0F43}'`` including the surrounding
    single quotes.
    """
    # Doubled braces are literal braces; the single `{}` receives the code point.
    return "'\\u{{{}}}'".format(c)
Expand All @@ -14,6 +17,20 @@ def hex_to_char_rs(c):
# Download each UCD input only when no local copy exists, so repeated runs
# reuse the cached files.
if not os.path.exists(FILE_NAME):
    urllib.request.urlretrieve(URL, FILE_NAME)

if not os.path.exists(EXCLUSIONS_FILE_NAME):
    urllib.request.urlretrieve(EXCLUSIONS_URL, EXCLUSIONS_FILE_NAME)

# Hex code points (upper-case, at least four digits) of the characters listed
# in the exclusions file.
exclusions = set()

# Read as UTF-8 explicitly instead of relying on the platform-default encoding.
with open(EXCLUSIONS_FILE_NAME, encoding='utf-8') as f:
    import re
    # An entry is a single code point or a `XXXX..YYYY` range, optionally
    # followed by a `#` comment. Lines that start with `#` (including the
    # commented-out entries) and blank lines do not match and are skipped.
    pattern = re.compile(r'^([0-9a-fA-F]+)(?:\.\.([0-9a-fA-F]+))?\s*(?:#.*)?$')

    for line in f:
        match = pattern.match(line.strip())
        if not match:
            continue

        first = int(match.group(1), 16)
        last = int(match.group(2), 16) if match.group(2) else first
        for code_point in range(first, last + 1):
            # Normalise to the upper-case, zero-padded form so a range expands
            # to the same spelling as a single-code-point entry.
            exclusions.add(f'{code_point:04X}')


print('// WARNING: this file was generated by ../scripts/gen-unicode-norm-table.py')
print()
Expand Down Expand Up @@ -76,6 +93,7 @@ def hex_to_char_rs(c):
assert len(pairs) == len(needles)

for pair in pairs:
print(f' ({pair[0]}, {hex_to_char_rs(pair[1])}),')
if pair[1] not in exclusions:
print(f' ({pair[0]}, {hex_to_char_rs(pair[1])}),')

print('];')
81 changes: 0 additions & 81 deletions src/hb/unicode_norm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2713,38 +2713,6 @@ pub const COMPOSITION_TABLE: &[(u64, char)] = &[
(5330054415112, '\u{04DB}'),
(5394478924552, '\u{04EA}'),
(5398773891848, '\u{04EB}'),
(6390911337911, '\u{FB2E}'),
(6390911337912, '\u{FB2F}'),
(6390911337916, '\u{FB30}'),
(6395206305212, '\u{FB31}'),
(6395206305215, '\u{FB4C}'),
(6399501272508, '\u{FB32}'),
(6403796239804, '\u{FB33}'),
(6408091207100, '\u{FB34}'),
(6412386174393, '\u{FB4B}'),
(6412386174396, '\u{FB35}'),
(6416681141692, '\u{FB36}'),
(6425271076284, '\u{FB38}'),
(6429566043572, '\u{FB1D}'),
(6429566043580, '\u{FB39}'),
(6433861010876, '\u{FB3A}'),
(6438155978172, '\u{FB3B}'),
(6438155978175, '\u{FB4D}'),
(6442450945468, '\u{FB3C}'),
(6451040880060, '\u{FB3E}'),
(6459630814652, '\u{FB40}'),
(6463925781948, '\u{FB41}'),
(6472515716540, '\u{FB43}'),
(6476810683836, '\u{FB44}'),
(6476810683839, '\u{FB4E}'),
(6485400618428, '\u{FB46}'),
(6489695585724, '\u{FB47}'),
(6493990553020, '\u{FB48}'),
(6498285520316, '\u{FB49}'),
(6498285520321, '\u{FB2A}'),
(6498285520322, '\u{FB2B}'),
(6502580487612, '\u{FB4A}'),
(6536940225975, '\u{FB1F}'),
(6764573492819, '\u{0622}'),
(6764573492820, '\u{0623}'),
(6764573492821, '\u{0625}'),
Expand All @@ -2753,30 +2721,11 @@ pub const COMPOSITION_TABLE: &[(u64, char)] = &[
(7425998456404, '\u{06C2}'),
(7499012900436, '\u{06D3}'),
(7511897802324, '\u{06C0}'),
(9985798965564, '\u{0958}'),
(9990093932860, '\u{0959}'),
(9994388900156, '\u{095A}'),
(10015863736636, '\u{095B}'),
(10037338573116, '\u{095C}'),
(10041633540412, '\u{095D}'),
(10067403344188, '\u{0929}'),
(10080288246076, '\u{095E}'),
(10097468115260, '\u{095F}'),
(10101763082556, '\u{0931}'),
(10114647984444, '\u{0934}'),
(10587094387132, '\u{09DC}'),
(10591389354428, '\u{09DD}'),
(10647223929276, '\u{09DF}'),
(10750303144382, '\u{09CB}'),
(10750303144407, '\u{09CC}'),
(11089605560892, '\u{0A59}'),
(11093900528188, '\u{0A5A}'),
(11115375364668, '\u{0A5B}'),
(11179799874108, '\u{0A5E}'),
(11209864645180, '\u{0A33}'),
(11235634448956, '\u{0A36}'),
(12236361829180, '\u{0B5C}'),
(12240656796476, '\u{0B5D}'),
(12399570586430, '\u{0B4B}'),
(12399570586454, '\u{0B48}'),
(12399570586455, '\u{0B4C}'),
Expand All @@ -2797,23 +2746,9 @@ pub const COMPOSITION_TABLE: &[(u64, char)] = &[
(15225659067855, '\u{0DDC}'),
(15225659067871, '\u{0DDE}'),
(15238543969738, '\u{0DDD}'),
(16767552327605, '\u{0F69}'),
(16776142262199, '\u{0F43}'),
(16819091935159, '\u{0F4D}'),
(16840566771639, '\u{0F52}'),
(16862041608119, '\u{0F57}'),
(16883516444599, '\u{0F5C}'),
(16978005725042, '\u{0F73}'),
(16978005725044, '\u{0F75}'),
(16978005725056, '\u{0F81}'),
(17111149711285, '\u{0FB9}'),
(17119739645879, '\u{0F93}'),
(17162689318839, '\u{0F9D}'),
(17184164155319, '\u{0FA2}'),
(17205638991799, '\u{0FA7}'),
(17227113828279, '\u{0FAC}'),
(17257178599296, '\u{0F76}'),
(17261473566592, '\u{0F78}'),
(17751099838510, '\u{1026}'),
(29708288793397, '\u{1B06}'),
(29716878727989, '\u{1B08}'),
Expand Down Expand Up @@ -3014,7 +2949,6 @@ pub const COMPOSITION_TABLE: &[(u64, char)] = &[
(38152194491192, '\u{22EB}'),
(38156489458488, '\u{22EC}'),
(38160784425784, '\u{22ED}'),
(47128676139832, '\u{2ADC}'),
(53077205856409, '\u{3094}'),
(53098680692889, '\u{304C}'),
(53107270627481, '\u{304E}'),
Expand Down Expand Up @@ -3073,8 +3007,6 @@ pub const COMPOSITION_TABLE: &[(u64, char)] = &[
(53811645264025, '\u{30F9}'),
(53815940231321, '\u{30FA}'),
(53863184871577, '\u{30FE}'),
(276290951185857, '\u{FB2C}'),
(276290951185858, '\u{FB2D}'),
(299724292821178, '\u{1109A}'),
(299732882755770, '\u{1109C}'),
(299775832428730, '\u{110AB}'),
Expand All @@ -3088,17 +3020,4 @@ pub const COMPOSITION_TABLE: &[(u64, char)] = &[
(305354994947503, '\u{115BA}'),
(305359289914799, '\u{115BB}'),
(309190400743728, '\u{11938}'),
(511646569189733, '\u{1D15E}'),
(511650864157029, '\u{1D15F}'),
(511680928928110, '\u{1D160}'),
(511680928928111, '\u{1D161}'),
(511680928928112, '\u{1D162}'),
(511680928928113, '\u{1D163}'),
(511680928928114, '\u{1D164}'),
(512067475984741, '\u{1D1BB}'),
(512071770952037, '\u{1D1BC}'),
(512076065919342, '\u{1D1BD}'),
(512076065919343, '\u{1D1BF}'),
(512080360886638, '\u{1D1BE}'),
(512080360886639, '\u{1D1C0}'),
];
3 changes: 2 additions & 1 deletion tests/custom/fuzzer.tests
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
tests/fonts/rb_custom/PT_Sans-Caption-Web-Regular.ttf;;U+1EA4,U+006E;
tests/fonts/rb_custom/PT_Sans-Caption-Web-Regular.ttf;;U+1EA4,U+006E;
tests/fonts/rb_custom/AdobeBlank-Regular.ttf;--no-glyph-names;U+0F42,U+0FB7;
Binary file added tests/fonts/rb_custom/AdobeBlank-Regular.ttf
Binary file not shown.
13 changes: 13 additions & 0 deletions tests/shaping/custom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,19 @@ fn fuzzer_001() {
);
}

// Shapes U+0F42 followed by U+0FB7 with the AdobeBlank font and checks that
// the output contains two separate entries.
#[test]
fn fuzzer_002() {
    let output = shape(
        "tests/fonts/rb_custom/AdobeBlank-Regular.ttf",
        "\u{0F42}\u{0FB7}",
        "--no-glyph-names",
    );
    let expected = "1859=0+0|\
         1976=0+0";

    assert_eq!(output, expected);
}

#[test]
fn glyph_flags_001() {
assert_eq!(
Expand Down

0 comments on commit 3f602c6

Please sign in to comment.