Skip to content

Commit

Permalink
std, core: Generate unicode.rs using unicode.py
Browse files Browse the repository at this point in the history
  • Loading branch information
Florob authored and alexcrichton committed May 14, 2014
1 parent 21867fa commit 74ad023
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 61 deletions.
131 changes: 76 additions & 55 deletions src/etc/unicode.py
Expand Up @@ -169,7 +169,7 @@ def emit_bsearch_range_table(f):
else if hi < c { Less }
else { Greater }
}) != None
}\n\n
}\n
""");

def emit_property_module(f, mod, tbl):
Expand All @@ -193,11 +193,11 @@ def emit_property_module(f, mod, tbl):
f.write(" pub fn %s(c: char) -> bool {\n" % cat)
f.write(" super::bsearch_range_table(c, %s_table)\n" % cat)
f.write(" }\n\n")
f.write("}\n")
f.write("}\n\n")


def emit_conversions_module(f, lowerupper, upperlower):
f.write("pub mod conversions {\n")
f.write("pub mod conversions {")
f.write("""
use cmp::{Equal, Less, Greater};
use slice::ImmutableVector;
Expand Down Expand Up @@ -225,13 +225,14 @@ def emit_conversions_module(f, lowerupper, upperlower):
else { Greater }
})
}
""");
emit_caseconversion_table(f, "LuLl", upperlower)
emit_caseconversion_table(f, "LlLu", lowerupper)
f.write("}\n")

def emit_caseconversion_table(f, name, table):
f.write(" static %s_table : &'static [(char, char)] = &[\n" % name)
f.write(" static %s_table : &'static [(char, char)] = &[\n" % name)
sorted_table = sorted(table.iteritems(), key=operator.itemgetter(0))
ix = 0
for key, value in sorted_table:
Expand All @@ -255,7 +256,7 @@ def format_table_content(f, content, indent):
line = " "*indent + chunk
f.write(line)

def emit_decomp_module(f, canon, compat, combine):
def emit_core_decomp_module(f, canon, compat):
canon_keys = canon.keys()
canon_keys.sort()

Expand All @@ -279,23 +280,6 @@ def emit_decomp_module(f, canon, compat, combine):
}
None => None
}
}\n
""")

f.write("""
fn bsearch_range_value_table(c: char, r: &'static [(char, char, u8)]) -> u8 {
use cmp::{Equal, Less, Greater};
match r.bsearch(|&(lo, hi, _)| {
if lo <= c && c <= hi { Equal }
else if hi < c { Less }
else { Greater }
}) {
Some(idx) => {
let (_, _, result) = r[idx];
result
}
None => 0
}
}\n\n
""")

Expand Down Expand Up @@ -337,21 +321,10 @@ def emit_decomp_module(f, canon, compat, combine):
format_table_content(f, data, 8)
f.write("\n ];\n\n")

f.write(" static combining_class_table : &'static [(char, char, u8)] = &[\n")
ix = 0
for pair in combine:
f.write(ch_prefix(ix))
f.write("(%s, %s, %s)" % (escape_char(pair[0]), escape_char(pair[1]), pair[2]))
ix += 1
f.write("\n ];\n")

f.write(" pub fn canonical(c: char, i: |char|) "
+ "{ d(c, i, false); }\n\n")
f.write(" pub fn compatibility(c: char, i: |char|) "
+"{ d(c, i, true); }\n\n")
f.write(" pub fn canonical_combining_class(c: char) -> u8 {\n"
+ " bsearch_range_value_table(c, combining_class_table)\n"
+ " }\n\n")
f.write(" fn d(c: char, i: |char|, k: bool) {\n")
f.write(" use iter::Iterator;\n");

Expand Down Expand Up @@ -389,17 +362,43 @@ def emit_decomp_module(f, canon, compat, combine):
f.write(" }\n")
f.write("}\n\n")

r = "unicode.rs"
for i in [r]:
if os.path.exists(i):
os.remove(i);
rf = open(r, "w")
def emit_std_decomp_module(f, combine):
f.write("pub mod decompose {\n");
f.write(" use option::{Some, None};\n");
f.write(" use slice::ImmutableVector;\n");

(canon_decomp, compat_decomp, gencats,
combines, lowerupper, upperlower) = load_unicode_data("UnicodeData.txt")
f.write("""
fn bsearch_range_value_table(c: char, r: &'static [(char, char, u8)]) -> u8 {
use cmp::{Equal, Less, Greater};
match r.bsearch(|&(lo, hi, _)| {
if lo <= c && c <= hi { Equal }
else if hi < c { Less }
else { Greater }
}) {
Some(idx) => {
let (_, _, result) = r[idx];
result
}
None => 0
}
}\n\n
""")

f.write(" static combining_class_table : &'static [(char, char, u8)] = &[\n")
ix = 0
for pair in combine:
f.write(ch_prefix(ix))
f.write("(%s, %s, %s)" % (escape_char(pair[0]), escape_char(pair[1]), pair[2]))
ix += 1
f.write("\n ];\n\n")

# Preamble
rf.write('''// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
f.write(" pub fn canonical_combining_class(c: char) -> u8 {\n"
+ " bsearch_range_value_table(c, combining_class_table)\n"
+ " }\n")
f.write("}\n")


preamble = '''// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
Expand All @@ -409,23 +408,45 @@ def emit_decomp_module(f, canon, compat, combine):
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// The following code was generated by "src/etc/unicode.py"
// NOTE: The following code was generated by "src/etc/unicode.py", do not edit directly
#![allow(missing_doc, non_uppercase_statics)]
'''

(canon_decomp, compat_decomp, gencats,
combines, lowerupper, upperlower) = load_unicode_data("UnicodeData.txt")

def gen_core_unicode():
r = "core_unicode.rs"
if os.path.exists(r):
os.remove(r);
with open(r, "w") as rf:
# Preamble
rf.write(preamble)

#![allow(missing_doc)]
#![allow(non_uppercase_statics)]
emit_bsearch_range_table(rf);
emit_property_module(rf, "general_category", gencats)

''')
emit_core_decomp_module(rf, canon_decomp, compat_decomp)

emit_bsearch_range_table(rf);
emit_property_module(rf, "general_category", gencats)
derived = load_properties("DerivedCoreProperties.txt",
["XID_Start", "XID_Continue", "Alphabetic", "Lowercase", "Uppercase"])

emit_decomp_module(rf, canon_decomp, compat_decomp, combines)
emit_property_module(rf, "derived_property", derived)

derived = load_properties("DerivedCoreProperties.txt",
["XID_Start", "XID_Continue", "Alphabetic", "Lowercase", "Uppercase"])
props = load_properties("PropList.txt", ["White_Space"])
emit_property_module(rf, "property", props)
emit_conversions_module(rf, lowerupper, upperlower)

emit_property_module(rf, "derived_property", derived)
def gen_std_unicode():
r = "std_unicode.rs"
if os.path.exists(r):
os.remove(r);
with open(r, "w") as rf:
# Preamble
rf.write(preamble)
emit_std_decomp_module(rf, combines)

props = load_properties("PropList.txt", ["White_Space"])
emit_property_module(rf, "property", props)
emit_conversions_module(rf, lowerupper, upperlower)
gen_core_unicode()
gen_std_unicode()
11 changes: 7 additions & 4 deletions src/libcore/unicode.rs
@@ -1,4 +1,4 @@
// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
Expand All @@ -8,10 +8,11 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// The following code was generated by "src/etc/unicode.py"
// NOTE: The following code was generated by "src/etc/unicode.py", do not edit directly

#![allow(missing_doc, non_uppercase_statics)]


fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool {
use cmp::{Equal, Less, Greater};
use slice::ImmutableVector;
Expand Down Expand Up @@ -102,6 +103,7 @@ pub mod general_category {
}

}

pub mod decompose {
use option::Option;
use option::{Some, None};
Expand All @@ -123,7 +125,6 @@ pub mod decompose {
}



// Canonical decompositions
static canonical_table : &'static [(char, &'static [char])] = &[
('\xc0', &['\x41', '\u0300']), ('\xc1', &['\x41', '\u0301']), ('\xc2', &['\x41', '\u0302']),
Expand Down Expand Up @@ -3968,6 +3969,7 @@ pub mod derived_property {
pub fn XID_Start(c: char) -> bool {
super::bsearch_range_table(c, XID_Start_table)
}

}

pub mod property {
Expand All @@ -3983,6 +3985,7 @@ pub mod property {
pub fn White_Space(c: char) -> bool {
super::bsearch_range_table(c, White_Space_table)
}

}

pub mod conversions {
Expand Down Expand Up @@ -4501,7 +4504,7 @@ pub mod conversions {
('\U00010426', '\U0001044e'), ('\U00010427', '\U0001044f')
];

static LlLu_table : &'static [(char, char)] = &[
static LlLu_table : &'static [(char, char)] = &[
('\x61', '\x41'), ('\x62', '\x42'),
('\x63', '\x43'), ('\x64', '\x44'),
('\x65', '\x45'), ('\x66', '\x46'),
Expand Down
4 changes: 2 additions & 2 deletions src/libstd/unicode.rs
@@ -1,4 +1,4 @@
// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
Expand All @@ -8,7 +8,7 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// The following code was generated by "src/etc/unicode.py"
// NOTE: The following code was generated by "src/etc/unicode.py", do not edit directly

#![allow(missing_doc, non_uppercase_statics)]

Expand Down

0 comments on commit 74ad023

Please sign in to comment.