From 74ad0236747469f9646916d1916dee2598076161 Mon Sep 17 00:00:00 2001 From: Florian Zeitz Date: Mon, 12 May 2014 19:56:41 +0200 Subject: [PATCH] std, core: Generate unicode.rs using unicode.py --- src/etc/unicode.py | 131 ++++++++++++++++++++++++----------------- src/libcore/unicode.rs | 11 ++-- src/libstd/unicode.rs | 4 +- 3 files changed, 85 insertions(+), 61 deletions(-) diff --git a/src/etc/unicode.py b/src/etc/unicode.py index d5c74e367340e..e98c65ca50eee 100755 --- a/src/etc/unicode.py +++ b/src/etc/unicode.py @@ -169,7 +169,7 @@ def emit_bsearch_range_table(f): else if hi < c { Less } else { Greater } }) != None -}\n\n +}\n """); def emit_property_module(f, mod, tbl): @@ -193,11 +193,11 @@ def emit_property_module(f, mod, tbl): f.write(" pub fn %s(c: char) -> bool {\n" % cat) f.write(" super::bsearch_range_table(c, %s_table)\n" % cat) f.write(" }\n\n") - f.write("}\n") + f.write("}\n\n") def emit_conversions_module(f, lowerupper, upperlower): - f.write("pub mod conversions {\n") + f.write("pub mod conversions {") f.write(""" use cmp::{Equal, Less, Greater}; use slice::ImmutableVector; @@ -225,13 +225,14 @@ def emit_conversions_module(f, lowerupper, upperlower): else { Greater } }) } + """); emit_caseconversion_table(f, "LuLl", upperlower) emit_caseconversion_table(f, "LlLu", lowerupper) f.write("}\n") def emit_caseconversion_table(f, name, table): - f.write(" static %s_table : &'static [(char, char)] = &[\n" % name) + f.write(" static %s_table : &'static [(char, char)] = &[\n" % name) sorted_table = sorted(table.iteritems(), key=operator.itemgetter(0)) ix = 0 for key, value in sorted_table: @@ -255,7 +256,7 @@ def format_table_content(f, content, indent): line = " "*indent + chunk f.write(line) -def emit_decomp_module(f, canon, compat, combine): +def emit_core_decomp_module(f, canon, compat): canon_keys = canon.keys() canon_keys.sort() @@ -279,23 +280,6 @@ def emit_decomp_module(f, canon, compat, combine): } None => None } - }\n -""") - - f.write(""" - fn bsearch_range_value_table(c: char, r: &'static [(char, char, u8)]) -> u8 { - use cmp::{Equal, Less, Greater}; - match r.bsearch(|&(lo, hi, _)| { - if lo <= c && c <= hi { Equal } - else if hi < c { Less } - else { Greater } - }) { - Some(idx) => { - let (_, _, result) = r[idx]; - result - } - None => 0 - } }\n\n """) @@ -337,21 +321,10 @@ def emit_decomp_module(f, canon, compat, combine): format_table_content(f, data, 8) f.write("\n ];\n\n") - f.write(" static combining_class_table : &'static [(char, char, u8)] = &[\n") - ix = 0 - for pair in combine: - f.write(ch_prefix(ix)) - f.write("(%s, %s, %s)" % (escape_char(pair[0]), escape_char(pair[1]), pair[2])) - ix += 1 - f.write("\n ];\n") - f.write(" pub fn canonical(c: char, i: |char|) " + "{ d(c, i, false); }\n\n") f.write(" pub fn compatibility(c: char, i: |char|) " +"{ d(c, i, true); }\n\n") - f.write(" pub fn canonical_combining_class(c: char) -> u8 {\n" - + " bsearch_range_value_table(c, combining_class_table)\n" - + " }\n\n") f.write(" fn d(c: char, i: |char|, k: bool) {\n") f.write(" use iter::Iterator;\n"); @@ -389,17 +362,43 @@ def emit_decomp_module(f, canon, compat, combine): f.write(" }\n") f.write("}\n\n") -r = "unicode.rs" -for i in [r]: - if os.path.exists(i): - os.remove(i); -rf = open(r, "w") +def emit_std_decomp_module(f, combine): + f.write("pub mod decompose {\n"); + f.write(" use option::{Some, None};\n"); + f.write(" use slice::ImmutableVector;\n"); -(canon_decomp, compat_decomp, gencats, - combines, lowerupper, upperlower) = load_unicode_data("UnicodeData.txt") + f.write(""" + fn bsearch_range_value_table(c: char, r: &'static [(char, char, u8)]) -> u8 { + use cmp::{Equal, Less, Greater}; + match r.bsearch(|&(lo, hi, _)| { + if lo <= c && c <= hi { Equal } + else if hi < c { Less } + else { Greater } + }) { + Some(idx) => { + let (_, _, result) = r[idx]; + result + } + None => 0 + } + }\n\n +""") + + f.write(" static combining_class_table : &'static [(char, char, u8)] = &[\n") + ix = 0 + for pair in combine: + f.write(ch_prefix(ix)) + f.write("(%s, %s, %s)" % (escape_char(pair[0]), escape_char(pair[1]), pair[2])) + ix += 1 + f.write("\n ];\n\n") -# Preamble -rf.write('''// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT + f.write(" pub fn canonical_combining_class(c: char) -> u8 {\n" + + " bsearch_range_value_table(c, combining_class_table)\n" + + " }\n") + f.write("}\n") + + +preamble = '''// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT // file at the top-level directory of this distribution and at // http://rust-lang.org/COPYRIGHT. // @@ -409,23 +408,45 @@ def emit_decomp_module(f, canon, compat, combine): // option. This file may not be copied, modified, or distributed // except according to those terms. -// The following code was generated by "src/etc/unicode.py" +// NOTE: The following code was generated by "src/etc/unicode.py", do not edit directly + +#![allow(missing_doc, non_uppercase_statics)] + +''' + +(canon_decomp, compat_decomp, gencats, + combines, lowerupper, upperlower) = load_unicode_data("UnicodeData.txt") + +def gen_core_unicode(): + r = "core_unicode.rs" + if os.path.exists(r): + os.remove(r); + with open(r, "w") as rf: + # Preamble + rf.write(preamble) -#![allow(missing_doc)] -#![allow(non_uppercase_statics)] + emit_bsearch_range_table(rf); + emit_property_module(rf, "general_category", gencats) -''') + emit_core_decomp_module(rf, canon_decomp, compat_decomp) -emit_bsearch_range_table(rf); -emit_property_module(rf, "general_category", gencats) + derived = load_properties("DerivedCoreProperties.txt", + ["XID_Start", "XID_Continue", "Alphabetic", "Lowercase", "Uppercase"]) -emit_decomp_module(rf, canon_decomp, compat_decomp, combines) + emit_property_module(rf, "derived_property", derived) -derived = load_properties("DerivedCoreProperties.txt", - ["XID_Start", "XID_Continue", "Alphabetic", "Lowercase", "Uppercase"]) + props = load_properties("PropList.txt", ["White_Space"]) + emit_property_module(rf, "property", props) + emit_conversions_module(rf, lowerupper, upperlower) -emit_property_module(rf, "derived_property", derived) +def gen_std_unicode(): + r = "std_unicode.rs" + if os.path.exists(r): + os.remove(r); + with open(r, "w") as rf: + # Preamble + rf.write(preamble) + emit_std_decomp_module(rf, combines) -props = load_properties("PropList.txt", ["White_Space"]) -emit_property_module(rf, "property", props) -emit_conversions_module(rf, lowerupper, upperlower) +gen_core_unicode() +gen_std_unicode() diff --git a/src/libcore/unicode.rs b/src/libcore/unicode.rs index db016ad880741..b3298bde05547 100644 --- a/src/libcore/unicode.rs +++ b/src/libcore/unicode.rs @@ -1,4 +1,4 @@ -// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT +// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT // file at the top-level directory of this distribution and at // http://rust-lang.org/COPYRIGHT. // @@ -8,10 +8,11 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -// The following code was generated by "src/etc/unicode.py" +// NOTE: The following code was generated by "src/etc/unicode.py", do not edit directly #![allow(missing_doc, non_uppercase_statics)] + fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool { use cmp::{Equal, Less, Greater}; use slice::ImmutableVector; @@ -102,6 +103,7 @@ pub mod general_category { } } + pub mod decompose { use option::Option; use option::{Some, None}; @@ -123,7 +125,6 @@ pub mod decompose { } - // Canonical decompositions static canonical_table : &'static [(char, &'static [char])] = &[ ('\xc0', &['\x41', '\u0300']), ('\xc1', &['\x41', '\u0301']), ('\xc2', &['\x41', '\u0302']), @@ -3968,6 +3969,7 @@ pub mod derived_property { pub fn XID_Start(c: char) -> bool { super::bsearch_range_table(c, XID_Start_table) } + } pub mod property { @@ -3983,6 +3985,7 @@ pub mod property { pub fn White_Space(c: char) -> bool { super::bsearch_range_table(c, White_Space_table) } + } pub mod conversions { @@ -4501,7 +4504,7 @@ pub mod conversions { ('\U00010426', '\U0001044e'), ('\U00010427', '\U0001044f') ]; - static LlLu_table : &'static [(char, char)] = &[ + static LlLu_table : &'static [(char, char)] = &[ ('\x61', '\x41'), ('\x62', '\x42'), ('\x63', '\x43'), ('\x64', '\x44'), ('\x65', '\x45'), ('\x66', '\x46'), diff --git a/src/libstd/unicode.rs b/src/libstd/unicode.rs index be6e5d040a7c9..d534b30221b4a 100644 --- a/src/libstd/unicode.rs +++ b/src/libstd/unicode.rs @@ -1,4 +1,4 @@ -// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT +// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT // file at the top-level directory of this distribution and at // http://rust-lang.org/COPYRIGHT. // @@ -8,7 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -// The following code was generated by "src/etc/unicode.py" +// NOTE: The following code was generated by "src/etc/unicode.py", do not edit directly #![allow(missing_doc, non_uppercase_statics)]