Skip to content

Commit

Permalink
Fix handling of upper/lowercase, and whitespace
Browse files: browse the repository at this point in the history
  • Loading branch information
Florob committed Nov 27, 2013
1 parent c234614 commit dfe38db
Show file tree
Hide file tree
Showing 5 changed files with 689 additions and 29 deletions.
22 changes: 12 additions & 10 deletions src/etc/unicode.py
Expand Up @@ -41,7 +41,7 @@ def load_unicode_data(f):
continue
[code, name, gencat, combine, bidi,
decomp, deci, digit, num, mirror,
old, iso, upcase, lowcsae, titlecase ] = fields
old, iso, upcase, lowcase, titlecase ] = fields

code = int(code, 16)

Expand Down Expand Up @@ -89,11 +89,9 @@ def load_unicode_data(f):

return (canon_decomp, compat_decomp, gencats, combines)


def load_derived_core_properties(f):
def load_properties(f, interestingprops):
fetch(f)
derivedprops = {}
interestingprops = ["XID_Start", "XID_Continue", "Alphabetic"]
props = {}
re1 = re.compile("^([0-9A-F]+) +; (\w+)")
re2 = re.compile("^([0-9A-F]+)\.\.([0-9A-F]+) +; (\w+)")

Expand All @@ -118,10 +116,10 @@ def load_derived_core_properties(f):
continue
d_lo = int(d_lo, 16)
d_hi = int(d_hi, 16)
if prop not in derivedprops:
derivedprops[prop] = []
derivedprops[prop].append((d_lo, d_hi))
return derivedprops
if prop not in props:
props[prop] = []
props[prop].append((d_lo, d_hi))
return props

def escape_char(c):
if c <= 0xff:
Expand Down Expand Up @@ -376,5 +374,9 @@ def emit_decomp_module(f, canon, compat, combine):

emit_decomp_module(rf, canon_decomp, compat_decomp, combines)

derived = load_derived_core_properties("DerivedCoreProperties.txt")
derived = load_properties("DerivedCoreProperties.txt",
["XID_Start", "XID_Continue", "Alphabetic", "Lowercase", "Uppercase"])
emit_property_module(rf, "derived_property", derived)

props = load_properties("PropList.txt", ["White_Space"])
emit_property_module(rf, "property", props)
18 changes: 8 additions & 10 deletions src/libstd/char.rs
Expand Up @@ -14,7 +14,7 @@ use cast::transmute;
use option::{None, Option, Some};
use iter::{Iterator, range_step};
use str::StrSlice;
use unicode::{derived_property, general_category, decompose};
use unicode::{derived_property, property, general_category, decompose};
use to_str::ToStr;
use str;

Expand Down Expand Up @@ -89,30 +89,28 @@ pub fn is_XID_continue(c: char) -> bool { derived_property::XID_Continue(c) }

///
/// Indicates whether a character is in lower case, defined
/// in terms of the Unicode General Category 'Ll'
/// in terms of the Unicode Derived Core Property 'Lowercase'.
///
#[inline]
pub fn is_lowercase(c: char) -> bool { general_category::Ll(c) }
pub fn is_lowercase(c: char) -> bool { derived_property::Lowercase(c) }

///
/// Indicates whether a character is in upper case, defined
/// in terms of the Unicode General Category 'Lu'.
/// in terms of the Unicode Derived Core Property 'Uppercase'.
///
#[inline]
pub fn is_uppercase(c: char) -> bool { general_category::Lu(c) }
pub fn is_uppercase(c: char) -> bool { derived_property::Uppercase(c) }

///
/// Indicates whether a character is whitespace. Whitespace is defined in
/// terms of the Unicode General Categories 'Zs', 'Zl', 'Zp'
/// additional 'Cc'-category control codes in the range [0x09, 0x0d]
/// terms of the Unicode Property 'White_Space'.
///
#[inline]
pub fn is_whitespace(c: char) -> bool {
// As an optimization ASCII whitespace characters are checked separately
c == ' '
|| ('\x09' <= c && c <= '\x0d')
|| general_category::Zs(c)
|| general_category::Zl(c)
|| general_category::Zp(c)
|| property::White_Space(c)
}

///
Expand Down

5 comments on commit dfe38db

@bors
Copy link
Contributor

@bors bors commented on dfe38db Nov 28, 2013

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

saw approval from cmr
at Florob@dfe38db

@bors
Copy link
Contributor

@bors bors commented on dfe38db Nov 28, 2013

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

merging Florob/rust/unicode63 = dfe38db into auto

@bors
Copy link
Contributor

@bors bors commented on dfe38db Nov 28, 2013

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Florob/rust/unicode63 = dfe38db merged ok, testing candidate = 503e5df

@bors
Copy link
Contributor

@bors bors commented on dfe38db Nov 28, 2013

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bors
Copy link
Contributor

@bors bors commented on dfe38db Nov 28, 2013

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fast-forwarding master to auto = 503e5df

Please sign in to comment.