Add support code for new unicode_data module

rust-lang · Jan 15, 2020 · 40ad877 · 40ad877
1 parent 064f888
commit 40ad877
Showing 1 changed file with 49 additions and 5 deletions.
diff --git a/src/libcore/unicode/mod.rs b/src/libcore/unicode/mod.rs
@@ -1,15 +1,59 @@
 #![unstable(feature = "unicode_internals", issue = "none")]
 #![allow(missing_docs)]
 
-mod bool_trie;
 pub(crate) mod printable;
-pub(crate) mod tables;
+mod unicode_data;
 pub(crate) mod version;
 
+use version::UnicodeVersion;
+
+/// The version of [Unicode](http://www.unicode.org/) that the Unicode parts of
+/// `char` and `str` methods are based on.
+#[unstable(feature = "unicode_version", issue = "49726")]
+pub const UNICODE_VERSION: UnicodeVersion = UnicodeVersion {
+    major: unicode_data::UNICODE_VERSION.0, // source constant is read positionally: .0 = major
+    minor: unicode_data::UNICODE_VERSION.1, // .1 = minor
+    micro: unicode_data::UNICODE_VERSION.2, // .2 = micro
+    _priv: (), // NOTE(review): presumably prevents outside construction; confirm in `version`
+};
+
 // For use in liballoc, not re-exported in libstd.
 pub mod derived_property {
-    pub use crate::unicode::tables::derived_property::{Case_Ignorable, Cased};
+    pub use super::{Case_Ignorable, Cased}; // the parent module's `lookup` aliases, re-exported
 }
-pub mod conversions {
-    pub use crate::unicode::tables::conversions::{to_lower, to_upper};
+
+pub use unicode_data::alphabetic::lookup as Alphabetic;
+pub use unicode_data::case_ignorable::lookup as Case_Ignorable;
+pub use unicode_data::cased::lookup as Cased;
+pub use unicode_data::cc::lookup as Cc;
+pub use unicode_data::conversions;
+pub use unicode_data::grapheme_extend::lookup as Grapheme_Extend;
+pub use unicode_data::lowercase::lookup as Lowercase;
+pub use unicode_data::n::lookup as N;
+pub use unicode_data::uppercase::lookup as Uppercase;
+pub use unicode_data::white_space::lookup as White_Space;
+
+#[inline(always)]
+fn range_search<const N: usize, const N1: usize, const N2: usize>( // is `needle`'s bit set?
+    needle: u32, // value to look up (presumably a `char` as u32; confirm at callers)
+    chunk_idx_map: &[u8; N], // indexed by `needle / 1024`; selects a row of `bitset_chunk_idx`
+    (last_chunk_idx, last_chunk_mapping): (u16, u8), // consulted only for chunk indices >= N
+    bitset_chunk_idx: &[[u8; 16]; N1], // per chunk: 16 indices into `bitset`, one per word
+    bitset: &[u64; N2], // 64-bit words, each holding the bits for 64 consecutive values
+) -> bool {
+    let bucket_idx = (needle / 64) as usize; // which 64-bit word `needle` falls in
+    let chunk_map_idx = bucket_idx / 16; // which 16-word chunk (1024 values) holds that word
+    let chunk_piece = bucket_idx % 16; // position of the word inside its chunk
+    let chunk_idx = if chunk_map_idx >= N { // past the end of `chunk_idx_map`
+        if chunk_map_idx == last_chunk_idx as usize { // the one out-of-range chunk with a mapping
+            last_chunk_mapping
+        } else {
+            return false; // no entry for this chunk: reported as not in the set
+        }
+    } else {
+        chunk_idx_map[chunk_map_idx]
+    };
+    let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece]; // plain indexing: panics if out of range
+    let word = bitset[(idx as usize)]; // the 64 bits covering `needle`'s bucket
+    (word & (1 << (needle % 64) as u64)) != 0 // test bit `needle % 64` of that word
 }