Skip to content

Commit

Permalink
Implement _str.len() to return the number of bytes, rename it to byte…
Browse files Browse the repository at this point in the history
…_len(),

and add a test.
  • Loading branch information
jyasskin authored and graydon committed Aug 7, 2010
1 parent 581a95a commit 3f6e8ff
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 5 deletions.
1 change: 1 addition & 0 deletions src/Makefile
Expand Up @@ -506,6 +506,7 @@ TEST_XFAILS_LLVM := $(TASK_XFAILS) \
str-append.rs \
str-concat.rs \
str-idx.rs \
str-lib.rs \
tag.rs \
tail-cps.rs \
tail-direct.rs \
Expand Down
15 changes: 10 additions & 5 deletions src/lib/_str.rs
Expand Up @@ -3,7 +3,7 @@ import rustrt.sbuf;
native "rust" mod rustrt {
type sbuf;
fn str_buf(str s) -> sbuf;
fn str_len(str s) -> uint;
fn str_byte_len(str s) -> uint;
fn str_alloc(uint n_bytes) -> str;
fn refcount[T](str s) -> uint;
}
Expand All @@ -13,7 +13,7 @@ fn is_utf8(vec[u8] v) -> bool {
}

fn is_ascii(str s) -> bool {
let uint i = len(s);
let uint i = byte_len(s);
while (i > 0u) {
i -= 1u;
if ((s.(i) & 0x80u8) != 0u8) {
Expand All @@ -27,8 +27,13 @@ fn alloc(uint n_bytes) -> str {
ret rustrt.str_alloc(n_bytes);
}

fn len(str s) -> uint {
ret rustrt.str_len(s);
// Returns the number of bytes (a.k.a. UTF-8 code units) in s, as
// reported by the runtime's str_byte_len.
// Contrast with a function that would return the number of code
// points (chars), combining character sequences, words, etc. See
// http://icu-project.org/apiref/icu4c/classBreakIterator.html for a
// way to implement those.
fn byte_len(str s) -> uint {
ret rustrt.str_byte_len(s);
}

fn buf(str s) -> sbuf {
Expand All @@ -39,5 +44,5 @@ fn bytes(&str s) -> vec[u8] {
fn ith(str s, uint i) -> u8 {
ret s.(i);
}
ret _vec.init_fn[u8](bind ith(s, _), _str.len(s));
ret _vec.init_fn[u8](bind ith(s, _), _str.byte_len(s));
}
6 changes: 6 additions & 0 deletions src/rt/rust_builtin.cpp
Expand Up @@ -115,6 +115,12 @@ str_buf(rust_task *task, rust_str *s)
return (char const *)&s->data[0];
}

// Runtime builtin: reports how many bytes of string data s holds.
// The task argument is not used by this function.
extern "C" CDECL size_t
str_byte_len(rust_task *task, rust_str *s)
{
    // fill counts the trailing '\0' terminator as well; leave it out.
    size_t byte_count = s->fill - 1;
    return byte_count;
}

extern "C" CDECL void *
vec_buf(rust_task *task, type_desc *ty, rust_vec *v, size_t offset)
{
Expand Down
16 changes: 16 additions & 0 deletions src/test/run-pass/str-lib.rs
@@ -0,0 +1,16 @@
use std;
import std._str;

// Checks _str.byte_len against string literals whose expected byte
// counts match the UTF-8 encoded length of their contents.
fn test_bytes_len() {
// The empty string has no bytes.
check (_str.byte_len("") == 0u);
// Eleven ASCII characters, one byte each.
check (_str.byte_len("hello world") == 11u);
// 0x63 is in the ASCII range: one byte.
check (_str.byte_len("\x63") == 1u);
// Expected to be two bytes, i.e. the escape is presumably treated as
// code point U+00A2 and UTF-8 encoded, not stored as a raw byte.
check (_str.byte_len("\xa2") == 2u);
// U+03C0 lies in U+0080..U+07FF: two bytes in UTF-8.
check (_str.byte_len("\u03c0") == 2u);
// U+2620 lies in U+0800..U+FFFF: three bytes in UTF-8.
check (_str.byte_len("\u2620") == 3u);
// U+1D11E is above U+FFFF: four bytes in UTF-8.
check (_str.byte_len("\U0001d11e") == 4u);
}

// Test entry point: runs the byte_len checks.
fn main() {
test_bytes_len();
}

0 comments on commit 3f6e8ff

Please sign in to comment.