Permalink
Browse files

introduce a better unichar API, which should be as fast as before the recent string datastore change, also fixing the rubygems crasher and also various memory leaks

git-svn-id: http://svn.macosforge.org/repository/ruby/MacRuby/trunk@5114 23306eb0-4c56-4727-a40e-e92c0eb68959
  • Loading branch information...
1 parent 0b4fbcc commit d1673a26e4cc9dcca4ec0bd4c968cdd26a5fd636 @lrz lrz committed Jan 5, 2011
Showing with 152 additions and 257 deletions.
  1. +39 −2 encoding.h
  2. +5 −28 parse.y
  3. +14 −55 re.c
  4. +20 −3 re.h
  5. +3 −16 sprintf.c
  6. +69 −138 string.c
  7. +2 −15 symbol.c
View
@@ -295,9 +295,46 @@ VALUE rstr_concat(VALUE self, SEL sel, VALUE other);
// The following functions should always be preferred over anything else,
// especially if this "else" is RSTRING_PTR and RSTRING_LEN.
// They also work on CFStrings.
+
VALUE rb_unicode_str_new(const UniChar *ptr, const size_t len);
-void rb_str_get_uchars(VALUE str, UChar **chars_p, long *chars_len_p,
- bool *need_free_p);
+
// Capacity, in UChars, of the inline buffer embedded in rb_str_uchars_buf_t.
+#define STR_UCHARS_STATIC_BUFSIZE 35
+
// Result holder filled in by rb_str_get_uchars().
//   static_buf - inline storage used for short ASCII-only strings.
//   chars      - where the UChars are; on the fast path of
//                rb_str_get_uchars() this points at static_buf of this very
//                struct.
//   len        - number of UChars available at chars.
// Since chars may point into the struct itself, a by-value copy of the struct
// would still reference the original's static_buf; keep the original alive
// for as long as chars is in use.
// NOTE(review): how rb_str_get_uchars_always() fills chars/len is not visible
// here; presumably it follows the same contract - confirm.
+typedef struct {
+ UChar static_buf[STR_UCHARS_STATIC_BUFSIZE];
+ UChar *chars;
+ long len;
+} rb_str_uchars_buf_t;
+
+void rb_str_get_uchars_always(VALUE str, rb_str_uchars_buf_t *buf);
+
// Fetches the UChars of `str` into `buf`.
//
// Fast path: when `str` is an RSTR whose encoding is ASCII-compatible, whose
// content is ASCII-only and whose byte length is strictly below
// STR_UCHARS_STATIC_BUFSIZE, every byte is widened into buf->static_buf and
// buf->chars is pointed at that inline buffer, without calling
// rb_str_get_uchars_always().
// Every other string is forwarded to rb_str_get_uchars_always().
//
// Because buf->chars can point at buf->static_buf, `buf` must outlive every
// use of buf->chars.
+static inline void
+rb_str_get_uchars(VALUE str, rb_str_uchars_buf_t *buf)
+{
+ if (IS_RSTR(str)) {
+ rb_str_t *rstr = RSTR(str);
+ if (rstr->encoding->ascii_compatible && str_is_ascii_only(rstr)
+ && rstr->length_in_bytes < STR_UCHARS_STATIC_BUFSIZE) {
+ // Fast path.
// One byte becomes one UChar; this branch is only entered for ASCII-only
// content, so the UChar count equals the byte count.
+ for (long i = 0; i < rstr->length_in_bytes; i++) {
+ buf->static_buf[i] = rstr->bytes[i];
+ }
+ buf->chars = buf->static_buf;
+ buf->len = rstr->length_in_bytes;
+ return;
+ }
+ }
// Slow path: non-RSTR objects, non-ASCII content, or strings too long for
// the inline buffer.
+ rb_str_get_uchars_always(str, buf);
+}
+
+UChar *rb_str_xcopy_uchars(VALUE str, long *len_p);
+
// Convenience wrapper around rb_str_get_uchars(): declares, in the current
// scope, a `UChar *_chars` and a `long _len` describing `str`, plus a local
// rb_str_uchars_buf_t named __buf that backs them.
// The expansion is a plain sequence of declarations/statements (not wrapped
// in a block) and ends without a semicolon, so it is meant to be written as
// a statement of its own: RB_STR_GET_UCHARS(s, chars, len);
// NOTE(review): __buf has a fixed name, so using the macro twice in the same
// scope would redeclare it; identifiers beginning with `__` are also
// reserved in C.
+#define RB_STR_GET_UCHARS(str, _chars, _len) \
+ rb_str_uchars_buf_t __buf; \
+ rb_str_get_uchars(str, &__buf); \
+ UChar *_chars = __buf.chars; \
+ long _len = __buf.len
+
long rb_str_chars_len(VALUE str);
UChar rb_str_get_uchar(VALUE str, long pos);
void rb_str_append_uchar(VALUE str, UChar c);
View
33 parse.y
@@ -5094,15 +5094,13 @@ rb_parser_compile_string(VALUE vparser, const char *f, VALUE s, int line)
struct parser_params *parser;
Data_Get_Struct(vparser, struct parser_params, parser);
- UChar *chars = NULL;
long chars_len = 0;
- bool need_free = false;
- rb_str_get_uchars(s, &chars, &chars_len, &need_free);
+ UChar *chars = rb_str_xcopy_uchars(s, &chars_len);
struct lex_get_str_context *ctx = (struct lex_get_str_context *)
xmalloc(sizeof(struct lex_get_str_context));
GC_WB(&ctx->str, s);
- ctx->chars = chars;
+ GC_WB(&ctx->chars, chars);
ctx->chars_len = chars_len;
lex_gets = lex_get_str;
@@ -5111,14 +5109,7 @@ rb_parser_compile_string(VALUE vparser, const char *f, VALUE s, int line)
lex_pbeg = lex_p = lex_pend = 0;
compile_for_eval = rb_parse_in_eval();
- NODE *node = yycompile(parser, f, line);
-
- if (need_free && chars != NULL) {
- orig_free(chars);
- chars = NULL;
- }
-
- return node;
+ return yycompile(parser, f, line);
}
NODE*
@@ -9901,27 +9892,13 @@ ripper_initialize(VALUE self, SEL sel, int argc, VALUE *argv)
Data_Get_Struct(self, struct parser_params, parser);
rb_scan_args(argc, argv, "12", &src, &fname, &lineno);
- UChar *chars = NULL;
long chars_len = 0;
- bool need_free = false;
- rb_str_get_uchars(src, &chars, &chars_len, &need_free);
-
- if (need_free) {
- UChar *tmp = (UChar *)xmalloc(sizeof(UChar) * chars_len);
- memcpy(tmp, chars, sizeof(UChar) * chars_len);
- orig_free(chars);
- chars = tmp;
- }
+ UChar *chars = rb_str_xcopy_uchars(src, &chars_len);
struct lex_get_str_context *ctx = (struct lex_get_str_context *)
xmalloc(sizeof(struct lex_get_str_context));
GC_WB(&ctx->str, src);
- if (need_free) {
- GC_WB(&ctx->chars, chars);
- }
- else {
- ctx->chars = chars;
- }
+ GC_WB(&ctx->chars, chars);
ctx->chars_len = chars_len;
parser->parser_lex_gets = lex_get_str;
View
69 re.c
@@ -91,20 +91,16 @@ regexp_finalize_imp(void *rcv, SEL sel)
// Work around ICU limitations.
static void
-sanitize_regexp_string(UChar **chars_p, long *chars_len_p, bool *need_free_p)
+sanitize_regexp_string(UChar **chars_p, long *chars_len_p)
{
UChar *chars = *chars_p;
long chars_len = *chars_len_p;
- bool need_free = *need_free_p;
#define copy_if_needed() \
do { \
- if (!need_free) { \
- UChar *tmp = (UChar *)malloc(sizeof(UChar) * chars_len); \
- memcpy(tmp, chars, sizeof(UChar) * chars_len); \
- chars = tmp; \
- need_free = true; \
- } \
+ UChar *tmp = (UChar *)xmalloc(sizeof(UChar) * chars_len); \
+ memcpy(tmp, chars, sizeof(UChar) * chars_len); \
+ chars = tmp; \
} \
while (0)
@@ -183,18 +179,14 @@ printf("\n");
*chars_p = chars;
*chars_len_p = chars_len;
- *need_free_p = need_free;
}
static bool
init_from_string(rb_regexp_t *regexp, VALUE str, int option, VALUE *excp)
{
option |= REGEXP_OPT_DEFAULT;
- UChar *chars = NULL;
- long chars_len = 0;
- bool need_free = false;
- rb_str_get_uchars(str, &chars, &chars_len, &need_free);
+ RB_STR_GET_UCHARS(str, chars, chars_len);
UChar null_char = '\0';
if (chars_len == 0) {
@@ -203,21 +195,16 @@ init_from_string(rb_regexp_t *regexp, VALUE str, int option, VALUE *excp)
// of -1 which indicates it's terminated by \0.
chars = &null_char;
chars_len = -1;
- need_free = false;
}
else {
- sanitize_regexp_string(&chars, &chars_len, &need_free);
+ sanitize_regexp_string(&chars, &chars_len);
}
UParseError pe;
UErrorCode status = U_ZERO_ERROR;
URegularExpression *pattern = uregex_open(chars, chars_len, option,
&pe, &status);
- if (need_free) {
- free(chars);
- }
-
if (pattern == NULL) {
if (excp != NULL) {
char error[1024];
@@ -669,7 +656,7 @@ regexp_equal(VALUE rcv, SEL sel, VALUE other)
typedef struct rb_regexp_matcher {
struct RBasic basic;
URegularExpression *pattern;
- UChar *text_to_free;
+ UChar *text_chars;
rb_encoding_t *encoding;
VALUE frozen_str;
} rb_regexp_matcher_t;
@@ -681,10 +668,6 @@ reg_matcher_cleanup(rb_regexp_matcher_t *m)
uregex_close(m->pattern);
m->pattern = NULL;
}
- if (m->text_to_free != NULL) {
- free(m->text_to_free);
- m->text_to_free = NULL;
- }
}
static IMP regexp_matcher_finalize_imp_super = NULL;
@@ -697,6 +680,7 @@ regexp_matcher_finalize_imp(void *rcv, SEL sel)
((void(*)(void *, SEL))regexp_matcher_finalize_imp_super)(rcv, sel);
}
}
+
VALUE
rb_reg_matcher_new(VALUE re, VALUE str)
{
@@ -712,28 +696,22 @@ rb_reg_matcher_new(VALUE re, VALUE str)
u_errorName(status));
}
- UChar *chars = NULL;
long chars_len = 0;
- bool need_free = false;
- rb_str_get_uchars(str, &chars, &chars_len, &need_free);
+ UChar *chars = rb_str_xcopy_uchars(str, &chars_len);
- UChar null_char = '\0';
if (chars_len == 0) {
// uregex_setText() will complain if we pass a NULL pattern or a
// pattern length of 0, so we do pass an empty pattern with a length
// of -1 which indicates it's terminated by \0.
- chars = &null_char;
+ chars = (UChar *)xmalloc(sizeof(UChar));
+ *chars = '\0';
chars_len = -1;
- need_free = false;
}
uregex_setText(match_pattern, chars, chars_len, &status);
if (status != U_ZERO_ERROR) {
uregex_close(match_pattern);
- if (need_free) {
- free(chars);
- }
rb_raise(rb_eRegexpError, "can't set pattern text: %s",
u_errorName(status));
}
@@ -744,7 +722,7 @@ rb_reg_matcher_new(VALUE re, VALUE str)
// Apparently uregex_setText doesn't copy the given string, so we need
// to keep it around until we finally destroy the matcher object.
- matcher->text_to_free = need_free ? chars : NULL;
+ GC_WB(&matcher->text_chars, chars);
return (VALUE)matcher;
}
@@ -756,7 +734,7 @@ rb_reg_matcher_destroy(VALUE matcher)
xfree((void *)matcher);
}
-static int
+int
rb_reg_matcher_search_find(VALUE re, VALUE matcher, int pos, bool reverse,
bool findFirst)
{
@@ -857,18 +835,6 @@ rb_reg_matcher_search_find(VALUE re, VALUE matcher, int pos, bool reverse,
return res[0].beg;
}
-int
-rb_reg_matcher_search_first(VALUE re, VALUE matcher, int pos, bool reverse)
-{
- return rb_reg_matcher_search_find(re, matcher, pos, reverse, true);
-}
-
-int
-rb_reg_matcher_search_next(VALUE re, VALUE matcher, int pos, bool reverse)
-{
- return rb_reg_matcher_search_find(re, matcher, pos, reverse, false);
-}
-
static long
reg_match_pos(VALUE re, VALUE *strp, long pos)
{
@@ -975,7 +941,6 @@ regexp_match3(VALUE rcv, SEL sel)
rb_backref_set(Qnil);
return Qnil;
}
-
const long start = rb_reg_search(rcv, line, 0, 0);
if (start < 0) {
return Qnil;
@@ -2124,12 +2089,9 @@ rb_reg_new(const char *cstr, long len, int options)
VALUE
rb_reg_quote(VALUE pat)
{
- UChar *chars = NULL;
- long chars_len = 0;
- bool need_free = false;
VALUE result;
- rb_str_get_uchars(pat, &chars, &chars_len, &need_free);
+ RB_STR_GET_UCHARS(pat, chars, chars_len);
long pos = 0;
for (; pos < chars_len; pos++) {
@@ -2197,9 +2159,6 @@ rb_reg_quote(VALUE pat)
}
bail:
- if (need_free) {
- free(chars);
- }
return result;
}
View
23 re.h
@@ -24,10 +24,27 @@ VALUE rb_reg_regcomp(VALUE str);
VALUE rb_regexp_source(VALUE re);
VALUE rb_reg_matcher_new(VALUE re, VALUE str);
+int rb_reg_matcher_search_find(VALUE re, VALUE matcher, int pos, bool reverse,
+ bool findFirst);
void rb_reg_matcher_destroy(VALUE matcher);
-int rb_reg_matcher_search_first(VALUE re, VALUE matcher, int pos, bool reverse);
-int rb_reg_matcher_search_next(VALUE re, VALUE matcher, int pos, bool reverse);
-#define rb_reg_matcher_search rb_reg_matcher_search_next
+
// Thin wrapper: calls rb_reg_matcher_search_find() with findFirst = true and
// returns its result unchanged.
+static inline int
+rb_reg_matcher_search_first(VALUE re, VALUE matcher, int pos, bool reverse)
+{
+ return rb_reg_matcher_search_find(re, matcher, pos, reverse, true);
+}
+
// Thin wrapper: calls rb_reg_matcher_search_find() with findFirst = false and
// returns its result unchanged.
+static inline int
+rb_reg_matcher_search_next(VALUE re, VALUE matcher, int pos, bool reverse)
+{
+ return rb_reg_matcher_search_find(re, matcher, pos, reverse, false);
+}
+
// Default search entry point: forwards all arguments to
// rb_reg_matcher_search_next() (i.e. findFirst = false).
+static inline int
+rb_reg_matcher_search(VALUE re, VALUE matcher, int pos, bool reverse)
+{
+ return rb_reg_matcher_search_next(re, matcher, pos, reverse);
+}
static inline int
rb_reg_search(VALUE re, VALUE str, int pos, bool reverse)
View
@@ -372,14 +372,9 @@ cstr_update(UChar **str, long *str_len, long start, long num, VALUE replace)
sizeof(UChar) * (len - start - num));
}
if (replace_len > 0) {
- UChar *replace_chars = NULL;
- bool need_free = false;
- rb_str_get_uchars(replace, &replace_chars, &replace_len, &need_free);
- assert(replace_len > 0);
+ RB_STR_GET_UCHARS(replace, replace_chars, replace_len2);
+ assert(replace_len2 == replace_len);
bcopy(replace_chars, *str + start, sizeof(UChar) * replace_len);
- if (need_free) {
- free(replace_chars);
- }
}
return replace_len - num;
}
@@ -413,19 +408,11 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt)
{
bool tainted = OBJ_TAINTED(fmt);
- UChar *format_str = NULL;
long format_len = 0;
- bool need_free = false;
- rb_str_get_uchars(fmt, &format_str, &format_len, &need_free);
+ UChar *format_str = rb_str_xcopy_uchars(fmt, &format_len);
if (format_len == 0) {
goto bail;
}
- UChar *tmp = (UChar *)xmalloc(format_len * sizeof(UChar));
- memcpy(tmp, format_str, format_len * sizeof(UChar));
- if (need_free) {
- free(format_str);
- }
- format_str = tmp;
long num, pos;
int j = 0;
Oops, something went wrong.

0 comments on commit d1673a2

Please sign in to comment.