Merge pull request #12762 from ScottPJones/spj/remstring

Remove various string functions from flisp and support functions from utf8.c
JuliaLang · Aug 23, 2015 · 31dda80
2 parents 56360a7 + 308cc33
commit 31dda80
Show file tree

Hide file tree

Showing 3 changed files with 0 additions and 198 deletions.
diff --git a/src/flisp/string.c b/src/flisp/string.c
@@ -57,66 +57,6 @@ value_t fl_string_count(value_t *args, u_int32_t nargs)
     return size_wrap(u8_charnum(str+start, stop-start));
 }
 
-value_t fl_string_width(value_t *args, u_int32_t nargs)
-{
-    argcount("string.width", nargs, 1);
-    if (iscprim(args[0])) {
-        cprim_t *cp = (cprim_t*)ptr(args[0]);
-        if (cp_class(cp) == wchartype) {
-            int w = utf8proc_charwidth(*(uint32_t*)cp_data(cp));
-            if (w < 0)
-                return FL_F;
-            return fixnum(w);
-        }
-    }
-    char *s = tostring(args[0], "string.width");
-    return size_wrap(u8_strwidth(s));
-}
-
-value_t fl_string_encode(value_t *args, u_int32_t nargs)
-{
-    argcount("string.encode", nargs, 1);
-    if (iscvalue(args[0])) {
-        cvalue_t *cv = (cvalue_t*)ptr(args[0]);
-        fltype_t *t = cv_class(cv);
-        if (t->eltype == wchartype) {
-            size_t nc = cv_len(cv) / sizeof(uint32_t);
-            uint32_t *ptr = (uint32_t*)cv_data(cv);
-            size_t nbytes = u8_codingsize(ptr, nc);
-            value_t str = cvalue_string(nbytes);
-            ptr = (uint32_t*)cv_data((cvalue_t*)ptr(args[0]));  // relocatable pointer
-            u8_toutf8((char*)cvalue_data(str), nbytes, ptr, nc);
-            return str;
-        }
-    }
-    type_error("string.encode", "wchar array", args[0]);
-}
-
-value_t fl_string_decode(value_t *args, u_int32_t nargs)
-{
-    int term=0;
-    if (nargs == 2) {
-        term = (args[1] != FL_F);
-    }
-    else {
-        argcount("string.decode", nargs, 1);
-    }
-    if (!fl_isstring(args[0]))
-        type_error("string.decode", "string", args[0]);
-    cvalue_t *cv = (cvalue_t*)ptr(args[0]);
-    char *ptr = (char*)cv_data(cv);
-    size_t nb = cv_len(cv);
-    size_t nc = u8_charnum(ptr, nb);
-    size_t newsz = nc*sizeof(uint32_t);
-    if (term) newsz += sizeof(uint32_t);
-    value_t wcstr = cvalue(wcstringtype, newsz);
-    ptr = (char*)cv_data((cvalue_t*)ptr(args[0]));  // relocatable pointer
-    uint32_t *pwc = (uint32_t*)cvalue_data(wcstr);
-    u8_toucs(pwc, nc, ptr, nb);
-    if (term) pwc[nc] = 0;
-    return wcstr;
-}
-
 extern value_t fl_buffer(value_t *args, u_int32_t nargs);
 extern value_t stream_to_string(value_t *ps);
 
@@ -142,49 +82,6 @@ value_t fl_string(value_t *args, u_int32_t nargs)
     return outp;
 }
 
-value_t fl_string_split(value_t *args, u_int32_t nargs)
-{
-    argcount("string.split", nargs, 2);
-    char *s = tostring(args[0], "string.split");
-    char *delim = tostring(args[1], "string.split");
-    size_t len = cv_len((cvalue_t*)ptr(args[0]));
-    size_t dlen = cv_len((cvalue_t*)ptr(args[1]));
-    size_t ssz, tokend=0, tokstart=0, i=0;
-    value_t first=FL_NIL, c=FL_NIL, last;
-    size_t junk;
-    fl_gc_handle(&first);
-    fl_gc_handle(&last);
-
-    do {
-        // find and allocate next token
-        tokstart = tokend = i;
-        while (i < len &&
-               !u8_memchr(delim, u8_nextmemchar(s, &i), dlen, &junk))
-            tokend = i;
-        ssz = tokend - tokstart;
-        last = c;  // save previous cons cell
-        c = fl_cons(cvalue_string(ssz), FL_NIL);
-
-        // we've done allocation; reload movable pointers
-        s = (char*)cv_data((cvalue_t*)ptr(args[0]));
-        delim = (char*)cv_data((cvalue_t*)ptr(args[1]));
-
-        if (ssz) memcpy(cv_data((cvalue_t*)ptr(car_(c))), &s[tokstart], ssz);
-
-        // link new cell
-        if (last == FL_NIL)
-            first = c;   // first time, save first cons
-        else
-            ((cons_t*)ptr(last))->cdr = c;
-
-        // note this tricky condition: if the string ends with a
-        // delimiter, we need to go around one more time to add an
-        // empty string. this happens when (i==len && tokend<i)
-    } while (i < len || (i==len && (tokend!=i)));
-    fl_free_gc_handles(2);
-    return first;
-}
-
 value_t fl_string_sub(value_t *args, u_int32_t nargs)
 {
     if (nargs != 2)
@@ -225,23 +122,6 @@ value_t fl_string_char(value_t *args, u_int32_t nargs)
     return mk_wchar(u8_nextchar(s, &i));
 }
 
-value_t fl_char_upcase(value_t *args, u_int32_t nargs)
-{
-    argcount("char.upcase", nargs, 1);
-    cprim_t *cp = (cprim_t*)ptr(args[0]);
-    if (!iscprim(args[0]) || cp_class(cp) != wchartype)
-      type_error("char.upcase", "wchar", args[0]);
-    return mk_wchar(towupper(*(int32_t*)cp_data(cp)));
-}
-value_t fl_char_downcase(value_t *args, u_int32_t nargs)
-{
-    argcount("char.downcase", nargs, 1);
-    cprim_t *cp = (cprim_t*)ptr(args[0]);
-    if (!iscprim(args[0]) || cp_class(cp) != wchartype)
-      type_error("char.downcase", "wchar", args[0]);
-    return mk_wchar(towlower(*(int32_t*)cp_data(cp)));
-}
-
 static value_t mem_find_byte(char *s, char c, size_t start, size_t len)
 {
     char *p = (char*)memchr(s+start, c, len-start);
@@ -398,20 +278,13 @@ static builtinspec_t stringfunc_info[] = {
     { "string", fl_string },
     { "string?", fl_stringp },
     { "string.count", fl_string_count },
-    { "string.width", fl_string_width },
-    { "string.split", fl_string_split },
     { "string.sub", fl_string_sub },
     { "string.find", fl_string_find },
     { "string.char", fl_string_char },
     { "string.inc", fl_string_inc },
     { "string.dec", fl_string_dec },
-    { "string.encode", fl_string_encode },
-    { "string.decode", fl_string_decode },
     { "string.isutf8", fl_string_isutf8 },
 
-    { "char.upcase", fl_char_upcase },
-    { "char.downcase", fl_char_downcase },
-
     { "number->string", fl_numbertostring },
     { "string->number", fl_stringtonumber },
 

diff --git a/src/support/utf8.c b/src/support/utf8.c
@@ -78,18 +78,6 @@ size_t u8_charlen(uint32_t ch)
     return 0;
 }
 
-size_t u8_codingsize(uint32_t *wcstr, size_t n)
-{
-    size_t i, c=0;
-
-    for(i=0; i < n; i++) {
-        size_t cl = u8_charlen(wcstr[i]);
-        if (cl == 0) cl = 3;  // invalid: encoded as replacement char
-        c += cl;
-    }
-    return c;
-}
-
 /* conversions without error checking
    only works for valid UTF-8, i.e. no 5- or 6-byte sequences
    srcsz = source size in bytes
@@ -380,36 +368,6 @@ size_t u8_read_escape_sequence(const char *str, size_t ssz, uint32_t *dest)
     return i;
 }
 
-/* convert a string with literal \uxxxx or \Uxxxxxxxx characters to UTF-8
-   example: u8_unescape(mybuf, 256, "hello\\u220e")
-   note the double backslash is needed if called on a C string literal */
-size_t u8_unescape(char *buf, size_t sz, const char *src)
-{
-    size_t c=0, amt;
-    uint32_t ch = 0;
-    char temp[4];
-
-    while (*src && c < sz) {
-        if (*src == '\\') {
-            src++;
-            amt = u8_read_escape_sequence(src, 1000, &ch);
-        }
-        else {
-            ch = (uint32_t)*src;
-            amt = 1;
-        }
-        src += amt;
-        amt = u8_wc_toutf8(temp, ch);
-        if (amt > sz-c)
-            break;
-        memcpy(&buf[c], temp, amt);
-        c += amt;
-    }
-    if (c < sz)
-        buf[c] = '\0';
-    return c;
-}
-
 static inline int buf_put2c(char *buf, const char *src)
 {
     buf[0] = src[0];
@@ -539,27 +497,6 @@ char *u8_memrchr(const char *s, uint32_t ch, size_t sz)
     return NULL;
 }
 
-int u8_is_locale_utf8(const char *locale)
-{
-    if (locale == NULL) return 0;
-
-    /* this code based on libutf8 */
-    const char *cp = locale;
-
-    for (; *cp != '\0' && *cp != '@' && *cp != '+' && *cp != ',' && *cp != ';'; cp++) {
-        if (*cp == '.') {
-            const char *encoding = ++cp;
-            for (; *cp != '\0' && *cp != '@' && *cp != '+' && *cp != ',' && *cp != ';'; cp++)
-                ;
-            if ((cp-encoding == 5 && !strncmp(encoding, "UTF-8", 5))
-                || (cp-encoding == 4 && !strncmp(encoding, "utf8", 4)))
-                return 1; /* it's UTF-8 */
-            break;
-        }
-    }
-    return 0;
-}
-
 size_t u8_vprintf(const char *fmt, va_list ap)
 {
     size_t cnt, sz=0, nc, needfree=0;

diff --git a/src/support/utf8.h b/src/support/utf8.h
@@ -45,9 +45,6 @@ size_t u8_seqlen(const char *s);
 /* returns the # of bytes needed to encode a certain character */
 size_t u8_charlen(uint32_t ch);
 
-/* computes the # of bytes needed to encode a WC string as UTF-8 */
-size_t u8_codingsize(uint32_t *wcstr, size_t n);
-
 char read_escape_control_char(char c);
 
 /* assuming src points to the character after a backslash, read an
@@ -60,9 +57,6 @@ size_t u8_read_escape_sequence(const char *src, size_t ssz, uint32_t *dest);
    sz must be at least 3. */
 int u8_escape_wchar(char *buf, size_t sz, uint32_t ch);
 
-/* convert a string "src" containing escape sequences to UTF-8 */
-size_t u8_unescape(char *buf, size_t sz, const char *src);
-
 /* convert UTF-8 "src" to escape sequences.
 
    sz is buf size in bytes. must be at least 12.
@@ -94,8 +88,6 @@ char *u8_memrchr(const char *s, uint32_t ch, size_t sz);
 /* number of columns occupied by a string */
 DLLEXPORT size_t u8_strwidth(const char *s);
 
-int u8_is_locale_utf8(const char *locale);
-
 /* printf where the format string and arguments may be in UTF-8.
    you can avoid this function and just use ordinary printf() if the current
    locale is UTF-8. */