Skip to content

Commit

Permalink
Merge pull request #12762 from ScottPJones/spj/remstring
Browse files Browse the repository at this point in the history
Remove various string functions from flisp and support functions from utf8.c
  • Loading branch information
JeffBezanson committed Aug 23, 2015
2 parents 56360a7 + 308cc33 commit 31dda80
Show file tree
Hide file tree
Showing 3 changed files with 0 additions and 198 deletions.
127 changes: 0 additions & 127 deletions src/flisp/string.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,66 +57,6 @@ value_t fl_string_count(value_t *args, u_int32_t nargs)
return size_wrap(u8_charnum(str+start, stop-start));
}

/*
 * string.width builtin: takes exactly one argument.
 *
 * If the argument is a cprim whose class is wchartype, the result is
 * utf8proc_charwidth() of that code point as a fixnum, or FL_F when the
 * reported width is negative.  Any other argument is passed through
 * tostring() and the result is u8_strwidth() of that string, wrapped by
 * size_wrap().
 */
value_t fl_string_width(value_t *args, u_int32_t nargs)
{
    argcount("string.width", nargs, 1);

    if (iscprim(args[0])) {
        cprim_t *wc = (cprim_t*)ptr(args[0]);
        if (cp_class(wc) == wchartype) {
            int cols = utf8proc_charwidth(*(uint32_t*)cp_data(wc));
            return (cols < 0) ? FL_F : fixnum(cols);
        }
    }

    // not a wide character: measure it as a string
    return size_wrap(u8_strwidth(tostring(args[0], "string.width")));
}

/*
 * string.encode builtin: takes exactly one argument, a cvalue whose element
 * type is wchartype, and returns a new string holding its UTF-8 encoding.
 * Any other argument is reported through type_error().
 */
value_t fl_string_encode(value_t *args, u_int32_t nargs)
{
    argcount("string.encode", nargs, 1);

    if (iscvalue(args[0])) {
        cvalue_t *src = (cvalue_t*)ptr(args[0]);
        fltype_t *cls = cv_class(src);
        if (cls->eltype == wchartype) {
            size_t nchars = cv_len(src) / sizeof(uint32_t);
            size_t outlen = u8_codingsize((uint32_t*)cv_data(src), nchars);
            value_t result = cvalue_string(outlen);

            // The allocation above may have moved the source object, so
            // fetch its data pointer again through args[0] instead of
            // reusing anything computed before the allocation.
            uint32_t *wchars = (uint32_t*)cv_data((cvalue_t*)ptr(args[0]));
            u8_toutf8((char*)cvalue_data(result), outlen, wchars, nchars);
            return result;
        }
    }

    type_error("string.encode", "wchar array", args[0]);
}

/*
 * string.decode builtin: converts a UTF-8 string (args[0]) into a new
 * wcstringtype value of 32-bit characters.
 *
 * With two arguments, a second argument other than FL_F requests one extra
 * zero element after the converted characters.  With any other argument
 * count, argcount() is asked to check for exactly one argument.
 * A non-string first argument is reported through type_error().
 */
value_t fl_string_decode(value_t *args, u_int32_t nargs)
{
    int terminate = 0;

    if (nargs != 2) {
        argcount("string.decode", nargs, 1);
    }
    else {
        terminate = (args[1] != FL_F);
    }

    if (!fl_isstring(args[0])) {
        type_error("string.decode", "string", args[0]);
    }

    cvalue_t *src = (cvalue_t*)ptr(args[0]);
    size_t nbytes = cv_len(src);
    size_t nchars = u8_charnum((char*)cv_data(src), nbytes);

    size_t outsz = nchars * sizeof(uint32_t);
    if (terminate) {
        outsz += sizeof(uint32_t);
    }

    value_t wide = cvalue(wcstringtype, outsz);

    // The allocation above may have moved the source string, so take its
    // data pointer from args[0] only now.
    char *bytes = (char*)cv_data((cvalue_t*)ptr(args[0]));
    uint32_t *out = (uint32_t*)cvalue_data(wide);

    u8_toucs(out, nchars, bytes, nbytes);
    if (terminate) {
        out[nchars] = 0;
    }
    return wide;
}

extern value_t fl_buffer(value_t *args, u_int32_t nargs);
extern value_t stream_to_string(value_t *ps);

Expand All @@ -142,49 +82,6 @@ value_t fl_string(value_t *args, u_int32_t nargs)
return outp;
}

/*
 * string.split builtin: takes a string (args[0]) and a delimiter string
 * (args[1]) and returns a newly consed list of newly allocated strings,
 * one per token of args[0].
 *
 * A token ends at any character of args[0] that u8_memchr() finds in the
 * delimiter string.  The do/while body always runs at least once, so the
 * result always has at least one element; empty tokens are kept as empty
 * strings.
 *
 * NOTE(review): `s` and `delim` are re-read from args[] after every
 * allocation, which suggests allocation can move the underlying data;
 * fl_gc_handle() is presumably what keeps `first` and `last` valid across
 * those allocations -- confirm against the allocator/GC.
 */
value_t fl_string_split(value_t *args, u_int32_t nargs)
{
argcount("string.split", nargs, 2);
char *s = tostring(args[0], "string.split");
char *delim = tostring(args[1], "string.split");
// byte lengths of the input and of the delimiter set
size_t len = cv_len((cvalue_t*)ptr(args[0]));
size_t dlen = cv_len((cvalue_t*)ptr(args[1]));
// i: scan position in s; [tokstart, tokend): byte range of the current token
size_t ssz, tokend=0, tokstart=0, i=0;
// first: head of the result list; c: newest cell; last: the cell before c.
// `last` has no initializer here; it is assigned from `c` in the loop
// before the first allocation takes place.
value_t first=FL_NIL, c=FL_NIL, last;
// receives u8_memchr()'s output argument; the value is never used
size_t junk;
fl_gc_handle(&first);
fl_gc_handle(&last);

do {
// find and allocate next token
tokstart = tokend = i;
// u8_nextmemchar() presumably decodes the character at s[i] and advances
// i past it; tokend only follows i while the character is not a
// delimiter, so the delimiter itself is left out of the token.
while (i < len &&
!u8_memchr(delim, u8_nextmemchar(s, &i), dlen, &junk))
tokend = i;
ssz = tokend - tokstart;
last = c; // save previous cons cell
c = fl_cons(cvalue_string(ssz), FL_NIL);

// we've done allocation; reload movable pointers
s = (char*)cv_data((cvalue_t*)ptr(args[0]));
delim = (char*)cv_data((cvalue_t*)ptr(args[1]));

// copy the token bytes into the new string stored in c's car
if (ssz) memcpy(cv_data((cvalue_t*)ptr(car_(c))), &s[tokstart], ssz);

// link new cell
if (last == FL_NIL)
first = c; // first time, save first cons
else
((cons_t*)ptr(last))->cdr = c;

// note this tricky condition: if the string ends with a
// delimiter, we need to go around one more time to add an
// empty string. this happens when (i==len && tokend<i)
} while (i < len || (i==len && (tokend!=i)));
// release the two handles taken at the top (first and last)
fl_free_gc_handles(2);
return first;
}

value_t fl_string_sub(value_t *args, u_int32_t nargs)
{
if (nargs != 2)
Expand Down Expand Up @@ -225,23 +122,6 @@ value_t fl_string_char(value_t *args, u_int32_t nargs)
return mk_wchar(u8_nextchar(s, &i));
}

/*
 * char.upcase builtin: takes exactly one argument, a cprim of class
 * wchartype, and returns a new wchar holding towupper() of its value.
 * Any other argument is reported through type_error().
 */
value_t fl_char_upcase(value_t *args, u_int32_t nargs)
{
    argcount("char.upcase", nargs, 1);

    int is_wchar = iscprim(args[0]) &&
                   cp_class((cprim_t*)ptr(args[0])) == wchartype;
    if (!is_wchar) {
        type_error("char.upcase", "wchar", args[0]);
    }

    cprim_t *wc = (cprim_t*)ptr(args[0]);
    return mk_wchar(towupper(*(int32_t*)cp_data(wc)));
}
/*
 * char.downcase builtin: takes exactly one argument, a cprim of class
 * wchartype, and returns a new wchar holding towlower() of its value.
 * Any other argument is reported through type_error().
 */
value_t fl_char_downcase(value_t *args, u_int32_t nargs)
{
    argcount("char.downcase", nargs, 1);

    int is_wchar = iscprim(args[0]) &&
                   cp_class((cprim_t*)ptr(args[0])) == wchartype;
    if (!is_wchar) {
        type_error("char.downcase", "wchar", args[0]);
    }

    cprim_t *wc = (cprim_t*)ptr(args[0]);
    return mk_wchar(towlower(*(int32_t*)cp_data(wc)));
}

static value_t mem_find_byte(char *s, char c, size_t start, size_t len)
{
char *p = (char*)memchr(s+start, c, len-start);
Expand Down Expand Up @@ -398,20 +278,13 @@ static builtinspec_t stringfunc_info[] = {
{ "string", fl_string },
{ "string?", fl_stringp },
{ "string.count", fl_string_count },
{ "string.width", fl_string_width },
{ "string.split", fl_string_split },
{ "string.sub", fl_string_sub },
{ "string.find", fl_string_find },
{ "string.char", fl_string_char },
{ "string.inc", fl_string_inc },
{ "string.dec", fl_string_dec },
{ "string.encode", fl_string_encode },
{ "string.decode", fl_string_decode },
{ "string.isutf8", fl_string_isutf8 },

{ "char.upcase", fl_char_upcase },
{ "char.downcase", fl_char_downcase },

{ "number->string", fl_numbertostring },
{ "string->number", fl_stringtonumber },

Expand Down
63 changes: 0 additions & 63 deletions src/support/utf8.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,18 +78,6 @@ size_t u8_charlen(uint32_t ch)
return 0;
}

/* Sum, over the first n elements of wcstr, of u8_charlen() for each element.
   An element for which u8_charlen() returns 0 is counted as 3 bytes
   (invalid: encoded as the replacement character). */
size_t u8_codingsize(uint32_t *wcstr, size_t n)
{
    size_t total = 0;
    size_t k = 0;

    while (k < n) {
        size_t len = u8_charlen(wcstr[k++]);
        total += (len != 0) ? len : 3;
    }
    return total;
}

/* conversions without error checking
only works for valid UTF-8, i.e. no 5- or 6-byte sequences
srcsz = source size in bytes
Expand Down Expand Up @@ -380,36 +368,6 @@ size_t u8_read_escape_sequence(const char *str, size_t ssz, uint32_t *dest)
return i;
}

/* convert a string with literal \uxxxx or \Uxxxxxxxx characters to UTF-8
   example: u8_unescape(mybuf, 256, "hello\\u220e")
   note the double backslash is needed if called on a C string literal

   buf  destination buffer
   sz   size of buf in bytes
   src  NUL-terminated input string

   Returns the number of bytes stored in buf, not counting any terminator.
   A terminating NUL is stored only when fewer than sz bytes were written.
   Conversion stops at the first character whose encoding does not fit in
   the space that is left.

   A byte of src that is not part of an escape is taken as the code point
   equal to its unsigned value (0-255).
   A backslash that is the very last character of src is copied literally. */
size_t u8_unescape(char *buf, size_t sz, const char *src)
{
    size_t c = 0, amt;
    uint32_t ch = 0;
    char temp[4];

    while (*src && c < sz) {
        // Only parse an escape when something follows the backslash.
        // Otherwise the escape reader would be handed the terminator, and
        // any nonzero "consumed" count would move src past the end of the
        // string.
        if (*src == '\\' && src[1] != '\0') {
            src++;
            // NOTE(review): 1000 is only an upper bound on the escape
            // length; the reader is presumably stopped earlier by the
            // string contents -- confirm against u8_read_escape_sequence.
            amt = u8_read_escape_sequence(src, 1000, &ch);
        }
        else {
            // go through unsigned char so bytes >= 0x80 are not
            // sign-extended where plain char is signed
            ch = (uint32_t)(unsigned char)*src;
            amt = 1;
        }
        src += amt;
        amt = u8_wc_toutf8(temp, ch);
        if (amt > sz - c)
            break;
        memcpy(&buf[c], temp, amt);
        c += amt;
    }
    if (c < sz)
        buf[c] = '\0';
    return c;
}

static inline int buf_put2c(char *buf, const char *src)
{
buf[0] = src[0];
Expand Down Expand Up @@ -539,27 +497,6 @@ char *u8_memrchr(const char *s, uint32_t ch, size_t sz)
return NULL;
}

/* Nonzero if c ends a component of a locale name: the end of the string or
   one of the modifier/list separators '@', '+', ',' and ';'. */
static int u8_locale_part_ends(char c)
{
    return c == '\0' || c == '@' || c == '+' || c == ',' || c == ';';
}

/* Nonzero if the len bytes at enc spell the UTF-8 codeset name: "UTF-8" or
   "UTF8", with the letters in either case. */
static int u8_codeset_is_utf8(const char *enc, size_t len)
{
    if (len != 4 && len != 5)
        return 0;
    if ((enc[0] != 'U' && enc[0] != 'u') ||
        (enc[1] != 'T' && enc[1] != 't') ||
        (enc[2] != 'F' && enc[2] != 'f'))
        return 0;
    if (len == 5)
        return enc[3] == '-' && enc[4] == '8';
    return enc[3] == '8';
}

/* Report whether a locale name selects the UTF-8 encoding.

   locale  a locale name such as "en_US.UTF-8" or "de_DE.utf8@euro";
           may be NULL

   Returns 1 when the codeset part of the name (the text after the first
   '.', up to the end of the string or the next '@', '+', ',' or ';') is
   "UTF-8" or "UTF8" in any letter case.  Returns 0 otherwise, including
   for NULL and for names without a codeset part.

   this code based on libutf8 */
int u8_is_locale_utf8(const char *locale)
{
    if (locale == NULL)
        return 0;

    // find the '.' that introduces the codeset, if there is one before the
    // first separator
    const char *cp = locale;
    while (!u8_locale_part_ends(*cp) && *cp != '.')
        cp++;
    if (*cp != '.')
        return 0;

    // the codeset runs from just after the '.' to the next separator
    const char *encoding = ++cp;
    while (!u8_locale_part_ends(*cp))
        cp++;

    return u8_codeset_is_utf8(encoding, (size_t)(cp - encoding));
}

size_t u8_vprintf(const char *fmt, va_list ap)
{
size_t cnt, sz=0, nc, needfree=0;
Expand Down
8 changes: 0 additions & 8 deletions src/support/utf8.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,6 @@ size_t u8_seqlen(const char *s);
/* returns the # of bytes needed to encode a certain character */
size_t u8_charlen(uint32_t ch);

/* computes the # of bytes needed to encode a WC string as UTF-8 */
size_t u8_codingsize(uint32_t *wcstr, size_t n);

char read_escape_control_char(char c);

/* assuming src points to the character after a backslash, read an
Expand All @@ -60,9 +57,6 @@ size_t u8_read_escape_sequence(const char *src, size_t ssz, uint32_t *dest);
sz must be at least 3. */
int u8_escape_wchar(char *buf, size_t sz, uint32_t ch);

/* convert a string "src" containing escape sequences to UTF-8 */
size_t u8_unescape(char *buf, size_t sz, const char *src);

/* convert UTF-8 "src" to escape sequences.
sz is buf size in bytes. must be at least 12.
Expand Down Expand Up @@ -94,8 +88,6 @@ char *u8_memrchr(const char *s, uint32_t ch, size_t sz);
/* number of columns occupied by a string */
DLLEXPORT size_t u8_strwidth(const char *s);

int u8_is_locale_utf8(const char *locale);

/* printf where the format string and arguments may be in UTF-8.
you can avoid this function and just use ordinary printf() if the current
locale is UTF-8. */
Expand Down

0 comments on commit 31dda80

Please sign in to comment.