Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove various string functions from flisp and support functions from utf8.c #12762

Merged
merged 1 commit into from Aug 23, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
127 changes: 0 additions & 127 deletions src/flisp/string.c
Expand Up @@ -57,66 +57,6 @@ value_t fl_string_count(value_t *args, u_int32_t nargs)
return size_wrap(u8_charnum(str+start, stop-start));
}

/* Builtin "string.width": takes exactly one argument.
   - For a wchar cprim, returns utf8proc_charwidth() of the character as a
     fixnum, or FL_F when that width is negative.
   - Otherwise the argument is converted with tostring() and the result of
     u8_strwidth() is returned through size_wrap(). */
value_t fl_string_width(value_t *args, u_int32_t nargs)
{
    argcount("string.width", nargs, 1);

    if (iscprim(args[0])) {
        cprim_t *prim = (cprim_t*)ptr(args[0]);
        if (cp_class(prim) == wchartype) {
            uint32_t codepoint = *(uint32_t*)cp_data(prim);
            int columns = utf8proc_charwidth(codepoint);
            if (columns < 0) {
                return FL_F;
            }
            return fixnum(columns);
        }
    }

    /* not a wchar: treat the argument as a string */
    char *text = tostring(args[0], "string.width");
    return size_wrap(u8_strwidth(text));
}

/* Builtin "string.encode": takes exactly one argument, a cvalue whose
   element type is wchar, and returns a new string holding its UTF-8
   encoding. Any other argument raises a type error ("wchar array"). */
value_t fl_string_encode(value_t *args, u_int32_t nargs)
{
    argcount("string.encode", nargs, 1);

    if (iscvalue(args[0])) {
        cvalue_t *src = (cvalue_t*)ptr(args[0]);
        fltype_t *cls = cv_class(src);
        if (cls->eltype == wchartype) {
            size_t nchars = cv_len(src) / sizeof(uint32_t);
            uint32_t *wchars = (uint32_t*)cv_data(src);

            /* size the output first, then allocate it */
            size_t nbytes = u8_codingsize(wchars, nchars);
            value_t result = cvalue_string(nbytes);

            /* the source data is relocatable: fetch the pointer again
               after the allocation above */
            wchars = (uint32_t*)cv_data((cvalue_t*)ptr(args[0]));
            u8_toutf8((char*)cvalue_data(result), nbytes, wchars, nchars);
            return result;
        }
    }

    type_error("string.encode", "wchar array", args[0]);
}

/* Builtin "string.decode": converts a UTF-8 string (args[0]) into a wchar
   string cvalue. With two arguments, a second argument other than FL_F
   requests one extra trailing element set to 0. With any other argument
   count, exactly one argument is required. A non-string args[0] raises a
   type error. */
value_t fl_string_decode(value_t *args, u_int32_t nargs)
{
    int add_terminator = 0;

    if (nargs == 2) {
        add_terminator = (args[1] != FL_F);
    }
    else {
        argcount("string.decode", nargs, 1);
    }

    if (!fl_isstring(args[0])) {
        type_error("string.decode", "string", args[0]);
    }

    cvalue_t *src = (cvalue_t*)ptr(args[0]);
    char *utf8 = (char*)cv_data(src);
    size_t nbytes = cv_len(src);
    size_t nchars = u8_charnum(utf8, nbytes);

    /* one uint32_t per character, plus one more when terminating */
    size_t outsize = nchars * sizeof(uint32_t);
    if (add_terminator) {
        outsize += sizeof(uint32_t);
    }

    value_t wcstr = cvalue(wcstringtype, outsize);

    /* the source data is relocatable: fetch the pointer again after the
       allocation above */
    utf8 = (char*)cv_data((cvalue_t*)ptr(args[0]));

    uint32_t *out = (uint32_t*)cvalue_data(wcstr);
    u8_toucs(out, nchars, utf8, nbytes);
    if (add_terminator) {
        out[nchars] = 0;
    }
    return wcstr;
}

extern value_t fl_buffer(value_t *args, u_int32_t nargs);
extern value_t stream_to_string(value_t *ps);

Expand All @@ -142,49 +82,6 @@ value_t fl_string(value_t *args, u_int32_t nargs)
return outp;
}

/* Builtin "string.split": takes exactly two arguments, the string to split
   (args[0]) and a string of delimiter characters (args[1]).
   Returns a list (chain of cons cells) of newly allocated strings, one per
   token. A token ends at the first character of s that u8_memchr() finds in
   delim. Tokens may be empty (ssz == 0). */
value_t fl_string_split(value_t *args, u_int32_t nargs)
{
argcount("string.split", nargs, 2);
char *s = tostring(args[0], "string.split");
char *delim = tostring(args[1], "string.split");
size_t len = cv_len((cvalue_t*)ptr(args[0]));
size_t dlen = cv_len((cvalue_t*)ptr(args[1]));
size_t ssz, tokend=0, tokstart=0, i=0;
value_t first=FL_NIL, c=FL_NIL, last;
// out-parameter required by u8_memchr; its value is never read here
size_t junk;
// NOTE(review): presumably registers these locals with the collector so
// they stay valid across the allocations below -- confirm in flisp.c.
// Released by fl_free_gc_handles(2) at the end.
fl_gc_handle(&first);
fl_gc_handle(&last);

do {
// find and allocate next token
tokstart = tokend = i;
// u8_nextmemchar advances i past each character it returns; tokend only
// follows i while the character is not a delimiter, so on exit tokend is
// the end of the token and i is already past the delimiter (if any)
while (i < len &&
!u8_memchr(delim, u8_nextmemchar(s, &i), dlen, &junk))
tokend = i;
ssz = tokend - tokstart;
last = c; // save previous cons cell
c = fl_cons(cvalue_string(ssz), FL_NIL);

// we've done allocation; reload movable pointers
s = (char*)cv_data((cvalue_t*)ptr(args[0]));
delim = (char*)cv_data((cvalue_t*)ptr(args[1]));

// copy the token bytes into the new string; skipped for empty tokens
if (ssz) memcpy(cv_data((cvalue_t*)ptr(car_(c))), &s[tokstart], ssz);

// link new cell
if (last == FL_NIL)
first = c; // first time, save first cons
else
((cons_t*)ptr(last))->cdr = c;

// note this tricky condition: if the string ends with a
// delimiter, we need to go around one more time to add an
// empty string. this happens when (i==len && tokend<i)
} while (i < len || (i==len && (tokend!=i)));
fl_free_gc_handles(2);
return first;
}

value_t fl_string_sub(value_t *args, u_int32_t nargs)
{
if (nargs != 2)
Expand Down Expand Up @@ -225,23 +122,6 @@ value_t fl_string_char(value_t *args, u_int32_t nargs)
return mk_wchar(u8_nextchar(s, &i));
}

/* Builtin "char.upcase": takes exactly one wchar argument and returns a new
   wchar holding towupper() of its value. Any other argument raises a type
   error ("wchar"). */
value_t fl_char_upcase(value_t *args, u_int32_t nargs)
{
    argcount("char.upcase", nargs, 1);

    cprim_t *prim = (cprim_t*)ptr(args[0]);
    if (!iscprim(args[0]) || cp_class(prim) != wchartype) {
        type_error("char.upcase", "wchar", args[0]);
    }

    int32_t ch = *(int32_t*)cp_data(prim);
    return mk_wchar(towupper(ch));
}
/* Builtin "char.downcase": takes exactly one wchar argument and returns a
   new wchar holding towlower() of its value. Any other argument raises a
   type error ("wchar"). */
value_t fl_char_downcase(value_t *args, u_int32_t nargs)
{
    argcount("char.downcase", nargs, 1);

    cprim_t *prim = (cprim_t*)ptr(args[0]);
    if (!iscprim(args[0]) || cp_class(prim) != wchartype) {
        type_error("char.downcase", "wchar", args[0]);
    }

    int32_t ch = *(int32_t*)cp_data(prim);
    return mk_wchar(towlower(ch));
}

static value_t mem_find_byte(char *s, char c, size_t start, size_t len)
{
char *p = (char*)memchr(s+start, c, len-start);
Expand Down Expand Up @@ -398,20 +278,13 @@ static builtinspec_t stringfunc_info[] = {
{ "string", fl_string },
{ "string?", fl_stringp },
{ "string.count", fl_string_count },
{ "string.width", fl_string_width },
{ "string.split", fl_string_split },
{ "string.sub", fl_string_sub },
{ "string.find", fl_string_find },
{ "string.char", fl_string_char },
{ "string.inc", fl_string_inc },
{ "string.dec", fl_string_dec },
{ "string.encode", fl_string_encode },
{ "string.decode", fl_string_decode },
{ "string.isutf8", fl_string_isutf8 },

{ "char.upcase", fl_char_upcase },
{ "char.downcase", fl_char_downcase },

{ "number->string", fl_numbertostring },
{ "string->number", fl_stringtonumber },

Expand Down
63 changes: 0 additions & 63 deletions src/support/utf8.c
Expand Up @@ -78,18 +78,6 @@ size_t u8_charlen(uint32_t ch)
return 0;
}

/* Returns the number of bytes needed to encode the first n elements of
   wcstr as UTF-8. An element for which u8_charlen() reports 0 (invalid) is
   counted as 3 bytes, the size of the replacement character. */
size_t u8_codingsize(uint32_t *wcstr, size_t n)
{
    size_t total = 0;
    size_t idx = 0;

    while (idx < n) {
        size_t len = u8_charlen(wcstr[idx]);
        if (len == 0) {
            len = 3; /* invalid: encoded as replacement char */
        }
        total += len;
        idx++;
    }
    return total;
}

/* conversions without error checking
only works for valid UTF-8, i.e. no 5- or 6-byte sequences
srcsz = source size in bytes
Expand Down Expand Up @@ -380,36 +368,6 @@ size_t u8_read_escape_sequence(const char *str, size_t ssz, uint32_t *dest)
return i;
}

/* Convert a string with literal \uxxxx or \Uxxxxxxxx characters to UTF-8.
   example: u8_unescape(mybuf, 256, "hello\\u220e")
   note the double backslash is needed if called on a C string literal.

   buf  destination buffer
   sz   size of buf in bytes
   src  NUL-terminated input

   A backslash hands the following text to u8_read_escape_sequence(); every
   other byte is taken as a single character value. Each value is encoded
   with u8_wc_toutf8() and appended to buf. Conversion stops at the end of
   src, when buf is full, or when the next encoded character would not fit.

   Returns the number of bytes written to buf. A terminating '\0' is stored
   only if there is room left (return value < sz). */
size_t u8_unescape(char *buf, size_t sz, const char *src)
{
    size_t c = 0, amt;
    uint32_t ch = 0;
    char temp[4];

    while (*src && c < sz) {
        if (*src == '\\') {
            src++;
            /* NOTE(review): 1000 is an arbitrary bound on the remaining
               input rather than its real length -- confirm it is safe
               against u8_read_escape_sequence(). */
            amt = u8_read_escape_sequence(src, 1000, &ch);
        }
        else {
            /* Convert through unsigned char: where plain char is signed,
               a direct cast sign-extends bytes >= 0x80 into values such
               as 0xFFFFFFE9 instead of 0xE9. */
            ch = (uint32_t)(unsigned char)*src;
            amt = 1;
        }
        src += amt;
        amt = u8_wc_toutf8(temp, ch);
        if (amt > sz - c) {
            break;
        }
        memcpy(&buf[c], temp, amt);
        c += amt;
    }
    if (c < sz) {
        buf[c] = '\0';
    }
    return c;
}

static inline int buf_put2c(char *buf, const char *src)
{
buf[0] = src[0];
Expand Down Expand Up @@ -539,27 +497,6 @@ char *u8_memrchr(const char *s, uint32_t ch, size_t sz)
return NULL;
}

/* Returns 1 if the locale name's encoding part is exactly "UTF-8" or
   "utf8", and 0 otherwise (including for a NULL locale).

   The encoding part starts after the first '.' and runs up to the next
   '@', '+', ',', ';' or the end of the string. A '.' that appears after one
   of those characters is not considered.
   (this code based on libutf8) */
int u8_is_locale_utf8(const char *locale)
{
    if (locale == NULL) {
        return 0;
    }

    /* find the first '.' that precedes any modifier separator */
    size_t prefix = strcspn(locale, ".@+,;");
    if (locale[prefix] != '.') {
        return 0;
    }

    const char *encoding = locale + prefix + 1;
    size_t enclen = strcspn(encoding, "@+,;");

    if (enclen == 5 && strncmp(encoding, "UTF-8", 5) == 0) {
        return 1; /* it's UTF-8 */
    }
    if (enclen == 4 && strncmp(encoding, "utf8", 4) == 0) {
        return 1; /* it's UTF-8 */
    }
    return 0;
}

size_t u8_vprintf(const char *fmt, va_list ap)
{
size_t cnt, sz=0, nc, needfree=0;
Expand Down
8 changes: 0 additions & 8 deletions src/support/utf8.h
Expand Up @@ -45,9 +45,6 @@ size_t u8_seqlen(const char *s);
/* returns the # of bytes needed to encode a certain character */
size_t u8_charlen(uint32_t ch);

/* computes the # of bytes needed to encode a WC string as UTF-8 */
size_t u8_codingsize(uint32_t *wcstr, size_t n);

char read_escape_control_char(char c);

/* assuming src points to the character after a backslash, read an
Expand All @@ -60,9 +57,6 @@ size_t u8_read_escape_sequence(const char *src, size_t ssz, uint32_t *dest);
sz must be at least 3. */
int u8_escape_wchar(char *buf, size_t sz, uint32_t ch);

/* convert a string "src" containing escape sequences to UTF-8 */
size_t u8_unescape(char *buf, size_t sz, const char *src);

/* convert UTF-8 "src" to escape sequences.

sz is buf size in bytes. must be at least 12.
Expand Down Expand Up @@ -94,8 +88,6 @@ char *u8_memrchr(const char *s, uint32_t ch, size_t sz);
/* number of columns occupied by a string */
DLLEXPORT size_t u8_strwidth(const char *s);

int u8_is_locale_utf8(const char *locale);

/* printf where the format string and arguments may be in UTF-8.
you can avoid this function and just use ordinary printf() if the current
locale is UTF-8. */
Expand Down