diff --git a/devel/200_27.md b/devel/200_27.md new file mode 100644 index 00000000..9122a219 --- /dev/null +++ b/devel/200_27.md @@ -0,0 +1,28 @@ +# [200_27] 从 s7.c 拆分出 s7_liii_string.c 和 s7_liii_string.h + +## 任务相关的代码文件 +- src/s7.c +- src/s7_liii_string.c(新增) +- src/s7_liii_string.h(新增) +- xmake.lua +- devel/200_27.md + +## 如何测试 +``` +xmake b goldfish +bin/goldfish tests/goldfish/liii/string-test.scm +``` + +### 验证结果 +所有迁移函数均通过验证: +- ✅ 编译成功:`xmake b goldfish` +- ✅ 测试通过:`bin/goldfish tests/goldfish/liii/string-test.scm`(1560 个测试全部通过) + +## 2026/02/25 将字符串函数从 s7.c 迁移到 s7_liii_string.c + +### What +将以下 4 个字符串函数的实现从 `s7.c` 迁移到 `s7_liii_string.c`: +1. `string-upcase` (`g_string_upcase`) +2. `string-downcase` (`g_string_downcase`) +3. `string-ref` (`g_string_ref`, `string_ref_1`) +4. `string-set!` (`g_string_set`) diff --git a/src/s7.c b/src/s7.c index 7c422731..3c8ad3c9 100644 --- a/src/s7.c +++ b/src/s7.c @@ -412,6 +412,7 @@ #include "s7_scheme_inexact.h" #include "s7_scheme_complex.h" #include "s7_liii_bitwise.h" +#include "s7_liii_string.h" /* there is also apparently __STDC_NO_COMPLEX__ */ #if WITH_CLANG_PP @@ -15736,7 +15737,7 @@ static void resize_strbuf(s7_scheme *sc, s7_int needed_size) for (s7_int i = old_size; i < sc->strbuf_size; i++) sc->strbuf[i] = '\0'; } -static s7_pointer *chars; +s7_pointer *chars; static s7_pointer unknown_sharp_constant(s7_scheme *sc, const char *name, s7_pointer port) { @@ -24717,7 +24718,7 @@ static s7_pointer integer_to_char_p_i(s7_scheme *sc, s7_int ind) } -static uint8_t uppers[256], lowers[256]; +uint8_t uppers[256], lowers[256]; static void init_uppers(void) { for (int32_t i = 0; i < 256; i++) @@ -25669,92 +25670,7 @@ static s7_int string_length_i_7p(s7_scheme *sc, s7_pointer str) #endif -/* -------------------------------- string-up|downcase -------------------------------- */ -static s7_pointer g_string_downcase(s7_scheme *sc, s7_pointer args) -{ - #define H_string_downcase "(string-downcase str) returns the lower case version of str." - #define Q_string_downcase sc->pcl_s - - const s7_pointer str = car(args); - if (!is_string(str)) - return(method_or_bust_p(sc, str, sc->string_downcase_symbol, sc->type_names[T_STRING])); - { - const s7_int len = string_length(str); - const s7_pointer newstr = make_empty_string(sc, len, '\0'); - const uint8_t *ostr = (const uint8_t *)string_value(str); - uint8_t *nstr = (uint8_t *)string_value(newstr); - - if (len >= 128) - { - s7_int i = len - 1; - while (i >= 8) - LOOP_8(nstr[i] = lowers[(uint8_t)ostr[i]]; i--); - while (i >= 0) {nstr[i] = lowers[(uint8_t)ostr[i]]; i--;} - } - else - for (s7_int i = 0; i < len; i++) nstr[i] = lowers[(uint8_t)ostr[i]]; - return(newstr); - } -} - -static s7_pointer g_string_upcase(s7_scheme *sc, s7_pointer args) -{ - #define H_string_upcase "(string-upcase str) returns the upper case version of str." - #define Q_string_upcase sc->pcl_s - - const s7_pointer str = car(args); - if (!is_string(str)) - return(method_or_bust_p(sc, str, sc->string_upcase_symbol, sc->type_names[T_STRING])); - - { - const s7_int len = string_length(str); - const s7_pointer newstr = make_empty_string(sc, len, '\0'); - const uint8_t *ostr = (const uint8_t *)string_value(str); - uint8_t *nstr = (uint8_t *)string_value(newstr); - - if (len >= 128) - { - s7_int i = len - 1; - while (i >= 8) - LOOP_8(nstr[i] = uppers[(uint8_t)ostr[i]]; i--); - while (i >= 0) {nstr[i] = uppers[(uint8_t)ostr[i]]; i--;} - } - else - for (s7_int i = 0; i < len; i++) nstr[i] = uppers[(uint8_t)ostr[i]]; - return(newstr); - } -} - - /* -------------------------------- string-ref -------------------------------- */ -static s7_pointer string_ref_1(s7_scheme *sc, s7_pointer strng, s7_pointer index) -{ - char *str; - s7_int ind; - - if (!s7_is_integer(index)) - return(method_or_bust_pp(sc, index, sc->string_ref_symbol, strng, index, sc->type_names[T_INTEGER], 2)); - ind = s7_integer_clamped_if_gmp(sc, index); - if (ind < 0) - out_of_range_error_nr(sc, sc->string_ref_symbol, int_two, index, it_is_negative_string); - if (ind >= string_length(strng)) - out_of_range_error_nr(sc, sc->string_ref_symbol, int_two, index, it_is_too_large_string); - - str = string_value(strng); - return(chars[((uint8_t *)str)[ind]]); -} - -static s7_pointer g_string_ref(s7_scheme *sc, s7_pointer args) -{ - #define H_string_ref "(string-ref str index) returns the character at the index-th element of the string str" - #define Q_string_ref s7_make_signature(sc, 3, sc->is_char_symbol, sc->is_string_symbol, sc->is_integer_symbol) - - s7_pointer str = car(args); - if (!is_string(str)) - return(method_or_bust(sc, str, sc->string_ref_symbol, args, sc->type_names[T_STRING], 1)); - return(string_ref_1(sc, str, cadr(args))); -} - static s7_pointer string_ref_p_pi(s7_scheme *sc, s7_pointer str, s7_int index) { if (!is_string(str)) @@ -25766,9 +25682,17 @@ static s7_pointer string_ref_p_pi(s7_scheme *sc, s7_pointer str, s7_int index) static s7_pointer string_ref_p_pp(s7_scheme *sc, s7_pointer str, s7_pointer index) { + s7_int ind; if (!is_string(str)) return(method_or_bust_pp(sc, str, sc->string_ref_symbol, str, index, sc->type_names[T_STRING], 1)); - return(string_ref_1(sc, str, index)); + if (!s7_is_integer(index)) + return(method_or_bust_pp(sc, index, sc->string_ref_symbol, str, index, sc->type_names[T_INTEGER], 2)); + ind = s7_integer_clamped_if_gmp(sc, index); + if (ind < 0) + out_of_range_error_nr(sc, sc->string_ref_symbol, int_two, index, it_is_negative_string); + if (ind >= string_length(str)) + out_of_range_error_nr(sc, sc->string_ref_symbol, int_two, index, it_is_too_large_string); + return(chars[((uint8_t *)string_value(str))[ind]]); } static s7_pointer string_ref_p_p0(s7_scheme *sc, s7_pointer str, s7_pointer unused_index) @@ -25806,33 +25730,6 @@ static s7_pointer string_ref_p_pi_direct(s7_scheme *unused_sc, s7_pointer str, s /* -------------------------------- string-set! -------------------------------- */ -static s7_pointer g_string_set(s7_scheme *sc, s7_pointer args) -{ - #define H_string_set "(string-set! str index chr) sets the index-th element of the string str to the character chr" - #define Q_string_set s7_make_signature(sc, 4, sc->is_char_symbol, sc->is_string_symbol, sc->is_integer_symbol, sc->is_char_symbol) - - const s7_pointer strng = car(args), index = cadr(args); - s7_int ind; - - if (!is_mutable_string(strng)) - return(mutable_method_or_bust(sc, strng, sc->string_set_symbol, args, sc->type_names[T_STRING], 1)); - if (!s7_is_integer(index)) - return(method_or_bust(sc, index, sc->string_set_symbol, args, sc->type_names[T_INTEGER], 2)); - ind = s7_integer_clamped_if_gmp(sc, index); - if (ind < 0) - out_of_range_error_nr(sc, sc->string_set_symbol, int_two, index, a_non_negative_integer_string); - if (ind >= string_length(strng)) - out_of_range_error_nr(sc, sc->string_set_symbol, int_two, index, it_is_too_large_string); - { - char *str = string_value(strng); - s7_pointer c = caddr(args); - if (!is_character(c)) - return(method_or_bust(sc, c, sc->string_set_symbol, args, sc->type_names[T_CHARACTER], 3)); - str[ind] = (char)s7_character(c); - return(c); - } -} - static s7_pointer string_set_p_pip(s7_scheme *sc, s7_pointer str, s7_int index, s7_pointer chr) { if (!is_string(str)) @@ -98299,8 +98196,12 @@ static void init_rootlet(s7_scheme *sc) sc->string_position_symbol = defun("string-position", string_position, 2, 1, false); sc->make_string_symbol = defun("make-string", make_string, 1, 1, false); - sc->string_ref_symbol = defun("string-ref", string_ref, 2, 0, false); - sc->string_set_symbol = defun("string-set!", string_set, 3, 0, false); + sc->string_ref_symbol = s7_define_typed_function(sc, "string-ref", g_string_ref, 2, 0, false, + "(string-ref str index) returns the character at the index-th element of the string str", + s7_make_signature(sc, 3, sc->is_char_symbol, sc->is_string_symbol, sc->is_integer_symbol)); + sc->string_set_symbol = s7_define_typed_function(sc, "string-set!", g_string_set, 3, 0, false, + "(string-set! str index chr) sets the index-th element of the string str to the character chr", + s7_make_signature(sc, 4, sc->is_char_symbol, sc->is_string_symbol, sc->is_integer_symbol, sc->is_char_symbol)); sc->string_eq_symbol = defun("string=?", strings_are_equal, 2, 0, true); sc->string_lt_symbol = defun("stringstring_copy_symbol = defun("string-copy", string_copy, 1, 3, false); - sc->string_downcase_symbol = defun("string-downcase", string_downcase, 1, 0, false); - sc->string_upcase_symbol = defun("string-upcase", string_upcase, 1, 0, false); + sc->string_downcase_symbol = s7_define_typed_function(sc, "string-downcase", g_string_downcase, 1, 0, false, + "(string-downcase str) returns the lower case version of str.", + s7_make_signature(sc, 2, sc->is_string_symbol, sc->is_string_symbol)); + sc->string_upcase_symbol = s7_define_typed_function(sc, "string-upcase", g_string_upcase, 1, 0, false, + "(string-upcase str) returns the upper case version of str.", + s7_make_signature(sc, 2, sc->is_string_symbol, sc->is_string_symbol)); sc->string_append_symbol = defun("string-append", string_append, 0, 0, true); sc->substring_symbol = defun("substring", substring, 1, 2, false); sc->substring_uncopied_symbol = defun("substring-uncopied",substring_uncopied, 1, 2, false); diff --git a/src/s7_liii_string.c b/src/s7_liii_string.c new file mode 100644 index 00000000..f83b5b48 --- /dev/null +++ b/src/s7_liii_string.c @@ -0,0 +1,154 @@ +/* s7_liii_string.c - string utility implementations for s7 Scheme interpreter + * + * derived from s7, a Scheme interpreter + * SPDX-License-Identifier: 0BSD + * + * Bill Schottstaedt, bil@ccrma.stanford.edu + */ + +#include "s7_liii_string.h" +#include +#include +#include + +/* Externally defined in s7.c - upper/lower case conversion tables */ +extern uint8_t uppers[256]; +extern uint8_t lowers[256]; + +/* Externally defined in s7.c - character cache */ +extern s7_pointer *chars; + +#define LOOP_8(Code) do {Code; Code; Code; Code; Code; Code; Code; Code;} while (0) + +/* Helper function for out-of-range errors */ +static s7_pointer string_ref_out_of_range(s7_scheme *sc, s7_int index, bool is_negative) +{ + return s7_out_of_range_error(sc, "string-ref", 2, s7_make_integer(sc, index), + is_negative ? "it is negative" : "it is too large"); +} + +static s7_pointer method_or_bust(s7_scheme *sc, s7_pointer obj, const char *name, const char *type_name) +{ + s7_pointer sym = s7_make_symbol(sc, name); + s7_pointer func = s7_method(sc, obj, sym); + if (func != s7_undefined(sc)) + return(s7_apply_function(sc, func, s7_cons(sc, obj, s7_nil(sc)))); + return(s7_wrong_type_arg_error(sc, name, 1, obj, type_name)); +} + +s7_pointer g_string_downcase(s7_scheme *sc, s7_pointer args) +{ + #define H_string_downcase "(string-downcase str) returns the lower case version of str." + #define Q_string_downcase s7_make_signature(sc, 2, sc->is_string_symbol, sc->is_string_symbol) + + s7_pointer str = s7_car(args); + if (!s7_is_string(str)) + return(method_or_bust(sc, str, "string-downcase", "a string")); + { + s7_int len = s7_string_length(str); + const char *ostr = s7_string(str); + char *nstr = (char *)malloc(len); + if (!nstr) + return(s7_out_of_range_error(sc, "string-downcase", 0, str, "memory allocation failed")); + + if (len >= 128) + { + s7_int i = len - 1; + while (i >= 8) + LOOP_8(nstr[i] = lowers[(uint8_t)ostr[i]]; i--); + while (i >= 0) {nstr[i] = lowers[(uint8_t)ostr[i]]; i--;} + } + else + for (s7_int i = 0; i < len; i++) nstr[i] = lowers[(uint8_t)ostr[i]]; + + s7_pointer result = s7_make_string_with_length(sc, nstr, len); + free(nstr); + return(result); + } +} + +s7_pointer g_string_upcase(s7_scheme *sc, s7_pointer args) +{ + #define H_string_upcase "(string-upcase str) returns the upper case version of str." + #define Q_string_upcase s7_make_signature(sc, 2, sc->is_string_symbol, sc->is_string_symbol) + + s7_pointer str = s7_car(args); + if (!s7_is_string(str)) + return(method_or_bust(sc, str, "string-upcase", "a string")); + + { + s7_int len = s7_string_length(str); + const char *ostr = s7_string(str); + char *nstr = (char *)malloc(len); + if (!nstr) + return(s7_out_of_range_error(sc, "string-upcase", 0, str, "memory allocation failed")); + + if (len >= 128) + { + s7_int i = len - 1; + while (i >= 8) + LOOP_8(nstr[i] = uppers[(uint8_t)ostr[i]]; i--); + while (i >= 0) {nstr[i] = uppers[(uint8_t)ostr[i]]; i--;} + } + else + for (s7_int i = 0; i < len; i++) nstr[i] = uppers[(uint8_t)ostr[i]]; + + s7_pointer result = s7_make_string_with_length(sc, nstr, len); + free(nstr); + return(result); + } +} + +/* -------------------------------- string-ref -------------------------------- */ + +s7_pointer string_ref_1(s7_scheme *sc, s7_pointer strng, s7_pointer index) +{ + if (!s7_is_integer(index)) + return s7_wrong_type_arg_error(sc, "string-ref", 2, index, "an integer"); + + s7_int ind = s7_integer(index); + if (ind < 0) + return string_ref_out_of_range(sc, ind, true); + if (ind >= s7_string_length(strng)) + return string_ref_out_of_range(sc, ind, false); + + const char *str = s7_string(strng); + return chars[((uint8_t *)str)[ind]]; +} + +s7_pointer g_string_ref(s7_scheme *sc, s7_pointer args) +{ + s7_pointer str = s7_car(args); + if (!s7_is_string(str)) + return method_or_bust(sc, str, "string-ref", "a string"); + return string_ref_1(sc, str, s7_cadr(args)); +} + +/* -------------------------------- string-set! -------------------------------- */ + +s7_pointer g_string_set(s7_scheme *sc, s7_pointer args) +{ + s7_pointer strng = s7_car(args); + s7_pointer index = s7_cadr(args); + + if (!s7_is_string(strng)) + return method_or_bust(sc, strng, "string-set!", "a string"); + if (s7_is_immutable(strng)) + return s7_wrong_type_arg_error(sc, "string-set!", 1, strng, "a mutable string"); + if (!s7_is_integer(index)) + return s7_wrong_type_arg_error(sc, "string-set!", 2, index, "an integer"); + + s7_int ind = s7_integer(index); + if (ind < 0) + return s7_out_of_range_error(sc, "string-set!", 2, index, "it is negative"); + if (ind >= s7_string_length(strng)) + return s7_out_of_range_error(sc, "string-set!", 2, index, "it is too large"); + + s7_pointer c = s7_caddr(args); + if (!s7_is_character(c)) + return s7_wrong_type_arg_error(sc, "string-set!", 3, c, "a character"); + + char *str = (char *)s7_string(strng); + str[ind] = (char)s7_character(c); + return c; +} diff --git a/src/s7_liii_string.h b/src/s7_liii_string.h new file mode 100644 index 00000000..8f609524 --- /dev/null +++ b/src/s7_liii_string.h @@ -0,0 +1,28 @@ +/* s7_liii_string.h - string utility declarations for s7 Scheme interpreter + * + * derived from s7, a Scheme interpreter + * SPDX-License-Identifier: 0BSD + * + * Bill Schottstaedt, bil@ccrma.stanford.edu + */ + +#ifndef S7_LIII_STRING_H +#define S7_LIII_STRING_H + +#include "s7.h" + +#ifdef __cplusplus +extern "C" { +#endif + +s7_pointer g_string_upcase(s7_scheme *sc, s7_pointer args); +s7_pointer g_string_downcase(s7_scheme *sc, s7_pointer args); +s7_pointer g_string_ref(s7_scheme *sc, s7_pointer args); +s7_pointer string_ref_1(s7_scheme *sc, s7_pointer strng, s7_pointer index); +s7_pointer g_string_set(s7_scheme *sc, s7_pointer args); + +#ifdef __cplusplus +} +#endif + +#endif /* S7_LIII_STRING_H */ diff --git a/xmake.lua b/xmake.lua index 6b40f66b..71112ff7 100644 --- a/xmake.lua +++ b/xmake.lua @@ -112,6 +112,7 @@ target ("goldfish") do add_files ("src/s7.c", {languages = "c11"}) add_files ("src/s7_scheme_complex.c", {languages = "c11"}) add_files ("src/s7_liii_bitwise.c", {languages = "c11"}) + add_files ("src/s7_liii_string.c", {languages = "c11"}) add_files ("src/s7_scheme_inexact.c", {languages = "c11"}) add_files ("src/s7_scheme_base.c", {languages = "c11"}) add_packages("tbox")