From df22afa7f3486db207972cd4147cf6c8c6e12777 Mon Sep 17 00:00:00 2001 From: zerico <71151164+ZERICO2005@users.noreply.github.com> Date: Sat, 4 Oct 2025 14:34:18 -0600 Subject: [PATCH 1/3] added PRI24 and SCN24 to <inttypes.h> --- src/libc/include/inttypes.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/libc/include/inttypes.h b/src/libc/include/inttypes.h index d762282e7..eb91e7179 100644 --- a/src/libc/include/inttypes.h +++ b/src/libc/include/inttypes.h @@ -42,6 +42,15 @@ #define PRIxLEAST16 __UINT_LEAST16_FMTx__ #define PRIXLEAST16 __UINT_LEAST16_FMTX__ +#ifdef __INT24_TYPE__ +#define PRId24 __INT24_FMTd__ +#define PRIi24 __INT24_FMTi__ +#define PRIo24 __UINT24_FMTo__ +#define PRIu24 __UINT24_FMTu__ +#define PRIx24 __UINT24_FMTx__ +#define PRIX24 __UINT24_FMTX__ +#endif /* __INT24_TYPE__ */ + #define PRId32 __INT32_FMTd__ #define PRIi32 __INT32_FMTi__ #define PRIo32 __UINT32_FMTo__ @@ -126,6 +135,14 @@ #define SCNuLEAST16 __UINT_LEAST16_FMTu__ #define SCNxLEAST16 __UINT_LEAST16_FMTx__ +#ifdef __INT24_TYPE__ +#define SCNd24 __INT24_FMTd__ +#define SCNi24 __INT24_FMTi__ +#define SCNo24 __UINT24_FMTo__ +#define SCNu24 __UINT24_FMTu__ +#define SCNx24 __UINT24_FMTx__ +#endif /* __INT24_TYPE__ */ + #define SCNd32 __INT32_FMTd__ #define SCNi32 __INT32_FMTi__ #define SCNo32 __UINT32_FMTo__ From 4685c870361df92d1561a6668423f2d8826f7259 Mon Sep 17 00:00:00 2001 From: zerico <71151164+ZERICO2005@users.noreply.github.com> Date: Sat, 4 Oct 2025 17:53:06 -0600 Subject: [PATCH 2/3] added (v)sscanf --- src/libc/include/stdio.h | 10 +- src/libc/sscanf.c | 522 ++++++++++++++++++++++++++++ test/standalone/scanf/autotest.json | 40 +++ test/standalone/scanf/makefile | 19 + test/standalone/scanf/src/main.c | 241 +++++++++++++ 5 files changed, 831 insertions(+), 1 deletion(-) create mode 100644 src/libc/sscanf.c create mode 100644 test/standalone/scanf/autotest.json create mode 100644 test/standalone/scanf/makefile create mode 100644 test/standalone/scanf/src/main.c diff
--git a/src/libc/include/stdio.h b/src/libc/include/stdio.h index 36e74ae7d..63e0ffd4b 100644 --- a/src/libc/include/stdio.h +++ b/src/libc/include/stdio.h @@ -113,6 +113,12 @@ int asprintf(char **__restrict p_buffer, const char *__restrict format, ...) int vasprintf(char **__restrict p_buffer, const char *__restrict format, va_list va) __attribute__((format(__printf__, 2, 0))) __attribute__((nonnull(1))); +int sscanf(const char *__restrict buffer, const char *__restrict format, ...) + __attribute__((format(__scanf__, 2, 3))); + +int vsscanf(const char *__restrict buffer, const char *__restrict format, va_list va) + __attribute__((format(__scanf__, 2, 0))); + void perror(const char *str); __END_DECLS @@ -121,7 +127,7 @@ __END_DECLS namespace std { using ::size_t; using ::FILE; - + using ::fopen; using ::fclose; using ::fflush; @@ -151,6 +157,8 @@ namespace std { using ::vfprintf; using ::asprintf; using ::vasprintf; + using ::sscanf; + using ::vsscanf; using ::perror; } /* namespace std */ #endif /* __cplusplus */ diff --git a/src/libc/sscanf.c b/src/libc/sscanf.c new file mode 100644 index 000000000..0432e4ce9 --- /dev/null +++ b/src/libc/sscanf.c @@ -0,0 +1,522 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NO_FLOAT 0 +#define USE_STRTOD 1 +#define USE_STRTOF 2 +#define USE_STRTOLD 3 + +/*============================================================================*/ +/* Config */ +/*============================================================================*/ + +/** + * define to 0 or 1. Enables support for `[` set matching (such as "%3[^]^123abc]"). + */ +#define ENABLE_SET_MATCHING 1 + +/** + * Maximum characters (including null terminator) to scan for set matching. + * Also determines the highest maximum field width that can be processed. + * Recommended values: + * - 128 or 256 if you are not targetting embedded platforms. 
+ * otherwise, for embedded targets: + * - 16 allows for digits and a few other symbols to be matched. + * - 72 allows for lowercase, uppercase, digits, and a few other symbols to be matched. + */ +#define SCAN_LIMIT 40 + +/** + * Select the float conversion routine used: + * NO_FLOAT : disables float conversion formats. + * USE_STRTOD : `strtod` (default) + * USE_STRTOF : `strtof` (speed) + * USE_STRTOLD : `strtold` (precision) + */ +#define STRING_TO_FLOAT USE_STRTOD + +/** + * define to 0 or 1. Adds support for C23 `%b` format specifiers. + */ +#define ENABLE_BINARY_CONVERSION_FORMAT 1 + +/** + * define to `restrict`, `__restrict`, `__restrict__`, or leave blank. + */ +#define NANO_SCANF_restrict __restrict + +/*============================================================================*/ +/* Validate macros */ +/*============================================================================*/ + +/* minimum size to read "-32768" */ +#if SCAN_LIMIT < 7 +# error "SCAN_LIMIT is too small" +#endif + +#ifndef STRING_TO_FLOAT +# error "STRING_TO_FLOAT must be defined to a value" +#endif + +#if STRING_TO_FLOAT == USE_STRTOD +# define STRING_TO_FLOAT_TYPE double +# define STRING_TO_FLOAT_FUNC strtod +#elif STRING_TO_FLOAT == USE_STRTOF +# define STRING_TO_FLOAT_TYPE float +# define STRING_TO_FLOAT_FUNC strtof +#elif STRING_TO_FLOAT == USE_STRTOLD +# define STRING_TO_FLOAT_TYPE long double +# define STRING_TO_FLOAT_FUNC strtold +#elif STRING_TO_FLOAT != NO_FLOAT +# error "invalid STRING_TO_FLOAT value" +#endif + +#ifndef ENABLE_SET_MATCHING +# error "ENABLE_SET_MATCHING must be defined to 0 or 1" +#endif + +#ifndef ENABLE_BINARY_CONVERSION_FORMAT +# error "ENABLE_BINARY_CONVERSION_FORMAT must be defined to 0 or 1" +#endif + +#if UINTMAX_MAX < ULLONG_MAX +#error "UINTMAX_MAX needs to be greater than or equal ULLONG_MAX" +#endif + +/*============================================================================*/ +/* Code */ 
+/*============================================================================*/ + +#define TEST_LENGTH_MODIFIER() do { \ + if (ptr_size == 0) { \ + /* invalid length modifier */ \ + return assignment_count; \ + } \ +} while (0) + +#define CONSUME_WHITESPACE() while (isspace(*buf)) { buf++; } + +#define RETURN_IF_NULL(ptr) if ((ptr) == NULL) { return assignment_count; } + +static intmax_t limit_strtoimax( + char const * NANO_SCANF_restrict str, + char * * NANO_SCANF_restrict endptr, + int base, + size_t max_len, + char * NANO_SCANF_restrict scan_buf +) { + if (max_len == 0) { + return strtoimax(str, (char**)endptr, base); + } + size_t copy_size = ((max_len + 1) > SCAN_LIMIT) ? SCAN_LIMIT : (max_len + 1); + strncpy(scan_buf, str, copy_size); + /* null terminate */ + scan_buf[copy_size - 1] = '\0'; + char* scan_endptr; + intmax_t value = strtoimax(scan_buf, &scan_endptr, base); + *endptr = (char*)(str + (scan_endptr - scan_buf)); + return value; +} + +static uintmax_t limit_strtoumax( + char const * NANO_SCANF_restrict str, + char * * NANO_SCANF_restrict endptr, + int base, + size_t max_len, + char * NANO_SCANF_restrict scan_buf +) { + if (max_len == 0) { + return strtoumax(str, (char**)endptr, base); + } + size_t copy_size = ((max_len + 1) > SCAN_LIMIT) ? SCAN_LIMIT : (max_len + 1); + strncpy(scan_buf, str, copy_size); + /* null terminate */ + scan_buf[copy_size - 1] = '\0'; + char* scan_endptr; + uintmax_t value = strtoumax(scan_buf, &scan_endptr, base); + *endptr = (char*)(str + (scan_endptr - scan_buf)); + return value; +} + +#if STRING_TO_FLOAT +static STRING_TO_FLOAT_TYPE limit_strtofloat( + char const * NANO_SCANF_restrict str, + char * * NANO_SCANF_restrict endptr, + size_t max_len, + char * NANO_SCANF_restrict scan_buf +) { + if (max_len == 0) { + return STRING_TO_FLOAT_FUNC(str, (char**)endptr); + } + size_t copy_size = ((max_len + 1) > SCAN_LIMIT) ? 
SCAN_LIMIT : (max_len + 1); + strncpy(scan_buf, str, copy_size); + /* null terminate */ + scan_buf[copy_size - 1] = '\0'; + char* scan_endptr; + STRING_TO_FLOAT_TYPE value = STRING_TO_FLOAT_FUNC(scan_buf, &scan_endptr); + *endptr = (char*)(str + (scan_endptr - scan_buf)); + return value; +} +#endif /* STRING_TO_FLOAT */ + +/** + * @author zerico2005 (Originally based off of https://github.com/tusharjois/bscanf) + * @note All non-suppressed character sequence types must have a maximum field width: + * - Valid : "%*3c %*8s %*12[^abc]" + * - Valid : "%3c %8s %12[^abc]" + * - Valid : "%*c %*s %*[^abc]" + * - Invalid : "%c %s %[^abc]" + * @note ranges such as "%5[0-9]" or "%5[^a-z]" are not supported + * @note Assumes little endian + * @note `wchar_t` is not supported + */ +int vsscanf( + char const * const NANO_SCANF_restrict Buffer, + char const * const NANO_SCANF_restrict Format, + va_list args +) { + char scan_buf[SCAN_LIMIT]; + int assignment_count = 0; + char const * NANO_SCANF_restrict buf = Buffer; + char const * NANO_SCANF_restrict fmt = Format; + if (buf == NULL || fmt == NULL) { + return EOF; + } + while (*fmt != '\0') { + if (isspace(*fmt)) { + CONSUME_WHITESPACE(); + fmt++; + continue; + } + if (*fmt != '%') { + if (*fmt != *buf) { + /* end of format */ + return assignment_count; + } + fmt++; + buf++; + continue; + } + /* conversion specifier */ + fmt++; + bool is_suppressed = false; + bool is_double_or_wide_char = false; + #if STRING_TO_FLOAT + bool is_long_double = false; + #endif /* STRING_TO_FLOAT */ + size_t max_width = 0; + size_t ptr_size = sizeof(int); + if (*fmt == '*') { + is_suppressed = true; + fmt++; + } + /* test for digits */ + if (isdigit(*fmt)) { + char *endptr; + /** + * @remarks Either strtoumax or strtoul can be used here. + * strtoul might be faster, however it also means that we link + * another routine increasing size. So strtoumax is used instead so + * use can reduce the amount of routines we need to link to. 
+ */ + max_width = (size_t)strtoumax(fmt, &endptr, 10); + if (fmt == endptr || max_width == 0) { + /* failed */ + return assignment_count; + } + fmt = endptr; + } + /* test for length modifiers */ + switch (*fmt) { + case 'h': + { + ptr_size = sizeof(short); + fmt++; + if (*fmt == 'h') { + ptr_size = sizeof(char); + fmt++; + } + } break; + case 'z': + { + ptr_size = sizeof(size_t); + fmt++; + } break; + case 'l': + { + is_double_or_wide_char = true; + ptr_size = sizeof(long); + fmt++; + if (*fmt == 'l') { + ptr_size = sizeof(long long); + fmt++; + } + } break; + case 't': + { + ptr_size = sizeof(ptrdiff_t); + fmt++; + } break; + #if STRING_TO_FLOAT + case 'L': + { + is_long_double = true; + ptr_size = 0; + fmt++; + } break; + #endif /* STRING_TO_FLOAT */ + case 'j': + { + ptr_size = sizeof(intmax_t); + fmt++; + } break; + default: break; + } + /** + * @remarks All conversion specifiers other than 'n' 'c' '[' consume + * and discard all leading whitespace characters. + */ + if (*fmt != 'n' && *fmt != 'c' && *fmt != '[') { + CONSUME_WHITESPACE(); + } + /* test for format type */ + switch (*fmt) { + case '%': + /* handle "%%" */ { + if (*fmt != *buf) { + return assignment_count; + } + buf++; + fmt++; + continue; + } + case 'n': + /* number of characters read so far */ { + if (is_suppressed) { + /* "%*n" is undefined behaviour */ + return assignment_count; + } + TEST_LENGTH_MODIFIER(); + void* ptr = va_arg(args, void*); + RETURN_IF_NULL(ptr); + unsigned long long diff = buf - Buffer; + memcpy(ptr, &diff, ptr_size); + fmt++; + /* assignment_count is not incremented for %n */ + continue; + } break; + case 'c': + case 's': + /* string */ { + const bool string_format = (*fmt == 's'); + if (!is_suppressed && max_width == 0) { + /* enforce bounds checking */ + return assignment_count; + } + if (is_double_or_wide_char) { + /* unimplemented */ + return assignment_count; + } + char const * NANO_SCANF_restrict const begin = buf; + for (; max_width --> 0;) { + if (*buf == '\0') { 
+ break; + } + if (string_format && isspace(*buf)) { + break; + } + buf++; + } + size_t copy_size = buf - begin; + if (!is_suppressed) { + char* ptr = va_arg(args, char*); + RETURN_IF_NULL(ptr); + memcpy(ptr, begin, copy_size); + if (string_format) { + /* null terminate */ + *(ptr + copy_size) = '\0'; + } + assignment_count++; + } + fmt++; + continue; + } break; + #if ENABLE_SET_MATCHING + case '[': + /* match range */ { + fmt++; + if (!is_suppressed && max_width == 0) { + /* enforce bounds checking */ + return assignment_count; + } + if (is_double_or_wide_char) { + /* unimplemented */ + return assignment_count; + } + bool invert_match = false; + bool starts_with_bracket = false; + if (*fmt == '^') { + invert_match = true; + fmt++; + } + if (*fmt == ']') { + starts_with_bracket = true; + fmt++; + } + char const * NANO_SCANF_restrict last_bracket = strchr(fmt, ']'); + if (last_bracket == NULL) { + /* "%[^]" is still considered to be an empty sequence */ + return assignment_count; + } + if (starts_with_bracket) { + fmt--; + } + size_t scan_length = (last_bracket - fmt); + + if (scan_length >= SCAN_LIMIT) { + /* too many characters */ + return assignment_count; + } + memcpy(scan_buf, fmt, scan_length); + /* null terminate */ + *(scan_buf + scan_length) = '\0'; + /* move format to the character after the ending ']' */ + fmt = last_bracket + 1; + + size_t match_length; + if (invert_match) { + match_length = strcspn(buf, scan_buf); + } else { + match_length = strspn(buf, scan_buf); + } + if (max_width != 0 && match_length > max_width) { + match_length = max_width; + } + + if (!is_suppressed) { + char* ptr = va_arg(args, char*); + RETURN_IF_NULL(ptr); + memcpy(ptr, buf, match_length); + /* null terminate */ + *(ptr + match_length) = '\0'; + assignment_count++; + } + /* move buf to the character after the last matched character */ + buf += match_length; + continue; + } break; + #endif /* ENABLE_SET_MATCHING */ + case 'i': + case 'd': + /* signed integer */ { + 
TEST_LENGTH_MODIFIER(); + char *endptr; + int base = ((*fmt == 'd') ? 10 : 0); + intmax_t value = limit_strtoimax(buf, &endptr, base, max_width, scan_buf); + if (buf == endptr) { + /* failed */ + return assignment_count; + } + if (!is_suppressed) { + void* ptr = va_arg(args, void*); + RETURN_IF_NULL(ptr); + memcpy(ptr, &value, ptr_size); + assignment_count++; + } + buf = endptr; + fmt++; + } break; + #if ENABLE_BINARY_CONVERSION_FORMAT + case 'b': + #endif /* ENABLE_BINARY_CONVERSION_FORMAT */ + case 'u': + case 'o': + case 'x': + case 'X': + case 'p': + /* unsigned integer or pointer */ { + TEST_LENGTH_MODIFIER(); + char *endptr; + int base = 10; + if (*fmt == 'X' || *fmt == 'x' || *fmt == 'p') { + base = 16; + if (*fmt == 'p') { + ptr_size = sizeof(void*); + } + #if ENABLE_BINARY_CONVERSION_FORMAT + } else if (*fmt == 'b') { + base = 2; + #endif /* ENABLE_BINARY_CONVERSION_FORMAT*/ + } else if (*fmt == 'o') { + base = 8; + } + uintmax_t value = limit_strtoumax(buf, &endptr, base, max_width, scan_buf); + if (buf == endptr) { + /* failed */ + return assignment_count; + } + if (!is_suppressed) { + void* ptr = va_arg(args, void*); + RETURN_IF_NULL(ptr); + memcpy(ptr, &value, ptr_size); + assignment_count++; + } + buf = endptr; + fmt++; + } break; + #if STRING_TO_FLOAT + case 'a': + case 'A': + case 'e': + case 'E': + case 'f': + case 'F': + case 'g': + case 'G': + /* float */ { + char *endptr; + STRING_TO_FLOAT_TYPE value = limit_strtofloat(buf, &endptr, max_width, scan_buf); + if (buf == endptr) { + /* failed */ + return assignment_count; + } + if (!is_suppressed) { + void* ptr = va_arg(args, void*); + RETURN_IF_NULL(ptr); + if (is_long_double) { + *(long double*)ptr = (long double)value; + } else if (is_double_or_wide_char) { + *(double*)ptr = (double)value; + } else { + *(float*)ptr = (float)value; + } + assignment_count++; + } + buf = endptr; + fmt++; + } break; + #endif /* STRING_TO_FLOAT */ + default: + /* unknown format */ { + return assignment_count; + } 
break; + } + } + return assignment_count; +} + +int sscanf(const char * NANO_SCANF_restrict buffer, const char * NANO_SCANF_restrict format, ...) +{ + va_list vlist; + va_start(vlist, format); + int ret = vsscanf(buffer, format, vlist); + va_end(vlist); + return ret; +} diff --git a/test/standalone/scanf/autotest.json b/test/standalone/scanf/autotest.json new file mode 100644 index 000000000..be5eeed3c --- /dev/null +++ b/test/standalone/scanf/autotest.json @@ -0,0 +1,40 @@ +{ + "transfer_files": [ + "bin/DEMO.8xp" + ], + "target": { + "name": "DEMO", + "isASM": true + }, + "sequence": [ + "action|launch", + "delay|1000", + "hashWait|1", + "key|enter", + "delay|300", + "hashWait|2" + ], + "hashes": { + "1": { + "description": "All tests passed", + "timeout": 5000, + "start": "vram_start", + "size": "vram_16_size", + "expected_CRCs": [ + "38E2AD5A" + ] + }, + "2": { + "description": "Exit", + "start": "vram_start", + "size": "vram_16_size", + "expected_CRCs": [ + "FFAF89BA", + "101734A5", + "9DA19F44", + "A32840C8", + "349F4775" + ] + } + } +} diff --git a/test/standalone/scanf/makefile b/test/standalone/scanf/makefile new file mode 100644 index 000000000..e99218420 --- /dev/null +++ b/test/standalone/scanf/makefile @@ -0,0 +1,19 @@ +# ---------------------------- +# Makefile Options +# ---------------------------- + +NAME = DEMO +ICON = icon.png +DESCRIPTION = "CE C Toolchain Demo" +COMPRESSED = NO +ARCHIVED = NO + +CFLAGS = -ffreestanding -Wall -Wextra -Wshadow -Wconversion -Wformat=2 -Wno-sign-conversion -Oz +CXXFLAGS = -ffreestanding -Wall -Wextra -Wshadow -Wconversion -Wformat=2 -Wno-sign-conversion -Oz + +PREFER_OS_LIBC = NO +PREFER_OS_CRT = NO + +# ---------------------------- + +include $(shell cedev-config --makefile) diff --git a/test/standalone/scanf/src/main.c b/test/standalone/scanf/src/main.c new file mode 100644 index 000000000..86a3e4fee --- /dev/null +++ b/test/standalone/scanf/src/main.c @@ -0,0 +1,241 @@ +#include +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +//------------------------------------------------------------------------------ +// Config +//------------------------------------------------------------------------------ + +// define to 0 or 1 +#define DEBUG_DIAGNOSTICS 1 + +//------------------------------------------------------------------------------ +// Tests +//------------------------------------------------------------------------------ + +#define C(expr) if (!(expr)) { return __LINE__; } + +#define TEST(test) { ret = test; if (ret != 0) { return ret; }} + +#define ARRAY_LENGTH(x) (sizeof(x) / sizeof((x)[0])) + +#ifndef DEBUG_DIAGNOSTICS +#error "DEBUG_DIAGNOSTICS needs to be defined to 0 or 1" +#endif + +#if DEBUG_DIAGNOSTICS +#define test_printf printf +#else +#define test_printf(...) +#endif + +bool double_equals(double x, double y, double absolute_difference) { + return (fabs(x - y) <= absolute_difference); +} + +bool strcmp_exact(const char *x, const char *y) { + if (strlen(x) != strlen(y)) { + return false; + } + if (strcmp(x, y) != 0) { + return false; + } + return true; +} + +int stdc_test(void) { + const char* input_1 = ( + "2 quarts of oil\n"\ + "-12.8degrees Celsius\n"\ + ); + const char* input_2 = ( + "lots of luck\n"\ + "10.0LBS of\n"\ + "dirt\n"\ + /* "100ergs of energy\n"\ */ /* strtod doesn't handle this error currently */ + "f100ergs of energy\n" + ); + + struct { + float quant; + char unit[21]; + char item[21]; + } data[2]; + memset(data, '\0', sizeof(data)); + int count; + + count = sscanf(input_1, + "%f%20s of %20s%*[^\n]"\ + "%f%20s of %20s%*[^\n]", + &data[0].quant, data[0].unit, data[0].item, + &data[1].quant, data[1].unit, data[1].item + ); + C(count == 5); + C(double_equals(data[0].quant, 2.0, 1.0e-5)); + C(strcmp_exact(data[0].unit, "quarts")); + C(strcmp_exact(data[0].item, "oil")); + C(double_equals(data[1].quant, -12.8, 1.0e-5)); + C(strcmp_exact(data[1].unit, "degrees")); 
+ C(*data[0].item != 'C'); /* test that "Celius" was not written at all */ + + count = sscanf(input_2, + "%f%20s of %20s%*[^\n]"\ + "%f%20s of %20s%*[^\n]", + &data[0].quant, data[0].unit, data[0].item, + &data[1].quant, data[1].unit, data[1].item + ); + C(count == 0); + + count = sscanf(strchr(input_2, '\n'), + "%*[^!@#\n$%d]"\ + "%f%20s of %20s%*[^\n]"\ + "%f%20s of %20s%*[^\n]", + &data[0].quant, data[0].unit, data[0].item, + &data[1].quant, data[1].unit, data[1].item + ); + C(count == 3); + C(double_equals(data[0].quant, 10.0, 1.0e-5)); + C(strcmp_exact(data[0].unit, "LBS")); + C(strcmp_exact(data[0].item, "dirt")); + + int d1 = 0, d2 = 0xC0FFEE, n1 = 0, n2 = 0xA5A500; + int c = sscanf("123", "%d%n%hhn%d", &d1, &n1, (char*)((void*)&n2), &d2); + C(c == 1); + C(d1 == 123); + C(n1 == 3); + C(n2 == (int)(0xA5A500 | 3)); + C(d2 == (int)0xC0FFEE); + + long long i3 = 0xFEEDFACECAFEBEEF; + int n3 = sscanf("foo %bar -42", "foo%%bar%lld", &i3); + C(i3 == -42); + C(n3 == 1); + + return 0; +} + +int basic_test(void) { + int i, j; + float x, y; + char str1[10] = "ZZZZZZZZZ", str2[4] = "FFF"; + + char input[] = "25 54.32E-1 Thompson 56789 0123 40"; + /** + * https://en.cppreference.com/w/c/io/fscanf + * parse as follows: + * %d: an integer + * %f: a floating-point value + * %9s: a string of at most 9 non-whitespace characters + * %2d: two-digit integer (digits 5 and 6) + * %f: a floating-point value (digits 7, 8, 9) + * %*d: an integer which isn't stored anywhere + * ' ': all consecutive whitespace + * %3[0123456789]: a string of at most 3 decimal digits (digits 5 and 6) + */ + int ret = sscanf( + input, "%d%f%9s%2d%f%*d %3[0123456789]", + &i, &x, str1, &j, &y, str2 + ); + + C(i == 25); + C(double_equals(x, 5.432000, 1.0e-5)); + C(strcmp_exact(str1, "Thompson")); + C(j == 56); + C(y == 789.000000); + C(strcmp_exact(str2, "40")); + C(ret == 6); + + return 0; +} + +/** + * @brief https://stackoverflow.com/questions/9537072/properly-using-sscanf + */ +int 
stackoverflow_test(void) { + const char *data[] = + { + "text1 char2 , word3", + "text1 char2 ,word3", + "text1 char2, word3", + "text1 char2,word3", + "text1 char2 , word3", + }; + char text1[20]; text1[0] = '\0'; + char char2[32]; char2[0] = '\0'; + char word3[48]; word3[0] = '\0'; + for (size_t i = 0; i < ARRAY_LENGTH(data); i++) { + int n = sscanf(data[i], "%19s %31[^, ] %*[,]%47s", text1, char2, word3); + C(n == 3); + C(strcmp_exact("text1", text1)); + C(strcmp_exact("char2", char2)); + C(strcmp_exact("word3", word3)); + } + return 0; +} + +int other_test(void) { + const char* text = "abc]]]def]ghi^^]]^^^jklm^^"; + char buf_1[10] = {'\0'}; + char buf_2[10] = {'\0'}; + char buf_3[10] = {'\0','\0','\0', ';', '\0'}; + char buf_4[10] = {'\0'}; + char buf_5[10] = {'\0'}; + char buf_6[10] = {'\0'}; + char buf_7[10] = {'\0'}; + + ptrdiff_t end_1 = 0xE0E0E0; + intmax_t end_2 = UINTMAX_C(0xD0D0D0D0D0D0D0D0); + int count = sscanf(text, + "%9[^]]%1[]]%*[]]%3c%9[^^]%9[^]^]%3[]^]%*4[]^]%tn%4[^]^]%jn", + buf_1, buf_2, buf_3, buf_4, buf_5, buf_6, &end_1, buf_7, &end_2 + ); + C(count == 7); + C(end_1 == 20); + C(end_2 == 24); + C(strcmp_exact(buf_1, "abc")); + C(strcmp_exact(buf_2, "]")); + C(memcmp(buf_3, "def;", 4) == 0); + C(strcmp_exact(buf_4, "]ghi")); + C(*buf_5 == '\0'); + C(strcmp_exact(buf_6, "^^]")); + C(strcmp_exact(buf_7, "jklm")); + return 0; +} + +int run_tests(void) { + int ret = 0; + + TEST(basic_test()); + TEST(stdc_test()); + TEST(stackoverflow_test()); + TEST(other_test()); + + return ret; +} + +int main(void) { + os_ClrHome(); + int failed_test = run_tests(); + if (failed_test != 0) { + char buf[sizeof("Failed test L-8388608\n")]; + boot_sprintf(buf, "Failed test L%d\n", failed_test); + fputs(buf, stdout); + } else { + fputs("All tests passed", stdout); + } + + while (!os_GetCSC()); + + return 0; +} From e32f31d5445dc67016ce3be0fe7a4c908902497f Mon Sep 17 00:00:00 2001 From: unknown <71151164+ZERICO2005@users.noreply.github.com> Date: Tue, 7 Oct 2025 
15:13:56 -0600 Subject: [PATCH 3/3] updated float handling in sscanf and fixed endptr for strto(f/d/ld) --- src/libc/sscanf.c | 242 ++++++++++++++---------- src/libc/strtof.c | 39 ++-- src/libc/strtold.c | 42 ++-- test/standalone/scanf/src/main.c | 7 + test/standalone/scanf/src/strtod_test.c | 185 ++++++++++++++++++ 5 files changed, 395 insertions(+), 120 deletions(-) create mode 100644 test/standalone/scanf/src/strtod_test.c diff --git a/src/libc/sscanf.c b/src/libc/sscanf.c index 0432e4ce9..a2b3ba082 100644 --- a/src/libc/sscanf.c +++ b/src/libc/sscanf.c @@ -8,82 +8,118 @@ #include #include -#define NO_FLOAT 0 -#define USE_STRTOD 1 -#define USE_STRTOF 2 -#define USE_STRTOLD 3 +#define NS_DISABLE_FLOAT 1 +#define NS_USE_STRTOD 2 +#define NS_USE_STRTOF 3 +#define NS_USE_STRTOLD 4 /*============================================================================*/ /* Config */ /*============================================================================*/ /** - * define to 0 or 1. Enables support for `[` set matching (such as "%3[^]^123abc]"). + * define to `restrict`, `__restrict`, `__restrict__`, or leave blank. + */ + #define NS_restrict __restrict + +/** + * define to 0 or 1 (default = 1) + * Enables support for `[` set matching (such as "%3[^]^123abc]"). */ -#define ENABLE_SET_MATCHING 1 + #define NS_ENABLE_SET_MATCHING 1 + +/** + * define to 0 or 1 (default = 0) + * Adds support for C23 `%b` format specifiers. + */ + #define NS_ENABLE_BINARY_CONVERSION_FORMAT 1 /** * Maximum characters (including null terminator) to scan for set matching. * Also determines the highest maximum field width that can be processed. - * Recommended values: + * Recommended values (default = 128): * - 128 or 256 if you are not targetting embedded platforms. * otherwise, for embedded targets: * - 16 allows for digits and a few other symbols to be matched. * - 72 allows for lowercase, uppercase, digits, and a few other symbols to be matched. 
*/ -#define SCAN_LIMIT 40 +#define NS_SCAN_LIMIT 40 /** * Select the float conversion routine used: - * NO_FLOAT : disables float conversion formats. - * USE_STRTOD : `strtod` (default) - * USE_STRTOF : `strtof` (speed) - * USE_STRTOLD : `strtold` (precision) + * NS_DISABLE_FLOAT: disables float conversion formats. + * NS_USE_STRTOD : `strtod` (default) + * NS_USE_STRTOF : `strtof` (speed) + * NS_USE_STRTOLD : `strtold` (precision) */ -#define STRING_TO_FLOAT USE_STRTOD +#define NS_STRING_TO_FLOAT NS_USE_STRTOD /** - * define to 0 or 1. Adds support for C23 `%b` format specifiers. + * define to 0 or 1 (default = 1). + * Enables/disables the `L` length modifier (e.g. `%Lf`). */ -#define ENABLE_BINARY_CONVERSION_FORMAT 1 +#define NS_ENABLE_LONG_DOUBLE 1 /** - * define to `restrict`, `__restrict`, `__restrict__`, or leave blank. + * define to 0 or 1 (default = 1). + * You can disable this if strto(f/d/ld) doesn't support hexadecimal floats. */ -#define NANO_SCANF_restrict __restrict +#define NS_ENABLE_HEXADECIMAL_FLOAT 0 /*============================================================================*/ /* Validate macros */ /*============================================================================*/ +#ifndef NS_restrict +# error "NS_restrict needs to be defined" +#endif + /* minimum size to read "-32768" */ -#if SCAN_LIMIT < 7 -# error "SCAN_LIMIT is too small" +#if NS_SCAN_LIMIT < 7 +# error "NS_SCAN_LIMIT is too small" +#endif + +#ifndef NS_ENABLE_SET_MATCHING +# error "NS_ENABLE_SET_MATCHING must be defined to 0 or 1" #endif -#ifndef STRING_TO_FLOAT -# error "STRING_TO_FLOAT must be defined to a value" +#ifndef NS_ENABLE_BINARY_CONVERSION_FORMAT +# error "NS_ENABLE_BINARY_CONVERSION_FORMAT must be defined to 0 or 1" #endif -#if STRING_TO_FLOAT == USE_STRTOD -# define STRING_TO_FLOAT_TYPE double -# define STRING_TO_FLOAT_FUNC strtod -#elif STRING_TO_FLOAT == USE_STRTOF -# define STRING_TO_FLOAT_TYPE float -# define STRING_TO_FLOAT_FUNC strtof -#elif STRING_TO_FLOAT
== USE_STRTOLD -# define STRING_TO_FLOAT_TYPE long double -# define STRING_TO_FLOAT_FUNC strtold -#elif STRING_TO_FLOAT != NO_FLOAT -# error "invalid STRING_TO_FLOAT value" +#ifndef NS_STRING_TO_FLOAT +# error "NS_STRING_TO_FLOAT must be defined to a value" #endif -#ifndef ENABLE_SET_MATCHING -# error "ENABLE_SET_MATCHING must be defined to 0 or 1" +#ifdef NS_ENABLE_FLOAT +# error "NS_ENABLE_FLOAT should not be defined here" #endif -#ifndef ENABLE_BINARY_CONVERSION_FORMAT -# error "ENABLE_BINARY_CONVERSION_FORMAT must be defined to 0 or 1" +#if NS_STRING_TO_FLOAT == NS_USE_STRTOD +# define NS_STRING_TO_FLOAT_TYPE double +# define NS_STRING_TO_FLOAT_FUNC strtod +# define NS_ENABLE_FLOAT 1 +#elif NS_STRING_TO_FLOAT == NS_USE_STRTOF +# define NS_STRING_TO_FLOAT_TYPE float +# define NS_STRING_TO_FLOAT_FUNC strtof +# define NS_ENABLE_FLOAT 1 +#elif NS_STRING_TO_FLOAT == NS_USE_STRTOLD +# define NS_STRING_TO_FLOAT_TYPE long double +# define NS_STRING_TO_FLOAT_FUNC strtold +# define NS_ENABLE_FLOAT 1 +#elif NS_STRING_TO_FLOAT == NS_DISABLE_FLOAT +# define NS_ENABLE_FLOAT 0 +#else +# error "invalid NS_STRING_TO_FLOAT value" +#endif + +#if NS_ENABLE_FLOAT +# ifndef NS_ENABLE_LONG_DOUBLE +# error "NS_ENABLE_LONG_DOUBLE must be defined to 0 or 1" +# endif +# ifndef NS_ENABLE_HEXADECIMAL_FLOAT +# error "NS_ENABLE_HEXADECIMAL_FLOAT must be defined to 0 or 1" +# endif #endif #if UINTMAX_MAX < ULLONG_MAX @@ -105,66 +141,71 @@ #define RETURN_IF_NULL(ptr) if ((ptr) == NULL) { return assignment_count; } +#if !NS_ENABLE_FLOAT +# undef NS_ENABLE_LONG_DOUBLE +# define NS_ENABLE_LONG_DOUBLE 0 +#endif /* NS_ENABLE_FLOAT */ + static intmax_t limit_strtoimax( - char const * NANO_SCANF_restrict str, - char * * NANO_SCANF_restrict endptr, + char const * NS_restrict str, + char const * * NS_restrict endptr, int base, size_t max_len, - char * NANO_SCANF_restrict scan_buf + char * NS_restrict scan_buf ) { if (max_len == 0) { return strtoimax(str, (char**)endptr, base); } - size_t copy_size = 
((max_len + 1) > SCAN_LIMIT) ? SCAN_LIMIT : (max_len + 1); + size_t copy_size = (max_len > (NS_SCAN_LIMIT - 1)) ? (NS_SCAN_LIMIT - 1) : max_len; strncpy(scan_buf, str, copy_size); /* null terminate */ - scan_buf[copy_size - 1] = '\0'; - char* scan_endptr; - intmax_t value = strtoimax(scan_buf, &scan_endptr, base); - *endptr = (char*)(str + (scan_endptr - scan_buf)); + scan_buf[copy_size] = '\0'; + char const * scan_endptr; + intmax_t value = strtoimax(scan_buf, (char**)&scan_endptr, base); + *endptr = str + (scan_endptr - scan_buf); return value; } static uintmax_t limit_strtoumax( - char const * NANO_SCANF_restrict str, - char * * NANO_SCANF_restrict endptr, + char const * NS_restrict str, + char const * * NS_restrict endptr, int base, size_t max_len, - char * NANO_SCANF_restrict scan_buf + char * NS_restrict scan_buf ) { if (max_len == 0) { return strtoumax(str, (char**)endptr, base); } - size_t copy_size = ((max_len + 1) > SCAN_LIMIT) ? SCAN_LIMIT : (max_len + 1); + size_t copy_size = (max_len > (NS_SCAN_LIMIT - 1)) ? 
(NS_SCAN_LIMIT - 1) : max_len; strncpy(scan_buf, str, copy_size); /* null terminate */ - scan_buf[copy_size - 1] = '\0'; - char* scan_endptr; - uintmax_t value = strtoumax(scan_buf, &scan_endptr, base); - *endptr = (char*)(str + (scan_endptr - scan_buf)); + scan_buf[copy_size] = '\0'; + char const * scan_endptr; + uintmax_t value = strtoumax(scan_buf, (char**)&scan_endptr, base); + *endptr = str + (scan_endptr - scan_buf); return value; } -#if STRING_TO_FLOAT -static STRING_TO_FLOAT_TYPE limit_strtofloat( - char const * NANO_SCANF_restrict str, - char * * NANO_SCANF_restrict endptr, +#if NS_ENABLE_FLOAT +static NS_STRING_TO_FLOAT_TYPE limit_strtofloat( + char const * NS_restrict str, + char const * * NS_restrict endptr, size_t max_len, - char * NANO_SCANF_restrict scan_buf + char * NS_restrict scan_buf ) { if (max_len == 0) { - return STRING_TO_FLOAT_FUNC(str, (char**)endptr); + return NS_STRING_TO_FLOAT_FUNC(str, (char**)endptr); } - size_t copy_size = ((max_len + 1) > SCAN_LIMIT) ? SCAN_LIMIT : (max_len + 1); + size_t copy_size = (max_len > (NS_SCAN_LIMIT - 1)) ? (NS_SCAN_LIMIT - 1) : max_len; strncpy(scan_buf, str, copy_size); /* null terminate */ - scan_buf[copy_size - 1] = '\0'; - char* scan_endptr; - STRING_TO_FLOAT_TYPE value = STRING_TO_FLOAT_FUNC(scan_buf, &scan_endptr); - *endptr = (char*)(str + (scan_endptr - scan_buf)); + scan_buf[copy_size] = '\0'; + char const * scan_endptr; + NS_STRING_TO_FLOAT_TYPE value = NS_STRING_TO_FLOAT_FUNC(scan_buf, (char**)&scan_endptr); + *endptr = str + (scan_endptr - scan_buf); return value; } -#endif /* STRING_TO_FLOAT */ +#endif /* NS_ENABLE_FLOAT */ /** * @author zerico2005 (Originally based off of https://github.com/tusharjois/bscanf) @@ -176,16 +217,21 @@ static STRING_TO_FLOAT_TYPE limit_strtofloat( * @note ranges such as "%5[0-9]" or "%5[^a-z]" are not supported * @note Assumes little endian * @note `wchar_t` is not supported + * @warning invalid floating point strings are not handled correctly. 
+ * Under the C standard, "100e" should fail to match to "%f" (since the + * exponent field may not be empty), and no value should be assigned. However, + * nano_scanf incorrectly matches "100e" to "%f", assigning a value of 100 + * instead of exiting. */ int vsscanf( - char const * const NANO_SCANF_restrict Buffer, - char const * const NANO_SCANF_restrict Format, + char const * const NS_restrict Buffer, + char const * const NS_restrict Format, va_list args ) { - char scan_buf[SCAN_LIMIT]; + char scan_buf[NS_SCAN_LIMIT]; int assignment_count = 0; - char const * NANO_SCANF_restrict buf = Buffer; - char const * NANO_SCANF_restrict fmt = Format; + char const * NS_restrict buf = Buffer; + char const * NS_restrict fmt = Format; if (buf == NULL || fmt == NULL) { return EOF; } @@ -208,9 +254,9 @@ int vsscanf( fmt++; bool is_suppressed = false; bool is_double_or_wide_char = false; - #if STRING_TO_FLOAT + #if NS_ENABLE_FLOAT bool is_long_double = false; - #endif /* STRING_TO_FLOAT */ + #endif /* NS_ENABLE_FLOAT */ size_t max_width = 0; size_t ptr_size = sizeof(int); if (*fmt == '*') { @@ -219,14 +265,14 @@ int vsscanf( } /* test for digits */ if (isdigit(*fmt)) { - char *endptr; + char const * endptr; /** * @remarks Either strtoumax or strtoul can be used here. * strtoul might be faster, however it also means that we link * another routine increasing size. So strtoumax is used instead so * use can reduce the amount of routines we need to link to. 
*/ - max_width = (size_t)strtoumax(fmt, &endptr, 10); + max_width = (size_t)strtoumax(fmt, (char**)&endptr, 10); if (fmt == endptr || max_width == 0) { /* failed */ return assignment_count; @@ -264,14 +310,14 @@ int vsscanf( ptr_size = sizeof(ptrdiff_t); fmt++; } break; - #if STRING_TO_FLOAT + #if NS_ENABLE_LONG_DOUBLE case 'L': { is_long_double = true; ptr_size = 0; fmt++; } break; - #endif /* STRING_TO_FLOAT */ + #endif /* NS_ENABLE_LONG_DOUBLE */ case 'j': { ptr_size = sizeof(intmax_t); @@ -304,7 +350,7 @@ int vsscanf( return assignment_count; } TEST_LENGTH_MODIFIER(); - void* ptr = va_arg(args, void*); + void * ptr = va_arg(args, void*); RETURN_IF_NULL(ptr); unsigned long long diff = buf - Buffer; memcpy(ptr, &diff, ptr_size); @@ -324,7 +370,7 @@ int vsscanf( /* unimplemented */ return assignment_count; } - char const * NANO_SCANF_restrict const begin = buf; + char const * NS_restrict const begin = buf; for (; max_width --> 0;) { if (*buf == '\0') { break; @@ -336,7 +382,7 @@ int vsscanf( } size_t copy_size = buf - begin; if (!is_suppressed) { - char* ptr = va_arg(args, char*); + char * ptr = va_arg(args, char*); RETURN_IF_NULL(ptr); memcpy(ptr, begin, copy_size); if (string_format) { @@ -348,7 +394,7 @@ int vsscanf( fmt++; continue; } break; - #if ENABLE_SET_MATCHING + #if NS_ENABLE_SET_MATCHING case '[': /* match range */ { fmt++; @@ -370,7 +416,7 @@ int vsscanf( starts_with_bracket = true; fmt++; } - char const * NANO_SCANF_restrict last_bracket = strchr(fmt, ']'); + char const * NS_restrict last_bracket = strchr(fmt, ']'); if (last_bracket == NULL) { /* "%[^]" is still considered to be an empty sequence */ return assignment_count; @@ -380,7 +426,7 @@ int vsscanf( } size_t scan_length = (last_bracket - fmt); - if (scan_length >= SCAN_LIMIT) { + if (scan_length >= NS_SCAN_LIMIT) { /* too many characters */ return assignment_count; } @@ -401,7 +447,7 @@ int vsscanf( } if (!is_suppressed) { - char* ptr = va_arg(args, char*); + char * ptr = va_arg(args, char*); 
RETURN_IF_NULL(ptr); memcpy(ptr, buf, match_length); /* null terminate */ @@ -412,12 +458,12 @@ int vsscanf( buf += match_length; continue; } break; - #endif /* ENABLE_SET_MATCHING */ + #endif /* NS_ENABLE_SET_MATCHING */ case 'i': case 'd': /* signed integer */ { TEST_LENGTH_MODIFIER(); - char *endptr; + char const * endptr; int base = ((*fmt == 'd') ? 10 : 0); intmax_t value = limit_strtoimax(buf, &endptr, base, max_width, scan_buf); if (buf == endptr) { @@ -425,7 +471,7 @@ int vsscanf( return assignment_count; } if (!is_suppressed) { - void* ptr = va_arg(args, void*); + void * ptr = va_arg(args, void*); RETURN_IF_NULL(ptr); memcpy(ptr, &value, ptr_size); assignment_count++; @@ -433,9 +479,9 @@ int vsscanf( buf = endptr; fmt++; } break; - #if ENABLE_BINARY_CONVERSION_FORMAT + #if NS_ENABLE_BINARY_CONVERSION_FORMAT case 'b': - #endif /* ENABLE_BINARY_CONVERSION_FORMAT */ + #endif /* NS_ENABLE_BINARY_CONVERSION_FORMAT */ case 'u': case 'o': case 'x': @@ -443,17 +489,17 @@ int vsscanf( case 'p': /* unsigned integer or pointer */ { TEST_LENGTH_MODIFIER(); - char *endptr; + char const * endptr; int base = 10; if (*fmt == 'X' || *fmt == 'x' || *fmt == 'p') { base = 16; if (*fmt == 'p') { ptr_size = sizeof(void*); } - #if ENABLE_BINARY_CONVERSION_FORMAT + #if NS_ENABLE_BINARY_CONVERSION_FORMAT } else if (*fmt == 'b') { base = 2; - #endif /* ENABLE_BINARY_CONVERSION_FORMAT*/ + #endif /* NS_ENABLE_BINARY_CONVERSION_FORMAT*/ } else if (*fmt == 'o') { base = 8; } @@ -463,7 +509,7 @@ int vsscanf( return assignment_count; } if (!is_suppressed) { - void* ptr = va_arg(args, void*); + void * ptr = va_arg(args, void*); RETURN_IF_NULL(ptr); memcpy(ptr, &value, ptr_size); assignment_count++; @@ -471,9 +517,11 @@ int vsscanf( buf = endptr; fmt++; } break; - #if STRING_TO_FLOAT + #if NS_ENABLE_FLOAT + #if NS_ENABLE_HEXADECIMAL_FLOAT case 'a': case 'A': + #endif /* NS_ENABLE_HEXADECIMAL_FLOAT */ case 'e': case 'E': case 'f': @@ -481,14 +529,14 @@ int vsscanf( case 'g': case 'G': /* 
float */ { - char *endptr; - STRING_TO_FLOAT_TYPE value = limit_strtofloat(buf, &endptr, max_width, scan_buf); + char const * endptr; + NS_STRING_TO_FLOAT_TYPE value = limit_strtofloat(buf, &endptr, max_width, scan_buf); if (buf == endptr) { /* failed */ return assignment_count; } if (!is_suppressed) { - void* ptr = va_arg(args, void*); + void * ptr = va_arg(args, void*); RETURN_IF_NULL(ptr); if (is_long_double) { *(long double*)ptr = (long double)value; @@ -502,7 +550,7 @@ int vsscanf( buf = endptr; fmt++; } break; - #endif /* STRING_TO_FLOAT */ + #endif /* NS_ENABLE_FLOAT */ default: /* unknown format */ { return assignment_count; @@ -512,7 +560,7 @@ int vsscanf( return assignment_count; } -int sscanf(const char * NANO_SCANF_restrict buffer, const char * NANO_SCANF_restrict format, ...) +int sscanf(char const * NS_restrict buffer, char const * NS_restrict format, ...) { va_list vlist; va_start(vlist, format); diff --git a/src/libc/strtof.c b/src/libc/strtof.c index 912cfdd24..893f13107 100644 --- a/src/libc/strtof.c +++ b/src/libc/strtof.c @@ -22,6 +22,8 @@ typedef union F32_pun { uint32_t bin; } F32_pun; +#define c_isdigit(c) ((c) >= '0' && (c) <= '9') + /************************************************* * * strtof - string to float conversion @@ -39,14 +41,14 @@ typedef union F32_pun { * @remarks `*str >= '0' && *str <= '9'` is smaller than calls to `isdigit(*str)` * @todo Add support for INF INFINITY NAN NAN(...) 
*/ -float _strtof_c(const char *__restrict nptr, char **__restrict endptr) +float _strtof_c(char const * const __restrict nptr, char **__restrict endptr) { F32_pun val; int frac = 0; int exp = 0; bool sign = false; bool exp_sign = false; - char *str = (char*)nptr; + char const *__restrict str = nptr; while (isspace(*str)) { ++str; @@ -59,41 +61,50 @@ float _strtof_c(const char *__restrict nptr, char **__restrict endptr) ++str; } - val.flt = 0.0f; + bool has_digits = false; - while (*str >= '0' && *str <= '9') { + val.flt = 0.0f; + while (c_isdigit(*str)) { + has_digits = true; val.flt = val.flt * 10.0f + (float)(*str - '0'); ++str; } if (*str == '.') { ++str; - while (*str >= '0' && *str <= '9') { + while (c_isdigit(*str)) { + has_digits = true; val.flt = val.flt * 10.0f + (float)(*str - '0'); ++frac; ++str; } } + if (!has_digits) { + str = nptr; + goto finish; + } + if (*str == 'e' || *str == 'E') { + char const * const end_of_digits = str; ++str; if (*str == '-') { exp_sign = true; ++str; } else if (*str == '+') { - exp_sign = false; ++str; } - while (*str >= '0' && *str <= '9') { + if (!c_isdigit(*str)) { + str = end_of_digits; + val.flt = 0.0f; + goto finish; + } + while (c_isdigit(*str)) { exp = exp * 10 + (*str - '0'); ++str; } } - if (endptr) { - *endptr = (char*)str; - } - if (exp_sign) { exp = -exp; } @@ -125,6 +136,12 @@ float _strtof_c(const char *__restrict nptr, char **__restrict endptr) if (sign) { val.flt = -val.flt; } + +finish: + if (endptr) { + *endptr = (char*)str; + } + return val.flt; } diff --git a/src/libc/strtold.c b/src/libc/strtold.c index ac4e86b7b..14b16b090 100644 --- a/src/libc/strtold.c +++ b/src/libc/strtold.c @@ -24,6 +24,8 @@ typedef union F64_pun { uint64_t bin; } F64_pun; +#define c_isdigit(c) ((c) >= '0' && (c) <= '9') + /************************************************* * * strtold - string to long double conversion @@ -42,14 +44,14 @@ typedef union F64_pun { * @remarks `*str >= '0' && *str <= '9'` is smaller than calls to 
`isdigit(*str)` * @todo Add support for INF INFINITY NAN NAN(...) */ -long double strtold(const char *__restrict nptr, char **__restrict endptr) +long double strtold(char const * const __restrict nptr, char **__restrict endptr) { F64_pun val; int frac = 0; int exp = 0; bool sign = false; bool exp_sign = false; - const char *str = (const char*)nptr; + char const *__restrict str = nptr; while (isspace(*str)) { ++str; @@ -62,40 +64,50 @@ long double strtold(const char *__restrict nptr, char **__restrict endptr) ++str; } + bool has_digits = false; + val.flt = 0.0L; - while (*str >= '0' && *str <= '9') { + while (c_isdigit(*str)) { + has_digits = true; val.flt = val.flt * 10.0L + (long double)(*str - '0'); ++str; } if (*str == '.') { ++str; - while (*str >= '0' && *str <= '9') { + while (c_isdigit(*str)) { + has_digits = true; val.flt = val.flt * 10.0L + (long double)(*str - '0'); ++frac; ++str; } } + if (!has_digits) { + str = nptr; + goto finish; + } + if (*str == 'e' || *str == 'E') { + char const * const end_of_digits = str; ++str; if (*str == '-') { exp_sign = true; ++str; } else if (*str == '+') { - exp_sign = false; ++str; } - while (*str >= '0' && *str <= '9') { + if (!c_isdigit(*str)) { + str = end_of_digits; + val.flt = 0.0L; + goto finish; + } + while (c_isdigit(*str)) { exp = exp * 10 + (*str - '0'); ++str; } } - if (endptr) { - *endptr = (char*)str; - } - if (exp_sign) { exp = -exp; } @@ -118,8 +130,8 @@ long double strtold(const char *__restrict nptr, char **__restrict endptr) val.flt /= 10.0L; if (val.bin == 0) { - errno = ERANGE; - break; + errno = ERANGE; + break; } ++exp; } @@ -127,6 +139,12 @@ long double strtold(const char *__restrict nptr, char **__restrict endptr) if (sign) { val.flt = -val.flt; } + +finish: + if (endptr) { + *endptr = (char*)str; + } + return val.flt; } diff --git a/test/standalone/scanf/src/main.c b/test/standalone/scanf/src/main.c index 86a3e4fee..733c0204d 100644 --- a/test/standalone/scanf/src/main.c +++ 
b/test/standalone/scanf/src/main.c @@ -25,6 +25,7 @@ //------------------------------------------------------------------------------ #define C(expr) if (!(expr)) { return __LINE__; } +#define B(expr) if (!(expr)) { return false; } #define TEST(test) { ret = test; if (ret != 0) { return ret; }} @@ -184,6 +185,11 @@ int stackoverflow_test(void) { return 0; } +/** + * @brief tests if strto(f/d/ld) sets endptr correctly + */ +bool strtod_test(void); + int other_test(void) { const char* text = "abc]]]def]ghi^^]]^^^jklm^^"; char buf_1[10] = {'\0'}; @@ -216,6 +222,7 @@ int other_test(void) { int run_tests(void) { int ret = 0; + C(strtod_test()); TEST(basic_test()); TEST(stdc_test()); TEST(stackoverflow_test()); diff --git a/test/standalone/scanf/src/strtod_test.c b/test/standalone/scanf/src/strtod_test.c new file mode 100644 index 000000000..019e8bc37 --- /dev/null +++ b/test/standalone/scanf/src/strtod_test.c @@ -0,0 +1,185 @@ +#include +#include +#include + +#define B(expr) if (!(expr)) { return false; } + +/** + * @brief tests if strto(f/d/ld) sets endptr correctly + */ +static bool strtod_verify(const char* str, ptrdiff_t offset) { + char* endptr; + (void)strtod(str, &endptr); + ptrdiff_t diff = (ptrdiff_t)(endptr - str); + if (diff != offset) { + return false; + } + return true; +} + +bool strtod_test(void) { + + B(strtod_verify("" , 0)); + B(strtod_verify("." , 0)); + B(strtod_verify("0.", 2)); + B(strtod_verify(".0", 2)); + B(strtod_verify(" " "" , 0)); + B(strtod_verify(" " "." , 0)); + B(strtod_verify(" " "0.", 3)); + B(strtod_verify(" " ".0", 3)); + B(strtod_verify("" "e", 0)); + B(strtod_verify("." "e", 0)); + B(strtod_verify("0." "e", 2)); + B(strtod_verify(".0" "e", 2)); + B(strtod_verify("" "e+", 0)); + B(strtod_verify("." "e+", 0)); + B(strtod_verify("0." "e+", 2)); + B(strtod_verify(".0" "e+", 2)); + B(strtod_verify("" "e1", 0)); + B(strtod_verify("." "e1", 0)); + B(strtod_verify("0." 
"e1", 4)); + B(strtod_verify(".0" "e1", 4)); + B(strtod_verify("" "e+1", 0)); + B(strtod_verify("." "e+1", 0)); + B(strtod_verify("0." "e+1", 5)); + B(strtod_verify(".0" "e+1", 5)); + B(strtod_verify(" " "" "e", 0)); + B(strtod_verify(" " "." "e", 0)); + B(strtod_verify(" " "0." "e", 3)); + B(strtod_verify(" " ".0" "e", 3)); + B(strtod_verify(" " "" "e+", 0)); + B(strtod_verify(" " "." "e+", 0)); + B(strtod_verify(" " "0." "e+", 3)); + B(strtod_verify(" " ".0" "e+", 3)); + B(strtod_verify(" " "" "e1", 0)); + B(strtod_verify(" " "." "e1", 0)); + B(strtod_verify(" " "0." "e1", 5)); + B(strtod_verify(" " ".0" "e1", 5)); + B(strtod_verify(" " "" "e+1", 0)); + B(strtod_verify(" " "." "e+1", 0)); + B(strtod_verify(" " "0." "e+1", 6)); + B(strtod_verify(" " ".0" "e+1", 6)); + B(strtod_verify("+" , 0)); + B(strtod_verify("+." , 0)); + B(strtod_verify("+0.", 3)); + B(strtod_verify("+.0", 3)); + B(strtod_verify(" " "+" , 0)); + B(strtod_verify(" " "+." , 0)); + B(strtod_verify(" " "+0.", 4)); + B(strtod_verify(" " "+.0", 4)); + B(strtod_verify("+" "e", 0)); + B(strtod_verify("+." "e", 0)); + B(strtod_verify("+0." "e", 3)); + B(strtod_verify("+.0" "e", 3)); + B(strtod_verify("+" "e+", 0)); + B(strtod_verify("+." "e+", 0)); + B(strtod_verify("+0." "e+", 3)); + B(strtod_verify("+.0" "e+", 3)); + B(strtod_verify("+" "e1", 0)); + B(strtod_verify("+." "e1", 0)); + B(strtod_verify("+0." "e1", 5)); + B(strtod_verify("+.0" "e1", 5)); + B(strtod_verify("+" "e+1", 0)); + B(strtod_verify("+." "e+1", 0)); + B(strtod_verify("+0." "e+1", 6)); + B(strtod_verify("+.0" "e+1", 6)); + B(strtod_verify(" " "+" "e", 0)); + B(strtod_verify(" " "+." "e", 0)); + B(strtod_verify(" " "+0." "e", 4)); + B(strtod_verify(" " "+.0" "e", 4)); + B(strtod_verify(" " "+" "e+", 0)); + B(strtod_verify(" " "+." "e+", 0)); + B(strtod_verify(" " "+0." "e+", 4)); + B(strtod_verify(" " "+.0" "e+", 4)); + B(strtod_verify(" " "+" "e1", 0)); + B(strtod_verify(" " "+." "e1", 0)); + B(strtod_verify(" " "+0." 
"e1", 6)); + B(strtod_verify(" " "+.0" "e1", 6)); + B(strtod_verify(" " "+" "e+1", 0)); + B(strtod_verify(" " "+." "e+1", 0)); + B(strtod_verify(" " "+0." "e+1", 7)); + B(strtod_verify(" " "+.0" "e+1", 7)); + + B(strtod_verify("" "r", 0)); + B(strtod_verify("." "r", 0)); + B(strtod_verify("0." "r", 2)); + B(strtod_verify(".0" "r", 2)); + B(strtod_verify(" " "" "r", 0)); + B(strtod_verify(" " "." "r", 0)); + B(strtod_verify(" " "0." "r", 3)); + B(strtod_verify(" " ".0" "r", 3)); + B(strtod_verify("" "e" "r", 0)); + B(strtod_verify("." "e" "r", 0)); + B(strtod_verify("0." "e" "r", 2)); + B(strtod_verify(".0" "e" "r", 2)); + B(strtod_verify("" "e+" "r", 0)); + B(strtod_verify("." "e+" "r", 0)); + B(strtod_verify("0." "e+" "r", 2)); + B(strtod_verify(".0" "e+" "r", 2)); + B(strtod_verify("" "e1" "r", 0)); + B(strtod_verify("." "e1" "r", 0)); + B(strtod_verify("0." "e1" "r", 4)); + B(strtod_verify(".0" "e1" "r", 4)); + B(strtod_verify("" "e+1" "r", 0)); + B(strtod_verify("." "e+1" "r", 0)); + B(strtod_verify("0." "e+1" "r", 5)); + B(strtod_verify(".0" "e+1" "r", 5)); + B(strtod_verify(" " "" "e" "r", 0)); + B(strtod_verify(" " "." "e" "r", 0)); + B(strtod_verify(" " "0." "e" "r", 3)); + B(strtod_verify(" " ".0" "e" "r", 3)); + B(strtod_verify(" " "" "e+" "r", 0)); + B(strtod_verify(" " "." "e+" "r", 0)); + B(strtod_verify(" " "0." "e+" "r", 3)); + B(strtod_verify(" " ".0" "e+" "r", 3)); + B(strtod_verify(" " "" "e1" "r", 0)); + B(strtod_verify(" " "." "e1" "r", 0)); + B(strtod_verify(" " "0." "e1" "r", 5)); + B(strtod_verify(" " ".0" "e1" "r", 5)); + B(strtod_verify(" " "" "e+1" "r", 0)); + B(strtod_verify(" " "." "e+1" "r", 0)); + B(strtod_verify(" " "0." "e+1" "r", 6)); + B(strtod_verify(" " ".0" "e+1" "r", 6)); + B(strtod_verify("+" "r", 0)); + B(strtod_verify("+." "r", 0)); + B(strtod_verify("+0." "r", 3)); + B(strtod_verify("+.0" "r", 3)); + B(strtod_verify(" " "+" "r", 0)); + B(strtod_verify(" " "+." "r", 0)); + B(strtod_verify(" " "+0." 
"r", 4)); + B(strtod_verify(" " "+.0" "r", 4)); + B(strtod_verify("+" "e" "r", 0)); + B(strtod_verify("+." "e" "r", 0)); + B(strtod_verify("+0." "e" "r", 3)); + B(strtod_verify("+.0" "e" "r", 3)); + B(strtod_verify("+" "e+" "r", 0)); + B(strtod_verify("+." "e+" "r", 0)); + B(strtod_verify("+0." "e+" "r", 3)); + B(strtod_verify("+.0" "e+" "r", 3)); + B(strtod_verify("+" "e1" "r", 0)); + B(strtod_verify("+." "e1" "r", 0)); + B(strtod_verify("+0." "e1" "r", 5)); + B(strtod_verify("+.0" "e1" "r", 5)); + B(strtod_verify("+" "e+1" "r", 0)); + B(strtod_verify("+." "e+1" "r", 0)); + B(strtod_verify("+0." "e+1" "r", 6)); + B(strtod_verify("+.0" "e+1" "r", 6)); + B(strtod_verify(" " "+" "e" "r", 0)); + B(strtod_verify(" " "+." "e" "r", 0)); + B(strtod_verify(" " "+0." "e" "r", 4)); + B(strtod_verify(" " "+.0" "e" "r", 4)); + B(strtod_verify(" " "+" "e+" "r", 0)); + B(strtod_verify(" " "+." "e+" "r", 0)); + B(strtod_verify(" " "+0." "e+" "r", 4)); + B(strtod_verify(" " "+.0" "e+" "r", 4)); + B(strtod_verify(" " "+" "e1" "r", 0)); + B(strtod_verify(" " "+." "e1" "r", 0)); + B(strtod_verify(" " "+0." "e1" "r", 6)); + B(strtod_verify(" " "+.0" "e1" "r", 6)); + B(strtod_verify(" " "+" "e+1" "r", 0)); + B(strtod_verify(" " "+." "e+1" "r", 0)); + B(strtod_verify(" " "+0." "e+1" "r", 7)); + B(strtod_verify(" " "+.0" "e+1" "r", 7)); + + return true; +}