Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support UTF-8 string datatype encoding #87

Merged
merged 1 commit into from Dec 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
9 changes: 8 additions & 1 deletion src/rest_vol.h
Expand Up @@ -496,7 +496,7 @@ typedef struct {
} upload_info;

/* Structure that keeps track of semantic version. */
typedef struct server_api_version {
typedef struct {
size_t major;
size_t minor;
size_t patch;
Expand Down Expand Up @@ -753,6 +753,10 @@ herr_t RV_tconv_init(hid_t src_type_id, size_t *src_type_size, hid_t dst_type_id
size_t num_elem, hbool_t clear_tconv_buf, hbool_t dst_file, void **tconv_buf,
void **bkg_buf, RV_tconv_reuse_t *reuse, hbool_t *fill_bkg);

/*
 * REST VOL Datatype helper
 *
 * Converts the datatype `type_id` into a JSON representation returned through
 * `type_body`. Callers treat a negative return value as failure.
 *
 *   type_body_len  - receives the length of the JSON body; the recursive call
 *                    for compound members passes NULL here.
 *   nested         - FALSE at the top-level call sites and for compound
 *                    members, TRUE when converting an array's base type
 *                    (NOTE(review): exact effect on the output is not visible
 *                    here - confirm against the definition).
 *   server_version - version of the server being targeted; used to reject
 *                    fixed-length UTF-8 strings on servers that do not
 *                    support them.
 */
herr_t RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_len, hbool_t nested,
server_api_version server_version);

/*
 * Evaluates to non-zero when the semantic version stored in `version` (an
 * object with `major`, `minor` and `patch` members) is greater than or equal
 * to major_needed.minor_needed.patch_needed, and to zero otherwise.
 *
 * The whole expansion and every argument are parenthesized so the macro can
 * be negated or combined with other operators without precedence surprises.
 * `version` is evaluated more than once, so it must be free of side effects.
 */
#define SERVER_VERSION_MATCHES_OR_EXCEEDS(version, major_needed, minor_needed, patch_needed)                 \
    (((version).major > (major_needed)) ||                                                                   \
     ((version).major == (major_needed) && (version).minor > (minor_needed)) ||                              \
     ((version).major == (major_needed) && (version).minor == (minor_needed) &&                              \
      (version).patch >= (patch_needed)))
Expand All @@ -763,6 +767,9 @@ herr_t RV_tconv_init(hid_t src_type_id, size_t *src_type_size, hid_t dst_type_id
/* True when the server version is at least 0.8.5.
 * NOTE(review): judging by the name, this gates use of a storage-size query
 * on the server - confirm against the callers. */
#define SERVER_VERSION_SUPPORTS_GET_STORAGE_SIZE(version) \
(SERVER_VERSION_MATCHES_OR_EXCEEDS(version, 0, 8, 5))

/* True when the server version is at least 0.8.5. Used when converting a
 * string datatype to JSON: fixed-length UTF-8 strings are rejected for
 * servers older than this. */
#define SERVER_VERSION_SUPPORTS_FIXED_LENGTH_UTF8(version) \
(SERVER_VERSION_MATCHES_OR_EXCEEDS(version, 0, 8, 5))

#ifdef __cplusplus
}
#endif
Expand Down
3 changes: 2 additions & 1 deletion src/rest_vol_attr.c
Expand Up @@ -211,7 +211,8 @@ RV_attr_create(void *obj, const H5VL_loc_params_t *loc_params, const char *attr_
/* Form the request body to give the new Attribute its properties */

/* Form the Datatype portion of the Attribute create request */
if (RV_convert_datatype_to_JSON(type_id, &datatype_body, &datatype_body_len, FALSE) < 0)
if (RV_convert_datatype_to_JSON(type_id, &datatype_body, &datatype_body_len, FALSE,
parent->domain->u.file.server_version) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTCONVERT, NULL,
"can't convert attribute's datatype to JSON representation");

Expand Down
3 changes: 2 additions & 1 deletion src/rest_vol_dataset.c
Expand Up @@ -3586,7 +3586,8 @@ RV_setup_dataset_create_request_body(void *parent_obj, const char *name, hid_t t
FUNC_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "dataset create request output buffer was NULL");

/* Form the Datatype portion of the Dataset create request */
if (RV_convert_datatype_to_JSON(type_id, &datatype_body, &datatype_body_len, FALSE) < 0)
if (RV_convert_datatype_to_JSON(type_id, &datatype_body, &datatype_body_len, FALSE,
pobj->domain->u.file.server_version) < 0)
FUNC_GOTO_ERROR(H5E_DATASET, H5E_CANTCONVERT, FAIL,
"can't convert dataset's datatype to JSON representation");

Expand Down
51 changes: 38 additions & 13 deletions src/rest_vol_datatype.c
Expand Up @@ -153,7 +153,8 @@ RV_datatype_commit(void *obj, const H5VL_loc_params_t *loc_params, const char *n
new_datatype->u.datatype.tcpl_id = H5P_DATATYPE_CREATE_DEFAULT;

/* Convert the datatype into JSON to be used in the request body */
if (RV_convert_datatype_to_JSON(type_id, &datatype_body, &datatype_body_len, FALSE) < 0)
if (RV_convert_datatype_to_JSON(type_id, &datatype_body, &datatype_body_len, FALSE,
parent->domain->u.file.server_version) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTCONVERT, NULL, "can't convert datatype to JSON representation");

/* If this is not a H5Tcommit_anon call, create a link for the Datatype
Expand Down Expand Up @@ -711,7 +712,8 @@ RV_parse_datatype(char *type, hbool_t need_truncate)
* July, 2017
*/
herr_t
RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_len, hbool_t nested)
RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_len, hbool_t nested,
server_api_version server_version)
{
H5T_class_t type_class;
const char *leading_string = "\"type\": "; /* Leading string for all datatypes */
Expand Down Expand Up @@ -857,12 +859,36 @@ RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_l

case H5T_STRING: {
const char *const cset_ascii_string = "H5T_CSET_ASCII";
htri_t is_vlen;
const char *const cset_utf8_string = "H5T_CSET_UTF8";
const char *cset = NULL;
H5T_cset_t char_set = H5T_CSET_ERROR;

char_set = H5Tget_cset(type_id);

htri_t is_vlen;

if ((is_vlen = H5Tis_variable_str(type_id)) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_BADVALUE, FAIL,
"can't determine if datatype is variable-length string");

switch (char_set) {
case (H5T_CSET_ASCII):
cset = cset_ascii_string;
break;
case (H5T_CSET_UTF8):
if (!is_vlen && !(SERVER_VERSION_SUPPORTS_FIXED_LENGTH_UTF8(server_version)))
FUNC_GOTO_ERROR(
H5E_DATATYPE, H5E_UNSUPPORTED, FAIL,
"fixed-length UTF8 strings not supported until server version 0.8.5+");

cset = cset_utf8_string;
break;
case (H5T_CSET_ERROR):
default:
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_BADVALUE, FAIL, "invalid character set for string");
break;
}

/* Build the Datatype body by appending the character set for the string type,
* any type of string padding, and the length of the string */
/* Note: currently only H5T_CSET_ASCII and H5T_CSET_UTF8 are supported for the character set and
Expand All @@ -879,8 +905,7 @@ RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_l
"\"length\": \"H5T_VARIABLE\""
"}";

bytes_to_print =
(strlen(fmt_string) - 4) + strlen(cset_ascii_string) + strlen(nullterm_string) + 1;
bytes_to_print = (strlen(fmt_string) - 4) + strlen(cset) + strlen(nullterm_string) + 1;

buf_ptrdiff = out_string_curr_pos - out_string;
if (buf_ptrdiff < 0)
Expand All @@ -892,7 +917,7 @@ RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_l
out_string_curr_pos, H5E_DATATYPE, FAIL);

if ((bytes_printed = snprintf(out_string_curr_pos, out_string_len - leading_string_len,
fmt_string, cset_ascii_string, nullterm_string)) < 0)
fmt_string, cset, nullterm_string)) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_SYSERRSTR, FAIL, "snprintf error");

if ((size_t)bytes_printed >= out_string_len - leading_string_len)
Expand All @@ -910,8 +935,8 @@ RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_l
"\"length\": %zu"
"}";

bytes_to_print = (strlen(fmt_string) - 7) + strlen(cset_ascii_string) +
strlen(nullpad_string) + MAX_NUM_LENGTH + 1;
bytes_to_print =
(strlen(fmt_string) - 7) + strlen(cset) + strlen(nullpad_string) + MAX_NUM_LENGTH + 1;

buf_ptrdiff = out_string_curr_pos - out_string;
if (buf_ptrdiff < 0)
Expand All @@ -923,7 +948,7 @@ RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_l
out_string_curr_pos, H5E_DATATYPE, FAIL);

if ((bytes_printed = snprintf(out_string_curr_pos, out_string_len - leading_string_len,
fmt_string, cset_ascii_string, nullpad_string, type_size)) < 0)
fmt_string, cset, nullpad_string, type_size)) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_SYSERRSTR, FAIL, "snprintf error");

if ((size_t)bytes_printed >= out_string_len - leading_string_len)
Expand Down Expand Up @@ -981,8 +1006,8 @@ RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_l
printf("-> Converting compound datatype member %zu to JSON\n\n", i);
#endif

if (RV_convert_datatype_to_JSON(compound_member, &compound_member_strings[i], NULL, FALSE) <
0)
if (RV_convert_datatype_to_JSON(compound_member, &compound_member_strings[i], NULL, FALSE,
server_version) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTCONVERT, FAIL,
"can't convert compound datatype member to JSON representation");

Expand Down Expand Up @@ -1243,8 +1268,8 @@ RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_l
printf("-> Converting array datatype's base datatype to JSON\n\n");
#endif

if (RV_convert_datatype_to_JSON(type_base_class, &array_base_type, &array_base_type_len, TRUE) <
0)
if (RV_convert_datatype_to_JSON(type_base_class, &array_base_type, &array_base_type_len, TRUE,
server_version) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTCONVERT, FAIL,
"can't convert datatype to JSON representation");

Expand Down
3 changes: 1 addition & 2 deletions src/rest_vol_datatype.h
Expand Up @@ -26,8 +26,7 @@ herr_t RV_datatype_get(void *obj, H5VL_datatype_get_args_t *args, hid_t dxpl_id,
herr_t RV_datatype_close(void *dt, hid_t dxpl_id, void **req);

/* REST VOL Datatype helper functions */
hid_t RV_parse_datatype(char *type, hbool_t need_truncate);
herr_t RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_len, hbool_t nested);
hid_t RV_parse_datatype(char *type, hbool_t need_truncate);

/* Determine whether datatype conversion is necessary between 'same' datatypes */
static htri_t RV_detect_vl_vlstr_ref(hid_t type_id);
Expand Down