Skip to content

Commit

Permalink
Support UTF8 string datatype encoding
Browse files Browse the repository at this point in the history
  • Loading branch information
mattjala committed Nov 16, 2023
1 parent c0bd119 commit ade4340
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 18 deletions.
9 changes: 8 additions & 1 deletion src/rest_vol.h
Expand Up @@ -496,7 +496,7 @@ typedef struct {
} upload_info;

/* Structure that keeps track of semantic version. */
typedef struct server_api_version {
typedef struct {
size_t major;
size_t minor;
size_t patch;
Expand Down Expand Up @@ -753,6 +753,10 @@ herr_t RV_tconv_init(hid_t src_type_id, size_t *src_type_size, hid_t dst_type_id
size_t num_elem, hbool_t clear_tconv_buf, hbool_t dst_file, void **tconv_buf,
void **bkg_buf, RV_tconv_reuse_t *reuse, hbool_t *fill_bkg);

/*
 * REST VOL Datatype helper: converts the HDF5 datatype `type_id` into its
 * JSON representation for use in a request body.
 *
 *   type_id        - datatype to convert.
 *   type_body      - out: receives the JSON string.
 *                    NOTE(review): presumably allocated by this function and
 *                    owned by the caller afterwards - confirm.
 *   type_body_len  - out: receives the length of the JSON string; callers may
 *                    pass NULL when the length is not needed.
 *   nested         - callers pass TRUE when converting the base type of an
 *                    array datatype and FALSE for top-level types and
 *                    compound members.
 *   server_version - version of the target server; used to reject datatypes
 *                    the server cannot represent (e.g. fixed-length UTF-8
 *                    strings on servers that lack support for them).
 *
 * Returns a negative value on failure.
 */
herr_t RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_len, hbool_t nested,
server_api_version server_version);

/*
 * Evaluates to non-zero when the semantic version held in `version` (any
 * object with `major`, `minor` and `patch` members) is greater than or equal
 * to major_needed.minor_needed.patch_needed, and to zero otherwise.
 *
 * The whole expansion and every argument are parenthesized so the macro can
 * be negated or combined with other operators safely, e.g.
 * `!SERVER_VERSION_MATCHES_OR_EXCEEDS(v, 0, 8, 5)`.
 *
 * Note: each argument is evaluated more than once, so do not pass
 * expressions with side effects.
 */
#define SERVER_VERSION_MATCHES_OR_EXCEEDS(version, major_needed, minor_needed, patch_needed)                 \
    (((version).major > (major_needed)) ||                                                                   \
     ((version).major == (major_needed) && (version).minor > (minor_needed)) ||                              \
     ((version).major == (major_needed) && (version).minor == (minor_needed) &&                              \
      (version).patch >= (patch_needed)))
Expand All @@ -763,6 +767,9 @@ herr_t RV_tconv_init(hid_t src_type_id, size_t *src_type_size, hid_t dst_type_id
/* Non-zero when `version` is 0.8.5 or newer.
 * NOTE(review): presumably the first server release able to report a
 * dataset's storage size - confirm against the server changelog. */
#define SERVER_VERSION_SUPPORTS_GET_STORAGE_SIZE(version) \
(SERVER_VERSION_MATCHES_OR_EXCEEDS(version, 0, 8, 5))

/* Non-zero when `version` is 0.8.5 or newer. Used when converting string
 * datatypes to JSON: fixed-length UTF-8 strings are rejected for servers
 * older than this. */
#define SERVER_VERSION_SUPPORTS_FIXED_LENGTH_UTF8(version) \
(SERVER_VERSION_MATCHES_OR_EXCEEDS(version, 0, 8, 5))

#ifdef __cplusplus
}
#endif
Expand Down
3 changes: 2 additions & 1 deletion src/rest_vol_attr.c
Expand Up @@ -211,7 +211,8 @@ RV_attr_create(void *obj, const H5VL_loc_params_t *loc_params, const char *attr_
/* Form the request body to give the new Attribute its properties */

/* Form the Datatype portion of the Attribute create request */
if (RV_convert_datatype_to_JSON(type_id, &datatype_body, &datatype_body_len, FALSE) < 0)
if (RV_convert_datatype_to_JSON(type_id, &datatype_body, &datatype_body_len, FALSE,
parent->domain->u.file.server_version) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTCONVERT, NULL,
"can't convert attribute's datatype to JSON representation");

Expand Down
3 changes: 2 additions & 1 deletion src/rest_vol_dataset.c
Expand Up @@ -3586,7 +3586,8 @@ RV_setup_dataset_create_request_body(void *parent_obj, const char *name, hid_t t
FUNC_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "dataset create request output buffer was NULL");

/* Form the Datatype portion of the Dataset create request */
if (RV_convert_datatype_to_JSON(type_id, &datatype_body, &datatype_body_len, FALSE) < 0)
if (RV_convert_datatype_to_JSON(type_id, &datatype_body, &datatype_body_len, FALSE,
pobj->domain->u.file.server_version) < 0)
FUNC_GOTO_ERROR(H5E_DATASET, H5E_CANTCONVERT, FAIL,
"can't convert dataset's datatype to JSON representation");

Expand Down
51 changes: 38 additions & 13 deletions src/rest_vol_datatype.c
Expand Up @@ -153,7 +153,8 @@ RV_datatype_commit(void *obj, const H5VL_loc_params_t *loc_params, const char *n
new_datatype->u.datatype.tcpl_id = H5P_DATATYPE_CREATE_DEFAULT;

/* Convert the datatype into JSON to be used in the request body */
if (RV_convert_datatype_to_JSON(type_id, &datatype_body, &datatype_body_len, FALSE) < 0)
if (RV_convert_datatype_to_JSON(type_id, &datatype_body, &datatype_body_len, FALSE,
parent->domain->u.file.server_version) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTCONVERT, NULL, "can't convert datatype to JSON representation");

/* If this is not a H5Tcommit_anon call, create a link for the Datatype
Expand Down Expand Up @@ -711,7 +712,8 @@ RV_parse_datatype(char *type, hbool_t need_truncate)
* July, 2017
*/
herr_t
RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_len, hbool_t nested)
RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_len, hbool_t nested,
server_api_version server_version)
{
H5T_class_t type_class;
const char *leading_string = "\"type\": "; /* Leading string for all datatypes */
Expand Down Expand Up @@ -857,12 +859,36 @@ RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_l

case H5T_STRING: {
const char *const cset_ascii_string = "H5T_CSET_ASCII";
htri_t is_vlen;
const char *const cset_utf8_string = "H5T_CSET_UTF8";
const char *cset = NULL;
H5T_cset_t char_set = H5T_CSET_ERROR;

char_set = H5Tget_cset(type_id);

htri_t is_vlen;

if ((is_vlen = H5Tis_variable_str(type_id)) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_BADVALUE, FAIL,
"can't determine if datatype is variable-length string");

switch (char_set) {
case (H5T_CSET_ASCII):
cset = cset_ascii_string;
break;
case (H5T_CSET_UTF8):
if (!is_vlen && !(SERVER_VERSION_SUPPORTS_FIXED_LENGTH_UTF8(server_version)))
FUNC_GOTO_ERROR(
H5E_DATATYPE, H5E_UNSUPPORTED, FAIL,
"fixed-length UTF8 strings not supported until server version 0.8.5+");

cset = cset_utf8_string;
break;
case (H5T_CSET_ERROR):
default:
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_BADVALUE, FAIL, "invalid character set for string");
break;
}

/* Build the Datatype body by appending the character set for the string type,
* any type of string padding, and the length of the string */
/* Note: currently only H5T_CSET_ASCII and H5T_CSET_UTF8 are supported for the character set and
Expand All @@ -879,8 +905,7 @@ RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_l
"\"length\": \"H5T_VARIABLE\""
"}";

bytes_to_print =
(strlen(fmt_string) - 4) + strlen(cset_ascii_string) + strlen(nullterm_string) + 1;
bytes_to_print = (strlen(fmt_string) - 4) + strlen(cset) + strlen(nullterm_string) + 1;

buf_ptrdiff = out_string_curr_pos - out_string;
if (buf_ptrdiff < 0)
Expand All @@ -892,7 +917,7 @@ RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_l
out_string_curr_pos, H5E_DATATYPE, FAIL);

if ((bytes_printed = snprintf(out_string_curr_pos, out_string_len - leading_string_len,
fmt_string, cset_ascii_string, nullterm_string)) < 0)
fmt_string, cset, nullterm_string)) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_SYSERRSTR, FAIL, "snprintf error");

if ((size_t)bytes_printed >= out_string_len - leading_string_len)
Expand All @@ -910,8 +935,8 @@ RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_l
"\"length\": %zu"
"}";

bytes_to_print = (strlen(fmt_string) - 7) + strlen(cset_ascii_string) +
strlen(nullpad_string) + MAX_NUM_LENGTH + 1;
bytes_to_print =
(strlen(fmt_string) - 7) + strlen(cset) + strlen(nullpad_string) + MAX_NUM_LENGTH + 1;

buf_ptrdiff = out_string_curr_pos - out_string;
if (buf_ptrdiff < 0)
Expand All @@ -923,7 +948,7 @@ RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_l
out_string_curr_pos, H5E_DATATYPE, FAIL);

if ((bytes_printed = snprintf(out_string_curr_pos, out_string_len - leading_string_len,
fmt_string, cset_ascii_string, nullpad_string, type_size)) < 0)
fmt_string, cset, nullpad_string, type_size)) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_SYSERRSTR, FAIL, "snprintf error");

if ((size_t)bytes_printed >= out_string_len - leading_string_len)
Expand Down Expand Up @@ -981,8 +1006,8 @@ RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_l
printf("-> Converting compound datatype member %zu to JSON\n\n", i);
#endif

if (RV_convert_datatype_to_JSON(compound_member, &compound_member_strings[i], NULL, FALSE) <
0)
if (RV_convert_datatype_to_JSON(compound_member, &compound_member_strings[i], NULL, FALSE,
server_version) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTCONVERT, FAIL,
"can't convert compound datatype member to JSON representation");

Expand Down Expand Up @@ -1243,8 +1268,8 @@ RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_l
printf("-> Converting array datatype's base datatype to JSON\n\n");
#endif

if (RV_convert_datatype_to_JSON(type_base_class, &array_base_type, &array_base_type_len, TRUE) <
0)
if (RV_convert_datatype_to_JSON(type_base_class, &array_base_type, &array_base_type_len, TRUE,
server_version) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTCONVERT, FAIL,
"can't convert datatype to JSON representation");

Expand Down
3 changes: 1 addition & 2 deletions src/rest_vol_datatype.h
Expand Up @@ -26,8 +26,7 @@ herr_t RV_datatype_get(void *obj, H5VL_datatype_get_args_t *args, hid_t dxpl_id,
herr_t RV_datatype_close(void *dt, hid_t dxpl_id, void **req);

/* REST VOL Datatype helper functions */
hid_t RV_parse_datatype(char *type, hbool_t need_truncate);
herr_t RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_len, hbool_t nested);
hid_t RV_parse_datatype(char *type, hbool_t need_truncate);

/* Determine whether datatype conversion is necessary between 'same' datatypes */
static htri_t RV_detect_vl_vlstr_ref(hid_t type_id);
Expand Down

0 comments on commit ade4340

Please sign in to comment.