Skip to content

Commit

Permalink
Support UTF8 string datatype encoding
Browse files Browse the repository at this point in the history
  • Loading branch information
mattjala committed Nov 3, 2023
1 parent 59b455c commit 88d83ed
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 18 deletions.
9 changes: 8 additions & 1 deletion src/rest_vol.h
Expand Up @@ -484,7 +484,7 @@ typedef struct {
} upload_info;

/* Structure that keeps track of semantic version. */
typedef struct server_api_version {
typedef struct {
size_t major;
size_t minor;
size_t patch;
Expand Down Expand Up @@ -721,6 +721,10 @@ herr_t RV_curl_multi_perform(CURL *curl_multi_ptr, dataset_transfer_info *transf
hid_t file_space_id, void *buf,
struct response_buffer resp_buffer));

/*
 * REST VOL Datatype helper.
 *
 * Converts the HDF5 datatype `type_id` into a JSON representation suitable for
 * use in a request body (e.g. attribute, dataset and committed-datatype
 * creation requests).
 *
 *   type_id         - datatype to convert
 *   type_body       - receives the generated JSON string
 *   type_body_len   - receives the length of the generated JSON; callers may
 *                     pass NULL when the length is not needed
 *   nested          - passed as TRUE when converting the base type of an array
 *                     datatype and FALSE by top-level callers
 *                     NOTE(review): exact effect on the output is not visible
 *                     here - confirm against the definition
 *   server_version  - version of the target server; used to reject datatypes
 *                     the server cannot handle (fixed-length UTF8 strings
 *                     require server version 0.8.5+)
 *
 * Returns a negative value on failure.
 */
herr_t RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_len, hbool_t nested,
server_api_version server_version);

/*
 * Evaluates to non-zero when the semantic version stored in `version` (an
 * expression with `major`, `minor` and `patch` members) is greater than or
 * equal to major_needed.minor_needed.patch_needed. The comparison is made on
 * the major component first, then minor, then patch.
 *
 * Every argument and the expansion as a whole are parenthesized so that the
 * macro can be negated (`!MACRO(...)`) or combined with `&&` / `||` without
 * the surrounding operator binding to only the first comparison.
 *
 * Note: `version` is evaluated several times, so it must not have side effects.
 */
#define SERVER_VERSION_MATCHES_OR_EXCEEDS(version, major_needed, minor_needed, patch_needed)                 \
    (((version).major > (major_needed)) ||                                                                   \
     ((version).major == (major_needed) && (version).minor > (minor_needed)) ||                              \
     ((version).major == (major_needed) && (version).minor == (minor_needed) &&                              \
      (version).patch >= (patch_needed)))
Expand All @@ -731,6 +735,9 @@ herr_t RV_curl_multi_perform(CURL *curl_multi_ptr, dataset_transfer_info *transf
/* Evaluates to non-zero when `version` is 0.8.5 or newer, the minimum server
 * version this connector treats as supporting the "get storage size" feature. */
#define SERVER_VERSION_SUPPORTS_GET_STORAGE_SIZE(version) \
(SERVER_VERSION_MATCHES_OR_EXCEEDS(version, 0, 8, 5))

/* Evaluates to non-zero when `version` is 0.8.5 or newer. Fixed-length UTF8
 * string datatypes are rejected during datatype-to-JSON conversion for servers
 * older than this. */
#define SERVER_VERSION_SUPPORTS_FIXED_LENGTH_UTF8(version) \
(SERVER_VERSION_MATCHES_OR_EXCEEDS(version, 0, 8, 5))

#ifdef __cplusplus
}
#endif
Expand Down
3 changes: 2 additions & 1 deletion src/rest_vol_attr.c
Expand Up @@ -211,7 +211,8 @@ RV_attr_create(void *obj, const H5VL_loc_params_t *loc_params, const char *attr_
/* Form the request body to give the new Attribute its properties */

/* Form the Datatype portion of the Attribute create request */
if (RV_convert_datatype_to_JSON(type_id, &datatype_body, &datatype_body_len, FALSE) < 0)
if (RV_convert_datatype_to_JSON(type_id, &datatype_body, &datatype_body_len, FALSE,
parent->domain->u.file.server_version) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTCONVERT, NULL,
"can't convert attribute's datatype to JSON representation");

Expand Down
3 changes: 2 additions & 1 deletion src/rest_vol_dataset.c
Expand Up @@ -3331,7 +3331,8 @@ RV_setup_dataset_create_request_body(void *parent_obj, const char *name, hid_t t
FUNC_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "dataset create request output buffer was NULL");

/* Form the Datatype portion of the Dataset create request */
if (RV_convert_datatype_to_JSON(type_id, &datatype_body, &datatype_body_len, FALSE) < 0)
if (RV_convert_datatype_to_JSON(type_id, &datatype_body, &datatype_body_len, FALSE,
pobj->domain->u.file.server_version) < 0)
FUNC_GOTO_ERROR(H5E_DATASET, H5E_CANTCONVERT, FAIL,
"can't convert dataset's datatype to JSON representation");

Expand Down
51 changes: 38 additions & 13 deletions src/rest_vol_datatype.c
Expand Up @@ -153,7 +153,8 @@ RV_datatype_commit(void *obj, const H5VL_loc_params_t *loc_params, const char *n
new_datatype->u.datatype.tcpl_id = H5P_DATATYPE_CREATE_DEFAULT;

/* Convert the datatype into JSON to be used in the request body */
if (RV_convert_datatype_to_JSON(type_id, &datatype_body, &datatype_body_len, FALSE) < 0)
if (RV_convert_datatype_to_JSON(type_id, &datatype_body, &datatype_body_len, FALSE,
parent->domain->u.file.server_version) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTCONVERT, NULL, "can't convert datatype to JSON representation");

/* If this is not a H5Tcommit_anon call, create a link for the Datatype
Expand Down Expand Up @@ -700,7 +701,8 @@ RV_parse_datatype(char *type, hbool_t need_truncate)
* July, 2017
*/
herr_t
RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_len, hbool_t nested)
RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_len, hbool_t nested,
server_api_version server_version)
{
H5T_class_t type_class;
const char *leading_string = "\"type\": "; /* Leading string for all datatypes */
Expand Down Expand Up @@ -846,12 +848,36 @@ RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_l

case H5T_STRING: {
const char *const cset_ascii_string = "H5T_CSET_ASCII";
htri_t is_vlen;
const char *const cset_utf8_string = "H5T_CSET_UTF8";
const char *cset = NULL;
H5T_cset_t char_set = H5T_CSET_ERROR;

char_set = H5Tget_cset(type_id);

htri_t is_vlen;

if ((is_vlen = H5Tis_variable_str(type_id)) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_BADVALUE, FAIL,
"can't determine if datatype is variable-length string");

switch (char_set) {
case (H5T_CSET_ASCII):
cset = cset_ascii_string;
break;
case (H5T_CSET_UTF8):
if (!is_vlen && !(SERVER_VERSION_SUPPORTS_FIXED_LENGTH_UTF8(server_version)))
FUNC_GOTO_ERROR(
H5E_DATATYPE, H5E_UNSUPPORTED, FAIL,
"fixed-length UTF8 strings not supported until server version 0.8.5+");

cset = cset_utf8_string;
break;
case (H5T_CSET_ERROR):
default:
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_BADVALUE, FAIL, "invalid character set for string");
break;
}

/* Build the Datatype body by appending the character set for the string type,
* any type of string padding, and the length of the string */
/* Note: currently only H5T_CSET_ASCII is supported for the character set and
Expand All @@ -868,8 +894,7 @@ RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_l
"\"length\": \"H5T_VARIABLE\""
"}";

bytes_to_print =
(strlen(fmt_string) - 4) + strlen(cset_ascii_string) + strlen(nullterm_string) + 1;
bytes_to_print = (strlen(fmt_string) - 4) + strlen(cset) + strlen(nullterm_string) + 1;

buf_ptrdiff = out_string_curr_pos - out_string;
if (buf_ptrdiff < 0)
Expand All @@ -881,7 +906,7 @@ RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_l
out_string_curr_pos, H5E_DATATYPE, FAIL);

if ((bytes_printed = snprintf(out_string_curr_pos, out_string_len - leading_string_len,
fmt_string, cset_ascii_string, nullterm_string)) < 0)
fmt_string, cset, nullterm_string)) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_SYSERRSTR, FAIL, "snprintf error");

if ((size_t)bytes_printed >= out_string_len - leading_string_len)
Expand All @@ -899,8 +924,8 @@ RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_l
"\"length\": %zu"
"}";

bytes_to_print = (strlen(fmt_string) - 7) + strlen(cset_ascii_string) +
strlen(nullpad_string) + MAX_NUM_LENGTH + 1;
bytes_to_print =
(strlen(fmt_string) - 7) + strlen(cset) + strlen(nullpad_string) + MAX_NUM_LENGTH + 1;

buf_ptrdiff = out_string_curr_pos - out_string;
if (buf_ptrdiff < 0)
Expand All @@ -912,7 +937,7 @@ RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_l
out_string_curr_pos, H5E_DATATYPE, FAIL);

if ((bytes_printed = snprintf(out_string_curr_pos, out_string_len - leading_string_len,
fmt_string, cset_ascii_string, nullpad_string, type_size)) < 0)
fmt_string, cset, nullpad_string, type_size)) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_SYSERRSTR, FAIL, "snprintf error");

if ((size_t)bytes_printed >= out_string_len - leading_string_len)
Expand Down Expand Up @@ -970,8 +995,8 @@ RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_l
printf("-> Converting compound datatype member %zu to JSON\n\n", i);
#endif

if (RV_convert_datatype_to_JSON(compound_member, &compound_member_strings[i], NULL, FALSE) <
0)
if (RV_convert_datatype_to_JSON(compound_member, &compound_member_strings[i], NULL, FALSE,
server_version) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTCONVERT, FAIL,
"can't convert compound datatype member to JSON representation");

Expand Down Expand Up @@ -1232,8 +1257,8 @@ RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_l
printf("-> Converting array datatype's base datatype to JSON\n\n");
#endif

if (RV_convert_datatype_to_JSON(type_base_class, &array_base_type, &array_base_type_len, TRUE) <
0)
if (RV_convert_datatype_to_JSON(type_base_class, &array_base_type, &array_base_type_len, TRUE,
server_version) < 0)
FUNC_GOTO_ERROR(H5E_DATATYPE, H5E_CANTCONVERT, FAIL,
"can't convert datatype to JSON representation");

Expand Down
3 changes: 1 addition & 2 deletions src/rest_vol_datatype.h
Expand Up @@ -26,8 +26,7 @@ herr_t RV_datatype_get(void *obj, H5VL_datatype_get_args_t *args, hid_t dxpl_id,
herr_t RV_datatype_close(void *dt, hid_t dxpl_id, void **req);

/* REST VOL Datatype helper functions */
hid_t RV_parse_datatype(char *type, hbool_t need_truncate);
herr_t RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_len, hbool_t nested);
hid_t RV_parse_datatype(char *type, hbool_t need_truncate);

#ifdef __cplusplus
}
Expand Down

0 comments on commit 88d83ed

Please sign in to comment.