diff --git a/examples/mtx2bsp.c b/examples/mtx2bsp.c index d72f69d..fe5dcaa 100644 --- a/examples/mtx2bsp.c +++ b/examples/mtx2bsp.c @@ -41,6 +41,8 @@ int main(int argc, char** argv) { bsp_matrix_t matrix = bsp_mmread(input_fname); printf(" === Done reading. ===\n"); + matrix = bsp_matrix_minimize_values(matrix); + bsp_print_matrix_info(matrix); printf(" === Writing to %s... ===\n", output_fname); diff --git a/include/binsparse/binsparse.h b/include/binsparse/binsparse.h index 0579d73..c63e151 100644 --- a/include/binsparse/binsparse.h +++ b/include/binsparse/binsparse.h @@ -9,5 +9,6 @@ #include #include #include +#include #include #include diff --git a/include/binsparse/hdf5_wrapper.h b/include/binsparse/hdf5_wrapper.h index dbd2eef..7a69b11 100644 --- a/include/binsparse/hdf5_wrapper.h +++ b/include/binsparse/hdf5_wrapper.h @@ -16,8 +16,22 @@ int bsp_write_array(hid_t f, char* label, bsp_array_t array) { hid_t fspace = H5Screate_simple(1, (hsize_t[]){array.size}, NULL); hid_t lcpl = H5Pcreate(H5P_LINK_CREATE); - hid_t dset = H5Dcreate2(f, label, hdf5_standard_type, fspace, lcpl, - H5P_DEFAULT, H5P_DEFAULT); + hid_t dcpl = H5Pcreate(H5P_DATASET_CREATE); + + // Choose 1 MiB, the default chunk cache size, as our chunk size. + size_t chunk_size = 1024 * 1024 / bsp_type_size(array.type); + + // If the dataset is smaller than the chunk size, cap the chunk size. + if (array.size < chunk_size) { + chunk_size = array.size; + } + + H5Pset_chunk(dcpl, 1, (hsize_t[]){chunk_size}); + + H5Pset_deflate(dcpl, 9); + + hid_t dset = + H5Dcreate2(f, label, hdf5_standard_type, fspace, lcpl, dcpl, H5P_DEFAULT); if (dset == H5I_INVALID_HID) { return -1; @@ -34,6 +48,7 @@ int bsp_write_array(hid_t f, char* label, bsp_array_t array) { H5Sclose(fspace); H5Pclose(lcpl); + H5Pclose(dcpl); return 0; } diff --git a/include/binsparse/matrix_market/matrix_market_write.h b/include/binsparse/matrix_market/matrix_market_write.h index 09a9722..fcb0aef 100644 --- a/include/binsparse/matrix_market/matrix_market_write.h +++ b/include/binsparse/matrix_market/matrix_market_write.h @@ -64,11 +64,12 @@ void bsp_mmwrite(char* file_path, bsp_matrix_t matrix) { bsp_array_read(matrix.indices_1, count, j); fprintf(f, "%zu %zu\n", i + 1, j + 1); } else if (mm_type == BSP_MM_INTEGER) { - size_t i, j, value; + size_t i, j; + int64_t value; bsp_array_read(matrix.indices_0, count, i); bsp_array_read(matrix.indices_1, count, j); bsp_array_read(matrix.values, count, value); - fprintf(f, "%zu %zu %zu\n", i + 1, j + 1, value); + fprintf(f, "%zu %zu %lld\n", i + 1, j + 1, (long long)value); } else if (mm_type == BSP_MM_REAL) { size_t i, j; double value; diff --git a/include/binsparse/minimize_values.h b/include/binsparse/minimize_values.h new file mode 100644 index 0000000..bd64585 --- /dev/null +++ b/include/binsparse/minimize_values.h @@ -0,0 +1,111 @@ +#pragma once + +#include +#include + +bsp_matrix_t bsp_matrix_minimize_values(bsp_matrix_t matrix) { + if (matrix.values.type == BSP_FLOAT64) { + bool float32_representable = true; + + double* values = (double*)matrix.values.data; + + for (size_t i = 0; i < matrix.values.size; i++) { + if (((float)values[i]) != values[i]) { + float32_representable = false; + } + } + + if (float32_representable) { + bsp_array_t new_values = + bsp_construct_array_t(matrix.values.size, BSP_FLOAT32); + + float* n_values = (float*)new_values.data; + + for (size_t i = 0; i < matrix.values.size; i++) { + n_values[i] = values[i]; + } + + bsp_destroy_array_t(matrix.values); + matrix.values = new_values; + } + } else if (matrix.values.type == BSP_INT64) { + int64_t* values = (int64_t*)matrix.values.data; + + int64_t min_value = values[0]; + int64_t max_value = values[0]; + + for (size_t i = 1; i < matrix.values.size; i++) { + if (values[i] > max_value) { + max_value = values[i]; + } + + if (values[i] < min_value) { + min_value = values[i]; + } + } + + bsp_type_t value_type; + if (min_value >= 0) { + // No negative values => unsigned integers + if (max_value <= (int64_t)UINT8_MAX) { + value_type = BSP_UINT8; + } else if (max_value <= (int64_t)UINT16_MAX) { + value_type = BSP_UINT16; + } else if (max_value <= (int64_t)UINT32_MAX) { + value_type = BSP_UINT32; + } else { + value_type = BSP_UINT64; + } + } else { + // Negative values => signed integers + if (max_value <= (int64_t)INT8_MAX && min_value >= (int64_t)INT8_MIN) { + value_type = BSP_INT8; + } else if (max_value <= (int64_t)INT16_MAX && + min_value >= (int64_t)INT16_MIN) { + value_type = BSP_INT16; + } else if (max_value <= (int64_t)INT32_MAX && + min_value >= (int64_t)INT32_MIN) { + value_type = BSP_INT32; + } else { + value_type = BSP_INT64; + } + } + bsp_array_t new_values = + bsp_construct_array_t(matrix.values.size, value_type); + + for (size_t i = 0; i < matrix.values.size; i++) { + int64_t value; + bsp_array_read(matrix.values, i, value); + bsp_array_write(new_values, i, value); + } + + bsp_destroy_array_t(matrix.values); + matrix.values = new_values; + } else if (matrix.values.type == BSP_COMPLEX_FLOAT64) { + bool float32_representable = true; + + double _Complex* values = (double _Complex*)matrix.values.data; + + for (size_t i = 0; i < matrix.values.size; i++) { + if (((float _Complex)values[i]) != values[i]) { + float32_representable = false; + } + } + + if (float32_representable) { + bsp_array_t new_values = + bsp_construct_array_t(matrix.values.size, BSP_COMPLEX_FLOAT32); + + float _Complex* n_values = (float _Complex*)new_values.data; + + for (size_t i = 0; i < matrix.values.size; i++) { + n_values[i] = values[i]; + } + + bsp_destroy_array_t(matrix.values); + matrix.values = new_values; + } + } + + return matrix; +}