Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion examples/bsp-ls.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ void print_group_info(hid_t g, const char* name) {
assert(format_ != NULL);
char* format_string = cJSON_GetStringValue(format_);

cJSON* nnz_ = cJSON_GetObjectItemCaseSensitive(binsparse, "nnz");
cJSON* nnz_ =
cJSON_GetObjectItemCaseSensitive(binsparse, "number_of_stored_values");
assert(nnz_ != NULL);
size_t nnz = cJSON_GetNumberValue(nnz_);

Expand All @@ -59,6 +60,15 @@ void print_group_info(hid_t g, const char* name) {

printf("Group \"%s\": Version %s Binsparse matrix. Format %s, %zu x %zu.\n",
full_group_path, version_string, format_string, nrows, ncols);

cJSON* data_types =
cJSON_GetObjectItemCaseSensitive(binsparse, "data_types");
assert(data_types != NULL);

cJSON* item;
cJSON_ArrayForEach(item, data_types) {
printf(" %s: %s\n", item->string, cJSON_Print(item));
}
}

H5Literate(g, H5_INDEX_NAME, H5_ITER_INC, NULL, visit_group, NULL);
Expand Down
71 changes: 35 additions & 36 deletions examples/check_equivalence.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ int check_array_equivalence(bsp_array_t array1, bsp_array_t array2) {
bsp_array_read(array2, i, value2);

if (value1 != value2) {
fprintf(stderr, "Array values are not equal.\n");
fprintf(stderr, "Array values are not equal. (%zu != %zu)\n", value1,
value2);
return 4;
}
} else if (mm_type1 == BSP_MM_REAL) {
Expand All @@ -62,7 +63,8 @@ int check_array_equivalence(bsp_array_t array1, bsp_array_t array2) {
bsp_array_read(array2, i, value2);

if (value1 != value2) {
fprintf(stderr, "Array values are not equal.\n");
fprintf(stderr, "Array values are not equal. (%.17lg != %.17lg)\n",
value1, value2);
return 4;
}
} else if (mm_type1 == BSP_MM_COMPLEX) {
Expand All @@ -71,7 +73,11 @@ int check_array_equivalence(bsp_array_t array1, bsp_array_t array2) {
bsp_array_read(array2, i, value2);

if (value1 != value2) {
fprintf(stderr, "Array values are not equal.\n");
fprintf(stderr,
"Array values are not equal. (%.17lg + i%.17lg != %.17lg + "
"i%.17lg)\n",
__real__ value1, __imag__ value1, __real__ value2,
__imag__ value2);
return 4;
}
}
Expand All @@ -80,39 +86,6 @@ int check_array_equivalence(bsp_array_t array1, bsp_array_t array2) {
return 0;
}

// Result of splitting a "file[:dataset]" command-line argument.
// Both strings are heap-allocated with malloc(); the caller releases them
// with free().
typedef struct {
  char* fname;   // Text before the last ':' (the whole input if there is none).
                 // NULL only if memory allocation failed.
  char* dataset; // Text after the last ':', or NULL if the input has no ':'.
} bsp_fdataset_info_t;

// Split `str` at its LAST ':' into a file name and a dataset name.
//
//   "a.h5:group" -> fname "a.h5", dataset "group"
//   "a.h5"       -> fname "a.h5", dataset NULL
//   "a:b:c"      -> fname "a:b",  dataset "c"
//
// `str` must be a valid NUL-terminated string; it is only read.
//
// On allocation failure the returned struct has fname == NULL and
// dataset == NULL, so callers can detect the failure by checking fname.
bsp_fdataset_info_t bsp_parse_fdataset_string(char* str) {
  bsp_fdataset_info_t info;
  info.fname = NULL;
  info.dataset = NULL;

  // strrchr finds the last ':' directly, which avoids walking the string with
  // an `int` index that could overflow on very long inputs.
  const char* colon = strrchr(str, ':');
  size_t fname_len = (colon != NULL) ? (size_t) (colon - str) : strlen(str);

  info.fname = (char*) malloc(fname_len + 1);
  if (info.fname == NULL) {
    return info;
  }
  memcpy(info.fname, str, fname_len);
  info.fname[fname_len] = '\0';

  if (colon != NULL) {
    const char* dataset_begin = colon + 1;
    size_t dataset_len = strlen(dataset_begin);

    info.dataset = (char*) malloc(dataset_len + 1);
    if (info.dataset == NULL) {
      // A NULL dataset normally means "no dataset given", so signal the
      // failure through fname instead of returning a half-filled result.
      free(info.fname);
      info.fname = NULL;
      return info;
    }
    // Copies the terminating NUL as well.
    memcpy(info.dataset, dataset_begin, dataset_len + 1);
  }

  return info;
}

int main(int argc, char** argv) {
if (argc < 3) {
printf(
Expand All @@ -134,6 +107,32 @@ int main(int argc, char** argv) {
bsp_matrix_t matrix1 = bsp_read_matrix(info1.fname, info1.dataset);
bsp_matrix_t matrix2 = bsp_read_matrix(info2.fname, info2.dataset);

bool perform_suitesparse_declamping = true;
if (perform_suitesparse_declamping &&
strcmp(bsp_get_file_extension(file1), ".mtx") == 0) {
bsp_matrix_declamp_values(matrix1);
}

if (perform_suitesparse_declamping &&
strcmp(bsp_get_file_extension(file2), ".mtx") == 0) {
bsp_matrix_declamp_values(matrix2);
}

// If matrices are not the same format, try to convert.
if (matrix1.format != matrix2.format) {
if (matrix1.format != BSP_COOR) {
bsp_matrix_t intermediate = bsp_convert_matrix(matrix1, BSP_COOR);
bsp_destroy_matrix_t(matrix1);
matrix1 = intermediate;
}

if (matrix2.format != BSP_COOR) {
bsp_matrix_t intermediate = bsp_convert_matrix(matrix2, BSP_COOR);
bsp_destroy_matrix_t(matrix2);
matrix2 = intermediate;
}
}

if (matrix1.format != matrix2.format) {
fprintf(stderr, "Formats do not match. (%s != %s)\n",
bsp_get_matrix_format_string(matrix1.format),
Expand Down
70 changes: 65 additions & 5 deletions examples/mtx2bsp.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,31 @@
int main(int argc, char** argv) {

if (argc < 3) {
printf("usage: ./mtx2bsp [inputfile_name.mtx] [outputfile_name.bsp.hdf5] "
"[optional: dataset]\n");
printf("usage: ./mtx2bsp [input.mtx] [output.bsp.h5]:[optional: group] "
"[optional: format]\n");
printf("\n");
printf("Description: Convert a Matrix Market file to a Binsparse HDF5 "
"file.\n");
printf(" Users can optionally provide an HDF5 group to store "
"the\n");
printf(" file in as well as a specific format. The default "
"format\n");
printf(" is row-sorted COO (COOR).\n");
printf("\n");
printf("example: ./mtx2bsp chesapeake.mtx chesapeake.bsp.h5\n");
printf(" - Convert Matrix Market file `chesapeake.mtx` to Binsparse "
"HDF5 file `chesapeake.bsp.h5`.\n");
printf(" - Matrix will be stored in root group.\n");
printf(" - Matrix will be stored in COOR format.\n");
printf("\n");
printf("example: ./mtx2bsp chesapeake.mtx chesapeake.bsp.h5:chesapeake\n");
printf(" - Same as previous example, but matrix will be stored in "
"HDF5 group `chesapeake`.\n");
printf("\n");
printf(
"example: ./mtx2bsp chesapeake.mtx chesapeake.bsp.h5:chesapeake CSR\n");
printf(" - Same as previous example, but matrix will use CSR "
"format.\n");
return 1;
}

Expand All @@ -15,16 +38,47 @@ int main(int argc, char** argv) {
bool perform_suitesparse_declamping = true;

char* input_fname = argv[1];
char* output_fname = argv[2];

char* group_name = NULL;
bsp_fdataset_info_t info2 = bsp_parse_fdataset_string(argv[2]);
char* output_fname = info2.fname;
char* group_name = info2.dataset;

char* format_name = NULL;

if (argc >= 4) {
group_name = argv[3];
format_name = argv[3];
}

char* input_file_extension = bsp_get_file_extension(input_fname);
char* output_file_extension = bsp_get_file_extension(output_fname);

if (input_file_extension == NULL ||
strcmp(input_file_extension, ".mtx") != 0) {
fprintf(stderr,
"error: input file \"%s\" is not a Matrix Market file. "
"(Its extension is not '.mtx'.)\n",
input_fname);
return 1;
}

if (output_file_extension == NULL ||
(strcmp(output_file_extension, ".h5") != 0 &&
strcmp(output_file_extension, ".hdf5") != 0)) {
fprintf(stderr,
"error: output file \"%s\" is not an HDF5 file. "
"(Its extension is not '.h5' or '.hdf5'.)\n",
output_fname);
return 1;
}

bsp_mm_metadata m = bsp_mmread_metadata(input_fname);

bsp_matrix_format_t format = BSP_COOR;
if (format_name != NULL) {
format = bsp_get_matrix_format(format_name);
assert(format != 0);
}

printf("%lu x %lu matrix with %lu nonzeros.\n", m.nrows, m.ncols, m.nnz);
printf(
"Matrix Market format is \"%s\" with type \"%s\" and structure \"%s\"\n",
Expand Down Expand Up @@ -52,6 +106,12 @@ int main(int argc, char** argv) {

matrix = bsp_matrix_minimize_values(matrix);

if (format != BSP_COOR) {
bsp_matrix_t converted_matrix = bsp_convert_matrix(matrix, format);
bsp_destroy_matrix_t(matrix);
matrix = converted_matrix;
}

bsp_print_matrix_info(matrix);

printf(" === Writing to %s... ===\n", output_fname);
Expand Down
1 change: 1 addition & 0 deletions include/binsparse/binsparse.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#define BINSPARSE_VERSION "0.1"

#include <binsparse/array.h>
#include <binsparse/convert_matrix.h>
#include <binsparse/detail/detail.h>
#include <binsparse/generate.h>
#include <binsparse/hdf5_wrapper.h>
Expand Down
146 changes: 146 additions & 0 deletions include/binsparse/convert_matrix.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
#pragma once

#include <assert.h>
#include <binsparse/matrix.h>

// Convert `matrix` to the storage format `format` and return the result as a
// new matrix.
//
// Supported conversions:
//   - CSR  -> COOR
//   - COOR -> CSR
//   - any other pair is attempted by first converting to COOR and then to the
//     requested format (which only succeeds for the pairs listed above).
//
// The input matrix is only read; it is not destroyed here. The arrays of the
// returned matrix are built with bsp_copy_construct_array_t /
// bsp_construct_array_t, so the caller is responsible for destroying both the
// input and the result.
//
// Requesting the format the matrix already has, or an unsupported conversion,
// trips an assert. When asserts are compiled out (NDEBUG), an unsupported
// conversion returns the matrix produced by bsp_construct_default_matrix_t()
// instead of running off the end of the function.
bsp_matrix_t bsp_convert_matrix(bsp_matrix_t matrix,
                                bsp_matrix_format_t format) {
  // Throw an error if matrix already in desired format.
  if (matrix.format == format) {
    assert(false);
  }

  if (format == BSP_COOR) {
    // *Convert to COOR* from another format. Only CSR is supported as source.
    if (matrix.format != BSP_CSR) {
      assert(false);
      return bsp_construct_default_matrix_t();
    }

    // Convert CSR -> COOR
    bsp_matrix_t result = bsp_construct_default_matrix_t();

    result.format = BSP_COOR;

    // Inherit NNZ, nrows, ncols, ISO-ness, and structure directly from
    // original matrix.
    result.nnz = matrix.nnz;
    result.nrows = matrix.nrows;
    result.ncols = matrix.ncols;
    result.is_iso = matrix.is_iso;
    result.structure = matrix.structure;

    size_t max_dim =
        (matrix.nrows > matrix.ncols) ? matrix.nrows : matrix.ncols;

    bsp_type_t index_type = bsp_pick_integer_type(max_dim);

    result.values = bsp_copy_construct_array_t(matrix.values);

    // There is a corner case with tall and skinny matrices where we need a
    // higher width for rowind. In order to keep rowind/colind the same type,
    // we might upcast.

    // In CSR, indices_0 holds the column indices; in COOR they live in
    // indices_1.
    if (index_type == matrix.indices_0.type) {
      result.indices_1 = bsp_copy_construct_array_t(matrix.indices_0);
    } else {
      result.indices_1 = bsp_construct_array_t(matrix.nnz, index_type);
      for (size_t i = 0; i < matrix.nnz; i++) {
        bsp_array_awrite(result.indices_1, i, matrix.indices_0, i);
      }
    }

    // Expand the row offsets into one explicit row index per stored value.
    result.indices_0 = bsp_construct_array_t(matrix.nnz, index_type);

    for (size_t i = 0; i < matrix.nrows; i++) {
      size_t row_begin, row_end;
      bsp_array_read(matrix.pointers_to_1, i, row_begin);
      bsp_array_read(matrix.pointers_to_1, i + 1, row_end);
      for (size_t j_ptr = row_begin; j_ptr < row_end; j_ptr++) {
        bsp_array_write(result.indices_0, j_ptr, i);
      }
    }
    return result;
  }

  // Convert to any other format.

  // Currently only support COOR -> X.
  // If matrix is not COOR, convert to COOR first and recurse.
  if (matrix.format != BSP_COOR) {
    bsp_matrix_t intermediate = bsp_convert_matrix(matrix, BSP_COOR);
    bsp_matrix_t result = bsp_convert_matrix(intermediate, format);
    bsp_destroy_matrix_t(intermediate);
    return result;
  }

  if (format != BSP_CSR) {
    assert(false);
    return bsp_construct_default_matrix_t();
  }

  // Convert COOR -> CSR
  bsp_matrix_t result = bsp_construct_default_matrix_t();

  result.format = BSP_CSR;

  result.nrows = matrix.nrows;
  result.ncols = matrix.ncols;
  result.nnz = matrix.nnz;
  result.is_iso = matrix.is_iso;
  result.structure = matrix.structure;

  // TODO: consider whether to produce files with varying integer types
  // for row indices, column indices, and offsets.

  size_t max_dim = (matrix.nrows > matrix.ncols) ? matrix.nrows : matrix.ncols;

  // Column indices and row offsets share one integer type. The offsets store
  // values up to matrix.nnz, so size the type from nnz rather than from the
  // length of the values array (NOTE(review): the two presumably differ for
  // ISO matrices that store a single value -- confirm).
  size_t max_value = (max_dim > matrix.nnz) ? max_dim : matrix.nnz;

  bsp_type_t index_type = bsp_pick_integer_type(max_value);

  // Since COOR is sorted by rows and then by columns, values and column
  // indices can be copied exactly. Values' type will not change, but
  // column indices might, thus the extra branch.

  result.values = bsp_copy_construct_array_t(matrix.values);

  if (index_type == matrix.indices_1.type) {
    result.indices_0 = bsp_copy_construct_array_t(matrix.indices_1);
  } else {
    result.indices_0 = bsp_construct_array_t(matrix.nnz, index_type);

    for (size_t i = 0; i < matrix.nnz; i++) {
      bsp_array_awrite(result.indices_0, i, matrix.indices_1, i);
    }
  }

  result.pointers_to_1 = bsp_construct_array_t(matrix.nrows + 1, index_type);

  bsp_array_t rowptr = result.pointers_to_1;

  bsp_array_write(rowptr, 0, 0);

  // Walk the stored entries in order. Whenever the row index advances past
  // `r`, every row in [r, j) ends at entry `c`, so record `c` as the offset
  // at which the following row starts.
  size_t r = 0;
  for (size_t c = 0; c < matrix.nnz; c++) {
    size_t j;
    bsp_array_read(matrix.indices_0, c, j);

    while (r < j) {
      assert(r + 1 <= matrix.nrows);

      bsp_array_write(rowptr, r + 1, c);
      r++;
    }
  }

  // Rows after the last stored entry (including the last non-empty row
  // itself) all end at nnz.
  for (; r < matrix.nrows; r++) {
    bsp_array_write(rowptr, r + 1, matrix.nnz);
  }

  return result;
}
Loading