Skip to content

Commit

Permalink
Adding SparseCSFIndex::Make.
Browse files Browse the repository at this point in the history
  • Loading branch information
rok committed Feb 5, 2020
1 parent 7d17995 commit f44d92c
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 46 deletions.
63 changes: 49 additions & 14 deletions cpp/src/arrow/sparse_tensor.cc
Expand Up @@ -438,6 +438,7 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
template <typename IndexValueType>
Status Convert() {
using c_index_value_type = typename IndexValueType::c_type;
RETURN_NOT_OK(CheckMaximumValue(std::numeric_limits<c_index_value_type>::max()));
const int64_t indices_elsize = sizeof(c_index_value_type);

std::shared_ptr<SparseCOOTensor> sparse_coo_tensor;
Expand All @@ -463,8 +464,8 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
AllocateBuffer(pool_, indices_elsize * ndim * nonzero_count, &indices_buffer));
RETURN_NOT_OK(AllocateBuffer(pool_, indices_elsize * (ndim - 1) * (nonzero_count + 1),
&indptr_buffer));
int64_t* indices = reinterpret_cast<int64_t*>(indices_buffer->mutable_data());
int64_t* indptr = reinterpret_cast<int64_t*>(indptr_buffer->mutable_data());
auto* indices = reinterpret_cast<c_index_value_type*>(indices_buffer->mutable_data());
auto* indptr = reinterpret_cast<c_index_value_type*>(indptr_buffer->mutable_data());

for (int64_t row = 0; row < nonzero_count; ++row) {
bool tree_split = false;
Expand All @@ -477,16 +478,19 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>

indices[column * nonzero_count + counts[column]] =
coords->Value<IndexValueType>({row, column});
indptr[column * (nonzero_count + 1) + counts[column]] = counts[column + 1];
indptr[column * (nonzero_count + 1) + counts[column]] =
static_cast<c_index_value_type>(counts[column + 1]);
++counts[column];
}
}
}

for (int64_t column = 0; column < ndim; ++column) {
indptr[column * (nonzero_count + 1) + counts[column]] = counts[column + 1];
indptr[column * (nonzero_count + 1) + counts[column]] =
static_cast<c_index_value_type>(counts[column + 1]);
}

// Remove gaps from buffers
int64_t total_size = counts[0];
for (int64_t column = 1; column < ndim; ++column) {
for (int64_t i = 0; i < counts[column] + 1; ++i) {
Expand All @@ -506,8 +510,10 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
// Byte size = element size * element count. The element count must be
// parenthesized so that it matches out_indptr_shape
// (total_size - nonzero_count + ndim - 1); otherwise only total_size is
// scaled by the element size and the buffer is mis-sized.
RETURN_NOT_OK(AllocateBuffer(pool_,
indices_elsize * (total_size - nonzero_count + ndim - 1),
&out_indptr_buffer));
int64_t* out_indices = reinterpret_cast<int64_t*>(out_indices_buffer->mutable_data());
int64_t* out_indptr = reinterpret_cast<int64_t*>(out_indptr_buffer->mutable_data());
auto* out_indices =
reinterpret_cast<c_index_value_type*>(out_indices_buffer->mutable_data());
auto* out_indptr =
reinterpret_cast<c_index_value_type*>(out_indptr_buffer->mutable_data());

for (int64_t i = 0; i < total_size; ++i) out_indices[i] = indices[i];

Expand All @@ -516,12 +522,7 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>

// Construct SparseCSFTensor
std::vector<int64_t> out_indptr_shape({total_size - nonzero_count + ndim - 1});
std::shared_ptr<Tensor> out_indptr_tensor =
std::make_shared<Tensor>(int64(), out_indptr_buffer, out_indptr_shape);

std::vector<int64_t> out_indices_shape({total_size});
std::shared_ptr<Tensor> out_indices_tensor =
std::make_shared<Tensor>(int64(), out_indices_buffer, out_indices_shape);

std::vector<int64_t> indptr_offsets(ndim - 1);
std::vector<int64_t> indices_offsets(ndim);
Expand All @@ -534,9 +535,11 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
// indices_offsets is constructed with ndim entries, so the highest slot that
// may be written is index ndim - 1. Stop one iteration early to avoid writing
// one element past the end of the vector.
// NOTE(review): assumes indices_offsets is not resized between its
// construction and this loop -- confirm against the full file.
for (int64_t i = 0; i < ndim - 1; ++i)
  indices_offsets[i + 1] = indices_offsets[i] + counts[i];

sparse_index =
std::make_shared<SparseCSFIndex>(out_indptr_tensor, out_indices_tensor,
indptr_offsets, indices_offsets, axis_order);
sparse_index = std::make_shared<SparseCSFIndex>(
std::make_shared<Tensor>(index_value_type_, out_indptr_buffer, out_indptr_shape),
std::make_shared<Tensor>(index_value_type_, out_indices_buffer,
out_indices_shape),
indptr_offsets, indices_offsets, axis_order);
data = sparse_coo_tensor->data();

return Status::OK();
Expand Down Expand Up @@ -565,6 +568,22 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
using BaseClass::index_value_type_;
using BaseClass::pool_;
using BaseClass::tensor_;

/// \brief Check that the index value type is wide enough for the tensor.
///
/// Compares the largest extent found in tensor_.shape() against the largest
/// value representable by the index value type.
///
/// \param type_max largest value representable by the index value type
/// \return Status::Invalid when the largest dimension exceeds type_max,
///         Status::OK otherwise (including when the shape has no dimensions)
template <typename c_value_type>
inline Status CheckMaximumValue(const c_value_type type_max) const {
  const auto& shape = tensor_.shape();
  // std::max_element returns the end iterator for an empty range, which must
  // not be dereferenced. With no dimensions there is nothing to overflow.
  if (shape.begin() == shape.end()) {
    return Status::OK();
  }
  const auto max_dimension = *std::max_element(shape.begin(), shape.end());
  if (static_cast<int64_t>(type_max) < max_dimension) {
    // LCOV_EXCL_START: The following invalid status causes program failure.
    return Status::Invalid("The bit width of the index value type is too small");
    // LCOV_EXCL_STOP
  }
  return Status::OK();
}

/// \brief Overload for a 64-bit signed maximum: no range check is performed.
inline Status CheckMaximumValue(const int64_t) const {
  return Status::OK();
}

/// \brief Overload for a 64-bit unsigned maximum: no range check is performed.
///
/// Bypasses the generic template, whose static_cast<int64_t>(type_max) cannot
/// represent the uint64_t maximum.
inline Status CheckMaximumValue(const uint64_t) const {
  return Status::OK();
}
};

// ----------------------------------------------------------------------
Expand Down Expand Up @@ -955,6 +974,22 @@ void CheckSparseCSXIndexValidity(const std::shared_ptr<DataType>& indptr_type,
// ----------------------------------------------------------------------
// SparseCSFIndex

/// Build a SparseCSFIndex from raw buffers and shapes.
///
/// Both the indptr and the indices buffers are wrapped in Tensors that share
/// the same value type (indices_type). The offsets and axis order are handed
/// to the SparseCSFIndex constructor unchanged. Always returns Status::OK().
Status SparseCSFIndex::Make(const std::shared_ptr<DataType> indices_type,
                            const std::vector<int64_t>& indptr_shape,
                            const std::vector<int64_t>& indices_shape,
                            const std::vector<int64_t>& indptr_offsets,
                            const std::vector<int64_t>& indices_offsets,
                            const std::vector<int64_t>& axis_order,
                            std::shared_ptr<Buffer> indptr_data,
                            std::shared_ptr<Buffer> indices_data,
                            std::shared_ptr<SparseCSFIndex>* out) {
  // Wrap each raw buffer in a Tensor of the requested index value type.
  std::shared_ptr<Tensor> indptr_tensor =
      std::make_shared<Tensor>(indices_type, indptr_data, indptr_shape);
  std::shared_ptr<Tensor> indices_tensor =
      std::make_shared<Tensor>(indices_type, indices_data, indices_shape);

  // Assemble the index and publish it through the out-parameter.
  *out = std::make_shared<SparseCSFIndex>(indptr_tensor, indices_tensor,
                                          indptr_offsets, indices_offsets,
                                          axis_order);
  return Status::OK();
}

// Constructor with two index vectors
SparseCSFIndex::SparseCSFIndex(const std::shared_ptr<Tensor>& indptr,
const std::shared_ptr<Tensor>& indices,
Expand Down
11 changes: 11 additions & 0 deletions cpp/src/arrow/sparse_tensor.h
Expand Up @@ -349,6 +349,17 @@ class ARROW_EXPORT SparseCSFIndex : public internal::SparseIndexBase<SparseCSFIn
public:
static constexpr SparseTensorFormat::type format_id = SparseTensorFormat::CSF;

/// \brief Make SparseCSFIndex from raw properties
///
/// Wraps indptr_data and indices_data in Tensors and constructs the index
/// from them. The same value type (indices_type) is used for both tensors.
///
/// \param[in] indices_type value type of both the indptr and indices tensors
/// \param[in] indptr_shape shape of the tensor wrapping indptr_data
/// \param[in] indices_shape shape of the tensor wrapping indices_data
/// \param[in] indptr_offsets forwarded unchanged to the SparseCSFIndex constructor
/// \param[in] indices_offsets forwarded unchanged to the SparseCSFIndex constructor
/// \param[in] axis_order forwarded unchanged to the SparseCSFIndex constructor
/// \param[in] indptr_data buffer holding the indptr values
/// \param[in] indices_data buffer holding the indices values
/// \param[out] out receives the newly created SparseCSFIndex
/// \return Status::OK()
/// NOTE(review): the implementation appears to do no validation of its
/// arguments -- confirm whether callers are expected to validate.
static Status Make(const std::shared_ptr<DataType> indices_type,
const std::vector<int64_t>& indptr_shape,
const std::vector<int64_t>& indices_shape,
const std::vector<int64_t>& indptr_offsets,
const std::vector<int64_t>& indices_offsets,
const std::vector<int64_t>& axis_order,
std::shared_ptr<Buffer> indptr_data,
std::shared_ptr<Buffer> indices_data,
std::shared_ptr<SparseCSFIndex>* out);

/// \brief Construct SparseCSFIndex from two index vectors
explicit SparseCSFIndex(const std::shared_ptr<Tensor>& indptr,
const std::shared_ptr<Tensor>& indices,
Expand Down
68 changes: 36 additions & 32 deletions cpp/src/arrow/sparse_tensor_test.cc
Expand Up @@ -912,30 +912,37 @@ TEST_F(TestSparseCSCMatrix, TestToTensor) {

template <typename IndexValueType>
class TestSparseCSFTensorBase : public ::testing::Test {
public:
void SetUp() {
shape_ = {6, 4};
dim_names_ = {"foo", "bar"};

// Dense representation:
// [
// 1 0 2 0
// 0 3 0 4
// 5 0 6 0
// 0 11 0 12
// 13 0 14 0
// 0 15 0 16
// ]
std::vector<int64_t> dense_values = {1, 0, 2, 0, 0, 3, 0, 4, 5, 0, 6, 0,
0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16};
auto dense_data = Buffer::Wrap(dense_values);
NumericTensor<Int64Type> dense_tensor(dense_data, shape_, {}, dim_names_);
}

protected:
std::vector<int64_t> shape_;
std::vector<std::string> dim_names_;
std::shared_ptr<SparseCSFTensor> sparse_tensor_from_dense_;
public:
void SetUp() {
shape_ = {3, 3, 3, 4};
dim_names_ = {"a", "b", "c", "d"};

// COO representation:
// X[1, 1, 1, 2] := 1
// X[1, 1, 1, 3] := 2
// X[1, 2, 1, 1] := 3
// X[1, 2, 1, 3] := 4
// X[1, 2, 2, 1] := 5
// X[2, 2, 2, 1] := 6
// X[2, 2, 2, 2] := 7
// X[2, 2, 2, 3] := 8

std::vector<int64_t> dense_values = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8};
auto dense_data = Buffer::Wrap(dense_values);
NumericTensor<Int64Type> dense_tensor(dense_data, shape_, {}, dim_names_);
ASSERT_OK(SparseCSFTensor::Make(dense_tensor,
TypeTraits<IndexValueType>::type_singleton(),
&sparse_tensor_from_dense_));
}

protected:
std::vector<int64_t> shape_;
std::vector<std::string> dim_names_;
std::shared_ptr<SparseCSCMatrix> sparse_tensor_from_dense_;
};

class TestSparseCSFTensor : public TestSparseCSFTensorBase<Int64Type> {};
Expand All @@ -957,15 +964,12 @@ TEST_F(TestSparseCSFTensor, TestToTensor) {
std::shared_ptr<Buffer> indptr_buffer = Buffer::Wrap(indptr_values);
std::shared_ptr<Buffer> indices_buffer = Buffer::Wrap(indices_values);

std::shared_ptr<Tensor> indptr =
std::make_shared<Tensor>(int64(), indptr_buffer, indptr_shape);
std::shared_ptr<Tensor> indices =
std::make_shared<Tensor>(int64(), indices_buffer, indices_shape);

std::shared_ptr<SparseCSFIndex> sparse_index = std::make_shared<SparseCSFIndex>(
indptr, indices, indptr_offsets, indices_offsets, axis_order);
std::shared_ptr<SparseCSFIndex> si;
ASSERT_OK(SparseCSFIndex::Make(int64(), indptr_shape, indices_shape, indptr_offsets,
indices_offsets, axis_order, indptr_buffer,
indices_buffer, &si));
std::shared_ptr<SparseCSFTensor> sparse_tensor = std::make_shared<SparseCSFTensor>(
sparse_index, int64(), data_buffer, sparse_tensor_shape, dim_names);
si, int64(), data_buffer, sparse_tensor_shape, dim_names);

ASSERT_EQ(8, sparse_tensor->non_zero_length());

Expand Down

0 comments on commit f44d92c

Please sign in to comment.