From cc4977c62a8e53e6f672dcf0826fbad4204d3b31 Mon Sep 17 00:00:00 2001 From: bekadavis9 <31743465+bekadavis9@users.noreply.github.com> Date: Sun, 27 Feb 2022 21:05:16 -0500 Subject: [PATCH] Store shared_ptr for Attribute in ArraySchema (#2887) * Convert ArraySchema's Atrribute ptr to smart ptr * Change declarations of attributes_ and attribute_map_ * Address comments * Remove copies, address comments * Address comments Co-authored-by: Seth Shelnutt --- test/src/unit-QueryCondition.cc | 36 ++++++++++++------- test/src/unit-filter-pipeline.cc | 4 +-- test/src/unit-tile-metadata-generator.cc | 8 ++--- tiledb/sm/array_schema/array_schema.cc | 28 +++++++-------- tiledb/sm/array_schema/array_schema.h | 16 +++++---- .../sm/array_schema/array_schema_evolution.cc | 8 +---- .../sm/array_schema/array_schema_evolution.h | 4 +-- tiledb/sm/array_schema/attribute.cc | 2 +- tiledb/sm/array_schema/attribute.h | 2 +- tiledb/sm/c_api/tiledb.cc | 8 ++++- tiledb/sm/fragment/fragment_metadata.cc | 3 +- tiledb/sm/query/query_condition.cc | 3 +- tiledb/sm/query/result_tile.cc | 2 +- tiledb/sm/serialization/array_schema.cc | 4 +-- tools/src/commands/info_command.cc | 2 +- 15 files changed, 71 insertions(+), 59 deletions(-) diff --git a/test/src/unit-QueryCondition.cc b/test/src/unit-QueryCondition.cc index 9281fa80386..7a1479becee 100644 --- a/test/src/unit-QueryCondition.cc +++ b/test/src/unit-QueryCondition.cc @@ -711,7 +711,8 @@ void test_apply(const Datatype type, bool var_size, bool nullable) { REQUIRE(attr.set_fill_value(fill_value, 2 * sizeof(char)).ok()); } - REQUIRE(array_schema.add_attribute(&attr).ok()); + REQUIRE(array_schema.add_attribute(tdb::make_shared(HERE(), &attr)) + .ok()); Domain domain; Dimension dim("dim1", Datatype::UINT32); uint32_t bounds[2] = {1, cells}; @@ -743,7 +744,8 @@ void test_apply(const Datatype type, bool var_size, bool nullable) { Attribute attr(field_name, type); REQUIRE(attr.set_cell_val_num(1).ok()); REQUIRE(attr.set_fill_value(&fill_value, sizeof(T)).ok()); - REQUIRE(array_schema.add_attribute(&attr).ok()); + REQUIRE(array_schema.add_attribute(tdb::make_shared(HERE(), &attr)) + .ok()); Domain domain; Dimension dim("dim1", Datatype::UINT32); uint32_t bounds[2] = {1, cells}; @@ -798,7 +800,8 @@ TEST_CASE( // Initialize the array schema. ArraySchema array_schema; Attribute attr(field_name, type); - REQUIRE(array_schema.add_attribute(&attr).ok()); + REQUIRE(array_schema.add_attribute(tdb::make_shared(HERE(), &attr)) + .ok()); Domain domain; Dimension dim("dim1", Datatype::UINT32); uint32_t bounds[2] = {1, cells}; @@ -897,7 +900,8 @@ TEST_CASE( REQUIRE(attr.set_fill_value(fill_value, 2 * sizeof(char)).ok()); } - REQUIRE(array_schema.add_attribute(&attr).ok()); + REQUIRE(array_schema.add_attribute(tdb::make_shared(HERE(), &attr)) + .ok()); Domain domain; Dimension dim("dim1", Datatype::UINT32); uint32_t bounds[2] = {1, cells}; @@ -1461,7 +1465,8 @@ void test_apply_dense( REQUIRE(attr.set_fill_value(fill_value, 2 * sizeof(char)).ok()); } - REQUIRE(array_schema.add_attribute(&attr).ok()); + REQUIRE(array_schema.add_attribute(tdb::make_shared(HERE(), &attr)) + .ok()); Domain domain; Dimension dim("dim1", Datatype::UINT32); uint32_t bounds[2] = {1, cells}; @@ -1494,7 +1499,8 @@ void test_apply_dense(const Datatype type, bool var_size, bool nullable) { Attribute attr(field_name, type); REQUIRE(attr.set_cell_val_num(1).ok()); REQUIRE(attr.set_fill_value(&fill_value, sizeof(T)).ok()); - REQUIRE(array_schema.add_attribute(&attr).ok()); + REQUIRE(array_schema.add_attribute(tdb::make_shared(HERE(), &attr)) + .ok()); Domain domain; Dimension dim("dim1", Datatype::UINT32); uint32_t bounds[2] = {1, cells}; @@ -1552,7 +1558,8 @@ TEST_CASE( // Initialize the array schema. ArraySchema array_schema; Attribute attr(field_name, type); - REQUIRE(array_schema.add_attribute(&attr).ok()); + REQUIRE(array_schema.add_attribute(tdb::make_shared(HERE(), &attr)) + .ok()); Domain domain; Dimension dim("dim1", Datatype::UINT32); uint32_t bounds[2] = {1, cells}; @@ -1654,7 +1661,8 @@ TEST_CASE( REQUIRE(attr.set_fill_value(fill_value, 2 * sizeof(char)).ok()); } - REQUIRE(array_schema.add_attribute(&attr).ok()); + REQUIRE(array_schema.add_attribute(tdb::make_shared(HERE(), &attr)) + .ok()); Domain domain; Dimension dim("dim1", Datatype::UINT32); uint32_t bounds[2] = {1, cells}; @@ -2221,7 +2229,8 @@ void test_apply_sparse( REQUIRE(attr.set_fill_value(fill_value, 2 * sizeof(char)).ok()); } - REQUIRE(array_schema.add_attribute(&attr).ok()); + REQUIRE(array_schema.add_attribute(tdb::make_shared(HERE(), &attr)) + .ok()); Domain domain; Dimension dim("dim1", Datatype::UINT32); uint32_t bounds[2] = {1, cells}; @@ -2254,7 +2263,8 @@ void test_apply_sparse(const Datatype type, bool var_size, bool nullable) { Attribute attr(field_name, type); REQUIRE(attr.set_cell_val_num(1).ok()); REQUIRE(attr.set_fill_value(&fill_value, sizeof(T)).ok()); - REQUIRE(array_schema.add_attribute(&attr).ok()); + REQUIRE(array_schema.add_attribute(tdb::make_shared(HERE(), &attr)) + .ok()); Domain domain; Dimension dim("dim1", Datatype::UINT32); uint32_t bounds[2] = {1, cells}; @@ -2312,7 +2322,8 @@ TEST_CASE( // Initialize the array schema. ArraySchema array_schema; Attribute attr(field_name, type); - REQUIRE(array_schema.add_attribute(&attr).ok()); + REQUIRE(array_schema.add_attribute(tdb::make_shared(HERE(), &attr)) + .ok()); Domain domain; Dimension dim("dim1", Datatype::UINT32); uint32_t bounds[2] = {1, cells}; @@ -2415,7 +2426,8 @@ TEST_CASE( REQUIRE(attr.set_fill_value(fill_value, 2 * sizeof(char)).ok()); } - REQUIRE(array_schema.add_attribute(&attr).ok()); + REQUIRE(array_schema.add_attribute(tdb::make_shared(HERE(), &attr)) + .ok()); Domain domain; Dimension dim("dim1", Datatype::UINT32); uint32_t bounds[2] = {1, cells}; diff --git a/test/src/unit-filter-pipeline.cc b/test/src/unit-filter-pipeline.cc index 01db1d745a7..8a4cd9ffcc3 100644 --- a/test/src/unit-filter-pipeline.cc +++ b/test/src/unit-filter-pipeline.cc @@ -1550,7 +1550,7 @@ TEST_CASE("Filter: Test compression", "[filter][compression]") { domain.add_dimension(&dim); tiledb::sm::ArraySchema schema; tiledb::sm::Attribute attr("attr", Datatype::UINT64); - schema.add_attribute(&attr); + schema.add_attribute(tdb::make_shared(HERE(), &attr)); schema.set_domain(&domain); schema.init(); @@ -1710,7 +1710,7 @@ TEST_CASE("Filter: Test compression var", "[filter][compression][var]") { domain.add_dimension(&dim); tiledb::sm::ArraySchema schema; tiledb::sm::Attribute attr("attr", Datatype::UINT64); - schema.add_attribute(&attr); + schema.add_attribute(tdb::make_shared(HERE(), &attr)); schema.set_domain(&domain); schema.init(); diff --git a/test/src/unit-tile-metadata-generator.cc b/test/src/unit-tile-metadata-generator.cc index d9c53f69af4..3ab2c8b64d9 100644 --- a/test/src/unit-tile-metadata-generator.cc +++ b/test/src/unit-tile-metadata-generator.cc @@ -72,7 +72,7 @@ TEMPLATE_LIST_TEST_CASE( ArraySchema schema; Attribute a("a", (Datatype)type.tiledb_type); a.set_cell_val_num(cell_val_num); - schema.add_attribute(&a); + schema.add_attribute(tdb::make_shared(HERE(), &a)); // Generate random, sorted strings for the string ascii type. std::vector string_ascii; @@ -238,7 +238,7 @@ TEMPLATE_LIST_TEST_CASE( // Generate the array schema. ArraySchema schema; Attribute a("a", (Datatype)type.tiledb_type); - schema.add_attribute(&a); + schema.add_attribute(tdb::make_shared(HERE(), &a)); // Initialize a new tile. uint64_t num_cells = 4; @@ -320,7 +320,7 @@ TEST_CASE( ArraySchema schema; Attribute a("a", Datatype::STRING_ASCII); a.set_cell_val_num(constants::var_num); - schema.add_attribute(&a); + schema.add_attribute(tdb::make_shared(HERE(), &a)); // Generate random, sorted strings for the string ascii type. std::vector strings; @@ -433,7 +433,7 @@ TEST_CASE( ArraySchema schema; Attribute a("a", Datatype::CHAR); a.set_cell_val_num(constants::var_num); - schema.add_attribute(&a); + schema.add_attribute(tdb::make_shared(HERE(), &a)); // Store '123' and '12' // Initialize offsets tile. diff --git a/tiledb/sm/array_schema/array_schema.cc b/tiledb/sm/array_schema/array_schema.cc index 27d76968044..c5f5ef7017f 100644 --- a/tiledb/sm/array_schema/array_schema.cc +++ b/tiledb/sm/array_schema/array_schema.cc @@ -140,7 +140,7 @@ const URI& ArraySchema::array_uri() const { const Attribute* ArraySchema::attribute(unsigned int id) const { if (id < attributes_.size()) - return attributes_[id]; + return attributes_[id].get(); return nullptr; } @@ -153,7 +153,8 @@ unsigned int ArraySchema::attribute_num() const { return (unsigned)attributes_.size(); } -const std::vector& ArraySchema::attributes() const { +const std::vector>& ArraySchema::attributes() + const { return attributes_; } @@ -388,7 +389,7 @@ bool ArraySchema::is_field(const std::string& name) const { } bool ArraySchema::is_nullable(const std::string& name) const { - const Attribute* const attr = this->attribute(name); + auto attr = this->attribute(name); if (attr == nullptr) return false; return attr->nullable(); @@ -493,7 +494,8 @@ bool ArraySchema::var_size(const std::string& name) const { return false; } -Status ArraySchema::add_attribute(const Attribute* attr, bool check_special) { +Status ArraySchema::add_attribute( + shared_ptr attr, bool check_special) { // Sanity check if (attr == nullptr) return LOG_STATUS(Status_ArraySchemaError( @@ -507,9 +509,8 @@ Status ArraySchema::add_attribute(const Attribute* attr, bool check_special) { } // Create new attribute and potentially set a default name - auto new_attr = tdb_new(Attribute, attr); - attributes_.emplace_back(new_attr); - attribute_map_[new_attr->name()] = new_attr; + attributes_.emplace_back(attr); + attribute_map_[attr->name()] = attr.get(); RETURN_NOT_OK(generate_uri()); return Status::Ok(); @@ -531,11 +532,10 @@ Status ArraySchema::drop_attribute(const std::string& attr_name) { // Iterate backwards and remove the attribute pointer, it should be slightly // faster than iterating forward. - std::vector::iterator it; + decltype(attributes_)::iterator it; for (it = attributes_.end(); it != attributes_.begin();) { --it; if ((*it)->name() == attr_name) { - tdb_delete(*it); it = attributes_.erase(it); } } @@ -592,9 +592,10 @@ Status ArraySchema::deserialize(ConstBuffer* buff) { return st_attr; } - Attribute* attr_ptr = tdb_new(Attribute, std::move(attr.value())); - attributes_.emplace_back(attr_ptr); - attribute_map_[attr_ptr->name()] = attr_ptr; + attributes_.emplace_back( + tdb::make_shared(HERE(), move(attr.value()))); + auto a = attributes_.back().get(); + attribute_map_[a->name()] = a; } // Create dimension map @@ -852,9 +853,6 @@ void ArraySchema::clear() { capacity_ = constants::capacity; cell_order_ = Layout::ROW_MAJOR; tile_order_ = Layout::ROW_MAJOR; - - for (auto& attr : attributes_) - tdb_delete(attr); attributes_.clear(); tdb_delete(domain_); diff --git a/tiledb/sm/array_schema/array_schema.h b/tiledb/sm/array_schema/array_schema.h index 50029b1791e..b19b9f199eb 100644 --- a/tiledb/sm/array_schema/array_schema.h +++ b/tiledb/sm/array_schema/array_schema.h @@ -36,6 +36,7 @@ #include +#include "tiledb/common/common.h" #include "tiledb/common/status.h" #include "tiledb/sm/filesystem/uri.h" #include "tiledb/sm/filter/filter_pipeline.h" @@ -114,7 +115,7 @@ class ArraySchema { unsigned int attribute_num() const; /** Returns the attributes. */ - const std::vector& attributes() const; + const std::vector>& attributes() const; /** Returns the capacity. */ uint64_t capacity() const; @@ -239,7 +240,8 @@ class ArraySchema { * they are doing in this case). * @return Status */ - Status add_attribute(const Attribute* attr, bool check_special = true); + Status add_attribute( + shared_ptr attr, bool check_special = true); /** * Drops an attribute. @@ -361,11 +363,13 @@ class ArraySchema { /** The array type. */ ArrayType array_type_; - /** It maps each attribute name to the corresponding attribute object. */ - std::unordered_map attribute_map_; + /** It maps each attribute name to the corresponding attribute object. + * Lifespan is maintained by the shared_ptr in attributes_. */ + std::unordered_map attribute_map_; - /** The array attributes. */ - std::vector attributes_; + /** The array attributes. + * Maintains lifespan for elements in both attributes_ and attribute_map_. */ + std::vector> attributes_; /** * The tile capacity for the case of sparse fragments. */ diff --git a/tiledb/sm/array_schema/array_schema_evolution.cc b/tiledb/sm/array_schema/array_schema_evolution.cc index a13109f412a..77ca9069367 100644 --- a/tiledb/sm/array_schema/array_schema_evolution.cc +++ b/tiledb/sm/array_schema/array_schema_evolution.cc @@ -84,7 +84,7 @@ Status ArraySchemaEvolution::evolve_schema( // Add attributes. for (auto& attr : attributes_to_add_map_) { - RETURN_NOT_OK(schema->add_attribute(attr.second.get(), false)); + RETURN_NOT_OK(schema->add_attribute(attr.second, false)); } // Drop attributes. @@ -156,7 +156,6 @@ Status ArraySchemaEvolution::drop_attribute(const std::string& attribute_name) { if (attributes_to_add_map_.find(attribute_name) != attributes_to_add_map_.end()) { // Reset the pointer and erase it - attributes_to_add_map_[attribute_name].reset(nullptr); attributes_to_add_map_.erase(attribute_name); } return Status::Ok(); @@ -189,13 +188,8 @@ std::pair ArraySchemaEvolution::timestamp_range() const { /* ****************************** */ void ArraySchemaEvolution::clear() { - for (auto& attr : attributes_to_add_map_) { - attr.second.reset(nullptr); - } attributes_to_add_map_.clear(); - attributes_to_drop_.clear(); - timestamp_range_ = {0, 0}; } diff --git a/tiledb/sm/array_schema/array_schema_evolution.h b/tiledb/sm/array_schema/array_schema_evolution.h index a84c12c51d0..7dc31b40a61 100644 --- a/tiledb/sm/array_schema/array_schema_evolution.h +++ b/tiledb/sm/array_schema/array_schema_evolution.h @@ -37,6 +37,7 @@ #include #include +#include "tiledb/common/common.h" #include "tiledb/common/status.h" #include "tiledb/sm/filesystem/uri.h" #include "tiledb/sm/filter/filter_pipeline.h" @@ -123,8 +124,7 @@ class ArraySchemaEvolution { /** The array attributes to be added. */ /** It maps each attribute name to the corresponding attribute object. */ - std::unordered_map> - attributes_to_add_map_; + std::unordered_map> attributes_to_add_map_; /** The names of array attributes to be dropped. */ std::unordered_set attributes_to_drop_; diff --git a/tiledb/sm/array_schema/attribute.cc b/tiledb/sm/array_schema/attribute.cc index f997772ab3a..f78d66b1e1d 100644 --- a/tiledb/sm/array_schema/attribute.cc +++ b/tiledb/sm/array_schema/attribute.cc @@ -214,7 +214,7 @@ const std::string& Attribute::name() const { // fill_value (uint8_t[]) // nullable (bool) // fill_value_validity (uint8_t) -Status Attribute::serialize(Buffer* buff, const uint32_t version) { +const Status Attribute::serialize(Buffer* buff, const uint32_t version) const { // Write attribute name auto attribute_name_size = (uint32_t)name_.size(); RETURN_NOT_OK(buff->write(&attribute_name_size, sizeof(uint32_t))); diff --git a/tiledb/sm/array_schema/attribute.h b/tiledb/sm/array_schema/attribute.h index 4553e30ff33..f46a3566ab7 100644 --- a/tiledb/sm/array_schema/attribute.h +++ b/tiledb/sm/array_schema/attribute.h @@ -152,7 +152,7 @@ class Attribute { * @param version The format spec version. * @return Status */ - Status serialize(Buffer* buff, uint32_t version); + const Status serialize(Buffer* buff, uint32_t version) const; /** * Sets the attribute number of values per cell. Note that if the attribute diff --git a/tiledb/sm/c_api/tiledb.cc b/tiledb/sm/c_api/tiledb.cc index 1980c8dddf8..71aeaf06a0f 100644 --- a/tiledb/sm/c_api/tiledb.cc +++ b/tiledb/sm/c_api/tiledb.cc @@ -2267,8 +2267,14 @@ int32_t tiledb_array_schema_add_attribute( sanity_check(ctx, array_schema) == TILEDB_ERR || sanity_check(ctx, attr) == TILEDB_ERR) return TILEDB_ERR; + /** Note: The call to make_shared creates a copy of the attribute and + * the user-visible handle to the attr no longer refers to the same object + * that's in the array_schema. + **/ if (SAVE_ERROR_CATCH( - ctx, array_schema->array_schema_->add_attribute(attr->attr_))) + ctx, + array_schema->array_schema_->add_attribute( + tdb::make_shared(HERE(), attr->attr_)))) return TILEDB_ERR; return TILEDB_OK; } diff --git a/tiledb/sm/fragment/fragment_metadata.cc b/tiledb/sm/fragment/fragment_metadata.cc index f04ea820595..0a08e940337 100644 --- a/tiledb/sm/fragment/fragment_metadata.cc +++ b/tiledb/sm/fragment/fragment_metadata.cc @@ -985,8 +985,7 @@ tuple> FragmentMetadata::encode_name( const unsigned idx = iter->second; - const std::vector attributes = - array_schema_->attributes(); + auto attributes = array_schema_->attributes(); for (unsigned i = 0; i < attributes.size(); ++i) { const std::string attr_name = attributes[i]->name(); if (attr_name == name) { diff --git a/tiledb/sm/query/query_condition.cc b/tiledb/sm/query/query_condition.cc index a07c44aa16c..1d9e7e47047 100644 --- a/tiledb/sm/query/query_condition.cc +++ b/tiledb/sm/query/query_condition.cc @@ -594,8 +594,7 @@ QueryCondition::apply_clause( const ArraySchema* const array_schema, const uint64_t stride, const std::vector& result_cell_slabs) const { - const Attribute* const attribute = - array_schema->attribute(clause.field_name_); + auto attribute = array_schema->attribute(clause.field_name_); if (!attribute) { return { Status_QueryConditionError("Unknown attribute " + clause.field_name_), diff --git a/tiledb/sm/query/result_tile.cc b/tiledb/sm/query/result_tile.cc index 6c987ddfae0..d9ab473dde9 100644 --- a/tiledb/sm/query/result_tile.cc +++ b/tiledb/sm/query/result_tile.cc @@ -59,7 +59,7 @@ ResultTile::ResultTile( coord_tiles_.resize(domain_->dim_num()); attr_tiles_.resize(array_schema->attribute_num()); for (uint64_t i = 0; i < array_schema->attribute_num(); i++) { - const Attribute* attribute = array_schema->attribute(i); + auto attribute = array_schema->attribute(i); attr_tiles_[i] = std::make_pair(attribute->name(), nullopt); } set_compute_results_func(); diff --git a/tiledb/sm/serialization/array_schema.cc b/tiledb/sm/serialization/array_schema.cc index 3cafbeed965..fa5079b3f9a 100644 --- a/tiledb/sm/serialization/array_schema.cc +++ b/tiledb/sm/serialization/array_schema.cc @@ -560,7 +560,7 @@ Status array_schema_from_capnp( for (auto attr_reader : attributes_reader) { tdb_unique_ptr attribute; RETURN_NOT_OK(attribute_from_capnp(attr_reader, &attribute)); - RETURN_NOT_OK((*array_schema)->add_attribute(attribute.get(), false)); + RETURN_NOT_OK((*array_schema)->add_attribute(std::move(attribute), false)); } // Set the range if we have two values @@ -1113,7 +1113,7 @@ Status max_buffer_sizes_serialize( const auto& attrs = schema->attributes(); std::set attr_names; attr_names.insert(constants::coords); - for (const auto* a : attrs) + for (const auto& a : attrs) attr_names.insert(a->name()); // Get max buffer size for each attribute from the given Array instance diff --git a/tools/src/commands/info_command.cc b/tools/src/commands/info_command.cc index ad076ba28e9..a8e412fe9ea 100644 --- a/tools/src/commands/info_command.cc +++ b/tools/src/commands/info_command.cc @@ -184,7 +184,7 @@ void InfoCommand::print_tile_sizes() const { process_attr(constants::coords, false); // Dump info about the rest of the attributes - for (const auto* attr : attributes) + for (const auto& attr : attributes) process_attr(attr->name(), attr->var_size()); std::cout << "Sum of attribute persisted size: " << total_persisted_size