Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate delete_array and delete_fragments from StorageManager to Array. #4880

Merged
merged 3 commits into from
May 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
100 changes: 84 additions & 16 deletions tiledb/sm/array/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -485,23 +485,48 @@ Status Array::close() {
return Status::Ok();
}

void Array::delete_array(const URI& uri) {
// Check that data deletion is allowed
ensure_array_is_valid_for_delete(uri);

// Delete array data
if (remote_) {
auto rest_client = resources_.rest_client();
if (rest_client == nullptr) {
throw ArrayException("[delete_array] Remote array with no REST client.");
void Array::delete_fragments(
ContextResources& resources,
const URI& uri,
uint64_t timestamp_start,
uint64_t timestamp_end,
std::optional<ArrayDirectory> array_dir) {
// Get the fragment URIs to be deleted
if (array_dir == std::nullopt) {
array_dir = ArrayDirectory(resources, uri, timestamp_start, timestamp_end);
}
auto filtered_fragment_uris = array_dir->filtered_fragment_uris(true);
const auto& fragment_uris = filtered_fragment_uris.fragment_uris();

// Retrieve commit uris to delete and ignore
std::vector<URI> commit_uris_to_delete;
std::vector<URI> commit_uris_to_ignore;
for (auto& fragment : fragment_uris) {
auto commit_uri = array_dir->get_commit_uri(fragment.uri_);
commit_uris_to_delete.emplace_back(commit_uri);
if (array_dir->consolidated_commit_uris_set().count(commit_uri.c_str()) !=
0) {
commit_uris_to_ignore.emplace_back(commit_uri);
}
rest_client->delete_array_from_rest(uri);
} else {
storage_manager_->delete_array(uri.c_str());
}

// Close the array
throw_if_not_ok(this->close());
// Write ignore file
if (commit_uris_to_ignore.size() != 0) {
array_dir->write_commit_ignore_file(commit_uris_to_ignore);
}

// Delete fragments and commits
auto vfs = &(resources.vfs());
throw_if_not_ok(parallel_for(
&resources.compute_tp(), 0, fragment_uris.size(), [&](size_t i) {
RETURN_NOT_OK(vfs->remove_dir(fragment_uris[i].uri_));
bool is_file = false;
RETURN_NOT_OK(vfs->is_file(commit_uris_to_delete[i], &is_file));
if (is_file) {
RETURN_NOT_OK(vfs->remove_file(commit_uris_to_delete[i]));
}
return Status::Ok();
}));
}

void Array::delete_fragments(
Expand All @@ -519,11 +544,54 @@ void Array::delete_fragments(
rest_client->post_delete_fragments_to_rest(
uri, this, timestamp_start, timestamp_end);
} else {
storage_manager_->delete_fragments(
uri.c_str(), timestamp_start, timestamp_end);
Array::delete_fragments(resources_, uri, timestamp_start, timestamp_end);
}
}

void Array::delete_array(ContextResources& resources, const URI& uri) {
auto& vfs = resources.vfs();
auto array_dir =
ArrayDirectory(resources, uri, 0, std::numeric_limits<uint64_t>::max());

// Delete fragments and commits
Array::delete_fragments(
resources, uri, 0, std::numeric_limits<uint64_t>::max(), array_dir);

// Delete array metadata, fragment metadata and array schema files
// Note: metadata files may not be present, try to delete anyway
vfs.remove_files(&resources.compute_tp(), array_dir.array_meta_uris());
vfs.remove_files(&resources.compute_tp(), array_dir.fragment_meta_uris());
vfs.remove_files(&resources.compute_tp(), array_dir.array_schema_uris());

// Delete all tiledb child directories
// Note: using vfs.ls() here could delete user data
std::vector<URI> dirs;
auto parent_dir = array_dir.uri().c_str();
for (auto array_dir_name : constants::array_dir_names) {
dirs.emplace_back(URI(parent_dir + array_dir_name));
}
vfs.remove_dirs(&resources.compute_tp(), dirs);
}

void Array::delete_array(const URI& uri) {
// Check that data deletion is allowed
ensure_array_is_valid_for_delete(uri);

// Delete array data
if (uri.is_tiledb()) {
auto rest_client = resources_.rest_client();
if (rest_client == nullptr) {
throw ArrayException("[delete_array] Remote array with no REST client.");
}
rest_client->delete_array_from_rest(uri);
} else {
Array::delete_array(resources_, uri);
}

// Close the array
throw_if_not_ok(this->close());
}

void Array::delete_fragments_list(const std::vector<URI>& fragment_uris) {
// Check that data deletion is allowed
ensure_array_is_valid_for_delete(array_uri_);
Expand Down
62 changes: 57 additions & 5 deletions tiledb/sm/array/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -420,26 +420,78 @@ class Array {
Status close();

/**
* Deletes the Array data with given URI.
* Performs deletion of local fragments with the given parent URI,
* between the provided timestamps.
*
* @param uri The uri of the Array whose data is to be deleted.
* @param resources The context resources.
* @param uri The uri of the Array whose fragments are to be deleted.
* @param timestamp_start The start timestamp at which to delete fragments.
* @param timestamp_end The end timestamp at which to delete fragments.
* @param array_dir An optional ArrayDirectory from which to delete fragments.
*
* @pre The Array must be open for exclusive writes
* @section Maturity Notes
* This is legacy code, ported from StorageManager during its removal process.
* Its existence supports the non-static `delete_fragments` API below,
* performing the actual deletion of fragments. This function is slated for
* removal and should be directly integrated into the function below.
*/
void delete_array(const URI& uri);
static void delete_fragments(
ContextResources& resources,
const URI& uri,
uint64_t timestamp_start,
uint64_t timstamp_end,
std::optional<ArrayDirectory> array_dir = std::nullopt);

/**
* Deletes the fragments from the Array with given URI.
* Handles local and remote deletion of fragments between the provided
* timestamps from an open array with the given URI.
*
* @param uri The uri of the Array whose fragments are to be deleted.
* @param timestamp_start The start timestamp at which to delete fragments.
* @param timestamp_end The end timestamp at which to delete fragments.
*
* @pre The Array must be open for exclusive writes
*
* @section Maturity Notes
* This API is an interim version of its final product, awaiting rewrite.
* As is, it handles the incoming URI and invokes the remote or local function
* call accordingly. The local, static function above is legacy code which
* exists only to support this function. A rewrite should integrate the two
* and remove the need for any static APIs.
*/
void delete_fragments(
const URI& uri, uint64_t timestamp_start, uint64_t timstamp_end);

/**
* Performs deletion of the local array with the given parent URI.
*
* @param resources The context resources.
* @param uri The uri of the Array whose data is to be deleted.
*
* @section Maturity Notes
* This is legacy code, ported from StorageManager during its removal process.
* Its existence supports the non-static `delete_array` API below,
* performing the actual deletion of array data. This function is slated for
* removal and should be directly integrated into the function below.
*/
static void delete_array(ContextResources& resources, const URI& uri);
bekadavis9 marked this conversation as resolved.
Show resolved Hide resolved

/**
* Handles local and remote deletion of an open array with the given URI.
*
* @param uri The uri of the Array whose data is to be deleted.
*
* @pre The Array must be open for exclusive writes
*
* @section Maturity Notes
* This API is an interim version of its final product, awaiting rewrite.
* As is, it handles the incoming URI and invokes the remote or local function
* call accordingly. The local, static function above is legacy code which
* exists only to support this function. A rewrite should integrate the two
* and remove the need for any static APIs.
*/
void delete_array(const URI& uri);

/**
* Deletes the fragments with the given URIs from the Array with given URI.
*
Expand Down
3 changes: 2 additions & 1 deletion tiledb/sm/group/group.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "tiledb/common/logger.h"
#include "tiledb/common/memory_tracker.h"
#include "tiledb/common/stdx_string.h"
#include "tiledb/sm/array/array.h"
#include "tiledb/sm/enums/datatype.h"
#include "tiledb/sm/enums/encryption_type.h"
#include "tiledb/sm/enums/query_type.h"
Expand Down Expand Up @@ -324,7 +325,7 @@ void Group::delete_group(const URI& uri, bool recursive) {
}

if (member->type() == ObjectType::ARRAY) {
storage_manager_->delete_array(member_uri.to_string().c_str());
Array::delete_array(resources_, member_uri);
} else if (member->type() == ObjectType::GROUP) {
Group group_rec(resources_, member_uri, storage_manager_);
throw_if_not_ok(group_rec.open(QueryType::MODIFY_EXCLUSIVE));
Expand Down
70 changes: 0 additions & 70 deletions tiledb/sm/storage_manager/storage_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -151,76 +151,6 @@ Status StorageManagerCanonical::group_close_for_writes(Group* group) {
return Status::Ok();
}

void StorageManagerCanonical::delete_array(const char* array_name) {
if (array_name == nullptr) {
throw std::invalid_argument("[delete_array] Array name cannot be null");
}

// Delete fragments and commits
delete_fragments(array_name, 0, std::numeric_limits<uint64_t>::max());

auto array_dir = ArrayDirectory(
resources(), URI(array_name), 0, std::numeric_limits<uint64_t>::max());

// Delete array metadata, fragment metadata and array schema files
// Note: metadata files may not be present, try to delete anyway
vfs()->remove_files(compute_tp(), array_dir.array_meta_uris());
vfs()->remove_files(compute_tp(), array_dir.fragment_meta_uris());
vfs()->remove_files(compute_tp(), array_dir.array_schema_uris());

// Delete all tiledb child directories
// Note: using vfs()->ls() here could delete user data
std::vector<URI> dirs;
auto parent_dir = array_dir.uri().c_str();
for (auto array_dir_name : constants::array_dir_names) {
dirs.emplace_back(URI(parent_dir + array_dir_name));
}
vfs()->remove_dirs(compute_tp(), dirs);
}

void StorageManagerCanonical::delete_fragments(
const char* array_name, uint64_t timestamp_start, uint64_t timestamp_end) {
if (array_name == nullptr) {
throw std::invalid_argument("[delete_fragments] Array name cannot be null");
}

auto array_dir = ArrayDirectory(
resources(), URI(array_name), timestamp_start, timestamp_end);

// Get the fragment URIs to be deleted
auto filtered_fragment_uris = array_dir.filtered_fragment_uris(true);
const auto& fragment_uris = filtered_fragment_uris.fragment_uris();

// Retrieve commit uris to delete and ignore
std::vector<URI> commit_uris_to_delete;
std::vector<URI> commit_uris_to_ignore;
for (auto& fragment : fragment_uris) {
auto commit_uri = array_dir.get_commit_uri(fragment.uri_);
commit_uris_to_delete.emplace_back(commit_uri);
if (array_dir.consolidated_commit_uris_set().count(commit_uri.c_str()) !=
0) {
commit_uris_to_ignore.emplace_back(commit_uri);
}
}

// Write ignore file
if (commit_uris_to_ignore.size() != 0) {
array_dir.write_commit_ignore_file(commit_uris_to_ignore);
}

// Delete fragments and commits
throw_if_not_ok(
parallel_for(compute_tp(), 0, fragment_uris.size(), [&](size_t i) {
RETURN_NOT_OK(vfs()->remove_dir(fragment_uris[i].uri_));
bool is_file = false;
RETURN_NOT_OK(vfs()->is_file(commit_uris_to_delete[i], &is_file));
if (is_file) {
RETURN_NOT_OK(vfs()->remove_file(commit_uris_to_delete[i]));
}
return Status::Ok();
}));
}

Status StorageManagerCanonical::array_create(
const URI& array_uri,
const shared_ptr<ArraySchema>& array_schema,
Expand Down
17 changes: 0 additions & 17 deletions tiledb/sm/storage_manager/storage_manager_canonical.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,23 +171,6 @@ class StorageManagerCanonical {
tdb_shared_ptr<GroupDetails> group,
const EncryptionKey& encryption_key);

/**
* Cleans up the array data.
*
* @param array_name The name of the array whose data is to be deleted.
*/
void delete_array(const char* array_name);

/**
* Cleans up the array fragments.
*
* @param array_name The name of the array whose fragments are to be deleted.
* @param timestamp_start The start timestamp at which to delete.
* @param timestamp_end The end timestamp at which to delete.
*/
void delete_fragments(
const char* array_name, uint64_t timestamp_start, uint64_t timestamp_end);

/**
* Creates a TileDB array storing its schema.
*
Expand Down