Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BugFix] Acquire rowsets at querying #13830

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
26 changes: 26 additions & 0 deletions be/src/exec/pipeline/scan/olap_scan_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#include "exec/vectorized/olap_scan_node.h"
#include "exprs/vectorized/runtime_filter_bank.h"
#include "storage/tablet.h"

namespace starrocks::pipeline {

Expand Down Expand Up @@ -38,6 +39,31 @@ Status OlapScanContext::prepare(RuntimeState* state) {

// Closes the shared chunk buffer, then calls Rowset::release_readers() once for
// each per-tablet rowset list stored in _tablet_rowsets.
void OlapScanContext::close(RuntimeState* state) {
    _chunk_buffer.close();

    // Pairs with the Rowset::acquire_readers() call made in capture_tablet_rowsets().
    for (const auto& captured_rowsets : _tablet_rowsets) {
        Rowset::release_readers(captured_rowsets);
    }
}

// Resolves the tablet of every scan range, captures the consistent rowsets for
// versions [0, scan_range->version] and acquires readers on them.
//
// Results are stored positionally: _tablets[i] and _tablet_rowsets[i] belong to
// olap_scan_ranges[i].
//
// Returns the first error reported by OlapScanNode::get_tablet() or
// Tablet::capture_consistent_rowsets(); Status::OK() otherwise.
Status OlapScanContext::capture_tablet_rowsets(const std::vector<TInternalScanRange*>& olap_scan_ranges) {
    const size_t num_ranges = olap_scan_ranges.size();
    _tablet_rowsets.resize(num_ranges);
    _tablets.resize(num_ranges);

    for (size_t i = 0; i < num_ranges; ++i) {
        auto* scan_range = olap_scan_ranges[i];

        // The version arrives as a decimal string in the scan range.
        int64_t version = strtoul(scan_range->version.c_str(), nullptr, 10);
        ASSIGN_OR_RETURN(TabletSharedPtr tablet, vectorized::OlapScanNode::get_tablet(scan_range));

        // Capture into a local list and publish it only after the readers were acquired.
        // close() calls Rowset::release_readers() on everything in _tablet_rowsets, so a
        // list left behind by a failed capture must never end up there.
        // NOTE(review): assumes capture_consistent_rowsets() may leave entries in the
        // output vector on failure; confirm against Tablet.
        std::vector<RowsetSharedPtr> captured_rowsets;
        {
            // Capture and acquire under the tablet header lock (shared mode).
            std::shared_lock l(tablet->get_header_lock());
            RETURN_IF_ERROR(tablet->capture_consistent_rowsets(Version(0, version), &captured_rowsets));
            Rowset::acquire_readers(captured_rowsets);
        }

        _tablet_rowsets[i] = std::move(captured_rowsets);
        _tablets[i] = std::move(tablet);
    }

    return Status::OK();
}

Status OlapScanContext::parse_conjuncts(RuntimeState* state, const std::vector<ExprContext*>& runtime_in_filters,
Expand Down
15 changes: 15 additions & 0 deletions be/src/exec/pipeline/scan/olap_scan_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@
namespace starrocks {

// Forward declarations and shared_ptr aliases for types that this header only
// refers to through pointers / shared_ptr handles in the lines below.
class ScanNode;
class Tablet;
using TabletSharedPtr = std::shared_ptr<Tablet>;
class Rowset;
using RowsetSharedPtr = std::shared_ptr<Rowset>;

namespace vectorized {
class RuntimeFilterProbeCollector;
Expand Down Expand Up @@ -56,6 +60,10 @@ class OlapScanContext final : public ContextWithDependency {
bool has_active_input() const;
BalancedChunkBuffer& get_shared_buffer();

// Captures the tablet and its rowsets for each scan range into _tablets / _tablet_rowsets.
Status capture_tablet_rowsets(const std::vector<TInternalScanRange*>& olap_scan_ranges);
// Tablets stored by capture_tablet_rowsets(), one per scan range.
const std::vector<TabletSharedPtr>& tablets() const { return _tablets; }
// Rowsets stored by capture_tablet_rowsets(); element i belongs to tablets()[i].
const std::vector<std::vector<RowsetSharedPtr>>& tablet_rowsets() const { return _tablet_rowsets; }

private:
vectorized::OlapScanNode* _scan_node;

Expand All @@ -77,6 +85,13 @@ class OlapScanContext final : public ContextWithDependency {
bool _shared_scan; // Enable shared_scan

std::atomic<bool> _is_prepare_finished{false};

// Rowsets of a tablet become stale and are deleted if a compaction occurs while
// nothing references them. This typically happens when the tablets of the left
// table are compacted while the right hash table is being built. Therefore the
// rowsets are referenced from _tablet_rowsets in the preparation phase, which
// keeps them from being deleted.
std::vector<TabletSharedPtr> _tablets;
std::vector<std::vector<RowsetSharedPtr>> _tablet_rowsets;
};

// OlapScanContextFactory creates different contexts for each scan operator, if _shared_scan is false.
Expand Down
32 changes: 6 additions & 26 deletions be/src/exec/pipeline/scan/olap_scan_prepare_operator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,10 @@ OlapScanPrepareOperator::~OlapScanPrepareOperator() {
// Prepares the operator and its scan context, then has the context capture the
// tablets and rowsets of every scan range in the morsel queue.
//
// The capture is done by the scan context rather than by this operator:
// OlapScanContext::close() is the place that calls Rowset::release_readers() on
// the captured rowsets, and pull_chunk() hands _ctx->tablets() and
// _ctx->tablet_rowsets() to the morsel queue.
//
// Returns the first error from SourceOperator::prepare(), the context's
// prepare() or the rowset capture; Status::OK() otherwise.
Status OlapScanPrepareOperator::prepare(RuntimeState* state) {
    RETURN_IF_ERROR(SourceOperator::prepare(state));

    RETURN_IF_ERROR(_ctx->prepare(state));
    RETURN_IF_ERROR(_ctx->capture_tablet_rowsets(_morsel_queue->olap_scan_ranges()));

    return Status::OK();
}

void OlapScanPrepareOperator::close(RuntimeState* state) {
Expand All @@ -46,8 +48,8 @@ StatusOr<vectorized::ChunkPtr> OlapScanPrepareOperator::pull_chunk(RuntimeState*
Status status = _ctx->parse_conjuncts(state, runtime_in_filters(), runtime_bloom_filters());

_morsel_queue->set_key_ranges(_ctx->key_ranges());
_morsel_queue->set_tablets(_tablets);
_morsel_queue->set_tablet_rowsets(_tablet_rowsets);
_morsel_queue->set_tablets(_ctx->tablets());
_morsel_queue->set_tablet_rowsets(_ctx->tablet_rowsets());

_ctx->set_prepare_finished();
if (!status.ok()) {
Expand All @@ -58,28 +60,6 @@ StatusOr<vectorized::ChunkPtr> OlapScanPrepareOperator::pull_chunk(RuntimeState*
return nullptr;
}

// Resolves the tablet of every scan range in the morsel queue and captures the
// consistent rowsets for versions [0, scan_range->version] into this operator's
// _tablets / _tablet_rowsets, indexed like the scan ranges.
//
// Returns the first error reported by OlapScanNode::get_tablet() or
// Tablet::capture_consistent_rowsets(); Status::OK() otherwise.
//
// NOTE(review): unlike OlapScanContext::capture_tablet_rowsets(), this variant
// does not call Rowset::acquire_readers() on the captured rowsets.
Status OlapScanPrepareOperator::_capture_tablet_rowsets() {
    // Bind by reference so the scan-range list is not copied when the getter returns a reference.
    const auto& olap_scan_ranges = _morsel_queue->olap_scan_ranges();
    const size_t num_ranges = olap_scan_ranges.size();
    _tablet_rowsets.resize(num_ranges);
    _tablets.resize(num_ranges);

    for (size_t i = 0; i < num_ranges; ++i) {
        auto* scan_range = olap_scan_ranges[i];

        // The version arrives as a decimal string in the scan range.
        int64_t version = strtoul(scan_range->version.c_str(), nullptr, 10);
        ASSIGN_OR_RETURN(TabletSharedPtr tablet, vectorized::OlapScanNode::get_tablet(scan_range));

        // Capture row sets of this version tablet.
        {
            // Capture under the tablet header lock (shared mode).
            std::shared_lock l(tablet->get_header_lock());
            RETURN_IF_ERROR(tablet->capture_consistent_rowsets(Version(0, version), &_tablet_rowsets[i]));
        }

        _tablets[i] = std::move(tablet);
    }

    return Status::OK();
}

/// OlapScanPrepareOperatorFactory
OlapScanPrepareOperatorFactory::OlapScanPrepareOperatorFactory(int32_t id, int32_t plan_node_id,
vectorized::OlapScanNode* const scan_node,
Expand Down
10 changes: 0 additions & 10 deletions be/src/exec/pipeline/scan/olap_scan_prepare_operator.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,6 @@ class OlapScanPrepareOperator final : public SourceOperator {
StatusOr<vectorized::ChunkPtr> pull_chunk(RuntimeState* state) override;

private:
// Captures the tablet and the consistent rowsets of every scan range into
// _tablets / _tablet_rowsets.
Status _capture_tablet_rowsets();

private:
// Rowsets of a tablet become stale and are deleted if a compaction occurs while
// nothing references them. This typically happens when the tablets of the left
// table are compacted while the right hash table is being built. Therefore the
// rowsets are referenced from _tablet_rowsets in the preparation phase, which
// keeps them from being deleted.
std::vector<TabletSharedPtr> _tablets;
std::vector<std::vector<RowsetSharedPtr>> _tablet_rowsets;

OlapScanContextPtr _ctx;
};

Expand Down
5 changes: 5 additions & 0 deletions be/src/exec/vectorized/olap_scan_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,10 @@ Status OlapScanNode::close(RuntimeState* state) {
release_large_columns<BinaryColumn>(runtime_state()->chunk_size() * 512);
}

for (const auto& rowsets_per_tablet : _tablet_rowsets) {
Rowset::release_readers(rowsets_per_tablet);
}

return ScanNode::close(state);
}

Expand Down Expand Up @@ -680,6 +684,7 @@ Status OlapScanNode::_capture_tablet_rowsets() {
{
std::shared_lock l(tablet->get_header_lock());
RETURN_IF_ERROR(tablet->capture_consistent_rowsets(Version(0, version), &_tablet_rowsets[i]));
Rowset::acquire_readers(_tablet_rowsets[i]);
}
}

Expand Down