Skip to content

Commit 192b0b1

Browse files
committed
bulk loading interface
1 parent 9789c7f commit 192b0b1

File tree

10 files changed

+554
-4
lines changed

10 files changed

+554
-4
lines changed

fdbclient/NativeAPI.actor.cpp

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1603,11 +1603,23 @@ DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<IClusterConnection
16031603

16041604
if (apiVersion.version() >= 740) {
16051605
registerSpecialKeysImpl(
1606-
SpecialKeySpace::MODULE::METRICS,
1606+
SpecialKeySpace::MODULE::BULKLOADING,
16071607
SpecialKeySpace::IMPLTYPE::READONLY,
1608-
std::make_unique<FaultToleranceMetricsImpl>(
1609-
singleKeyRange("fault_tolerance_metrics_json"_sr)
1610-
.withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::METRICS).begin)));
1608+
std::make_unique<BulkLoadStatusImpl>(
1609+
KeyRangeRef("status/"_sr, "status0"_sr)
1610+
.withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::BULKLOADING).begin)));
1611+
registerSpecialKeysImpl(
1612+
SpecialKeySpace::MODULE::BULKLOADING,
1613+
SpecialKeySpace::IMPLTYPE::READWRITE,
1614+
std::make_unique<BulkLoadTaskImpl>(
1615+
KeyRangeRef("task/"_sr, "task0"_sr)
1616+
.withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::BULKLOADING).begin)));
1617+
registerSpecialKeysImpl(
1618+
SpecialKeySpace::MODULE::BULKLOADING,
1619+
SpecialKeySpace::IMPLTYPE::READWRITE,
1620+
std::make_unique<BulkLoadCancelImpl>(
1621+
KeyRangeRef("cancel/"_sr, "cancel0"_sr)
1622+
.withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::BULKLOADING).begin)));
16111623
}
16121624

16131625
if (apiVersion.version() >= 700) {

fdbclient/SpecialKeySpace.actor.cpp

Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,16 @@
2929
#include <unordered_set>
3030

3131
#include "fdbclient/ActorLineageProfiler.h"
32+
#include "fdbclient/AuditUtils.actor.h"
33+
#include "fdbclient/BulkLoading.h"
3234
#include "fdbclient/ClusterConnectionMemoryRecord.h"
3335
#include "fdbclient/FDBOptions.g.h"
36+
#include "fdbclient/KeyRangeMap.h"
3437
#include "fdbclient/Knobs.h"
3538
#include "fdbclient/ProcessInterface.h"
3639
#include "fdbclient/GlobalConfig.actor.h"
3740
#include "fdbclient/SpecialKeySpace.actor.h"
41+
#include "fdbclient/SystemData.h"
3842
#include "flow/Arena.h"
3943
#include "flow/UnitTest.h"
4044
#include "fdbclient/ManagementAPI.actor.h"
@@ -74,6 +78,7 @@ std::unordered_map<SpecialKeySpace::MODULE, KeyRange> SpecialKeySpace::moduleToB
7478
{ SpecialKeySpace::MODULE::ACTOR_PROFILER_CONF,
7579
KeyRangeRef("\xff\xff/actor_profiler_conf/"_sr, "\xff\xff/actor_profiler_conf0"_sr) },
7680
{ SpecialKeySpace::MODULE::CLUSTERID, singleKeyRange("\xff\xff/cluster_id"_sr) },
81+
{ SpecialKeySpace::MODULE::BULKLOADING, KeyRangeRef("\xff\xff/bulk_loading/"_sr, "\xff\xff/bulk_loading0"_sr) },
7782
};
7883

7984
std::unordered_map<std::string, KeyRange> SpecialKeySpace::managementApiCommandToRange = {
@@ -2717,6 +2722,256 @@ Future<Optional<std::string>> DataDistributionImpl::commit(ReadYourWritesTransac
27172722
return msg;
27182723
}
27192724

2725+
// Returns true when the current transaction has already buffered a write inside `range` of the
// bulk-loading special-key module. `range` is relative to the module (e.g. "task/".."task0"); the
// module's begin key is prepended before the write map is consulted.
//
// Each write-map entry is a (written, value) pair. In this file, task submissions are buffered as
// (true, <encoded state>) while cancellations are buffered as (true, <absent value>), so only the
// `written` flag is tested: additionally requiring a present value would make buffered
// cancellations invisible to every caller that checks the "cancel/" range.
bool existingBulkLoadUpdate(ReadYourWritesTransaction* ryw, KeyRange range) {
	KeyRange rangeToCheck =
	    range.withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::BULKLOADING).begin);
	auto ranges = ryw->getSpecialKeySpaceWriteMap().containedRanges(rangeToCheck);
	for (auto iter = ranges.begin(); iter != ranges.end(); ++iter) {
		if (iter->value().first) {
			return true;
		}
	}
	return false;
}
2736+
2737+
// Constructs the bulk-load status handler for key range `kr`, forwarding it to the
// SpecialKeyRangeReadImpl base.
BulkLoadStatusImpl::BulkLoadStatusImpl(KeyRangeRef kr) : SpecialKeyRangeReadImpl(kr) {}
2738+
2739+
// Serves range reads under "<bulk loading module>/status/".
// The status prefix is stripped from `kr`, the matching entries of the key-range map stored under
// bulkLoadPrefix are read through the underlying transaction, and the entries are returned
// re-keyed with the status prefix.
// Throws bulkload_check_status_input_error when the stripped range is not contained in normalKeys,
// or when existingBulkLoadUpdate reports a buffered task/cancel write in this transaction.
// `prefix` is accepted but not used.
ACTOR static Future<RangeResult> BulkLoadStatusGetRangeActor(ReadYourWritesTransaction* ryw,
                                                             KeyRef prefix,
                                                             KeyRangeRef kr) {
	state Key statusPrefix =
	    SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::BULKLOADING).begin.withSuffix("status/"_sr);
	state KeyRange userRange = kr.removePrefix(statusPrefix);

	// Only ranges inside the normal key space may be queried.
	const bool insideNormalKeys = normalKeys.contains(userRange);
	if (!insideNormalKeys) {
		TraceEvent(SevWarn, "BulkLoadCheckTaskStatusError")
		    .detail("Reason", "Input range to check is out of normal range")
		    .detail("InputRange", userRange);
		throw bulkload_check_status_input_error();
	}

	// Refuse to report status while the same transaction carries uncommitted task or cancel writes.
	const bool pendingTaskWrite = existingBulkLoadUpdate(ryw, Standalone(KeyRangeRef("task/"_sr, "task0"_sr)));
	if (pendingTaskWrite || existingBulkLoadUpdate(ryw, Standalone(KeyRangeRef("cancel/"_sr, "cancel0"_sr)))) {
		TraceEvent(SevWarn, "BulkLoadCheckTaskStatusError")
		    .detail("Reason", "Exist bulk loading update in the same transaction")
		    .detail("InputRange", userRange);
		throw bulkload_check_status_input_error();
	}

	RangeResult stored = wait(krmGetRanges(&(ryw->getTransaction()), bulkLoadPrefix, userRange));

	// Copy the paging flags verbatim, then re-key every entry into the special key space.
	RangeResult translated;
	translated.more = stored.more;
	translated.readThrough = stored.readThrough;
	translated.readToBegin = stored.readToBegin;
	translated.readThroughEnd = stored.readThroughEnd;
	for (const KeyValueRef& kv : stored) {
		Key specialKey = kv.key.withPrefix(statusPrefix);
		translated.push_back_deep(translated.arena(), KeyValueRef(specialKey, kv.value));
	}
	return rywGetRange(ryw, kr, translated);
}
2771+
2772+
// Range read entry point for the status keys: delegates to BulkLoadStatusGetRangeActor, passing
// this handler's begin key as the prefix argument. `limitsHint` is not used.
Future<RangeResult> BulkLoadStatusImpl::getRange(ReadYourWritesTransaction* ryw,
                                                 KeyRangeRef kr,
                                                 GetRangeLimits limitsHint) const {
	return BulkLoadStatusGetRangeActor(ryw, getKeyRange().begin, kr);
}
2777+
2778+
// Constructs the bulk-load task handler for key range `kr`, forwarding it to the
// SpecialKeyRangeRWImpl base.
BulkLoadTaskImpl::BulkLoadTaskImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {}
2779+
2780+
// Reading the task keys is not supported: always throws not_implemented.
Future<RangeResult> BulkLoadTaskImpl::getRange(ReadYourWritesTransaction* ryw,
                                               KeyRangeRef kr,
                                               GetRangeLimits limitsHint) const {
	throw not_implemented();
}
2785+
2786+
// Range clears on the task keys are not supported: always throws not_implemented.
void BulkLoadTaskImpl::clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) {
	throw not_implemented();
}
2789+
2790+
// Single-key clears on the task keys are not supported: always throws not_implemented.
void BulkLoadTaskImpl::clear(ReadYourWritesTransaction* ryw, const KeyRef& key) {
	throw not_implemented();
}
2793+
2794+
// Buffers a new bulk-load task in the transaction's special-key-space write map.
// The write must target exactly "<bulk loading module>/task/"; `value` carries the encoded
// BulkLoadState, and the range stored in that state selects which part of the write map is set.
// Throws bulkload_add_task_input_error when the key is not the task prefix, when the task range
// fails the normal-key-space check below, or when the range intersects a task that this
// transaction has already buffered.
void BulkLoadTaskImpl::set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) {
	Key taskPrefix =
	    SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::BULKLOADING).begin.withSuffix("task/"_sr);
	// The value is decoded before the key check so the rejected state can be traced.
	BulkLoadState newTask = decodeBulkLoadState(value);
	if (key != taskPrefix) {
		TraceEvent(SevWarn, "BulkLoadSetTaskError")
		    .detail("Reason", "Input key error")
		    .detail("CorrectKey", taskPrefix)
		    .detail("InputKey", key)
		    .detail("InputState", newTask.toString());
		throw bulkload_add_task_input_error();
	}

	// NOTE(review): these ASSERTs run on client-supplied input — confirm a crash is intended here.
	ASSERT(newTask.isValid());
	KeyRangeRef taskRange = newTask.range;
	ASSERT(!taskRange.empty());

	// NOTE(review): a range whose end equals normalKeys.end is rejected by this check — confirm.
	const bool beginOutside = taskRange.begin >= normalKeys.end;
	if (beginOutside || taskRange.end >= normalKeys.end) {
		TraceEvent(SevWarn, "BulkLoadSetTaskError")
		    .detail("Reason", "Input range is out of normal key space")
		    .detail("InputState", newTask.toString());
		throw bulkload_add_task_input_error();
	}

	// A task already buffered in this transaction may not be overwritten by an overlapping one.
	KeyRange prefixedTaskRange = taskRange.withPrefix(taskPrefix);
	auto overlaps = ryw->getSpecialKeySpaceWriteMap().intersectingRanges(prefixedTaskRange);
	for (auto it = overlaps.begin(); it != overlaps.end(); ++it) {
		const bool holdsBufferedTask = it->value().first && it->value().second.present();
		if (holdsBufferedTask) {
			BulkLoadState bufferedTask = decodeBulkLoadState(it->value().second.get());
			ASSERT(bufferedTask.isValid());
			TraceEvent(SevWarnAlways, "BulkLoadSetTaskError")
			    .detail("Reason", "Input task is trying to overwrite the existing enforced task")
			    .detail("InputState", newTask.toString())
			    .detail("ExistState", bufferedTask.toString());
			throw bulkload_add_task_input_error();
		}
	}

	ryw->getSpecialKeySpaceWriteMap().insert(prefixedTaskRange, std::make_pair(true, bulkLoadStateValue(newTask)));
}
2831+
2832+
// Commit hook for "<bulk loading module>/task/": validates the bulk-load tasks buffered in this
// transaction and writes them into the key-range map stored under bulkLoadPrefix.
//
// Steps:
//   1. Reject the commit (bulkload_check_status_input_error) if existingBulkLoadUpdate reports a
//      buffered cancellation in the same transaction.
//   2. Collect every buffered task; reject (bulkload_add_task_input_error) if two tasks carry the
//      same filePaths.
//   3. Read the persisted map over the coalesced task ranges; reject
//      (bulkload_add_task_input_error) if any persisted entry has a non-empty value.
//   4. Persist each task with krmSetRange.
// Returns an empty Optional on success. `kr` is only used for tracing.
ACTOR static Future<Optional<std::string>> BulkLoadingTaskCommitActor(ReadYourWritesTransaction* ryw, KeyRangeRef kr) {
	// Check if there are existing bulk loading cancellation in the current transaction
	if (existingBulkLoadUpdate(ryw, Standalone(KeyRangeRef("cancel/"_sr, "cancel0"_sr)))) {
		TraceEvent(SevWarn, "BulkLoadTaskCommitError")
		    .detail("Reason", "Exist bulk loading cancel in the same transaction");
		throw bulkload_check_status_input_error();
	}

	state KeyRange taskRange =
	    Standalone(KeyRangeRef("task/"_sr, "task0"_sr))
	        .withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::BULKLOADING).begin);

	// Validate current transaction bulk loading tasks
	state std::vector<BulkLoadState> updateTasks;
	state std::vector<KeyRange> updateRanges;
	auto ranges = ryw->getSpecialKeySpaceWriteMap().containedRanges(taskRange);
	for (auto iter = ranges.begin(); iter != ranges.end(); ++iter) {
		// Skip parts of the write map that hold no buffered task.
		if (!iter->value().first || !iter->value().second.present()) {
			continue;
		}
		BulkLoadState bulkLoadTask = decodeBulkLoadState(iter->value().second.get());
		// The write-map range must be exactly the task's own range under the task prefix.
		ASSERT(iter->range() == bulkLoadTask.range.withPrefix(taskRange.begin));
		for (const auto& updateTask : updateTasks) {
			if (updateTask.filePaths == bulkLoadTask.filePaths) {
				TraceEvent(SevWarnAlways, "BulkLoadTaskCommitError")
				    .detail("Reason", "Different ranges are mapped to the same file path set");
				throw bulkload_add_task_input_error();
			}
		}
		updateTasks.push_back(bulkLoadTask);
		updateRanges.push_back(bulkLoadTask.range);
	}
	updateRanges = coalesceRangeList(updateRanges);

	// Conflict check between local change and global
	state int i = 0;
	state Key readBeginKey;
	state Key readEndKey;
	for (; i < updateRanges.size(); i++) {
		readBeginKey = updateRanges[i].begin;
		readEndKey = updateRanges[i].end;
		while (readBeginKey < readEndKey) {
			KeyRange rangeToRead = Standalone(KeyRangeRef(readBeginKey, readEndKey));
			// NOTE(review): the status reader in this file passes an un-prefixed range to
			// krmGetRanges with the same map prefix, and krmSetRange below is given un-prefixed
			// ranges, whereas this call passes a range already prefixed with bulkLoadPrefix —
			// confirm which form krmGetRanges expects.
			RangeResult result =
			    wait(krmGetRanges(&(ryw->getTransaction()), bulkLoadPrefix, rangeToRead.withPrefix(bulkLoadPrefix)));
			// Consecutive entries [j, j + 1) delimit one persisted range. Index with the inner
			// loop variable `j`; `i` is the index of the outer loop over updateRanges.
			for (int j = 0; j < result.size() - 1; j++) {
				if (!result[j].value.empty()) {
					KeyRange existRange = Standalone(KeyRangeRef(result[j].key, result[j + 1].key));
					BulkLoadState existBulkLoadTask = decodeBulkLoadState(result[j].value);
					ASSERT(existBulkLoadTask.isValid());
					ASSERT(existBulkLoadTask.range ==
					       existRange.removePrefix(bulkLoadPrefix)); // check existing ones, unsafe, may remove
					TraceEvent(SevWarnAlways, "BulkLoadTaskCommitError")
					    .detail("Reason", "New range conflicts to existing ones");
					throw bulkload_add_task_input_error();
				}
			}
			// Continue reading from the last boundary returned by this batch.
			readBeginKey = result.back().key;
		}
	}

	// Update to global
	i = 0;
	for (; i < updateTasks.size(); i++) {
		wait(krmSetRange(
		    &(ryw->getTransaction()), bulkLoadPrefix, updateTasks[i].range, bulkLoadStateValue(updateTasks[i])));
		TraceEvent("BulkLoadCommitEach").detail("Task", updateTasks[i].toString()).detail("KR", kr.toString());
	}
	return Optional<std::string>();
}
2902+
2903+
// Commit entry point for the task keys: delegates to BulkLoadingTaskCommitActor with this
// handler's key range.
Future<Optional<std::string>> BulkLoadTaskImpl::commit(ReadYourWritesTransaction* ryw) {
	return BulkLoadingTaskCommitActor(ryw, getKeyRange());
}
2906+
2907+
// Constructs the bulk-load cancel handler for key range `kr`, forwarding it to the
// SpecialKeyRangeRWImpl base.
BulkLoadCancelImpl::BulkLoadCancelImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {}
2908+
2909+
// Reading the cancel keys is not supported: always throws not_implemented.
Future<RangeResult> BulkLoadCancelImpl::getRange(ReadYourWritesTransaction* ryw,
                                                 KeyRangeRef kr,
                                                 GetRangeLimits limitsHint) const {
	throw not_implemented();
}
2914+
2915+
// Buffers a cancellation: clearing "<bulk loading module>/cancel/<begin>" .. "<...>/cancel/<end>"
// marks [begin, end) in the special-key-space write map as written with an absent value.
// Throws bulkload_cancel_task_input_error when the stripped range is not contained in normalKeys.
void BulkLoadCancelImpl::clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) {
	Key cancelPrefix =
	    SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::BULKLOADING).begin.withSuffix("cancel/"_sr);
	KeyRange userRange = range.removePrefix(cancelPrefix);

	const bool insideNormalKeys = normalKeys.contains(userRange);
	if (!insideNormalKeys) {
		TraceEvent(SevWarn, "BulkLoadCancelTaskError")
		    .detail("Reason", "Input range to check is out of normal range")
		    .detail("InputRange", userRange);
		throw bulkload_cancel_task_input_error();
	}

	// (true, absent) is how a pending cancellation is represented in the write map.
	KeyRange writeMapRange = userRange.withPrefix(cancelPrefix);
	ryw->getSpecialKeySpaceWriteMap().insert(writeMapRange, std::make_pair(true, Optional<Value>()));
}
2928+
2929+
// Single-key clears on the cancel keys are not supported: always throws not_implemented.
void BulkLoadCancelImpl::clear(ReadYourWritesTransaction* ryw, const KeyRef& key) {
	throw not_implemented();
}
2932+
2933+
// Writes to the cancel keys are not supported: always throws not_implemented.
void BulkLoadCancelImpl::set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) {
	throw not_implemented();
}
2936+
2937+
// Commit hook for "<bulk loading module>/cancel/": applies the cancellations buffered in this
// transaction to the key-range map stored under bulkLoadPrefix.
//
// Rejects the commit (bulkload_check_status_input_error) if existingBulkLoadUpdate reports a
// buffered task in the same transaction. Otherwise every written part of the cancel range is
// collected, coalesced, and overwritten with an empty value via krmSetRange.
// Returns an empty Optional on success. `kr` is only used for tracing.
ACTOR static Future<Optional<std::string>> BulkLoadingCancelCommitActor(ReadYourWritesTransaction* ryw,
                                                                        KeyRangeRef kr) {
	// Check if there are existing bulk loading new task in the current transaction
	if (existingBulkLoadUpdate(ryw, Standalone(KeyRangeRef("task/"_sr, "task0"_sr)))) {
		// NOTE(review): the event name and the thrown error are kept as in the task commit path
		// for compatibility; consider cancel-specific ones.
		TraceEvent(SevWarn, "BulkLoadTaskCommitError")
		    .detail("Reason", "Exist bulk loading task in the same transaction");
		throw bulkload_check_status_input_error();
	}

	state KeyRange cancelRange =
	    Standalone(KeyRangeRef("cancel/"_sr, "cancel0"_sr))
	        .withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::BULKLOADING).begin);

	state std::vector<KeyRange> cancelRanges;
	auto ranges = ryw->getSpecialKeySpaceWriteMap().containedRanges(cancelRange);
	for (auto iter = ranges.begin(); iter != ranges.end(); ++iter) {
		// Skip parts of the write map that were not written in this transaction.
		if (!iter->value().first) {
			continue;
		}
		// A written entry in the cancel range must carry an absent value.
		ASSERT(!iter->value().second.present());
		cancelRanges.push_back(iter->range().removePrefix(cancelRange.begin));
	}
	cancelRanges = coalesceRangeList(cancelRanges);

	// Update to global
	state int i = 0;
	for (; i < cancelRanges.size(); i++) {
		wait(krmSetRange(&(ryw->getTransaction()), bulkLoadPrefix, cancelRanges[i], Value()));
		TraceEvent("BulkLoadCancelCommitEach").detail("Range", cancelRanges[i].toString()).detail("KR", kr.toString());
	}

	return Optional<std::string>();
}
2970+
2971+
// Commit entry point for the cancel keys: delegates to BulkLoadingCancelCommitActor with this
// handler's key range.
Future<Optional<std::string>> BulkLoadCancelImpl::commit(ReadYourWritesTransaction* ryw) {
	return BulkLoadingCancelCommitActor(ryw, getKeyRange());
}
2974+
27202975
// Clears the special management api keys excludeLocality and failedLocality.
27212976
void includeLocalities(ReadYourWritesTransaction* ryw) {
27222977
ryw->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);

fdbclient/SystemData.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1215,6 +1215,21 @@ const KeyRef moveKeysLockWriteKey = "\xff/moveKeysLock/Write"_sr;
12151215
const KeyRef dataDistributionModeKey = "\xff/dataDistributionMode"_sr;
12161216
const UID dataDistributionModeLock = UID(6345, 3425);
12171217

1218+
// Bulk loading keys
1219+
// Single system key "\xff/bulkLoadTrigger".
// NOTE(review): not referenced in the code visible here; presumably written to signal that
// bulk-load metadata changed — confirm against its users.
const KeyRef bulkLoadTriggerKey = "\xff/bulkLoadTrigger"_sr;
1220+
// Map prefix handed to krmGetRanges / krmSetRange when bulk-load task state is read or written.
const KeyRef bulkLoadPrefix = "\xff/bulkLoad/"_sr;
1221+
1222+
// Encodes `bulkLoadState` into a Value using ObjectWriter with IncludeVersion().
// Counterpart of decodeBulkLoadState.
const Value bulkLoadStateValue(const BulkLoadState& bulkLoadState) {
	Value encoded = ObjectWriter::toValue(bulkLoadState, IncludeVersion());
	return encoded;
}
1225+
1226+
// Decodes a BulkLoadState from `value` using ObjectReader with IncludeVersion().
// Counterpart of bulkLoadStateValue.
BulkLoadState decodeBulkLoadState(const ValueRef& value) {
	ObjectReader versionedReader(value.begin(), IncludeVersion());
	BulkLoadState decoded;
	versionedReader.deserialize(decoded);
	return decoded;
}
1232+
12181233
// Keys to view and control tag throttling
12191234
const KeyRangeRef tagThrottleKeys = KeyRangeRef("\xff\x02/throttledTags/tag/"_sr, "\xff\x02/throttledTags/tag0"_sr);
12201235
const KeyRef tagThrottleKeysPrefix = tagThrottleKeys.begin;

0 commit comments

Comments
 (0)