Skip to content

Commit

Permalink
avoid trace SevError for TesterRecruitmentTimeout unless it keeps fai…
Browse files Browse the repository at this point in the history
…lure for over 1 day
  • Loading branch information
kakaiu committed Apr 29, 2024
1 parent bb8ba15 commit d8c8d6f
Showing 1 changed file with 15 additions and 2 deletions.
17 changes: 15 additions & 2 deletions fdbserver/tester.actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1594,7 +1594,7 @@ ACTOR Future<std::vector<TesterInterface>> getTesters(Reference<AsyncVar<Optiona
}
when(wait(cc->onChange())) {}
when(wait(testerTimeout)) {
TraceEvent(SevError, "TesterRecruitmentTimeout").log();
TraceEvent(SevWarn, "TesterRecruitmentTimeout").log();
throw timed_out();
}
}
Expand Down Expand Up @@ -1792,6 +1792,7 @@ ACTOR Future<Void> runConsistencyCheckerUrgentCore(Reference<AsyncVar<Optional<C
state std::vector<TesterInterface> ts; // used to store testers interface
state std::vector<KeyRange> rangesToCheck; // get from globalProgressMap
state std::vector<KeyRange> shardsToCheck; // get from keyServer metadata
state Optional<double> whenFailedToGetTesterStart;

// Initialize globalProgressMap
Optional<std::vector<KeyRange>> rangesToCheck_ = loadRangesToCheckFromKnob();
Expand Down Expand Up @@ -1838,7 +1839,19 @@ ACTOR Future<Void> runConsistencyCheckerUrgentCore(Reference<AsyncVar<Optional<C
// Step 2: Get testers
ts.clear();
if (!testers.present()) { // In real clusters
wait(store(ts, getTesters(cc, minTestersExpected)));
try {
wait(store(ts, getTesters(cc, minTestersExpected)));
whenFailedToGetTesterStart.reset();
} catch (Error& e) {
if (e.code() == error_code_timed_out) {
if (!whenFailedToGetTesterStart.present()) {
whenFailedToGetTesterStart = now();
} else if (now() - whenFailedToGetTesterStart.get() > 3600 * 24) { // 1 day
TraceEvent(SevError, "TesterRecruitmentTimeout").log();
}
}
throw e;
}
if (g_network->isSimulated() && deterministicRandom()->random01() < 0.05) {
throw operation_failed(); // Introduce random failure
}
Expand Down

0 comments on commit d8c8d6f

Please sign in to comment.