Skip to content

Commit d8c8d6f

Browse files
committed
Avoid tracing SevError for TesterRecruitmentTimeout unless the failure persists for over 1 day
1 parent bb8ba15 commit d8c8d6f

File tree

1 file changed

+15
-2
lines changed

1 file changed

+15
-2
lines changed

fdbserver/tester.actor.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1594,7 +1594,7 @@ ACTOR Future<std::vector<TesterInterface>> getTesters(Reference<AsyncVar<Optiona
15941594
}
15951595
when(wait(cc->onChange())) {}
15961596
when(wait(testerTimeout)) {
1597-
TraceEvent(SevError, "TesterRecruitmentTimeout").log();
1597+
TraceEvent(SevWarn, "TesterRecruitmentTimeout").log();
15981598
throw timed_out();
15991599
}
16001600
}
@@ -1792,6 +1792,7 @@ ACTOR Future<Void> runConsistencyCheckerUrgentCore(Reference<AsyncVar<Optional<C
17921792
state std::vector<TesterInterface> ts; // used to store testers interface
17931793
state std::vector<KeyRange> rangesToCheck; // get from globalProgressMap
17941794
state std::vector<KeyRange> shardsToCheck; // get from keyServer metadata
1795+
state Optional<double> whenFailedToGetTesterStart;
17951796

17961797
// Initialize globalProgressMap
17971798
Optional<std::vector<KeyRange>> rangesToCheck_ = loadRangesToCheckFromKnob();
@@ -1838,7 +1839,19 @@ ACTOR Future<Void> runConsistencyCheckerUrgentCore(Reference<AsyncVar<Optional<C
18381839
// Step 2: Get testers
18391840
ts.clear();
18401841
if (!testers.present()) { // In real clusters
1841-
wait(store(ts, getTesters(cc, minTestersExpected)));
1842+
try {
1843+
wait(store(ts, getTesters(cc, minTestersExpected)));
1844+
whenFailedToGetTesterStart.reset();
1845+
} catch (Error& e) {
1846+
if (e.code() == error_code_timed_out) {
1847+
if (!whenFailedToGetTesterStart.present()) {
1848+
whenFailedToGetTesterStart = now();
1849+
} else if (now() - whenFailedToGetTesterStart.get() > 3600 * 24) { // 1 day
1850+
TraceEvent(SevError, "TesterRecruitmentTimeout").log();
1851+
}
1852+
}
1853+
throw e;
1854+
}
18421855
if (g_network->isSimulated() && deterministicRandom()->random01() < 0.05) {
18431856
throw operation_failed(); // Introduce random failure
18441857
}

0 commit comments

Comments
 (0)