Skip to content

Commit

Permalink
Allow determinism checking for entire jobsets
Browse files Browse the repository at this point in the history
Setting

  xxx-jobset-repeats = patchelf:master:2

will cause Hydra to perform every build step in the specified jobset 2
additional times (i.e. 3 times in total). Non-determinism is not fatal
unless the derivation has the attribute "isDeterministic = true"; we
just note the lack of determinism in the Hydra database. This will
allow us to get stats about the (lack of) reproducibility of all of
Nixpkgs.
  • Loading branch information
edolstra committed Dec 7, 2016
1 parent 8bb36e7 commit f608166
Show file tree
Hide file tree
Showing 9 changed files with 98 additions and 36 deletions.
13 changes: 9 additions & 4 deletions src/hydra-queue-runner/build-remote.cc
Expand Up @@ -116,7 +116,7 @@ static void copyClosureTo(ref<Store> destStore,

void State::buildRemote(ref<Store> destStore,
Machine::ptr machine, Step::ptr step,
unsigned int maxSilentTime, unsigned int buildTimeout,
unsigned int maxSilentTime, unsigned int buildTimeout, unsigned int repeats,
RemoteResult & result, std::shared_ptr<ActiveStep> activeStep)
{
assert(BuildResult::TimedOut == 8);
Expand Down Expand Up @@ -263,9 +263,10 @@ void State::buildRemote(ref<Store> destStore,
to << maxSilentTime << buildTimeout;
if (GET_PROTOCOL_MINOR(remoteVersion) >= 2)
to << 64 * 1024 * 1024; // == maxLogSize
if (GET_PROTOCOL_MINOR(remoteVersion) >= 3)
// FIXME: make the number of repeats configurable.
to << (step->isDeterministic ? 1 : 0);
if (GET_PROTOCOL_MINOR(remoteVersion) >= 3) {
to << repeats // == build-repeat
<< step->isDeterministic; // == enforce-determinism
}
to.flush();

result.startTime = time(0);
Expand Down Expand Up @@ -295,6 +296,10 @@ void State::buildRemote(ref<Store> destStore,
result.stepStatus = bsSuccess;
} else {
result.errorMsg = readString(from);
if (GET_PROTOCOL_MINOR(remoteVersion) >= 3) {
result.timesBuilt = readInt(from);
result.isNonDeterministic = readInt(from);
}
switch ((BuildResult::Status) res) {
case BuildResult::Built:
result.stepStatus = bsSuccess;
Expand Down
21 changes: 12 additions & 9 deletions src/hydra-queue-runner/builder.cc
Expand Up @@ -86,6 +86,7 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore,
BuildID buildId;
Path buildDrvPath;
unsigned int maxSilentTime, buildTimeout;
unsigned int repeats = step->isDeterministic ? 1 : 0;

{
std::set<Build::ptr> dependents;
Expand Down Expand Up @@ -113,6 +114,11 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore,
build = build2;
enqueueNotificationItem({NotificationItem::Type::BuildStarted, build->id});
}
{
auto i = jobsetRepeats.find(std::make_pair(build2->projectName, build2->jobsetName));
if (i != jobsetRepeats.end())
repeats = std::max(repeats, i->second);
}
}
if (!build) build = *dependents.begin();

Expand All @@ -121,8 +127,8 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore,
maxSilentTime = build->maxSilentTime;
buildTimeout = build->buildTimeout;

printMsg(lvlInfo, format("performing step ‘%1%’ on ‘%2%’ (needed by build %3% and %4% others)")
% step->drvPath % machine->sshName % buildId % (dependents.size() - 1));
printInfo("performing step ‘%s’ %d times on ‘%s’ (needed by build %d and %d others)",
step->drvPath, repeats + 1, machine->sshName, buildId, (dependents.size() - 1));
}

bool quit = buildId == buildOne && step->drvPath == buildDrvPath;
Expand Down Expand Up @@ -162,7 +168,7 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore,
/* Do the build. */
try {
/* FIXME: referring builds may have conflicting timeouts. */
buildRemote(destStore, machine, step, maxSilentTime, buildTimeout, result, activeStep);
buildRemote(destStore, machine, step, maxSilentTime, buildTimeout, repeats, result, activeStep);
} catch (NoTokens & e) {
result.stepStatus = bsNarSizeLimitExceeded;
} catch (Error & e) {
Expand Down Expand Up @@ -224,8 +230,7 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore,
auto mc = startDbUpdate();
{
pqxx::work txn(*conn);
finishBuildStep(txn, result.startTime, result.stopTime, result.overhead, buildId,
stepNr, machine->sshName, result.stepStatus, result.errorMsg);
finishBuildStep(txn, result, buildId, stepNr, machine->sshName);
txn.commit();
}
stepFinished = true;
Expand Down Expand Up @@ -279,8 +284,7 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore,

pqxx::work txn(*conn);

finishBuildStep(txn, result.startTime, result.stopTime, result.overhead,
buildId, stepNr, machine->sshName, bsSuccess);
finishBuildStep(txn, result, buildId, stepNr, machine->sshName);

for (auto & b : direct) {
printMsg(lvlInfo, format("marking build %1% as succeeded") % b->id);
Expand Down Expand Up @@ -386,8 +390,7 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore,

if (result.stepStatus != bsCachedFailure && !stepFinished) {
assert(stepNr);
finishBuildStep(txn, result.startTime, result.stopTime, result.overhead,
buildId, stepNr, machine->sshName, result.stepStatus, result.errorMsg);
finishBuildStep(txn, result, buildId, stepNr, machine->sshName);
}

/* Mark all builds that depend on this derivation as failed. */
Expand Down
30 changes: 19 additions & 11 deletions src/hydra-queue-runner/hydra-queue-runner.cc
Expand Up @@ -264,20 +264,21 @@ unsigned int State::createBuildStep(pqxx::work & txn, time_t startTime, BuildID
}


void State::finishBuildStep(pqxx::work & txn, time_t startTime, time_t stopTime, unsigned int overhead,
BuildID buildId, unsigned int stepNr, const std::string & machine, BuildStatus status,
const std::string & errorMsg, BuildID propagatedFrom)
void State::finishBuildStep(pqxx::work & txn, const RemoteResult & result,
BuildID buildId, unsigned int stepNr, const std::string & machine)
{
assert(startTime);
assert(stopTime);
assert(result.startTime);
assert(result.stopTime);
txn.parameterized
("update BuildSteps set busy = 0, status = $1, propagatedFrom = $4, errorMsg = $5, startTime = $6, stopTime = $7, machine = $8, overhead = $9 where build = $2 and stepnr = $3")
((int) status)(buildId)(stepNr)
(propagatedFrom, propagatedFrom != 0)
(errorMsg, errorMsg != "")
(startTime)(stopTime)
("update BuildSteps set busy = 0, status = $1, errorMsg = $4, startTime = $5, stopTime = $6, machine = $7, overhead = $8, timesBuilt = $9, isNonDeterministic = $10 where build = $2 and stepnr = $3")
((int) result.stepStatus)(buildId)(stepNr)
(result.errorMsg, result.errorMsg != "")
(result.startTime)(result.stopTime)
(machine, machine != "")
(overhead, overhead != 0).exec();
(result.overhead, result.overhead != 0)
(result.timesBuilt, result.timesBuilt > 0)
(result.isNonDeterministic, result.timesBuilt > 1)
.exec();
}


Expand Down Expand Up @@ -809,6 +810,13 @@ void State::run(BuildID buildOne)

useSubstitutes = isTrue(hydraConfig["use-substitutes"]);

// FIXME: hacky mechanism for configuring determinism checks.
for (auto & s : tokenizeString<Strings>(hydraConfig["xxx-jobset-repeats"])) {
auto s2 = tokenizeString<std::vector<std::string>>(s, ":");
if (s2.size() != 3) throw Error("bad value in xxx-jobset-repeats");
jobsetRepeats.emplace(std::make_pair(s2[0], s2[1]), std::stoi(s2[2]));
}

{
auto conn(dbPool.get());
clearBusy(*conn, 0);
Expand Down
14 changes: 10 additions & 4 deletions src/hydra-queue-runner/state.hh
Expand Up @@ -48,6 +48,9 @@ struct RemoteResult
bool canCache = false; // for bsFailed
std::string errorMsg; // for bsAborted

unsigned int timesBuilt = 0;
bool isNonDeterministic = false;

time_t startTime = 0, stopTime = 0;
unsigned int overhead = 0;
nix::Path logFile;
Expand Down Expand Up @@ -414,6 +417,10 @@ private:
from showing up as busy until the queue runner is restarted. */
nix::Sync<std::set<std::pair<BuildID, int>>> orphanedSteps;

/* How often the build steps of a jobset should be repeated in
order to detect non-determinism. */
std::map<std::pair<std::string, std::string>, unsigned int> jobsetRepeats;

public:
State();

Expand All @@ -437,10 +444,8 @@ private:
const std::string & machine, BuildStatus status, const std::string & errorMsg = "",
BuildID propagatedFrom = 0);

void finishBuildStep(pqxx::work & txn, time_t startTime, time_t stopTime,
unsigned int overhead, BuildID buildId, unsigned int stepNr,
const std::string & machine, BuildStatus status, const std::string & errorMsg = "",
BuildID propagatedFrom = 0);
void finishBuildStep(pqxx::work & txn, const RemoteResult & result, BuildID buildId, unsigned int stepNr,
const std::string & machine);

int createSubstitutionStep(pqxx::work & txn, time_t startTime, time_t stopTime,
Build::ptr build, const nix::Path & drvPath, const std::string & outputName, const nix::Path & storePath);
Expand Down Expand Up @@ -492,6 +497,7 @@ private:
void buildRemote(nix::ref<nix::Store> destStore,
Machine::ptr machine, Step::ptr step,
unsigned int maxSilentTime, unsigned int buildTimeout,
unsigned int repeats,
RemoteResult & result, std::shared_ptr<ActiveStep> activeStep);

void markSucceededBuild(pqxx::work & txn, Build::ptr build,
Expand Down
18 changes: 16 additions & 2 deletions src/lib/Hydra/Schema/BuildSteps.pm
Expand Up @@ -103,6 +103,16 @@ __PACKAGE__->table("BuildSteps");
data_type: 'integer'
is_nullable: 1
=head2 timesbuilt
data_type: 'integer'
is_nullable: 1
=head2 isnondeterministic
data_type: 'boolean'
is_nullable: 1
=cut

__PACKAGE__->add_columns(
Expand Down Expand Up @@ -132,6 +142,10 @@ __PACKAGE__->add_columns(
{ data_type => "integer", is_foreign_key => 1, is_nullable => 1 },
"overhead",
{ data_type => "integer", is_nullable => 1 },
"timesbuilt",
{ data_type => "integer", is_nullable => 1 },
"isnondeterministic",
{ data_type => "boolean", is_nullable => 1 },
);

=head1 PRIMARY KEY
Expand Down Expand Up @@ -201,8 +215,8 @@ __PACKAGE__->belongs_to(
);


# Created by DBIx::Class::Schema::Loader v0.07043 @ 2016-02-16 18:04:52
# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:TRALbEoaF/OIOyERYCyxkw
# Created by DBIx::Class::Schema::Loader v0.07045 @ 2016-12-07 13:48:19
# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:3FYkqSUfgWmiqZzmX8J4TA

my %hint = (
columns => [
Expand Down
19 changes: 13 additions & 6 deletions src/root/build.tt
Expand Up @@ -50,13 +50,20 @@ FOR step IN steps; IF step.busy; busy = 1; END; END;
END %]
</td>
<td>[% IF step.busy == 1 || ((step.machine || step.starttime) && (step.status == 0 || step.status == 1 || step.status == 3 || step.status == 4 || step.status == 7)); INCLUDE renderMachineName machine=step.machine; ELSE; "<em>n/a</em>"; END %]</td>
<td>
<td class="step-status">
[% IF step.busy == 1 %]
<strong>Building</strong>
[% ELSIF step.status == 0 %]
Succeeded
[% IF step.isnondeterministic %]
<span class="warn">Succeeded with non-deterministic result</span>
[% ELSE %]
Succeeded
[% END %]
[% IF step.timesbuilt > 1 %]
([% step.timesbuilt %] times)
[% END %]
[% ELSIF step.status == 3 %]
<span class="error"><strong>Aborted</strong>[% IF step.errormsg %]: [% HTML.escape(step.errormsg); END %]</span>
<span class="error">Aborted</span>[% IF step.errormsg %]: <em>[% HTML.escape(step.errormsg) %]</em>[% END %]
[% ELSIF step.status == 4 %]
<span class="error">Cancelled</span>
[% ELSIF step.status == 7 %]
Expand All @@ -70,9 +77,9 @@ FOR step IN steps; IF step.busy; busy = 1; END; END;
[% ELSIF step.status == 11 %]
<span class="error">Output limit exceeded</span>
[% ELSIF step.status == 12 %]
<span class="error">Non-deterministic build</span>
<span class="error">Non-determinism detected</span> [% IF step.timesbuilt %] after [% step.timesbuilt %] times[% END %]
[% ELSIF step.errormsg %]
<span class="error">Failed: [% HTML.escape(step.errormsg) %]</span>
<span class="error">Failed</span>: <em>[% HTML.escape(step.errormsg) %]</em>
[% ELSE %]
<span class="error">Failed</span>
[% END %]
Expand Down Expand Up @@ -137,7 +144,7 @@ FOR step IN steps; IF step.busy; busy = 1; END; END;
<table>
<tr>
<td>
[% INCLUDE renderBuildStatusIcon size=128, build=build %]
[% INCLUDE renderBuildStatusIcon size=128 build=build %]
</td>
<td>
<table class="info-table">
Expand Down
10 changes: 10 additions & 0 deletions src/root/static/css/hydra.css
Expand Up @@ -132,3 +132,13 @@ div.flot-tooltip {
opacity: 0.80;
z-index: 100;
}

/* Error labels (span.error) inside a build-step status cell
   (td.step-status): shown in bold red. */
td.step-status span.error {
color: red;
font-weight: bold;
}

/* Warning labels (span.warn) inside a build-step status cell
   (td.step-status): shown in bold dark yellow. */
td.step-status span.warn {
color: #aaaa00;
font-weight: bold;
}
6 changes: 6 additions & 0 deletions src/sql/hydra.sql
Expand Up @@ -289,6 +289,12 @@ create table BuildSteps (
-- Time in milliseconds spent copying stuff from/to build machines.
overhead integer,

-- How many times this build step was done (for checking determinism).
timesBuilt integer,

-- Whether this build step produced different results when repeated.
isNonDeterministic boolean,

primary key (build, stepnr),
foreign key (build) references Builds(id) on delete cascade,
foreign key (propagatedFrom) references Builds(id) on delete cascade
Expand Down
3 changes: 3 additions & 0 deletions src/sql/upgrade-52.sql
@@ -0,0 +1,3 @@
-- Schema upgrade: add two columns to BuildSteps for recording the
-- outcome of determinism checks. Neither column is declared NOT NULL.
alter table BuildSteps
add column timesBuilt integer, -- how many times this build step was done
add column isNonDeterministic boolean; -- whether repeated builds produced different results

0 comments on commit f608166

Please sign in to comment.