Skip to content

Commit

Permalink
reject the *new* slave when a dupe slave connects
Browse files Browse the repository at this point in the history
This, coupled with a ping of the old slave, will keep double slaves from
killing one another

Fixes #962
  • Loading branch information
Dustin J. Mitchell committed Aug 26, 2010
1 parent 5a12f7d commit 9b81701
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 21 deletions.
28 changes: 10 additions & 18 deletions master/buildbot/buildslave.py
Expand Up @@ -163,6 +163,13 @@ def startMissingTimer(self):
self.missing_timer = reactor.callLater(self.missing_timeout,
self._missing_timer_fired)

def recordConnectTime(self):
if self.slave_status:
self.slave_status.recordConnectTime()

def isConnected(self):
return self.slave

def _missing_timer_fired(self):
self.missing_timer = None
# notify people, but only if we're still in the config
Expand Down Expand Up @@ -210,25 +217,9 @@ def attached(self, bot):
@return: a Deferred that fires when the attachment is complete
"""

self.slave_status.recordConnectTime()
# the botmaster should ensure this.
assert not self.isConnected()

if self.slave:
# uh-oh, we've got a duplicate slave. The most likely
# explanation is that the slave is behind a slow link, thinks we
# went away, and has attempted to reconnect, so we've got two
# "connections" from the same slave, but the previous one is
# stale. Give the new one precedence.
log.msg("duplicate slave %s replacing old one" % self.slavename)

# just in case we've got two identically-configured slaves,
# report the IP addresses of both so someone can resolve the
# squabble
tport = self.slave.broker.transport
log.msg("old slave was connected from", tport.getPeer())
log.msg("new slave is from", bot.broker.transport.getPeer())
d = self.disconnect()
else:
d = defer.succeed(None)
# now we go through a sequence of calls, gathering information, then
# tell the Botmaster that it can finally give this slave to all the
# Builders that care about it.
Expand All @@ -242,6 +233,7 @@ def attached(self, bot):
# We want to know when the graceful shutdown flag changes
self.slave_status.addGracefulWatcher(self._gracefulChanged)

d = defer.succeed(None)
def _log_attachment_on_slave(res):
d1 = bot.callRemote("print", "attached")
d1.addErrback(lambda why: None)
Expand Down
49 changes: 46 additions & 3 deletions master/buildbot/master.py
Expand Up @@ -313,8 +313,51 @@ def shouldMergeRequests(self, builder, req1, req2):
return self.mergeRequests(builder, req1, req2)
return req1.canBeMergedWith(req2)

def getPerspective(self, slavename):
return self.slaves[slavename]
def getPerspective(self, mind, slavename):
sl = self.slaves[slavename]
if not sl:
return None

# record when this connection attempt occurred
sl.recordConnectTime()

if sl.isConnected():
# uh-oh, we've got a duplicate slave. The most likely
# explanation is that the slave is behind a slow link, thinks we
# went away, and has attempted to reconnect, so we've got two
# "connections" from the same slave. The old may not be stale at this
# point, if there are two slave proceses out there with the same name,
# so instead of booting the old (which may be in the middle of a build),
# we reject the new connection and ping the old slave.
log.msg("duplicate slave %s; rejecting new slave and pinging old" % sl.slavename)

# just in case we've got two identically-configured slaves,
# report the IP addresses of both so someone can resolve the
# squabble
old_tport = sl.slave.broker.transport
new_tport = mind.broker.transport
log.msg("old slave was connected from", old_tport.getPeer())
log.msg("new slave is from", new_tport.getPeer())

# ping the old slave. If this kills it, then the new slave will connect
# again and everyone will be happy.
d = sl.slave.callRemote("print", "master got a duplicate connection; keeping this one")

# now return a dummy avatar and kill the new connection in 5
# seconds, thereby giving the ping a bit of time to kill the old
# connection, if necessary
def kill():
log.msg("killing new slave on", new_tport.getPeer())
new_tport.loseConnection()
reactor.callLater(5, kill)
class DummyAvatar(pb.Avatar):
def attached(self, *args):
pass
def detached(self, *args):
pass
return DummyAvatar()

return sl

def shutdownSlaves(self):
# TODO: make this into a bot method rather than a builder method
Expand Down Expand Up @@ -428,7 +471,7 @@ def requestAvatar(self, avatarID, mind, interface):
else:
# it must be one of the buildslaves: no other names will make it
# past the checker
p = self.botmaster.getPerspective(avatarID)
p = self.botmaster.getPerspective(mind, avatarID)

if not p:
raise ValueError("no perspective for '%s'" % avatarID)
Expand Down

0 comments on commit 9b81701

Please sign in to comment.