From ed27a061acfb78f89657a3a794e2f2c7dec60604 Mon Sep 17 00:00:00 2001 From: Lukas Berns Date: Fri, 1 Jun 2012 16:59:41 +0900 Subject: [PATCH 1/4] Restart failed workers --- README.md | 5 +++++ bin/up | 8 ++++++++ lib/up.js | 6 +++++- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 2ac2669..02bb850 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,10 @@ The `up` command accepts the following options: - Strings like `'10s'` are accepted. - Defaults to `'10m'`, or `'500ms'` if `NODE_ENV` is `development`. +- `-k`/`--keepalive` + + - start a new worker after one dies unexpectedly + - `-f`/`--pidfile` - A filename to write the pid to @@ -114,6 +118,7 @@ parameters: - `workerTimeout`: (`Number`|`String`): see `--timeout` above. - `title`: (`String`): see `--title` above. - `assumeReady`: (`Boolean`): see Worker readiness below. + - `keepAlive`: (`Boolean`): see `--keepalive` above. ## Middleware diff --git a/bin/up b/bin/up index fa44180..cf8131b 100755 --- a/bin/up +++ b/bin/up @@ -43,6 +43,7 @@ program .option('-n, --number ', 'Number of workers to spawn.' , 'development' == process.env.NODE_ENV ? 1 : cpus) .option('-t, --timeout [ms]', 'Worker timeout.') + .option('-k, --keepalive', 'Restart failed workers.') /** * Capture requires. @@ -134,6 +135,12 @@ if (null != workerTimeout && isNaN(ms(workerTimeout))) { , program.timeout, ms(workerTimeout)); } +/** + * Parse keepalive + */ + +var keepAlive = program.keepalive; + /** * Start! */ @@ -148,6 +155,7 @@ var httpServer = http.Server().listen(program.port) , workerTimeout: workerTimeout , requires: requires , title: program.title + , keepAlive: keepAlive }) /** diff --git a/lib/up.js b/lib/up.js index e067328..d8d557f 100644 --- a/lib/up.js +++ b/lib/up.js @@ -75,6 +75,7 @@ function UpServer (server, file, opts) { ? opts.workerTimeout : workerTimeout); this.requires = opts.requires || []; this.assumeReady = opts.assumeReady === undefined ? true : !!opts.assumeReady; + this.keepAlive = opts.keepAlive || false; if (false !== opts.workerPingInterval) { this.workerPingInterval = ms(opts.workerPingInterval || '1m'); } @@ -202,7 +203,10 @@ UpServer.prototype.spawnWorker = function (fn) { if (~self.workers.indexOf(w)) { self.workers.splice(self.workers.indexOf(w), 1); self.lastIndex = -1; - // @TODO: auto-add workers ? + if (self.keepAlive && (self.workers.length + self.spawning.length < this.numWorkers)) { + debug('worker %s found dead. spawning 1 new worker', w.pid); + self.spawnWorker(); + } } self.emit('terminate', w) break; From 443b1bb9105fdfc8728779d1b6740edeb957074c Mon Sep 17 00:00:00 2001 From: Lukas Berns Date: Fri, 1 Jun 2012 17:19:21 +0900 Subject: [PATCH 2/4] Typo --- lib/up.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/up.js b/lib/up.js index d8d557f..ffc9f8c 100644 --- a/lib/up.js +++ b/lib/up.js @@ -203,7 +203,7 @@ UpServer.prototype.spawnWorker = function (fn) { if (~self.workers.indexOf(w)) { self.workers.splice(self.workers.indexOf(w), 1); self.lastIndex = -1; - if (self.keepAlive && (self.workers.length + self.spawning.length < this.numWorkers)) { + if (self.keepAlive && (self.workers.length + self.spawning.length < self.numWorkers)) { debug('worker %s found dead. spawning 1 new worker', w.pid); self.spawnWorker(); } From 0487753094d73d0db81b425e7a899edef6b5b448 Mon Sep 17 00:00:00 2001 From: Lukas Berns Date: Fri, 1 Jun 2012 17:45:44 +0900 Subject: [PATCH 3/4] Prevent auto-respawning storms --- README.md | 3 +++ lib/up.js | 18 ++++++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 02bb850..be48e4c 100644 --- a/README.md +++ b/README.md @@ -119,6 +119,9 @@ parameters: - `title`: (`String`): see `--title` above. - `assumeReady`: (`Boolean`): see Worker readiness below. - `keepAlive`: (`Boolean`): see `--keepalive` above. + - `minExpectedLifetime`: (`Number`|`String`): Number of ms a worker is + expected to live. Don't auto-respawn if a worker dies earlier. Strings + like `'10s'` are accepted. Defaults to `'20s'`. ## Middleware diff --git a/lib/up.js b/lib/up.js index ffc9f8c..1fa76dd 100644 --- a/lib/up.js +++ b/lib/up.js @@ -53,6 +53,13 @@ var workerTimeout = 'development' == env ? '500ms' : '10m'; var numWorkers = 'development' == env ? 1 : cpus; +/** + * Default minimum expected lifetime of a worker. + * If a worker dies younger, we don't respawn even if keepAlive == true. + * We want to prevent auto-respawning storms from overloading the system. + */ +var minExpectedLifetime = '20s'; + /** * UpServer factory/constructor. * @@ -76,6 +83,7 @@ function UpServer (server, file, opts) { this.requires = opts.requires || []; this.assumeReady = opts.assumeReady === undefined ? true : !!opts.assumeReady; this.keepAlive = opts.keepAlive || false; + this.minExpectedLifetime = ms(opts.minExpectedLifetime != null ? opts.minExpectedLifetime : minExpectedLifetime); if (false !== opts.workerPingInterval) { this.workerPingInterval = ms(opts.workerPingInterval || '1m'); } @@ -204,8 +212,13 @@ UpServer.prototype.spawnWorker = function (fn) { self.workers.splice(self.workers.indexOf(w), 1); self.lastIndex = -1; if (self.keepAlive && (self.workers.length + self.spawning.length < self.numWorkers)) { - debug('worker %s found dead. spawning 1 new worker', w.pid); - self.spawnWorker(); + if (new Date().getTime() - w.birthtime < self.minExpectedLifetime) { + debug('worker %s found dead at a too young age. won\'t respawn new worker', w.pid); + } + else { + debug('worker %s found dead. spawning 1 new worker', w.pid); + self.spawnWorker(); + } } } self.emit('terminate', w) @@ -277,6 +290,7 @@ function Worker (server) { this.proc.on('message', this.onMessage.bind(this)); this.proc.on('exit', this.onExit.bind(this)); this.pid = this.proc.pid; + this.birthtime = new Date().getTime(); debug('worker %s created', this.pid); } From 41d65270ef66d0322d0c02a0b3231a648a787956 Mon Sep 17 00:00:00 2001 From: Arlo Breault Date: Tue, 31 Jul 2012 14:58:44 -0700 Subject: [PATCH 4/4] add a test for respawning --- package.json | 3 +++ test/{sticky.js => sticky.test.js} | 0 test/{up.js => up.test.js} | 18 ++++++++++++++++++ 3 files changed, 21 insertions(+) rename test/{sticky.js => sticky.test.js} (100%) rename test/{up.js => up.test.js} (89%) diff --git a/package.json b/package.json index 30ad7f4..27405b6 100644 --- a/package.json +++ b/package.json @@ -20,4 +20,7 @@ , "express": "*" , "superagent": "*" } + , "scripts": { + "test": "./node_modules/mocha/bin/mocha test/*.test.js" + } } diff --git a/test/sticky.js b/test/sticky.test.js similarity index 100% rename from test/sticky.js rename to test/sticky.test.js diff --git a/test/up.js b/test/up.test.js similarity index 89% rename from test/up.js rename to test/up.test.js index 5dc82ca..dca783a 100644 --- a/test/up.js +++ b/test/up.test.js @@ -212,5 +212,23 @@ describe('up', function () { testAssumeReady(done, false); }); + it('should respawn a worker when it dies', function (done) { + var httpServer = http.Server().listen() + , opts = { numWorkers: 1, keepAlive: true, minExpectedLifetime: '50' } + , srv = up(httpServer, __dirname + '/server', opts) + , orgPid = null; + srv.once('spawn', function () { + expect(srv.workers).to.have.length(1); + orgPid = srv.workers[0].pid + setTimeout(function () { + process.kill(orgPid, 'SIGKILL'); + setTimeout(function () { + expect(srv.workers).to.have.length(1); + expect(srv.workers[0].pid).to.not.equal(orgPid); + done(); + }, 300) // give it time to die and respawn + }, 75) // greater than minExpectedLifetime + }); + }); });