Skip to content

Commit

Permalink
Only error if errors are more than 4% of all requests.
Browse files: browse the repository at this point in the history
 🐿 v2.5.16
  • Loading branch information
Samuel Parkinson committed Oct 23, 2017
1 parent 14d5908 commit 48cb074
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 15 deletions.
2 changes: 1 addition & 1 deletion package.json
Expand Up @@ -23,7 +23,7 @@
"next-metrics": "^1.16.1"
},
"devDependencies": {
"@financial-times/n-gage": "^1.9.5",
"@financial-times/n-gage": "^1.14.0",
"body-parser": "^1.15.0",
"chai": "^3.0.0",
"coveralls": "^2.11.16",
Expand Down
11 changes: 6 additions & 5 deletions src/lib/error-rate-check.js
Expand Up @@ -7,13 +7,14 @@ module.exports = (appName, opts) => {
const severity = opts.severity || DEFAULT_SEVERITY;
let region = process.env.REGION ? '_' + process.env.REGION : '';
return nHealth.runCheck({
name: `The error rate for ${appName} is acceptable`,
type: 'graphiteSpike',
numerator: `next.heroku.${appName}.web_*${region}.express.*.res.status.{500,503,504}.count`,
divisor: `next.heroku.${appName}.web_*${region}.express.*.res.status.*.count`,
name: `The error rate for ${appName} is greater than 4% of requests`,
type: 'graphiteThreshold',
metric: `asPercent(summarize(sumSeries(next.heroku.${appName}.web_*${region}.express.*.res.status.{500,503,504}.count), '10min', 'sum', true), summarize(sumSeries(next.heroku.${appName}.web_*${region}.express.*.res.status.*.count), '10min', 'sum', true))`,
threshold: 4,
samplePeriod: '10min',
severity,
businessImpact: 'Users may see application error pages.',
technicalSummary: `The proportion of 500 responses for the ${appName} app is 3 times higher in the last 10 minutes than the error rate over the previous 7 days. This is a default n-express check.`,
technicalSummary: `The proportion of error responses for ${appName} is greater than 4% of all responses. This is a default n-express check.`,
panicGuide: 'Consult errors in sentry, application logs in splunk and run the application locally to identify errors'
});
};
13 changes: 4 additions & 9 deletions test/app/error-rate-check.test.js
Expand Up @@ -28,27 +28,22 @@ describe('Default error rate check', () => {
it('should compose correct graphite metric with region', () => {
process.env.REGION = 'US';

const expectedNumerator = 'next.heroku.app-name.web_*_US.express.*.res.status.{500,503,504}.count';
const expectedDivisor = 'next.heroku.app-name.web_*_US.express.*.res.status.*.count';
const metric = 'asPercent(summarize(sumSeries(next.heroku.app-name.web_*_US.express.*.res.status.{500,503,504}.count), \'10min\', \'sum\', true), summarize(sumSeries(next.heroku.app-name.web_*_US.express.*.res.status.*.count), \'10min\', \'sum\', true))';

subject('app-name');
expect(nHealthStub.runCheck).calledWithMatch({
numerator: expectedNumerator,
divisor: expectedDivisor
metric
});
});

it('should compose correct graphite metric without region', () => {
delete process.env.REGION;

const expectedNumerator = 'next.heroku.app-name.web_*.express.*.res.status.{500,503,504}.count';
const expectedDivisor = 'next.heroku.app-name.web_*.express.*.res.status.*.count';
const metric = 'asPercent(summarize(sumSeries(next.heroku.app-name.web_*.express.*.res.status.{500,503,504}.count), \'10min\', \'sum\', true), summarize(sumSeries(next.heroku.app-name.web_*.express.*.res.status.*.count), \'10min\', \'sum\', true))';

subject('app-name');

expect(nHealthStub.runCheck).calledWithMatch({
numerator: expectedNumerator,
divisor: expectedDivisor
metric
});
});

Expand Down

0 comments on commit 48cb074

Please sign in to comment.