Skip to content

Commit

Permalink
rule: fix reload signal not working (thanos-io#4442)
Browse files Browse the repository at this point in the history
* rule: fix reload signal not working

Signed-off-by: Juraj Michalek <juraj.michalek132@gmail.com>

* CHANGELOG: added PR

Signed-off-by: Juraj Michalek <juraj.michalek132@gmail.com>

* rule: added test for realoding using sighup signal

Signed-off-by: Juraj Michalek <juraj.michalek132@gmail.com>

* rule: adressed comments for test of realoding using sighup

Signed-off-by: Juraj Michalek <juraj.michalek132@gmail.com>

* ran make format

Signed-off-by: Juraj Michalek <juraj.michalek132@gmail.com>

* addressed comment for test of realoding using sighup

Signed-off-by: Juraj Michalek <juraj.michalek132@gmail.com>

* make linter happy

Signed-off-by: Juraj Michalek <juraj.michalek132@gmail.com>
  • Loading branch information
jmichalek132 authored and GiedriusS committed Jul 20, 2021
1 parent e81ba4e commit 9291431
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 9 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re
- [#4340](https://github.com/thanos-io/thanos/pull/4340) UI: now displays the duration and all annotations of an alert in the alerts page.
- [#4348](https://github.com/thanos-io/thanos/pull/4348) Fixed parsing of the port in the log middleware.
- [#4417](https://github.com/thanos-io/thanos/pull/4417) UI: fixed the night mode in Bucket UI.
- [#4442](https://github.com/thanos-io/thanos/pull/4442) Ruler: fix SIGHUP reload signal not working.

### Changed

Expand Down
5 changes: 3 additions & 2 deletions cmd/thanos/rule.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ type ruleConfig struct {
ruleFiles []string
objStoreConfig *extflag.PathOrContent
dataDir string
reloadSignal <-chan struct{}
lset labels.Labels
}

Expand Down Expand Up @@ -195,6 +194,7 @@ func registerRule(app *extkingpin.App) {
tracer,
comp,
*conf,
reload,
getFlagsMap(cmd.Flags()),
httpLogOpts,
grpcLogOpts,
Expand Down Expand Up @@ -257,6 +257,7 @@ func runRule(
tracer opentracing.Tracer,
comp component.Component,
conf ruleConfig,
reloadSignal <-chan struct{},
flagsMap map[string]string,
httpLogOpts []logging.Option,
grpcLogOpts []grpc_logging.Option,
Expand Down Expand Up @@ -483,7 +484,7 @@ func runRule(
}
for {
select {
case <-conf.reloadSignal:
case <-reloadSignal:
if err := reloadRules(logger, conf.ruleFiles, ruleMgr, conf.evalInterval, metrics); err != nil {
level.Error(logger).Log("msg", "reload rules by sighup failed", "err", err)
}
Expand Down
71 changes: 64 additions & 7 deletions test/e2e/rule_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,39 @@ groups:
severity: page
annotations:
summary: "I always complain and I have been loaded via /-/reload."
`
testAlertRuleAddedLaterSignal = `
groups:
- name: example
interval: 1s
partial_response_strategy: "WARN"
rules:
- alert: TestAlert_HasBeenLoadedViaWebHandler
# It must be based on actual metric, otherwise call to StoreAPI would be not involved.
expr: absent(some_metric)
labels:
severity: page
annotations:
summary: "I always complain and I have been loaded via sighup signal."
- name: example2
interval: 1s
partial_response_strategy: "WARN"
rules:
- alert: TestAlert_HasBeenLoadedViaWebHandler
# It must be based on actual metric, otherwise call to StoreAPI would be not involved.
expr: absent(some_metric)
labels:
severity: page
annotations:
summary: "I always complain and I have been loaded via sighup signal."
`
)

type rulesResp struct {
Status string
Data *rulespb.RuleGroups
}

func createRuleFile(t *testing.T, path, content string) {
t.Helper()
err := ioutil.WriteFile(path, []byte(content), 0666)
Expand All @@ -97,6 +127,30 @@ func reloadRulesHTTP(t *testing.T, ctx context.Context, endpoint string) {
testutil.Equals(t, 200, resp.StatusCode)
}

func reloadRulesSignal(t *testing.T, r *e2ethanos.Service) {
c := e2e.NewCommand("kill", "-1", "1")
_, _, err := r.Exec(c)
testutil.Ok(t, err)
}

func checkReloadSuccessful(t *testing.T, ctx context.Context, endpoint string, expectedRulegroupCount int) {
req, err := http.NewRequestWithContext(ctx, "GET", "http://"+endpoint+"/api/v1/rules", ioutil.NopCloser(bytes.NewReader(nil)))
testutil.Ok(t, err)
resp, err := http.DefaultClient.Do(req)
testutil.Ok(t, err)
testutil.Equals(t, 200, resp.StatusCode)

body, _ := ioutil.ReadAll(resp.Body)
testutil.Ok(t, resp.Body.Close())

var data = rulesResp{}

testutil.Ok(t, json.Unmarshal(body, &data))
testutil.Equals(t, "success", data.Status)

testutil.Assert(t, len(data.Data.Groups) == expectedRulegroupCount, fmt.Sprintf("expected there to be %d rule groups", expectedRulegroupCount))
}

func rulegroupCorrectData(t *testing.T, ctx context.Context, endpoint string) {
req, err := http.NewRequestWithContext(ctx, "GET", "http://"+endpoint+"/api/v1/rules", ioutil.NopCloser(bytes.NewReader(nil)))
testutil.Ok(t, err)
Expand All @@ -108,10 +162,7 @@ func rulegroupCorrectData(t *testing.T, ctx context.Context, endpoint string) {
body, err := ioutil.ReadAll(resp.Body)
testutil.Ok(t, err)

var data struct {
Status string
Data *rulespb.RuleGroups
}
var data = rulesResp{}

testutil.Ok(t, json.Unmarshal(body, &data))
testutil.Equals(t, "success", data.Status)
Expand Down Expand Up @@ -317,12 +368,18 @@ func TestRule(t *testing.T) {
rulegroupCorrectData(t, ctx, r.HTTPEndpoint())
})

t.Run("reload works", func(t *testing.T) {
// Add a new rule via /-/reload.
// TODO(GiedriusS): add a test for reloading via SIGHUP. Need to extend e2e framework to expose PIDs.
t.Run("signal reload works", func(t *testing.T) {
// Add a new rule via sending sighup
createRuleFile(t, fmt.Sprintf("%s/newrule.yaml", rulesPath), testAlertRuleAddedLaterSignal)
reloadRulesSignal(t, r)
checkReloadSuccessful(t, ctx, r.HTTPEndpoint(), 4)
})

t.Run("http reload works", func(t *testing.T) {
// Add a new rule via /-/reload.
createRuleFile(t, fmt.Sprintf("%s/newrule.yaml", rulesPath), testAlertRuleAddedLaterWebHandler)
reloadRulesHTTP(t, ctx, r.HTTPEndpoint())
checkReloadSuccessful(t, ctx, r.HTTPEndpoint(), 3)
})

queryAndAssertSeries(t, ctx, q.HTTPEndpoint(), "ALERTS", promclient.QueryOptions{
Expand Down

0 comments on commit 9291431

Please sign in to comment.