Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ci: improve ems alert logging and event generation #1993

Merged
merged 2 commits into from
Apr 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 18 additions & 23 deletions integration/test/alert/alert.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package promAlerts

import (
"fmt"
"github.com/Netapp/harvest-automation/test/installer"
"github.com/Netapp/harvest-automation/test/utils"
"github.com/netapp/harvest/v2/pkg/conf"
"github.com/netapp/harvest/v2/pkg/tree"
Expand All @@ -13,7 +14,7 @@ import (
const PrometheusAlertURL string = "http://localhost:9090/api/v1/alerts"
const TestClusterName = "umeng-aff300-05-06"
const TestNodeName = "umeng-aff300-06"
const User = "admin"
const Admin = "admin"

var volumeArwState = []string{
`"disable-in-progress"`,
Expand All @@ -34,6 +35,7 @@ type PromAlert struct {
}

func GetAlerts() (map[string]int, int) {
now := time.Now()
alertsData := make(map[string]int)
totalAlerts := 0

Expand All @@ -56,6 +58,11 @@ func GetAlerts() (map[string]int, int) {
}
}
}
log.Info().
Int("alertsData", len(alertsData)).
Int("totalAlerts", totalAlerts).
Str("dur", time.Since(now).Round(time.Millisecond).String()).
Msg("Get Prometheus alerts")
return alertsData, totalAlerts
}

Expand Down Expand Up @@ -93,8 +100,13 @@ func GetEmsAlerts(dir string, fileName string) ([]string, []string, []string) {
func GenerateEvents(emsNames []string, nodeScopedEms []string) []string {
supportedEms := make([]string, 0)
var jsonValue []byte
addr, user, pass, nodeName := GetPollerDetail()
url := "https://" + addr + "/api/private/cli/event/generate"
err := conf.LoadHarvestConfig(installer.HarvestConfigFile)
poller, err2 := conf.PollerNamed(TestClusterName)
dc1, err3 := conf.PollerNamed("dc1")
Comment on lines +104 to +105
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We only fetch the password from dc1, right?

if err != nil && err2 != nil && err3 != nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess we should error out if any one of these errors is non-nil (i.e. `||` rather than `&&`)?

log.Fatal().Errs("errors", []error{err, err2, err3}).Msg("Failed to load config")
}
url := "https://" + poller.Addr + "/api/private/cli/event/generate"
method := "POST"

volumeArwCount := 0
Expand All @@ -112,13 +124,13 @@ func GenerateEvents(emsNames []string, nodeScopedEms []string) []string {

// Handle for node-scoped ems, Passing node-name as input
if utils.Contains(nodeScopedEms, ems) {
jsonValue = []byte(fmt.Sprintf(`{"message-name": "%s", "values": [%s,2,3,4,5,6,7,8,9], "node": "%s"}`, ems, value, nodeName))
jsonValue = []byte(fmt.Sprintf(`{"message-name": "%s", "values": [%s,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20], "node": "%s"}`, ems, value, TestNodeName))
} else {
jsonValue = []byte(fmt.Sprintf(`{"message-name": "%s", "values": [%s,2,3,4,5,6,7,8,9]}`, ems, value))
jsonValue = []byte(fmt.Sprintf(`{"message-name": "%s", "values": [%s,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]}`, ems, value))
}

var data map[string]interface{}
data = utils.SendPostReqAndGetRes(url, method, jsonValue, user, pass)
data = utils.SendPostReqAndGetRes(url, method, jsonValue, Admin, dc1.Password)
if response := data["error"]; response != nil {
errorDetail := response.(map[string]interface{})
code := errorDetail["code"].(string)
Expand All @@ -133,20 +145,3 @@ func GenerateEvents(emsNames []string, nodeScopedEms []string) []string {

return supportedEms
}

// GetPollerDetail loads the Harvest config and returns, in order: the address
// of the poller named TestClusterName, the User constant, that poller's
// password, and TestNodeName.
func GetPollerDetail() (string, string, string, string) {
var (
err error
poller *conf.Poller
)

// The config is read from harvest.yml under utils.GetConfigDir().
// NOTE(review): PanicIfNotNil presumably panics on a non-nil error — confirm in utils.
if err = conf.LoadHarvestConfig(utils.GetConfigDir() + "/harvest.yml"); err != nil {
utils.PanicIfNotNil(err)
}

// Look up the poller entry for the test cluster.
if poller, err = conf.PollerNamed(TestClusterName); err != nil {
utils.PanicIfNotNil(err)
}

return poller.Addr, User, poller.Password, TestNodeName
}
37 changes: 20 additions & 17 deletions integration/test/bookend_ems_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"github.com/Netapp/harvest-automation/test/utils"
"github.com/rs/zerolog/log"
"testing"
"time"
)

var issuingEmsNames []string
Expand All @@ -31,52 +32,54 @@ var nodeScopedResolvingEmsList = []string{
}

func setupAlerts() {
totalAlerts := 0
numAlerts := 0
emsConfigDir := utils.GetHarvestRootDir() + "/conf/ems/9.6.0"
log.Info().Str("EmsConfigDir", emsConfigDir).Msg("Directory path")

// Fetch ems configured in template
_, issuingEmsNames, resolvingEmsNames = promAlerts.GetEmsAlerts(emsConfigDir, "ems.yaml")

// Identify supported issuing ems names for the given cluster
now := time.Now()
supportedIssuingEms = promAlerts.GenerateEvents(issuingEmsNames, nodeScopedIssuingEmsList)
log.Info().Msgf("Total supported issuing ems: %d", len(supportedIssuingEms))
log.Info().
Int("supportedIssuingEms", len(supportedIssuingEms)).
Str("dur", time.Since(now).Round(time.Millisecond).String()).
Msg("Supported issuing ems")

// Fetch previous prometheus alerts
oldAlertsData, _ = promAlerts.GetAlerts()

// Identify supported ems names for the given cluster
now = time.Now()
supportedResolvingEms = promAlerts.GenerateEvents(resolvingEmsNames, nodeScopedResolvingEmsList)
log.Info().Msgf("Total supported resolving ems:%d", len(supportedResolvingEms))
log.Info().
Int("supportedResolvingEms", len(supportedResolvingEms)).
Str("dur", time.Since(now).Round(time.Millisecond).String()).
Msg("Supported resolving ems")

// Fetch current prometheus alerts
newAlertsData, totalAlerts = promAlerts.GetAlerts()
if totalAlerts == 0 {
newAlertsData, numAlerts = promAlerts.GetAlerts()
if numAlerts == 0 {
log.Info().Msg("No alerts found in prometheus")
}
log.Info().Msgf("Total firing alerts %d", totalAlerts)
log.Info().Int("numAlerts", numAlerts).Msg("Firing alerts")
}

func TestEmsTestSuite(t *testing.T) {
utils.SkipIfMissing(t, utils.BookendEms)
setupAlerts()

// Evaluate bookend active ems events
foundBookendEms := make([]string, 0)

for _, issuingEms := range supportedIssuingEms {
// If the issuingEms did not exit before, then ignore the test-case.
// If the issuingEms did not exist before, then ignore the test-case.
if oldAlertsData[issuingEms] > 0 {
v := oldAlertsData[issuingEms] - newAlertsData[issuingEms]
if v < 1 {
foundBookendEms = append(foundBookendEms, issuingEms)
v := newAlertsData[issuingEms] - oldAlertsData[issuingEms]
if v > 1 {
Comment on lines +77 to +78
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This does not match our test case.
If the issuing ems (e.g. volume.offline) count was 3 earlier [oldAlertsData], then after we fire the resolving ems (volume.online) the issuing ems count should drop to 2 [newAlertsData].
So, v = 2 - 3 = -1.

The condition should be:

if v >= 0 {
error log
}

t.Errorf("Extra bookend ems alerts raised event=%s, count=%d", issuingEms, v)
}
} else {
log.Info().Str("issuingEms", issuingEms).Msg("There is no active issuingEms")
log.Info().Str("issuingEms", issuingEms).Msg("Ignore. Did not exist before")
}
}
if len(foundBookendEms) > 0 {
log.Error().Strs("foundBookendEms", foundBookendEms).Msg("Unexpected bookendEms found")
t.Errorf("One or more extra bookend ems alerts %s have been raised", foundBookendEms)
}
}