From a8cf6f4806196bc3ebd659cf20f6a1892a326d63 Mon Sep 17 00:00:00 2001
From: Sukhwinder Dhillon
Date: Tue, 28 Nov 2023 15:04:19 +0100
Subject: [PATCH] WIP

---
Review notes (git am ignores text between the "---" line and the first diff):
 * icinga2.conf: the FLAPPINGSTART branches return "flappingStart" (was
   "flappintStart") so the value matches event.TypeFlappingStart.
 * icinga2.conf: the service event_severity guard uses "&&"; with "||" the
   condition was always true and the severity was always empty.
 * internal/event/event.go: Validate lists the accepted types in a single case
   clause and no longer appends "\n" to an error string.
 * internal/incident/incident.go: processNonStateTypeEvent has a doc comment.
 * internal/listener/listener.go: Fprintf replaces Fprintln(Sprintf(...)).
 * schema/pgsql/schema.sql: added the missing comma after 'notified'.
 * All fixes keep the hunk line counts; the "index" blob hashes were not
   regenerated.

 icinga2.conf                            | 64 ++++++++++++------
 internal/event/event.go                 | 40 +++++++++++-
 internal/incident/history_event_type.go | 12 ++++
 internal/incident/incident.go           | 86 +++++++++++++++++++------
 internal/listener/listener.go           | 27 ++------
 schema/pgsql/schema.sql                 | 16 ++++-
 schema/pgsql/upgrades/020.sql           |  6 ++
 7 files changed, 186 insertions(+), 65 deletions(-)
 create mode 100644 schema/pgsql/upgrades/020.sql

diff --git a/icinga2.conf b/icinga2.conf
index 11bb939f..d99ba0d5 100644
--- a/icinga2.conf
+++ b/icinga2.conf
@@ -41,7 +41,7 @@ function urlencode(str) {
 object User "icinga-notifications" {
 	# Workaround, types filter here must exclude Problem, otherwise no Acknowledgement notifications are sent.
 	# https://github.com/Icinga/icinga2/issues/9739
-	types = [ Acknowledgement ]
+	types = [ Acknowledgement, DowntimeStart, DowntimeEnd, DowntimeRemoved, Custom, FlappingStart, FlappingEnd ]
 }
 
 var baseBody = {
@@ -119,13 +119,25 @@ object NotificationCommand "icinga-notifications-host" use(hostBody, hostExtraTa
 	vars.event_type = {{
 		if (macro("$notification.type$") == "ACKNOWLEDGEMENT") {
 			return "acknowledgement"
+		} else if (macro("$notification.type$") == "DOWNTIMESTART") {
+			return "downtimeStart"
+		} else if (macro("$notification.type$") == "DOWNTIMEEND") {
+			return "downtimeEnd"
+		} else if (macro("$notification.type$") == "DOWNTIMECANCELLED") {
+			return "downtimeCancelled"
+		} else if (macro("$notification.type$") == "CUSTOM") {
+			return "custom"
+		} else if (macro("$notification.type$") == "FLAPPINGSTART") {
+			return "flappingStart"
+		} else if (macro("$notification.type$") == "FLAPPINGEND") {
+			return "flappingEnd"
 		}
 
 		return ""
 	}}
 
 	vars.event_severity = {{
-		if (macro("$notification.type$") != "ACKNOWLEDGEMENT") {
+		if (macro("$notification.type$") == "PROBLEM" || macro("$notification.type$") == "RECOVERY") {
 			return macro("$host.state$") == "DOWN" ? "crit" : "ok"
 		}
 
@@ -163,7 +175,7 @@ template Host "generic-icinga-notifications-host" default {
 apply Notification "icinga-notifications-forwarder" to Host {
 	command = "icinga-notifications-host"
 
-	types = [ Acknowledgement ]
+	types = [ Acknowledgement, DowntimeStart, DowntimeEnd, DowntimeRemoved, Custom, FlappingStart, FlappingEnd ]
 
 	users = [ "icinga-notifications" ]
 
@@ -230,27 +242,39 @@ object NotificationCommand "icinga-notifications-service" use(serviceBody, servi
 
 	vars.event_type = {{
 		if (macro("$notification.type$") == "ACKNOWLEDGEMENT") {
-			return "acknowledgement"
-		}
+            return "acknowledgement"
+        } else if (macro("$notification.type$") == "DOWNTIMESTART") {
+            return "downtimeStart"
+        } else if (macro("$notification.type$") == "DOWNTIMEEND") {
+            return "downtimeEnd"
+        } else if (macro("$notification.type$") == "DOWNTIMECANCELLED") {
+            return "downtimeCancelled"
+        } else if (macro("$notification.type$") == "CUSTOM") {
+            return "custom"
+        } else if (macro("$notification.type$") == "FLAPPINGSTART") {
+            return "flappingStart"
+        } else if (macro("$notification.type$") == "FLAPPINGEND") {
+            return "flappingEnd"
+        }
 
 		return ""
 	}}
 
 	vars.event_severity = {{
-		if (macro("$notification.type$") != "ACKNOWLEDGEMENT") {
-			var state = macro("$service.state$")
-			if (state == "OK") {
-				return "ok"
-			} else if (state == "WARNING") {
-				return "warning"
-			} else if (state == "CRITICAL") {
-				return "crit"
-			} else { // Unknown
-				return "err"
-			}
-		}
-
-		return ""
+		if (macro("$notification.type$") != "PROBLEM" && macro("$notification.type$") != "RECOVERY") {
+			return ""
+		}
+
+		var state = macro("$service.state$")
+		if (state == "OK") {
+			return "ok"
+		} else if (state == "WARNING") {
+			return "warning"
+		} else if (state == "CRITICAL") {
+			return "crit"
+		} else { // Unknown
+			return "err"
+		}
 	}}
 }
 
@@ -294,7 +318,7 @@ template Service "generic-icinga-notifications-service" default {
 apply Notification "icinga-notifications-forwarder" to Service {
 	command = "icinga-notifications-service"
 
-	types = [ Acknowledgement ]
+	types = [ Acknowledgement, DowntimeStart, DowntimeEnd, DowntimeRemoved, Custom, FlappingStart, FlappingEnd ]
 
 	users = [ "icinga-notifications" ]
 
diff --git a/internal/event/event.go b/internal/event/event.go
index 48b39fbf..f3bae002 100644
--- a/internal/event/event.go
+++ b/internal/event/event.go
@@ -29,11 +29,45 @@ type Event struct {
 }
 
 const (
-	TypeState           = "state"
-	TypeAcknowledgement = "acknowledgement"
-	TypeInternal        = "internal"
+	TypeState             = "state"
+	TypeAcknowledgement   = "acknowledgement"
+	TypeDowntimeStart     = "downtimeStart"
+	TypeDowntimeEnd       = "downtimeEnd"
+	TypeDowntimeCancelled = "downtimeCancelled"
+	TypeCustom            = "custom"
+	TypeFlappingStart     = "flappingStart"
+	TypeFlappingEnd       = "flappingEnd"
+	TypeInternal          = "internal"
 )
 
+// Validate returns an error if the event has no tags or an unknown or inconsistent type/severity combination.
+func (e *Event) Validate() error {
+	if len(e.Tags) == 0 {
+		return fmt.Errorf("invalid event: tags cannot be empty")
+	} else if e.Severity == SeverityNone {
+		if e.Type == "" {
+			return fmt.Errorf("invalid event: must set 'type' or 'severity'")
+		}
+
+		switch e.Type {
+		case TypeState,
+			TypeAcknowledgement,
+			TypeDowntimeStart,
+			TypeDowntimeEnd,
+			TypeDowntimeCancelled,
+			TypeCustom,
+			TypeFlappingStart,
+			TypeFlappingEnd:
+			return nil
+		default:
+			return fmt.Errorf("invalid event type: %s", e.Type)
+		}
+	} else if e.Type != "" && e.Type != TypeState {
+		return fmt.Errorf("invalid event: if 'severity' is set, 'type' must not be set or set to %q", TypeState)
+	}
+	return nil
+}
+
 func (e *Event) String() string {
 	return fmt.Sprintf("[time=%s type=%q severity=%s]", e.Time, e.Type, e.Severity.String())
 }
diff --git a/internal/incident/history_event_type.go b/internal/incident/history_event_type.go
index acff9ed8..19ba6b03 100644
--- a/internal/incident/history_event_type.go
+++ b/internal/incident/history_event_type.go
@@ -16,6 +16,12 @@ const (
 	Opened
 	Closed
 	Notified
+	DowntimeStarted
+	DowntimeEnded
+	DowntimeCancelled
+	Custom
+	FlappingStarted
+	FlappingEnded
 )
 
 var historyTypeByName = map[string]HistoryEventType{
@@ -26,6 +32,12 @@ var historyTypeByName = map[string]HistoryEventType{
 	"opened":                    Opened,
 	"closed":                    Closed,
 	"notified":                  Notified,
+	"downtime_started":          DowntimeStarted,
+	"downtime_ended":            DowntimeEnded,
+	"downtime_cancelled":        DowntimeCancelled,
+	"custom":                    Custom,
+	"flapping_started":          FlappingStarted,
+	"flapping_ended":            FlappingEnded,
 }
 
 var historyEventTypeToName = func() map[HistoryEventType]string {
diff --git a/internal/incident/incident.go b/internal/incident/incident.go
index b0d60aa6..15ad3906 100644
--- a/internal/incident/incident.go
+++ b/internal/incident/incident.go
@@ -139,33 +139,35 @@ func (i *Incident) ProcessEvent(ctx context.Context, ev *event.Event, created bo
 		return errors.New("can't insert incident event to the database")
 	}
 
-	if ev.Type == event.TypeAcknowledgement {
-		return i.processAcknowledgementEvent(ctx, tx, ev)
-	}
-
 	var causedBy types.Int
-	if !created {
-		causedBy, err = i.processSeverityChangedEvent(ctx, tx, ev)
+	if ev.Type == event.TypeState {
+		if !created {
+			causedBy, err = i.processSeverityChangedEvent(ctx, tx, ev)
+			if err != nil {
+				return err
+			}
+		}
+
+		// Check if any (additional) rules match this object. Filters of rules that already have a state don't have
+		// to be checked again, these rules already matched and stay effective for the ongoing incident.
+		causedBy, err = i.evaluateRules(ctx, tx, ev.ID, causedBy)
 		if err != nil {
 			return err
 		}
-	}
-
-	// Check if any (additional) rules match this object. Filters of rules that already have a state don't have
-	// to be checked again, these rules already matched and stay effective for the ongoing incident.
-	causedBy, err = i.evaluateRules(ctx, tx, ev.ID, causedBy)
-	if err != nil {
-		return err
-	}
 
-	// Re-evaluate escalations based on the newly evaluated rules.
-	escalations, err := i.evaluateEscalations(ev.Time)
-	if err != nil {
-		return err
-	}
+		// Re-evaluate escalations based on the newly evaluated rules.
+		escalations, err := i.evaluateEscalations(ev.Time)
+		if err != nil {
+			return err
+		}
 
-	if err := i.triggerEscalations(ctx, tx, ev, causedBy, escalations); err != nil {
-		return err
+		if err := i.triggerEscalations(ctx, tx, ev, causedBy, escalations); err != nil {
+			return err
+		}
+	} else {
+		if err = i.processNonStateTypeEvent(ctx, tx, ev); err != nil {
+			return err
+		}
 	}
 
 	notifications, err := i.addPendingNotifications(ctx, tx, ev, i.getRecipientsChannel(ev.Time), causedBy)
@@ -572,6 +574,48 @@ func (i *Incident) notifyContact(contact *recipient.Contact, ev *event.Event, ch
 	return nil
 }
 
+// processNonStateTypeEvent delegates acknowledgements and records any other known non-state event as a history entry.
+func (i *Incident) processNonStateTypeEvent(ctx context.Context, tx *sqlx.Tx, ev *event.Event) error {
+	if ev.Type == event.TypeAcknowledgement {
+		return i.processAcknowledgementEvent(ctx, tx, ev)
+	}
+
+	var historyEvType HistoryEventType
+	switch ev.Type {
+	case event.TypeDowntimeStart:
+		historyEvType = DowntimeStarted
+	case event.TypeDowntimeEnd:
+		historyEvType = DowntimeEnded
+	case event.TypeDowntimeCancelled:
+		historyEvType = DowntimeCancelled
+	case event.TypeFlappingStart:
+		historyEvType = FlappingStarted
+	case event.TypeFlappingEnd:
+		historyEvType = FlappingEnded
+	case event.TypeCustom:
+		historyEvType = Custom
+	default:
+		//TODO: other events
+		return fmt.Errorf("type %s not implemented yet", ev.Type)
+	}
+
+	hr := &HistoryRow{
+		EventID: utils.ToDBInt(ev.ID),
+		Time:    types.UnixMilli(time.Now()),
+		Type:    historyEvType,
+		Message: utils.ToDBString(ev.Message),
+	}
+
+	_, err := i.AddHistory(ctx, tx, hr, false)
+	if err != nil {
+		i.logger.Errorw("Failed to add history", zap.String("type", historyEvType.String()), zap.Error(err))
+
+		return fmt.Errorf("failed to add %s history", historyEvType.String())
+	}
+
+	return nil
+}
+
 // processAcknowledgementEvent processes the given ack event.
 // Promotes the ack author to incident.RoleManager if it's not already the case and generates a history entry.
 // Returns error on database failure.
diff --git a/internal/listener/listener.go b/internal/listener/listener.go
index b9b349f0..9d990857 100644
--- a/internal/listener/listener.go
+++ b/internal/listener/listener.go
@@ -91,34 +91,21 @@ func (l *Listener) ProcessEvent(w http.ResponseWriter, req *http.Request) {
 		_, _ = fmt.Fprintf(w, "cannot parse JSON body: %v\n", err)
 		return
 	}
-	ev.Time = time.Now()
 
-	if ev.Severity == event.SeverityNone && ev.Type == "" {
+	if err := ev.Validate(); err != nil {
+		l.logger.Errorw("event validation failed", zap.Error(err))
 		w.WriteHeader(http.StatusBadRequest)
-		_, _ = fmt.Fprintln(w, "ignoring invalid event: must set 'type' or 'severity'")
+		_, _ = fmt.Fprintf(w, "Ignoring %s\n", err)
 		return
 	}
 
-	if ev.Severity != event.SeverityNone {
-		if ev.Type == "" {
-			ev.Type = event.TypeState
-		} else if ev.Type != event.TypeState {
-			w.WriteHeader(http.StatusBadRequest)
-			_, _ = fmt.Fprintf(w, "ignoring invalid event: if 'severity' is set, 'type' must not be set or set to %q\n", event.TypeState)
-			return
-		}
+	ev.Time = time.Now()
+	if ev.Type == "" {
+		ev.Type = event.TypeState
 	}
 
-	if ev.Severity == event.SeverityNone {
-		if ev.Type != event.TypeAcknowledgement {
-			// It's neither a state nor an acknowledgement event.
-			w.WriteHeader(http.StatusBadRequest)
-			_, _ = fmt.Fprintf(w, "received not a state/acknowledgement event, ignoring\n")
-			return
-		}
-	}
+	l.logger.Infof("Processing %s event", ev.Type)
 
-	l.logger.Infof("Processing event")
 	err = incident.ProcessEvent(context.Background(), l.db, l.logs, l.runtimeConfig, &ev)
 	if err != nil {
 		l.logger.Errorw("Failed to process event", zap.Error(err))
diff --git a/schema/pgsql/schema.sql b/schema/pgsql/schema.sql
index 56592134..bd045d78 100644
--- a/schema/pgsql/schema.sql
+++ b/schema/pgsql/schema.sql
@@ -1,5 +1,19 @@
 CREATE TYPE boolenum AS ENUM ( 'n', 'y' );
-CREATE TYPE incident_history_event_type AS ENUM ( 'incident_severity_changed', 'recipient_role_changed', 'escalation_triggered', 'rule_matched', 'opened', 'closed', 'notified' );
+CREATE TYPE incident_history_event_type AS ENUM (
+    'incident_severity_changed',
+    'recipient_role_changed',
+    'escalation_triggered',
+    'rule_matched',
+    'opened',
+    'closed',
+    'notified',
+    'downtime_started',
+    'downtime_ended',
+    'downtime_cancelled',
+    'custom',
+    'flapping_started',
+    'flapping_ended'
+);
 CREATE TYPE frequency_type AS ENUM ( 'MINUTELY', 'HOURLY', 'DAILY', 'WEEKLY', 'MONTHLY', 'QUARTERLY', 'YEARLY' );
 CREATE TYPE notification_state_type AS ENUM ( 'pending', 'sent', 'failed' );
 
diff --git a/schema/pgsql/upgrades/020.sql b/schema/pgsql/upgrades/020.sql
new file mode 100644
index 00000000..ff20eef2
--- /dev/null
+++ b/schema/pgsql/upgrades/020.sql
@@ -0,0 +1,6 @@
+ALTER TYPE incident_history_event_type ADD VALUE 'downtime_started';
+ALTER TYPE incident_history_event_type ADD VALUE 'downtime_ended';
+ALTER TYPE incident_history_event_type ADD VALUE 'downtime_cancelled';
+ALTER TYPE incident_history_event_type ADD VALUE 'custom';
+ALTER TYPE incident_history_event_type ADD VALUE 'flapping_started';
+ALTER TYPE incident_history_event_type ADD VALUE 'flapping_ended';
\ No newline at end of file