Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PostgreSQL: get_sla_ok_percent to return decimal #710

Merged
merged 1 commit into from
Mar 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion schema/pgsql/schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ BEGIN
END IF;
END LOOP;

RETURN 100 * (total_time - problem_time) / total_time;
RETURN (100 * (total_time - problem_time)::decimal / total_time)::decimal(7, 4);
END;
$$;

Expand Down
139 changes: 139 additions & 0 deletions schema/pgsql/upgrades/1.1.2.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
CREATE OR REPLACE FUNCTION get_sla_ok_percent(
in_host_id bytea20,
in_service_id bytea20,
in_start_time biguint,
in_end_time biguint
)
RETURNS decimal(7, 4)
LANGUAGE plpgsql
STABLE
PARALLEL RESTRICTED
AS $$
DECLARE
last_event_time biguint := in_start_time;
last_hard_state tinyuint;
active_downtimes uint := 0;
problem_time biguint := 0;
total_time biguint;
row record;
BEGIN
IF in_end_time <= in_start_time THEN
RAISE 'end time must be greater than start time';
END IF;

total_time := in_end_time - in_start_time;

-- Use the latest event at or before the beginning of the SLA interval as the initial state.
SELECT hard_state INTO last_hard_state
FROM sla_history_state s
WHERE s.host_id = in_host_id
AND ((in_service_id IS NULL AND s.service_id IS NULL) OR s.service_id = in_service_id)
AND s.event_time <= in_start_time
ORDER BY s.event_time DESC
LIMIT 1;

-- If this does not exist, use the previous state from the first event after the beginning of the SLA interval.
IF last_hard_state IS NULL THEN
SELECT previous_hard_state INTO last_hard_state
FROM sla_history_state s
WHERE s.host_id = in_host_id
AND ((in_service_id IS NULL AND s.service_id IS NULL) OR s.service_id = in_service_id)
AND s.event_time > in_start_time
ORDER BY s.event_time ASC
LIMIT 1;
END IF;

-- If this also does not exist, use the current host/service state.
IF last_hard_state IS NULL THEN
IF in_service_id IS NULL THEN
SELECT hard_state INTO last_hard_state
FROM host_state s
WHERE s.host_id = in_host_id;
ELSE
SELECT hard_state INTO last_hard_state
FROM service_state s
WHERE s.host_id = in_host_id
AND s.service_id = in_service_id;
END IF;
END IF;

IF last_hard_state IS NULL THEN
last_hard_state := 0;
END IF;

FOR row IN
(
-- all downtime_start events before the end of the SLA interval
-- for downtimes that overlap the SLA interval in any way
SELECT
GREATEST(downtime_start, in_start_time) AS event_time,
'downtime_start' AS event_type,
1 AS event_prio,
NULL::tinyuint AS hard_state,
NULL::tinyuint AS previous_hard_state
FROM sla_history_downtime d
WHERE d.host_id = in_host_id
AND ((in_service_id IS NULL AND d.service_id IS NULL) OR d.service_id = in_service_id)
AND d.downtime_start < in_end_time
AND d.downtime_end >= in_start_time
) UNION ALL (
-- all downtime_end events before the end of the SLA interval
-- for downtimes that overlap the SLA interval in any way
SELECT
downtime_end AS event_time,
'downtime_end' AS event_type,
2 AS event_prio,
NULL::tinyuint AS hard_state,
NULL::tinyuint AS previous_hard_state
FROM sla_history_downtime d
WHERE d.host_id = in_host_id
AND ((in_service_id IS NULL AND d.service_id IS NULL) OR d.service_id = in_service_id)
AND d.downtime_start < in_end_time
AND d.downtime_end >= in_start_time
AND d.downtime_end < in_end_time
) UNION ALL (
-- all state events strictly in interval
SELECT
event_time,
'state_change' AS event_type,
0 AS event_prio,
hard_state,
previous_hard_state
FROM sla_history_state s
WHERE s.host_id = in_host_id
AND ((in_service_id IS NULL AND s.service_id IS NULL) OR s.service_id = in_service_id)
AND s.event_time > in_start_time
AND s.event_time < in_end_time
) UNION ALL (
-- end event to keep loop simple, values are not used
SELECT
in_end_time AS event_time,
'end' AS event_type,
3 AS event_prio,
NULL::tinyuint AS hard_state,
NULL::tinyuint AS previous_hard_state
)
ORDER BY event_time, event_prio
LOOP
IF row.previous_hard_state = 99 THEN
total_time := total_time - (row.event_time - last_event_time);
ELSEIF ((in_service_id IS NULL AND last_hard_state > 0) OR (in_service_id IS NOT NULL AND last_hard_state > 1))
AND last_hard_state != 99
AND active_downtimes = 0
THEN
problem_time := problem_time + row.event_time - last_event_time;
END IF;

last_event_time := row.event_time;
IF row.event_type = 'state_change' THEN
last_hard_state := row.hard_state;
ELSEIF row.event_type = 'downtime_start' THEN
active_downtimes := active_downtimes + 1;
ELSEIF row.event_type = 'downtime_end' THEN
active_downtimes := active_downtimes - 1;
END IF;
END LOOP;

RETURN (100 * (total_time - problem_time)::decimal / total_time)::decimal(7, 4);
END;
$$;
36 changes: 36 additions & 0 deletions tests/sql/sla_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,42 @@ func TestSla(t *testing.T) {
Start: 1000,
End: 2000,
Expected: 60.0,
}, {
Name: "MultipleStateChangesDecimalsOddNumbers",
// Test flapping again, also that calculations are rounded correctly including decimal places.
Events: []SlaHistoryEvent{
&State{Time: 1000, State: 2, PreviousState: 99}, // -2.3%
&State{Time: 1023, State: 0, PreviousState: 2},
&State{Time: 1100, State: 2, PreviousState: 0}, // -14.2%
&State{Time: 1242, State: 0, PreviousState: 2},
&State{Time: 1300, State: 2, PreviousState: 0}, // -0.7%
&State{Time: 1307, State: 0, PreviousState: 2},
&State{Time: 1400, State: 2, PreviousState: 0}, // -26.6%
&State{Time: 1666, State: 0, PreviousState: 2},
},
Start: 1000,
End: 2000,
Expected: 56.2,
julianbrost marked this conversation as resolved.
Show resolved Hide resolved
}, {
Name: "MultipleStateChangesDecimalsFractionOneThird",
// Test decimal representation of a fraction including precision and scale.
Events: []SlaHistoryEvent{
&State{Time: 1000, State: 2, PreviousState: 99}, // -33.3..%
&State{Time: 1100, State: 0, PreviousState: 2},
},
Start: 1000,
End: 1300,
Expected: 66.6667,
}, {
Name: "MultipleStateChangesDecimalsFractionSeventhPart",
// Test decimal representation of a fraction including precision and scale.
Events: []SlaHistoryEvent{
&State{Time: 1000, State: 2, PreviousState: 99}, // -85.7142..%
&State{Time: 1600, State: 0, PreviousState: 2},
},
Start: 1000,
End: 1700,
Expected: 14.2857,
}, {
Name: "OverlappingDowntimesAndProblems",
// SLA should be 90%:
Expand Down