/
postgres-audit.sql
378 lines (335 loc) · 15.3 KB
/
postgres-audit.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
/*
* Copyright (C) 2010-2023 Evolveum and contributors
*
* This work is dual-licensed under the Apache License 2.0
* and European Union Public License. See LICENSE file for details.
*/
-- USAGE NOTES: You can apply this to the main repository schema.
-- For separate audit use this in a separate database.
-- See the docs here: https://docs.evolveum.com/midpoint/reference/repository/native-audit
--
-- @formatter:off because of terribly unreliable IDEA reformat for SQL
-- Naming conventions:
-- M_ prefix is used for tables in main part of the repo, MA_ for audit tables (can be separate)
-- Constraints/indexes use table_column(s)_suffix convention, with PK for primary key,
-- FK foreign key, IDX for index, KEY for unique index.
-- TR is suffix for triggers.
-- Names are generally lowercase (despite prefix/suffixes above in uppercase ;-)).
-- Column names are Java style and match attribute names from M-classes (e.g. MAuditEvent).
--
-- Other notes:
-- TEXT is used instead of VARCHAR, see: https://dba.stackexchange.com/a/21496/157622
-- noinspection SqlResolveForFile @ operator-class/"gin__int_ops"
-- just in case PUBLIC schema was dropped (fastest way to remove all midpoint objects)
-- drop schema public cascade;
CREATE SCHEMA IF NOT EXISTS public;
-- CREATE EXTENSION IF NOT EXISTS pg_trgm; -- support for trigram indexes
-- region custom enum types
DO $$ BEGIN
-- NOTE: Types in this block must be updated when changed in postgres-new.sql!
CREATE TYPE ObjectType AS ENUM (
'ABSTRACT_ROLE',
'ACCESS_CERTIFICATION_CAMPAIGN',
'ACCESS_CERTIFICATION_DEFINITION',
'ARCHETYPE',
'ASSIGNMENT_HOLDER',
'CASE',
'CONNECTOR',
'CONNECTOR_HOST',
'DASHBOARD',
'FOCUS',
'FORM',
'FUNCTION_LIBRARY',
'GENERIC_OBJECT',
'LOOKUP_TABLE',
'MARK',
'MESSAGE_TEMPLATE',
'NODE',
'OBJECT',
'OBJECT_COLLECTION',
'OBJECT_TEMPLATE',
'ORG',
'POLICY',
'REPORT',
'REPORT_DATA',
'RESOURCE',
'ROLE',
'ROLE_ANALYSIS_CLUSTER',
'ROLE_ANALYSIS_SESSION',
'SECURITY_POLICY',
'SEQUENCE',
'SERVICE',
'SHADOW',
'SIMULATION_RESULT',
'SYSTEM_CONFIGURATION',
'TASK',
'USER',
'VALUE_POLICY');
CREATE TYPE OperationResultStatusType AS ENUM ('SUCCESS', 'WARNING', 'PARTIAL_ERROR',
'FATAL_ERROR', 'HANDLED_ERROR', 'NOT_APPLICABLE', 'IN_PROGRESS', 'UNKNOWN');
EXCEPTION WHEN duplicate_object THEN raise notice 'Main repo custom types already exist, OK...'; END $$;
CREATE TYPE AuditEventTypeType AS ENUM ('GET_OBJECT', 'ADD_OBJECT', 'MODIFY_OBJECT',
'DELETE_OBJECT', 'EXECUTE_CHANGES_RAW', 'SYNCHRONIZATION', 'CREATE_SESSION',
'TERMINATE_SESSION', 'WORK_ITEM', 'WORKFLOW_PROCESS_INSTANCE', 'RECONCILIATION',
'SUSPEND_TASK', 'RESUME_TASK', 'RUN_TASK_IMMEDIATELY', 'DISCOVER_OBJECT', 'INFORMATION_DISCLOSURE');
CREATE TYPE AuditEventStageType AS ENUM ('REQUEST', 'EXECUTION', 'RESOURCE');
CREATE TYPE EffectivePrivilegesModificationType AS ENUM ('ELEVATION', 'FULL_ELEVATION', 'REDUCTION', 'OTHER');
CREATE TYPE ChangeType AS ENUM ('ADD', 'MODIFY', 'DELETE');
-- We try to create ShadowKindType (necessary if audit is in separate database, if it is in same
-- database as repository, type already exists.
DO $$ BEGIN
CREATE TYPE ShadowKindType AS ENUM ('ACCOUNT', 'ENTITLEMENT', 'GENERIC', 'UNKNOWN');
EXCEPTION
WHEN duplicate_object THEN null;
END $$;
-- endregion
-- region management tables
-- Key -> value config table for internal use.
CREATE TABLE IF NOT EXISTS m_global_metadata (
name TEXT PRIMARY KEY,
value TEXT
);
-- endregion
-- region AUDIT
CREATE TABLE ma_audit_event (
-- ID is generated as unique, but if provided, it is checked for uniqueness
-- only in combination with timestamp because of partitioning.
id BIGSERIAL NOT NULL,
timestamp TIMESTAMPTZ NOT NULL,
eventIdentifier TEXT,
eventType AuditEventTypeType,
eventStage AuditEventStageType,
sessionIdentifier TEXT,
requestIdentifier TEXT,
taskIdentifier TEXT,
taskOid UUID,
hostIdentifier TEXT,
nodeIdentifier TEXT,
remoteHostAddress TEXT,
initiatorOid UUID,
initiatorType ObjectType,
initiatorName TEXT,
attorneyOid UUID,
attorneyName TEXT,
effectivePrincipalOid UUID,
effectivePrincipalType ObjectType,
effectivePrincipalName TEXT,
effectivePrivilegesModification EffectivePrivilegesModificationType,
targetOid UUID,
targetType ObjectType,
targetName TEXT,
targetOwnerOid UUID,
targetOwnerType ObjectType,
targetOwnerName TEXT,
channel TEXT, -- full URI, we do not want m_uri ID anymore
outcome OperationResultStatusType,
parameter TEXT,
result TEXT,
message TEXT,
changedItemPaths TEXT[],
resourceOids TEXT[],
properties JSONB,
-- ext JSONB, -- TODO extension container later
PRIMARY KEY (id, timestamp)
) PARTITION BY RANGE (timestamp);
CREATE INDEX ma_audit_event_timestamp_idx ON ma_audit_event (timestamp);
CREATE INDEX ma_audit_event_eventIdentifier_idx ON ma_audit_event (eventIdentifier);
CREATE INDEX ma_audit_event_sessionIdentifier_idx ON ma_audit_event (sessionIdentifier);
CREATE INDEX ma_audit_event_requestIdentifier_idx ON ma_audit_event (requestIdentifier);
-- This was originally eventStage + targetOid, but low variability eventStage can do more harm.
CREATE INDEX ma_audit_event_targetOid_idx ON ma_audit_event (targetOid);
-- TODO do we want to index every single column or leave the rest to full/partial scans?
-- Original repo/audit didn't have any more indexes either...
CREATE INDEX ma_audit_event_changedItemPaths_idx ON ma_audit_event USING gin(changeditempaths);
CREATE INDEX ma_audit_event_resourceOids_idx ON ma_audit_event USING gin(resourceOids);
CREATE INDEX ma_audit_event_properties_idx ON ma_audit_event USING gin(properties);
-- TODO trigram indexes for LIKE support? What columns? message, ...
CREATE TABLE ma_audit_delta (
recordId BIGINT NOT NULL, -- references ma_audit_event.id
timestamp TIMESTAMPTZ NOT NULL, -- references ma_audit_event.timestamp
checksum TEXT NOT NULL,
delta BYTEA,
deltaOid UUID,
deltaType ChangeType,
fullResult BYTEA,
objectNameNorm TEXT,
objectNameOrig TEXT,
resourceOid UUID,
resourceNameNorm TEXT,
resourceNameOrig TEXT,
shadowKind ShadowKindType,
shadowIntent TEXT,
status OperationResultStatusType,
PRIMARY KEY (recordId, timestamp, checksum)
) PARTITION BY RANGE (timestamp);
/* Similar FK is created PER PARTITION only, see audit_create_monthly_partitions
or *_default tables:
ALTER TABLE ma_audit_delta ADD CONSTRAINT ma_audit_delta_fk
FOREIGN KEY (recordId, timestamp) REFERENCES ma_audit_event (id, timestamp)
ON DELETE CASCADE;
-- Primary key covers the need for FK(recordId, timestamp) as well, no need for explicit index.
*/
-- TODO: any unique combination within single recordId? name+oid+type perhaps?
CREATE TABLE ma_audit_ref (
id BIGSERIAL NOT NULL, -- unique technical PK
recordId BIGINT NOT NULL, -- references ma_audit_event.id
timestamp TIMESTAMPTZ NOT NULL, -- references ma_audit_event.timestamp
name TEXT, -- multiple refs can have the same name, conceptually it's a Map(name -> refs[])
targetOid UUID,
targetType ObjectType,
targetNameOrig TEXT,
targetNameNorm TEXT,
PRIMARY KEY (id, timestamp) -- real PK must contain partition key (timestamp)
) PARTITION BY RANGE (timestamp);
/* Similar FK is created PER PARTITION only:
ALTER TABLE ma_audit_ref ADD CONSTRAINT ma_audit_ref_fk
FOREIGN KEY (recordId, timestamp) REFERENCES ma_audit_event (id, timestamp)
ON DELETE CASCADE;
*/
-- Index for FK mentioned above.
-- Index can be declared for partitioned table and will be partitioned automatically.
CREATE INDEX ma_audit_ref_recordId_timestamp_idx ON ma_audit_ref (recordId, timestamp);
-- Default tables used when no timestamp range partitions are created:
CREATE TABLE ma_audit_event_default PARTITION OF ma_audit_event DEFAULT;
CREATE TABLE ma_audit_delta_default PARTITION OF ma_audit_delta DEFAULT;
CREATE TABLE ma_audit_ref_default PARTITION OF ma_audit_ref DEFAULT;
/*
For info about what is and is not automatically created on the partition, see:
https://www.postgresql.org/docs/13/sql-createtable.html (search for "PARTITION OF parent_table")
In short, for our case PK and constraints are created automatically, but FK are not.
*/
ALTER TABLE ma_audit_delta_default ADD CONSTRAINT ma_audit_delta_default_fk
FOREIGN KEY (recordId, timestamp) REFERENCES ma_audit_event_default (id, timestamp)
ON DELETE CASCADE;
ALTER TABLE ma_audit_ref_default ADD CONSTRAINT ma_audit_ref_default_fk
FOREIGN KEY (recordId, timestamp) REFERENCES ma_audit_event_default (id, timestamp)
ON DELETE CASCADE;
-- endregion
-- region Schema versioning and upgrading
/*
Procedure applying a DB schema/data change for audit tables.
Use sequential change numbers to identify the changes.
This protects re-execution of the same change on the same database instance.
Use dollar-quoted string constant for a change, examples are lower, docs here:
https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-DOLLAR-QUOTING
The transaction is committed if the change is executed.
The change number is NOT semantic and uses different key than original 'databaseSchemaVersion'.
Semantic schema versioning is still possible, but now only for information purposes.
Example of an DB upgrade script (stuff between $$ can be multiline, here compressed for brevity):
CALL apply_audit_change(1, $$ create table x(a int); insert into x values (1); $$);
CALL apply_audit_change(2, $$ alter table x add column b text; insert into x values (2, 'two'); $$);
-- not a good idea in general, but "true" forces the execution; it never updates change # to lower
CALL apply_audit_change(1, $$ insert into x values (3, 'three'); $$, true);
*/
CREATE OR REPLACE PROCEDURE apply_audit_change(changeNumber int, change TEXT, force boolean = false)
LANGUAGE plpgsql
AS $$
DECLARE
lastChange int;
BEGIN
SELECT value INTO lastChange FROM m_global_metadata WHERE name = 'schemaAuditChangeNumber';
-- change is executed if the changeNumber is newer - or if forced
IF lastChange IS NULL OR lastChange < changeNumber OR force THEN
EXECUTE change;
RAISE NOTICE 'Audit change #% executed!', changeNumber;
IF lastChange IS NULL THEN
INSERT INTO m_global_metadata (name, value) VALUES ('schemaAuditChangeNumber', changeNumber);
ELSIF changeNumber > lastChange THEN
-- even with force we never want to set lower change number, hence the IF above
UPDATE m_global_metadata SET value = changeNumber WHERE name = 'schemaAuditChangeNumber';
END IF;
COMMIT;
ELSE
RAISE NOTICE 'Audit change #% skipped - not newer than the last change #%!', changeNumber, lastChange;
END IF;
END $$;
-- endregion
-- https://www.postgresql.org/docs/current/runtime-config-query.html#GUC-ENABLE-PARTITIONWISE-JOIN
DO $$ BEGIN
EXECUTE 'ALTER DATABASE ' || current_database() || ' SET enable_partitionwise_join TO on';
END; $$;
-- region partition creation procedures
-- Use negative futureCount for creating partitions for the past months if needed.
-- See also the comment below the procedure for more details.
CREATE OR REPLACE PROCEDURE audit_create_monthly_partitions(futureCount int)
LANGUAGE plpgsql
AS $$
DECLARE
dateFrom TIMESTAMPTZ = date_trunc('month', current_timestamp);
dateTo TIMESTAMPTZ;
tableSuffix TEXT;
BEGIN
-- noinspection SqlUnused
FOR i IN 1..abs(futureCount) loop
dateTo := dateFrom + interval '1 month';
tableSuffix := to_char(dateFrom, 'YYYYMM');
BEGIN
-- PERFORM = select without using the result
PERFORM ('ma_audit_event_' || tableSuffix)::regclass;
RAISE NOTICE 'Tables for partition % already exist, OK...', tableSuffix;
EXCEPTION WHEN OTHERS THEN
RAISE NOTICE 'Creating partitions for range: % - %', dateFrom, dateTo;
-- values FROM are inclusive (>=), TO are exclusive (<)
EXECUTE format(
'CREATE TABLE %I PARTITION OF ma_audit_event FOR VALUES FROM (%L) TO (%L);',
'ma_audit_event_' || tableSuffix, dateFrom, dateTo);
EXECUTE format(
'CREATE TABLE %I PARTITION OF ma_audit_delta FOR VALUES FROM (%L) TO (%L);',
'ma_audit_delta_' || tableSuffix, dateFrom, dateTo);
EXECUTE format(
'CREATE TABLE %I PARTITION OF ma_audit_ref FOR VALUES FROM (%L) TO (%L);',
'ma_audit_ref_' || tableSuffix, dateFrom, dateTo);
/*
For info about what is and is not automatically created on the partition, see:
https://www.postgresql.org/docs/13/sql-createtable.html (search for "PARTITION OF parent_table")
In short, for our case PK and constraints are created automatically, but FK are not.
*/
EXECUTE format(
'ALTER TABLE %I ADD CONSTRAINT %I FOREIGN KEY (recordId, timestamp)' ||
' REFERENCES %I (id, timestamp) ON DELETE CASCADE',
'ma_audit_delta_' || tableSuffix,
'ma_audit_delta_' || tableSuffix || '_fk',
'ma_audit_event_' || tableSuffix);
EXECUTE format(
'ALTER TABLE %I ADD CONSTRAINT %I FOREIGN KEY (recordId, timestamp)' ||
' REFERENCES %I (id, timestamp) ON DELETE CASCADE',
'ma_audit_ref_' || tableSuffix,
'ma_audit_ref_' || tableSuffix || '_fk',
'ma_audit_event_' || tableSuffix);
END;
IF futureCount < 0 THEN
-- going to the past
dateFrom := dateFrom - interval '1 month';
ELSE
dateFrom := dateTo;
END IF;
END loop;
END $$;
-- endregion
/*
IMPORTANT: Only default partitions are created in this script!
Consider, whether you need partitioning before doing anything, for more read the docs:
https://docs.evolveum.com/midpoint/reference/repository/native-audit/#partitioning
Use something like this, if you desire monthly partitioning:
call audit_create_monthly_partitions(120);
This creates 120 monthly partitions into the future (10 years).
It can be safely called multiple times, so you can run it again anytime in the future.
If you forget to run, audit events will go to default partition so no data is lost,
however it may be complicated to organize it into proper partitions after the fact.
Create past partitions if needed, e.g. for migration. E.g., for last 12 months (including current):
call audit_create_monthly_partitions(-12);
Check the existing partitions with this SQL query:
select inhrelid::regclass as partition
from pg_inherits
where inhparent = 'ma_audit_event'::regclass;
Try this to see recent audit events with the real table where they are stored:
select tableoid::regclass::text AS table_name, *
from ma_audit_event
order by id desc
limit 50;
*/
-- Initializing the last change number used in postgres-new-upgrade.sql.
-- This is important to avoid applying any change more than once.
-- Also update SqaleUtils.CURRENT_SCHEMA_AUDIT_CHANGE_NUMBER
-- repo/repo-sqale/src/main/java/com/evolveum/midpoint/repo/sqale/SqaleUtils.java
call apply_audit_change(8, $$ SELECT 1 $$, true);