diff --git a/Makefile.in b/Makefile.in index 84c54ad3..8413cabd 100644 --- a/Makefile.in +++ b/Makefile.in @@ -42,11 +42,14 @@ src/keyring/keyring_curl.o \ src/keyring/keyring_file.o \ src/keyring/keyring_vault.o \ src/keyring/keyring_api.o \ +src/catalog/tde_global_catalog.o \ src/catalog/tde_keyring.o \ src/catalog/tde_master_key.o \ src/common/pg_tde_shmem.o \ src/common/pg_tde_utils.o \ +src/smgr/pg_tde_smgr.o \ src/pg_tde_defs.o \ +src/pg_tde_event_capture.o \ src/pg_tde.o override PG_CPPFLAGS += @tde_CPPFLAGS@ diff --git a/meson.build b/meson.build index 51798501..a339d13d 100644 --- a/meson.build +++ b/meson.build @@ -39,12 +39,16 @@ pg_tde_sources = files( 'src/keyring/keyring_vault.c', 'src/keyring/keyring_api.c', + 'src/smgr/pg_tde_smgr.c', + + 'src/catalog/tde_global_catalog.c', 'src/catalog/tde_keyring.c', 'src/catalog/tde_master_key.c', 'src/common/pg_tde_shmem.c', 'src/common/pg_tde_utils.c', 'src/pg_tde_defs.c', 'src/pg_tde.c', + 'src/pg_tde_event_capture.c', ) incdir = include_directories('src/include', '.') diff --git a/pg_tde--1.0.sql b/pg_tde--1.0.sql index 3d32ec5e..9d43e98a 100644 --- a/pg_tde--1.0.sql +++ b/pg_tde--1.0.sql @@ -87,6 +87,12 @@ RETURNS table_am_handler AS 'MODULE_PATHNAME' LANGUAGE C; +-- Table access method +CREATE FUNCTION pg_tde2am_handler(internal) +RETURNS table_am_handler +AS 'MODULE_PATHNAME' +LANGUAGE C; + CREATE FUNCTION pgtde_is_encrypted(table_name VARCHAR) RETURNS boolean AS $$ @@ -129,5 +135,26 @@ CREATE FUNCTION pg_tde_version() RETURNS TEXT AS 'MODULE_PATHNAME' LANGUAGE C; CREATE ACCESS METHOD pg_tde TYPE TABLE HANDLER pg_tdeam_handler; COMMENT ON ACCESS METHOD pg_tde IS 'pg_tde table access method'; +CREATE ACCESS METHOD pg_tde2 TYPE TABLE HANDLER pg_tde2am_handler; +COMMENT ON ACCESS METHOD pg_tde2 IS 'pg_tde2 table access method'; + -- Per database extension initialization SELECT pg_tde_extension_initialize(); + +CREATE OR REPLACE FUNCTION pg_tde_ddl_command_start_capture() +RETURNS event_trigger +AS 
'MODULE_PATHNAME' +LANGUAGE C; + +CREATE OR REPLACE FUNCTION pg_tde_ddl_command_end_capture() +RETURNS event_trigger +AS 'MODULE_PATHNAME' +LANGUAGE C; + +CREATE EVENT TRIGGER pg_tde_trigger_create_index +ON ddl_command_start +EXECUTE FUNCTION pg_tde_ddl_command_start_capture(); + +CREATE EVENT TRIGGER pg_tde_trigger_create_index_2 +ON ddl_command_end +EXECUTE FUNCTION pg_tde_ddl_command_end_capture(); diff --git a/src/access/pg_tde_prune.c b/src/access/pg_tde_prune.c index 73a4dcea..335edecb 100644 --- a/src/access/pg_tde_prune.c +++ b/src/access/pg_tde_prune.c @@ -127,6 +127,7 @@ pg_tde_page_prune_opt(Relation relation, Buffer buffer) if (RecoveryInProgress()) return; +#if PG_VERSION_NUM < 170000 /* * XXX: Magic to keep old_snapshot_threshold tests appear "working". They * currently are broken, and discussion of what to do about them is @@ -135,7 +136,7 @@ pg_tde_page_prune_opt(Relation relation, Buffer buffer) */ if (old_snapshot_threshold == 0) SnapshotTooOldMagicForTest(); - +#endif /* * First check whether there's any chance there's something to prune, * determining the appropriate horizon is a waste if there's no prune_xid @@ -166,14 +167,14 @@ pg_tde_page_prune_opt(Relation relation, Buffer buffer) if (!GlobalVisTestIsRemovableXid(vistest, prune_xid)) { - if (!OldSnapshotThresholdActive()) +#if PG_VERSION_NUM < 170000 + if ( !OldSnapshotThresholdActive()) return; - if (!TransactionIdLimitedForOldSnapshots(GlobalVisTestNonRemovableHorizon(vistest), relation, &limited_xmin, &limited_ts)) return; - +#endif if (!TransactionIdPrecedes(prune_xid, limited_xmin)) return; } @@ -539,6 +540,7 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer) */ if (GlobalVisTestIsRemovableXid(prstate->vistest, dead_after)) res = HEAPTUPLE_DEAD; +#if PG_VERSION_NUM < 170000 else if (OldSnapshotThresholdActive()) { /* haven't determined limited horizon yet, requests */ @@ -566,7 +568,7 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, 
Buffer buffer) res = HEAPTUPLE_DEAD; } } - +#endif return res; } diff --git a/src/access/pg_tde_tdemap.c b/src/access/pg_tde_tdemap.c index 3367e668..e424b83f 100644 --- a/src/access/pg_tde_tdemap.c +++ b/src/access/pg_tde_tdemap.c @@ -73,43 +73,37 @@ typedef struct TDEMapEntry int32 key_index; } TDEMapEntry; -/* Global variables */ -static char db_path[MAXPGPATH] = {0}; -static char db_map_path[MAXPGPATH] = {0}; -static char db_keydata_path[MAXPGPATH] = {0}; - -static void put_key_into_map(Oid rel_id, RelKeyData *key); - -static File pg_tde_open_file_basic(char *tde_filename, int fileFlags, bool ignore_missing); -static File pg_tde_file_header_write(char *tde_filename, File tde_file, TDEMasterKeyInfo *master_key_info, off_t *bytes_written); -static File pg_tde_file_header_read(char *tde_filename, File tde_file, TDEFileHeader *fheader, bool *is_new_file, off_t *bytes_read); +typedef struct TDEMapFilePath +{ + char map_path[MAXPGPATH]; + char keydata_path[MAXPGPATH]; +} TDEMapFilePath; -static RelKeyData* tde_create_rel_key(const RelFileLocator *rlocator, InternalKey *key, TDEMasterKeyInfo *master_key_info); -static RelKeyData *tde_encrypt_rel_key(TDEMasterKey *master_key, RelKeyData *rel_key_data, const RelFileLocator *rlocator); -static RelKeyData *tde_decrypt_rel_key(TDEMasterKey *master_key, RelKeyData *enc_rel_key_data, const RelFileLocator *rlocator); +static int pg_tde_open_file_basic(char *tde_filename, int fileFlags, bool ignore_missing); +static int pg_tde_file_header_write(char *tde_filename, int fd, TDEMasterKeyInfo *master_key_info, off_t *bytes_written); +static int pg_tde_file_header_read(char *tde_filename, int fd, TDEFileHeader *fheader, bool *is_new_file, off_t *bytes_read); -static void pg_tde_set_db_file_paths(Oid dbOid); -static File pg_tde_open_file(char *tde_filename, TDEMasterKeyInfo *master_key_info, bool should_fill_info, int fileFlags, bool *is_new_file, off_t *offset); +static int pg_tde_open_file(char *tde_filename, TDEMasterKeyInfo 
*master_key_info, bool should_fill_info, int fileFlags, bool *is_new_file, off_t *offset); static int32 pg_tde_write_map_entry(const RelFileLocator *rlocator, char *db_map_path, TDEMasterKeyInfo *master_key_info); -static off_t pg_tde_write_one_map_entry(File map_file, const RelFileLocator *rlocator, int flags, int32 key_index, TDEMapEntry *map_entry, off_t *offset); +static off_t pg_tde_write_one_map_entry(int fd, const RelFileLocator *rlocator, int flags, int32 key_index, TDEMapEntry *map_entry, off_t *offset); static int32 pg_tde_process_map_entry(const RelFileLocator *rlocator, char *db_map_path, off_t *offset, bool should_delete); -static bool pg_tde_read_one_map_entry(File map_file, const RelFileLocator *rlocator, int flags, TDEMapEntry *map_entry, off_t *offset); +static bool pg_tde_read_one_map_entry(int fd, const RelFileLocator *rlocator, int flags, TDEMapEntry *map_entry, off_t *offset); static void pg_tde_write_keydata(char *db_keydata_path, TDEMasterKeyInfo *master_key_info, int32 key_index, RelKeyData *enc_rel_key_data); -static void pg_tde_write_one_keydata(File keydata_file, int32 key_index, RelKeyData *enc_rel_key_data); -static RelKeyData* pg_tde_get_key_from_file(const RelFileLocator *rlocator); +static void pg_tde_write_one_keydata(int keydata_fd, int32 key_index, RelKeyData *enc_rel_key_data); +static RelKeyData* pg_tde_get_key_from_file(const RelFileLocator *rlocator, GenericKeyring *keyring); static RelKeyData* pg_tde_read_keydata(char *db_keydata_path, int32 key_index, TDEMasterKey *master_key); -static RelKeyData* pg_tde_read_one_keydata(File keydata_file, int32 key_index, TDEMasterKey *master_key); +static RelKeyData* pg_tde_read_one_keydata(int keydata_fd, int32 key_index, TDEMasterKey *master_key); -static File keyrotation_init_file(TDEMasterKeyInfo *new_master_key_info, char *rotated_filename, char *filename, bool *is_new_file, off_t *curr_pos); +static int keyrotation_init_file(TDEMasterKeyInfo *new_master_key_info, char 
*rotated_filename, char *filename, bool *is_new_file, off_t *curr_pos); static void finalize_key_rotation(char *m_path_old, char *k_path_old, char *m_path_new, char *k_path_new); /* * Generate an encrypted key for the relation and store it in the keymap file. */ -void -pg_tde_create_key_map_entry(const RelFileLocator *newrlocator, Relation rel) +RelKeyData* +pg_tde_create_key_map_entry(const RelFileLocator *newrlocator) { InternalKey int_key; RelKeyData *rel_key_data; @@ -117,11 +111,13 @@ pg_tde_create_key_map_entry(const RelFileLocator *newrlocator, Relation rel) TDEMasterKey *master_key; XLogRelKey xlrec; - master_key = GetMasterKey(); + master_key = GetMasterKey(newrlocator->dbOid, newrlocator->spcOid, NULL); if (master_key == NULL) { ereport(ERROR, (errmsg("failed to retrieve master key"))); + + return NULL; } memset(&int_key, 0, sizeof(InternalKey)); @@ -131,11 +127,13 @@ pg_tde_create_key_map_entry(const RelFileLocator *newrlocator, Relation rel) ereport(FATAL, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("could not generate internal key for relation \"%s\": %s", - RelationGetRelationName(rel), ERR_error_string(ERR_get_error(), NULL)))); + "TODO", ERR_error_string(ERR_get_error(), NULL)))); + + return NULL; } /* Encrypt the key */ - rel_key_data = tde_create_rel_key(newrlocator, &int_key, &master_key->keyInfo); + rel_key_data = tde_create_rel_key(newrlocator->relNumber, &int_key, &master_key->keyInfo); enc_rel_key_data = tde_encrypt_rel_key(master_key, rel_key_data, newrlocator); /* @@ -152,6 +150,8 @@ pg_tde_create_key_map_entry(const RelFileLocator *newrlocator, Relation rel) * Add the encyrpted key to the key map data file structure. 
*/ pg_tde_write_key_map_entry(newrlocator, enc_rel_key_data, &master_key->keyInfo); + + return rel_key_data; } /* Head of the key cache (linked list) */ @@ -164,6 +164,12 @@ RelKey *tde_rel_key_map = NULL; */ RelKeyData * GetRelationKey(RelFileLocator rel) +{ + return GetRelationKeyWithKeyring(rel, NULL); +} + +RelKeyData * +GetRelationKeyWithKeyring(RelFileLocator rel, GenericKeyring *keyring) { RelKey *curr; RelKeyData *key; @@ -177,15 +183,18 @@ GetRelationKey(RelFileLocator rel) } } - key = pg_tde_get_key_from_file(&rel); + key = pg_tde_get_key_from_file(&rel, keyring); - put_key_into_map(rel.relNumber, key); + if (key != NULL) + { + pg_tde_put_key_into_map(rel.relNumber, key); + } return key; } -static void -put_key_into_map(Oid rel_id, RelKeyData *key) { +void +pg_tde_put_key_into_map(Oid rel_id, RelKeyData *key) { RelKey *new; RelKey *prev = NULL; @@ -216,8 +225,8 @@ tde_sprint_key(InternalKey *k) * Creates a key for a relation identified by rlocator. Returns the newly * created key. */ -static RelKeyData * -tde_create_rel_key(const RelFileLocator *rlocator, InternalKey *key, TDEMasterKeyInfo *master_key_info) +RelKeyData * +tde_create_rel_key(Oid rel_id, InternalKey *key, TDEMasterKeyInfo *master_key_info) { RelKeyData *rel_key_data; @@ -228,7 +237,7 @@ tde_create_rel_key(const RelFileLocator *rlocator, InternalKey *key, TDEMasterKe rel_key_data->internal_key.ctx = NULL; /* Add to the decrypted key to cache */ - put_key_into_map(rlocator->relNumber, rel_key_data); + pg_tde_put_key_into_map(rel_id, rel_key_data); return rel_key_data; } @@ -236,7 +245,7 @@ tde_create_rel_key(const RelFileLocator *rlocator, InternalKey *key, TDEMasterKe /* * Encrypts a given key and returns the encrypted one. 
*/ -static RelKeyData * +RelKeyData * tde_encrypt_rel_key(TDEMasterKey *master_key, RelKeyData *rel_key_data, const RelFileLocator *rlocator) { RelKeyData *enc_rel_key_data; @@ -250,7 +259,7 @@ tde_encrypt_rel_key(TDEMasterKey *master_key, RelKeyData *rel_key_data, const Re /* * Decrypts a given key and returns the decrypted one. */ -static RelKeyData * +RelKeyData * tde_decrypt_rel_key(TDEMasterKey *master_key, RelKeyData *enc_rel_key_data, const RelFileLocator *rlocator) { RelKeyData *rel_key_data = NULL; @@ -261,32 +270,24 @@ tde_decrypt_rel_key(TDEMasterKey *master_key, RelKeyData *enc_rel_key_data, cons return rel_key_data; } -/* - * Sets the global variables so that we don't have to do this again for this - * backend lifetime. - */ -static void -pg_tde_set_db_file_paths(Oid dbOid) +inline void +pg_tde_set_db_file_paths(const RelFileLocator *rlocator, char *map_path, char *keydata_path) { - /* Return if the values are already set */ - if (*db_path && *db_map_path && *db_keydata_path) - return; + char *db_path; - /* Fill in the values */ - snprintf(db_path, MAXPGPATH, "%s", GetDatabasePath(dbOid, DEFAULTTABLESPACE_OID)); + /* If this is a global space, than the call might be in a critial section + * (during XLog write) so we can't do GetDatabasePath as it calls palloc() + */ + if (rlocator->spcOid == GLOBALTABLESPACE_OID) + db_path = "global"; + else + db_path = GetDatabasePath(rlocator->dbOid, rlocator->spcOid); - /* Set the file nanes for map and keydata */ - join_path_components(db_map_path, db_path, PG_TDE_MAP_FILENAME); - join_path_components(db_keydata_path, db_path, PG_TDE_KEYDATA_FILENAME); -} -/* - * Path data clean up once the transaction is done. 
- */ -void -pg_tde_cleanup_path_vars(void) -{ - *db_path = *db_map_path = *db_keydata_path = 0; + if (map_path) + join_path_components(map_path, db_path, PG_TDE_MAP_FILENAME); + if (keydata_path) + join_path_components(keydata_path, db_path, PG_TDE_KEYDATA_FILENAME); } /* @@ -294,10 +295,17 @@ pg_tde_cleanup_path_vars(void) * Returns true if both map and key data files are created. */ void -pg_tde_delete_tde_files(Oid dbOid) +pg_tde_delete_tde_files(Oid dbOid, Oid spcOid) { + char db_map_path[MAXPGPATH] = {0}; + char db_keydata_path[MAXPGPATH] = {0}; + /* Set the file paths */ - pg_tde_set_db_file_paths(dbOid); + pg_tde_set_db_file_paths(&(RelFileLocator) { + spcOid, + dbOid, + 0}, + db_map_path, db_keydata_path); /* Remove these files without emitting any error */ PathNameDeleteTemporaryFile(db_map_path, false); @@ -316,24 +324,30 @@ pg_tde_delete_tde_files(Oid dbOid) bool pg_tde_save_master_key(TDEMasterKeyInfo *master_key_info) { - File map_file = -1; - File keydata_file = -1; + int map_fd = -1; + int keydata_fd = -1; off_t curr_pos = 0; bool is_new_map = false; bool is_new_key_data = false; + char db_map_path[MAXPGPATH] = {0}; + char db_keydata_path[MAXPGPATH] = {0}; /* Set the file paths */ - pg_tde_set_db_file_paths(master_key_info->databaseId); + pg_tde_set_db_file_paths(&(RelFileLocator) { + master_key_info->tablespaceId, + master_key_info->databaseId, + 0}, + db_map_path, db_keydata_path); ereport(LOG, (errmsg("pg_tde_save_master_key"))); /* Create or truncate these map and keydata files. 
*/ - map_file = pg_tde_open_file(db_map_path, master_key_info, false, O_RDWR | O_CREAT | O_TRUNC, &is_new_map, &curr_pos); - keydata_file = pg_tde_open_file(db_keydata_path, master_key_info, false, O_RDWR | O_CREAT | O_TRUNC, &is_new_key_data, &curr_pos); + map_fd = pg_tde_open_file(db_map_path, master_key_info, false, O_RDWR | O_CREAT | O_TRUNC, &is_new_map, &curr_pos); + keydata_fd = pg_tde_open_file(db_keydata_path, master_key_info, false, O_RDWR | O_CREAT | O_TRUNC, &is_new_key_data, &curr_pos); /* Closing files. */ - FileClose(map_file); - FileClose(keydata_file); + close(map_fd); + close(keydata_fd); return (is_new_map && is_new_key_data); } @@ -343,30 +357,35 @@ pg_tde_save_master_key(TDEMasterKeyInfo *master_key_info) * a LW_SHARED or higher lock on files before calling this function. */ TDEMasterKeyInfo * -pg_tde_get_master_key(Oid dbOid) +pg_tde_get_master_key(Oid dbOid, Oid spcOid) { - File tde_file = -1; + int fd = -1; TDEFileHeader fheader; TDEMasterKeyInfo *master_key_info = NULL; bool is_new_file = false; off_t bytes_read = 0; + char db_map_path[MAXPGPATH] = {0}; /* Set the file paths */ - pg_tde_set_db_file_paths(dbOid); + pg_tde_set_db_file_paths(&(RelFileLocator) { + spcOid, + dbOid, + 0}, + db_map_path, NULL); /* * Ensuring that we always open the file in binary mode. The caller must * specify other flags for reading, writing or creating the file. */ - tde_file = pg_tde_open_file_basic(db_map_path, O_RDONLY, true); + fd = pg_tde_open_file_basic(db_map_path, O_RDONLY, true); /* The file does not exist. */ - if (tde_file < 0) + if (fd < 0) return NULL; - pg_tde_file_header_read(db_map_path, tde_file, &fheader, &is_new_file, &bytes_read); + pg_tde_file_header_read(db_map_path, fd, &fheader, &is_new_file, &bytes_read); - FileClose(tde_file); + close(fd); /* It's not a new file. 
So we can memcpy the master key info from the header */ if (!is_new_file) @@ -386,32 +405,32 @@ pg_tde_get_master_key(Oid dbOid) * Returns the file descriptor in case of a success. Otherwise, fatal error * is raised except when ignore_missing is true and the file does not exit. */ -static File +static int pg_tde_open_file_basic(char *tde_filename, int fileFlags, bool ignore_missing) { - File tde_file = -1; + int fd = -1; /* * Ensuring that we always open the file in binary mode. The caller must * specify other flags for reading, writing or creating the file. */ - tde_file = PathNameOpenFile(tde_filename, fileFlags | PG_BINARY); - if (tde_file < 0 && !(errno == ENOENT && ignore_missing == true)) + fd = BasicOpenFile(tde_filename, fileFlags | PG_BINARY); + if (fd < 0 && !(errno == ENOENT && ignore_missing == true)) { ereport(ERROR, (errcode_for_file_access(), - errmsg("Could not open tde file \"%s\": %m", + errmsg("could not open tde file \"%s\": %m", tde_filename))); } - return tde_file; + return fd; } /* * Write TDE file header to a TDE file. 
*/ -static File -pg_tde_file_header_write(char *tde_filename, File tde_file, TDEMasterKeyInfo *master_key_info, off_t *bytes_written) +static int +pg_tde_file_header_write(char *tde_filename, int fd, TDEMasterKeyInfo *master_key_info, off_t *bytes_written) { TDEFileHeader fheader; size_t sz = sizeof(TDEMasterKeyInfo); @@ -425,33 +444,42 @@ pg_tde_file_header_write(char *tde_filename, File tde_file, TDEMasterKeyInfo *ma memset(&fheader.master_key_info, 0, sz); memcpy(&fheader.master_key_info, master_key_info, sz); - *bytes_written = FileWrite(tde_file, &fheader, TDE_FILE_HEADER_SIZE, 0, WAIT_EVENT_DATA_FILE_WRITE); + /* TODO: pgstat_report_wait_start / pgstat_report_wait_end */ + *bytes_written = pg_pwrite(fd, &fheader, TDE_FILE_HEADER_SIZE, 0); if (*bytes_written != TDE_FILE_HEADER_SIZE) { ereport(ERROR, (errcode_for_file_access(), - errmsg("Could not write tde file \"%s\": %m", + errmsg("could not write tde file \"%s\": %m", tde_filename))); } - return tde_file; + if (pg_fsync(fd) != 0) + { + ereport(data_sync_elevel(ERROR), + (errcode_for_file_access(), + errmsg("could not fsync file \"%s\": %m", tde_filename))); + } + + return fd; } /* * Read TDE file header from a TDE file and fill in the fheader data structure. 
*/ -static File -pg_tde_file_header_read(char *tde_filename, File tde_file, TDEFileHeader *fheader, bool *is_new_file, off_t *bytes_read) +static int +pg_tde_file_header_read(char *tde_filename, int fd, TDEFileHeader *fheader, bool *is_new_file, off_t *bytes_read) { Assert(fheader); - *bytes_read = FileRead(tde_file, fheader, TDE_FILE_HEADER_SIZE, 0, WAIT_EVENT_DATA_FILE_READ); + /* TODO: pgstat_report_wait_start / pgstat_report_wait_end */ + *bytes_read = pg_pread(fd, fheader, TDE_FILE_HEADER_SIZE, 0); *is_new_file = (*bytes_read == 0); /* File doesn't exist */ if (*bytes_read == 0) - return tde_file; + return fd; if (*bytes_read != TDE_FILE_HEADER_SIZE || fheader->file_version != PG_TDE_FILEMAGIC) @@ -463,7 +491,7 @@ pg_tde_file_header_read(char *tde_filename, File tde_file, TDEFileHeader *fheade tde_filename))); } - return tde_file; + return fd; } /* @@ -483,10 +511,10 @@ pg_tde_file_header_read(char *tde_filename, File tde_file, TDEFileHeader *fheade * The caller can pass the required flags to ensure that file is created * or an error is thrown if the file does not exist. */ -File +int pg_tde_open_file(char *tde_filename, TDEMasterKeyInfo *master_key_info, bool should_fill_info, int fileFlags, bool *is_new_file, off_t *curr_pos) { - File tde_file = -1; + int fd = -1; TDEFileHeader fheader; off_t bytes_read = 0; off_t bytes_written = 0; @@ -495,16 +523,16 @@ pg_tde_open_file(char *tde_filename, TDEMasterKeyInfo *master_key_info, bool sho * Ensuring that we always open the file in binary mode. The caller must * specify other flags for reading, writing or creating the file. */ - tde_file = pg_tde_open_file_basic(tde_filename, fileFlags, false); + fd = pg_tde_open_file_basic(tde_filename, fileFlags, false); - pg_tde_file_header_read(tde_filename, tde_file, &fheader, is_new_file, &bytes_read); + pg_tde_file_header_read(tde_filename, fd, &fheader, is_new_file, &bytes_read); /* In case it's a new file, let's add the header now. 
*/ if (*is_new_file && master_key_info) - pg_tde_file_header_write(tde_filename, tde_file, master_key_info, &bytes_written); + pg_tde_file_header_write(tde_filename, fd, master_key_info, &bytes_written); *curr_pos = bytes_read + bytes_written; - return tde_file; + return fd; } /* @@ -519,7 +547,7 @@ pg_tde_open_file(char *tde_filename, TDEMasterKeyInfo *master_key_info, bool sho static int32 pg_tde_write_map_entry(const RelFileLocator *rlocator, char *db_map_path, TDEMasterKeyInfo *master_key_info) { - File map_file = -1; + int map_fd = -1; int32 key_index = 0; TDEMapEntry map_entry; bool is_new_file; @@ -528,7 +556,7 @@ pg_tde_write_map_entry(const RelFileLocator *rlocator, char *db_map_path, TDEMas bool found = false; /* Open and vaidate file for basic correctness. */ - map_file = pg_tde_open_file(db_map_path, master_key_info, false, O_RDWR | O_CREAT, &is_new_file, &curr_pos); + map_fd = pg_tde_open_file(db_map_path, master_key_info, false, O_RDWR | O_CREAT, &is_new_file, &curr_pos); prev_pos = curr_pos; /* @@ -539,7 +567,7 @@ pg_tde_write_map_entry(const RelFileLocator *rlocator, char *db_map_path, TDEMas while(1) { prev_pos = curr_pos; - found = pg_tde_read_one_map_entry(map_file, NULL, MAP_ENTRY_FREE, &map_entry, &curr_pos); + found = pg_tde_read_one_map_entry(map_fd, NULL, MAP_ENTRY_FREE, &map_entry, &curr_pos); /* We either reach EOF or found an empty slot in the middle of the file */ if (prev_pos == curr_pos || found) @@ -551,10 +579,10 @@ pg_tde_write_map_entry(const RelFileLocator *rlocator, char *db_map_path, TDEMas /* Write the given entry at the location pointed by prev_pos; i.e. the free entry */ curr_pos = prev_pos; - pg_tde_write_one_map_entry(map_file, rlocator, MAP_ENTRY_VALID, key_index, &map_entry, &prev_pos); + pg_tde_write_one_map_entry(map_fd, rlocator, MAP_ENTRY_VALID, key_index, &map_entry, &prev_pos); /* Let's close the file. 
*/ - FileClose(map_file); + close(map_fd); /* Register the entry to be freed in case the transaction aborts */ RegisterEntryForDeletion(rlocator, curr_pos, false); @@ -567,7 +595,7 @@ pg_tde_write_map_entry(const RelFileLocator *rlocator, char *db_map_path, TDEMas * map file. */ static off_t -pg_tde_write_one_map_entry(File map_file, const RelFileLocator *rlocator, int flags, int32 key_index, TDEMapEntry *map_entry, off_t *offset) +pg_tde_write_one_map_entry(int fd, const RelFileLocator *rlocator, int flags, int32 key_index, TDEMapEntry *map_entry, off_t *offset) { int bytes_written = 0; @@ -578,16 +606,27 @@ pg_tde_write_one_map_entry(File map_file, const RelFileLocator *rlocator, int fl map_entry->flags = flags; map_entry->key_index = key_index; - bytes_written = FileWrite(map_file, map_entry, MAP_ENTRY_SIZE, *offset, WAIT_EVENT_DATA_FILE_WRITE); + /* TODO: pgstat_report_wait_start / pgstat_report_wait_end */ + bytes_written = pg_pwrite(fd, map_entry, MAP_ENTRY_SIZE, *offset); /* Add the entry to the file */ if (bytes_written != MAP_ENTRY_SIZE) { + char db_map_path[MAXPGPATH] = {0}; + pg_tde_set_db_file_paths(rlocator, db_map_path, NULL); ereport(FATAL, (errcode_for_file_access(), - errmsg("Could not write tde map file \"%s\": %m", + errmsg("could not write tde map file \"%s\": %m", db_map_path))); } + if (pg_fsync(fd) != 0) + { + char db_map_path[MAXPGPATH] = {0}; + pg_tde_set_db_file_paths(rlocator, db_map_path, NULL); + ereport(data_sync_elevel(ERROR), + (errcode_for_file_access(), + errmsg("could not fsync file \"%s\": %m", db_map_path))); + } return (*offset + bytes_written); } @@ -604,7 +643,7 @@ pg_tde_write_one_map_entry(File map_file, const RelFileLocator *rlocator, int fl static int32 pg_tde_process_map_entry(const RelFileLocator *rlocator, char *db_map_path, off_t *offset, bool should_delete) { - File map_file = -1; + File map_fd = -1; int32 key_index = 0; TDEMapEntry map_entry; bool is_new_file; @@ -618,7 +657,7 @@ pg_tde_process_map_entry(const 
RelFileLocator *rlocator, char *db_map_path, off_ * Open and validate file for basic correctness. DO NOT create it. * The file should pre-exist otherwise we should never be here. */ - map_file = pg_tde_open_file(db_map_path, NULL, false, O_RDWR, &is_new_file, &curr_pos); + map_fd = pg_tde_open_file(db_map_path, NULL, false, O_RDWR, &is_new_file, &curr_pos); /* * If we need to delete an entry, we expect an offset value to the start @@ -627,13 +666,13 @@ pg_tde_process_map_entry(const RelFileLocator *rlocator, char *db_map_path, off_ */ if (should_delete == true && *offset > 0) { - curr_pos = lseek(FileGetRawDesc(map_file), *offset, SEEK_SET); + curr_pos = lseek(FileGetRawDesc(map_fd), *offset, SEEK_SET); if (curr_pos == -1) { ereport(FATAL, (errcode_for_file_access(), - errmsg("Could not seek in tde map file \"%s\": %m", + errmsg("could not seek in tde map file \"%s\": %m", db_map_path))); } } @@ -651,7 +690,7 @@ pg_tde_process_map_entry(const RelFileLocator *rlocator, char *db_map_path, off_ while(1) { prev_pos = curr_pos; - found = pg_tde_read_one_map_entry(map_file, rlocator, MAP_ENTRY_VALID, &map_entry, &curr_pos); + found = pg_tde_read_one_map_entry(map_fd, rlocator, MAP_ENTRY_VALID, &map_entry, &curr_pos); /* We've reached EOF */ if (curr_pos == prev_pos) @@ -663,7 +702,7 @@ pg_tde_process_map_entry(const RelFileLocator *rlocator, char *db_map_path, off_ /* Mark the entry pointed by prev_pos as free */ if (should_delete) { - pg_tde_write_one_map_entry(map_file, NULL, MAP_ENTRY_FREE, 0, &map_entry, &prev_pos); + pg_tde_write_one_map_entry(map_fd, NULL, MAP_ENTRY_FREE, 0, &map_entry, &prev_pos); } break; @@ -674,7 +713,7 @@ pg_tde_process_map_entry(const RelFileLocator *rlocator, char *db_map_path, off_ } /* Let's close the file. */ - FileClose(map_file); + close(map_fd); /* Return -1 indicating that no entry was removed */ return ((found) ? 
key_index : -1); @@ -701,7 +740,8 @@ pg_tde_read_one_map_entry(File map_file, const RelFileLocator *rlocator, int fla Assert(offset); /* Read the entry at the given offset */ - bytes_read = FileRead(map_file, map_entry, MAP_ENTRY_SIZE, *offset, WAIT_EVENT_DATA_FILE_READ); + /* TODO: pgstat_report_wait_start / pgstat_report_wait_end */ + bytes_read = pg_pread(map_file, map_entry, MAP_ENTRY_SIZE, *offset); /* We've reached the end of the file. */ if (bytes_read != MAP_ENTRY_SIZE) @@ -730,39 +770,46 @@ pg_tde_read_one_map_entry(File map_file, const RelFileLocator *rlocator, int fla static void pg_tde_write_keydata(char *db_keydata_path, TDEMasterKeyInfo *master_key_info, int32 key_index, RelKeyData *enc_rel_key_data) { - File keydata_file = -1; + File fd = -1; bool is_new_file; off_t curr_pos = 0; /* Open and validate file for basic correctness. */ - keydata_file = pg_tde_open_file(db_keydata_path, master_key_info, false, O_RDWR | O_CREAT, &is_new_file, &curr_pos); + fd = pg_tde_open_file(db_keydata_path, master_key_info, false, O_RDWR | O_CREAT, &is_new_file, &curr_pos); /* Write a single key data */ - pg_tde_write_one_keydata(keydata_file, key_index, enc_rel_key_data); + pg_tde_write_one_keydata(fd, key_index, enc_rel_key_data); /* Let's close the file. */ - FileClose(keydata_file); + close(fd); } /* * Function writes a single RelKeyData into the file at the given index. */ static void -pg_tde_write_one_keydata(File keydata_file, int32 key_index, RelKeyData *enc_rel_key_data) +pg_tde_write_one_keydata(int fd, int32 key_index, RelKeyData *enc_rel_key_data) { off_t curr_pos; - Assert(keydata_file != -1); + Assert(fd != -1); /* Calculate the writing position in the file. 
*/ curr_pos = (key_index * INTERNAL_KEY_LEN) + TDE_FILE_HEADER_SIZE; - if (FileWrite(keydata_file, &enc_rel_key_data->internal_key, INTERNAL_KEY_LEN, curr_pos, WAIT_EVENT_DATA_FILE_WRITE) != INTERNAL_KEY_LEN) + /* TODO: pgstat_report_wait_start / pgstat_report_wait_end */ + if (pg_pwrite(fd, &enc_rel_key_data->internal_key, INTERNAL_KEY_LEN, curr_pos) != INTERNAL_KEY_LEN) { ereport(FATAL, (errcode_for_file_access(), - errmsg("Could not write tde key data file \"%s\": %m", - db_keydata_path))); + errmsg("could not write tde key data file: %m"))); + } + + if (pg_fsync(fd) != 0) + { + ereport(data_sync_elevel(ERROR), + (errcode_for_file_access(), + errmsg("could not fsync file: %m"))); } } @@ -772,7 +819,7 @@ pg_tde_write_one_keydata(File keydata_file, int32 key_index, RelKeyData *enc_rel static RelKeyData * pg_tde_read_keydata(char *db_keydata_path, int32 key_index, TDEMasterKey *master_key) { - File keydata_file = -1; + int fd = -1; RelKeyData *enc_rel_key_data; off_t read_pos = 0; bool is_new_file; @@ -780,13 +827,13 @@ pg_tde_read_keydata(char *db_keydata_path, int32 key_index, TDEMasterKey *master /* Open and validate file for basic correctness. */ LWLockAcquire(lock_files, LW_SHARED); - keydata_file = pg_tde_open_file(db_keydata_path, &master_key->keyInfo, false, O_RDONLY, &is_new_file, &read_pos); + fd = pg_tde_open_file(db_keydata_path, &master_key->keyInfo, false, O_RDONLY, &is_new_file, &read_pos); /* Read the encrypted key from file */ - enc_rel_key_data = pg_tde_read_one_keydata(keydata_file, key_index, master_key); + enc_rel_key_data = pg_tde_read_one_keydata(fd, key_index, master_key); /* Let's close the file. */ - FileClose(keydata_file); + close(fd); LWLockRelease(lock_files); return enc_rel_key_data; @@ -796,7 +843,7 @@ pg_tde_read_keydata(char *db_keydata_path, int32 key_index, TDEMasterKey *master * Reads a single keydata from the file. 
*/ static RelKeyData * -pg_tde_read_one_keydata(File keydata_file, int32 key_index, TDEMasterKey *master_key) +pg_tde_read_one_keydata(int keydata_fd, int32 key_index, TDEMasterKey *master_key) { RelKeyData *enc_rel_key_data; off_t read_pos = 0; @@ -810,21 +857,34 @@ pg_tde_read_one_keydata(File keydata_file, int32 key_index, TDEMasterKey *master read_pos += (key_index * INTERNAL_KEY_LEN) + TDE_FILE_HEADER_SIZE; /* Check if the file has a valid key */ - if ((read_pos + INTERNAL_KEY_LEN) > FileSize(keydata_file)) + if ((read_pos + INTERNAL_KEY_LEN) > lseek(keydata_fd, 0, SEEK_END)) { + char db_keydata_path[MAXPGPATH] = {0}; + pg_tde_set_db_file_paths(&(RelFileLocator) { + master_key->keyInfo.tablespaceId, + master_key->keyInfo.databaseId, + 0}, + NULL, db_keydata_path); ereport(FATAL, (errcode(ERRCODE_NO_DATA_FOUND), - errmsg("Could not find the required key at index %d in tde data file \"%s\": %m", + errmsg("could not find the required key at index %d in tde data file \"%s\": %m", key_index, db_keydata_path))); } /* Read the encrypted key */ - if (FileRead(keydata_file, &(enc_rel_key_data->internal_key), INTERNAL_KEY_LEN, read_pos, WAIT_EVENT_DATA_FILE_READ) != INTERNAL_KEY_LEN) + /* TODO: pgstat_report_wait_start / pgstat_report_wait_end */ + if (pg_pread(keydata_fd, &(enc_rel_key_data->internal_key), INTERNAL_KEY_LEN, read_pos) != INTERNAL_KEY_LEN) { + char db_keydata_path[MAXPGPATH] = {0}; + pg_tde_set_db_file_paths(&(RelFileLocator) { + master_key->keyInfo.tablespaceId, + master_key->keyInfo.databaseId, + 0}, + NULL, db_keydata_path); ereport(FATAL, (errcode_for_file_access(), - errmsg("Could not read key at index %d in tde key data file \"%s\": %m", + errmsg("could not read key at index %d in tde key data file \"%s\": %m", key_index, db_keydata_path))); } @@ -842,13 +902,15 @@ pg_tde_read_one_keydata(File keydata_file, int32 key_index, TDEMasterKey *master void pg_tde_write_key_map_entry(const RelFileLocator *rlocator, RelKeyData *enc_rel_key_data, 
TDEMasterKeyInfo *master_key_info) { - int32 key_index = 0; - LWLock *lock_files = tde_lwlock_mk_files(); + int32 key_index = 0; + LWLock *lock_files = tde_lwlock_mk_files(); + char db_map_path[MAXPGPATH] = {0}; + char db_keydata_path[MAXPGPATH] = {0}; Assert(rlocator); /* Set the file paths */ - pg_tde_set_db_file_paths(rlocator->dbOid); + pg_tde_set_db_file_paths(rlocator, db_map_path, db_keydata_path); /* Create the map entry and then add the encrypted key to the data file */ LWLockAcquire(lock_files, LW_EXCLUSIVE); @@ -866,14 +928,16 @@ pg_tde_write_key_map_entry(const RelFileLocator *rlocator, RelKeyData *enc_rel_k void pg_tde_delete_key_map_entry(const RelFileLocator *rlocator) { - int32 key_index = 0; - off_t offset = 0; - LWLock *lock_files = tde_lwlock_mk_files(); + int32 key_index = 0; + off_t offset = 0; + LWLock *lock_files = tde_lwlock_mk_files(); + char db_map_path[MAXPGPATH] = {0}; + char db_keydata_path[MAXPGPATH] = {0}; Assert(rlocator); /* Get the file paths */ - pg_tde_set_db_file_paths(rlocator->dbOid); + pg_tde_set_db_file_paths(rlocator, db_map_path, db_keydata_path); /* Remove the map entry if found */ LWLockAcquire(lock_files, LW_EXCLUSIVE); @@ -884,7 +948,7 @@ pg_tde_delete_key_map_entry(const RelFileLocator *rlocator) { ereport(WARNING, (errcode(ERRCODE_NO_DATA_FOUND), - errmsg("Could not find the required map entry for deletion of relation %d in tde map file \"%s\": %m", + errmsg("could not find the required map entry for deletion of relation %d in tde map file \"%s\": %m", rlocator->relNumber, db_map_path))); @@ -909,13 +973,15 @@ pg_tde_delete_key_map_entry(const RelFileLocator *rlocator) void pg_tde_free_key_map_entry(const RelFileLocator *rlocator, off_t offset) { - int32 key_index = 0; - LWLock *lock_files = tde_lwlock_mk_files(); + int32 key_index = 0; + LWLock *lock_files = tde_lwlock_mk_files(); + char db_map_path[MAXPGPATH] = {0}; + char db_keydata_path[MAXPGPATH] = {0}; Assert(rlocator); /* Get the file paths */ - 
pg_tde_set_db_file_paths(rlocator->dbOid); + pg_tde_set_db_file_paths(rlocator, db_map_path, db_keydata_path); /* Remove the map entry if found */ LWLockAcquire(lock_files, LW_EXCLUSIVE); @@ -926,7 +992,7 @@ pg_tde_free_key_map_entry(const RelFileLocator *rlocator, off_t offset) { ereport(WARNING, (errcode(ERRCODE_NO_DATA_FOUND), - errmsg("Could not find the required map entry for deletion of relation %d in tde map file \"%s\": %m", + errmsg("could not find the required map entry for deletion of relation %d in tde map file \"%s\": %m", rlocator->relNumber, db_map_path))); @@ -939,21 +1005,23 @@ pg_tde_free_key_map_entry(const RelFileLocator *rlocator, off_t offset) * reads the key data from the keydata file. */ static RelKeyData * -pg_tde_get_key_from_file(const RelFileLocator *rlocator) +pg_tde_get_key_from_file(const RelFileLocator *rlocator, GenericKeyring *keyring) { - int32 key_index = 0; - TDEMasterKey *master_key; - RelKeyData *rel_key_data; - RelKeyData *enc_rel_key_data; - off_t offset = 0; - LWLock *lock_files = tde_lwlock_mk_files(); + int32 key_index = 0; + TDEMasterKey *master_key; + RelKeyData *rel_key_data; + RelKeyData *enc_rel_key_data; + off_t offset = 0; + LWLock *lock_files = tde_lwlock_mk_files(); + char db_map_path[MAXPGPATH] = {0}; + char db_keydata_path[MAXPGPATH] = {0}; Assert(rlocator); LWLockAcquire(lock_files, LW_SHARED); /* Get/generate a master, create the key for relation and get the encrypted key with bytes to write */ - master_key = GetMasterKey(); + master_key = GetMasterKey(rlocator->dbOid, rlocator->spcOid, keyring); if (master_key == NULL) { LWLockRelease(lock_files); @@ -962,12 +1030,17 @@ pg_tde_get_key_from_file(const RelFileLocator *rlocator) } /* Get the file paths */ - pg_tde_set_db_file_paths(rlocator->dbOid); + pg_tde_set_db_file_paths(rlocator, db_map_path, db_keydata_path); /* Read the map entry and get the index of the relation key */ key_index = pg_tde_process_map_entry(rlocator, db_map_path, &offset, false); - /* 
Add the encrypted key to the data file. */ + if (key_index == -1) + { + LWLockRelease(lock_files); + return NULL; + } + enc_rel_key_data = pg_tde_read_keydata(db_keydata_path, key_index, master_key); LWLockRelease(lock_files); @@ -1023,8 +1096,8 @@ pg_tde_perform_rotate_key(TDEMasterKey *master_key, TDEMasterKey *new_master_key int32 key_index[MASTER_KEY_COUNT] = {0}; RelKeyData *rel_key_data[MASTER_KEY_COUNT]; RelKeyData *enc_rel_key_data[MASTER_KEY_COUNT]; - File m_file[MASTER_KEY_COUNT] = {-1}; - File k_file[MASTER_KEY_COUNT] = {-1}; + int m_fd[MASTER_KEY_COUNT] = {-1}; + int k_fd[MASTER_KEY_COUNT] = {-1}; char m_path[MASTER_KEY_COUNT][MAXPGPATH]; char k_path[MASTER_KEY_COUNT][MAXPGPATH]; TDEMapEntry map_entry; @@ -1035,12 +1108,18 @@ pg_tde_perform_rotate_key(TDEMasterKey *master_key, TDEMasterKey *new_master_key off_t map_size; off_t keydata_size; XLogMasterKeyRotate *xlrec; - off_t xlrec_size; - LWLock *lock_files = tde_lwlock_mk_files(); - LWLock *lock_cache = tde_lwlock_mk_cache(); + off_t xlrec_size; + LWLock *lock_files = tde_lwlock_mk_files(); + LWLock *lock_cache = tde_lwlock_mk_cache(); + char db_map_path[MAXPGPATH] = {0}; + char db_keydata_path[MAXPGPATH] = {0}; /* Set the file paths */ - pg_tde_set_db_file_paths(master_key->keyInfo.databaseId); + pg_tde_set_db_file_paths(&(RelFileLocator) { + master_key->keyInfo.tablespaceId, + master_key->keyInfo.databaseId, + 0}, + db_map_path, db_keydata_path); /* Let's update the pathnames in the local variable for ease of use/readability */ strncpy(m_path[OLD_MASTER_KEY], db_map_path, MAXPGPATH); @@ -1050,17 +1129,17 @@ pg_tde_perform_rotate_key(TDEMasterKey *master_key, TDEMasterKey *new_master_key LWLockAcquire(lock_cache, LW_EXCLUSIVE); /* Open both files in read only mode. We don't need to track the current position of the keydata file. 
We always use the key index */ - m_file[OLD_MASTER_KEY] = pg_tde_open_file(m_path[OLD_MASTER_KEY], &master_key->keyInfo, false, O_RDONLY, &is_new_file, &curr_pos[OLD_MASTER_KEY]); - k_file[OLD_MASTER_KEY] = pg_tde_open_file(k_path[OLD_MASTER_KEY], &master_key->keyInfo, false, O_RDONLY, &is_new_file, &read_pos_tmp); + m_fd[OLD_MASTER_KEY] = pg_tde_open_file(m_path[OLD_MASTER_KEY], &master_key->keyInfo, false, O_RDONLY, &is_new_file, &curr_pos[OLD_MASTER_KEY]); + k_fd[OLD_MASTER_KEY] = pg_tde_open_file(k_path[OLD_MASTER_KEY], &master_key->keyInfo, false, O_RDONLY, &is_new_file, &read_pos_tmp); - m_file[NEW_MASTER_KEY] = keyrotation_init_file(&new_master_key->keyInfo, m_path[NEW_MASTER_KEY], m_path[OLD_MASTER_KEY], &is_new_file, &curr_pos[NEW_MASTER_KEY]); - k_file[NEW_MASTER_KEY] = keyrotation_init_file(&new_master_key->keyInfo, k_path[NEW_MASTER_KEY], k_path[OLD_MASTER_KEY], &is_new_file, &read_pos_tmp); + m_fd[NEW_MASTER_KEY] = keyrotation_init_file(&new_master_key->keyInfo, m_path[NEW_MASTER_KEY], m_path[OLD_MASTER_KEY], &is_new_file, &curr_pos[NEW_MASTER_KEY]); + k_fd[NEW_MASTER_KEY] = keyrotation_init_file(&new_master_key->keyInfo, k_path[NEW_MASTER_KEY], k_path[OLD_MASTER_KEY], &is_new_file, &read_pos_tmp); /* Read all entries until EOF */ for(key_index[OLD_MASTER_KEY] = 0; ; key_index[OLD_MASTER_KEY]++) { prev_pos[OLD_MASTER_KEY] = curr_pos[OLD_MASTER_KEY]; - found = pg_tde_read_one_map_entry(m_file[OLD_MASTER_KEY], NULL, MAP_ENTRY_VALID, &map_entry, &curr_pos[OLD_MASTER_KEY]); + found = pg_tde_read_one_map_entry(m_fd[OLD_MASTER_KEY], NULL, MAP_ENTRY_VALID, &map_entry, &curr_pos[OLD_MASTER_KEY]); /* We either reach EOF */ if (prev_pos[OLD_MASTER_KEY] == curr_pos[OLD_MASTER_KEY]) @@ -1076,7 +1155,7 @@ pg_tde_perform_rotate_key(TDEMasterKey *master_key, TDEMasterKey *new_master_key rloc.spcOid = DEFAULTTABLESPACE_OID; /* Let's get the decrypted key and re-encrypt it with the new key. 
*/ - enc_rel_key_data[OLD_MASTER_KEY] = pg_tde_read_one_keydata(k_file[OLD_MASTER_KEY], key_index[OLD_MASTER_KEY], master_key); + enc_rel_key_data[OLD_MASTER_KEY] = pg_tde_read_one_keydata(k_fd[OLD_MASTER_KEY], key_index[OLD_MASTER_KEY], master_key); /* Decrypt and re-encrypt keys */ rel_key_data[OLD_MASTER_KEY] = tde_decrypt_rel_key(master_key, enc_rel_key_data[OLD_MASTER_KEY], &rloc); @@ -1084,20 +1163,20 @@ pg_tde_perform_rotate_key(TDEMasterKey *master_key, TDEMasterKey *new_master_key /* Write the given entry at the location pointed by prev_pos */ prev_pos[NEW_MASTER_KEY] = curr_pos[NEW_MASTER_KEY]; - curr_pos[NEW_MASTER_KEY] = pg_tde_write_one_map_entry(m_file[NEW_MASTER_KEY], &rloc, MAP_ENTRY_VALID, key_index[NEW_MASTER_KEY], &map_entry, &prev_pos[NEW_MASTER_KEY]); - pg_tde_write_one_keydata(k_file[NEW_MASTER_KEY], key_index[NEW_MASTER_KEY], enc_rel_key_data[NEW_MASTER_KEY]); + curr_pos[NEW_MASTER_KEY] = pg_tde_write_one_map_entry(m_fd[NEW_MASTER_KEY], &rloc, MAP_ENTRY_VALID, key_index[NEW_MASTER_KEY], &map_entry, &prev_pos[NEW_MASTER_KEY]); + pg_tde_write_one_keydata(k_fd[NEW_MASTER_KEY], key_index[NEW_MASTER_KEY], enc_rel_key_data[NEW_MASTER_KEY]); /* Increment the key index for the new master key */ key_index[NEW_MASTER_KEY]++; } /* Close unrotated files */ - FileClose(m_file[OLD_MASTER_KEY]); - FileClose(k_file[OLD_MASTER_KEY]); + close(m_fd[OLD_MASTER_KEY]); + close(k_fd[OLD_MASTER_KEY]); /* Let's calculate sizes */ - map_size = FileSize(m_file[NEW_MASTER_KEY]); - keydata_size = FileSize(k_file[NEW_MASTER_KEY]); + map_size = lseek(m_fd[NEW_MASTER_KEY], 0, SEEK_END); + keydata_size = lseek(k_fd[NEW_MASTER_KEY], 0, SEEK_END); xlrec_size = map_size + keydata_size + SizeoOfXLogMasterKeyRotate; /* palloc and fill in the structure */ @@ -1107,12 +1186,13 @@ pg_tde_perform_rotate_key(TDEMasterKey *master_key, TDEMasterKey *new_master_key xlrec->map_size = map_size; xlrec->keydata_size = keydata_size; - FileRead(m_file[NEW_MASTER_KEY], xlrec->buff, 
xlrec->map_size, 0, WAIT_EVENT_DATA_FILE_READ); - FileRead(k_file[NEW_MASTER_KEY], &xlrec->buff[xlrec->map_size], xlrec->keydata_size, 0, WAIT_EVENT_DATA_FILE_READ); + /* TODO: pgstat_report_wait_start / pgstat_report_wait_end */ + pg_pread(m_fd[NEW_MASTER_KEY], xlrec->buff, xlrec->map_size, 0); + pg_pread(k_fd[NEW_MASTER_KEY], &xlrec->buff[xlrec->map_size], xlrec->keydata_size, 0); /* Close the files */ - FileClose(m_file[NEW_MASTER_KEY]); - FileClose(k_file[NEW_MASTER_KEY]); + close(m_fd[NEW_MASTER_KEY]); + close(k_fd[NEW_MASTER_KEY]); /* Insert the XLog record */ XLogBeginInsert(); @@ -1129,7 +1209,6 @@ pg_tde_perform_rotate_key(TDEMasterKey *master_key, TDEMasterKey *new_master_key /* Free up the palloc'ed data */ pfree(xlrec); - /* TODO: Remove the existing ones from cache etc. */ return true; #undef OLD_MASTER_KEY @@ -1144,58 +1223,83 @@ bool pg_tde_write_map_keydata_files(off_t map_size, char *m_file_data, off_t keydata_size, char *k_file_data) { TDEFileHeader *fheader; - char m_path_new[MAXPGPATH]; - char k_path_new[MAXPGPATH]; - File m_file_new; - File k_file_new; - bool is_new_file; - off_t curr_pos = 0; - off_t read_pos_tmp = 0; - LWLock *lock_files = tde_lwlock_mk_files(); - LWLock *lock_cache = tde_lwlock_mk_cache(); + char m_path_new[MAXPGPATH]; + char k_path_new[MAXPGPATH]; + int m_fd_new; + int k_fd_new; + bool is_new_file; + off_t curr_pos = 0; + off_t read_pos_tmp = 0; + LWLock *lock_files = tde_lwlock_mk_files(); + LWLock *lock_cache = tde_lwlock_mk_cache(); + char db_map_path[MAXPGPATH] = {0}; + char db_keydata_path[MAXPGPATH] = {0}; + bool is_err = false; /* Let's get the header. Buff should start with the map file header. 
*/ fheader = (TDEFileHeader *) m_file_data; /* Set the file paths */ - pg_tde_set_db_file_paths(fheader->master_key_info.databaseId); + pg_tde_set_db_file_paths(&(RelFileLocator) { + fheader->master_key_info.tablespaceId, + fheader->master_key_info.databaseId, + 0}, + db_map_path, db_keydata_path); LWLockAcquire(lock_files, LW_EXCLUSIVE); LWLockAcquire(lock_cache, LW_EXCLUSIVE); /* Initialize the new files and set the names */ - m_file_new = keyrotation_init_file(&fheader->master_key_info, m_path_new, db_map_path, &is_new_file, &curr_pos); - k_file_new = keyrotation_init_file(&fheader->master_key_info, k_path_new, db_keydata_path, &is_new_file, &read_pos_tmp); + m_fd_new = keyrotation_init_file(&fheader->master_key_info, m_path_new, db_map_path, &is_new_file, &curr_pos); + k_fd_new = keyrotation_init_file(&fheader->master_key_info, k_path_new, db_keydata_path, &is_new_file, &read_pos_tmp); - if (FileWrite(m_file_new, m_file_data, map_size, 0, WAIT_EVENT_DATA_FILE_WRITE) != map_size) + /* TODO: pgstat_report_wait_start / pgstat_report_wait_end */ + if (pg_pwrite(m_fd_new, m_file_data, map_size, 0) != map_size) { - LWLockRelease(lock_cache); - LWLockRelease(lock_files); - ereport(WARNING, (errcode_for_file_access(), - errmsg("Could not write tde file \"%s\": %m", + errmsg("could not write tde file \"%s\": %m", m_path_new))); + is_err = true; + goto FINALIZE; } - - if (FileWrite(k_file_new, k_file_data, keydata_size, 0, WAIT_EVENT_DATA_FILE_WRITE) != keydata_size) + if (pg_fsync(m_fd_new) != 0) { - LWLockRelease(lock_cache); - LWLockRelease(lock_files); + ereport(WARNING, + (errcode_for_file_access(), + errmsg("could not fsync file \"%s\": %m", m_path_new))); + is_err = true; + goto FINALIZE; + } + + if (pg_pwrite(k_fd_new, k_file_data, keydata_size, 0) != keydata_size) + { ereport(WARNING, (errcode_for_file_access(), - errmsg("Could not write tde file \"%s\": %m", + errmsg("could not write tde file \"%s\": %m", k_path_new))); + is_err = true; + goto FINALIZE; + } + 
if (pg_fsync(k_fd_new) != 0) + { + ereport(WARNING, + (errcode_for_file_access(), + errmsg("could not fsync file \"%s\": %m", k_path_new))); + is_err = true; + goto FINALIZE; } - FileClose(m_file_new); - FileClose(k_file_new); +FINALIZE: + close(m_fd_new); + close(k_fd_new); - finalize_key_rotation(db_map_path, db_keydata_path, m_path_new, k_path_new); + if (!is_err) + finalize_key_rotation(db_map_path, db_keydata_path, m_path_new, k_path_new); LWLockRelease(lock_cache); LWLockRelease(lock_files); - return true; + return !is_err; } diff --git a/src/access/pg_tde_vacuumlazy.c b/src/access/pg_tde_vacuumlazy.c index bd7eca4e..acf894f6 100644 --- a/src/access/pg_tde_vacuumlazy.c +++ b/src/access/pg_tde_vacuumlazy.c @@ -2828,8 +2828,11 @@ should_attempt_truncation(LVRelState *vacrel) { BlockNumber possibly_freeable; - if (!vacrel->do_rel_truncate || VacuumFailsafeActive || - old_snapshot_threshold >= 0) + if (!vacrel->do_rel_truncate || VacuumFailsafeActive +#if PG_VERSION_NUM < 170000 + || old_snapshot_threshold >= 0 +#endif + ) return false; possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages; diff --git a/src/access/pg_tde_xlog.c b/src/access/pg_tde_xlog.c index b87344e9..246ef195 100644 --- a/src/access/pg_tde_xlog.c +++ b/src/access/pg_tde_xlog.c @@ -12,13 +12,34 @@ #include "postgres.h" +#include "pg_tde_defines.h" #include "access/xlog.h" #include "access/xlog_internal.h" #include "access/xloginsert.h" +#include "catalog/pg_tablespace_d.h" +#include "storage/bufmgr.h" +#include "storage/shmem.h" +#include "utils/guc.h" +#include "utils/memutils.h" #include "access/pg_tde_tdemap.h" #include "access/pg_tde_xlog.h" -#include "catalog/tde_master_key.h" +#include "encryption/enc_tde.h" +#ifdef PERCONA_FORK +#include "catalog/tde_global_catalog.h" + +static char *TDEXLogEncryptBuf = NULL; + +/* GUC */ +static bool EncryptXLog = false; + +static XLogPageHeaderData EncryptCurrentPageHrd; +static XLogPageHeaderData DecryptCurrentPageHrd; + +static ssize_t 
TDEXLogWriteEncryptedPages(int fd, const void *buf, size_t count, off_t offset); +static void SetXLogPageIVPrefix(TimeLineID tli, XLogRecPtr lsn, char* iv_prefix); +static int XLOGChooseNumBuffers(void); +#endif /* * TDE fork XLog @@ -103,3 +124,274 @@ pg_tde_rmgr_identify(uint8 info) return NULL; } + +#ifdef PERCONA_FORK + +/* + * ------------------------- + * XLog Storage Manager + */ + +void +XLogInitGUC(void) +{ + DefineCustomBoolVariable("pg_tde.wal_encrypt", /* name */ + "Enable/Disable encryption of WAL.", /* short_desc */ + NULL, /* long_desc */ + &EncryptXLog, /* value address */ + false, /* boot value */ + PGC_POSTMASTER, /* context */ + 0, /* flags */ + NULL, /* check_hook */ + NULL, /* assign_hook */ + NULL /* show_hook */ + ); +} + +static int +XLOGChooseNumBuffers(void) +{ + int xbuffers; + + xbuffers = NBuffers / 32; + if (xbuffers > (wal_segment_size / XLOG_BLCKSZ)) + xbuffers = (wal_segment_size / XLOG_BLCKSZ); + if (xbuffers < 8) + xbuffers = 8; + return xbuffers; +} + +/* + * Defines the size of the XLog encryption buffer + */ +Size +TDEXLogEncryptBuffSize(void) +{ + int xbuffers; + + xbuffers = (XLOGbuffers == -1) ? XLOGChooseNumBuffers() : XLOGbuffers; + return (Size) XLOG_BLCKSZ * xbuffers; +} + +/* + * Alloc memory for the encryption buffer. + * + * It should fit XLog buffers (XLOG_BLCKSZ * wal_buffers). We can't + * (re)alloc this buf in pg_tde_xlog_seg_write() based on the write size as + * it's called in the CRIT section, hence no allocations are allowed. + * + * Access to this buffer happens during XLogWrite() call which should + * be called with WALWriteLock held, hence no need in extra locks. 
+ */ +void +TDEXLogShmemInit(void) +{ + bool foundBuf; + + if (EncryptXLog) + { + TDEXLogEncryptBuf = (char *) + TYPEALIGN(PG_IO_ALIGN_SIZE, + ShmemInitStruct("TDE XLog Encryption Buffer", + XLOG_TDE_ENC_BUFF_ALIGNED_SIZE, + &foundBuf)); + + elog(DEBUG1, "pg_tde: initialized encryption buffer %lu bytes", XLOG_TDE_ENC_BUFF_ALIGNED_SIZE); + } +} + +void +TDEXLogSmgrInit(void) +{ + SetXLogSmgr(&tde_xlog_smgr); +} + +ssize_t +pg_tde_xlog_seg_write(int fd, const void *buf, size_t count, off_t offset) +{ + if (EncryptXLog) + return TDEXLogWriteEncryptedPages(fd, buf, count, offset); + else + return pg_pwrite(fd, buf, count, offset); +} + +/* + * Encrypt XLog page(s) from the buf and write to the segment file. + */ +static ssize_t +TDEXLogWriteEncryptedPages(int fd, const void *buf, size_t count, off_t offset) +{ + char iv_prefix[16] = {0,}; + size_t data_size = 0; + XLogPageHeader curr_page_hdr = &EncryptCurrentPageHrd; + XLogPageHeader enc_buf_page; + RelKeyData *key = GetGlCatInternalKey(XLOG_TDE_OID); + off_t enc_off; + size_t page_size = XLOG_BLCKSZ - offset % XLOG_BLCKSZ; + uint32 iv_ctr = 0; + +#ifdef TDE_XLOG_DEBUG + elog(DEBUG1, "write encrypted WAL, pages amount: %d, size: %lu offset: %ld", count / (Size) XLOG_BLCKSZ, count, offset); +#endif + + /* + * Go through the buf page-by-page and encrypt them. + * We may start or finish writing from/in the middle of the page + * (walsender or `full_page_writes = off`). So preserve a page header + * for the IV init data. + * + * TODO: check if walsender restarts from the beginning of the page + * in case of the crash. + */ + for (enc_off = 0; enc_off < count;) + { + data_size = Min(page_size, count); + + if (page_size == XLOG_BLCKSZ) + { + memcpy((char *) curr_page_hdr, (char *) buf + enc_off, SizeOfXLogShortPHD); + + /* + * Need to use a separate buf for the encryption so the page remains non-crypted + * in the XLog buf (XLogInsert has to have access to records' lsn).
+ */ + enc_buf_page = (XLogPageHeader) (TDEXLogEncryptBuf + enc_off); + memcpy((char *) enc_buf_page, (char *) buf + enc_off, (Size) XLogPageHeaderSize(curr_page_hdr)); + enc_buf_page->xlp_info |= XLP_ENCRYPTED; + + enc_off += XLogPageHeaderSize(curr_page_hdr); + data_size -= XLogPageHeaderSize(curr_page_hdr); + /* it's a beginning of the page */ + iv_ctr = 0; + } + else + { + /* we're in the middle of the page */ + iv_ctr = (offset % XLOG_BLCKSZ) - XLogPageHeaderSize(curr_page_hdr); + } + + if (data_size + enc_off > count) + { + data_size = count - enc_off; + } + + /* + * The page is zeroed (no data), no sense to encrypt. + * This may happen when base_backup or other requests XLOG SWITCH and + * some pages in XLog buffer still not used. + */ + if (curr_page_hdr->xlp_magic == 0) + { + /* ensure all the page is {0} */ + Assert((*((char *) buf + enc_off) == 0) && + memcmp((char *) buf + enc_off, (char *) buf + enc_off + 1, data_size - 1) == 0); + + memcpy((char *) enc_buf_page, (char *) buf + enc_off, data_size); + } + else + { + SetXLogPageIVPrefix(curr_page_hdr->xlp_tli, curr_page_hdr->xlp_pageaddr, iv_prefix); + PG_TDE_ENCRYPT_DATA(iv_prefix, iv_ctr, (char *) buf + enc_off, data_size, + TDEXLogEncryptBuf + enc_off, key); + } + + page_size = XLOG_BLCKSZ; + enc_off += data_size; + } + + return pg_pwrite(fd, TDEXLogEncryptBuf, count, offset); +} + +/* + * Read the XLog pages from the segment file and decrypt if needed.
+ */ +ssize_t +pg_tde_xlog_seg_read(int fd, void *buf, size_t count, off_t offset) +{ + ssize_t readsz; + char iv_prefix[16] = {0,}; + size_t data_size = 0; + XLogPageHeader curr_page_hdr = &DecryptCurrentPageHrd; + RelKeyData *key = GetGlCatInternalKey(XLOG_TDE_OID); + size_t page_size = XLOG_BLCKSZ - offset % XLOG_BLCKSZ; + off_t dec_off; + uint32 iv_ctr = 0; + +#ifdef TDE_XLOG_DEBUG + elog(DEBUG1, "read from a WAL segment, pages amount: %d, size: %lu offset: %ld", count / (Size) XLOG_BLCKSZ, count, offset); +#endif + + readsz = pg_pread(fd, buf, count, offset); + + /* + * Read the buf page by page and decrypt encrypted pages. + * We may start or finish reading from/in the middle of the page (walreceiver) + * in such a case we should preserve the last read page header for + * the IV data and the encryption state. + * + * TODO: check if walsender/receiver restarts from the beginning of the page + * in case of the crash. + */ + for (dec_off = 0; dec_off < readsz;) + { + data_size = Min(page_size, readsz); + + if (page_size == XLOG_BLCKSZ) + { + memcpy((char *) curr_page_hdr, (char *) buf + dec_off, SizeOfXLogShortPHD); + + /* set the flag to "not encrypted" for the walreceiver */ + ((XLogPageHeader) ((char *) buf + dec_off))->xlp_info &= ~XLP_ENCRYPTED; + + Assert(curr_page_hdr->xlp_magic == XLOG_PAGE_MAGIC || curr_page_hdr->xlp_magic == 0); + dec_off += XLogPageHeaderSize(curr_page_hdr); + data_size -= XLogPageHeaderSize(curr_page_hdr); + /* it's a beginning of the page */ + iv_ctr = 0; + } + else + { + /* we're in the middle of the page */ + iv_ctr = (offset % XLOG_BLCKSZ) - XLogPageHeaderSize(curr_page_hdr); + } + + if ((data_size + dec_off) > readsz) + { + data_size = readsz - dec_off; + } + + if (curr_page_hdr->xlp_info & XLP_ENCRYPTED) + { + SetXLogPageIVPrefix(curr_page_hdr->xlp_tli, curr_page_hdr->xlp_pageaddr, iv_prefix); + PG_TDE_DECRYPT_DATA( + iv_prefix, iv_ctr, + (char *) buf + dec_off, data_size, (char *) buf + dec_off, key); + } + + page_size =
XLOG_BLCKSZ; + dec_off += data_size; + } + + return readsz; +} + +/* IV: TLI(uint32) + XLogRecPtr(uint64)*/ +static void +SetXLogPageIVPrefix(TimeLineID tli, XLogRecPtr lsn, char* iv_prefix) +{ + iv_prefix[0] = (tli >> 24); + iv_prefix[1] = ((tli >> 16) & 0xFF); + iv_prefix[2] = ((tli >> 8) & 0xFF); + iv_prefix[3] = (tli & 0xFF); + + iv_prefix[4] = (lsn >> 56); + iv_prefix[5] = ((lsn >> 48) & 0xFF); + iv_prefix[6] = ((lsn >> 40) & 0xFF); + iv_prefix[7] = ((lsn >> 32) & 0xFF); + iv_prefix[8] = ((lsn >> 24) & 0xFF); + iv_prefix[9] = ((lsn >> 16) & 0xFF); + iv_prefix[10] = ((lsn >> 8) & 0xFF); + iv_prefix[11] = (lsn & 0xFF); +} + +#endif diff --git a/src/access/pg_tdeam.c b/src/access/pg_tdeam.c index 7fa3f83f..02f610b6 100644 --- a/src/access/pg_tdeam.c +++ b/src/access/pg_tdeam.c @@ -431,7 +431,9 @@ pg_tde_getpage(TableScanDesc sscan, BlockNumber block) LockBuffer(buffer, BUFFER_LOCK_SHARE); page = BufferGetPage(buffer); +#if PG_VERSION_NUM < 170000 TestForOldSnapshot(snapshot, scan->rs_base.rs_rd, page); +#endif lines = PageGetMaxOffsetNumber(page); ntup = 0; @@ -570,9 +572,9 @@ pg_tde_gettup_start_page(HeapScanDesc scan, ScanDirection dir, int *linesleft, /* Caller is responsible for ensuring buffer is locked if needed */ page = BufferGetPage(scan->rs_cbuf); - +#if PG_VERSION_NUM < 170000 TestForOldSnapshot(scan->rs_base.rs_snapshot, scan->rs_base.rs_rd, page); - +#endif *linesleft = PageGetMaxOffsetNumber(page) - FirstOffsetNumber + 1; if (ScanDirectionIsForward(dir)) @@ -603,9 +605,9 @@ pg_tde_gettup_continue_page(HeapScanDesc scan, ScanDirection dir, int *linesleft /* Caller is responsible for ensuring buffer is locked if needed */ page = BufferGetPage(scan->rs_cbuf); - +#if PG_VERSION_NUM < 170000 TestForOldSnapshot(scan->rs_base.rs_snapshot, scan->rs_base.rs_rd, page); - +#endif if (ScanDirectionIsForward(dir)) { *lineoff = OffsetNumberNext(scan->rs_coffset); @@ -870,8 +872,9 @@ pg_tde_gettup_pagemode(HeapScanDesc scan, /* continue from previously returned 
page/tuple */ block = scan->rs_cblock; /* current page */ page = BufferGetPage(scan->rs_cbuf); +#if PG_VERSION_NUM < 170000 TestForOldSnapshot(scan->rs_base.rs_snapshot, scan->rs_base.rs_rd, page); - +#endif lineindex = scan->rs_cindex + dir; if (ScanDirectionIsForward(dir)) linesleft = scan->rs_ntuples - lineindex; @@ -890,7 +893,9 @@ pg_tde_gettup_pagemode(HeapScanDesc scan, { pg_tde_getpage((TableScanDesc) scan, block); page = BufferGetPage(scan->rs_cbuf); +#if PG_VERSION_NUM < 170000 TestForOldSnapshot(scan->rs_base.rs_snapshot, scan->rs_base.rs_rd, page); +#endif linesleft = scan->rs_ntuples; lineindex = ScanDirectionIsForward(dir) ? 0 : linesleft - 1; @@ -1107,10 +1112,10 @@ pg_tde_getnext(TableScanDesc sscan, ScanDirection direction) * rather than the AM oid, is that this allows to write regression tests * that create another AM reusing the heap handler. */ - if (unlikely(sscan->rs_rd->rd_tableam != GetHeapamTableAmRoutine())) + if (unlikely(sscan->rs_rd->rd_tableam != GetPGTdeamTableAmRoutine())) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg_internal("only heap AM is supported"))); + errmsg_internal("only pg_tde AM is supported"))); /* * We don't expect direct calls to pg_tde_getnext with valid CheckXidAlive @@ -1380,8 +1385,9 @@ pg_tde_fetch(Relation relation, */ LockBuffer(buffer, BUFFER_LOCK_SHARE); page = BufferGetPage(buffer); +#if PG_VERSION_NUM < 170000 TestForOldSnapshot(snapshot, relation, page); - +#endif /* * We'd better check for out-of-range offnum in case of VACUUM since the * TID was obtained. @@ -1671,8 +1677,9 @@ pg_tde_get_latest_tid(TableScanDesc sscan, buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&ctid)); LockBuffer(buffer, BUFFER_LOCK_SHARE); page = BufferGetPage(buffer); +#if PG_VERSION_NUM < 170000 TestForOldSnapshot(snapshot, relation, page); - +#endif /* * Check for bogus item number. This is not treated as an error * condition because it can happen while following a t_ctid link. 
We diff --git a/src/access/pg_tdeam_handler.c b/src/access/pg_tdeam_handler.c index f01c769c..6956572a 100644 --- a/src/access/pg_tdeam_handler.c +++ b/src/access/pg_tdeam_handler.c @@ -55,6 +55,7 @@ #include "utils/rel.h" PG_FUNCTION_INFO_V1(pg_tdeam_handler); +PG_FUNCTION_INFO_V1(pg_tde2am_handler); static void reform_and_rewrite_tuple(HeapTuple tuple, @@ -645,7 +646,7 @@ pg_tdeam_relation_set_new_filelocator(Relation rel, ereport(DEBUG1, (errmsg("creating key file for relation %s", RelationGetRelationName(rel)))); - pg_tde_create_key_map_entry(newrlocator, rel); + pg_tde_create_key_map_entry(newrlocator); } } @@ -2633,7 +2634,6 @@ static const TableAmRoutine pg_tdeam_methods = { .scan_sample_next_tuple = pg_tdeam_scan_sample_next_tuple }; - const TableAmRoutine * GetPGTdeamTableAmRoutine(void) { @@ -2646,6 +2646,12 @@ pg_tdeam_handler(PG_FUNCTION_ARGS) PG_RETURN_POINTER(&pg_tdeam_methods); } +Datum +pg_tde2am_handler(PG_FUNCTION_ARGS) +{ + PG_RETURN_POINTER(GetHeapamTableAmRoutine()); +} + bool is_pg_tde_rel(Relation rel) { diff --git a/src/catalog/tde_global_catalog.c b/src/catalog/tde_global_catalog.c new file mode 100644 index 00000000..4fb72908 --- /dev/null +++ b/src/catalog/tde_global_catalog.c @@ -0,0 +1,248 @@ +/*------------------------------------------------------------------------- + * + * tde_global_catalog.c + * Global catalog key management + * + * + * IDENTIFICATION + * src/catalog/tde_global_catalog.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#ifdef PERCONA_FORK + +#include "storage/shmem.h" +#include "utils/guc.h" + +#include "access/pg_tde_tdemap.h" +#include "catalog/tde_global_catalog.h" +#include "catalog/tde_keyring.h" +#include "catalog/tde_master_key.h" + +#include +#include +#include + +#define MASTER_KEY_DEFAULT_NAME "tde-global-catalog-key" + +/* TODO: not sure if we need an option of multiple master keys for the global catalog */ +typedef enum +{ + 
TDE_GCAT_XLOG_KEY, + + /* must be last */ + TDE_GCAT_KEYS_COUNT +} GlobalCatalogKeyTypes; + +typedef struct EncryptionStateData +{ + GenericKeyring *keyring; + TDEMasterKey master_keys[TDE_GCAT_KEYS_COUNT]; +} EncryptionStateData; + +static EncryptionStateData * EncryptionState = NULL; + +/* GUC */ +static char *KRingProviderType = NULL; +static char *KRingProviderFilePath = NULL; + +static void init_gl_catalog_keys(void); +static void init_keyring(void); +static TDEMasterKey * create_master_key(const char *key_name, + GenericKeyring * keyring, Oid dbOid, Oid spcOid, + bool ensure_new_key); + +void +TDEGlCatInitGUC(void) +{ + DefineCustomStringVariable("pg_tde.global_keyring_type", + "Keyring type for global catalog", + NULL, + &KRingProviderType, + NULL, + PGC_POSTMASTER, + 0, /* no flags required */ + NULL, + NULL, + NULL + ); + DefineCustomStringVariable("pg_tde.global_keyring_file_path", + "Keyring file options for global catalog", + NULL, + &KRingProviderFilePath, + NULL, + PGC_POSTMASTER, + 0, /* no flags required */ + NULL, + NULL, + NULL + ); +} + + +Size +TDEGlCatEncStateSize(void) +{ + Size size; + + size = sizeof(EncryptionStateData); + size = add_size(size, sizeof(KeyringProviders)); + + return MAXALIGN(size); +} + +void +TDEGlCatShmemInit(void) +{ + bool foundBuf; + char *allocptr; + + EncryptionState = (EncryptionStateData *) + ShmemInitStruct("TDE XLog Encryption State", + TDEGlCatEncStateSize(), &foundBuf); + + allocptr = ((char *) EncryptionState) + MAXALIGN(sizeof(EncryptionStateData)); + EncryptionState->keyring = (GenericKeyring *) allocptr; + memset(EncryptionState->keyring, 0, sizeof(KeyringProviders)); + memset(EncryptionState->master_keys, 0, sizeof(TDEMasterKey) * TDE_GCAT_KEYS_COUNT); +} + +void +TDEGlCatKeyInit(void) +{ + char db_map_path[MAXPGPATH] = {0}; + + init_keyring(); + + pg_tde_set_db_file_paths(&GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID), + db_map_path, NULL); + if (access(db_map_path, F_OK) == -1) + { + init_gl_catalog_keys(); + } + 
else + { + /* put an internal key into the cache */ + GetGlCatInternalKey(XLOG_TDE_OID); + } +} + +TDEMasterKey * +TDEGetGlCatKeyFromCache(void) +{ + TDEMasterKey *mkey; + + mkey = &EncryptionState->master_keys[TDE_GCAT_XLOG_KEY]; + if (mkey->keyLength == 0) + return NULL; + + return mkey; +} + +void +TDEPutGlCatKeyInCache(TDEMasterKey * mkey) +{ + memcpy(EncryptionState->master_keys + TDE_GCAT_XLOG_KEY, mkey, sizeof(TDEMasterKey)); +} + +RelKeyData * +GetGlCatInternalKey(Oid obj_id) +{ + return GetRelationKeyWithKeyring(GLOBAL_SPACE_RLOCATOR(obj_id), EncryptionState->keyring); +} + +/* + * TODO: should be aligned with the rest of the keyring_provider code after its + * refactoring + * + * TODO: add Vault + */ +static void +init_keyring(void) +{ + EncryptionState->keyring->type = get_keyring_provider_from_typename(KRingProviderType); + switch (EncryptionState->keyring->type) + { + case FILE_KEY_PROVIDER: + FileKeyring * kring = (FileKeyring *) EncryptionState->keyring; + strncpy(kring->file_name, KRingProviderFilePath, sizeof(kring->file_name)); + break; + } +} + +/* + * Keys are created during the cluster start only, so no locks needed here. 
+ */ +static void +init_gl_catalog_keys(void) +{ + InternalKey int_key; + RelKeyData *rel_key_data; + RelKeyData *enc_rel_key_data; + RelFileLocator *rlocator; + TDEMasterKey *mkey; + + mkey = create_master_key(MASTER_KEY_DEFAULT_NAME, + EncryptionState->keyring, + GLOBAL_DATA_TDE_OID, GLOBALTABLESPACE_OID, false); + + memset(&int_key, 0, sizeof(InternalKey)); + + /* Create and store an internal key for XLog */ + if (!RAND_bytes(int_key.key, INTERNAL_KEY_LEN)) + { + ereport(FATAL, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("could not generate internal key for \"WAL\": %s", + ERR_error_string(ERR_get_error(), NULL)))); + } + + rlocator = &GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID); + rel_key_data = tde_create_rel_key(rlocator->relNumber, &int_key, &mkey->keyInfo); + enc_rel_key_data = tde_encrypt_rel_key(mkey, rel_key_data, rlocator); + pg_tde_write_key_map_entry(rlocator, enc_rel_key_data, &mkey->keyInfo); + + /* + * TODO: move global catalog internal keys into own cache. This cache should + * be in the TopMemoryContext because of SSL contexts + * (see https://github.com/Percona-Lab/pg_tde/pull/214#discussion_r1648998317) + */ + pg_tde_put_key_into_map(rlocator->relNumber, rel_key_data); + TDEPutGlCatKeyInCache(mkey); +} + +static TDEMasterKey * +create_master_key(const char *key_name, GenericKeyring * keyring, + Oid dbOid, Oid spcOid, bool ensure_new_key) +{ + TDEMasterKey *masterKey; + keyInfo *keyInfo = NULL; + + masterKey = palloc(sizeof(TDEMasterKey)); + masterKey->keyInfo.databaseId = dbOid; + masterKey->keyInfo.tablespaceId = spcOid; + masterKey->keyInfo.keyId.version = DEFAULT_MASTER_KEY_VERSION; + masterKey->keyInfo.keyringId = keyring->key_id; + strncpy(masterKey->keyInfo.keyId.name, key_name, TDE_KEY_NAME_LEN); + gettimeofday(&masterKey->keyInfo.creationTime, NULL); + + keyInfo = load_latest_versioned_key_name(&masterKey->keyInfo, keyring, ensure_new_key); + + if (keyInfo == NULL) + keyInfo = KeyringGenerateNewKeyAndStore(keyring,
masterKey->keyInfo.keyId.versioned_name, INTERNAL_KEY_LEN, false); + + if (keyInfo == NULL) + { + ereport(ERROR, + (errmsg("failed to retrieve master key"))); + } + + masterKey->keyLength = keyInfo->data.len; + memcpy(masterKey->keyData, keyInfo->data.data, keyInfo->data.len); + + return masterKey; +} +#endif /* PERCONA_FORK */ diff --git a/src/catalog/tde_keyring.c b/src/catalog/tde_keyring.c index fc56deee..5397dcb7 100644 --- a/src/catalog/tde_keyring.c +++ b/src/catalog/tde_keyring.c @@ -50,13 +50,12 @@ PG_FUNCTION_INFO_V1(keyring_delete_dependency_check_trigger); #define FILE_KEYRING_TYPE_KEY "type" static FileKeyring *load_file_keyring_provider_options(Datum keyring_options); -static ProviderType get_keyring_provider_from_typename(char *provider_type); static GenericKeyring *load_keyring_provider_options(ProviderType provider_type, Datum keyring_options); static VaultV2Keyring *load_vaultV2_keyring_provider_options(Datum keyring_options); static void debug_print_kerying(GenericKeyring *keyring); static GenericKeyring *load_keyring_provider_from_tuple(HeapTuple tuple, TupleDesc tupDesc); -static ProviderType +ProviderType get_keyring_provider_from_typename(char *provider_type) { if (provider_type == NULL) diff --git a/src/catalog/tde_master_key.c b/src/catalog/tde_master_key.c index 2d2cd9cd..8d1fd707 100644 --- a/src/catalog/tde_master_key.c +++ b/src/catalog/tde_master_key.c @@ -29,8 +29,9 @@ #include #include "access/pg_tde_tdemap.h" - -#define DEFAULT_MASTER_KEY_VERSION 1 +#ifdef PERCONA_FORK +#include "catalog/tde_global_catalog.h" +#endif typedef struct TdeMasterKeySharedState { @@ -67,12 +68,10 @@ static Size required_shared_mem_size(void); static int required_locks_count(void); static void shared_memory_shutdown(int code, Datum arg); static void master_key_startup_cleanup(int tde_tbl_count, void *arg); -static keyInfo *load_latest_versioned_key_name(TDEMasterKeyInfo *mastere_key_info, GenericKeyring *keyring, bool ensure_new_key); -static void 
clear_master_key_cache(Oid databaseId, Oid tablespaceId) ; +static void clear_master_key_cache(Oid databaseId) ; static inline dshash_table *get_master_key_Hash(void); static TDEMasterKey *get_master_key_from_cache(Oid dbOid); static void push_master_key_to_cache(TDEMasterKey *masterKey); -static TDEMasterKey *set_master_key_with_keyring(const char *key_name, GenericKeyring *keyring, bool ensure_new_key); static const TDEShmemSetupRoutine master_key_info_shmem_routine = { .init_shared_state = initialize_shared_state, @@ -214,23 +213,41 @@ save_master_key_info(TDEMasterKeyInfo *master_key_info) * throws an error. */ TDEMasterKey * -GetMasterKey(void) +GetMasterKey(Oid dbOid, Oid spcOid, GenericKeyring *keyring) { TDEMasterKey *masterKey = NULL; TDEMasterKeyInfo *masterKeyInfo = NULL; - GenericKeyring *keyring = NULL; const keyInfo *keyInfo = NULL; KeyringReturnCodes keyring_ret; - Oid dbOid = MyDatabaseId; LWLock *lock_files = tde_lwlock_mk_files(); LWLock *lock_cache = tde_lwlock_mk_cache(); + // TODO: This recursion counter is a dirty hack until the metadata is in the catalog + // As otherwise we would call GetMasterKey recursively and deadlock + static int recursion = 0; + + if(recursion > 0) + { + return NULL; + } + + recursion++; + LWLockAcquire(lock_cache, LW_SHARED); - masterKey = get_master_key_from_cache(dbOid); +#ifdef PERCONA_FORK + /* Global catalog has its own cache */ + if (spcOid == GLOBALTABLESPACE_OID) + masterKey = TDEGetGlCatKeyFromCache(); + else +#endif + masterKey = get_master_key_from_cache(dbOid); LWLockRelease(lock_cache); if (masterKey) + { + recursion--; return masterKey; + } /* * We should hold an exclusive lock here to ensure that a valid master key, if found, is added @@ -239,38 +256,44 @@ GetMasterKey(void) LWLockAcquire(lock_files, LW_SHARED); LWLockAcquire(lock_cache, LW_EXCLUSIVE); - masterKey = get_master_key_from_cache(dbOid); +#ifdef PERCONA_FORK + /* Global catalog has its own cache */ + if (spcOid == GLOBALTABLESPACE_OID) + 
masterKey = TDEGetGlCatKeyFromCache(); + else +#endif + masterKey = get_master_key_from_cache(dbOid); if (masterKey) { LWLockRelease(lock_cache); LWLockRelease(lock_files); + recursion--; return masterKey; } /* Master key not present in cache. Load from the keyring */ - masterKeyInfo = pg_tde_get_master_key(dbOid); + masterKeyInfo = pg_tde_get_master_key(dbOid, spcOid); if (masterKeyInfo == NULL) { LWLockRelease(lock_cache); LWLockRelease(lock_files); - ereport(ERROR, - (errmsg("Master key does not exists for the database"), - errhint("Use set_master_key interface to set the master key"))); + recursion--; return NULL; } - /* Load the master key from keyring and store it in cache */ - keyring = GetKeyProviderByID(masterKeyInfo->keyringId); if (keyring == NULL) { - LWLockRelease(lock_cache); - LWLockRelease(lock_files); + keyring = GetKeyProviderByID(masterKeyInfo->keyringId); + if (keyring == NULL) + { + LWLockRelease(lock_cache); + LWLockRelease(lock_files); - ereport(ERROR, - (errmsg("Key provider with ID:\"%d\" does not exists", masterKeyInfo->keyringId))); - return NULL; + recursion--; + return NULL; + } } keyInfo = KeyringGetKey(keyring, masterKeyInfo->keyId.versioned_name, false, &keyring_ret); @@ -279,8 +302,7 @@ GetMasterKey(void) LWLockRelease(lock_cache); LWLockRelease(lock_files); - ereport(ERROR, - (errmsg("failed to retrieve master key \"%s\" from keyring.", masterKeyInfo->keyId.versioned_name))); + recursion--; return NULL; } @@ -290,8 +312,13 @@ GetMasterKey(void) memcpy(masterKey->keyData, keyInfo->data.data, keyInfo->data.len); masterKey->keyLength = keyInfo->data.len; - Assert(MyDatabaseId == masterKey->keyInfo.databaseId); - push_master_key_to_cache(masterKey); + Assert(dbOid == masterKey->keyInfo.databaseId); +#ifdef PERCONA_FORK + if (spcOid == GLOBALTABLESPACE_OID) + TDEPutGlCatKeyInCache(masterKey); + else +#endif + push_master_key_to_cache(masterKey); /* Release the exclusive locks here */ LWLockRelease(lock_cache); @@ -300,6 +327,7 @@ 
GetMasterKey(void) if (masterKeyInfo) pfree(masterKeyInfo); + recursion--; return masterKey; } @@ -313,12 +341,11 @@ GetMasterKey(void) * to make sure if some other caller has not added a master key for * same database while we were waiting for the lock. */ - -static TDEMasterKey * -set_master_key_with_keyring(const char *key_name, GenericKeyring *keyring, bool ensure_new_key) +TDEMasterKey * +set_master_key_with_keyring(const char *key_name, GenericKeyring *keyring, + Oid dbOid, Oid spcOid, bool ensure_new_key) { TDEMasterKey *masterKey = NULL; - Oid dbOid = MyDatabaseId; LWLock *lock_files = tde_lwlock_mk_files(); LWLock *lock_cache = tde_lwlock_mk_cache(); bool is_dup_key = false; @@ -334,14 +361,15 @@ set_master_key_with_keyring(const char *key_name, GenericKeyring *keyring, bool /* TODO: Add the key in the cache? */ if (is_dup_key == false) - is_dup_key = (pg_tde_get_master_key(dbOid) != NULL); + is_dup_key = (pg_tde_get_master_key(dbOid, spcOid) != NULL); if (is_dup_key == false) { const keyInfo *keyInfo = NULL; masterKey = palloc(sizeof(TDEMasterKey)); - masterKey->keyInfo.databaseId = MyDatabaseId; + masterKey->keyInfo.databaseId = dbOid; + masterKey->keyInfo.tablespaceId = spcOid; masterKey->keyInfo.keyId.version = DEFAULT_MASTER_KEY_VERSION; masterKey->keyInfo.keyringId = keyring->key_id; strncpy(masterKey->keyInfo.keyId.name, key_name, TDE_KEY_NAME_LEN); @@ -370,7 +398,7 @@ set_master_key_with_keyring(const char *key_name, GenericKeyring *keyring, bool XLogBeginInsert(); XLogRegisterData((char *) &masterKey->keyInfo, sizeof(TDEMasterKeyInfo)); XLogInsert(RM_TDERMGR_ID, XLOG_TDE_ADD_MASTER_KEY); - + push_master_key_to_cache(masterKey); } @@ -396,7 +424,10 @@ set_master_key_with_keyring(const char *key_name, GenericKeyring *keyring, bool bool SetMasterKey(const char *key_name, const char *provider_name, bool ensure_new_key) { - TDEMasterKey *master_key = set_master_key_with_keyring(key_name, GetKeyProviderByName(provider_name), ensure_new_key); + 
TDEMasterKey *master_key = set_master_key_with_keyring(key_name, + GetKeyProviderByName(provider_name), + MyDatabaseId, MyDatabaseTableSpace, + ensure_new_key); return (master_key != NULL); } @@ -404,10 +435,11 @@ SetMasterKey(const char *key_name, const char *provider_name, bool ensure_new_ke bool RotateMasterKey(const char *new_key_name, const char *new_provider_name, bool ensure_new_key) { - TDEMasterKey *master_key = GetMasterKey(); + TDEMasterKey *master_key = GetMasterKey(MyDatabaseId, MyDatabaseTableSpace, NULL); TDEMasterKey new_master_key; const keyInfo *keyInfo = NULL; GenericKeyring *keyring; + bool is_rotated; /* * Let's set everything the same as the older master key and @@ -446,8 +478,13 @@ RotateMasterKey(const char *new_key_name, const char *new_provider_name, bool en new_master_key.keyLength = keyInfo->data.len; memcpy(new_master_key.keyData, keyInfo->data.data, keyInfo->data.len); - clear_master_key_cache(MyDatabaseId, MyDatabaseTableSpace); - return pg_tde_perform_rotate_key(master_key, &new_master_key); + is_rotated = pg_tde_perform_rotate_key(master_key, &new_master_key); + if (is_rotated) { + clear_master_key_cache(master_key->keyInfo.databaseId); + push_master_key_to_cache(&new_master_key); + } + + return is_rotated; } /* @@ -459,7 +496,7 @@ xl_tde_perform_rotate_key(XLogMasterKeyRotate *xlrec) bool ret; ret = pg_tde_write_map_keydata_files(xlrec->map_size, xlrec->buff, xlrec->keydata_size, &xlrec->buff[xlrec->map_size]); - clear_master_key_cache(MyDatabaseId, MyDatabaseTableSpace); + clear_master_key_cache(MyDatabaseId); return ret; } @@ -469,7 +506,7 @@ xl_tde_perform_rotate_key(XLogMasterKeyRotate *xlrec) * If ensure_new_key is true, then we will keep on incrementing the version number * till we get a key name that is not present in the keyring */ -static keyInfo * +keyInfo * load_latest_versioned_key_name(TDEMasterKeyInfo *mastere_key_info, GenericKeyring *keyring, bool ensure_new_key) { KeyringReturnCodes kr_ret; @@ -553,7 +590,7 @@ 
GetMasterKeyProviderId(void) } { /* Master key not present in cache. Try Loading it from the info file */ - masterKeyInfo = pg_tde_get_master_key(dbOid); + masterKeyInfo = pg_tde_get_master_key(dbOid, MyDatabaseTableSpace); if (masterKeyInfo) { keyringId = masterKeyInfo->keyringId; @@ -609,7 +646,7 @@ static void push_master_key_to_cache(TDEMasterKey *masterKey) { TDEMasterKey *cacheEntry = NULL; - Oid databaseId = MyDatabaseId; + Oid databaseId = masterKey->keyInfo.databaseId; bool found = false; cacheEntry = dshash_find_or_insert(get_master_key_Hash(), &databaseId, &found); @@ -653,18 +690,18 @@ master_key_startup_cleanup(int tde_tbl_count, void* arg) void cleanup_master_key_info(Oid databaseId, Oid tablespaceId) { - clear_master_key_cache(databaseId, tablespaceId); + clear_master_key_cache(databaseId); /* * TODO: Although should never happen. Still verify if any table in the * database is using tde */ /* Remove the tde files */ - pg_tde_delete_tde_files(databaseId); + pg_tde_delete_tde_files(databaseId, tablespaceId); } static void -clear_master_key_cache(Oid databaseId, Oid tablespaceId) +clear_master_key_cache(Oid databaseId) { TDEMasterKey *cache_entry; @@ -737,9 +774,14 @@ Datum pg_tde_master_key_info(PG_FUNCTION_ARGS) (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("function returning record called in context that cannot accept type record"))); - master_key = GetMasterKey(); + master_key = GetMasterKey(MyDatabaseId, MyDatabaseTableSpace, NULL); if (master_key == NULL) - PG_RETURN_NULL(); + { + ereport(ERROR, + (errmsg("Master key does not exists for the database"), + errhint("Use set_master_key interface to set the master key"))); + PG_RETURN_NULL(); + } keyring = GetKeyProviderByID(master_key->keyInfo.keyringId); diff --git a/src/common/pg_tde_utils.c b/src/common/pg_tde_utils.c index 5f5c9e93..af359693 100644 --- a/src/common/pg_tde_utils.c +++ b/src/common/pg_tde_utils.c @@ -33,6 +33,12 @@ get_tde_table_am_oid(void) return get_table_am_oid("pg_tde", 
false); } +Oid +get_tde2_table_am_oid(void) +{ + return get_table_am_oid("pg_tde2", false); +} + /* * Returns the list of OIDs for all TDE tables in a database */ @@ -208,4 +214,4 @@ extract_json_option_value(Datum top_json, const char* field_name) elog(ERROR, "Unknown type for object %s: %s", field_name, type_cstr); return NULL; } -} \ No newline at end of file +} diff --git a/src/include/access/pg_tde_tdemap.h b/src/include/access/pg_tde_tdemap.h index c6ff9083..e0e06b63 100644 --- a/src/include/access/pg_tde_tdemap.h +++ b/src/include/access/pg_tde_tdemap.h @@ -10,6 +10,7 @@ #include "utils/rel.h" #include "access/xlog_internal.h" +#include "catalog/pg_tablespace_d.h" #include "catalog/tde_master_key.h" #include "storage/fd.h" #include "storage/relfilelocator.h" @@ -46,22 +47,28 @@ typedef struct XLogRelKey RelKeyData relKey; } XLogRelKey; -extern void pg_tde_create_key_map_entry(const RelFileLocator *newrlocator, Relation rel); +extern RelKeyData* pg_tde_create_key_map_entry(const RelFileLocator *newrlocator); extern void pg_tde_write_key_map_entry(const RelFileLocator *rlocator, RelKeyData *enc_rel_key_data, TDEMasterKeyInfo *master_key_info); extern void pg_tde_delete_key_map_entry(const RelFileLocator *rlocator); extern void pg_tde_free_key_map_entry(const RelFileLocator *rlocator, off_t offset); -extern RelKeyData *pg_tde_get_key_from_fork(const RelFileLocator *rlocator); extern RelKeyData *GetRelationKey(RelFileLocator rel); +extern RelKeyData *GetRelationKeyWithKeyring(RelFileLocator rel, GenericKeyring *keyring); -extern void pg_tde_cleanup_path_vars(void); -extern void pg_tde_delete_tde_files(Oid dbOid); +extern void pg_tde_delete_tde_files(Oid dbOid, Oid spcOid); -extern TDEMasterKeyInfo *pg_tde_get_master_key(Oid dbOid); +extern TDEMasterKeyInfo *pg_tde_get_master_key(Oid dbOid, Oid spcOid); extern bool pg_tde_save_master_key(TDEMasterKeyInfo *master_key_info); extern bool pg_tde_perform_rotate_key(TDEMasterKey *master_key, TDEMasterKey 
*new_master_key); extern bool pg_tde_write_map_keydata_files(off_t map_size, char *m_file_data, off_t keydata_size, char *k_file_data); +extern RelKeyData* tde_create_rel_key(Oid rel_id, InternalKey *key, TDEMasterKeyInfo *master_key_info); +extern RelKeyData *tde_encrypt_rel_key(TDEMasterKey *master_key, RelKeyData *rel_key_data, const RelFileLocator *rlocator); +extern RelKeyData *tde_decrypt_rel_key(TDEMasterKey *master_key, RelKeyData *enc_rel_key_data, const RelFileLocator *rlocator); + +extern void pg_tde_set_db_file_paths(const RelFileLocator *rlocator, char *map_path, char *keydata_path); const char * tde_sprint_key(InternalKey *k); +extern void pg_tde_put_key_into_map(Oid rel_id, RelKeyData *key); + #endif /*PG_TDE_MAP_H*/ diff --git a/src/include/access/pg_tde_xlog.h b/src/include/access/pg_tde_xlog.h index bc32c979..17a1c65c 100644 --- a/src/include/access/pg_tde_xlog.h +++ b/src/include/access/pg_tde_xlog.h @@ -9,7 +9,12 @@ #ifndef PG_TDE_XLOG_H #define PG_TDE_XLOG_H +#include "postgres.h" +#include "access/xlog.h" #include "access/xlog_internal.h" +#ifdef PERCONA_FORK +#include "access/xlog_smgr.h" +#endif /* TDE XLOG resource manager */ #define XLOG_TDE_ADD_RELATION_KEY 0x00 @@ -32,4 +37,28 @@ static const RmgrData pg_tde_rmgr = { .rm_identify = pg_tde_rmgr_identify }; +#ifdef PERCONA_FORK + +/* XLog encryption staff */ + +extern Size TDEXLogEncryptBuffSize(void); + +#define XLOG_TDE_ENC_BUFF_ALIGNED_SIZE add_size(TDEXLogEncryptBuffSize(), PG_IO_ALIGN_SIZE) + +extern void TDEXLogShmemInit(void); + +extern ssize_t pg_tde_xlog_seg_read(int fd, void *buf, size_t count, off_t offset); +extern ssize_t pg_tde_xlog_seg_write(int fd, const void *buf, size_t count, off_t offset); + +static const XLogSmgr tde_xlog_smgr = { + .seg_read = pg_tde_xlog_seg_read, + .seg_write = pg_tde_xlog_seg_write, +}; + +extern void TDEXLogSmgrInit(void); + +extern void XLogInitGUC(void); + +#endif + #endif /* PG_TDE_XLOG_H */ diff --git a/src/include/access/pg_tdeam.h 
b/src/include/access/pg_tdeam.h index df3de661..13f5795f 100644 --- a/src/include/access/pg_tdeam.h +++ b/src/include/access/pg_tdeam.h @@ -333,4 +333,7 @@ extern void HeapCheckForSerializableConflictOut(bool visible, Relation relation, /* Defined in pg_tdeam_handler.c */ extern bool is_pg_tde_rel(Relation rel); +const TableAmRoutine * +GetPGTdeamTableAmRoutine(void); + #endif /* PG_TDEAM_H */ diff --git a/src/include/catalog/tde_global_catalog.h b/src/include/catalog/tde_global_catalog.h new file mode 100644 index 00000000..5dd44b9e --- /dev/null +++ b/src/include/catalog/tde_global_catalog.h @@ -0,0 +1,41 @@ +/*------------------------------------------------------------------------- + * + * tde_global_catalog.h + * Global catalog key management + * + * src/include/catalog/tde_global_catalog.h + * + *------------------------------------------------------------------------- + */ + +#ifndef TDE_GLOBAL_CATALOG_H +#define TDE_GLOBAL_CATALOG_H + +#include "postgres.h" + +#include "catalog/tde_master_key.h" + +/* + * Needed for global data (WAL etc) keys identification in caches and storage. + * We take the IDs of the oid type operators, so there is no overlap with the "real" + * catalog object possible.
+ */ +#define GLOBAL_DATA_TDE_OID 607 /* Global objects fake "db" */ +#define XLOG_TDE_OID 608 + +#define GLOBAL_SPACE_RLOCATOR(_obj_oid) (RelFileLocator) { \ + GLOBALTABLESPACE_OID, \ + GLOBAL_DATA_TDE_OID, \ + _obj_oid \ +} + +extern void TDEGlCatInitGUC(void); +extern Size TDEGlCatEncStateSize(void); +extern void TDEGlCatShmemInit(void); +extern void TDEGlCatKeyInit(void); + +extern TDEMasterKey *TDEGetGlCatKeyFromCache(void); +extern void TDEPutGlCatKeyInCache(TDEMasterKey *mkey); +extern RelKeyData *GetGlCatInternalKey(Oid obj_id); + +#endif /*TDE_GLOBAL_CATALOG_H*/ diff --git a/src/include/catalog/tde_keyring.h b/src/include/catalog/tde_keyring.h index 29b2100e..5eb310cd 100644 --- a/src/include/catalog/tde_keyring.h +++ b/src/include/catalog/tde_keyring.h @@ -54,8 +54,15 @@ typedef struct VaultV2Keyring char vault_mount_path[MAXPGPATH]; } VaultV2Keyring; +typedef union KeyringProviders +{ + FileKeyring file; + VaultV2Keyring vault; +} KeyringProviders; + extern List *GetAllKeyringProviders(void); extern GenericKeyring *GetKeyProviderByName(const char *provider_name); extern GenericKeyring *GetKeyProviderByID(int provider_id); +extern ProviderType get_keyring_provider_from_typename(char *provider_type); #endif /*TDE_KEYRING_H*/ diff --git a/src/include/catalog/tde_master_key.h b/src/include/catalog/tde_master_key.h index b12e2b1f..2f70c9c9 100644 --- a/src/include/catalog/tde_master_key.h +++ b/src/include/catalog/tde_master_key.h @@ -17,6 +17,7 @@ #include "nodes/pg_list.h" #include "storage/lwlock.h" +#define DEFAULT_MASTER_KEY_VERSION 1 #define MASTER_KEY_NAME_LEN TDE_KEY_NAME_LEN #define MAX_MASTER_KEY_VERSION_NUM 100000 @@ -68,9 +69,16 @@ extern LWLock *tde_lwlock_mk_cache(void); extern bool save_master_key_info(TDEMasterKeyInfo *masterKeyInfo); extern Oid GetMasterKeyProviderId(void); -extern TDEMasterKey* GetMasterKey(void); +extern TDEMasterKey* GetMasterKey(Oid dbOid, Oid spcOid, GenericKeyring *keyring); extern bool SetMasterKey(const char 
*key_name, const char *provider_name, bool ensure_new_key); extern bool RotateMasterKey(const char *new_key_name, const char *new_provider_name, bool ensure_new_key); extern bool xl_tde_perform_rotate_key(XLogMasterKeyRotate *xlrec); - +extern TDEMasterKey *set_master_key_with_keyring(const char *key_name, + GenericKeyring *keyring, + Oid dbOid, Oid spcOid, + bool ensure_new_key); +extern keyInfo *load_latest_versioned_key_name(TDEMasterKeyInfo *mastere_key_info, + GenericKeyring *keyring, + bool ensure_new_key); + #endif /*PG_TDE_MASTER_KEY_H*/ diff --git a/src/include/common/pg_tde_utils.h b/src/include/common/pg_tde_utils.h index c33c14b6..9e2d856e 100644 --- a/src/include/common/pg_tde_utils.h +++ b/src/include/common/pg_tde_utils.h @@ -12,10 +12,11 @@ #include "nodes/pg_list.h" extern Oid get_tde_table_am_oid(void); +extern Oid get_tde2_table_am_oid(void); extern List *get_all_tde_tables(void); extern int get_tde_tables_count(void); extern const char *extract_json_cstr(Datum json, const char* field_name); const char *extract_json_option_value(Datum top_json, const char* field_name); -#endif /*PG_TDE_UTILS_H*/ \ No newline at end of file +#endif /*PG_TDE_UTILS_H*/ diff --git a/src/include/pg_tde_defines.h b/src/include/pg_tde_defines.h index aaa49722..74f47f9d 100644 --- a/src/include/pg_tde_defines.h +++ b/src/include/pg_tde_defines.h @@ -22,6 +22,7 @@ //#define ENCRYPTION_DEBUG 1 //#define KEYRING_DEBUG 1 //#define TDE_FORK_DEBUG 1 +// #define TDE_XLOG_DEBUG 1 #define pg_tde_fill_tuple heap_fill_tuple #define pg_tde_form_tuple heap_form_tuple @@ -37,8 +38,6 @@ #define pgstat_count_pg_tde_insert pgstat_count_heap_insert #define pg_tde_getattr heap_getattr -#define GetPGTdeamTableAmRoutine GetHeapamTableAmRoutine - #define TDE_PageAddItem(rel, oid, blkno, page, item, size, offsetNumber, overwrite, is_heap) \ PGTdePageAddItemExtended(rel, oid, blkno, page, item, size, offsetNumber, \ ((overwrite) ? 
PAI_OVERWRITE : 0) | \ diff --git a/src/include/pg_tde_event_capture.h b/src/include/pg_tde_event_capture.h new file mode 100644 index 00000000..d93226ff --- /dev/null +++ b/src/include/pg_tde_event_capture.h @@ -0,0 +1,33 @@ +/*------------------------------------------------------------------------- + * + * pg_tde_event_capture.h + * + *------------------------------------------------------------------------- + */ +#ifndef PG_TDE_EVENT_CAPTURE_H +#define PG_TDE_EVENT_CAPTURE_H + +#include "postgres.h" +#include "nodes/parsenodes.h" + +typedef enum TdeCreateEventType +{ + TDE_UNKNOWN_CREATE_EVENT, + TDE_TABLE_CREATE_EVENT, + TDE_INDEX_CREATE_EVENT +} TdeCreateEventType; + +typedef struct TdeCreateEvent +{ + TdeCreateEventType eventType; /* DDL statement type */ + bool encryptMode; /* true when the table uses encryption */ + Oid baseTableOid; /* Oid of table on which index is being + * created on. For create table statement this + * contains InvalidOid */ + RangeVar *relation; /* Reference to the parsed relation from + * create statement */ +} TdeCreateEvent; + +extern TdeCreateEvent * GetCurrentTdeCreateEvent(void); + +#endif diff --git a/src/include/smgr/pg_tde_smgr.h b/src/include/smgr/pg_tde_smgr.h new file mode 100644 index 00000000..359b34db --- /dev/null +++ b/src/include/smgr/pg_tde_smgr.h @@ -0,0 +1,4 @@ + +#pragma once + +extern void RegisterStorageMgr(); diff --git a/src/keyring/.keyring_api.c.swp b/src/keyring/.keyring_api.c.swp deleted file mode 100644 index e730fc72..00000000 Binary files a/src/keyring/.keyring_api.c.swp and /dev/null differ diff --git a/src/keyring/keyring_file.c b/src/keyring/keyring_file.c index 812e9fab..f5d8648d 100644 --- a/src/keyring/keyring_file.c +++ b/src/keyring/keyring_file.c @@ -1,11 +1,11 @@ /*------------------------------------------------------------------------- * * keyring_file.c - * Implements the file provider keyring - * routines. + * Implements the file provider keyring + * routines. 
* * IDENTIFICATION - * contrib/pg_tde/src/keyring/keyring_file.c + * contrib/pg_tde/src/keyring/keyring_file.c * *------------------------------------------------------------------------- */ @@ -40,21 +40,21 @@ static keyInfo* get_key_by_name(GenericKeyring* keyring, const char* key_name, bool throw_error, KeyringReturnCodes *return_code) { keyInfo* key = NULL; - File file = -1; + int fd = -1; FileKeyring* file_keyring = (FileKeyring*)keyring; off_t bytes_read = 0; off_t curr_pos = 0; *return_code = KEYRING_CODE_SUCCESS; - file = PathNameOpenFile(file_keyring->file_name, PG_BINARY); - if (file < 0) + fd = BasicOpenFile(file_keyring->file_name, PG_BINARY); + if (fd < 0) return NULL; key = palloc(sizeof(keyInfo)); while(true) { - bytes_read = FileRead(file, key, sizeof(keyInfo), curr_pos, WAIT_EVENT_DATA_FILE_READ); + bytes_read = pg_pread(fd, key, sizeof(keyInfo), curr_pos); curr_pos += bytes_read; if (bytes_read == 0 ) @@ -62,13 +62,13 @@ get_key_by_name(GenericKeyring* keyring, const char* key_name, bool throw_error, /* * Empty keyring file is considered as a valid keyring file that has no keys */ - FileClose(file); + close(fd); pfree(key); return NULL; } if (bytes_read != sizeof(keyInfo)) { - FileClose(file); + close(fd); pfree(key); /* Corrupt file */ *return_code = KEYRING_CODE_DATA_CORRUPTED; @@ -81,21 +81,21 @@ get_key_by_name(GenericKeyring* keyring, const char* key_name, bool throw_error, } if (strncasecmp(key->name.name, key_name, sizeof(key->name.name)) == 0) { - FileClose(file); + close(fd); return key; } } - FileClose(file); + close(fd); pfree(key); - return NULL; + return NULL; } static KeyringReturnCodes set_key_by_name(GenericKeyring* keyring, keyInfo *key, bool throw_error) { - off_t bytes_written = 0; + off_t bytes_written = 0; off_t curr_pos = 0; - File file; + int fd; FileKeyring* file_keyring = (FileKeyring*)keyring; keyInfo *existing_key; KeyringReturnCodes return_code = KEYRING_CODE_SUCCESS; @@ -111,26 +111,35 @@ 
set_key_by_name(GenericKeyring* keyring, keyInfo *key, bool throw_error) return KEYRING_CODE_INVALID_OPERATION; } - file = PathNameOpenFile(file_keyring->file_name, O_CREAT | O_RDWR | PG_BINARY); - if (file < 0) - { + fd = BasicOpenFile(file_keyring->file_name, O_CREAT | O_RDWR | PG_BINARY); + if (fd < 0) + { ereport(throw_error?ERROR:WARNING, (errcode_for_file_access(), errmsg("Failed to open keyring file %s :%m", file_keyring->file_name))); - return KEYRING_CODE_RESOURCE_NOT_ACCESSABLE; - } + return KEYRING_CODE_RESOURCE_NOT_ACCESSABLE; + } /* Write key to the end of file */ - curr_pos = FileSize(file); - bytes_written = FileWrite(file, key, sizeof(keyInfo), curr_pos, WAIT_EVENT_DATA_FILE_WRITE); + curr_pos = lseek(fd, 0, SEEK_END); + bytes_written = pg_pwrite(fd, key, sizeof(keyInfo), curr_pos); if (bytes_written != sizeof(keyInfo)) - { - FileClose(file); - ereport(throw_error?ERROR:WARNING, - (errcode_for_file_access(), - errmsg("keyring file \"%s\" can't be written: %m", - file_keyring->file_name))); - return KEYRING_CODE_RESOURCE_NOT_ACCESSABLE; - } - FileClose(file); + { + close(fd); + ereport(throw_error?ERROR:WARNING, + (errcode_for_file_access(), + errmsg("keyring file \"%s\" can't be written: %m", + file_keyring->file_name))); + return KEYRING_CODE_RESOURCE_NOT_ACCESSABLE; + } + if (pg_fsync(fd) != 0) + { + close(fd); + ereport(throw_error?ERROR:WARNING, + (errcode_for_file_access(), + errmsg("could not fsync file \"%s\": %m", + file_keyring->file_name))); + return KEYRING_CODE_RESOURCE_NOT_ACCESSABLE; + } + close(fd); return KEYRING_CODE_SUCCESS; } diff --git a/src/pg_tde.c b/src/pg_tde.c index 7b076592..2298fe67 100644 --- a/src/pg_tde.c +++ b/src/pg_tde.c @@ -31,6 +31,10 @@ #include "keyring/keyring_vault.h" #include "utils/builtins.h" #include "pg_tde_defs.h" +#include "smgr/pg_tde_smgr.h" +#ifdef PERCONA_FORK +#include "catalog/tde_global_catalog.h" +#endif #define MAX_ON_INSTALLS 5 @@ -59,6 +63,11 @@ tde_shmem_request(void) { Size sz = 
TdeRequiredSharedMemorySize(); int required_locks = TdeRequiredLocksCount(); + +#ifdef PERCONA_FORK + sz = add_size(sz, XLOG_TDE_ENC_BUFF_ALIGNED_SIZE); +#endif + if (prev_shmem_request_hook) prev_shmem_request_hook(); RequestAddinShmemSpace(sz); @@ -74,6 +83,14 @@ tde_shmem_startup(void) TdeShmemInit(); AesInit(); + +#ifdef PERCONA_FORK + TDEGlCatShmemInit(); + TDEGlCatKeyInit(); + + TDEXLogShmemInit(); + TDEXLogSmgrInit(); +#endif } void @@ -86,7 +103,10 @@ _PG_init(void) keyringRegisterVariables(); InitializeMasterKeyInfo(); - +#ifdef PERCONA_FORK + XLogInitGUC(); + TDEGlCatInitGUC(); +#endif prev_shmem_request_hook = shmem_request_hook; shmem_request_hook = tde_shmem_request; prev_shmem_startup_hook = shmem_startup_hook; @@ -98,6 +118,8 @@ _PG_init(void) InstallFileKeyring(); InstallVaultV2Keyring(); RegisterCustomRmgr(RM_TDERMGR_ID, &pg_tde_rmgr); + + RegisterStorageMgr(); } Datum pg_tde_extension_initialize(PG_FUNCTION_ARGS) diff --git a/src/pg_tde_event_capture.c b/src/pg_tde_event_capture.c new file mode 100644 index 00000000..d0dc191b --- /dev/null +++ b/src/pg_tde_event_capture.c @@ -0,0 +1,147 @@ +/*------------------------------------------------------------------------- + * + * pg_tde_event_capture.c + * event trigger logic to identify if we are creating the encrypted table or not. 
+ * + * IDENTIFICATION + * contrib/pg_tde/src/pg_tde_event_capture.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "funcapi.h" +#include "fmgr.h" +#include "utils/rel.h" +#include "utils/builtins.h" +#include "catalog/pg_class.h" +#include "access/table.h" +#include "catalog/pg_event_trigger.h" +#include "catalog/namespace.h" +#include "commands/event_trigger.h" +#include "common/pg_tde_utils.h" +#include "pg_tde_event_capture.h" + +/* Global variable that gets set at ddl start and cleared out at ddl end */ +TdeCreateEvent tdeCurrentCreateEvent = {.relation = NULL}; + + +static void reset_current_tde_create_event(void); + +PG_FUNCTION_INFO_V1(pg_tde_ddl_command_start_capture); +PG_FUNCTION_INFO_V1(pg_tde_ddl_command_end_capture); + +TdeCreateEvent * +GetCurrentTdeCreateEvent(void) +{ + return &tdeCurrentCreateEvent; +} + +/* + * pg_tde_ddl_command_start_capture is an event trigger function triggered + * at the start of any DDL command execution. + * + * The function specifically focuses on CREATE INDEX and CREATE TABLE statements, + * aiming to determine if the create table or the table on which an index is being created + * utilizes the pg_tde access method for encryption. + * Once it confirms the table's encryption requirement or usage, + * it updates the table information in the tdeCurrentCreateEvent global variable. + * This information can be accessed by SMGR or any other component + * during the execution of this DDL statement.
+ */ +Datum +pg_tde_ddl_command_start_capture(PG_FUNCTION_ARGS) +{ + /* TODO: verify update_compare_indexes failure related to this */ +#ifdef PERCONA_FORK + EventTriggerData *trigdata; + Node *parsetree; + + /* Ensure this function is being called as an event trigger */ + if (!CALLED_AS_EVENT_TRIGGER(fcinfo)) /* internal error */ + ereport(ERROR, + (errmsg("Function can only be fired by event trigger manager"))); + + trigdata = (EventTriggerData *) fcinfo->context; + parsetree = trigdata->parsetree; + + elog(DEBUG2, "EVENT TRIGGER (%s) %s", trigdata->event, nodeToString(parsetree)); + reset_current_tde_create_event(); + + if (IsA(parsetree, IndexStmt)) + { + IndexStmt *stmt = (IndexStmt *) parsetree; + Oid relationId = RangeVarGetRelid(stmt->relation, NoLock, true); + + tdeCurrentCreateEvent.eventType = TDE_INDEX_CREATE_EVENT; + tdeCurrentCreateEvent.baseTableOid = relationId; + tdeCurrentCreateEvent.relation = stmt->relation; + + if (relationId != InvalidOid) + { + LOCKMODE lockmode = AccessShareLock; /* TODO. Verify lock mode? */ + Relation rel = table_open(relationId, lockmode); + + if (rel->rd_rel->relam == get_tde_table_am_oid()) + { + /* We are creating the index on encrypted table */ + /* set the global state */ + tdeCurrentCreateEvent.encryptMode = true; + } + else + table_close(rel, lockmode); + } + else + ereport(DEBUG1, (errmsg("Failed to get relation Oid for relation:%s", stmt->relation->relname))); + + } + else if (IsA(parsetree, CreateStmt)) + { + CreateStmt *stmt = (CreateStmt *) parsetree; + + tdeCurrentCreateEvent.eventType = TDE_TABLE_CREATE_EVENT; + tdeCurrentCreateEvent.relation = stmt->relation; + + if (stmt->accessMethod && !strcmp(stmt->accessMethod, "pg_tde2")) + { + tdeCurrentCreateEvent.encryptMode = true; + } + } +#endif + PG_RETURN_NULL(); +} + +/* + * trigger function called at the end of DDL statement execution. + * It just clears the tdeCurrentCreateEvent global variable. 
+ */ +Datum +pg_tde_ddl_command_end_capture(PG_FUNCTION_ARGS) +{ +#ifdef PERCONA_FORK + /* Ensure this function is being called as an event trigger */ + if (!CALLED_AS_EVENT_TRIGGER(fcinfo)) /* internal error */ + ereport(ERROR, + (errmsg("Function can only be fired by event trigger manager"))); + + elog(DEBUG1, "Type:%s EncryptMode:%s, Oid:%d, Relation:%s ", + (tdeCurrentCreateEvent.eventType == TDE_INDEX_CREATE_EVENT) ? "CREATE INDEX" : + (tdeCurrentCreateEvent.eventType == TDE_TABLE_CREATE_EVENT) ? "CREATE TABLE" : "UNKNOWN", + tdeCurrentCreateEvent.encryptMode ? "true" : "false", + tdeCurrentCreateEvent.baseTableOid, + tdeCurrentCreateEvent.relation ? tdeCurrentCreateEvent.relation->relname : "UNKNOWN"); + + /* All we need to do is to clear the event state */ + reset_current_tde_create_event(); +#endif + PG_RETURN_NULL(); +} + +static void +reset_current_tde_create_event(void) +{ + tdeCurrentCreateEvent.encryptMode = false; + tdeCurrentCreateEvent.eventType = TDE_UNKNOWN_CREATE_EVENT; + tdeCurrentCreateEvent.baseTableOid = InvalidOid; + tdeCurrentCreateEvent.relation = NULL; +} diff --git a/src/smgr/pg_tde_smgr.c b/src/smgr/pg_tde_smgr.c new file mode 100644 index 00000000..7c4aa49f --- /dev/null +++ b/src/smgr/pg_tde_smgr.c @@ -0,0 +1,213 @@ + +#include "smgr/pg_tde_smgr.h" +#include "postgres.h" +#include "storage/smgr.h" +#include "storage/md.h" +#include "catalog/catalog.h" +#include "encryption/enc_aes.h" +#include "access/pg_tde_tdemap.h" +#include "pg_tde_event_capture.h" + +#ifdef PERCONA_FORK + +// TODO: implement proper IV +// iv should be based on blocknum + relfile, available in the API +static char iv[16] = {0,}; + +static RelKeyData* +tde_smgr_get_key(SMgrRelation reln) +{ + // TODO: This recursion counter is a dirty hack until the metadata is in the catalog + // As otherwise we would call GetMasterKey recursively and deadlock + static int recursion = 0; + + if(IsCatalogRelationOid(reln->smgr_rlocator.locator.relNumber)) + { + // do not try to 
encrypt/decrypt catalog tables
+		return NULL;
+	}
+
+	if(recursion != 0)
+	{
+		return NULL;
+	}
+
+	recursion++;
+
+	// GetMasterKey() takes (dbOid, spcOid, keyring): pass the database oid, not the relation number
+	if(GetMasterKey(reln->smgr_rlocator.locator.dbOid, reln->smgr_rlocator.locator.spcOid, NULL)==NULL)
+	{
+		recursion--;
+		return NULL;
+	}
+
+	TdeCreateEvent* event = GetCurrentTdeCreateEvent();
+
+	// if this is a CREATE TABLE, we have to generate the key
+	if(event->encryptMode == true && event->eventType == TDE_TABLE_CREATE_EVENT)
+	{
+		recursion--;
+		return pg_tde_create_key_map_entry(&reln->smgr_rlocator.locator);
+	}
+
+	// if this is a CREATE INDEX, we have to load the key based on the table
+	if(event->encryptMode == true && event->eventType == TDE_INDEX_CREATE_EVENT)
+	{
+		// For now keep it simple and create separate key for indexes
+		// Later we might modify the map infrastructure to support the same keys
+		recursion--;
+		return pg_tde_create_key_map_entry(&reln->smgr_rlocator.locator);
+	}
+
+	// otherwise, see if we have a key for the relation, and return if yes
+	RelKeyData* rkd = GetRelationKey(reln->smgr_rlocator.locator);
+
+	recursion--;
+
+	return rkd;
+}
+
+void
+tde_mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
+		 const void **buffers, BlockNumber nblocks, bool skipFsync)
+{
+	AesInit();
+
+	RelKeyData* rkd = tde_smgr_get_key(reln);
+
+	if(rkd == NULL)
+	{
+		// No key: write through unchanged. Checked before allocating so nothing leaks here.
+		mdwritev(reln, forknum, blocknum, buffers, nblocks, skipFsync);
+		return;
+	}
+
+	char* local_blocks = malloc( BLCKSZ * (nblocks+1) );
+	char* local_blocks_aligned = (char*)TYPEALIGN(PG_IO_ALIGN_SIZE, local_blocks);
+	const void** local_buffers = malloc ( sizeof(void*) * nblocks );
+
+	for(int i = 0; i < nblocks; ++i )
+	{
+		local_buffers[i] = &local_blocks_aligned[i*BLCKSZ];
+		int out_len = BLCKSZ;
+		AesEncrypt(rkd->internal_key.key, iv, ((char**)buffers)[i], BLCKSZ, local_buffers[i], &out_len);
+	}
+
+	mdwritev(reln, forknum, blocknum,
+		 local_buffers, nblocks, skipFsync);
+
+	free(local_blocks);
+	free(local_buffers);
+}
+
+void
+tde_mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
+			 const void *buffer, bool skipFsync)
+{
+	// Smgr extend hook: relations without a key go straight to mdextend();
+	// otherwise the new block is encrypted into a scratch copy and that copy is written.
+	AesInit();
+	RelKeyData* rkd = tde_smgr_get_key(reln);
+
+	if(rkd == NULL)
+	{
+		// Nothing has been allocated yet, so this early return cannot leak
+		mdextend(reln, forknum, blocknum, buffer, skipFsync);
+		return;
+	}
+
+	// Two blocks: one for the data, one as slack for rounding up to PG_IO_ALIGN_SIZE
+	char* local_blocks = palloc( BLCKSZ * (1+1) );
+	char* local_blocks_aligned = (char*)TYPEALIGN(PG_IO_ALIGN_SIZE, local_blocks);
+	int out_len = BLCKSZ;
+
+	AesEncrypt(rkd->internal_key.key, iv, ((char*)buffer), BLCKSZ, local_blocks_aligned, &out_len);
+	mdextend(reln, forknum, blocknum, local_blocks_aligned, skipFsync);
+	pfree(local_blocks);
+}
+
+void
+tde_mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
+			void **buffers, BlockNumber nblocks)
+{
+	AesInit();
+
+	mdreadv(reln, forknum, blocknum, buffers, nblocks);
+
+	RelKeyData* rkd = tde_smgr_get_key(reln);
+
+	if(rkd == NULL)
+	{
+		return;
+	}
+
+	for(int i = 0; i < nblocks; ++i)
+	{
+		bool allZero = true;
+		// NOTE(review): only the first 32 bytes of the block are examined, not the whole block
+		for(int j = 0; j < 32; ++j)
+		{
+			if(((char**)buffers)[i][j] != 0)
+			{
+				// Postgres creates all zero blocks in an optimized route, which we do not try
+				// to encrypt.
+				// Instead we detect if a block is all zero at decryption time, and
+				// leave it as is.
+				// This could be a security issue later, but it is a good first prototype
+				allZero = false;
+				break;
+			}
+		}
+		if(allZero) continue;
+
+		int out_len = BLCKSZ;
+		AesDecrypt(rkd->internal_key.key, iv, ((char**)buffers)[i], BLCKSZ, ((char**)buffers)[i], &out_len);
+	}
+}
+
+void
+tde_mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
+{
+	// This is the only function that gets called during actual CREATE TABLE/INDEX (EVENT TRIGGER)
+	// so we create the key here by loading it
+	// Later calls then decide to encrypt or not based on the existence of the key
+	tde_smgr_get_key(reln);
+	mdcreate(reln, forknum, isRedo);
+}
+
+static SMgrId tde_smgr_id;
+// md* callbacks, except create/extend/readv/writev which go through the TDE wrappers
+static const struct f_smgr tde_smgr = {
+	.name = "tde",
+	.smgr_init = mdinit,
+	.smgr_shutdown = NULL,
+	.smgr_open = mdopen,
+	.smgr_close = mdclose,
+	.smgr_create = tde_mdcreate,
+	.smgr_exists = mdexists,
+	.smgr_unlink = mdunlink,
+	.smgr_extend = tde_mdextend,
+	.smgr_zeroextend = mdzeroextend,
+	.smgr_prefetch = mdprefetch,
+	.smgr_readv = tde_mdreadv,
+	.smgr_writev = tde_mdwritev,
+	.smgr_writeback = mdwriteback,
+	.smgr_nblocks = mdnblocks,
+	.smgr_truncate = mdtruncate,
+	.smgr_immedsync = mdimmedsync,
+};
+
+// Registers the tde_smgr table and stores the returned id in storage_manager_id
+void RegisterStorageMgr(void)
+{
+	tde_smgr_id = smgr_register(&tde_smgr, 0);
+
+	// TODO: figure out how this part should work in a real extension
+	storage_manager_id = tde_smgr_id;
+}
+
+#else
+void RegisterStorageMgr(void)
+{
+}
+#endif /* PERCONA_FORK */
diff --git a/src/transam/pg_tde_xact_handler.c b/src/transam/pg_tde_xact_handler.c index ff84c3d9..4b0576a0 100644 --- a/src/transam/pg_tde_xact_handler.c +++ b/src/transam/pg_tde_xact_handler.c @@ -52,8 +52,6 @@ pg_tde_xact_callback(XactEvent event, void *arg) { pending_delete_cleanup(); } - - pg_tde_cleanup_path_vars(); } void diff --git a/t/results/001_basic.out b/t/results/001_basic.out deleted file mode 100644 index d6f838c9..00000000 --- a/t/results/001_basic.out +++ /dev/null @@ -1,13 +0,0 @@ -CREATE EXTENSION pg_tde; --- server
restart -CREATE TABLE test_enc(id SERIAL,k INTEGER,PRIMARY KEY (id)) USING pg_tde; -INSERT INTO test_enc (k) VALUES (5),(6); -SELECT * FROM test_enc ORDER BY id ASC; -1|5 -2|6 --- server restart -SELECT * FROM test_enc ORDER BY id ASC; -1|5 -2|6 -DROP TABLE test_enc; -DROP EXTENSION pg_tde;