Skip to content
Permalink
Browse files
MDEV-18644: Support full_crc32 for page_compressed
This is a follow-up task to MDEV-12026, which introduced
innodb_checksum_algorithm=full_crc32 and a simpler page format.
MDEV-12026 did not enable full_crc32 for page_compressed tables,
which we will be doing now.

This is joint work with Thirunarayanan Balathandayuthapani.

For innodb_checksum_algorithm=full_crc32 we change the
page_compressed format as follows:

FIL_PAGE_TYPE: The most significant bit will be set to indicate
page_compressed format. The least significant bits will contain
the compressed page size, rounded up to a multiple of 256 bytes.

The checksum will be stored in the last 4 bytes of the page
(whether it is the full page or a page_compressed page whose
size is determined by FIL_PAGE_TYPE), covering all preceding
bytes of the page. If encryption is used, then the page will
be encrypted between compression and computing the checksum.
For page_compressed, FIL_PAGE_LSN will not be repeated at
the end of the page.

FSP_SPACE_FLAGS (already implemented as part of MDEV-12026):
We will store the innodb_compression_algorithm that may be used
to compress pages. Previously, the choice of algorithm was written
to each compressed data page separately, and one would be unable
to know in advance which compression algorithm(s) are used.

fil_space_t::full_crc32_page_compressed_len(): Determine if the
page_compressed algorithm of the tablespace needs to know the
exact length of the compressed data. If yes, we will reserve and
write an extra byte for this right before the checksum.

buf_page_is_compressed(): Determine if a page uses page_compressed
(in any innodb_checksum_algorithm).

fil_page_decompress(): Pass also fil_space_t::flags so that the
format can be determined.

buf_page_is_zeroes(): Check if a page is full of zero bytes.

buf_page_full_crc32_is_corrupted(): Renamed from
buf_encrypted_full_crc32_page_is_corrupted(). For full_crc32,
we always simply validate the checksum against the page contents,
while the physical page size is explicitly specified by an
unencrypted part of the page header.

buf_page_full_crc32_size(): Determine the size of a full_crc32 page.

buf_dblwr_check_page_lsn(): Make this a debug-only function, because
it involves potentially costly lookups of fil_space_t.

create_table_info_t::check_table_options(),
ha_innobase::check_if_supported_inplace_alter(): Allow the creation
of SPATIAL INDEX with full_crc32 even when page_compressed is used.

commit_cache_norebuild(): Preserve the compression algorithm when
updating the page_compression_level.

dict_tf_to_fsp_flags(): Set the flags for page compression algorithm.
FIXME: Maybe there should be a table option page_compression_algorithm
and a session variable to back it?
  • Loading branch information
dr-m committed Mar 18, 2019
1 parent 2151aed commit 6b6fa3c
Show file tree
Hide file tree
Showing 30 changed files with 889 additions and 477 deletions.
@@ -281,7 +281,7 @@ static void init_page_size(const byte* buf)
const unsigned flags = mach_read_from_4(buf + FIL_PAGE_DATA
+ FSP_SPACE_FLAGS);

if (FSP_FLAGS_FCRC32_HAS_MARKER(flags)) {
if (fil_space_t::full_crc32(flags)) {
srv_page_size = fil_space_t::logical_size(flags);
physical_page_size = srv_page_size;
return;
@@ -461,7 +461,7 @@ is_page_corrupted(
return (false);
}

if (!zip_size) {
if (!zip_size && (!is_compressed || !use_full_crc32)) {
/* check the stored log sequence numbers
for uncompressed tablespace. */
logseq = mach_read_from_4(buf + FIL_PAGE_LSN + 4);
@@ -613,8 +613,10 @@ static bool update_checksum(byte* page, ulint flags)
}

} else if (use_full_crc32) {
checksum = buf_calc_page_full_crc32(page);
byte* c = page + physical_page_size - FIL_PAGE_FCRC32_CHECKSUM;
ulint payload = buf_page_full_crc32_size(page, NULL, NULL)
- FIL_PAGE_FCRC32_CHECKSUM;
checksum = ut_crc32(page, payload);
byte* c = page + payload;
if (mach_read_from_4(c) == checksum) return false;
mach_write_to_4(c, checksum);
if (is_log_enabled) {
@@ -178,7 +178,9 @@ static void init_ibd_data(ds_local_file_t *local_file, const uchar *buf, size_t
ulint flags = mach_read_from_4(&buf[FIL_PAGE_DATA + FSP_SPACE_FLAGS]);
ulint ssize = FSP_FLAGS_GET_PAGE_SSIZE(flags);
local_file->pagesize= ssize == 0 ? UNIV_PAGE_SIZE_ORIG : ((UNIV_ZIP_SIZE_MIN >> 1) << ssize);
local_file->compressed = (my_bool)FSP_FLAGS_HAS_PAGE_COMPRESSION(flags);
local_file->compressed = fil_space_t::full_crc32(flags)
? fil_space_t::is_compressed(flags)
: bool(FSP_FLAGS_HAS_PAGE_COMPRESSION(flags));

#if defined(_WIN32) && (MYSQL_VERSION_ID > 100200)
/* Make compressed file sparse, on Windows.
@@ -361,7 +361,8 @@ static bool page_is_corrupted(const byte *page, ulint page_no,

if (page_type == FIL_PAGE_PAGE_COMPRESSED
|| page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) {
ulint decomp = fil_page_decompress(tmp_frame, tmp_page);
ulint decomp = fil_page_decompress(tmp_frame, tmp_page,
space->flags);
page_type = mach_read_from_2(tmp_page + FIL_PAGE_TYPE);

return (!decomp
@@ -1,6 +1,6 @@
--- innodb-spatial-index.result
+++ innodb-spatial-index.result
@@ -1,22 +1,25 @@
@@ -1,23 +1,26 @@
CREATE TABLE t1 (pk INT PRIMARY KEY AUTO_INCREMENT,
c VARCHAR(256), coordinate POINT NOT NULL, SPATIAL index(coordinate)) ENGINE=INNODB
ENCRYPTED=YES;
@@ -14,7 +14,8 @@
-Got one of the listed errors
DROP TABLE t1;
CREATE TABLE t1 (pk INT PRIMARY KEY AUTO_INCREMENT,
c VARCHAR(256), coordinate POINT NOT NULL) ENCRYPTED=YES ENGINE=INNODB;
c VARCHAR(256), coordinate POINT NOT NULL)
PAGE_COMPRESSED=YES, ENCRYPTED=YES ENGINE=INNODB;
ALTER TABLE t1 ADD SPATIAL INDEX b1(coordinate), ALGORITHM=COPY;
-Got one of the listed errors
ALTER TABLE t1 ADD SPATIAL INDEX b2(coordinate), FORCE, ALGORITHM=INPLACE;
@@ -1,6 +1,6 @@
--- innodb-spatial-index.result
+++ innodb-spatial-index.result
@@ -1,22 +1,25 @@
@@ -1,23 +1,26 @@
CREATE TABLE t1 (pk INT PRIMARY KEY AUTO_INCREMENT,
c VARCHAR(256), coordinate POINT NOT NULL, SPATIAL index(coordinate)) ENGINE=INNODB
ENCRYPTED=YES;
@@ -14,7 +14,8 @@
-Got one of the listed errors
DROP TABLE t1;
CREATE TABLE t1 (pk INT PRIMARY KEY AUTO_INCREMENT,
c VARCHAR(256), coordinate POINT NOT NULL) ENCRYPTED=YES ENGINE=INNODB;
c VARCHAR(256), coordinate POINT NOT NULL)
PAGE_COMPRESSED=YES, ENCRYPTED=YES ENGINE=INNODB;
ALTER TABLE t1 ADD SPATIAL INDEX b1(coordinate), ALGORITHM=COPY;
-Got one of the listed errors
ALTER TABLE t1 ADD SPATIAL INDEX b2(coordinate), FORCE, ALGORITHM=INPLACE;
@@ -8,7 +8,8 @@ ALTER TABLE t1 ENCRYPTED=YES;
Got one of the listed errors
DROP TABLE t1;
CREATE TABLE t1 (pk INT PRIMARY KEY AUTO_INCREMENT,
c VARCHAR(256), coordinate POINT NOT NULL) ENCRYPTED=YES ENGINE=INNODB;
c VARCHAR(256), coordinate POINT NOT NULL)
PAGE_COMPRESSED=YES, ENCRYPTED=YES ENGINE=INNODB;
ALTER TABLE t1 ADD SPATIAL INDEX b1(coordinate), ALGORITHM=COPY;
Got one of the listed errors
ALTER TABLE t1 ADD SPATIAL INDEX b2(coordinate), FORCE, ALGORITHM=INPLACE;
@@ -45,7 +45,8 @@ DROP TABLE t1;
# Index creation
#
CREATE TABLE t1 (pk INT PRIMARY KEY AUTO_INCREMENT,
c VARCHAR(256), coordinate POINT NOT NULL) ENCRYPTED=YES ENGINE=INNODB;
c VARCHAR(256), coordinate POINT NOT NULL)
PAGE_COMPRESSED=YES, ENCRYPTED=YES ENGINE=INNODB;
# FIXME: MDEV-13851 Encrypted table refuses some form of ALGORITHM=COPY,
# but allows rebuild by FORCE
--error $error_code
@@ -1,4 +1,5 @@
call mtr.add_suppression("InnoDB: Table `test`.`t1` has an unreadable root page");
call mtr.add_suppression("InnoDB: Encrypted page .* in file .*test.t1\\.ibd looks corrupted; key_version=1");
CREATE TABLE t1 (a INT AUTO_INCREMENT PRIMARY KEY, b TEXT, c char(200)) ENGINE=InnoDB page_compressed=yes encrypted=yes;
insert into t1(b, c) values("mariadb", "mariabackup");
# Corrupt the table
@@ -1,5 +1,6 @@
source include/have_file_key_management.inc;
call mtr.add_suppression("InnoDB: Table `test`.`t1` has an unreadable root page");
call mtr.add_suppression("InnoDB: Encrypted page .* in file .*test.t1\\.ibd looks corrupted; key_version=1");
CREATE TABLE t1 (a INT AUTO_INCREMENT PRIMARY KEY, b TEXT, c char(200)) ENGINE=InnoDB page_compressed=yes encrypted=yes;
insert into t1(b, c) values("mariadb", "mariabackup");

@@ -35,6 +35,7 @@ Created 11/5/1995 Heikki Tuuri
#include "mach0data.h"
#include "buf0buf.h"
#include "buf0checksum.h"
#include "ut0crc32.h"
#include <string.h>

#ifndef UNIV_INNOCHECKSUM
@@ -478,7 +479,8 @@ static bool buf_page_decrypt_after_read(buf_page_t* bpage, fil_space_t* space)

byte* dst_frame = bpage->zip.data ? bpage->zip.data :
((buf_block_t*) bpage)->frame;
bool page_compressed = fil_page_is_compressed(dst_frame);
bool page_compressed = space->is_compressed()
&& buf_page_is_compressed(dst_frame, space->flags);
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);

if (bpage->id.page_no() == 0) {
@@ -493,22 +495,31 @@ static bool buf_page_decrypt_after_read(buf_page_t* bpage, fil_space_t* space)
buf_tmp_buffer_t* slot;
uint key_version = buf_page_get_key_version(dst_frame, space->flags);

if (page_compressed) {
if (page_compressed && !key_version) {
/* the page we read is unencrypted */
/* Find free slot from temporary memory array */
decompress:
if (space->full_crc32()
&& buf_page_is_corrupted(true, dst_frame, space->flags)) {
return false;
}

slot = buf_pool_reserve_tmp_slot(buf_pool);
/* For decompression, use crypt_buf. */
buf_tmp_reserve_crypt_buf(slot);

decompress_with_slot:
ut_d(fil_page_type_validate(space, dst_frame));

bpage->write_size = fil_page_decompress(slot->crypt_buf,
dst_frame);
bpage->write_size = fil_page_decompress(
slot->crypt_buf, dst_frame, space->flags);
slot->release();

ut_ad(!bpage->write_size || fil_page_type_validate(space, dst_frame));
ut_ad(!bpage->write_size
|| fil_page_type_validate(space, dst_frame));

ut_ad(space->pending_io());

return bpage->write_size != 0;
}

@@ -545,7 +556,8 @@ static bool buf_page_decrypt_after_read(buf_page_t* bpage, fil_space_t* space)

ut_d(fil_page_type_validate(space, dst_frame));

if (fil_page_is_compressed_encrypted(dst_frame)) {
if ((space->full_crc32() && page_compressed)
|| fil_page_is_compressed_encrypted(dst_frame)) {
goto decompress_with_slot;
}

@@ -882,19 +894,6 @@ buf_page_is_checksum_valid_none(
&& checksum_field1 == BUF_NO_CHECKSUM_MAGIC);
}

/** Compare a checksum value against the full_crc32 checksum of a page.
@param[in]	read_buf	database page
@param[in]	checksum_field	checksum value to compare against
@return whether checksum_field equals the 32-bit value that
buf_calc_page_full_crc32() computes for read_buf */
bool buf_page_is_checksum_valid_full_crc32(
	const byte*	read_buf,
	size_t		checksum_field)
{
	/* Narrow to 32 bits before the comparison, so that the
	comparison is made between the same values as before. */
	return static_cast<uint32_t>(buf_calc_page_full_crc32(read_buf))
		== checksum_field;
}

/** Checks whether the lsn present in the page is lesser than the
peek current lsn.
@param[in] check_lsn lsn to check
@@ -934,6 +933,22 @@ static void buf_page_check_lsn(bool check_lsn, const byte* read_buf)
#endif /* !UNIV_INNOCHECKSUM */
}

/** Check if a page is all zeroes.
The buffer may be empty, may have a length that is not a multiple of
the machine word size, and does not need to be word-aligned.
@param[in]	read_buf	database page
@param[in]	page_size	page frame size, in bytes
@return whether all page_size bytes are zero (true if page_size is 0) */
bool buf_page_is_zeroes(const void* read_buf, size_t page_size)
{
	const unsigned char*		b
		= static_cast<const unsigned char*>(read_buf);
	const unsigned char* const	e = b + page_size;

	/* Scan one machine word at a time. Loading the word via memcpy()
	keeps the access valid for any alignment of read_buf. The loop
	condition is tested before the first load, so a buffer shorter
	than one word is never read past its end. */
	for (size_t word; size_t(e - b) >= sizeof word; b += sizeof word) {
		memcpy(&word, b, sizeof word);
		if (word) {
			return false;
		}
	}

	/* Check the trailing bytes that do not fill a whole word. */
	for (; b != e; b++) {
		if (*b) {
			return false;
		}
	}

	return true;
}

/** Check if a page is corrupt.
@param[in] check_lsn whether the LSN should be checked
@param[in] read_buf database page
@@ -949,14 +964,18 @@ buf_page_is_corrupted(
#ifndef UNIV_INNOCHECKSUM
DBUG_EXECUTE_IF("buf_page_import_corrupt_failure", return(true); );
#endif
if (FSP_FLAGS_FCRC32_HAS_MARKER(fsp_flags)) {
const byte* end = read_buf + srv_page_size;
uint crc32 = mach_read_from_4(end - FIL_PAGE_FCRC32_CHECKSUM);

if (!crc32) {
const byte* b = read_buf;
while (b != end) if (*b++) goto nonzero;
/* An all-zero page is not corrupted. */
if (fil_space_t::full_crc32(fsp_flags)) {
bool compressed = false, corrupted = false;
const uint size = buf_page_full_crc32_size(
read_buf, &compressed, &corrupted);
if (corrupted) {
return true;
}
const byte* end = read_buf + (size - FIL_PAGE_FCRC32_CHECKSUM);
uint crc32 = mach_read_from_4(end);

if (!crc32 && size == srv_page_size
&& buf_page_is_zeroes(read_buf, size)) {
return false;
}

@@ -967,14 +986,17 @@ buf_page_is_corrupted(
crc32++;
}
});
nonzero:
if (!buf_page_is_checksum_valid_full_crc32(read_buf, crc32)) {

if (crc32 != ut_crc32(read_buf,
size - FIL_PAGE_FCRC32_CHECKSUM)) {
return true;
}

if (!mach_read_from_4(read_buf + FIL_PAGE_FCRC32_KEY_VERSION)
if (!compressed
&& !mach_read_from_4(FIL_PAGE_FCRC32_KEY_VERSION
+ read_buf)
&& memcmp(read_buf + (FIL_PAGE_LSN + 4),
end - FIL_PAGE_FCRC32_END_LSN, 4)) {
end - (FIL_PAGE_FCRC32_END_LSN
- FIL_PAGE_FCRC32_CHECKSUM), 4)) {
return true;
}

@@ -3962,7 +3984,6 @@ buf_zip_decompress(
<< ", none: "
<< page_zip_calc_checksum(
frame, size, SRV_CHECKSUM_ALGORITHM_NONE);

goto err_exit;
}

@@ -5846,13 +5867,16 @@ buf_mark_space_corrupt(buf_page_t* bpage, const fil_space_t* space)
/** Check if the encrypted page is corrupted for the full crc32 format.
@param[in] space_id page belongs to space id
@param[in] dst_frame page
@param[in] is_compressed compressed page
@return true if page is corrupted or false if it isn't */
static bool buf_encrypted_full_crc32_page_is_corrupted(
static bool buf_page_full_crc32_is_corrupted(
ulint space_id,
const byte* dst_frame)
const byte* dst_frame,
bool is_compressed)
{
if (memcmp(dst_frame + FIL_PAGE_LSN + 4,
dst_frame + srv_page_size - FIL_PAGE_FCRC32_END_LSN, 4)) {
if (!is_compressed
&& memcmp(dst_frame + FIL_PAGE_LSN + 4,
dst_frame + srv_page_size - FIL_PAGE_FCRC32_END_LSN, 4)) {
return true;
}

@@ -5900,9 +5924,12 @@ static dberr_t buf_page_check_corrupt(buf_page_t* bpage, fil_space_t* space)
if (!still_encrypted) {
/* If traditional checksums match, we assume that page is
not anymore encrypted. */
if (key_version && space->full_crc32()) {
corrupted = buf_encrypted_full_crc32_page_is_corrupted(
space->id, dst_frame);
if (space->full_crc32()
&& !buf_page_is_zeroes(dst_frame, space->physical_size())
&& (key_version || space->is_compressed())) {
corrupted = buf_page_full_crc32_is_corrupted(
space->id, dst_frame,
space->is_compressed());
} else {
corrupted = buf_page_is_corrupted(
true, dst_frame, space->flags);
@@ -7374,7 +7401,7 @@ buf_page_encrypt(
&& (!crypt_data->is_default_encryption()
|| srv_encrypt_tables);

bool page_compressed = FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags);
bool page_compressed = space->is_compressed();

if (!encrypted && !page_compressed) {
/* No need to encrypt or page compress the page.
@@ -7398,6 +7425,19 @@ buf_page_encrypt(

buf_tmp_reserve_crypt_buf(slot);
byte *dst_frame = slot->crypt_buf;
const bool full_crc32 = space->full_crc32();

if (full_crc32) {
/* Write LSN for the full crc32 checksum before
encryption. Because lsn is one of the input for encryption. */
mach_write_to_8(src_frame + FIL_PAGE_LSN,
bpage->newest_modification);
if (!page_compressed) {
mach_write_to_4(
src_frame + srv_page_size - FIL_PAGE_FCRC32_END_LSN,
(ulint) bpage->newest_modification);
}
}

if (!page_compressed) {
not_compressed:
@@ -7427,6 +7467,18 @@ buf_page_encrypt(

bpage->real_size = out_len;

if (full_crc32) {
ut_d(bool compressed = false);
out_len = buf_page_full_crc32_size(tmp,
#ifdef UNIV_DEBUG
&compressed,
#else
NULL,
#endif
NULL);
ut_ad(compressed);
}

/* Workaround for MDEV-15527. */
memset(tmp + out_len, 0 , srv_page_size - out_len);
ut_d(fil_page_type_validate(space, tmp));
@@ -7440,6 +7492,13 @@ buf_page_encrypt(
dst_frame);
}

if (full_crc32) {
compile_time_assert(FIL_PAGE_FCRC32_CHECKSUM == 4);
mach_write_to_4(tmp + out_len - 4,
ut_crc32(tmp, out_len - 4));
ut_ad(!buf_page_is_corrupted(true, tmp, space->flags));
}

slot->out_buf = dst_frame = tmp;
}

0 comments on commit 6b6fa3c

Please sign in to comment.