Skip to content

Commit

Permalink
MDEV-27436: binlog corruption (/tmp no space left on device at the sa…
Browse files Browse the repository at this point in the history
…me moment)

This commit fixes several bugs in error handling around disk full when
writing the statement/transaction binlog caches:

1. If the error occurs during a non-transactional statement, the code
attempts to binlog the partially executed statement (as it cannot roll
back). The stmt_cache->error was still set from the disk full error. This
caused MYSQL_BIN_LOG::write_cache() to get an error while trying to read the
cache to copy it to the binlog. This was then wrongly interpreted as a disk
full error writing to the binlog file. As a result, a partial event group
containing just a GTID event (no query or commit) was binlogged. Fixed by
checking if an error is set in the statement cache, and if so binlog an
INCIDENT event instead of a corrupt event group, as for other errors.

2. For LOAD DATA LOCAL INFILE, if a disk full error occured while writing to
the statement cache, the code would attempt to abort and read-and-discard
any remaining data sent by the client. The discard code would however
continue trying to write data to the statement cache, and wrongly interpret
another disk full error as end-of-file from the client. This left the client
connection with extra data which corrupts the communication for the next
command, as well as again causing an corrupt/incomplete event to be
binlogged. Fixed by restoring the default read function before reading any
remaining data from the client connection.

Reviewed-by: Andrei Elkin <andrei.elkin@mariadb.com>
Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
  • Loading branch information
knielsen committed Oct 31, 2023
1 parent e52777f commit 6fa69ad
Show file tree
Hide file tree
Showing 8 changed files with 286 additions and 8 deletions.
38 changes: 38 additions & 0 deletions mysql-test/suite/rpl/r/rpl_binlog_cache_disk_full_loaddata.result
@@ -0,0 +1,38 @@
include/master-slave.inc
[connection master]
connection master;
SET @save_binlog_stmt_cache_size= @@GLOBAL.binlog_stmt_cache_size;
SET GLOBAL binlog_stmt_cache_size= 4096;
CALL mtr.add_suppression('"No space left on device".*An incident event is written to binary log');
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=MyISAM;
FLUSH STATUS;
SHOW STATUS LIKE "binlog_stmt_cache%";
Variable_name Value
Binlog_stmt_cache_disk_use 0
Binlog_stmt_cache_use 0
SET @old_dbug= @@SESSION.debug_dbug;
SET SESSION debug_dbug="+d,load_data_binlog_cache_error";
LOAD DATA CONCURRENT LOCAL INFILE 'std_data/bug30435_5k.txt'
REPLACE INTO TABLE t1 (a);
ERROR HY000: Error writing file '<FILENAME>' (Errcode: 28 "No space left on device")
SET SESSION debug_dbug= @old_dbug;
SHOW STATUS LIKE "binlog_stmt_cache%";
Variable_name Value
Binlog_stmt_cache_disk_use 1
Binlog_stmt_cache_use 1
SELECT IF(COUNT(*) > 0 AND COUNT(*) < 5000,
"ok",
CONCAT("ERROR! Row count ", COUNT(*), " not as expected for partially executed query"))
AS check_result
FROM t1;
check_result
ok
connection slave;
include/wait_for_slave_sql_error_and_skip.inc [errno=1590]
SELECT COUNT(*) FROM t1;
COUNT(*)
0
connection master;
SET GLOBAL binlog_stmt_cache_size= @save_binlog_stmt_cache_size;
DROP TABLE t1;
include/rpl_end.inc
51 changes: 51 additions & 0 deletions mysql-test/suite/rpl/r/rpl_binlog_cache_disk_full_row.result
@@ -0,0 +1,51 @@
include/master-slave.inc
[connection master]
connection master;
SET @save_binlog_stmt_cache_size= @@GLOBAL.binlog_stmt_cache_size;
SET GLOBAL binlog_stmt_cache_size= 4096;
CALL mtr.add_suppression('"No space left on device".*An incident event is written to binary log');
CREATE TABLE t1 (a INT PRIMARY KEY, b VARCHAR(255)) ENGINE=MyISAM;
FLUSH STATUS;
SHOW STATUS LIKE "binlog_stmt_cache%";
Variable_name Value
Binlog_stmt_cache_disk_use 0
Binlog_stmt_cache_use 0
INSERT INTO t1 VALUES (0, CONCAT("?", "-", REPEAT("x", 200)));
INSERT INTO t1 SELECT a+1, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
INSERT INTO t1 SELECT a+2, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
INSERT INTO t1 SELECT a+4, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
INSERT INTO t1 SELECT a+8, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
INSERT INTO t1 SELECT a+16, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
INSERT INTO t1 SELECT a+32, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
INSERT INTO t1 SELECT a+64, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
INSERT INTO t1 SELECT a+128, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
SHOW STATUS LIKE "binlog_stmt_cache%";
Variable_name Value
Binlog_stmt_cache_disk_use 2
Binlog_stmt_cache_use 9
SET @old_dbug= @@SESSION.debug_dbug;
SET SESSION debug_dbug="+d,simulate_disk_full_at_flush_pending";
INSERT INTO t1 SELECT a+256, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
ERROR HY000: Error writing file '<FILENAME>' (Errcode: 28 "No space left on device")
SET SESSION debug_dbug= @old_dbug;
SHOW STATUS LIKE "binlog_stmt_cache%";
Variable_name Value
Binlog_stmt_cache_disk_use 3
Binlog_stmt_cache_use 10
SELECT IF(COUNT(*) > 256 AND COUNT(*) < 512,
"ok",
CONCAT("ERROR! Row count ", COUNT(*), " not as expected for partially executed query"))
AS check_result
FROM t1;
check_result
ok
ALTER TABLE t1 COMMENT '<mumble>';
connection slave;
include/wait_for_slave_sql_error_and_skip.inc [errno=1590]
SELECT COUNT(*) FROM t1;
COUNT(*)
256
connection master;
SET GLOBAL binlog_stmt_cache_size= @save_binlog_stmt_cache_size;
DROP TABLE t1;
include/rpl_end.inc
47 changes: 47 additions & 0 deletions mysql-test/suite/rpl/t/rpl_binlog_cache_disk_full_loaddata.test
@@ -0,0 +1,47 @@
--source include/have_binlog_format_statement.inc
--source include/have_debug.inc
--source include/master-slave.inc

--connection master
# Set minimal cache size so smaller transaction can trigger spill to disk.
SET @save_binlog_stmt_cache_size= @@GLOBAL.binlog_stmt_cache_size;
SET GLOBAL binlog_stmt_cache_size= 4096;

CALL mtr.add_suppression('"No space left on device".*An incident event is written to binary log');
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=MyISAM;

FLUSH STATUS;
SHOW STATUS LIKE "binlog_stmt_cache%";
SET @old_dbug= @@SESSION.debug_dbug;
SET SESSION debug_dbug="+d,load_data_binlog_cache_error";
--replace_regex /Error writing file '[^']+'/Error writing file '<FILENAME>'/
--error 3
LOAD DATA CONCURRENT LOCAL INFILE 'std_data/bug30435_5k.txt'
REPLACE INTO TABLE t1 (a);
SET SESSION debug_dbug= @old_dbug;
SHOW STATUS LIKE "binlog_stmt_cache%";
# The actual number of rows left after the disk full error may change as
# binlog event sizes are modified. So here we just test that we get partial
# update from the last INSERT..SELECT that gets disk full error.
SELECT IF(COUNT(*) > 0 AND COUNT(*) < 5000,
"ok",
CONCAT("ERROR! Row count ", COUNT(*), " not as expected for partially executed query"))
AS check_result
FROM t1;

--save_master_pos

--connection slave
--let $slave_sql_errno= 1590
--source include/wait_for_slave_sql_error_and_skip.inc

--sync_with_master
SELECT COUNT(*) FROM t1;

# Cleanup

--connection master
SET GLOBAL binlog_stmt_cache_size= @save_binlog_stmt_cache_size;
DROP TABLE t1;

--source include/rpl_end.inc
61 changes: 61 additions & 0 deletions mysql-test/suite/rpl/t/rpl_binlog_cache_disk_full_row.test
@@ -0,0 +1,61 @@
--source include/have_binlog_format_row.inc
--source include/have_debug.inc
--source include/master-slave.inc

--connection master
# Set minimal cache size so smaller transaction can trigger spill to disk.
SET @save_binlog_stmt_cache_size= @@GLOBAL.binlog_stmt_cache_size;
SET GLOBAL binlog_stmt_cache_size= 4096;

CALL mtr.add_suppression('"No space left on device".*An incident event is written to binary log');
CREATE TABLE t1 (a INT PRIMARY KEY, b VARCHAR(255)) ENGINE=MyISAM;

FLUSH STATUS;
SHOW STATUS LIKE "binlog_stmt_cache%";
INSERT INTO t1 VALUES (0, CONCAT("?", "-", REPEAT("x", 200)));
INSERT INTO t1 SELECT a+1, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
INSERT INTO t1 SELECT a+2, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
INSERT INTO t1 SELECT a+4, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
INSERT INTO t1 SELECT a+8, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
INSERT INTO t1 SELECT a+16, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
INSERT INTO t1 SELECT a+32, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
INSERT INTO t1 SELECT a+64, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
INSERT INTO t1 SELECT a+128, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
SHOW STATUS LIKE "binlog_stmt_cache%";

SET @old_dbug= @@SESSION.debug_dbug;
SET SESSION debug_dbug="+d,simulate_disk_full_at_flush_pending";
--replace_regex /Error writing file '[^']+'/Error writing file '<FILENAME>'/
--error 3
INSERT INTO t1 SELECT a+256, CONCAT(a, "-", REPEAT("x", 200)) FROM t1;
SET SESSION debug_dbug= @old_dbug;
SHOW STATUS LIKE "binlog_stmt_cache%";
# The actual number of rows left after the disk full error may change as
# binlog event sizes are modified. So here we just test that we get partial
# update from the last INSERT..SELECT that gets disk full error.
SELECT IF(COUNT(*) > 256 AND COUNT(*) < 512,
"ok",
CONCAT("ERROR! Row count ", COUNT(*), " not as expected for partially executed query"))
AS check_result
FROM t1;

# A random extra event that helped show the bug that a partial event
# group was binlogged.
ALTER TABLE t1 COMMENT '<mumble>';

--save_master_pos

--connection slave
--let $slave_sql_errno= 1590
--source include/wait_for_slave_sql_error_and_skip.inc

--sync_with_master
SELECT COUNT(*) FROM t1;

# Cleanup

--connection master
SET GLOBAL binlog_stmt_cache_size= @save_binlog_stmt_cache_size;
DROP TABLE t1;

--source include/rpl_end.inc
58 changes: 53 additions & 5 deletions sql/log.cc
Expand Up @@ -2228,6 +2228,23 @@ bool MYSQL_BIN_LOG::check_write_error(THD *thd)
}


/*
Check if there was an error while writing the statement cache.
If the cache content is corrupt due to an error, we should write an incident
event to the binlog rather than write corrupt data to it.
*/
bool
MYSQL_BIN_LOG::check_cache_error(THD *thd, binlog_cache_data *cache_data)
{
if (!cache_data)
return false;
if (check_write_error(thd))
return true;
if (!cache_data->empty() && cache_data->cache_log.error)
return true;
return false;
}

/**
@note
How do we handle this (unlikely but legal) case:
Expand Down Expand Up @@ -5874,7 +5891,7 @@ int THD::binlog_write_table_map(TABLE *table, bool is_transactional,
engines, data is written to table but writing to binary log failed. In
these scenarios rollback is not possible. Hence report an incident.
*/
if (mysql_bin_log.check_write_error(this) && cache_data &&
if (mysql_bin_log.check_cache_error(this, cache_data) &&
lex->stmt_accessed_table(LEX::STMT_WRITES_NON_TRANS_TABLE) &&
table->current_lock == F_WRLCK)
cache_data->set_incident();
Expand Down Expand Up @@ -6005,20 +6022,37 @@ MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
/*
Write pending event to the cache.
*/
#ifndef DBUG_OFF
bool clear_dbug= false;
#endif
DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
{DBUG_SET("+d,simulate_file_write_error");});
{
if (my_b_tell(&cache_data->cache_log) > 10000)
{
DBUG_SET("+d,simulate_file_write_error");
clear_dbug= true;
}
});
if (writer.write(pending))
{
set_write_error(thd, is_transactional);
if (check_write_error(thd) && cache_data &&
if (check_cache_error(thd, cache_data) &&
stmt_has_updated_non_trans_table(thd))
cache_data->set_incident();
delete pending;
cache_data->set_pending(NULL);
DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
{DBUG_SET("-d,simulate_file_write_error");});
{
if (clear_dbug)
DBUG_SET("-d,simulate_file_write_error");
});
DBUG_RETURN(1);
}
DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
{
if (clear_dbug)
DBUG_SET("-d,simulate_file_write_error");
});

delete pending;
}
Expand Down Expand Up @@ -6613,7 +6647,7 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate)
if (unlikely(error))
{
set_write_error(thd, is_trans_cache);
if (check_write_error(thd) && cache_data &&
if (check_cache_error(thd, cache_data) &&
stmt_has_updated_non_trans_table(thd))
cache_data->set_incident();
}
Expand Down Expand Up @@ -8337,6 +8371,20 @@ MYSQL_BIN_LOG::write_transaction_or_stmt(group_commit_entry *entry,
binlog_cache_mngr *mngr= entry->cache_mngr;
DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_or_stmt");

/*
An error in the trx_cache will truncate the cache to the last good
statement, it won't leave a lingering error. Assert that this holds.
*/
DBUG_ASSERT(!(entry->using_trx_cache && !mngr->trx_cache.empty() &&
mngr->get_binlog_cache_log(TRUE)->error));
/*
An error in the stmt_cache would be caught on the higher level and result
in an incident event being written over a (possibly corrupt) cache content.
Assert that this holds.
*/
DBUG_ASSERT(!(entry->using_stmt_cache && !mngr->stmt_cache.empty() &&
mngr->get_binlog_cache_log(FALSE)->error));

if (write_gtid_event(entry->thd, false, entry->using_trx_cache, commit_id))
DBUG_RETURN(ER_ERROR_ON_WRITE);

Expand Down
1 change: 1 addition & 0 deletions sql/log.h
Expand Up @@ -815,6 +815,7 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
int write_cache(THD *thd, IO_CACHE *cache);
void set_write_error(THD *thd, bool is_transactional);
bool check_write_error(THD *thd);
bool check_cache_error(THD *thd, binlog_cache_data *cache_data);

void start_union_events(THD *thd, query_id_t query_id_param);
void stop_union_events(THD *thd);
Expand Down
4 changes: 4 additions & 0 deletions sql/sql_load.cc
Expand Up @@ -253,6 +253,10 @@ class READ_INFO: public Load_data_param
*/
void skip_data_till_eof()
{
#ifndef EMBEDDED_LIBRARY
if (mysql_bin_log.is_open())
cache.read_function= cache.real_read_function;
#endif
while (GET != my_b_EOF)
;
}
Expand Down
34 changes: 31 additions & 3 deletions sql/sql_repl.cc
Expand Up @@ -4499,13 +4499,30 @@ int log_loaded_block(IO_CACHE* file, uchar *Buffer, size_t Count)
/* buffer contains position where we started last read */
uchar* buffer= (uchar*) my_b_get_buffer_start(file);
uint max_event_size= lf_info->thd->variables.max_allowed_packet;
int res;
#ifndef DBUG_OFF
bool did_dbug_inject= false;
#endif

if (lf_info->thd->is_current_stmt_binlog_format_row())
goto ret;
if (lf_info->last_pos_in_file != HA_POS_ERROR &&
lf_info->last_pos_in_file >= my_b_get_pos_in_file(file))
goto ret;

DBUG_EXECUTE_IF("load_data_binlog_cache_error",
{
/*
Simulate "disk full" error in the middle of writing to
the binlog cache.
*/
if (lf_info->last_pos_in_file >= 2*4096)
{
DBUG_SET("+d,simulate_file_write_error");
did_dbug_inject= true;
}
};);

for (block_len= (uint) (my_b_get_bytes_in_buffer(file)); block_len > 0;
buffer += MY_MIN(block_len, max_event_size),
block_len -= MY_MIN(block_len, max_event_size))
Expand All @@ -4517,7 +4534,10 @@ int log_loaded_block(IO_CACHE* file, uchar *Buffer, size_t Count)
MY_MIN(block_len, max_event_size),
lf_info->log_delayed);
if (mysql_bin_log.write(&a))
DBUG_RETURN(1);
{
res= 1;
goto err;
}
}
else
{
Expand All @@ -4526,12 +4546,20 @@ int log_loaded_block(IO_CACHE* file, uchar *Buffer, size_t Count)
MY_MIN(block_len, max_event_size),
lf_info->log_delayed);
if (mysql_bin_log.write(&b))
DBUG_RETURN(1);
{
res= 1;
goto err;
}
lf_info->wrote_create_file= 1;
}
}
ret:
int res= Buffer ? lf_info->real_read_function(file, Buffer, Count) : 0;
res= Buffer ? lf_info->real_read_function(file, Buffer, Count) : 0;
err:
#ifndef DBUG_OFF
if (did_dbug_inject)
DBUG_SET("-d,simulate_file_write_error");
#endif
DBUG_RETURN(res);
}

Expand Down

0 comments on commit 6fa69ad

Please sign in to comment.