Skip to content

Commit dc9c555

Browse files
committed
MDEV-16367 mariabackup: error: failed to copy enough redo log
log_copying_thread(): Keep copying redo log until the end has been reached. (Previously, we would stop copying as soon as the first batch of xtrabackup_copy_logfile() returned.) log_copying: Remove. Use log_copying_running instead. copy_logfile: Remove. Log copying will now only be invoked from 2 places: from xtrabackup_backup_func() for the initial batch, and from log_copying_thread() until all of the log has been read. Use the global variable metadata_to_lsn for determining if the final part of the log is being copied. xtrabackup_copy_log(): Add diagnostic messages for terminating the copying. These messages should be dead code, because log_group_read_log_seg() should be checking for the same. xtrabackup_copy_logfile(): Correct the retrying logic. If anything was successfully read, process the portion that was read. On failure, let the caller close dst_log_file. io_watching_thread(): Stop throttling during the last phase of copying the log (metadata_to_lsn!=0). The final copying of the log will now be performed in log_copying_thread(). stop_backup_threads(): Clean up the message about stopping the log copying thread. xtrabackup_backup_low(): Read metadata_to_lsn from the latest checkpoint header page, even if it is the first page. Let the log_copying_thread take care of copying all of the redo log.
1 parent 619c277 commit dc9c555

File tree

1 file changed

+56
-57
lines changed

1 file changed

+56
-57
lines changed

extra/mariabackup/xtrabackup.cc

Lines changed: 56 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,6 @@ typedef struct xb_filter_entry_struct xb_filter_entry_t;
172172
lsn_t checkpoint_lsn_start;
173173
lsn_t checkpoint_no_start;
174174
static lsn_t log_copy_scanned_lsn;
175-
static bool log_copying;
176175
static bool log_copying_running;
177176
static bool io_watching_thread_running;
178177

@@ -2452,25 +2451,12 @@ xtrabackup_copy_datafile(fil_node_t* node, uint thread_n)
24522451
return(FALSE);
24532452
}
24542453

2455-
/** How to copy a redo log segment in backup */
2456-
enum copy_logfile {
2457-
/** Initial copying: copy at least one block */
2458-
COPY_FIRST,
2459-
/** Tracking while copying data files */
2460-
COPY_ONLINE,
2461-
/** Final copying: copy until the end of the log */
2462-
COPY_LAST
2463-
};
2464-
24652454
/** Copy redo log blocks to the data sink.
2466-
@param[in] copy how to copy the log
2467-
@param[in] start_lsn buffer start LSN
2468-
@param[in] end_lsn buffer end LSN
2469-
@return last scanned LSN (equals to last copied LSN if copy=COPY_LAST)
2455+
@param start_lsn buffer start LSN
2456+
@param end_lsn buffer end LSN
2457+
@return last scanned LSN
24702458
@retval 0 on failure */
2471-
static
2472-
lsn_t
2473-
xtrabackup_copy_log(copy_logfile copy, lsn_t start_lsn, lsn_t end_lsn)
2459+
static lsn_t xtrabackup_copy_log(lsn_t start_lsn, lsn_t end_lsn)
24742460
{
24752461
lsn_t scanned_lsn = start_lsn;
24762462
const byte* log_block = log_sys->buf;
@@ -2485,6 +2471,9 @@ xtrabackup_copy_log(copy_logfile copy, lsn_t start_lsn, lsn_t end_lsn)
24852471
&& scanned_checkpoint - checkpoint >= 0x80000000UL) {
24862472
/* Garbage from a log buffer flush which was made
24872473
before the most recent database recovery */
2474+
msg("mariabackup: checkpoint wrap: "
2475+
LSN_PF ",%zx,%zx\n",
2476+
scanned_lsn, scanned_checkpoint, checkpoint);
24882477
break;
24892478
}
24902479

@@ -2505,6 +2494,8 @@ xtrabackup_copy_log(copy_logfile copy, lsn_t start_lsn, lsn_t end_lsn)
25052494
>= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE
25062495
|| data_len <= LOG_BLOCK_HDR_SIZE) {
25072496
/* We got a garbage block (abrupt end of the log). */
2497+
msg("mariabackup: garbage block: " LSN_PF ",%zu\n",
2498+
scanned_lsn, data_len);
25082499
break;
25092500
} else {
25102501
/* We got a partial block (abrupt end of the log). */
@@ -2524,7 +2515,7 @@ xtrabackup_copy_log(copy_logfile copy, lsn_t start_lsn, lsn_t end_lsn)
25242515

25252516
log_sys->log.scanned_lsn = scanned_lsn;
25262517

2527-
end_lsn = copy == COPY_LAST
2518+
end_lsn = metadata_to_lsn
25282519
? ut_uint64_align_up(scanned_lsn, OS_FILE_LOG_BLOCK_SIZE)
25292520
: scanned_lsn & ~lsn_t(OS_FILE_LOG_BLOCK_SIZE - 1);
25302521

@@ -2544,10 +2535,8 @@ xtrabackup_copy_log(copy_logfile copy, lsn_t start_lsn, lsn_t end_lsn)
25442535
}
25452536

25462537
/** Copy redo log until the current end of the log is reached
2547-
@param copy how to copy the log
25482538
@return whether the operation failed */
2549-
static bool
2550-
xtrabackup_copy_logfile(copy_logfile copy)
2539+
static bool xtrabackup_copy_logfile()
25512540
{
25522541
ut_a(dst_log_file != NULL);
25532542
ut_ad(recv_sys != NULL);
@@ -2560,33 +2549,29 @@ xtrabackup_copy_logfile(copy_logfile copy)
25602549

25612550
start_lsn = ut_uint64_align_down(log_copy_scanned_lsn,
25622551
OS_FILE_LOG_BLOCK_SIZE);
2563-
/* When copying the first or last part of the log, retry a few
2564-
times to ensure that all log up to the last checkpoint will be
2565-
read. */
25662552
do {
25672553
end_lsn = start_lsn + RECV_SCAN_SIZE;
25682554

25692555
xtrabackup_io_throttling();
25702556

25712557
log_mutex_enter();
2572-
25732558
lsn_t lsn= start_lsn;
2574-
for(int retries= 0; retries < 100; retries++) {
2559+
for (int retries= 0; retries < 100; retries++) {
25752560
if (log_group_read_log_seg(log_sys->buf, &log_sys->log,
2576-
&lsn, end_lsn)){
2561+
&lsn, end_lsn)
2562+
|| lsn != start_lsn) {
25772563
break;
25782564
}
2579-
msg("Retrying read of a redo log block");
2565+
msg("Retrying read of log at LSN=" LSN_PF "\n", lsn);
25802566
my_sleep(1000);
25812567
}
25822568

2583-
start_lsn = xtrabackup_copy_log(copy, start_lsn, lsn);
2569+
start_lsn = (lsn == start_lsn)
2570+
? 0 : xtrabackup_copy_log(start_lsn, lsn);
25842571

25852572
log_mutex_exit();
25862573

25872574
if (!start_lsn) {
2588-
ds_close(dst_log_file);
2589-
dst_log_file = NULL;
25902575
msg("mariabackup: Error: xtrabackup_copy_logfile()"
25912576
" failed.\n");
25922577
return(true);
@@ -2612,12 +2597,23 @@ static os_thread_ret_t log_copying_thread(void*)
26122597
*/
26132598
my_thread_init();
26142599

2615-
do {
2600+
for (;;) {
26162601
os_event_reset(log_copying_stop);
26172602
os_event_wait_time_low(log_copying_stop,
26182603
xtrabackup_log_copy_interval * 1000ULL,
26192604
0);
2620-
} while (log_copying && xtrabackup_copy_logfile(COPY_ONLINE));
2605+
if (xtrabackup_copy_logfile()) {
2606+
break;
2607+
}
2608+
2609+
log_mutex_enter();
2610+
bool completed = metadata_to_lsn
2611+
&& metadata_to_lsn < log_copy_scanned_lsn;
2612+
log_mutex_exit();
2613+
if (completed) {
2614+
break;
2615+
}
2616+
}
26212617

26222618
log_copying_running = false;
26232619
my_thread_end();
@@ -2632,7 +2628,7 @@ static os_thread_ret_t io_watching_thread(void*)
26322628
/* currently, for --backup only */
26332629
ut_a(xtrabackup_backup);
26342630

2635-
while (log_copying) {
2631+
while (log_copying_running && !metadata_to_lsn) {
26362632
os_thread_sleep(1000000); /*1 sec*/
26372633
io_ticket = xtrabackup_throttle;
26382634
os_event_set(wait_throttle);
@@ -3662,16 +3658,16 @@ xb_set_max_open_files(
36623658

36633659
static void stop_backup_threads()
36643660
{
3665-
log_copying = false;
3666-
3667-
if (log_copying_stop) {
3661+
if (log_copying_stop && log_copying_running) {
36683662
os_event_set(log_copying_stop);
3669-
msg("mariabackup: Stopping log copying thread.\n");
3663+
fputs("mariabackup: Stopping log copying thread", stderr);
3664+
fflush(stderr);
36703665
while (log_copying_running) {
3671-
msg(".");
3666+
putc('.', stderr);
3667+
fflush(stderr);
36723668
os_thread_sleep(200000); /*0.2 sec*/
36733669
}
3674-
msg("\n");
3670+
putc('\n', stderr);
36753671
os_event_destroy(log_copying_stop);
36763672
}
36773673

@@ -3686,10 +3682,10 @@ static void stop_backup_threads()
36863682

36873683
/** Implement the core of --backup
36883684
@return whether the operation succeeded */
3689-
static
3690-
bool
3691-
xtrabackup_backup_low()
3685+
static bool xtrabackup_backup_low()
36923686
{
3687+
ut_ad(!metadata_to_lsn);
3688+
36933689
/* read the latest checkpoint lsn */
36943690
{
36953691
ulint max_cp_field;
@@ -3698,25 +3694,24 @@ xtrabackup_backup_low()
36983694

36993695
if (recv_find_max_checkpoint(&max_cp_field) == DB_SUCCESS
37003696
&& log_sys->log.format != 0) {
3697+
if (max_cp_field == LOG_CHECKPOINT_1) {
3698+
log_group_header_read(&log_sys->log,
3699+
max_cp_field);
3700+
}
37013701
metadata_to_lsn = mach_read_from_8(
37023702
log_sys->checkpoint_buf + LOG_CHECKPOINT_LSN);
37033703
msg("mariabackup: The latest check point"
37043704
" (for incremental): '" LSN_PF "'\n",
37053705
metadata_to_lsn);
37063706
} else {
3707-
metadata_to_lsn = 0;
37083707
msg("mariabackup: Error: recv_find_max_checkpoint() failed.\n");
37093708
}
37103709
log_mutex_exit();
37113710
}
37123711

37133712
stop_backup_threads();
37143713

3715-
if (!dst_log_file || xtrabackup_copy_logfile(COPY_LAST)) {
3716-
return false;
3717-
}
3718-
3719-
if (ds_close(dst_log_file)) {
3714+
if (ds_close(dst_log_file) || !metadata_to_lsn) {
37203715
dst_log_file = NULL;
37213716
return false;
37223717
}
@@ -3795,6 +3790,7 @@ xtrabackup_backup_func()
37953790
srv_read_only_mode = TRUE;
37963791

37973792
srv_operation = SRV_OPERATION_BACKUP;
3793+
metadata_to_lsn = 0;
37983794

37993795
if (xb_close_files)
38003796
msg("mariabackup: warning: close-files specified. Use it "
@@ -3805,7 +3801,12 @@ xtrabackup_backup_func()
38053801
/* initialize components */
38063802
if(innodb_init_param()) {
38073803
fail:
3804+
metadata_to_lsn = log_copying_running;
38083805
stop_backup_threads();
3806+
if (dst_log_file) {
3807+
ds_close(dst_log_file);
3808+
dst_log_file = NULL;
3809+
}
38093810
if (fil_system) {
38103811
innodb_shutdown();
38113812
}
@@ -3814,7 +3815,6 @@ xtrabackup_backup_func()
38143815

38153816
xb_normalize_init_values();
38163817

3817-
38183818
if (srv_file_flush_method_str == NULL) {
38193819
/* These are the default options */
38203820
srv_file_flush_method = SRV_FSYNC;
@@ -4058,9 +4058,7 @@ xtrabackup_backup_func()
40584058
goto log_write_fail;
40594059
}
40604060

4061-
/* start flag */
4062-
log_copying = TRUE;
4063-
4061+
log_copying_running = true;
40644062
/* start io throttle */
40654063
if(xtrabackup_throttle) {
40664064
os_thread_id_t io_watching_thread_id;
@@ -4078,18 +4076,19 @@ xtrabackup_backup_func()
40784076
if (err != DB_SUCCESS) {
40794077
msg("mariabackup: error: xb_load_tablespaces() failed with"
40804078
" error %s.\n", ut_strerr(err));
4079+
fail_before_log_copying_thread_start:
4080+
log_copying_running = false;
40814081
goto fail;
40824082
}
40834083

40844084
/* copy log file by current position */
40854085
log_copy_scanned_lsn = checkpoint_lsn_start;
40864086
recv_sys->recovered_lsn = log_copy_scanned_lsn;
40874087

4088-
if (xtrabackup_copy_logfile(COPY_FIRST))
4089-
goto fail;
4088+
if (xtrabackup_copy_logfile())
4089+
goto fail_before_log_copying_thread_start;
40904090

40914091
log_copying_stop = os_event_create(0);
4092-
log_copying_running = true;
40934092
os_thread_create(log_copying_thread, NULL, &log_copying_thread_id);
40944093

40954094
/* FLUSH CHANGED_PAGE_BITMAPS call */

0 commit comments

Comments
 (0)