Skip to content

Commit c391fb1

Browse files
committed
MDEV-35577 Broken recovery after SET GLOBAL innodb_log_file_size
If InnoDB is killed in such a way that there had been no writes to a newly resized ib_logfile101 after it replaced ib_logfile0 in log_t::write_checkpoint(), it is possible that recovery will accidentally interpret some garbage at the end of the log as valid. log_t::write_buf(): To prevent the corruption, write an extra NUL byte at the end of log_sys.resize_buf, like we always did for the main log_sys.buf. To remove some conditional branches from a time critical code path, we instantiate a separate template for the rare case that the log is being resized. Define as __attribute__((always_inline)) so that this will be inlined also in the rare case the log is being resized. log_t::writer: Pointer to the current implementation of log_t::write_buf(). For quick access, this is located in the same cache line with log_sys.latch, which protects it. log_t::writer_update(): Update log_sys.writer. log_t::resize_write_buf(): Remove ATTRIBUTE_NOINLINE ATTRIBUTE_COLD. Now that log_t::write_buf() will be instantiated separately for the rare case of log resizing being in progress, there is no need to forbid this code from being inlined. Thanks to Thirunarayanan Balathandayuthapani for finding the root cause of this bug and suggesting the fix of writing an extra NUL byte. Reviewed by: Debarun Banerjee
1 parent b9e592a commit c391fb1

File tree

3 files changed

+86
-30
lines changed

3 files changed

+86
-30
lines changed

storage/innobase/buf/buf0flu.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1840,6 +1840,7 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept
18401840
if (resizing > 1 && resizing <= checkpoint_lsn)
18411841
{
18421842
ut_ad(is_mmap() == !resize_flush_buf);
1843+
ut_ad(is_mmap() == !resize_log.is_opened());
18431844

18441845
if (!is_mmap())
18451846
{
@@ -1905,6 +1906,7 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept
19051906
resize_flush_buf= nullptr;
19061907
resize_target= 0;
19071908
resize_lsn.store(0, std::memory_order_relaxed);
1909+
writer_update();
19081910
}
19091911

19101912
log_resize_release();

storage/innobase/include/log0log.h

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ wait and check if an already running write is covering the request.
6060
@param durable whether the write needs to be durable
6161
@param callback log write completion callback */
6262
void log_write_up_to(lsn_t lsn, bool durable,
63-
const completion_callback *callback= nullptr);
63+
const completion_callback *callback= nullptr) noexcept;
6464

6565
/** Write to the log file up to the last log entry.
6666
@param durable whether to wait for a durable write to complete */
@@ -249,6 +249,8 @@ struct log_t
249249

250250
/** latest completed checkpoint (protected by latch.wr_lock()) */
251251
Atomic_relaxed<lsn_t> last_checkpoint_lsn;
252+
/** The log writer (protected by latch.wr_lock()) */
253+
lsn_t (*writer)() noexcept;
252254
/** next checkpoint LSN (protected by latch.wr_lock()) */
253255
lsn_t next_checkpoint_lsn;
254256

@@ -270,7 +272,8 @@ struct log_t
270272
@return the value of buf_free */
271273
size_t lock_lsn() noexcept;
272274

273-
/** log sequence number when log resizing was initiated, or 0 */
275+
/** log sequence number when log resizing was initiated;
276+
0 if the log is not being resized, 1 if resize_start() is in progress */
274277
std::atomic<lsn_t> resize_lsn;
275278
/** the log sequence number at the start of the log file */
276279
lsn_t first_lsn;
@@ -358,7 +361,8 @@ struct log_t
358361
inline lsn_t get_write_target() const;
359362

360363
/** @return LSN at which log resizing was started and is still in progress
361-
@retval 0 if no log resizing is in progress */
364+
@retval 0 if no log resizing is in progress
365+
@retval 1 if resize_start() is in progress */
362366
lsn_t resize_in_progress() const noexcept
363367
{ return resize_lsn.load(std::memory_order_relaxed); }
364368

@@ -387,7 +391,6 @@ struct log_t
387391
/** Write resize_buf to resize_log.
388392
@param b resize_buf or resize_flush_buf
389393
@param length the used length of b */
390-
ATTRIBUTE_COLD ATTRIBUTE_NOINLINE
391394
void resize_write_buf(const byte *b, size_t length) noexcept;
392395
public:
393396

@@ -489,6 +492,9 @@ struct log_t
489492
#endif
490493

491494
private:
495+
/** Update writer and mtr_t::finisher */
496+
void writer_update() noexcept;
497+
492498
/** Wait in append_prepare() for buffer to become available
493499
@tparam spin whether to use the spin-only lock_lsn()
494500
@param b the value of buf_free
@@ -551,10 +557,20 @@ struct log_t
551557
@param end_lsn start LSN of the FILE_CHECKPOINT mini-transaction */
552558
inline void write_checkpoint(lsn_t end_lsn) noexcept;
553559

554-
/** Write buf to ib_logfile0.
555-
@tparam release_latch whether to invoke latch.wr_unlock()
560+
/** Variations of write_buf() */
561+
enum resizing_and_latch {
562+
/** skip latch.wr_unlock(); log resizing may or may not be in progress */
563+
RETAIN_LATCH,
564+
/** invoke latch.wr_unlock(); !(resize_in_progress() > 1) */
565+
NOT_RESIZING,
566+
/** invoke latch.wr_unlock(); resize_in_progress() > 1 */
567+
RESIZING
568+
};
569+
570+
/** Write buf to ib_logfile0 and possibly ib_logfile101.
571+
@tparam resizing whether to release latch and whether resize_in_progress()>1
556572
@return the current log sequence number */
557-
template<bool release_latch> inline lsn_t write_buf() noexcept;
573+
template<resizing_and_latch resizing> inline lsn_t write_buf() noexcept;
558574

559575
/** Create the log. */
560576
void create(lsn_t lsn) noexcept;

storage/innobase/log/log0log.cc

Lines changed: 61 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ void log_t::create()
101101
ut_ad(!checkpoint_buf);
102102
ut_ad(!buf);
103103
ut_ad(!flush_buf);
104+
ut_ad(!writer);
104105
max_buf_free= 1;
105106

106107
latch.SRW_LOCK_INIT(log_latch_key);
@@ -330,6 +331,7 @@ bool log_t::attach(log_file_t file, os_offset_t size)
330331

331332
ut_ad(!buf);
332333
ut_ad(!flush_buf);
334+
ut_ad(!writer);
333335
#ifdef HAVE_INNODB_MMAP
334336
if (size)
335337
{
@@ -352,7 +354,7 @@ bool log_t::attach(log_file_t file, os_offset_t size)
352354
# endif
353355
buf= static_cast<byte*>(ptr);
354356
max_buf_free= 1;
355-
mtr_t::finisher_update();
357+
writer_update();
356358
# ifdef HAVE_PMEM
357359
if (is_pmem)
358360
return true;
@@ -395,7 +397,7 @@ bool log_t::attach(log_file_t file, os_offset_t size)
395397
TRASH_ALLOC(buf, buf_size);
396398
TRASH_ALLOC(flush_buf, buf_size);
397399
max_buf_free= buf_size / LOG_BUF_FLUSH_RATIO - LOG_BUF_FLUSH_MARGIN;
398-
mtr_t::finisher_update();
400+
writer_update();
399401
memset_aligned<512>(checkpoint_buf, 0, write_size);
400402

401403
#ifdef HAVE_INNODB_MMAP
@@ -508,6 +510,8 @@ void log_t::close_file()
508510
checkpoint_buf= nullptr;
509511
}
510512

513+
writer= nullptr;
514+
511515
#ifdef HAVE_INNODB_MMAP
512516
if (really_close)
513517
#endif
@@ -671,6 +675,8 @@ log_t::resize_start_status log_t::resize_start(os_offset_t size) noexcept
671675
(lsn_t{write_size - 1} + start_lsn - first_lsn));
672676
else if (!is_opened())
673677
resize_log.close();
678+
679+
writer_update();
674680
}
675681
resize_lsn.store(start_lsn, std::memory_order_relaxed);
676682
status= success ? RESIZE_STARTED : RESIZE_FAILED;
@@ -721,6 +727,7 @@ void log_t::resize_abort() noexcept
721727
resize_lsn.store(0, std::memory_order_relaxed);
722728
}
723729

730+
writer_update();
724731
log_resize_release();
725732
}
726733

@@ -924,7 +931,6 @@ void log_t::persist(lsn_t lsn, bool holding_latch) noexcept
924931
}
925932
#endif
926933

927-
ATTRIBUTE_COLD ATTRIBUTE_NOINLINE
928934
void log_t::resize_write_buf(const byte *b, size_t length) noexcept
929935
{
930936
const size_t block_size_1= write_size - 1;
@@ -959,20 +965,24 @@ void log_t::resize_write_buf(const byte *b, size_t length) noexcept
959965
b, offset, length) == DB_SUCCESS);
960966
}
961967

962-
/** Write buf to ib_logfile0.
963-
@tparam release_latch whether to invoke latch.wr_unlock()
968+
/** Write buf to ib_logfile0 and possibly ib_logfile101.
969+
@tparam resizing whether to release latch and whether resize_in_progress()>1
964970
@return the current log sequence number */
965-
template<bool release_latch> inline lsn_t log_t::write_buf() noexcept
971+
template<log_t::resizing_and_latch resizing>
972+
inline __attribute__((always_inline))
973+
lsn_t log_t::write_buf() noexcept
966974
{
967975
ut_ad(latch_have_wr());
968976
ut_ad(!is_mmap());
969977
ut_ad(!srv_read_only_mode);
978+
ut_ad(resizing == RETAIN_LATCH ||
979+
(resizing == RESIZING) == (resize_in_progress() > 1));
970980

971981
const lsn_t lsn{get_lsn(std::memory_order_relaxed)};
972982

973983
if (write_lsn >= lsn)
974984
{
975-
if (release_latch)
985+
if (resizing != RETAIN_LATCH)
976986
latch.wr_unlock();
977987
ut_ad(write_lsn == lsn);
978988
}
@@ -990,7 +1000,16 @@ template<bool release_latch> inline lsn_t log_t::write_buf() noexcept
9901000
ut_ad(write_size_1 >= 511);
9911001

9921002
const byte *const write_buf{buf};
993-
const byte *const re_write_buf{resize_buf};
1003+
byte *const re_write_buf{resizing == NOT_RESIZING ? nullptr : resize_buf};
1004+
ut_ad(resizing == RETAIN_LATCH ||
1005+
(resizing == NOT_RESIZING) == !re_write_buf);
1006+
ut_ad(!re_write_buf == !resize_flush_buf);
1007+
if (resizing == RESIZING)
1008+
#ifdef _MSC_VER
1009+
__assume(re_write_buf != nullptr);
1010+
#else
1011+
if (!re_write_buf) __builtin_unreachable();
1012+
#endif
9941013
offset&= ~lsn_t{write_size_1};
9951014

9961015
if (length <= write_size_1)
@@ -1002,13 +1021,14 @@ template<bool release_latch> inline lsn_t log_t::write_buf() noexcept
10021021
buf + length, flush_buf);
10031022
... /* TODO: Update the LSN and adjust other code. */
10041023
#else
1005-
# ifdef HAVE_valgrind
10061024
MEM_MAKE_DEFINED(buf + length, (write_size_1 + 1) - length);
1025+
buf[length]= 0; /* ensure that recovery catches EOF */
1026+
#endif
10071027
if (UNIV_LIKELY_NULL(re_write_buf))
1028+
{
10081029
MEM_MAKE_DEFINED(re_write_buf + length, (write_size_1 + 1) - length);
1009-
# endif
1010-
buf[length]= 0; /* allow recovery to catch EOF faster */
1011-
#endif
1030+
re_write_buf[length]= 0;
1031+
}
10121032
length= write_size_1 + 1;
10131033
}
10141034
else
@@ -1023,27 +1043,28 @@ template<bool release_latch> inline lsn_t log_t::write_buf() noexcept
10231043
(We want to avoid memset() while holding exclusive log_sys.latch)
10241044
This block will be overwritten later, once records beyond
10251045
the current LSN are generated. */
1026-
#ifdef HAVE_valgrind
1027-
MEM_MAKE_DEFINED(buf + length, (write_size_1 + 1) - new_buf_free);
1028-
if (UNIV_LIKELY_NULL(re_write_buf))
1029-
MEM_MAKE_DEFINED(re_write_buf + length, (write_size_1 + 1) -
1030-
new_buf_free);
1031-
#endif
1046+
MEM_MAKE_DEFINED(buf + length, (write_size_1 + 1) - new_buf_free);
10321047
buf[length]= 0; /* allow recovery to catch EOF faster */
10331048
length&= ~write_size_1;
10341049
memcpy_aligned<16>(flush_buf, buf + length, (new_buf_free + 15) & ~15);
10351050
if (UNIV_LIKELY_NULL(re_write_buf))
1051+
{
1052+
MEM_MAKE_DEFINED(re_write_buf + length, (write_size_1 + 1) -
1053+
new_buf_free);
10361054
memcpy_aligned<16>(resize_flush_buf, re_write_buf + length,
10371055
(new_buf_free + 15) & ~15);
1056+
re_write_buf[length + new_buf_free]= 0;
1057+
}
10381058
length+= write_size_1 + 1;
10391059
}
10401060

10411061
std::swap(buf, flush_buf);
1042-
std::swap(resize_buf, resize_flush_buf);
1062+
if (UNIV_LIKELY_NULL(re_write_buf))
1063+
std::swap(resize_buf, resize_flush_buf);
10431064
}
10441065

10451066
write_to_log++;
1046-
if (release_latch)
1067+
if (resizing != RETAIN_LATCH)
10471068
latch.wr_unlock();
10481069

10491070
DBUG_PRINT("ib_log", ("write " LSN_PF " to " LSN_PF " at " LSN_PF,
@@ -1103,7 +1124,7 @@ wait and check if an already running write is covering the request.
11031124
@param durable whether the write needs to be durable
11041125
@param callback log write completion callback */
11051126
void log_write_up_to(lsn_t lsn, bool durable,
1106-
const completion_callback *callback)
1127+
const completion_callback *callback) noexcept
11071128
{
11081129
ut_ad(!srv_read_only_mode || log_sys.buf_free_ok());
11091130
ut_ad(lsn != LSN_MAX);
@@ -1148,7 +1169,7 @@ void log_write_up_to(lsn_t lsn, bool durable,
11481169
group_commit_lock::ACQUIRED)
11491170
{
11501171
log_sys.latch.wr_lock(SRW_LOCK_CALL);
1151-
pending_write_lsn= write_lock.release(log_sys.write_buf<true>());
1172+
pending_write_lsn= write_lock.release(log_sys.writer());
11521173
}
11531174

11541175
if (durable)
@@ -1165,6 +1186,23 @@ void log_write_up_to(lsn_t lsn, bool durable,
11651186
}
11661187
}
11671188

1189+
static lsn_t log_writer() noexcept
1190+
{
1191+
return log_sys.write_buf<log_t::NOT_RESIZING>();
1192+
}
1193+
1194+
ATTRIBUTE_COLD static lsn_t log_writer_resizing() noexcept
1195+
{
1196+
return log_sys.write_buf<log_t::RESIZING>();
1197+
}
1198+
1199+
void log_t::writer_update() noexcept
1200+
{
1201+
ut_ad(latch_have_wr());
1202+
writer= resize_in_progress() ? log_writer_resizing : log_writer;
1203+
mtr_t::finisher_update();
1204+
}
1205+
11681206
/** Write to the log file up to the last log entry.
11691207
@param durable whether to wait for a durable write to complete */
11701208
void log_buffer_flush_to_disk(bool durable)
@@ -1232,7 +1270,7 @@ ATTRIBUTE_COLD void log_write_and_flush()
12321270
else
12331271
#endif
12341272
{
1235-
const lsn_t lsn{log_sys.write_buf<false>()};
1273+
const lsn_t lsn{log_sys.write_buf<log_t::RETAIN_LATCH>()};
12361274
write_lock.release(lsn);
12371275
log_flush(lsn);
12381276
}

0 commit comments

Comments
 (0)