Skip to content

Commit fa36221

Browse files
committed
1. Add n_recs field to the header of logbuf. 2. Handle multiple in-flushing logbufs by using a doubly linked list
1 parent 2409bbb commit fa36221

File tree

8 files changed

+616
-319
lines changed

8 files changed

+616
-319
lines changed

storage/innobase/btr/btr0cur.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,9 @@ btr_cur_latch_leaves(
350350
page_id_t(page_id.space(), right_page_no),
351351
page_size, RW_X_LATCH, cursor->index, mtr);
352352
latch_leaves.blocks[2] = get_block;
353+
#if defined(UNIV_PMEMOBJ_PART_PL)
354+
//PM_PPL skip those
355+
#else //original
353356
#ifdef UNIV_BTR_DEBUG
354357
ut_a(page_is_comp(get_block->frame)
355358
== page_is_comp(page));
@@ -359,6 +362,8 @@ btr_cur_latch_leaves(
359362
== page_get_page_no(page));
360363
#endif //UNIV_PMEMOBJ_BUF
361364
#endif /* UNIV_BTR_DEBUG */
365+
366+
#endif /*UNIV_PMEMOBJ_PART_PL*/
362367
if (spatial) {
363368
cursor->rtr_info->tree_blocks[
364369
RTR_MAX_LEVELS + 2] = get_block;

storage/innobase/buf/buf0dblwr.cc

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,11 @@ extern PMEM_WRAPPER* gb_pmw;
4343
extern pfs_os_file_t gb_dbw_file;
4444
#endif /* UNIV_PMEMOBJ_DBW */
4545

46+
#if defined (UNIV_PMEMOBJ_PART_PL)
47+
#include "my_pmemobj.h"
48+
extern PMEM_WRAPPER* gb_pmw;
49+
#endif /*UNIV_PMEMOBJ_PART_PL*/
50+
4651
#ifndef UNIV_HOTBACKUP
4752

4853
/** The doublewrite buffer */
@@ -952,7 +957,21 @@ buf_dblwr_check_block(
952957
if (block->skip_flush_check) {
953958
return;
954959
}
955-
960+
//#if defined (UNIV_PMEMOBJ_PART_PL)
961+
// //debug
962+
// ulint type = fil_page_get_type(block->frame);
963+
// PMEM_PAGE_LOG_BLOCK* plogblock;
964+
// buf_page_t* bpage;
965+
//
966+
// if (type == 0){
967+
// plogblock =
968+
// pm_ppl_get_log_block_by_key(gb_pmw->pop, gb_pmw->ppl, block->page.id.fold());
969+
//
970+
// bpage = (buf_page_t*) &(block->page);
971+
//
972+
// assert(0);
973+
// }
974+
//#endif //UNIV_PMEMOBJ_PART_PL
956975
switch (fil_page_get_type(block->frame)) {
957976
case FIL_PAGE_INDEX:
958977
case FIL_PAGE_RTREE:

storage/innobase/buf/buf0flu.cc

Lines changed: 15 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -810,23 +810,6 @@ buf_flush_write_complete(
810810
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
811811

812812
ut_ad(bpage);
813-
#if defined (UNIV_PMEMOBJ_PART_PL)
814-
//tdnguyen test
815-
ulint page_no;
816-
ulint space_id;
817-
byte* frame = ((buf_block_t*) bpage)->frame;
818-
page_no = mach_read_from_4(frame + FIL_PAGE_OFFSET);
819-
space_id = mach_read_from_4(frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
820-
821-
if ((bpage->id.space() != 0
822-
&& bpage->id.space() != space_id)
823-
|| bpage->id.page_no() != page_no) {
824-
printf("PMEM_ERROR in buf_flush_write_complete, input (space %zu, page_no %zu) differ write (space %zu, page_no %zu)\n", bpage->id.space(), bpage->id.page_no(), space_id, page_no);
825-
826-
assert(0);
827-
}
828-
829-
#endif
830813
buf_flush_remove(bpage);
831814

832815
flush_type = buf_page_get_flush_type(bpage);
@@ -844,6 +827,18 @@ buf_flush_write_complete(
844827
#else //original
845828
buf_dblwr_update(bpage, flush_type);
846829
#endif /* UNIV_PMEMOBJ_BUF */
830+
831+
#if defined (UNIV_PMEMOBJ_PART_PL)
832+
//we only call pm_ppl_flush_page once the flushed page has been persisted to storage
833+
pm_ppl_flush_page(
834+
gb_pmw->pop, gb_pmw->ppl,
835+
bpage,
836+
bpage->id.space(),
837+
bpage->id.page_no(),
838+
bpage->id.fold(),
839+
bpage->newest_modification);
840+
841+
#endif //UNIV_PMEMOBJ_PART_PL
847842
}
848843
#endif /* !UNIV_HOTBACKUP */
849844

@@ -1136,27 +1131,9 @@ buf_flush_write_block_low(
11361131
//skip_pm_write:
11371132
#endif /*UNIV_PMEMOBJ_BUF*/
11381133

1139-
#if defined (UNIV_PMEMOBJ_PART_PL)
1140-
1141-
ulint read_space_id = mach_read_from_4(frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
1142-
ulint read_page_no = mach_read_from_4(frame + FIL_PAGE_OFFSET);
1143-
1144-
if(read_space_id != bpage->id.space() || read_page_no != bpage->id.page_no()){
1145-
printf("PMEM_ERROR before pm_ppl_flush_page(), input (space %zu, page_no %zu) differ read (space %zu, page_no %zu)\n", bpage->id.space(), bpage->id.page_no(), read_space_id, read_page_no);
1146-
assert(0);
1147-
}
1148-
else{
1149-
//printf("PMEM_INFO pm_ppl_flush_page(), input (space %zu, page_no %zu)\n", bpage->id.space(), bpage->id.page_no());
1150-
}
1151-
1152-
// PL-NVM without PB-NVM
1153-
pm_ppl_flush_page(
1154-
gb_pmw->pop, gb_pmw->ppl,
1155-
bpage->id.space(),
1156-
bpage->id.page_no(),
1157-
bpage->id.fold(),
1158-
bpage->newest_modification);
1159-
#endif /*UNIV_PMEMOBJ_BUF*/
1134+
#if defined (UNIV_PMEMOBJ_PART_PL)
1135+
pm_ppl_set_flush_state(gb_pmw->pop, gb_pmw->ppl, bpage);
1136+
#endif /*UNIV_PMEMOBJ_PART_PL*/
11601137

11611138
/* Disable use of double-write buffer for temporary tablespace.
11621139
Given the nature and load of temporary tablespace doublewrite buffer

storage/innobase/include/my_pmem_common.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ static const size_t PMEM_GROUP_PARTITION_SIZE= 512;
6464
#define PMEM_MAX_LISTS_PER_BUCKET 2
6565
//#define PMEM_BUF_THRESHOLD 0.8
6666

67-
#define PMEM_LOG_BUF_HEADER_SIZE 4
67+
//#define PMEM_LOG_BUF_HEADER_SIZE 4
68+
#define PMEM_LOG_BUF_HEADER_SIZE 8 /*4-byte real_len, 4-byte n_recs*/
6869

6970
enum {
7071
PMEM_READ = 1,
@@ -103,6 +104,12 @@ enum PMEM_REDO_PHASE{
103104
PMEM_REDO_PHASE2 = 2,
104105
};
105106

107+
enum PMEM_PARSE_RESULT {
108+
PMEM_PARSE_NEED = 1,
109+
PMEM_PARSE_BLOCK_NOT_EXISTED = 2,
110+
PMEM_PARSE_LSN_OLD = 3,
111+
};
112+
106113
enum PMEM_LOG_BLOCK_STATE {
107114
PMEM_FREE_LOG_BLOCK = 1, //the log block is free, a transaction can write its log records
108115
PMEM_COMMIT_LOG_BLOCK = 2, // the transaction is either commit or abort

storage/innobase/include/my_pmemobj.h

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -555,8 +555,8 @@ struct __pmem_page_log_hashed_line {
555555
PMEMrwlock lock;
556556

557557
int hashed_id;
558-
TOID(PMEM_PAGE_LOG_BUF) logbuf; //pointer to current log buffer
559-
TOID(PMEM_PAGE_LOG_BUF) flush_logbuf; //pointer to flushing log buffer
558+
TOID(PMEM_PAGE_LOG_BUF) logbuf; //pointer to current log buffer (head)
559+
TOID(PMEM_PAGE_LOG_BUF) tail_logbuf; //pointer to the oldest log buffer (tail)
560560

561561
uint64_t diskaddr; //log file offset, update when flush log, reset when purging file
562562
uint64_t write_diskaddr; //diskaddr that log recs are durable write write_diskaddr < diskaddr
@@ -614,21 +614,29 @@ struct __pmem_page_log_buf {
614614
PMEM_LOG_BUF_STATE state;
615615
uint64_t size;
616616
uint64_t cur_off; //the current offset (0 - log buf size), reset when switching log_buf
617+
uint64_t n_recs;
617618

618619
int check; //for AIO
619620

620621
uint64_t diskaddr; //write address assigned when the buffer is full
621622

623+
TOID(PMEM_PAGE_LOG_BUF) prev;
624+
TOID(PMEM_PAGE_LOG_BUF) next;
625+
622626
//link with the list free pool
623627
POBJ_LIST_ENTRY(PMEM_PAGE_LOG_BUF) list_entries;
624628
};
625629

626630
/*
627631
* One log block per-page
628-
* Follow the implementation of PMEM_BUF_BLOCK
632+
* Metadata of a page during forward processing
633+
* Persists across a system crash
629634
* */
630635
struct __pmem_page_log_block {
631636
PMEMrwlock lock;
637+
638+
PMEM_BLOCK_STATE state;
639+
632640
bool is_free; //flag
633641
uint64_t bid; //block id
634642
uint64_t key; //fold id
@@ -637,10 +645,16 @@ struct __pmem_page_log_block {
637645

638646
/*LSN */
639647
uint64_t pageLSN; // pageLSN of the NVM-page
640-
uint64_t firstLSN; // LSN of the first log record
641648
uint64_t lastLSN; // LSN of the last log record
649+
650+
uint64_t firstLSN; // LSN of the first log record
642651
uint32_t start_off;// offset of the first log rec of this page on log buffer/ disk
643652
uint32_t start_diskaddr;// diskaddr when the first log rec is written
653+
654+
//these values are used for testing recovery
655+
bool first_rec_found;
656+
uint32_t first_rec_size;
657+
mlog_id_t first_rec_type;
644658
};
645659

646660
/*
@@ -662,6 +676,7 @@ struct __pmem_recv_line {
662676
ulint scanned_checkpoint_no;
663677

664678
ulint recovered_offset;
679+
ulint recovered_addr;
665680
lsn_t recovered_lsn;
666681

667682
bool found_corrupt_log;
@@ -672,7 +687,7 @@ struct __pmem_recv_line {
672687
lsn_t mlog_checkpoint_lsn;
673688

674689
mem_heap_t* heap; /*!< memory heap of log records and file addresses */
675-
690+
ulint alloc_hash_size; //allocated heap size
676691
hash_table_t* addr_hash;/*!< hash table of file addresses of pages */
677692
ulint n_addrs;/*!< number of not processed hashed file addresses in the hash table */
678693
recv_dblwr_t dblwr;
@@ -1072,12 +1087,6 @@ pm_ppl_hash_get(
10721087
PMEM_PAGE_LOG_HASHED_LINE* pline,
10731088
uint64_t key );
10741089

1075-
1076-
void pm_ppl_hash_add_at_page_read(
1077-
PMEMobjpool* pop,
1078-
PMEM_PAGE_PART_LOG* ppl,
1079-
buf_page_t* bpage);
1080-
10811090
plog_hash_t*
10821091
pm_ppl_hash_add(
10831092
PMEMobjpool* pop,
@@ -1247,10 +1256,17 @@ pm_ppl_commit(
12471256
uint64_t tid,
12481257
uint64_t eid);
12491258

1259+
void
1260+
pm_ppl_set_flush_state(
1261+
PMEMobjpool* pop,
1262+
PMEM_PAGE_PART_LOG* ppl,
1263+
buf_page_t* bpage);
1264+
12501265
void
12511266
pm_ppl_flush_page(
12521267
PMEMobjpool* pop,
12531268
PMEM_PAGE_PART_LOG* ppl,
1269+
buf_page_t* bpage,
12541270
uint64_t space,
12551271
uint64_t page_no,
12561272
uint64_t key,
@@ -1317,6 +1333,8 @@ pm_ppl_parse_recs(
13171333
PMEM_PAGE_LOG_HASHED_LINE* pline,
13181334
byte* recv_buf,
13191335
uint64_t len,
1336+
uint64_t* n_skip1_recs,
1337+
uint64_t* n_skip2_recs,
13201338
uint64_t* n_need_recs
13211339
);
13221340

@@ -1333,6 +1351,7 @@ pm_ppl_recv_parse_log_rec(
13331351
bool apply,
13341352
uint64_t* rec_lsn,
13351353
byte** body,
1354+
PMEM_PARSE_RESULT* parse_res,
13361355
bool* is_need);
13371356

13381357

0 commit comments

Comments
 (0)