Skip to content

Commit 32911df

Browse files
committed
optimize recovery
1 parent 8263443 commit 32911df

File tree

7 files changed

+525
-1285
lines changed

7 files changed

+525
-1285
lines changed

build_mysql.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
MYSQL_HOME=/home/vldb/mysql-plnvm
33

44
#debug mode
5-
#IS_DEBUG=0
6-
IS_DEBUG=1
5+
IS_DEBUG=0
6+
#IS_DEBUG=1
77

88
#BUILD_NAME="-DUNIV_TRACE_RECOVERY_TIME"
99
#BUILD_NAME="-DUNIV_TRACE_FLUSH_TIME -DUNIV_SKIPLOG"

storage/innobase/buf/buf0flu.cc

Lines changed: 58 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ static const int buf_flush_page_cleaner_priority = -20;
6565
extern ulint gb_flush_time;
6666
#endif
6767
#if defined(UNIV_PMEMOBJ_BUF) || defined (UNIV_PMEMOBJ_PART_PL)
68+
#include <sys/syscall.h>
69+
#include <sys/types.h> //for gettid()
6870
#include "my_pmemobj.h"
6971
#include <libpmemobj.h>
7072
extern PMEM_WRAPPER* gb_pmw;
@@ -4337,6 +4339,13 @@ DECLARE_THREAD(pm_log_redoer_worker)(
43374339
os_thread_create */
43384340
{
43394341
ulint i;
4342+
pid_t thread_id;
4343+
ulint idx;
4344+
ulint lines_per_thread;
4345+
//int dist_mode = 2;
4346+
int dist_mode = 1;
4347+
4348+
ulint start_time, end_time, e_time;
43404349

43414350
PMEM_LOG_REDOER* redoer = gb_pmw->ppl->redoer;
43424351

@@ -4346,9 +4355,18 @@ DECLARE_THREAD(pm_log_redoer_worker)(
43464355
my_thread_init();
43474356

43484357
mutex_enter(&redoer->mutex);
4358+
idx = redoer->n_workers;
43494359
redoer->n_workers++;
43504360
os_event_reset(redoer->is_log_all_closed);
43514361
mutex_exit(&redoer->mutex);
4362+
4363+
//thread_id = os_thread_pf(os_thread_get_curr_id());
4364+
lines_per_thread = redoer->size / (srv_ppl_n_redoer_threads - 1);
4365+
4366+
//thread_id = syscall(SYS_gettid);
4367+
//idx = thread_id % srv_ppl_n_redoer_threads;
4368+
4369+
printf("Redoers thread %zu idx %zu created\n",thread_id, idx);
43524370

43534371
while (true) {
43544372
//worker thread wait until there is is_requested signal
@@ -4361,23 +4379,44 @@ DECLARE_THREAD(pm_log_redoer_worker)(
43614379
//do nothing
43624380
break;
43634381
}
4364-
4365-
for (i = 0; i < redoer->size; i++) {
4366-
mutex_enter(&redoer->mutex);
4382+
/*Method 1: sequential distribute*/
4383+
//for (i = 0; i < redoer->size; i++)
4384+
/*Method 2: segment distribute*/
4385+
for (i = idx * lines_per_thread;
4386+
i < (idx + 1) * lines_per_thread &&
4387+
i < redoer->size
4388+
; i++)
4389+
/*Method 3: evenly distribute*/
4390+
//for (i = idx ;
4391+
// i < redoer->size
4392+
// ; i+= srv_ppl_n_redoer_threads)
4393+
{
4394+
if (dist_mode ==1)
4395+
mutex_enter(&redoer->mutex);
43674396

43684397
pline = redoer->hashed_line_arr[i];
43694398

43704399
if (pline != NULL && !pline->is_redoing)
43714400
{
43724401
pline->is_redoing = true;
4402+
recv_line = pline->recv_line;
43734403
//do not hold the mutex during REDOing
4374-
mutex_exit(&redoer->mutex);
4404+
if (dist_mode ==1)
4405+
mutex_exit(&redoer->mutex);
43754406

43764407
/***this call REDOing for a line ***/
43774408
if (redoer->phase == PMEM_REDO_PHASE1){
43784409
//printf("PMEM_REDO: start REDO_PHASE1 (scan and parse) line %zu ...\n", pline->hashed_id);
43794410

4411+
4412+
//start_time = ut_time_us(NULL);
43804413
bool is_err = pm_ppl_redo_line(gb_pmw->pop, gb_pmw->ppl, pline);
4414+
//end_time = ut_time_us(NULL);
4415+
4416+
//recv_line->redo1_thread_id = idx;
4417+
//recv_line->redo1_start_time = start_time;
4418+
//recv_line->redo1_end_time = end_time;
4419+
//recv_line->redo1_elapse_time = (end_time - start_time);
43814420

43824421
if (is_err){
43834422
printf("PMEM_REDO: error redoing line %zu \n", pline->hashed_id);
@@ -4389,27 +4428,37 @@ DECLARE_THREAD(pm_log_redoer_worker)(
43894428
#if defined (UNIV_PMEMOBJ_PART_PL_DEBUG)
43904429
printf("PMEM_REDO: start REDO_PHASE2 (applying) line %zu ...\n", pline->hashed_id);
43914430
#endif
4431+
//start_time = ut_time_us(NULL);
43924432
pm_ppl_recv_apply_hashed_line(
43934433
gb_pmw->pop, gb_pmw->ppl,
43944434
pline, pline->recv_line->is_ibuf_avail);
4435+
//end_time = ut_time_us(NULL);
43954436

4437+
//recv_line->redo2_thread_id = idx;
4438+
//recv_line->redo2_start_time = start_time;
4439+
//recv_line->redo2_end_time = end_time;
4440+
//recv_line->redo2_elapse_time = (end_time - start_time);
43964441
#if defined (UNIV_PMEMOBJ_PART_PL_DEBUG)
43974442
printf("PMEM_REDO: end REDO_PHASE2 (applying) line %zu\n", pline->hashed_id);
43984443
#endif
43994444
}
44004445

4401-
mutex_enter(&redoer->mutex);
4446+
if (dist_mode ==1)
4447+
mutex_enter(&redoer->mutex);
4448+
44024449
redoer->hashed_line_arr[i] = NULL;
44034450
//redoer->n_requested--;
44044451
redoer->n_remains--;
44054452

44064453
if (redoer->n_remains == 0){
44074454
//this is the last REDO
4408-
mutex_exit(&redoer->mutex);
4455+
if (dist_mode ==1)
4456+
mutex_exit(&redoer->mutex);
44094457
break;
44104458
}
44114459
}
4412-
mutex_exit(&redoer->mutex);
4460+
if (dist_mode ==1)
4461+
mutex_exit(&redoer->mutex);
44134462
} //end for
44144463

44154464
// after this for loop, all lines are either done REDO or REDOing by other threads, this thread has nothing to do
@@ -4419,7 +4468,8 @@ DECLARE_THREAD(pm_log_redoer_worker)(
44194468
mutex_enter(&redoer->mutex);
44204469
redoer->n_workers--;
44214470
if (redoer->n_workers == 0) {
4422-
printf("The last log redoer is closing\n");
4471+
printf("The last log redoer is closing. Redo phase %zu redoer->n_remains %zu ppl->n_redoing_lines %zu\n",
4472+
redoer->phase, redoer->n_remains, gb_pmw->ppl->n_redoing_lines);
44234473
//trigger the coordinator (the pm_ppl_redo) to wakeup
44244474
os_event_set(redoer->is_log_all_finished);
44254475
}

storage/innobase/handler/ha_innodb.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19691,7 +19691,7 @@ static MYSQL_SYSVAR_DOUBLE(ppl_log_buf_flush_pct, srv_ppl_log_buf_flush_pct,
1969119691
static MYSQL_SYSVAR_DOUBLE(ppl_ckpt_threshold, srv_ppl_ckpt_threshold,
1969219692
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
1969319693
"Percentage of log file fill to trigger the checkpoint, default is 0.7",
19694-
NULL, NULL, 0.7, 0.1, 1, 0);
19694+
NULL, NULL, 0.7, 0.05, 1000, 0);
1969519695

1969619696
static MYSQL_SYSVAR_ULONG(ppl_log_flusher_wake_threshold, srv_ppl_log_flusher_wake_threshold,
1969719697
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,

storage/innobase/include/my_pmemobj.h

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -479,7 +479,8 @@ struct __pmem_dpt {
479479
* */
480480
struct __pmem_page_part_log {
481481
//we only use this lock to protect below values
482-
PMEMrwlock lock;
482+
PMEMrwlock ckpt_lock;
483+
PMEMrwlock recv_lock;
483484
uint64_t max_oldest_lsn;
484485
uint64_t min_oldest_lsn;
485486
uint64_t ckpt_lsn;
@@ -577,6 +578,8 @@ struct __pmem_page_log_hashed_line {
577578
TOID_ARRAY(TOID(PMEM_PAGE_LOG_BLOCK)) arr;
578579
uint64_t n_blocks; //the current non-free blocks
579580
uint64_t max_blocks; //the total log block in bucket
581+
long long* bit_arr; //bit array to manage free slots
582+
uint16_t n_bit_blocks; //number of block in bit_arr
580583

581584
/*Hash table*/
582585
hash_table_t* addr_hash; //hash the log block in this line
@@ -641,6 +644,7 @@ struct __pmem_page_log_block {
641644

642645
bool is_free; //flag
643646
uint64_t bid; //block id
647+
uint32_t id; //id on the line
644648
uint64_t key; //fold id
645649

646650
int32_t count; //number of active tx
@@ -693,9 +697,24 @@ struct __pmem_recv_line {
693697
mem_heap_t* heap; /*!< memory heap of log records and file addresses */
694698
ulint alloc_hash_size; //allocated heap size
695699
hash_table_t* addr_hash;/*!< hash table of file addresses of pages */
696-
ulint n_addrs;/*!< number of not processed hashed file addresses in the hash table */
700+
ulint n_addrs;/*!< number of not processed pages in the hash table */
701+
ulint n_addrs_done;/*!< number of already processed pages in the hash table (presumably the counterpart of n_addrs; TODO confirm) */
702+
ulint n_read_reqs; /*number of read request in phase 2 REDO */
703+
ulint n_read_done; /*number of completed read requests in phase 2 REDO (presumably; TODO confirm) */
704+
697705
recv_dblwr_t dblwr;
698706
encryption_list_t* encryption_list;
707+
708+
/*statistic info*/
709+
ulint redo1_thread_id;
710+
ulint redo1_start_time;
711+
ulint redo1_end_time;
712+
ulint redo1_elapse_time;
713+
714+
ulint redo2_thread_id;
715+
ulint redo2_start_time;
716+
ulint redo2_end_time;
717+
ulint redo2_elapse_time;
699718
};
700719

701720
//////////////// FLUSHER /////////////////
@@ -1093,6 +1112,15 @@ pm_ppl_hash_get(
10931112

10941113
plog_hash_t*
10951114
pm_ppl_hash_add(
1115+
PMEMobjpool* pop,
1116+
PMEM_PAGE_PART_LOG* ppl,
1117+
PMEM_PAGE_LOG_HASHED_LINE* pline,
1118+
PMEM_PAGE_LOG_BLOCK* plog_block,
1119+
uint32_t idx
1120+
);
1121+
1122+
plog_hash_t*
1123+
pm_ppl_hash_check_and_add(
10961124
PMEMobjpool* pop,
10971125
PMEM_PAGE_PART_LOG* ppl,
10981126
PMEM_PAGE_LOG_HASHED_LINE* pline,
@@ -1347,7 +1375,7 @@ uint64_t
13471375
pm_ppl_recv_parse_log_rec(
13481376
PMEMobjpool* pop,
13491377
PMEM_PAGE_PART_LOG* ppl,
1350-
PMEM_RECV_LINE* recv_line,
1378+
PMEM_PAGE_LOG_HASHED_LINE* pline,
13511379
mlog_id_t* type,
13521380
byte* ptr,
13531381
byte* end_ptr,
@@ -1493,6 +1521,32 @@ void
14931521
pm_ppl_remove_fil_spaces();
14941522

14951523
////////////// UTILITY////////////
1524+
1525+
//////////// BIT ARRAY /////////
1526+
void
1527+
pm_bit_set(
1528+
long long* arr,
1529+
size_t block_size,
1530+
uint64_t bit_i);
1531+
1532+
void
1533+
pm_bit_clear(
1534+
long long* arr,
1535+
size_t block_size,
1536+
uint64_t bit_i);
1537+
1538+
int32_t
1539+
pm_search_first_free_slot(
1540+
long long* bit_arr,
1541+
uint16_t n_bit_blocks,
1542+
uint16_t block_size);
1543+
1544+
/* Set bit `i` in the bit array `A`, where each element of `A` holds `bs` bits.
 * Every argument is parenthesized so that expression arguments such as
 * `base + 1` select the intended element and bit offset.
 * The mask is built from a 64-bit unsigned constant: a plain `1` is an int,
 * so shifting it by 31 or more is undefined and cannot address the upper
 * bits of a `long long` element.
 * NOTE: `i` and `bs` are evaluated twice; do not pass arguments with side effects. */
#define PM_BIT_SET(A, bs, i) ( (A)[(i) / (bs)] |= (long long)(1ULL << ((i) % (bs))) )
1545+
1546+
/* Clear bit `i` in the bit array `A`, where each element of `A` holds `bs` bits.
 * This macro was previously a second definition of PM_BIT_SET, which both
 * conflicts with the bit-setting macro of that name and made "set" clear bits;
 * it is named PM_BIT_CLEAR to mirror the pm_bit_set()/pm_bit_clear() functions.
 * Every argument is parenthesized, and the mask is built from a 64-bit
 * unsigned constant so that bit offsets of 31 or more are handled correctly.
 * NOTE: `i` and `bs` are evaluated twice; do not pass arguments with side effects. */
#define PM_BIT_CLEAR(A, bs, i) ( (A)[(i) / (bs)] &= (long long)~(1ULL << ((i) % (bs))) )
1547+
1548+
/////////// END BIT ARRAY //////
1549+
14961550
int64_t
14971551
__update_tt_entry_on_write_log(
14981552
PMEMobjpool* pop,

storage/innobase/log/log0log.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2113,9 +2113,9 @@ pm_ppl_checkpoint(
21132113
//}
21142114

21152115
/* (5) update the global ckpt_lsn*/
2116-
pmemobj_rwlock_wrlock(pop, &ppl->lock);
2116+
pmemobj_rwlock_wrlock(pop, &ppl->ckpt_lock);
21172117
ppl->ckpt_lsn = new_oldest;
2118-
pmemobj_rwlock_unlock(pop, &ppl->lock);
2118+
pmemobj_rwlock_unlock(pop, &ppl->ckpt_lock);
21192119
}
21202120
#endif //UNIV_PMEMOBJ_PART_PL
21212121

0 commit comments

Comments
 (0)