-
Notifications
You must be signed in to change notification settings - Fork 1.6k
/
ibuf0ibuf.cc
5131 lines (4125 loc) · 141 KB
/
ibuf0ibuf.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2016, 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
/**************************************************//**
@file ibuf/ibuf0ibuf.cc
Insert buffer
Created 7/19/1997 Heikki Tuuri
*******************************************************/
#include "ha_prototypes.h"
#include "ibuf0ibuf.h"
#include "sync0sync.h"
#include "btr0sea.h"
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
my_bool srv_ibuf_disable_background_merge;
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
/** Number of bits describing a single page */
#define IBUF_BITS_PER_PAGE 4
#if IBUF_BITS_PER_PAGE % 2
# error "IBUF_BITS_PER_PAGE must be an even number!"
#endif
/** The start address for an insert buffer bitmap page bitmap */
#define IBUF_BITMAP PAGE_DATA
#include "buf0buf.h"
#include "buf0rea.h"
#include "fsp0fsp.h"
#include "trx0sys.h"
#include "fil0fil.h"
#include "rem0rec.h"
#include "btr0cur.h"
#include "btr0pcur.h"
#include "btr0btr.h"
#include "row0upd.h"
#include "dict0boot.h"
#include "fut0lst.h"
#include "lock0lock.h"
#include "log0recv.h"
#include "que0que.h"
#include "srv0start.h" /* srv_shutdown_state */
#include "fsp0sysspace.h"
#include "rem0cmp.h"
/* STRUCTURE OF AN INSERT BUFFER RECORD
In versions < 4.1.x:
1. The first field is the page number.
2. The second field is an array which stores type info for each subsequent
field. We store the information which affects the ordering of records, and
also the physical storage size of an SQL NULL value. E.g., for CHAR(10) it
is 10 bytes.
3. Next we have the fields of the actual index record.
In versions >= 4.1.x:
Note that contary to what we planned in the 1990's, there will only be one
insert buffer tree, and that is in the system tablespace of InnoDB.
1. The first field is the space id.
2. The second field is a one-byte marker (0) which differentiates records from
the < 4.1.x storage format.
3. The third field is the page number.
4. The fourth field contains the type info, where we have also added 2 bytes to
store the charset. In the compressed table format of 5.0.x we must add more
information here so that we can build a dummy 'index' struct which 5.0.x
can use in the binary search on the index page in the ibuf merge phase.
5. The rest of the fields contain the fields of the actual index record.
In versions >= 5.0.3:
The first byte of the fourth field is an additional marker (0) if the record
is in the compact format. The presence of this marker can be detected by
looking at the length of the field modulo DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE.
The high-order bit of the character set field in the type info is the
"nullable" flag for the field.
In versions >= 5.5:
The optional marker byte at the start of the fourth field is replaced by
mandatory 3 fields, totaling 4 bytes:
1. 2 bytes: Counter field, used to sort records within a (space id, page
no) in the order they were added. This is needed so that for example the
sequence of operations "INSERT x, DEL MARK x, INSERT x" is handled
correctly.
2. 1 byte: Operation type (see ibuf_op_t).
3. 1 byte: Flags. Currently only one flag exists, IBUF_REC_COMPACT.
To ensure older records, which do not have counters to enforce correct
sorting, are merged before any new records, ibuf_insert checks if we're
trying to insert to a position that contains old-style records, and if so,
refuses the insert. Thus, ibuf pages are gradually converted to the new
format as their corresponding buffer pool pages are read into memory.
*/
/* PREVENTING DEADLOCKS IN THE INSERT BUFFER SYSTEM
If an OS thread performs any operation that brings in disk pages from
non-system tablespaces into the buffer pool, or creates such a page there,
then the operation may have as a side effect an insert buffer index tree
compression. Thus, the tree latch of the insert buffer tree may be acquired
in the x-mode, and also the file space latch of the system tablespace may
be acquired in the x-mode.
Also, an insert to an index in a non-system tablespace can have the same
effect. How do we know this cannot lead to a deadlock of OS threads? There
is a problem with the i\o-handler threads: they break the latching order
because they own x-latches to pages which are on a lower level than the
insert buffer tree latch, its page latches, and the tablespace latch an
insert buffer operation can reserve.
The solution is the following: Let all the tree and page latches connected
with the insert buffer be later in the latching order than the fsp latch and
fsp page latches.
Insert buffer pages must be such that the insert buffer is never invoked
when these pages are accessed as this would result in a recursion violating
the latching order. We let a special i/o-handler thread take care of i/o to
the insert buffer pages and the ibuf bitmap pages, as well as the fsp bitmap
pages and the first inode page, which contains the inode of the ibuf tree: let
us call all these ibuf pages. To prevent deadlocks, we do not let a read-ahead
access both non-ibuf and ibuf pages.
Then an i/o-handler for the insert buffer never needs to access recursively the
insert buffer tree and thus obeys the latching order. On the other hand, other
i/o-handlers for other tablespaces may require access to the insert buffer,
but because all kinds of latches they need to access there are later in the
latching order, no violation of the latching order occurs in this case,
either.
A problem is how to grow and contract an insert buffer tree. As it is later
in the latching order than the fsp management, we have to reserve the fsp
latch first, before adding or removing pages from the insert buffer tree.
We let the insert buffer tree have its own file space management: a free
list of pages linked to the tree root. To prevent recursive using of the
insert buffer when adding pages to the tree, we must first load these pages
to memory, obtaining a latch on them, and only after that add them to the
free list of the insert buffer tree. More difficult is removing of pages
from the free list. If there is an excess of pages in the free list of the
ibuf tree, they might be needed if some thread reserves the fsp latch,
intending to allocate more file space. So we do the following: if a thread
reserves the fsp latch, we check the writer count field of the latch. If
this field has value 1, it means that the thread did not own the latch
before entering the fsp system, and the mtr of the thread contains no
modifications to the fsp pages. Now we are free to reserve the ibuf latch,
and check if there is an excess of pages in the free list. We can then, in a
separate mini-transaction, take them out of the free list and free them to
the fsp system.
To avoid deadlocks in the ibuf system, we divide file pages into three levels:
(1) non-ibuf pages,
(2) ibuf tree pages and the pages in the ibuf tree free list, and
(3) ibuf bitmap pages.
No OS thread is allowed to access higher level pages if it has latches to
lower level pages; even if the thread owns a B-tree latch it must not access
the B-tree non-leaf pages if it has latches on lower level pages. Read-ahead
is only allowed for level 1 and 2 pages. Dedicated i/o-handler threads handle
exclusively level 1 i/o. A dedicated i/o handler thread handles exclusively
level 2 i/o. However, if an OS thread does the i/o handling for itself, i.e.,
it uses synchronous aio, it can access any pages, as long as it obeys the
access order rules. */
/** Operations that can currently be buffered. */
ibuf_use_t ibuf_use = IBUF_USE_ALL;
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
/** Flag to control insert buffer debugging. */
uint ibuf_debug;
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
/** The insert buffer control structure */
ibuf_t* ibuf = NULL;
#ifdef UNIV_IBUF_COUNT_DEBUG
/** Number of tablespaces in the ibuf_counts array */
#define IBUF_COUNT_N_SPACES 4
/** Number of pages within each tablespace in the ibuf_counts array */
#define IBUF_COUNT_N_PAGES 130000
/** Buffered entry counts for file pages, used in debugging */
static ulint ibuf_counts[IBUF_COUNT_N_SPACES][IBUF_COUNT_N_PAGES];
/** Checks that the indexes to ibuf_counts[][] are within limits.
@param[in] page_id page id */
UNIV_INLINE
void
ibuf_count_check(
const page_id_t& page_id)
{
if (page_id.space() < IBUF_COUNT_N_SPACES
&& page_id.page_no() < IBUF_COUNT_N_PAGES) {
return;
}
ib::fatal() << "UNIV_IBUF_COUNT_DEBUG limits space_id and page_no"
" and breaks crash recovery. space_id=" << page_id.space()
<< ", should be 0<=space_id<" << IBUF_COUNT_N_SPACES
<< ". page_no=" << page_id.page_no()
<< ", should be 0<=page_no<" << IBUF_COUNT_N_PAGES;
}
#endif
/** @name Offsets to the per-page bits in the insert buffer bitmap */
/* @{ */
#define IBUF_BITMAP_FREE 0 /*!< Bits indicating the
amount of free space */
#define IBUF_BITMAP_BUFFERED 2 /*!< TRUE if there are buffered
changes for the page */
#define IBUF_BITMAP_IBUF 3 /*!< TRUE if page is a part of
the ibuf tree, excluding the
root page, or is in the free
list of the ibuf */
/* @} */
#define IBUF_REC_FIELD_SPACE 0 /*!< in the pre-4.1 format,
the page number. later, the space_id */
#define IBUF_REC_FIELD_MARKER 1 /*!< starting with 4.1, a marker
consisting of 1 byte that is 0 */
#define IBUF_REC_FIELD_PAGE 2 /*!< starting with 4.1, the
page number */
#define IBUF_REC_FIELD_METADATA 3 /* the metadata field */
#define IBUF_REC_FIELD_USER 4 /* first user field */
/* Various constants for checking the type of an ibuf record and extracting
data from it. For details, see the description of the record format at the
top of this file. */
/** @name Format of the IBUF_REC_FIELD_METADATA of an insert buffer record
The fourth column in the MySQL 5.5 format contains an operation
type, counter, and some flags. */
/* @{ */
#define IBUF_REC_INFO_SIZE 4 /*!< Combined size of info fields at
the beginning of the fourth field */
#if IBUF_REC_INFO_SIZE >= DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
# error "IBUF_REC_INFO_SIZE >= DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
#endif
/* Offsets for the fields at the beginning of the fourth field */
#define IBUF_REC_OFFSET_COUNTER 0 /*!< Operation counter */
#define IBUF_REC_OFFSET_TYPE 2 /*!< Type of operation */
#define IBUF_REC_OFFSET_FLAGS 3 /*!< Additional flags */
/* Record flag masks */
#define IBUF_REC_COMPACT 0x1 /*!< Set in
IBUF_REC_OFFSET_FLAGS if the
user index is in COMPACT
format or later */
/** The mutex used to block pessimistic inserts to ibuf trees */
static ib_mutex_t ibuf_pessimistic_insert_mutex;
/** The mutex protecting the insert buffer structs */
static ib_mutex_t ibuf_mutex;
/** The mutex protecting the insert buffer bitmaps */
static ib_mutex_t ibuf_bitmap_mutex;
/** The area in pages from which contract looks for page numbers for merge */
const ulint IBUF_MERGE_AREA = 8;
/** Inside the merge area, pages which have at most 1 per this number less
buffered entries compared to maximum volume that can buffered for a single
page are merged along with the page whose buffer became full */
const ulint IBUF_MERGE_THRESHOLD = 4;
/** In ibuf_contract at most this number of pages is read to memory in one
batch, in order to merge the entries for them in the insert buffer */
const ulint IBUF_MAX_N_PAGES_MERGED = IBUF_MERGE_AREA;
/** If the combined size of the ibuf trees exceeds ibuf->max_size by this
many pages, we start to contract it in connection to inserts there, using
non-synchronous contract */
const ulint IBUF_CONTRACT_ON_INSERT_NON_SYNC = 0;
/** If the combined size of the ibuf trees exceeds ibuf->max_size by this
many pages, we start to contract it in connection to inserts there, using
synchronous contract */
const ulint IBUF_CONTRACT_ON_INSERT_SYNC = 5;
/** If the combined size of the ibuf trees exceeds ibuf->max_size by
this many pages, we start to contract it synchronous contract, but do
not insert */
const ulint IBUF_CONTRACT_DO_NOT_INSERT = 10;
/* TODO: how to cope with drop table if there are records in the insert
buffer for the indexes of the table? Is there actually any problem,
because ibuf merge is done to a page when it is read in, and it is
still physically like the index page even if the index would have been
dropped! So, there seems to be no problem. */
/******************************************************************//**
Sets the flag in the current mini-transaction record indicating we're
inside an insert buffer routine. */
UNIV_INLINE
void
ibuf_enter(
/*=======*/
mtr_t* mtr) /*!< in/out: mini-transaction */
{
ut_ad(!mtr->is_inside_ibuf());
mtr->enter_ibuf();
}
/******************************************************************//**
Sets the flag in the current mini-transaction record indicating we're
exiting an insert buffer routine. */
UNIV_INLINE
void
ibuf_exit(
/*======*/
mtr_t* mtr) /*!< in/out: mini-transaction */
{
ut_ad(mtr->is_inside_ibuf());
mtr->exit_ibuf();
}
/**************************************************************//**
Commits an insert buffer mini-transaction and sets the persistent
cursor latch mode to BTR_NO_LATCHES, that is, detaches the cursor. */
UNIV_INLINE
void
ibuf_btr_pcur_commit_specify_mtr(
/*=============================*/
btr_pcur_t* pcur, /*!< in/out: persistent cursor */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
ut_d(ibuf_exit(mtr));
btr_pcur_commit_specify_mtr(pcur, mtr);
}
/******************************************************************//**
Gets the ibuf header page and x-latches it.
@return insert buffer header page */
static
page_t*
ibuf_header_page_get(
/*=================*/
mtr_t* mtr) /*!< in/out: mini-transaction */
{
buf_block_t* block;
ut_ad(!ibuf_inside(mtr));
page_t* page = NULL;
block = buf_page_get(
page_id_t(IBUF_SPACE_ID, FSP_IBUF_HEADER_PAGE_NO),
univ_page_size, RW_X_LATCH, mtr);
if (!block->page.encrypted) {
buf_block_dbg_add_level(block, SYNC_IBUF_HEADER);
page = buf_block_get_frame(block);
}
return page;
}
/******************************************************************//**
Gets the root page and sx-latches it.
@return insert buffer tree root page */
static
page_t*
ibuf_tree_root_get(
/*===============*/
mtr_t* mtr) /*!< in: mtr */
{
buf_block_t* block;
page_t* root;
ut_ad(ibuf_inside(mtr));
ut_ad(mutex_own(&ibuf_mutex));
mtr_sx_lock(dict_index_get_lock(ibuf->index), mtr);
/* only segment list access is exclusive each other */
block = buf_page_get(
page_id_t(IBUF_SPACE_ID, FSP_IBUF_TREE_ROOT_PAGE_NO),
univ_page_size, RW_SX_LATCH, mtr);
buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE_NEW);
root = buf_block_get_frame(block);
ut_ad(page_get_space_id(root) == IBUF_SPACE_ID);
ut_ad(page_get_page_no(root) == FSP_IBUF_TREE_ROOT_PAGE_NO);
ut_ad(ibuf->empty == page_is_empty(root));
return(root);
}
#ifdef UNIV_IBUF_COUNT_DEBUG
/** Gets the ibuf count for a given page.
@param[in] page_id page id
@return number of entries in the insert buffer currently buffered for
this page */
ulint
ibuf_count_get(
const page_id_t& page_id)
{
ibuf_count_check(page_id);
return(ibuf_counts[page_id.space()][page_id.page_no()]);
}
/** Sets the ibuf count for a given page.
@param[in] page_id page id
@param[in] val value to set */
static
void
ibuf_count_set(
const page_id_t& page_id,
ulint val)
{
ibuf_count_check(page_id);
ut_a(val < UNIV_PAGE_SIZE);
ibuf_counts[page_id.space()][page_id.page_no()] = val;
}
#endif
/******************************************************************//**
Closes insert buffer and frees the data structures. */
void
ibuf_close(void)
/*============*/
{
mutex_free(&ibuf_pessimistic_insert_mutex);
mutex_free(&ibuf_mutex);
mutex_free(&ibuf_bitmap_mutex);
dict_table_t* ibuf_table = ibuf->index->table;
rw_lock_free(&ibuf->index->lock);
dict_mem_index_free(ibuf->index);
dict_mem_table_free(ibuf_table);
ut_free(ibuf);
ibuf = NULL;
}
/******************************************************************//**
Updates the size information of the ibuf, assuming the segment size has not
changed. */
static
void
ibuf_size_update(
/*=============*/
const page_t* root) /*!< in: ibuf tree root */
{
ut_ad(mutex_own(&ibuf_mutex));
ibuf->free_list_len = flst_get_len(root + PAGE_HEADER
+ PAGE_BTR_IBUF_FREE_LIST);
ibuf->height = 1 + btr_page_get_level_low(root);
/* the '1 +' is the ibuf header page */
ibuf->size = ibuf->seg_size - (1 + ibuf->free_list_len);
}
/******************************************************************//**
Creates the insert buffer data structure at a database startup and initializes
the data structures for the insert buffer.
@return DB_SUCCESS or failure */
dberr_t
ibuf_init_at_db_start(void)
/*=======================*/
{
page_t* root;
mtr_t mtr;
ulint n_used;
page_t* header_page;
dberr_t error= DB_SUCCESS;
ibuf = static_cast<ibuf_t*>(ut_zalloc_nokey(sizeof(ibuf_t)));
/* At startup we intialize ibuf to have a maximum of
CHANGE_BUFFER_DEFAULT_SIZE in terms of percentage of the
buffer pool size. Once ibuf struct is initialized this
value is updated with the user supplied size by calling
ibuf_max_size_update(). */
ibuf->max_size = ((buf_pool_get_curr_size() / UNIV_PAGE_SIZE)
* CHANGE_BUFFER_DEFAULT_SIZE) / 100;
mutex_create(LATCH_ID_IBUF, &ibuf_mutex);
mutex_create(LATCH_ID_IBUF_BITMAP, &ibuf_bitmap_mutex);
mutex_create(LATCH_ID_IBUF_PESSIMISTIC_INSERT,
&ibuf_pessimistic_insert_mutex);
mtr_start(&mtr);
mtr_x_lock_space(IBUF_SPACE_ID, &mtr);
mutex_enter(&ibuf_mutex);
header_page = ibuf_header_page_get(&mtr);
if (!header_page) {
return (DB_DECRYPTION_FAILED);
}
fseg_n_reserved_pages(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
&n_used, &mtr);
ut_ad(n_used >= 2);
ibuf->seg_size = n_used;
{
buf_block_t* block;
block = buf_page_get(
page_id_t(IBUF_SPACE_ID, FSP_IBUF_TREE_ROOT_PAGE_NO),
univ_page_size, RW_X_LATCH, &mtr);
buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE);
root = buf_block_get_frame(block);
}
ibuf_size_update(root);
mutex_exit(&ibuf_mutex);
ibuf->empty = page_is_empty(root);
mtr.commit();
ibuf->index = dict_mem_index_create(
"innodb_change_buffer", "CLUST_IND",
IBUF_SPACE_ID, DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, 1);
ibuf->index->id = DICT_IBUF_ID_MIN + IBUF_SPACE_ID;
ibuf->index->table = dict_mem_table_create(
"innodb_change_buffer", IBUF_SPACE_ID, 1, 0, 0, 0);
ibuf->index->n_uniq = REC_MAX_N_FIELDS;
rw_lock_create(index_tree_rw_lock_key, &ibuf->index->lock,
SYNC_IBUF_INDEX_TREE);
#ifdef BTR_CUR_ADAPT
ibuf->index->search_info = btr_search_info_create(ibuf->index->heap);
#endif /* BTR_CUR_ADAPT */
ibuf->index->page = FSP_IBUF_TREE_ROOT_PAGE_NO;
ut_d(ibuf->index->cached = TRUE);
return (error);
}
/*********************************************************************//**
Updates the max_size value for ibuf. */
void
ibuf_max_size_update(
/*=================*/
ulint new_val) /*!< in: new value in terms of
percentage of the buffer pool size */
{
ulint new_size = ((buf_pool_get_curr_size() / UNIV_PAGE_SIZE)
* new_val) / 100;
mutex_enter(&ibuf_mutex);
ibuf->max_size = new_size;
mutex_exit(&ibuf_mutex);
}
/*********************************************************************//**
Initializes an ibuf bitmap page. */
void
ibuf_bitmap_page_init(
/*==================*/
buf_block_t* block, /*!< in: bitmap page */
mtr_t* mtr) /*!< in: mtr */
{
page_t* page;
ulint byte_offset;
page = buf_block_get_frame(block);
fil_page_set_type(page, FIL_PAGE_IBUF_BITMAP);
/* Write all zeros to the bitmap */
byte_offset = UT_BITS_IN_BYTES(block->page.size.physical()
* IBUF_BITS_PER_PAGE);
memset(page + IBUF_BITMAP, 0, byte_offset);
/* The remaining area (up to the page trailer) is uninitialized. */
mlog_write_initial_log_record(page, MLOG_IBUF_BITMAP_INIT, mtr);
}
/*********************************************************************//**
Parses a redo log record of an ibuf bitmap page init.
@return end of log record or NULL */
byte*
ibuf_parse_bitmap_init(
/*===================*/
byte* ptr, /*!< in: buffer */
byte* end_ptr MY_ATTRIBUTE((unused)), /*!< in: buffer end */
buf_block_t* block, /*!< in: block or NULL */
mtr_t* mtr) /*!< in: mtr or NULL */
{
ut_ad(ptr != NULL);
ut_ad(end_ptr != NULL);
if (block) {
ibuf_bitmap_page_init(block, mtr);
}
return(ptr);
}
# ifdef UNIV_DEBUG
/** Gets the desired bits for a given page from a bitmap page.
@param[in] page bitmap page
@param[in] page_id page id whose bits to get
@param[in] page_size page id whose bits to get
@param[in] bit IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ...
@param[in,out] mtr mini-transaction holding an x-latch on the
bitmap page
@return value of bits */
# define ibuf_bitmap_page_get_bits(page, page_id, page_size, bit, mtr) \
ibuf_bitmap_page_get_bits_low(page, page_id, page_size, \
MTR_MEMO_PAGE_X_FIX, mtr, bit)
# else /* UNIV_DEBUG */
/** Gets the desired bits for a given page from a bitmap page.
@param[in] page bitmap page
@param[in] page_id page id whose bits to get
@param[in] page_size page id whose bits to get
@param[in] bit IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ...
@param[in,out] mtr mini-transaction holding an x-latch on the
bitmap page
@return value of bits */
# define ibuf_bitmap_page_get_bits(page, page_id, page_size, bit, mtr) \
ibuf_bitmap_page_get_bits_low(page, page_id, page_size, bit)
# endif /* UNIV_DEBUG */
/** Gets the desired bits for a given page from a bitmap page.
@param[in] page bitmap page
@param[in] page_id page id whose bits to get
@param[in] page_size page size
@param[in] latch_type MTR_MEMO_PAGE_X_FIX, MTR_MEMO_BUF_FIX, ...
@param[in,out] mtr mini-transaction holding latch_type on the
bitmap page
@param[in] bit IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ...
@return value of bits */
UNIV_INLINE
ulint
ibuf_bitmap_page_get_bits_low(
const page_t* page,
const page_id_t& page_id,
const page_size_t& page_size,
#ifdef UNIV_DEBUG
ulint latch_type,
mtr_t* mtr,
#endif /* UNIV_DEBUG */
ulint bit)
{
ulint byte_offset;
ulint bit_offset;
ulint map_byte;
ulint value;
ut_ad(bit < IBUF_BITS_PER_PAGE);
#if IBUF_BITS_PER_PAGE % 2
# error "IBUF_BITS_PER_PAGE % 2 != 0"
#endif
ut_ad(mtr_memo_contains_page(mtr, page, latch_type));
bit_offset = (page_id.page_no() % page_size.physical())
* IBUF_BITS_PER_PAGE + bit;
byte_offset = bit_offset / 8;
bit_offset = bit_offset % 8;
ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE);
map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset);
value = ut_bit_get_nth(map_byte, bit_offset);
if (bit == IBUF_BITMAP_FREE) {
ut_ad(bit_offset + 1 < 8);
value = value * 2 + ut_bit_get_nth(map_byte, bit_offset + 1);
}
return(value);
}
/** Sets the desired bit for a given page in a bitmap page.
@param[in,out] page bitmap page
@param[in] page_id page id whose bits to set
@param[in] page_size page size
@param[in] bit IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ...
@param[in] val value to set
@param[in,out] mtr mtr containing an x-latch to the bitmap page */
static
void
ibuf_bitmap_page_set_bits(
page_t* page,
const page_id_t& page_id,
const page_size_t& page_size,
ulint bit,
ulint val,
mtr_t* mtr)
{
ulint byte_offset;
ulint bit_offset;
ulint map_byte;
ut_ad(bit < IBUF_BITS_PER_PAGE);
#if IBUF_BITS_PER_PAGE % 2
# error "IBUF_BITS_PER_PAGE % 2 != 0"
#endif
ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
ut_ad(mtr->is_named_space(page_id.space()));
#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a((bit != IBUF_BITMAP_BUFFERED) || (val != FALSE)
|| (0 == ibuf_count_get(page_id)));
#endif
bit_offset = (page_id.page_no() % page_size.physical())
* IBUF_BITS_PER_PAGE + bit;
byte_offset = bit_offset / 8;
bit_offset = bit_offset % 8;
ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE);
map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset);
if (bit == IBUF_BITMAP_FREE) {
ut_ad(bit_offset + 1 < 8);
ut_ad(val <= 3);
map_byte = ut_bit_set_nth(map_byte, bit_offset, val / 2);
map_byte = ut_bit_set_nth(map_byte, bit_offset + 1, val % 2);
} else {
ut_ad(val <= 1);
map_byte = ut_bit_set_nth(map_byte, bit_offset, val);
}
mlog_write_ulint(page + IBUF_BITMAP + byte_offset, map_byte,
MLOG_1BYTE, mtr);
}
/** Calculates the bitmap page number for a given page number.
@param[in] page_id page id
@param[in] page_size page size
@return the bitmap page id where the file page is mapped */
UNIV_INLINE
const page_id_t
ibuf_bitmap_page_no_calc(
const page_id_t& page_id,
const page_size_t& page_size)
{
ulint bitmap_page_no;
bitmap_page_no = FSP_IBUF_BITMAP_OFFSET
+ (page_id.page_no() & ~(page_size.physical() - 1));
return(page_id_t(page_id.space(), bitmap_page_no));
}
/** Gets the ibuf bitmap page where the bits describing a given file page are
stored.
@param[in] page_id page id of the file page
@param[in] page_size page size of the file page
@param[in] file file name
@param[in] line line where called
@param[in,out] mtr mini-transaction
@return bitmap page where the file page is mapped, that is, the bitmap
page containing the descriptor bits for the file page; the bitmap page
is x-latched */
static
page_t*
ibuf_bitmap_get_map_page_func(
const page_id_t& page_id,
const page_size_t& page_size,
const char* file,
unsigned line,
mtr_t* mtr)
{
buf_block_t* block = NULL;
dberr_t err = DB_SUCCESS;
block = buf_page_get_gen(ibuf_bitmap_page_no_calc(page_id, page_size),
page_size, RW_X_LATCH, NULL, BUF_GET,
file, line, mtr, &err);
if (err != DB_SUCCESS) {
return NULL;
}
buf_block_dbg_add_level(block, SYNC_IBUF_BITMAP);
return(buf_block_get_frame(block));
}
/** Gets the ibuf bitmap page where the bits describing a given file page are
stored.
@param[in] page_id page id of the file page
@param[in] page_size page size of the file page
@param[in,out] mtr mini-transaction
@return bitmap page where the file page is mapped, that is, the bitmap
page containing the descriptor bits for the file page; the bitmap page
is x-latched */
#define ibuf_bitmap_get_map_page(page_id, page_size, mtr) \
ibuf_bitmap_get_map_page_func(page_id, page_size, \
__FILE__, __LINE__, mtr)
/************************************************************************//**
Sets the free bits of the page in the ibuf bitmap. This is done in a separate
mini-transaction, hence this operation does not restrict further work to only
ibuf bitmap operations, which would result if the latch to the bitmap page
were kept. */
UNIV_INLINE
void
ibuf_set_free_bits_low(
/*===================*/
const buf_block_t* block, /*!< in: index page; free bits are set if
the index is non-clustered and page
level is 0 */
ulint val, /*!< in: value to set: < 4 */
mtr_t* mtr) /*!< in/out: mtr */
{
page_t* bitmap_page;
buf_frame_t* frame;
ut_ad(mtr->is_named_space(block->page.id.space()));
if (!block) {
return;
}
frame = buf_block_get_frame(block);
if (!frame || !page_is_leaf(frame)) {
return;
}
bitmap_page = ibuf_bitmap_get_map_page(block->page.id,
block->page.size, mtr);
#ifdef UNIV_IBUF_DEBUG
ut_a(val <= ibuf_index_page_calc_free(block));
#endif /* UNIV_IBUF_DEBUG */
ibuf_bitmap_page_set_bits(
bitmap_page, block->page.id, block->page.size,
IBUF_BITMAP_FREE, val, mtr);
}
/************************************************************************//**
Sets the free bit of the page in the ibuf bitmap. This is done in a separate
mini-transaction, hence this operation does not restrict further work to only
ibuf bitmap operations, which would result if the latch to the bitmap page
were kept. */
void
ibuf_set_free_bits_func(
/*====================*/
buf_block_t* block, /*!< in: index page of a non-clustered index;
free bit is reset if page level is 0 */
#ifdef UNIV_IBUF_DEBUG
ulint max_val,/*!< in: ULINT_UNDEFINED or a maximum
value which the bits must have before
setting; this is for debugging */
#endif /* UNIV_IBUF_DEBUG */
ulint val) /*!< in: value to set: < 4 */
{
mtr_t mtr;
page_t* page;
page_t* bitmap_page;
page = buf_block_get_frame(block);
if (!page_is_leaf(page)) {
return;
}
mtr_start(&mtr);
const fil_space_t* space = mtr.set_named_space(block->page.id.space());
bitmap_page = ibuf_bitmap_get_map_page(block->page.id,
block->page.size, &mtr);
switch (space->purpose) {
case FIL_TYPE_LOG:
ut_ad(0);
break;
case FIL_TYPE_TABLESPACE:
/* Avoid logging while fixing up truncate of table. */
if (!srv_is_tablespace_truncated(block->page.id.space())) {
break;
}
/* fall through */
case FIL_TYPE_TEMPORARY:
case FIL_TYPE_IMPORT:
mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
}
#ifdef UNIV_IBUF_DEBUG
if (max_val != ULINT_UNDEFINED) {
ulint old_val;
old_val = ibuf_bitmap_page_get_bits(
bitmap_page, block->page.id,
IBUF_BITMAP_FREE, &mtr);
# if 0
if (old_val != max_val) {
fprintf(stderr,
"Ibuf: page %lu old val %lu max val %lu\n",
page_get_page_no(page),
old_val, max_val);
}
# endif
ut_a(old_val <= max_val);
}
# if 0
fprintf(stderr, "Setting page no %lu free bits to %lu should be %lu\n",
page_get_page_no(page), val,
ibuf_index_page_calc_free(block));
# endif
ut_a(val <= ibuf_index_page_calc_free(block));
#endif /* UNIV_IBUF_DEBUG */
ibuf_bitmap_page_set_bits(
bitmap_page, block->page.id, block->page.size,
IBUF_BITMAP_FREE, val, &mtr);
mtr_commit(&mtr);
}
/************************************************************************//**
Resets the free bits of the page in the ibuf bitmap. This is done in a
separate mini-transaction, hence this operation does not restrict
further work to only ibuf bitmap operations, which would result if the
latch to the bitmap page were kept. NOTE: The free bits in the insert
buffer bitmap must never exceed the free space on a page. It is safe
to decrement or reset the bits in the bitmap in a mini-transaction
that is committed before the mini-transaction that affects the free
space. */
void
ibuf_reset_free_bits(
/*=================*/
buf_block_t* block) /*!< in: index page; free bits are set to 0
if the index is a non-clustered
non-unique, and page level is 0 */
{
ibuf_set_free_bits(block, 0, ULINT_UNDEFINED);
}
/**********************************************************************//**
Updates the free bits for an uncompressed page to reflect the present
state. Does this in the mtr given, which means that the latching
order rules virtually prevent any further operations for this OS
thread until mtr is committed. NOTE: The free bits in the insert
buffer bitmap must never exceed the free space on a page. It is safe
to set the free bits in the same mini-transaction that updated the
page. */
void
ibuf_update_free_bits_low(
/*======================*/
const buf_block_t* block, /*!< in: index page */
ulint max_ins_size, /*!< in: value of
maximum insert size
with reorganize before
the latest operation
performed to the page */