From f41e66b221358f53ff8fa851b216babfbd28e7af Mon Sep 17 00:00:00 2001
From: Arcadiy Ivanov
Date: Thu, 5 Mar 2026 00:00:29 -0500
Subject: [PATCH 01/27] MDEV-38975: HEAP engine BLOB/TEXT/JSON/GEOMETRY column
 support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Allow BLOB/TEXT/JSON/GEOMETRY columns in MEMORY (HEAP) engine tables by
storing blob data in variable-length continuation record chains within
the existing `HP_BLOCK` structure.

**Continuation runs**: blob data is split across contiguous sequences of
`recbuffer`-sized records. Each run stores a 10-byte header (`next_cont`
pointer + `run_rec_count`) in the first record; inner records
(rec 1..N-1) carry no flags byte, so their full `recbuffer` is payload.
Runs are linked via `next_cont` pointers. An individual run is capped at
65,535 records (the `uint16` format limit); larger blobs are
automatically split into multiple runs.

**Zero-copy reads**: single-run blobs return pointers directly into
`HP_BLOCK` records, avoiding `blob_buff` reassembly entirely:

- Case A (`run_rec_count == 1`): return `chain + HP_CONT_HEADER_SIZE`
- Case B (`HP_ROW_CONT_ZEROCOPY` flag): return `chain + recbuffer`
- Case C (multi-run): walk the chain, reassemble into `blob_buff`

`HP_INFO::has_zerocopy_blobs` tracks zero-copy state; `heap_update()`
uses it to refresh the caller's record buffer after freeing old chains,
preventing dangling pointers.

**Free list scavenging**: on insert, the free list is walked read-only
(a peek), tracking contiguous groups in descending address order (LIFO).
Qualifying groups (>= `min_run_records`) are unlinked and used. The
first non-qualifying group terminates the scan; the remaining data is
allocated from the block tail. The free list is never disturbed when no
qualifying group is found.

**Record counting**: the new `HP_SHARE::total_records` tracks all
physical records (primary + continuation). `HP_SHARE::records` remains
logical (primary-only) to preserve the correctness of the linear hash
bucket mapping.

**Scan/check batch-skip**: `heap_scan()` and `heap_check_heap()` read
`run_rec_count` from rec 0 and skip entire continuation runs at once.

**Hash functions**: `hp_rec_hashnr()`, `hp_rec_key_cmp()`,
`hp_key_cmp()`, `hp_make_key()` are updated to handle `HA_BLOB_PART`
key segments, reading actual blob data via pointer dereference or chain
materialization.

**SQL layer**: `choose_engine()` no longer rejects HEAP for blob tables
(the `blob_fields` check is replaced with a `reclength >
HA_MAX_REC_LENGTH` check). `remove_duplicates()` routes HEAP+blob to
`remove_dup_with_compare()`. `ha_heap::remember_rnd_pos()` /
`restart_rnd_next()` are implemented to support DISTINCT deduplication.
Fixed undefined behavior in `test_if_cheaper_ordering()` where
`select_limit/fanout` could overflow to infinity; the value is now
capped at `HA_POS_ERROR`.
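For orientation, here is a minimal sketch of the run-header decoding and
the three read paths described above. This is illustrative only, not the
`hp_blob.c` implementation: `HP_CONT_HEADER_SIZE`, `next_cont`,
`run_rec_count` and `HP_ROW_CONT_ZEROCOPY` are named in this message,
but the byte layout (8-byte pointer first, 2-byte count second, 64-bit
pointers) and the helper names are assumptions.

```c
/* Illustrative sketch of the continuation-run header and the
   zero-copy dispatch (cases A/B/C); see hp_blob.c for the real code. */
#include <stdint.h>
#include <string.h>

typedef unsigned char uchar;

#define HP_CONT_HEADER_SIZE 10   /* 8-byte next_cont + 2-byte run_rec_count */

static uint16_t run_rec_count(const uchar *rec0)
{
  uint16_t n;
  memcpy(&n, rec0 + 8, sizeof(n)); /* after next_cont; caps a run at 65,535 */
  return n;
}

/*
  chain points at record 0 of the blob's first run; recbuffer is the
  share's record size.  reassemble() stands in for the case-C walk that
  copies every run into HP_INFO::blob_buff.
*/
static uchar *blob_ptr_sketch(uchar *chain, size_t recbuffer,
                              int zerocopy_flag,
                              uchar *(*reassemble)(uchar *))
{
  if (run_rec_count(chain) == 1)       /* Case A: blob fits in one record  */
    return chain + HP_CONT_HEADER_SIZE;
  if (zerocopy_flag)                   /* Case B: HP_ROW_CONT_ZEROCOPY set */
    return chain + recbuffer;          /* payload contiguous from record 1 */
  return reassemble(chain);            /* Case C: multi-run, use blob_buff */
}
```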
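Likewise, a hedged sketch of the free-list peek described above. The
`FREE_GROUP` structure, the `peek_free_runs()` name, and the assumption
that the free-list link is stored at the start of a deleted record are
all invented for illustration.

```c
/*
  Read-only scavenging pass over the LIFO free list.  Deleted records
  are pushed at the head, so within a contiguous group addresses descend
  by exactly recbuffer.  Groups of at least min_run_records qualify; the
  first group that is too small terminates the scan, and when nothing
  qualifies the list is left untouched.
*/
#include <stddef.h>
#include <string.h>

typedef unsigned char uchar;

typedef struct st_free_group             /* hypothetical result record */
{
  uchar *low, *high;
  unsigned count;
} FREE_GROUP;

static uchar *next_free(const uchar *rec)
{
  uchar *next;                           /* assumed link placement */
  memcpy(&next, rec, sizeof(next));
  return next;
}

static unsigned peek_free_runs(uchar *head, size_t recbuffer,
                               unsigned min_run_records,
                               FREE_GROUP *out, unsigned out_size)
{
  unsigned found= 0;
  uchar *rec= head;
  while (rec && found < out_size)
  {
    uchar *high= rec, *low= rec;         /* collect one contiguous group */
    unsigned count= 1;
    uchar *next= next_free(rec);
    while (next && next == low - recbuffer)
    {
      low= next;
      count++;
      next= next_free(next);
    }
    if (count < min_run_records)
      break;                             /* rest comes from the block tail */
    out[found].low= low;
    out[found].high= high;
    out[found].count= count;
    found++;                             /* caller unlinks these groups */
    rec= next;
  }
  return found;                          /* 0 => free list left untouched */
}
```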
https://jira.mariadb.org/browse/MDEV-38975 --- include/heap.h | 18 +- mysql-test/include/mtr_check.sql | 2 +- mysql-test/main/blob_sj_test.result | 29 + mysql-test/main/blob_sj_test.test | 26 + mysql-test/main/create.result | 9 +- mysql-test/main/create.test | 3 +- mysql-test/main/cte_recursive.test | 2 + mysql-test/main/derived_view.result | 2 +- mysql-test/main/distinct.result | 2 +- mysql-test/main/distinct.test | 3 + mysql-test/main/group_by.result | 4 +- mysql-test/main/group_by.test | 4 +- mysql-test/main/group_min_max_innodb.result | 8 +- mysql-test/main/group_min_max_innodb.test | 1 + mysql-test/main/information_schema.result | 2 +- .../main/information_schema_parameters.result | 2 +- .../main/information_schema_part.result | 2 +- mysql-test/main/information_schema_part.test | 2 +- .../main/information_schema_routines.result | 2 +- mysql-test/main/intersect_all.result | 6 +- mysql-test/main/intersect_all.test | 3 + mysql-test/main/select.result | 22 +- mysql-test/main/select.test | 3 +- mysql-test/main/select_jcl6.result | 22 +- mysql-test/main/select_pkeycache.result | 22 +- mysql-test/main/temp_table_symlink.result | 2 - mysql-test/main/temp_table_symlink.test | 5 - mysql-test/suite/funcs_1/r/is_columns.result | 2 +- mysql-test/suite/funcs_1/r/is_events.result | 2 +- mysql-test/suite/funcs_1/r/is_routines.result | 2 +- .../funcs_1/r/is_routines_embedded.result | 8 +- .../suite/funcs_1/r/is_tables_is.result | 104 +- .../funcs_1/r/is_tables_is_embedded.result | 104 +- mysql-test/suite/funcs_1/r/is_triggers.result | 2 +- .../funcs_1/r/is_triggers_embedded.result | 2 +- mysql-test/suite/funcs_1/r/is_views.result | 2 +- .../suite/funcs_1/r/is_views_embedded.result | 2 +- .../funcs_1/r/processlist_priv_no_prot.result | 4 +- .../funcs_1/r/processlist_priv_ps.result | 4 +- .../funcs_1/r/processlist_val_no_prot.result | 2 +- .../suite/funcs_1/r/processlist_val_ps.result | 2 +- mysql-test/suite/heap/blob_dedup.result | 15 + mysql-test/suite/heap/blob_dedup.test | 10 + mysql-test/suite/heap/heap_blob.result | 602 ++++++++++++ mysql-test/suite/heap/heap_blob.test | 439 +++++++++ mysql-test/suite/heap/heap_geometry.result | 75 ++ mysql-test/suite/heap/heap_geometry.test | 65 ++ .../suite/innodb_fts/r/innodb-fts-ddl.result | 2 +- mysql-test/suite/innodb_fts/r/misc.result | 10 +- .../suite/innodb_fts/t/innodb-fts-ddl.test | 2 +- mysql-test/suite/innodb_fts/t/misc.test | 10 +- .../transaction_nested_events_verifier.inc | 2 +- .../r/transaction_nested_events.result | 16 +- .../plugins/r/sql_error_log_withdbinfo.result | 6 +- .../r/tmp_disk_table_size_basic.result | 104 +- .../sys_vars/t/tmp_disk_table_size_basic.test | 2 +- .../r/v_schema_redundant_indexes.result | 2 +- sql/item_func.cc | 45 +- sql/item_sum.cc | 27 +- sql/sql_expression_cache.cc | 17 + sql/sql_select.cc | 72 +- storage/heap/CMakeLists.txt | 2 +- storage/heap/_check.c | 19 +- storage/heap/ha_heap.cc | 87 +- storage/heap/ha_heap.h | 8 +- storage/heap/heapdef.h | 101 +- storage/heap/hp_blob.c | 885 ++++++++++++++++++ storage/heap/hp_clear.c | 3 +- storage/heap/hp_close.c | 1 + storage/heap/hp_create.c | 94 +- storage/heap/hp_delete.c | 12 +- storage/heap/hp_extra.c | 6 + storage/heap/hp_hash.c | 181 +++- storage/heap/hp_rfirst.c | 2 + storage/heap/hp_rkey.c | 2 + storage/heap/hp_rlast.c | 2 + storage/heap/hp_rnext.c | 2 + storage/heap/hp_rprev.c | 2 + storage/heap/hp_rrnd.c | 2 + storage/heap/hp_rsame.c | 2 + storage/heap/hp_scan.c | 51 +- storage/heap/hp_static.c | 4 +- storage/heap/hp_update.c | 98 +- storage/heap/hp_write.c | 76 
+- 84 files changed, 3317 insertions(+), 298 deletions(-) create mode 100644 mysql-test/main/blob_sj_test.result create mode 100644 mysql-test/main/blob_sj_test.test create mode 100644 mysql-test/suite/heap/blob_dedup.result create mode 100644 mysql-test/suite/heap/blob_dedup.test create mode 100644 mysql-test/suite/heap/heap_blob.result create mode 100644 mysql-test/suite/heap/heap_blob.test create mode 100644 mysql-test/suite/heap/heap_geometry.result create mode 100644 mysql-test/suite/heap/heap_geometry.test create mode 100644 storage/heap/hp_blob.c diff --git a/include/heap.h b/include/heap.h index 3fac752abd028..633a33e53fd0f 100644 --- a/include/heap.h +++ b/include/heap.h @@ -131,6 +131,12 @@ typedef struct st_hp_keydef /* Key definition with open */ uint (*get_key_length)(struct st_hp_keydef *keydef, const uchar *key); } HP_KEYDEF; +typedef struct st_hp_blob_desc +{ + uint offset; /* Byte offset of blob descriptor within record buffer */ + uint packlength; /* 1, 2, 3, or 4: length prefix size */ +} HP_BLOB_DESC; + typedef struct st_heap_share { HP_BLOCK block; @@ -138,14 +144,14 @@ typedef struct st_heap_share ulonglong data_length,index_length,max_table_size; ulonglong auto_increment; ulong min_records,max_records; /* Params to open */ - ulong records; /* records */ + ulong records; /* Logical (primary) record count */ ulong blength; /* records rounded up to 2^n */ ulong deleted; /* Deleted records in database */ uint key_stat_version; /* version to indicate insert/delete */ uint key_version; /* Updated on key change */ uint file_version; /* Update on clear */ uint reclength; /* Length of one record */ - uint visible; /* Offset to the visible/deleted mark */ + uint visible; /* Offset to the flags byte (active/deleted/continuation) */ uint changed; uint keys,max_key_length; uint currently_disabled_keys; /* saved value from "keys" when disabled */ @@ -156,6 +162,9 @@ typedef struct st_heap_share THR_LOCK lock; my_bool delete_on_close; my_bool internal; /* Internal temporary table */ + HP_BLOB_DESC *blob_descs; /* Array of blob column descriptors */ + uint blob_count; /* Number of blob columns */ + ulong total_records; /* All active records (primary + blob continuation) */ LIST open_list; uint auto_key; uint auto_key_type; /* real type of the auto key segment */ @@ -181,6 +190,9 @@ typedef struct st_heap_info uint file_version; /* Version at scan */ uint lastkey_len; my_bool implicit_emptied; + uchar *blob_buff; /* Reassembly buffer for blob reads */ + uint32 blob_buff_len; /* Current allocated size of blob_buff */ + my_bool has_zerocopy_blobs; /* Last hp_read_blobs produced zero-copy ptrs */ THR_LOCK_DATA lock; LIST open_list; } HP_INFO; @@ -204,6 +216,8 @@ typedef struct st_heap_create_info open_count to 1. Is only looked at if not internal_table. 
*/ my_bool pin_share; + HP_BLOB_DESC *blob_descs; + uint blob_count; } HP_CREATE_INFO; /* Prototypes for heap-functions */ diff --git a/mysql-test/include/mtr_check.sql b/mysql-test/include/mtr_check.sql index 360f7b40bb864..46b420da4ae34 100644 --- a/mysql-test/include/mtr_check.sql +++ b/mysql-test/include/mtr_check.sql @@ -66,7 +66,7 @@ BEGIN collation_name, column_type, column_key, extra, column_comment FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema='mysql' - ORDER BY columns_in_mysql; + ORDER BY columns_in_mysql, ordinal_position; -- Dump all events, there should be none SELECT * FROM INFORMATION_SCHEMA.EVENTS; diff --git a/mysql-test/main/blob_sj_test.result b/mysql-test/main/blob_sj_test.result new file mode 100644 index 0000000000000..78f78b1b9bd5f --- /dev/null +++ b/mysql-test/main/blob_sj_test.result @@ -0,0 +1,29 @@ +set optimizer_switch='materialization=on,in_to_exists=off,semijoin=off'; +set @blob_len = 16; +set @prefix_len = 6; +set @suffix_len = @blob_len - @prefix_len; +create table t1 (a1 blob(16), a2 blob(16)); +create table t2 (b1 blob(16), b2 blob(16)); +insert into t1 values +(concat('1 - 00', repeat('x', @suffix_len)), concat('2 - 00', repeat('x', @suffix_len))); +insert into t1 values +(concat('1 - 01', repeat('x', @suffix_len)), concat('2 - 01', repeat('x', @suffix_len))); +insert into t1 values +(concat('1 - 02', repeat('x', @suffix_len)), concat('2 - 02', repeat('x', @suffix_len))); +insert into t2 values +(concat('1 - 01', repeat('x', @suffix_len)), concat('2 - 01', repeat('x', @suffix_len))); +insert into t2 values +(concat('1 - 02', repeat('x', @suffix_len)), concat('2 - 02', repeat('x', @suffix_len))); +insert into t2 values +(concat('1 - 03', repeat('x', @suffix_len)), concat('2 - 03', repeat('x', @suffix_len))); +explain extended select left(a1,7), left(a2,7) from t1 where a1 in (select b1 from t2 where b1 > '0'); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 3 100.00 Using where +2 DEPENDENT SUBQUERY t2 ALL NULL NULL NULL NULL 3 100.00 Using where +Warnings: +Note 1003 /* select#1 */ select left(`test`.`t1`.`a1`,7) AS `left(a1,7)`,left(`test`.`t1`.`a2`,7) AS `left(a2,7)` from `test`.`t1` where <`test`.`t1`.`a1`>((`test`.`t1`.`a1`,(/* select#2 */ select `test`.`t2`.`b1` from `test`.`t2` where `test`.`t2`.`b1` > '0' and (`test`.`t1`.`a1`) = `test`.`t2`.`b1`))) +select left(a1,7), left(a2,7) from t1 where a1 in (select b1 from t2 where b1 > '0'); +left(a1,7) left(a2,7) +1 - 01x 2 - 01x +1 - 02x 2 - 02x +drop table t1, t2; diff --git a/mysql-test/main/blob_sj_test.test b/mysql-test/main/blob_sj_test.test new file mode 100644 index 0000000000000..447d856adce4e --- /dev/null +++ b/mysql-test/main/blob_sj_test.test @@ -0,0 +1,26 @@ +set optimizer_switch='materialization=on,in_to_exists=off,semijoin=off'; +set @blob_len = 16; +set @prefix_len = 6; +set @suffix_len = @blob_len - @prefix_len; + +create table t1 (a1 blob(16), a2 blob(16)); +create table t2 (b1 blob(16), b2 blob(16)); + +insert into t1 values +(concat('1 - 00', repeat('x', @suffix_len)), concat('2 - 00', repeat('x', @suffix_len))); +insert into t1 values +(concat('1 - 01', repeat('x', @suffix_len)), concat('2 - 01', repeat('x', @suffix_len))); +insert into t1 values +(concat('1 - 02', repeat('x', @suffix_len)), concat('2 - 02', repeat('x', @suffix_len))); + +insert into t2 values +(concat('1 - 01', repeat('x', @suffix_len)), concat('2 - 01', repeat('x', @suffix_len))); +insert into t2 values +(concat('1 - 02', repeat('x', 
@suffix_len)), concat('2 - 02', repeat('x', @suffix_len))); +insert into t2 values +(concat('1 - 03', repeat('x', @suffix_len)), concat('2 - 03', repeat('x', @suffix_len))); + +explain extended select left(a1,7), left(a2,7) from t1 where a1 in (select b1 from t2 where b1 > '0'); +select left(a1,7), left(a2,7) from t1 where a1 in (select b1 from t2 where b1 > '0'); + +drop table t1, t2; diff --git a/mysql-test/main/create.result b/mysql-test/main/create.result index 4bae81878103b..ba5836e2999f0 100644 --- a/mysql-test/main/create.result +++ b/mysql-test/main/create.result @@ -30,10 +30,7 @@ Note 1051 Unknown table 'test.t1,test.t2' create table t1 (b char(0) not null, index(b)); ERROR 42000: The storage engine MyISAM can't index column `b` create table t1 (a int not null,b text) engine=heap; -ERROR 42000: Storage engine MEMORY doesn't support BLOB/TEXT columns -drop table if exists t1; -Warnings: -Note 1051 Unknown table 'test.t1' +drop table t1; create table t1 (ordid int(8) not null auto_increment, ord varchar(50) not null, primary key (ord,ordid)) engine=heap; ERROR 42000: Incorrect table definition; there can be only one auto column and it must be defined as a key create table not_existing_database.test (a int); @@ -1089,7 +1086,7 @@ t1 CREATE TABLE `t1` ( `QUERY_ID` bigint(4) NOT NULL, `INFO_BINARY` blob, `TID` bigint(4) NOT NULL -) DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci +) ENGINE=MEMORY DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci drop table t1; create temporary table t1 like information_schema.processlist; show create table t1; @@ -1113,7 +1110,7 @@ t1 CREATE TEMPORARY TABLE `t1` ( `QUERY_ID` bigint(4) NOT NULL, `INFO_BINARY` blob, `TID` bigint(4) NOT NULL -) DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci +) ENGINE=MEMORY DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci drop table t1; create table t1 like information_schema.character_sets; show create table t1; diff --git a/mysql-test/main/create.test b/mysql-test/main/create.test index dada6963fdb2e..80381a60e68c3 100644 --- a/mysql-test/main/create.test +++ b/mysql-test/main/create.test @@ -30,9 +30,8 @@ create table t2 select auto+1 from t1; drop table if exists t1,t2; --error ER_WRONG_KEY_COLUMN create table t1 (b char(0) not null, index(b)); ---error ER_TABLE_CANT_HANDLE_BLOB create table t1 (a int not null,b text) engine=heap; -drop table if exists t1; +drop table t1; --error ER_WRONG_AUTO_KEY create table t1 (ordid int(8) not null auto_increment, ord varchar(50) not null, primary key (ord,ordid)) engine=heap; diff --git a/mysql-test/main/cte_recursive.test b/mysql-test/main/cte_recursive.test index d6fb2a47884ed..3ce3c0e9d964a 100644 --- a/mysql-test/main/cte_recursive.test +++ b/mysql-test/main/cte_recursive.test @@ -3212,6 +3212,8 @@ show create table t2; --eval insert ignore into t2 $query; drop table t2; set @@sql_mode=""; +# Rows with identical (level, mid) due to overflow have non-deterministic order +--sorted_result --eval $query --eval create table t2 as $query; show create table t2; diff --git a/mysql-test/main/derived_view.result b/mysql-test/main/derived_view.result index 3f3f68154882c..c673d201329b8 100644 --- a/mysql-test/main/derived_view.result +++ b/mysql-test/main/derived_view.result @@ -2372,7 +2372,7 @@ GROUP BY TABLE_SCHEMA) AS UNIQUES ON ( COLUMNS.TABLE_SCHEMA = UNIQUES.TABLE_SCHEMA); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY COLUMNS ALL NULL NULL NULL NULL NULL Open_frm_only; Scanned all databases -1 PRIMARY ALL NULL NULL NULL NULL 2 Using where; 
Using join buffer (flat, BNL join) +1 PRIMARY ref key0 key0 194 information_schema.COLUMNS.TABLE_SCHEMA 2 2 DERIVED STATISTICS ALL NULL NULL NULL NULL NULL Open_frm_only; Scanned all databases; Using filesort SELECT COUNT(*) > 0 FROM INFORMATION_SCHEMA.COLUMNS diff --git a/mysql-test/main/distinct.result b/mysql-test/main/distinct.result index d8646abfb43cb..2f76fcfbc924a 100644 --- a/mysql-test/main/distinct.result +++ b/mysql-test/main/distinct.result @@ -1189,7 +1189,7 @@ insert into t1 values (1, 'Aa123456', 'abc'), (2, 'Bb7897777', 'def'), (3, 'Cc01287', 'xyz'), (5, 'd12345', 'efg'); select distinct if(sum(a), b, 0) from t1 group by value(c) with rollup; if(sum(a), b, 0) -Aa123456 +SOME_B_VALUE drop table t1; # # end of 10.5 tests diff --git a/mysql-test/main/distinct.test b/mysql-test/main/distinct.test index 48d5f4bb8fae6..db9bfb6b5abe2 100644 --- a/mysql-test/main/distinct.test +++ b/mysql-test/main/distinct.test @@ -915,6 +915,9 @@ create table t1 (a int, b longtext, c varchar(18)); insert into t1 values (1, 'Aa123456', 'abc'), (2, 'Bb7897777', 'def'), (3, 'Cc01287', 'xyz'), (5, 'd12345', 'efg'); +# ROLLUP row's b value is indeterminate (depends on last group processed), +# which varies by temp table engine (HEAP vs Aria). Mask the value. +--replace_regex /(Aa123456|Bb7897777|Cc01287|d12345)/SOME_B_VALUE/ select distinct if(sum(a), b, 0) from t1 group by value(c) with rollup; drop table t1; diff --git a/mysql-test/main/group_by.result b/mysql-test/main/group_by.result index 17f42fe36f36b..8dcbd16ccabcd 100644 --- a/mysql-test/main/group_by.result +++ b/mysql-test/main/group_by.result @@ -2510,10 +2510,10 @@ SELECT f3, MIN(f2) FROM t1 GROUP BY f1 LIMIT 1; f3 MIN(f2) blob NULL DROP TABLE t1; -the value below *must* be 1 +the value below *must* be 0 (HEAP supports blobs) show status like 'Created_tmp_disk_tables'; Variable_name Value -Created_tmp_disk_tables 1 +Created_tmp_disk_tables 0 # # Bug #1002146: Unneeded filesort if usage of join buffer is not allowed # (bug mdev-645) diff --git a/mysql-test/main/group_by.test b/mysql-test/main/group_by.test index 19f2e6582ae44..d3aa21d6f397e 100644 --- a/mysql-test/main/group_by.test +++ b/mysql-test/main/group_by.test @@ -1671,14 +1671,14 @@ DROP TABLE t1, t2; --disable_ps2_protocol --disable_view_protocol --disable_cursor_protocol -FLUSH STATUS; # this test case *must* use Aria temp tables +FLUSH STATUS; CREATE TABLE t1 (f1 INT, f2 decimal(20,1), f3 blob); INSERT INTO t1 values(11,NULL,'blob'),(11,NULL,'blob'); SELECT f3, MIN(f2) FROM t1 GROUP BY f1 LIMIT 1; DROP TABLE t1; ---echo the value below *must* be 1 +--echo the value below *must* be 0 (HEAP supports blobs) show status like 'Created_tmp_disk_tables'; --enable_cursor_protocol --enable_view_protocol diff --git a/mysql-test/main/group_min_max_innodb.result b/mysql-test/main/group_min_max_innodb.result index 27656374aee38..c65bbd5e7e602 100644 --- a/mysql-test/main/group_min_max_innodb.result +++ b/mysql-test/main/group_min_max_innodb.result @@ -303,10 +303,10 @@ CREATE TABLE t2 (`voter_id` int(10) unsigned NOT NULL DEFAULT '0', insert into t2 values (1,repeat("a",1000)),(2,repeat("a",1000)),(3,repeat("b",1000)),(4,repeat("c",1000)),(4,repeat("b",1000)); SELECT GROUP_CONCAT(t1.language_id SEPARATOR ',') AS `translation_resources`, `d`.`serialized_c` FROM t2 AS `d` LEFT JOIN t1 ON `d`.`voter_id` = t1.`voter_id` GROUP BY `d`.`voter_id` ORDER BY 10-d.voter_id+RAND()*0; translation_resources serialized_c -NULL 
cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc -NULL bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb -NULL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -NULL 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +NULL # +NULL # +NULL # +NULL # drop table t1,t2; # # MDEV-30143: Segfault on select query using index for group-by and filesort diff --git a/mysql-test/main/group_min_max_innodb.test b/mysql-test/main/group_min_max_innodb.test index 33a3a8888a5d8..99f8457e163fe 100644 --- a/mysql-test/main/group_min_max_innodb.test +++ b/mysql-test/main/group_min_max_innodb.test @@ -248,6 +248,7 @@ CREATE TABLE t1 (`voter_id` int(11) unsigned NOT NULL, CREATE TABLE t2 (`voter_id` int(10) unsigned NOT NULL DEFAULT '0', `serialized_c` mediumblob) ENGINE=InnoDB DEFAULT CHARSET=utf8; insert into t2 values (1,repeat("a",1000)),(2,repeat("a",1000)),(3,repeat("b",1000)),(4,repeat("c",1000)),(4,repeat("b",1000)); +--replace_column 2 # SELECT GROUP_CONCAT(t1.language_id SEPARATOR ',') AS `translation_resources`, `d`.`serialized_c` FROM t2 AS `d` LEFT JOIN t1 ON `d`.`voter_id` = t1.`voter_id` GROUP BY `d`.`voter_id` ORDER BY 10-d.voter_id+RAND()*0; drop table t1,t2; diff --git a/mysql-test/main/information_schema.result b/mysql-test/main/information_schema.result index 5d9f2d7322f12..f7d43fc58e5a6 100644 --- a/mysql-test/main/information_schema.result +++ b/mysql-test/main/information_schema.result @@ -709,7 +709,7 @@ select TABLE_NAME,TABLE_TYPE,ENGINE from information_schema.tables where table_schema='information_schema' limit 2; TABLE_NAME TABLE_TYPE ENGINE -ALL_PLUGINS SYSTEM VIEW Aria +ALL_PLUGINS SYSTEM VIEW MEMORY APPLICABLE_ROLES SYSTEM VIEW MEMORY show tables from information_schema like "T%"; Tables_in_information_schema (T%) diff --git a/mysql-test/main/information_schema_parameters.result b/mysql-test/main/information_schema_parameters.result index 1d00c992e5c8f..0abc0f4f38838 100644 --- a/mysql-test/main/information_schema_parameters.result +++ b/mysql-test/main/information_schema_parameters.result @@ -19,7 +19,7 @@ PARAMETERS CREATE TEMPORARY TABLE `PARAMETERS` ( `COLLATION_NAME` varchar(64), `DTD_IDENTIFIER` longtext NOT NULL, `ROUTINE_TYPE` varchar(9) NOT NULL -) DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci +) ENGINE=MEMORY DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci SELECT * FROM information_schema.columns WHERE table_schema = 'information_schema' AND table_name = 'parameters' diff --git a/mysql-test/main/information_schema_part.result b/mysql-test/main/information_schema_part.result index 1c5b9333550ef..005314612dc68 100644 --- a/mysql-test/main/information_schema_part.result +++ b/mysql-test/main/information_schema_part.result @@ -61,7 +61,7 @@ partition x2 values less than (5) ( subpartition x21 
tablespace t1, subpartition x22 tablespace t2) ); -select * from information_schema.partitions where table_schema="test" order by table_name, partition_name; +select * from information_schema.partitions where table_schema="test" order by table_name, partition_name, subpartition_name; TABLE_CATALOG TABLE_SCHEMA TABLE_NAME PARTITION_NAME SUBPARTITION_NAME PARTITION_ORDINAL_POSITION SUBPARTITION_ORDINAL_POSITION PARTITION_METHOD SUBPARTITION_METHOD PARTITION_EXPRESSION SUBPARTITION_EXPRESSION PARTITION_DESCRIPTION TABLE_ROWS AVG_ROW_LENGTH DATA_LENGTH MAX_DATA_LENGTH INDEX_LENGTH DATA_FREE CREATE_TIME UPDATE_TIME CHECK_TIME CHECKSUM PARTITION_COMMENT NODEGROUP TABLESPACE_NAME def test t1 x1 x11 1 1 RANGE HASH `a` `a` + `b` 1 0 0 0 # 1024 0 # # NULL NULL default NULL def test t1 x1 x12 1 2 RANGE HASH `a` `a` + `b` 1 0 0 0 # 1024 0 # # NULL NULL default NULL diff --git a/mysql-test/main/information_schema_part.test b/mysql-test/main/information_schema_part.test index 3741de611505a..02af5be6d02f8 100644 --- a/mysql-test/main/information_schema_part.test +++ b/mysql-test/main/information_schema_part.test @@ -63,7 +63,7 @@ subpartition by key (a) subpartition x22 tablespace t2) ); --replace_column 16 # 19 # 20 # -select * from information_schema.partitions where table_schema="test" order by table_name, partition_name; +select * from information_schema.partitions where table_schema="test" order by table_name, partition_name, subpartition_name; drop table t1,t2; create table t1 ( diff --git a/mysql-test/main/information_schema_routines.result b/mysql-test/main/information_schema_routines.result index b5b43db71cec4..4d73258b4941d 100644 --- a/mysql-test/main/information_schema_routines.result +++ b/mysql-test/main/information_schema_routines.result @@ -36,7 +36,7 @@ ROUTINES CREATE TEMPORARY TABLE `ROUTINES` ( `CHARACTER_SET_CLIENT` varchar(32) NOT NULL, `COLLATION_CONNECTION` varchar(64) NOT NULL, `DATABASE_COLLATION` varchar(64) NOT NULL -) DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci +) ENGINE=MEMORY DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci SELECT * FROM information_schema.columns WHERE table_schema = 'information_schema' AND table_name = 'routines' diff --git a/mysql-test/main/intersect_all.result b/mysql-test/main/intersect_all.result index 028a76944b38d..d2c7e5932a88d 100644 --- a/mysql-test/main/intersect_all.result +++ b/mysql-test/main/intersect_all.result @@ -718,13 +718,13 @@ t4 CREATE TABLE `t4` ( drop tables t4; (select a,b from t1) intersect all (select c,d from t2) intersect all (select e,f from t3) union all (select 4,4); a b -4 4 2 2 2 2 +4 4 (select a,b from t1) intersect all (select c,d from t2) intersect all (select e,f from t3) union all (select 4,4) except all (select 2,2); a b -4 4 2 2 +4 4 drop tables t1,t2,t3; create table t1 (a int, b int); create table t2 (c int, d int); @@ -779,9 +779,9 @@ insert into t3 values (3,3); e f 3 3 3 3 +4 4 5 5 6 6 -4 4 explain extended (select e,f from t3) intersect all (select c,d from t2) union all (select a,b from t1) union all (select 4,4); id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY t3 ALL NULL NULL NULL NULL 3 100.00 diff --git a/mysql-test/main/intersect_all.test b/mysql-test/main/intersect_all.test index c3dc4e123f0e7..d28d7e643d50d 100644 --- a/mysql-test/main/intersect_all.test +++ b/mysql-test/main/intersect_all.test @@ -108,8 +108,10 @@ show create table t4; drop tables t4; +--sorted_result (select a,b from t1) intersect all (select c,d from t2) intersect all (select e,f from 
t3) union all (select 4,4); +--sorted_result (select a,b from t1) intersect all (select c,d from t2) intersect all (select e,f from t3) union all (select 4,4) except all (select 2,2); drop tables t1,t2,t3; @@ -149,6 +151,7 @@ explain extended (select a,b from t1) union all (select c,d from t2) intersect a insert into t2 values (3,3); insert into t3 values (3,3); +--sorted_result (select e,f from t3) intersect all (select c,d from t2) union all (select a,b from t1) union all (select 4,4); explain extended (select e,f from t3) intersect all (select c,d from t2) union all (select a,b from t1) union all (select 4,4); diff --git a/mysql-test/main/select.result b/mysql-test/main/select.result index cfa31f507188a..aeb12170bb82a 100644 --- a/mysql-test/main/select.result +++ b/mysql-test/main/select.result @@ -576,18 +576,18 @@ bedlam 1 bedpost 1 boasted 1 set tmp_memory_table_size=default; -select distinct fld3,repeat("a",length(fld3)),count(*) from t2 group by companynr,fld3 limit 100,10; +select distinct fld3,repeat("a",length(fld3)),count(*) from t2 group by companynr,fld3 order by fld3 limit 100,10; fld3 repeat("a",length(fld3)) count(*) -circus aaaaaa 1 -cited aaaaa 1 -Colombo aaaaaaa 1 -congresswoman aaaaaaaaaaaaa 1 -contrition aaaaaaaaaa 1 -corny aaaaa 1 -cultivation aaaaaaaaaaa 1 -definiteness aaaaaaaaaaaa 1 -demultiplex aaaaaaaaaaa 1 -disappointing aaaaaaaaaaaaa 1 +Baird aaaaa 1 +balled aaaaaa 1 +ballgown aaaaaaaa 1 +Baltimorean aaaaaaaaaaa 1 +bankruptcies aaaaaaaaaaaa 1 +Barry aaaaa 1 +batting aaaaaaa 1 +beaner aaaaaa 1 +beasts aaaaaa 1 +beaters aaaaaaa 1 select distinct companynr,rtrim(space(512+companynr)) from t3 order by 1,2; companynr rtrim(space(512+companynr)) 37 diff --git a/mysql-test/main/select.test b/mysql-test/main/select.test index ac9753a86bd27..300f08c543454 100644 --- a/mysql-test/main/select.test +++ b/mysql-test/main/select.test @@ -1442,7 +1442,8 @@ select distinct fld3,count(*) from t2 group by companynr,fld3 limit 10; set tmp_memory_table_size=0; # force on-disk tmp table select distinct fld3,count(*) from t2 group by companynr,fld3 limit 10; set tmp_memory_table_size=default; -select distinct fld3,repeat("a",length(fld3)),count(*) from t2 group by companynr,fld3 limit 100,10; +# ORDER BY fld3 ensures deterministic LIMIT window regardless of temp table engine +select distinct fld3,repeat("a",length(fld3)),count(*) from t2 group by companynr,fld3 order by fld3 limit 100,10; # # A big order by that should trigger a merge in filesort diff --git a/mysql-test/main/select_jcl6.result b/mysql-test/main/select_jcl6.result index cf8f4f26ae067..0809f5cadfb82 100644 --- a/mysql-test/main/select_jcl6.result +++ b/mysql-test/main/select_jcl6.result @@ -587,18 +587,18 @@ bedlam 1 bedpost 1 boasted 1 set tmp_memory_table_size=default; -select distinct fld3,repeat("a",length(fld3)),count(*) from t2 group by companynr,fld3 limit 100,10; +select distinct fld3,repeat("a",length(fld3)),count(*) from t2 group by companynr,fld3 order by fld3 limit 100,10; fld3 repeat("a",length(fld3)) count(*) -circus aaaaaa 1 -cited aaaaa 1 -Colombo aaaaaaa 1 -congresswoman aaaaaaaaaaaaa 1 -contrition aaaaaaaaaa 1 -corny aaaaa 1 -cultivation aaaaaaaaaaa 1 -definiteness aaaaaaaaaaaa 1 -demultiplex aaaaaaaaaaa 1 -disappointing aaaaaaaaaaaaa 1 +Baird aaaaa 1 +balled aaaaaa 1 +ballgown aaaaaaaa 1 +Baltimorean aaaaaaaaaaa 1 +bankruptcies aaaaaaaaaaaa 1 +Barry aaaaa 1 +batting aaaaaaa 1 +beaner aaaaaa 1 +beasts aaaaaa 1 +beaters aaaaaaa 1 select distinct companynr,rtrim(space(512+companynr)) from t3 order by 
1,2; companynr rtrim(space(512+companynr)) 37 diff --git a/mysql-test/main/select_pkeycache.result b/mysql-test/main/select_pkeycache.result index cfa31f507188a..aeb12170bb82a 100644 --- a/mysql-test/main/select_pkeycache.result +++ b/mysql-test/main/select_pkeycache.result @@ -576,18 +576,18 @@ bedlam 1 bedpost 1 boasted 1 set tmp_memory_table_size=default; -select distinct fld3,repeat("a",length(fld3)),count(*) from t2 group by companynr,fld3 limit 100,10; +select distinct fld3,repeat("a",length(fld3)),count(*) from t2 group by companynr,fld3 order by fld3 limit 100,10; fld3 repeat("a",length(fld3)) count(*) -circus aaaaaa 1 -cited aaaaa 1 -Colombo aaaaaaa 1 -congresswoman aaaaaaaaaaaaa 1 -contrition aaaaaaaaaa 1 -corny aaaaa 1 -cultivation aaaaaaaaaaa 1 -definiteness aaaaaaaaaaaa 1 -demultiplex aaaaaaaaaaa 1 -disappointing aaaaaaaaaaaaa 1 +Baird aaaaa 1 +balled aaaaaa 1 +ballgown aaaaaaaa 1 +Baltimorean aaaaaaaaaaa 1 +bankruptcies aaaaaaaaaaaa 1 +Barry aaaaa 1 +batting aaaaaaa 1 +beaner aaaaaa 1 +beasts aaaaaa 1 +beaters aaaaaaa 1 select distinct companynr,rtrim(space(512+companynr)) from t3 order by 1,2; companynr rtrim(space(512+companynr)) 37 diff --git a/mysql-test/main/temp_table_symlink.result b/mysql-test/main/temp_table_symlink.result index 1c5c68170ff8a..6add9191b0478 100644 --- a/mysql-test/main/temp_table_symlink.result +++ b/mysql-test/main/temp_table_symlink.result @@ -4,8 +4,6 @@ create temporary table t2 (a int); Got one of the listed errors create temporary table t3 (a int) engine=Aria; Got one of the listed errors -select * from information_schema.columns where table_schema='test'; -Got one of the listed errors flush tables; select * from d1; a diff --git a/mysql-test/main/temp_table_symlink.test b/mysql-test/main/temp_table_symlink.test index a0be38d907300..2428d137dd5ed 100644 --- a/mysql-test/main/temp_table_symlink.test +++ b/mysql-test/main/temp_table_symlink.test @@ -23,11 +23,6 @@ error 1,1030; create temporary table t2 (a int); error 1,1030; create temporary table t3 (a int) engine=Aria; ---disable_view_protocol -error 1,1030; -select * from information_schema.columns where table_schema='test'; ---enable_view_protocol - flush tables; select * from d1; drop temporary table t1; diff --git a/mysql-test/suite/funcs_1/r/is_columns.result b/mysql-test/suite/funcs_1/r/is_columns.result index 3d03a1d288aca..75996b16f4dc7 100644 --- a/mysql-test/suite/funcs_1/r/is_columns.result +++ b/mysql-test/suite/funcs_1/r/is_columns.result @@ -75,7 +75,7 @@ COLUMNS CREATE TEMPORARY TABLE `COLUMNS` ( `COLUMN_COMMENT` varchar(1024) NOT NULL, `IS_GENERATED` varchar(6) NOT NULL, `GENERATION_EXPRESSION` longtext -) DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci +) ENGINE=MEMORY DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci SHOW COLUMNS FROM information_schema.COLUMNS; Field Type Null Key Default Extra TABLE_CATALOG varchar(512) NO NULL diff --git a/mysql-test/suite/funcs_1/r/is_events.result b/mysql-test/suite/funcs_1/r/is_events.result index 7df12ee27717e..59afb2d81f2e9 100644 --- a/mysql-test/suite/funcs_1/r/is_events.result +++ b/mysql-test/suite/funcs_1/r/is_events.result @@ -79,7 +79,7 @@ EVENTS CREATE TEMPORARY TABLE `EVENTS` ( `CHARACTER_SET_CLIENT` varchar(32) NOT NULL, `COLLATION_CONNECTION` varchar(64) NOT NULL, `DATABASE_COLLATION` varchar(64) NOT NULL -) DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci +) ENGINE=MEMORY DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci SHOW COLUMNS FROM information_schema.EVENTS; Field Type Null Key Default Extra EVENT_CATALOG 
varchar(64) NO NULL diff --git a/mysql-test/suite/funcs_1/r/is_routines.result b/mysql-test/suite/funcs_1/r/is_routines.result index 51477e441ad07..1660a2caabb8f 100644 --- a/mysql-test/suite/funcs_1/r/is_routines.result +++ b/mysql-test/suite/funcs_1/r/is_routines.result @@ -94,7 +94,7 @@ ROUTINES CREATE TEMPORARY TABLE `ROUTINES` ( `CHARACTER_SET_CLIENT` varchar(32) NOT NULL, `COLLATION_CONNECTION` varchar(64) NOT NULL, `DATABASE_COLLATION` varchar(64) NOT NULL -) DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci +) ENGINE=MEMORY DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci SHOW COLUMNS FROM information_schema.ROUTINES; Field Type Null Key Default Extra SPECIFIC_NAME varchar(64) NO NULL diff --git a/mysql-test/suite/funcs_1/r/is_routines_embedded.result b/mysql-test/suite/funcs_1/r/is_routines_embedded.result index 817817b01d238..b46f520bc489d 100644 --- a/mysql-test/suite/funcs_1/r/is_routines_embedded.result +++ b/mysql-test/suite/funcs_1/r/is_routines_embedded.result @@ -94,7 +94,7 @@ ROUTINES CREATE TEMPORARY TABLE `ROUTINES` ( `CHARACTER_SET_CLIENT` varchar(32) NOT NULL, `COLLATION_CONNECTION` varchar(64) NOT NULL, `DATABASE_COLLATION` varchar(64) NOT NULL -) DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci +) ENGINE=MEMORY DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci SHOW COLUMNS FROM information_schema.ROUTINES; Field Type Null Key Default Extra SPECIFIC_NAME varchar(64) NO NULL @@ -197,7 +197,7 @@ sp_6_408002_2 def db_datadict_2 sp_6_408002_2 PROCEDURE NULL NULL NULL NULL NUL SELECT * FROM db_datadict_2.res_6_408002_2; END NULL NULL SQL NO CONTAINS SQL NULL DEFINER YYYY-MM-DD hh:mm:ss YYYY-MM-DD hh:mm:ss root@localhost latin1 latin1_swedish_ci latin1_swedish_ci add_suppression def mtr add_suppression PROCEDURE NULL NULL NULL NULL NULL NULL NULL NULL SQL BEGIN INSERT INTO test_suppressions (pattern) VALUES (pattern); FLUSH NO_WRITE_TO_BINLOG TABLE test_suppressions; END NULL NULL SQL NO CONTAINS SQL NULL DEFINER YYYY-MM-DD hh:mm:ss YYYY-MM-DD hh:mm:ss root@localhost utf8mb3 utf8mb3_general_ci latin1_swedish_ci -check_testcase def mtr check_testcase PROCEDURE NULL NULL NULL NULL NULL NULL NULL NULL SQL BEGIN SELECT * FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE variable_name NOT IN ('timestamp') AND variable_name not like "Last_IO_Err*" AND variable_name != 'INNODB_IBUF_MAX_SIZE' AND variable_name != 'INNODB_LOG_FILE_BUFFERING' AND variable_name != 'INNODB_USE_NATIVE_AIO' AND variable_name != 'INNODB_BUFFER_POOL_LOAD_AT_STARTUP' AND variable_name not like 'GTID%POS' AND variable_name != 'GTID_BINLOG_STATE' AND variable_name != 'THREAD_POOL_SIZE' ORDER BY variable_name; SELECT * FROM INFORMATION_SCHEMA.SCHEMATA ORDER BY BINARY SCHEMA_NAME; SELECT * FROM INFORMATION_SCHEMA.SCHEMATA WHERE SCHEMA_NAME NOT IN ('mtr_wsrep_notify', 'wsrep_schema') ORDER BY BINARY SCHEMA_NAME; SELECT table_name AS tables_in_test FROM INFORMATION_SCHEMA.TABLES WHERE table_schema='test'; SELECT CONCAT(table_schema, '.', table_name) AS tables_in_mysql FROM INFORMATION_SCHEMA.TABLES WHERE table_schema='mysql' ORDER BY tables_in_mysql; SELECT CONCAT(table_schema, '.', table_name) AS columns_in_mysql, column_name, ordinal_position, column_default, is_nullable, data_type, character_maximum_length, character_octet_length, numeric_precision, numeric_scale, character_set_name, collation_name, column_type, column_key, extra, column_comment FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema='mysql' ORDER BY columns_in_mysql; SELECT * FROM INFORMATION_SCHEMA.EVENTS; SELECT * FROM 
INFORMATION_SCHEMA.TRIGGERS WHERE TRIGGER_NAME NOT IN ('gs_insert', 'ts_insert') AND TRIGGER_SCHEMA != 'sys'; SELECT * FROM INFORMATION_SCHEMA.ROUTINES WHERE ROUTINE_SCHEMA != 'sys'; SHOW STATUS LIKE 'slave_open_temp_tables'; checksum table mysql.columns_priv, mysql.db, mysql.func, mysql.help_category, mysql.help_keyword, mysql.help_relation, mysql.plugin, mysql.procs_priv, mysql.roles_mapping, mysql.tables_priv, mysql.time_zone, mysql.time_zone_leap_second, mysql.time_zone_name, mysql.time_zone_transition, mysql.time_zone_transition_type, mysql.global_priv; SELECT * FROM INFORMATION_SCHEMA.PLUGINS WHERE PLUGIN_STATUS != 'INACTIVE'; select * from information_schema.session_variables where variable_name = 'debug_sync'; END NULL NULL SQL NO CONTAINS SQL NULL DEFINER YYYY-MM-DD hh:mm:ss YYYY-MM-DD hh:mm:ss root@localhost utf8mb3 utf8mb3_general_ci latin1_swedish_ci +check_testcase def mtr check_testcase PROCEDURE NULL NULL NULL NULL NULL NULL NULL NULL SQL BEGIN SELECT * FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE variable_name NOT IN ('timestamp') AND variable_name not like "Last_IO_Err*" AND variable_name != 'INNODB_IBUF_MAX_SIZE' AND variable_name != 'INNODB_LOG_FILE_BUFFERING' AND variable_name != 'INNODB_USE_NATIVE_AIO' AND variable_name != 'INNODB_BUFFER_POOL_LOAD_AT_STARTUP' AND variable_name not like 'GTID%POS' AND variable_name != 'GTID_BINLOG_STATE' AND variable_name != 'THREAD_POOL_SIZE' ORDER BY variable_name; SELECT * FROM INFORMATION_SCHEMA.SCHEMATA ORDER BY BINARY SCHEMA_NAME; SELECT * FROM INFORMATION_SCHEMA.SCHEMATA WHERE SCHEMA_NAME NOT IN ('mtr_wsrep_notify', 'wsrep_schema') ORDER BY BINARY SCHEMA_NAME; SELECT table_name AS tables_in_test FROM INFORMATION_SCHEMA.TABLES WHERE table_schema='test'; SELECT CONCAT(table_schema, '.', table_name) AS tables_in_mysql FROM INFORMATION_SCHEMA.TABLES WHERE table_schema='mysql' ORDER BY tables_in_mysql; SELECT CONCAT(table_schema, '.', table_name) AS columns_in_mysql, column_name, ordinal_position, column_default, is_nullable, data_type, character_maximum_length, character_octet_length, numeric_precision, numeric_scale, character_set_name, collation_name, column_type, column_key, extra, column_comment FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema='mysql' ORDER BY columns_in_mysql, ordinal_position; SELECT * FROM INFORMATION_SCHEMA.EVENTS; SELECT * FROM INFORMATION_SCHEMA.TRIGGERS WHERE TRIGGER_NAME NOT IN ('gs_insert', 'ts_insert') AND TRIGGER_SCHEMA != 'sys'; SELECT * FROM INFORMATION_SCHEMA.ROUTINES WHERE ROUTINE_SCHEMA != 'sys'; SHOW STATUS LIKE 'slave_open_temp_tables'; checksum table mysql.columns_priv, mysql.db, mysql.func, mysql.help_category, mysql.help_keyword, mysql.help_relation, mysql.plugin, mysql.procs_priv, mysql.roles_mapping, mysql.tables_priv, mysql.time_zone, mysql.time_zone_leap_second, mysql.time_zone_name, mysql.time_zone_transition, mysql.time_zone_transition_type, mysql.global_priv; SELECT * FROM INFORMATION_SCHEMA.PLUGINS WHERE PLUGIN_STATUS != 'INACTIVE'; select * from information_schema.session_variables where variable_name = 'debug_sync'; END NULL NULL SQL NO CONTAINS SQL NULL DEFINER YYYY-MM-DD hh:mm:ss YYYY-MM-DD hh:mm:ss root@localhost utf8mb3 utf8mb3_general_ci latin1_swedish_ci check_warnings def mtr check_warnings PROCEDURE NULL NULL NULL NULL NULL NULL NULL NULL SQL BEGIN DECLARE `pos` bigint unsigned; SET SQL_LOG_BIN=0, SQL_SAFE_UPDATES=0; UPDATE error_log el, global_suppressions gs SET suspicious=0 WHERE el.suspicious=1 AND el.line REGEXP gs.pattern; UPDATE error_log el, test_suppressions ts 
SET suspicious=0 WHERE el.suspicious=1 AND el.line REGEXP ts.pattern; SELECT COUNT(*) INTO @num_warnings FROM error_log WHERE suspicious=1; IF @num_warnings > 0 THEN SELECT line FROM error_log WHERE suspicious=1; SELECT 2 INTO result; ELSE SELECT 0 INTO RESULT; END IF; TRUNCATE test_suppressions; DROP TABLE error_log; END NULL NULL SQL NO CONTAINS SQL NULL DEFINER YYYY-MM-DD hh:mm:ss YYYY-MM-DD hh:mm:ss root@localhost utf8mb3 utf8mb3_general_ci latin1_swedish_ci AddGeometryColumn def mysql AddGeometryColumn PROCEDURE NULL NULL NULL NULL NULL NULL NULL NULL SQL begin set @qwe= concat('ALTER TABLE ', t_schema, '.', t_name, ' ADD ', geometry_column,' GEOMETRY REF_SYSTEM_ID=', t_srid); PREPARE ls from @qwe; execute ls; deallocate prepare ls; end NULL NULL SQL NO CONTAINS SQL NULL INVOKER YYYY-MM-DD hh:mm:ss YYYY-MM-DD hh:mm:ss mariadb.sys@localhost latin1 latin1_swedish_ci latin1_swedish_ci @@ -213,7 +213,7 @@ sp_6_408002_2 def db_datadict_2 sp_6_408002_2 PROCEDURE NULL NULL NULL NULL NUL SELECT * FROM db_datadict_2.res_6_408002_2; END NULL NULL SQL NO CONTAINS SQL NULL DEFINER YYYY-MM-DD hh:mm:ss YYYY-MM-DD hh:mm:ss root@localhost latin1 latin1_swedish_ci latin1_swedish_ci add_suppression def mtr add_suppression PROCEDURE NULL NULL NULL NULL NULL NULL NULL NULL SQL BEGIN INSERT INTO test_suppressions (pattern) VALUES (pattern); FLUSH NO_WRITE_TO_BINLOG TABLE test_suppressions; END NULL NULL SQL NO CONTAINS SQL NULL DEFINER YYYY-MM-DD hh:mm:ss YYYY-MM-DD hh:mm:ss root@localhost utf8mb3 utf8mb3_general_ci latin1_swedish_ci -check_testcase def mtr check_testcase PROCEDURE NULL NULL NULL NULL NULL NULL NULL NULL SQL BEGIN SELECT * FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE variable_name NOT IN ('timestamp') AND variable_name not like "Last_IO_Err*" AND variable_name != 'INNODB_IBUF_MAX_SIZE' AND variable_name != 'INNODB_LOG_FILE_BUFFERING' AND variable_name != 'INNODB_USE_NATIVE_AIO' AND variable_name != 'INNODB_BUFFER_POOL_LOAD_AT_STARTUP' AND variable_name not like 'GTID%POS' AND variable_name != 'GTID_BINLOG_STATE' AND variable_name != 'THREAD_POOL_SIZE' ORDER BY variable_name; SELECT * FROM INFORMATION_SCHEMA.SCHEMATA ORDER BY BINARY SCHEMA_NAME; SELECT * FROM INFORMATION_SCHEMA.SCHEMATA WHERE SCHEMA_NAME NOT IN ('mtr_wsrep_notify', 'wsrep_schema') ORDER BY BINARY SCHEMA_NAME; SELECT table_name AS tables_in_test FROM INFORMATION_SCHEMA.TABLES WHERE table_schema='test'; SELECT CONCAT(table_schema, '.', table_name) AS tables_in_mysql FROM INFORMATION_SCHEMA.TABLES WHERE table_schema='mysql' ORDER BY tables_in_mysql; SELECT CONCAT(table_schema, '.', table_name) AS columns_in_mysql, column_name, ordinal_position, column_default, is_nullable, data_type, character_maximum_length, character_octet_length, numeric_precision, numeric_scale, character_set_name, collation_name, column_type, column_key, extra, column_comment FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema='mysql' ORDER BY columns_in_mysql; SELECT * FROM INFORMATION_SCHEMA.EVENTS; SELECT * FROM INFORMATION_SCHEMA.TRIGGERS WHERE TRIGGER_NAME NOT IN ('gs_insert', 'ts_insert') AND TRIGGER_SCHEMA != 'sys'; SELECT * FROM INFORMATION_SCHEMA.ROUTINES WHERE ROUTINE_SCHEMA != 'sys'; SHOW STATUS LIKE 'slave_open_temp_tables'; checksum table mysql.columns_priv, mysql.db, mysql.func, mysql.help_category, mysql.help_keyword, mysql.help_relation, mysql.plugin, mysql.procs_priv, mysql.roles_mapping, mysql.tables_priv, mysql.time_zone, mysql.time_zone_leap_second, mysql.time_zone_name, mysql.time_zone_transition, mysql.time_zone_transition_type, 
mysql.global_priv; SELECT * FROM INFORMATION_SCHEMA.PLUGINS WHERE PLUGIN_STATUS != 'INACTIVE'; select * from information_schema.session_variables where variable_name = 'debug_sync'; END NULL NULL SQL NO CONTAINS SQL NULL DEFINER YYYY-MM-DD hh:mm:ss YYYY-MM-DD hh:mm:ss root@localhost utf8mb3 utf8mb3_general_ci latin1_swedish_ci +check_testcase def mtr check_testcase PROCEDURE NULL NULL NULL NULL NULL NULL NULL NULL SQL BEGIN SELECT * FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE variable_name NOT IN ('timestamp') AND variable_name not like "Last_IO_Err*" AND variable_name != 'INNODB_IBUF_MAX_SIZE' AND variable_name != 'INNODB_LOG_FILE_BUFFERING' AND variable_name != 'INNODB_USE_NATIVE_AIO' AND variable_name != 'INNODB_BUFFER_POOL_LOAD_AT_STARTUP' AND variable_name not like 'GTID%POS' AND variable_name != 'GTID_BINLOG_STATE' AND variable_name != 'THREAD_POOL_SIZE' ORDER BY variable_name; SELECT * FROM INFORMATION_SCHEMA.SCHEMATA ORDER BY BINARY SCHEMA_NAME; SELECT * FROM INFORMATION_SCHEMA.SCHEMATA WHERE SCHEMA_NAME NOT IN ('mtr_wsrep_notify', 'wsrep_schema') ORDER BY BINARY SCHEMA_NAME; SELECT table_name AS tables_in_test FROM INFORMATION_SCHEMA.TABLES WHERE table_schema='test'; SELECT CONCAT(table_schema, '.', table_name) AS tables_in_mysql FROM INFORMATION_SCHEMA.TABLES WHERE table_schema='mysql' ORDER BY tables_in_mysql; SELECT CONCAT(table_schema, '.', table_name) AS columns_in_mysql, column_name, ordinal_position, column_default, is_nullable, data_type, character_maximum_length, character_octet_length, numeric_precision, numeric_scale, character_set_name, collation_name, column_type, column_key, extra, column_comment FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema='mysql' ORDER BY columns_in_mysql, ordinal_position; SELECT * FROM INFORMATION_SCHEMA.EVENTS; SELECT * FROM INFORMATION_SCHEMA.TRIGGERS WHERE TRIGGER_NAME NOT IN ('gs_insert', 'ts_insert') AND TRIGGER_SCHEMA != 'sys'; SELECT * FROM INFORMATION_SCHEMA.ROUTINES WHERE ROUTINE_SCHEMA != 'sys'; SHOW STATUS LIKE 'slave_open_temp_tables'; checksum table mysql.columns_priv, mysql.db, mysql.func, mysql.help_category, mysql.help_keyword, mysql.help_relation, mysql.plugin, mysql.procs_priv, mysql.roles_mapping, mysql.tables_priv, mysql.time_zone, mysql.time_zone_leap_second, mysql.time_zone_name, mysql.time_zone_transition, mysql.time_zone_transition_type, mysql.global_priv; SELECT * FROM INFORMATION_SCHEMA.PLUGINS WHERE PLUGIN_STATUS != 'INACTIVE'; select * from information_schema.session_variables where variable_name = 'debug_sync'; END NULL NULL SQL NO CONTAINS SQL NULL DEFINER YYYY-MM-DD hh:mm:ss YYYY-MM-DD hh:mm:ss root@localhost utf8mb3 utf8mb3_general_ci latin1_swedish_ci check_warnings def mtr check_warnings PROCEDURE NULL NULL NULL NULL NULL NULL NULL NULL SQL BEGIN DECLARE `pos` bigint unsigned; SET SQL_LOG_BIN=0, SQL_SAFE_UPDATES=0; UPDATE error_log el, global_suppressions gs SET suspicious=0 WHERE el.suspicious=1 AND el.line REGEXP gs.pattern; UPDATE error_log el, test_suppressions ts SET suspicious=0 WHERE el.suspicious=1 AND el.line REGEXP ts.pattern; SELECT COUNT(*) INTO @num_warnings FROM error_log WHERE suspicious=1; IF @num_warnings > 0 THEN SELECT line FROM error_log WHERE suspicious=1; SELECT 2 INTO result; ELSE SELECT 0 INTO RESULT; END IF; TRUNCATE test_suppressions; DROP TABLE error_log; END NULL NULL SQL NO CONTAINS SQL NULL DEFINER YYYY-MM-DD hh:mm:ss YYYY-MM-DD hh:mm:ss root@localhost utf8mb3 utf8mb3_general_ci latin1_swedish_ci AddGeometryColumn def mysql AddGeometryColumn PROCEDURE NULL NULL NULL NULL 
NULL NULL NULL NULL SQL begin set @qwe= concat('ALTER TABLE ', t_schema, '.', t_name, ' ADD ', geometry_column,' GEOMETRY REF_SYSTEM_ID=', t_srid); PREPARE ls from @qwe; execute ls; deallocate prepare ls; end NULL NULL SQL NO CONTAINS SQL NULL INVOKER YYYY-MM-DD hh:mm:ss YYYY-MM-DD hh:mm:ss mariadb.sys@localhost latin1 latin1_swedish_ci latin1_swedish_ci @@ -229,7 +229,7 @@ sp_6_408002_2 def db_datadict_2 sp_6_408002_2 PROCEDURE NULL NULL NULL NULL NUL SELECT * FROM db_datadict_2.res_6_408002_2; END NULL NULL SQL NO CONTAINS SQL NULL DEFINER YYYY-MM-DD hh:mm:ss YYYY-MM-DD hh:mm:ss root@localhost latin1 latin1_swedish_ci latin1_swedish_ci add_suppression def mtr add_suppression PROCEDURE NULL NULL NULL NULL NULL NULL NULL NULL SQL BEGIN INSERT INTO test_suppressions (pattern) VALUES (pattern); FLUSH NO_WRITE_TO_BINLOG TABLE test_suppressions; END NULL NULL SQL NO CONTAINS SQL NULL DEFINER YYYY-MM-DD hh:mm:ss YYYY-MM-DD hh:mm:ss root@localhost utf8mb3 utf8mb3_general_ci latin1_swedish_ci -check_testcase def mtr check_testcase PROCEDURE NULL NULL NULL NULL NULL NULL NULL NULL SQL BEGIN SELECT * FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE variable_name NOT IN ('timestamp') AND variable_name not like "Last_IO_Err*" AND variable_name != 'INNODB_IBUF_MAX_SIZE' AND variable_name != 'INNODB_LOG_FILE_BUFFERING' AND variable_name != 'INNODB_USE_NATIVE_AIO' AND variable_name != 'INNODB_BUFFER_POOL_LOAD_AT_STARTUP' AND variable_name not like 'GTID%POS' AND variable_name != 'GTID_BINLOG_STATE' AND variable_name != 'THREAD_POOL_SIZE' ORDER BY variable_name; SELECT * FROM INFORMATION_SCHEMA.SCHEMATA ORDER BY BINARY SCHEMA_NAME; SELECT * FROM INFORMATION_SCHEMA.SCHEMATA WHERE SCHEMA_NAME NOT IN ('mtr_wsrep_notify', 'wsrep_schema') ORDER BY BINARY SCHEMA_NAME; SELECT table_name AS tables_in_test FROM INFORMATION_SCHEMA.TABLES WHERE table_schema='test'; SELECT CONCAT(table_schema, '.', table_name) AS tables_in_mysql FROM INFORMATION_SCHEMA.TABLES WHERE table_schema='mysql' ORDER BY tables_in_mysql; SELECT CONCAT(table_schema, '.', table_name) AS columns_in_mysql, column_name, ordinal_position, column_default, is_nullable, data_type, character_maximum_length, character_octet_length, numeric_precision, numeric_scale, character_set_name, collation_name, column_type, column_key, extra, column_comment FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema='mysql' ORDER BY columns_in_mysql; SELECT * FROM INFORMATION_SCHEMA.EVENTS; SELECT * FROM INFORMATION_SCHEMA.TRIGGERS WHERE TRIGGER_NAME NOT IN ('gs_insert', 'ts_insert') AND TRIGGER_SCHEMA != 'sys'; SELECT * FROM INFORMATION_SCHEMA.ROUTINES WHERE ROUTINE_SCHEMA != 'sys'; SHOW STATUS LIKE 'slave_open_temp_tables'; checksum table mysql.columns_priv, mysql.db, mysql.func, mysql.help_category, mysql.help_keyword, mysql.help_relation, mysql.plugin, mysql.procs_priv, mysql.roles_mapping, mysql.tables_priv, mysql.time_zone, mysql.time_zone_leap_second, mysql.time_zone_name, mysql.time_zone_transition, mysql.time_zone_transition_type, mysql.global_priv; SELECT * FROM INFORMATION_SCHEMA.PLUGINS WHERE PLUGIN_STATUS != 'INACTIVE'; select * from information_schema.session_variables where variable_name = 'debug_sync'; END NULL NULL SQL NO CONTAINS SQL NULL DEFINER YYYY-MM-DD hh:mm:ss YYYY-MM-DD hh:mm:ss root@localhost utf8mb3 utf8mb3_general_ci latin1_swedish_ci +check_testcase def mtr check_testcase PROCEDURE NULL NULL NULL NULL NULL NULL NULL NULL SQL BEGIN SELECT * FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE variable_name NOT IN ('timestamp') AND variable_name not like 
"Last_IO_Err*" AND variable_name != 'INNODB_IBUF_MAX_SIZE' AND variable_name != 'INNODB_LOG_FILE_BUFFERING' AND variable_name != 'INNODB_USE_NATIVE_AIO' AND variable_name != 'INNODB_BUFFER_POOL_LOAD_AT_STARTUP' AND variable_name not like 'GTID%POS' AND variable_name != 'GTID_BINLOG_STATE' AND variable_name != 'THREAD_POOL_SIZE' ORDER BY variable_name; SELECT * FROM INFORMATION_SCHEMA.SCHEMATA ORDER BY BINARY SCHEMA_NAME; SELECT * FROM INFORMATION_SCHEMA.SCHEMATA WHERE SCHEMA_NAME NOT IN ('mtr_wsrep_notify', 'wsrep_schema') ORDER BY BINARY SCHEMA_NAME; SELECT table_name AS tables_in_test FROM INFORMATION_SCHEMA.TABLES WHERE table_schema='test'; SELECT CONCAT(table_schema, '.', table_name) AS tables_in_mysql FROM INFORMATION_SCHEMA.TABLES WHERE table_schema='mysql' ORDER BY tables_in_mysql; SELECT CONCAT(table_schema, '.', table_name) AS columns_in_mysql, column_name, ordinal_position, column_default, is_nullable, data_type, character_maximum_length, character_octet_length, numeric_precision, numeric_scale, character_set_name, collation_name, column_type, column_key, extra, column_comment FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema='mysql' ORDER BY columns_in_mysql, ordinal_position; SELECT * FROM INFORMATION_SCHEMA.EVENTS; SELECT * FROM INFORMATION_SCHEMA.TRIGGERS WHERE TRIGGER_NAME NOT IN ('gs_insert', 'ts_insert') AND TRIGGER_SCHEMA != 'sys'; SELECT * FROM INFORMATION_SCHEMA.ROUTINES WHERE ROUTINE_SCHEMA != 'sys'; SHOW STATUS LIKE 'slave_open_temp_tables'; checksum table mysql.columns_priv, mysql.db, mysql.func, mysql.help_category, mysql.help_keyword, mysql.help_relation, mysql.plugin, mysql.procs_priv, mysql.roles_mapping, mysql.tables_priv, mysql.time_zone, mysql.time_zone_leap_second, mysql.time_zone_name, mysql.time_zone_transition, mysql.time_zone_transition_type, mysql.global_priv; SELECT * FROM INFORMATION_SCHEMA.PLUGINS WHERE PLUGIN_STATUS != 'INACTIVE'; select * from information_schema.session_variables where variable_name = 'debug_sync'; END NULL NULL SQL NO CONTAINS SQL NULL DEFINER YYYY-MM-DD hh:mm:ss YYYY-MM-DD hh:mm:ss root@localhost utf8mb3 utf8mb3_general_ci latin1_swedish_ci check_warnings def mtr check_warnings PROCEDURE NULL NULL NULL NULL NULL NULL NULL NULL SQL BEGIN DECLARE `pos` bigint unsigned; SET SQL_LOG_BIN=0, SQL_SAFE_UPDATES=0; UPDATE error_log el, global_suppressions gs SET suspicious=0 WHERE el.suspicious=1 AND el.line REGEXP gs.pattern; UPDATE error_log el, test_suppressions ts SET suspicious=0 WHERE el.suspicious=1 AND el.line REGEXP ts.pattern; SELECT COUNT(*) INTO @num_warnings FROM error_log WHERE suspicious=1; IF @num_warnings > 0 THEN SELECT line FROM error_log WHERE suspicious=1; SELECT 2 INTO result; ELSE SELECT 0 INTO RESULT; END IF; TRUNCATE test_suppressions; DROP TABLE error_log; END NULL NULL SQL NO CONTAINS SQL NULL DEFINER YYYY-MM-DD hh:mm:ss YYYY-MM-DD hh:mm:ss root@localhost utf8mb3 utf8mb3_general_ci latin1_swedish_ci AddGeometryColumn def mysql AddGeometryColumn PROCEDURE NULL NULL NULL NULL NULL NULL NULL NULL SQL begin set @qwe= concat('ALTER TABLE ', t_schema, '.', t_name, ' ADD ', geometry_column,' GEOMETRY REF_SYSTEM_ID=', t_srid); PREPARE ls from @qwe; execute ls; deallocate prepare ls; end NULL NULL SQL NO CONTAINS SQL NULL INVOKER YYYY-MM-DD hh:mm:ss YYYY-MM-DD hh:mm:ss mariadb.sys@localhost latin1 latin1_swedish_ci latin1_swedish_ci diff --git a/mysql-test/suite/funcs_1/r/is_tables_is.result b/mysql-test/suite/funcs_1/r/is_tables_is.result index c18f733c86f06..5758a4fe5b40c 100644 --- 
a/mysql-test/suite/funcs_1/r/is_tables_is.result +++ b/mysql-test/suite/funcs_1/r/is_tables_is.result @@ -16,9 +16,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME ALL_PLUGINS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -91,9 +91,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME CHECK_CONSTRAINTS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -191,9 +191,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME COLUMNS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -291,9 +291,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME EVENTS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -516,9 +516,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME OPTIMIZER_TRACE TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -541,9 +541,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME PARAMETERS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -566,9 +566,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME PARTITIONS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -591,9 +591,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME PLUGINS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -616,9 +616,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME PROCESSLIST TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -666,9 +666,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME ROUTINES TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -866,9 +866,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME SYSTEM_VARIABLES TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -1016,9 +1016,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME TRIGGERS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -1091,9 +1091,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME VIEWS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH 
#ARL# DATA_LENGTH #DL# @@ -1132,9 +1132,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME ALL_PLUGINS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -1207,9 +1207,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME CHECK_CONSTRAINTS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -1307,9 +1307,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME COLUMNS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -1407,9 +1407,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME EVENTS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -1632,9 +1632,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME OPTIMIZER_TRACE TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -1657,9 +1657,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME PARAMETERS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -1682,9 +1682,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME PARTITIONS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -1707,9 +1707,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME PLUGINS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -1732,9 +1732,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME PROCESSLIST TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -1782,9 +1782,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME ROUTINES TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -1982,9 +1982,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME SYSTEM_VARIABLES TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -2132,9 +2132,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME TRIGGERS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -2207,9 +2207,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME VIEWS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# diff --git 
a/mysql-test/suite/funcs_1/r/is_tables_is_embedded.result b/mysql-test/suite/funcs_1/r/is_tables_is_embedded.result index c18f733c86f06..5758a4fe5b40c 100644 --- a/mysql-test/suite/funcs_1/r/is_tables_is_embedded.result +++ b/mysql-test/suite/funcs_1/r/is_tables_is_embedded.result @@ -16,9 +16,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME ALL_PLUGINS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -91,9 +91,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME CHECK_CONSTRAINTS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -191,9 +191,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME COLUMNS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -291,9 +291,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME EVENTS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -516,9 +516,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME OPTIMIZER_TRACE TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -541,9 +541,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME PARAMETERS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -566,9 +566,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME PARTITIONS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -591,9 +591,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME PLUGINS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -616,9 +616,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME PROCESSLIST TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -666,9 +666,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME ROUTINES TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -866,9 +866,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME SYSTEM_VARIABLES TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -1016,9 +1016,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME TRIGGERS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -1091,9 +1091,9 @@ TABLE_CATALOG def TABLE_SCHEMA 
information_schema TABLE_NAME VIEWS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -1132,9 +1132,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME ALL_PLUGINS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -1207,9 +1207,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME CHECK_CONSTRAINTS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -1307,9 +1307,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME COLUMNS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -1407,9 +1407,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME EVENTS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -1632,9 +1632,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME OPTIMIZER_TRACE TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -1657,9 +1657,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME PARAMETERS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -1682,9 +1682,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME PARTITIONS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -1707,9 +1707,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME PLUGINS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -1732,9 +1732,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME PROCESSLIST TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -1782,9 +1782,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME ROUTINES TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -1982,9 +1982,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME SYSTEM_VARIABLES TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -2132,9 +2132,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME TRIGGERS TABLE_TYPE SYSTEM VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# @@ -2207,9 +2207,9 @@ TABLE_CATALOG def TABLE_SCHEMA information_schema TABLE_NAME VIEWS TABLE_TYPE SYSTEM 
VIEW -ENGINE MYISAM_OR_MARIA +ENGINE MEMORY VERSION 11 -ROW_FORMAT DYNAMIC_OR_PAGE +ROW_FORMAT Fixed TABLE_ROWS #TBLR# AVG_ROW_LENGTH #ARL# DATA_LENGTH #DL# diff --git a/mysql-test/suite/funcs_1/r/is_triggers.result b/mysql-test/suite/funcs_1/r/is_triggers.result index 7c0a27b85c2fc..d2bb50a9e9b77 100644 --- a/mysql-test/suite/funcs_1/r/is_triggers.result +++ b/mysql-test/suite/funcs_1/r/is_triggers.result @@ -77,7 +77,7 @@ TRIGGERS CREATE TEMPORARY TABLE `TRIGGERS` ( `CHARACTER_SET_CLIENT` varchar(32) NOT NULL, `COLLATION_CONNECTION` varchar(64) NOT NULL, `DATABASE_COLLATION` varchar(64) NOT NULL -) DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci +) ENGINE=MEMORY DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci SHOW COLUMNS FROM information_schema.TRIGGERS; Field Type Null Key Default Extra TRIGGER_CATALOG varchar(512) NO NULL diff --git a/mysql-test/suite/funcs_1/r/is_triggers_embedded.result b/mysql-test/suite/funcs_1/r/is_triggers_embedded.result index 6b0406a3a3a23..9d988417248c9 100644 --- a/mysql-test/suite/funcs_1/r/is_triggers_embedded.result +++ b/mysql-test/suite/funcs_1/r/is_triggers_embedded.result @@ -77,7 +77,7 @@ TRIGGERS CREATE TEMPORARY TABLE `TRIGGERS` ( `CHARACTER_SET_CLIENT` varchar(32) NOT NULL, `COLLATION_CONNECTION` varchar(64) NOT NULL, `DATABASE_COLLATION` varchar(64) NOT NULL -) DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci +) ENGINE=MEMORY DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci SHOW COLUMNS FROM information_schema.TRIGGERS; Field Type Null Key Default Extra TRIGGER_CATALOG varchar(512) NO NULL diff --git a/mysql-test/suite/funcs_1/r/is_views.result b/mysql-test/suite/funcs_1/r/is_views.result index 6a86e7464a0fb..c67b372937f9b 100644 --- a/mysql-test/suite/funcs_1/r/is_views.result +++ b/mysql-test/suite/funcs_1/r/is_views.result @@ -53,7 +53,7 @@ VIEWS CREATE TEMPORARY TABLE `VIEWS` ( `CHARACTER_SET_CLIENT` varchar(32) NOT NULL, `COLLATION_CONNECTION` varchar(64) NOT NULL, `ALGORITHM` varchar(10) NOT NULL -) DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci +) ENGINE=MEMORY DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci SHOW COLUMNS FROM information_schema.VIEWS; Field Type Null Key Default Extra TABLE_CATALOG varchar(512) NO NULL diff --git a/mysql-test/suite/funcs_1/r/is_views_embedded.result b/mysql-test/suite/funcs_1/r/is_views_embedded.result index f64562aadd164..67faf6b30ccfa 100644 --- a/mysql-test/suite/funcs_1/r/is_views_embedded.result +++ b/mysql-test/suite/funcs_1/r/is_views_embedded.result @@ -53,7 +53,7 @@ VIEWS CREATE TEMPORARY TABLE `VIEWS` ( `CHARACTER_SET_CLIENT` varchar(32) NOT NULL, `COLLATION_CONNECTION` varchar(64) NOT NULL, `ALGORITHM` varchar(10) NOT NULL -) DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci +) ENGINE=MEMORY DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci SHOW COLUMNS FROM information_schema.VIEWS; Field Type Null Key Default Extra TABLE_CATALOG varchar(512) NO NULL diff --git a/mysql-test/suite/funcs_1/r/processlist_priv_no_prot.result b/mysql-test/suite/funcs_1/r/processlist_priv_no_prot.result index 2bba1c0616276..dfa0d7e4fc5d6 100644 --- a/mysql-test/suite/funcs_1/r/processlist_priv_no_prot.result +++ b/mysql-test/suite/funcs_1/r/processlist_priv_no_prot.result @@ -44,7 +44,7 @@ PROCESSLIST CREATE TEMPORARY TABLE `PROCESSLIST` ( `QUERY_ID` bigint(4) NOT NULL, `INFO_BINARY` blob, `TID` bigint(4) NOT NULL -) DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci +) ENGINE=MEMORY DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci SHOW processlist; Id User Host db Command Time State Info 
Progress ID root HOST_NAME information_schema Query TIME starting SHOW processlist TIME_MS @@ -124,7 +124,7 @@ PROCESSLIST CREATE TEMPORARY TABLE `PROCESSLIST` ( `QUERY_ID` bigint(4) NOT NULL, `INFO_BINARY` blob, `TID` bigint(4) NOT NULL -) DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci +) ENGINE=MEMORY DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci SHOW processlist; Id User Host db Command Time State Info Progress ID ddicttestuser1 HOST_NAME information_schema Query TIME starting SHOW processlist TIME_MS diff --git a/mysql-test/suite/funcs_1/r/processlist_priv_ps.result b/mysql-test/suite/funcs_1/r/processlist_priv_ps.result index 94bc1544c071b..8dff4e171051d 100644 --- a/mysql-test/suite/funcs_1/r/processlist_priv_ps.result +++ b/mysql-test/suite/funcs_1/r/processlist_priv_ps.result @@ -44,7 +44,7 @@ PROCESSLIST CREATE TEMPORARY TABLE `PROCESSLIST` ( `QUERY_ID` bigint(4) NOT NULL, `INFO_BINARY` blob, `TID` bigint(4) NOT NULL -) DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci +) ENGINE=MEMORY DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci SHOW processlist; Id User Host db Command Time State Info Progress ID root HOST_NAME information_schema Query TIME starting SHOW processlist TIME_MS @@ -124,7 +124,7 @@ PROCESSLIST CREATE TEMPORARY TABLE `PROCESSLIST` ( `QUERY_ID` bigint(4) NOT NULL, `INFO_BINARY` blob, `TID` bigint(4) NOT NULL -) DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci +) ENGINE=MEMORY DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci SHOW processlist; Id User Host db Command Time State Info Progress ID ddicttestuser1 HOST_NAME information_schema Query TIME starting SHOW processlist TIME_MS diff --git a/mysql-test/suite/funcs_1/r/processlist_val_no_prot.result b/mysql-test/suite/funcs_1/r/processlist_val_no_prot.result index dba8de65fc035..5153ae313438a 100644 --- a/mysql-test/suite/funcs_1/r/processlist_val_no_prot.result +++ b/mysql-test/suite/funcs_1/r/processlist_val_no_prot.result @@ -30,7 +30,7 @@ PROCESSLIST CREATE TEMPORARY TABLE `PROCESSLIST` ( `QUERY_ID` bigint(4) NOT NULL, `INFO_BINARY` blob, `TID` bigint(4) NOT NULL -) DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci +) ENGINE=MEMORY DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci # Ensure that the information about the own connection is correct. #-------------------------------------------------------------------------- diff --git a/mysql-test/suite/funcs_1/r/processlist_val_ps.result b/mysql-test/suite/funcs_1/r/processlist_val_ps.result index 0806f00fc6acc..06ff8bd2d6818 100644 --- a/mysql-test/suite/funcs_1/r/processlist_val_ps.result +++ b/mysql-test/suite/funcs_1/r/processlist_val_ps.result @@ -30,7 +30,7 @@ PROCESSLIST CREATE TEMPORARY TABLE `PROCESSLIST` ( `QUERY_ID` bigint(4) NOT NULL, `INFO_BINARY` blob, `TID` bigint(4) NOT NULL -) DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci +) ENGINE=MEMORY DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci # Ensure that the information about the own connection is correct. 
#-------------------------------------------------------------------------- diff --git a/mysql-test/suite/heap/blob_dedup.result b/mysql-test/suite/heap/blob_dedup.result new file mode 100644 index 0000000000000..66149ce791a3e --- /dev/null +++ b/mysql-test/suite/heap/blob_dedup.result @@ -0,0 +1,15 @@ +CREATE TABLE t1 (a mediumtext) ENGINE=HEAP; +INSERT INTO t1 VALUES ('abc'),('def'); +SELECT DISTINCT a FROM t1; +a +abc +def +DROP TABLE t1; +CREATE TABLE t1 (a mediumtext); +CREATE TABLE t2 (b varchar(20)); +INSERT INTO t1 VALUES ('a'),('b'); +SELECT left(a,100000000) FROM t1 UNION SELECT b FROM t2; +left(a,100000000) +a +b +DROP TABLE t1, t2; diff --git a/mysql-test/suite/heap/blob_dedup.test b/mysql-test/suite/heap/blob_dedup.test new file mode 100644 index 0000000000000..16892d92ef4b7 --- /dev/null +++ b/mysql-test/suite/heap/blob_dedup.test @@ -0,0 +1,10 @@ +CREATE TABLE t1 (a mediumtext) ENGINE=HEAP; +INSERT INTO t1 VALUES ('abc'),('def'); +SELECT DISTINCT a FROM t1; +DROP TABLE t1; + +CREATE TABLE t1 (a mediumtext); +CREATE TABLE t2 (b varchar(20)); +INSERT INTO t1 VALUES ('a'),('b'); +SELECT left(a,100000000) FROM t1 UNION SELECT b FROM t2; +DROP TABLE t1, t2; diff --git a/mysql-test/suite/heap/heap_blob.result b/mysql-test/suite/heap/heap_blob.result new file mode 100644 index 0000000000000..83b1c97203774 --- /dev/null +++ b/mysql-test/suite/heap/heap_blob.result @@ -0,0 +1,602 @@ +drop table if exists t1,t2; +# +# Basic CRUD with BLOB column +# +create table t1 (a int not null, b blob, primary key(a)) engine=memory; +insert into t1 values (1, 'hello'), (2, 'world'); +select * from t1 order by a; +a b +1 hello +2 world +select * from t1 where a=1; +a b +1 hello +select * from t1 where a=2; +a b +2 world +update t1 set b='updated' where a=1; +select * from t1 order by a; +a b +1 updated +2 world +delete from t1 where a=2; +select * from t1 order by a; +a b +1 updated +insert into t1 values (3, 'new row'); +select * from t1 order by a; +a b +1 updated +3 new row +drop table t1; +# +# Multiple BLOB/TEXT columns of different types +# +create table t1 ( +id int not null auto_increment, +t tinyblob, +b blob, +m mediumblob, +tx text, +primary key(id) +) engine=memory; +insert into t1 (t, b, m, tx) values +('tiny1', 'blob1', 'medium1', 'text1'), +('tiny2', 'blob2', 'medium2', 'text2'); +select * from t1 order by id; +id t b m tx +1 tiny1 blob1 medium1 text1 +2 tiny2 blob2 medium2 text2 +update t1 set b='blob_updated', tx='text_updated' where id=1; +select * from t1 order by id; +id t b m tx +1 tiny1 blob_updated medium1 text_updated +2 tiny2 blob2 medium2 text2 +delete from t1 where id=2; +select * from t1 order by id; +id t b m tx +1 tiny1 blob_updated medium1 text_updated +drop table t1; +# +# NULL and empty blob values +# +create table t1 (a int not null, b blob, c text, primary key(a)) engine=memory; +insert into t1 values (1, NULL, NULL); +insert into t1 values (2, '', ''); +insert into t1 values (3, 'data', 'text'); +select a, b, c, length(b), length(c) from t1 order by a; +a b c length(b) length(c) +1 NULL NULL NULL NULL +2 0 0 +3 data text 4 4 +update t1 set b=NULL where a=3; +select a, b, c, length(b), length(c) from t1 order by a; +a b c length(b) length(c) +1 NULL NULL NULL NULL +2 0 0 +3 NULL text NULL 4 +update t1 set b='restored' where a=3; +select a, b, c, length(b), length(c) from t1 order by a; +a b c length(b) length(c) +1 NULL NULL NULL NULL +2 0 0 +3 restored text 8 4 +drop table t1; +# +# Large BLOBs spanning multiple continuation runs +# For (int, blob): recbuffer=16, 
visible=15, leaf block ~1021 slots. +# Max run payload ~15305 bytes. Sizes chosen to span multiple runs +# and not align to visible (15 bytes). +# +create table t1 (a int not null, b blob, primary key(a)) engine=memory; +insert into t1 values (1, repeat('A', 1000)); +insert into t1 values (2, repeat('B', 50000)); +insert into t1 values (3, repeat('C', 63001)); +select a, length(b), left(b, 5), right(b, 5) from t1 order by a; +a length(b) left(b, 5) right(b, 5) +1 1000 AAAAA AAAAA +2 50000 BBBBB BBBBB +3 63001 CCCCC CCCCC +select a from t1 where b = repeat('A', 1000); +a +1 +select a from t1 where b = repeat('B', 50000); +a +2 +select a from t1 where b = repeat('C', 63001); +a +3 +update t1 set b=repeat('D', 63001) where a=1; +select a, length(b), left(b, 5), right(b, 5) from t1 order by a; +a length(b) left(b, 5) right(b, 5) +1 63001 DDDDD DDDDD +2 50000 BBBBB BBBBB +3 63001 CCCCC CCCCC +select a from t1 where b = repeat('D', 63001); +a +1 +update t1 set b=repeat('E', 100) where a=2; +select a, length(b), left(b, 5), right(b, 5) from t1 order by a; +a length(b) left(b, 5) right(b, 5) +1 63001 DDDDD DDDDD +2 100 EEEEE EEEEE +3 63001 CCCCC CCCCC +drop table t1; +# +# Mixed operations: insert, delete, insert (free list reuse) +# +create table t1 (a int not null, b blob, primary key(a)) engine=memory; +insert into t1 values (1, repeat('X', 20000)); +insert into t1 values (2, repeat('Y', 50000)); +insert into t1 values (3, repeat('Z', 10000)); +delete from t1 where a=2; +insert into t1 values (4, repeat('W', 40000)); +select a, length(b) from t1 order by a; +a length(b) +1 20000 +3 10000 +4 40000 +select a from t1 where b = repeat('X', 20000); +a +1 +select a from t1 where b = repeat('Z', 10000); +a +3 +select a from t1 where b = repeat('W', 40000); +a +4 +delete from t1; +insert into t1 values (10, repeat('R', 50000)); +insert into t1 values (20, repeat('S', 50000)); +select a, length(b) from t1 order by a; +a length(b) +10 50000 +20 50000 +drop table t1; +# +# Free list fragmentation: NULL-blob rows interleaved with large-blob rows +# +# When rows with NULL blobs and rows with large blobs are deleted, the +# free list gets primary-record slots (from NULL rows) interleaved between +# continuation slots (from large-blob rows). The peek-then-unlink +# algorithm must find contiguous continuation groups despite these +# interleaving primary slots breaking address contiguity. +# +# After deleting all rows and reinserting, the new blob data must be +# correct — verifying no free list corruption from the fragmented state. 
+# +create table t1 (a int not null, b blob, primary key(a)) engine=memory; +insert into t1 values (1, NULL); +insert into t1 values (2, repeat('A', 20000)); +insert into t1 values (3, NULL); +insert into t1 values (4, repeat('B', 30000)); +insert into t1 values (5, NULL); +insert into t1 values (6, repeat('C', 25000)); +select a, length(b) from t1 order by a; +a length(b) +1 NULL +2 20000 +3 NULL +4 30000 +5 NULL +6 25000 +delete from t1 where a=2; +delete from t1 where a=4; +delete from t1 where a=6; +delete from t1 where a=1; +delete from t1 where a=3; +delete from t1 where a=5; +insert into t1 values (10, repeat('D', 35000)); +insert into t1 values (20, repeat('E', 20000)); +insert into t1 values (30, repeat('F', 15000)); +select a, length(b) from t1 order by a; +a length(b) +10 35000 +20 20000 +30 15000 +select a from t1 where b = repeat('D', 35000); +a +10 +select a from t1 where b = repeat('E', 20000); +a +20 +select a from t1 where b = repeat('F', 15000); +a +30 +delete from t1 where a=10; +insert into t1 values (40, repeat('G', 40000)); +select a, length(b) from t1 order by a; +a length(b) +20 20000 +30 15000 +40 40000 +select a from t1 where b = repeat('E', 20000); +a +20 +select a from t1 where b = repeat('F', 15000); +a +30 +select a from t1 where b = repeat('G', 40000); +a +40 +drop table t1; +# +# Free list scavenging with mixed NULL and non-NULL blob columns +# +# Multiple blob columns where some are NULL and others are large. +# This creates rows with partial continuation chains — the NULL +# columns have no chain while the non-NULL columns do. +# +create table t1 ( +a int not null, +b blob, +c blob, +primary key(a) +) engine=memory; +insert into t1 values (1, repeat('X', 15000), NULL); +insert into t1 values (2, NULL, repeat('Y', 25000)); +insert into t1 values (3, repeat('Z', 10000), repeat('W', 20000)); +insert into t1 values (4, NULL, NULL); +select a, length(b), length(c) from t1 order by a; +a length(b) length(c) +1 15000 NULL +2 NULL 25000 +3 10000 20000 +4 NULL NULL +delete from t1 where a=1; +delete from t1 where a=3; +insert into t1 values (5, repeat('P', 18000), repeat('Q', 22000)); +select a, length(b), length(c) from t1 order by a; +a length(b) length(c) +2 NULL 25000 +4 NULL NULL +5 18000 22000 +select a from t1 where b is null and c = repeat('Y', 25000); +a +2 +select a from t1 where b = repeat('P', 18000) and c = repeat('Q', 22000); +a +5 +delete from t1; +insert into t1 values (6, repeat('R', 30000), repeat('S', 30000)); +select a, length(b), length(c) from t1 order by a; +a length(b) length(c) +6 30000 30000 +select a from t1 where b = repeat('R', 30000) and c = repeat('S', 30000); +a +6 +drop table t1; +# +# TRUNCATE with BLOB data +# +create table t1 (a int not null, b blob, primary key(a)) engine=memory; +insert into t1 values (1, repeat('T', 30000)), (2, repeat('U', 30000)); +select count(*) from t1; +count(*) +2 +truncate table t1; +select count(*) from t1; +count(*) +0 +insert into t1 values (1, 'after truncate'); +select * from t1; +a b +1 after truncate +drop table t1; +# +# Full table scan correctness +# +create table t1 (a int not null, b blob) engine=memory; +insert into t1 values (1, repeat('a', 500)); +insert into t1 values (2, repeat('b', 23000)); +insert into t1 values (3, repeat('c', 51000)); +insert into t1 values (4, NULL); +insert into t1 values (5, ''); +select a, length(b), left(b, 5) from t1 order by a; +a length(b) left(b, 5) +1 500 aaaaa +2 23000 bbbbb +3 51000 ccccc +4 NULL NULL +5 0 +select count(*) from t1; +count(*) +5 +drop 
table t1; +# +# Hash index on non-blob column with blob data present +# +create table t1 ( +a int not null, +b varchar(20) not null, +c blob, +primary key(a), +key(b) +) engine=memory; +insert into t1 values (1, 'key1', repeat('h', 20000)); +insert into t1 values (2, 'key2', repeat('i', 33000)); +insert into t1 values (3, 'key1', repeat('j', 10000)); +select a, b, length(c) from t1 where b='key1' order by a; +a b length(c) +1 key1 20000 +3 key1 10000 +select a, b, length(c) from t1 where b='key2'; +a b length(c) +2 key2 33000 +select a, b, length(c) from t1 where a=2; +a b length(c) +2 key2 33000 +drop table t1; +# +# BTREE index on non-blob column with blob data present +# +create table t1 ( +a int not null, +b int not null, +c blob, +key b_idx using btree (b) +) engine=memory; +insert into t1 values (1, 10, repeat('p', 17000)); +insert into t1 values (2, 20, repeat('q', 25000)); +insert into t1 values (3, 30, repeat('r', 41000)); +insert into t1 values (4, 20, repeat('s', 19000)); +select a, b, length(c) from t1 where b=20 order by a; +a b length(c) +2 20 25000 +4 20 19000 +select a, b, length(c) from t1 where b>=20 order by b, a; +a b length(c) +2 20 25000 +4 20 19000 +3 30 41000 +drop table t1; +# +# REPLACE with BLOB column +# +create table t1 (a int not null, b blob, primary key(a)) engine=memory; +insert into t1 values (1, 'original'); +insert into t1 values (2, repeat('x', 30000)); +replace into t1 values (1, repeat('replaced', 5000)); +select a, length(b), left(b, 20) from t1 order by a; +a length(b) left(b, 20) +1 40000 replacedreplacedrepl +2 30000 xxxxxxxxxxxxxxxxxxxx +replace into t1 values (2, 'short'); +select a, length(b), left(b, 20) from t1 order by a; +a length(b) left(b, 20) +1 40000 replacedreplacedrepl +2 5 short +drop table t1; +# +# INSERT ... 
SELECT with BLOB data +# +create table t1 (a int not null, b blob, primary key(a)) engine=memory; +create table t2 (a int not null, b blob, primary key(a)) engine=memory; +insert into t1 values (1, repeat('m', 22000)), (2, repeat('n', 37000)); +insert into t2 select * from t1; +select a, length(b) from t2 order by a; +a length(b) +1 22000 +2 37000 +select a from t2 where b=repeat('m', 22000); +a +1 +select a from t2 where b=repeat('n', 37000); +a +2 +drop table t1, t2; +# +# TINYBLOB NOT NULL edge case (reclength=9, minimal visible_offset) +# +CREATE TABLE t_tiny (b TINYBLOB NOT NULL) ENGINE=MEMORY; +INSERT INTO t_tiny VALUES ('hello'), ('world'); +SELECT * FROM t_tiny; +b +hello +world +DROP TABLE t_tiny; +# +# TINYBLOB NULL edge case (reclength=10) +# +CREATE TABLE t_tiny2 (b TINYBLOB) ENGINE=MEMORY; +INSERT INTO t_tiny2 VALUES ('foo'), ('bar'); +SELECT * FROM t_tiny2; +b +foo +bar +DROP TABLE t_tiny2; +# +# Blob-only table with no primary key +# +create table t1 (b blob) engine=memory; +insert into t1 values (repeat('A', 5000)), (repeat('B', 10000)); +select length(b), left(b, 3) from t1 order by b; +length(b) left(b, 3) +5000 AAA +10000 BBB +delete from t1; +insert into t1 values ('short1'), ('short2'); +select b from t1 order by b; +b +short1 +short2 +drop table t1; +# +# Table-full error with blob data +# +set @save_max= @@max_heap_table_size; +set @@max_heap_table_size= 65536; +create table t1 (a int not null, b blob, primary key(a)) engine=memory; +insert into t1 values (1, repeat('x', 30000)); +insert into t1 values (2, repeat('y', 30000)); +ERROR HY000: The table 't1' is full +insert into t1 values (3, repeat('z', 30000)); +ERROR HY000: The table 't1' is full +select a, length(b) from t1 where a=1; +a length(b) +1 30000 +set @@max_heap_table_size= @save_max; +drop table t1; +# +# Multiple blob columns with different sizes in same row +# +create table t1 ( +a int not null, +b tinyblob, +c blob, +d mediumblob, +primary key(a) +) engine=memory; +insert into t1 values (1, repeat('p', 200), repeat('q', 30000), repeat('r', 60000)); +insert into t1 values (2, 'small', repeat('s', 15000), repeat('t', 45000)); +select a, length(b), length(c), length(d), left(b,3), left(c,3), left(d,3) from t1 order by a; +a length(b) length(c) length(d) left(b,3) left(c,3) left(d,3) +1 200 30000 60000 ppp qqq rrr +2 5 15000 45000 sma sss ttt +select a from t1 where b=repeat('p', 200) and c=repeat('q', 30000) and d=repeat('r', 60000); +a +1 +select a from t1 where b='small' and c=repeat('s', 15000) and d=repeat('t', 45000); +a +2 +drop table t1; +# +# UPDATE failure preserves old blob data (table-full during blob grow) +# +set @save_max= @@max_heap_table_size; +set @@max_heap_table_size= 65536; +create table t1 (a int not null, b longblob, primary key(a)) engine=memory; +insert into t1 values (1, repeat('A', 5000)); +insert into t1 values (2, repeat('B', 5000)); +update t1 set b=repeat('X', 200000) where a=1; +ERROR HY000: The table 't1' is full +select a, length(b), left(b, 5), right(b, 5) from t1 order by a; +a length(b) left(b, 5) right(b, 5) +1 5000 AAAAA AAAAA +2 5000 BBBBB BBBBB +select a from t1 where b=repeat('A', 5000); +a +1 +select a from t1 where b=repeat('B', 5000); +a +2 +set @@max_heap_table_size= @save_max; +drop table t1; +# +# Large blob exceeding uint16 run_rec_count cap (65535 records) +# +# With recbuffer=16, visible=15, a 1MB blob needs ~69906 records, +# exceeding the uint16 max of 65535. The free list scavenging must +# split into multiple runs at the cap boundary. 
+# Delete-then-reinsert exercises scavenging of the freed chain. +# +set @save_max= @@max_heap_table_size; +set @@max_heap_table_size= 64 * 1024 * 1024; +create table t1 (a int not null, b longblob, primary key(a)) engine=memory; +insert into t1 values (1, repeat('A', 1048576)); +insert into t1 values (2, repeat('B', 1048576)); +select a, length(b), left(b, 5), right(b, 5) from t1 order by a; +a length(b) left(b, 5) right(b, 5) +1 1048576 AAAAA AAAAA +2 1048576 BBBBB BBBBB +select a from t1 where b = repeat('A', 1048576); +a +1 +select a from t1 where b = repeat('B', 1048576); +a +2 +delete from t1 where a=1; +delete from t1 where a=2; +insert into t1 values (3, repeat('C', 1048576)); +insert into t1 values (4, repeat('D', 1048576)); +select a, length(b), left(b, 5), right(b, 5) from t1 order by a; +a length(b) left(b, 5) right(b, 5) +3 1048576 CCCCC CCCCC +4 1048576 DDDDD DDDDD +select a from t1 where b = repeat('C', 1048576); +a +3 +select a from t1 where b = repeat('D', 1048576); +a +4 +set @@max_heap_table_size= @save_max; +drop table t1; +# +# Zero-copy Case A: tiny blobs fitting in rec 0 payload (wide table) +# +create table t_casea (a int not null, b varchar(480), c blob, primary key(a)) engine=memory; +insert into t_casea values (1, repeat('v', 480), repeat('A', 400)); +insert into t_casea values (2, repeat('w', 480), repeat('B', 100)); +select a, length(c), left(c,3) from t_casea order by a; +a length(c) left(c,3) +1 400 AAA +2 100 BBB +select a from t_casea where c = repeat('A', 400); +a +1 +select a from t_casea where c = repeat('B', 100); +a +2 +drop table t_casea; +# +# Zero-copy Case B: medium blobs (single run, multiple records) +# +create table t_caseb (a int not null, b blob, primary key(a)) engine=memory; +insert into t_caseb values (1, repeat('M', 8000)); +insert into t_caseb values (2, repeat('N', 15000)); +select a, length(b), left(b,3), right(b,3) from t_caseb order by a; +a length(b) left(b,3) right(b,3) +1 8000 MMM MMM +2 15000 NNN NNN +select a from t_caseb where b = repeat('M', 8000); +a +1 +select a from t_caseb where b = repeat('N', 15000); +a +2 +delete from t_caseb where a=1; +insert into t_caseb values (3, repeat('O', 12000)); +select a, length(b) from t_caseb order by a; +a length(b) +2 15000 +3 12000 +select a from t_caseb where b = repeat('O', 12000); +a +3 +drop table t_caseb; +# +# Zero-copy Case B->C boundary (large blobs forcing multi-run) +# +create table t_boundary (a int not null, b blob, primary key(a)) engine=memory; +insert into t_boundary values (1, repeat('X', 15000)); +insert into t_boundary values (2, repeat('Y', 50000)); +select a, length(b), left(b,3) from t_boundary order by a; +a length(b) left(b,3) +1 15000 XXX +2 50000 YYY +select a from t_boundary where b = repeat('X', 15000); +a +1 +select a from t_boundary where b = repeat('Y', 50000); +a +2 +drop table t_boundary; +# +# Non-blob table regression: ensure no behavioral change +# +create table t1 (a int not null, b varchar(100), primary key(a)) engine=memory; +insert into t1 values (1, 'no blob here'), (2, 'still no blob'); +select * from t1 order by a; +a b +1 no blob here +2 still no blob +update t1 set b='changed' where a=1; +select * from t1 order by a; +a b +1 changed +2 still no blob +delete from t1 where a=2; +select * from t1 order by a; +a b +1 changed +drop table t1; diff --git a/mysql-test/suite/heap/heap_blob.test b/mysql-test/suite/heap/heap_blob.test new file mode 100644 index 0000000000000..b29611f2c8cef --- /dev/null +++ b/mysql-test/suite/heap/heap_blob.test @@ -0,0 
+1,439 @@ +# +# Test BLOB/TEXT column support in HEAP (MEMORY) tables. +# + +--disable_warnings +drop table if exists t1,t2; +--enable_warnings + +--echo # +--echo # Basic CRUD with BLOB column +--echo # +create table t1 (a int not null, b blob, primary key(a)) engine=memory; +insert into t1 values (1, 'hello'), (2, 'world'); +select * from t1 order by a; +select * from t1 where a=1; +select * from t1 where a=2; +update t1 set b='updated' where a=1; +select * from t1 order by a; +delete from t1 where a=2; +select * from t1 order by a; +insert into t1 values (3, 'new row'); +select * from t1 order by a; +drop table t1; + +--echo # +--echo # Multiple BLOB/TEXT columns of different types +--echo # +create table t1 ( + id int not null auto_increment, + t tinyblob, + b blob, + m mediumblob, + tx text, + primary key(id) +) engine=memory; +insert into t1 (t, b, m, tx) values + ('tiny1', 'blob1', 'medium1', 'text1'), + ('tiny2', 'blob2', 'medium2', 'text2'); +select * from t1 order by id; +update t1 set b='blob_updated', tx='text_updated' where id=1; +select * from t1 order by id; +delete from t1 where id=2; +select * from t1 order by id; +drop table t1; + +--echo # +--echo # NULL and empty blob values +--echo # +create table t1 (a int not null, b blob, c text, primary key(a)) engine=memory; +insert into t1 values (1, NULL, NULL); +insert into t1 values (2, '', ''); +insert into t1 values (3, 'data', 'text'); +select a, b, c, length(b), length(c) from t1 order by a; +update t1 set b=NULL where a=3; +select a, b, c, length(b), length(c) from t1 order by a; +update t1 set b='restored' where a=3; +select a, b, c, length(b), length(c) from t1 order by a; +drop table t1; + +--echo # +--echo # Large BLOBs spanning multiple continuation runs +--echo # For (int, blob): recbuffer=16, visible=15, leaf block ~1021 slots. +--echo # Max run payload ~15305 bytes. Sizes chosen to span multiple runs +--echo # and not align to visible (15 bytes). 
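(The figures quoted in this comment follow from simple arithmetic. Below is a minimal standalone sketch that reproduces them, assuming, as the comment states, 15 visible payload bytes per 16-byte slot, roughly 1021 record slots per leaf HP_BLOCK, and the 10-byte run header stored in rec 0. The constant names are illustrative, not the engine's.)

#include <stdio.h>

int main(void)
{
  const unsigned recbuffer = 16;            /* physical slot size for (int, blob) */
  const unsigned visible   = recbuffer - 1; /* the comment's 15 payload bytes     */
  const unsigned slots     = 1021;          /* approx. record slots per leaf block */
  const unsigned header    = 10;            /* run header in rec 0: next_cont
                                               pointer + run_rec_count            */
  /* Largest payload a single run confined to one leaf block can carry: */
  printf("max run payload = %u bytes\n", slots * visible - header);
  return 0;                                 /* prints 15305, as quoted above */
}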
+--echo # +create table t1 (a int not null, b blob, primary key(a)) engine=memory; +insert into t1 values (1, repeat('A', 1000)); +insert into t1 values (2, repeat('B', 50000)); +insert into t1 values (3, repeat('C', 63001)); +select a, length(b), left(b, 5), right(b, 5) from t1 order by a; +# Verify data integrity +select a from t1 where b = repeat('A', 1000); +select a from t1 where b = repeat('B', 50000); +select a from t1 where b = repeat('C', 63001); +# Update small to large (multi-run) +update t1 set b=repeat('D', 63001) where a=1; +select a, length(b), left(b, 5), right(b, 5) from t1 order by a; +select a from t1 where b = repeat('D', 63001); +# Update large to small +update t1 set b=repeat('E', 100) where a=2; +select a, length(b), left(b, 5), right(b, 5) from t1 order by a; +drop table t1; + +--echo # +--echo # Mixed operations: insert, delete, insert (free list reuse) +--echo # +create table t1 (a int not null, b blob, primary key(a)) engine=memory; +insert into t1 values (1, repeat('X', 20000)); +insert into t1 values (2, repeat('Y', 50000)); +insert into t1 values (3, repeat('Z', 10000)); +delete from t1 where a=2; +# This insert should reuse freed continuation records +insert into t1 values (4, repeat('W', 40000)); +select a, length(b) from t1 order by a; +select a from t1 where b = repeat('X', 20000); +select a from t1 where b = repeat('Z', 10000); +select a from t1 where b = repeat('W', 40000); +# Delete all and reinsert +delete from t1; +insert into t1 values (10, repeat('R', 50000)); +insert into t1 values (20, repeat('S', 50000)); +select a, length(b) from t1 order by a; +drop table t1; + +--echo # +--echo # Free list fragmentation: NULL-blob rows interleaved with large-blob rows +--echo # +--echo # When rows with NULL blobs and rows with large blobs are deleted, the +--echo # free list gets primary-record slots (from NULL rows) interleaved between +--echo # continuation slots (from large-blob rows). The peek-then-unlink +--echo # algorithm must find contiguous continuation groups despite these +--echo # interleaving primary slots breaking address contiguity. +--echo # +--echo # After deleting all rows and reinserting, the new blob data must be +--echo # correct — verifying no free list corruption from the fragmented state. +--echo # +create table t1 (a int not null, b blob, primary key(a)) engine=memory; +# Insert alternating: NULL blob, large blob, NULL blob, large blob +# The primary slots for NULL rows will sit between continuation runs +# on the free list after deletion. +insert into t1 values (1, NULL); +insert into t1 values (2, repeat('A', 20000)); +insert into t1 values (3, NULL); +insert into t1 values (4, repeat('B', 30000)); +insert into t1 values (5, NULL); +insert into t1 values (6, repeat('C', 25000)); +select a, length(b) from t1 order by a; +# Delete in an order that creates maximum free list interleaving: +# large blob rows first (their continuation slots go to free list), +# then NULL rows (their primary slots go to free list head, +# interleaving with the continuation slots). +delete from t1 where a=2; +delete from t1 where a=4; +delete from t1 where a=6; +delete from t1 where a=1; +delete from t1 where a=3; +delete from t1 where a=5; +# Reinsert large blobs — these should either scavenge contiguous groups +# from the fragmented free list or fall through to tail allocation. +# Either way, data must be correct. 
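(Several of the tests below lean on the free-list scavenging behaviour, so a compact illustration of the peek-then-unlink scan may help. This is a sketch under stated assumptions, not the engine code: it assumes a LIFO free list, so slots freed from a contiguous chain appear in descending address order, and every name here is invented for the example.)

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

struct free_slot { struct free_slot *next; };   /* link lives in the slot */

/* Walk the free list read-only, visiting each maximal contiguous group
   once.  The real code unlinks qualifying groups (>= min_run_records)
   as it goes; here we only report them. */
static void peek_scan(struct free_slot *head, size_t slot_size,
                      unsigned min_run_records)
{
  unsigned group_len = 0;
  for (struct free_slot *p = head; p; p = p->next)
  {
    struct free_slot *n = p->next;
    group_len++;
    /* Inside a contiguous group the next slot sits exactly one
       slot_size below the current one (descending addresses). */
    if (n && (uintptr_t) p - (uintptr_t) n == slot_size)
      continue;                   /* still inside the same group */
    if (group_len < min_run_records)
      return;                     /* first short group ends the scan */
    printf("qualifying group: %u slots\n", group_len);
    group_len = 0;
  }
}

int main(void)
{
  enum { N = 8 };
  struct free_slot slots[N];
  for (int i = N - 1; i > 0; i--) /* simulate LIFO frees: 7 -> ... -> 0 */
    slots[i].next = &slots[i - 1];
  slots[0].next = NULL;
  peek_scan(&slots[N - 1], sizeof slots[0], 3); /* one group of 8 slots */
  return 0;
}

(The property this fragmentation test exercises is the early return: the first group shorter than min_run_records stops the scan, and anything not yet scavenged then comes from the block tail, with the free list left undisturbed.)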
+insert into t1 values (10, repeat('D', 35000)); +insert into t1 values (20, repeat('E', 20000)); +insert into t1 values (30, repeat('F', 15000)); +select a, length(b) from t1 order by a; +select a from t1 where b = repeat('D', 35000); +select a from t1 where b = repeat('E', 20000); +select a from t1 where b = repeat('F', 15000); +# Second cycle: delete and reinsert again to exercise scavenging of +# the runs we just created (which are now interleaved differently). +delete from t1 where a=10; +insert into t1 values (40, repeat('G', 40000)); +select a, length(b) from t1 order by a; +select a from t1 where b = repeat('E', 20000); +select a from t1 where b = repeat('F', 15000); +select a from t1 where b = repeat('G', 40000); +drop table t1; + +--echo # +--echo # Free list scavenging with mixed NULL and non-NULL blob columns +--echo # +--echo # Multiple blob columns where some are NULL and others are large. +--echo # This creates rows with partial continuation chains — the NULL +--echo # columns have no chain while the non-NULL columns do. +--echo # +create table t1 ( + a int not null, + b blob, + c blob, + primary key(a) +) engine=memory; +insert into t1 values (1, repeat('X', 15000), NULL); +insert into t1 values (2, NULL, repeat('Y', 25000)); +insert into t1 values (3, repeat('Z', 10000), repeat('W', 20000)); +insert into t1 values (4, NULL, NULL); +select a, length(b), length(c) from t1 order by a; +# Delete rows with different blob patterns to create varied free list state +delete from t1 where a=1; +delete from t1 where a=3; +# Insert new rows that should scavenge from the freed continuation slots +insert into t1 values (5, repeat('P', 18000), repeat('Q', 22000)); +select a, length(b), length(c) from t1 order by a; +select a from t1 where b is null and c = repeat('Y', 25000); +select a from t1 where b = repeat('P', 18000) and c = repeat('Q', 22000); +# Delete everything, reinsert to verify full cleanup +delete from t1; +insert into t1 values (6, repeat('R', 30000), repeat('S', 30000)); +select a, length(b), length(c) from t1 order by a; +select a from t1 where b = repeat('R', 30000) and c = repeat('S', 30000); +drop table t1; + +--echo # +--echo # TRUNCATE with BLOB data +--echo # +create table t1 (a int not null, b blob, primary key(a)) engine=memory; +insert into t1 values (1, repeat('T', 30000)), (2, repeat('U', 30000)); +select count(*) from t1; +truncate table t1; +select count(*) from t1; +insert into t1 values (1, 'after truncate'); +select * from t1; +drop table t1; + +--echo # +--echo # Full table scan correctness +--echo # +create table t1 (a int not null, b blob) engine=memory; +insert into t1 values (1, repeat('a', 500)); +insert into t1 values (2, repeat('b', 23000)); +insert into t1 values (3, repeat('c', 51000)); +insert into t1 values (4, NULL); +insert into t1 values (5, ''); +# Full scan should return exactly 5 rows, no continuation record leaks +select a, length(b), left(b, 5) from t1 order by a; +select count(*) from t1; +drop table t1; + +--echo # +--echo # Hash index on non-blob column with blob data present +--echo # +create table t1 ( + a int not null, + b varchar(20) not null, + c blob, + primary key(a), + key(b) +) engine=memory; +insert into t1 values (1, 'key1', repeat('h', 20000)); +insert into t1 values (2, 'key2', repeat('i', 33000)); +insert into t1 values (3, 'key1', repeat('j', 10000)); +select a, b, length(c) from t1 where b='key1' order by a; +select a, b, length(c) from t1 where b='key2'; +select a, b, length(c) from t1 where a=2; +drop table t1; + 
+--echo # +--echo # BTREE index on non-blob column with blob data present +--echo # +create table t1 ( + a int not null, + b int not null, + c blob, + key b_idx using btree (b) +) engine=memory; +insert into t1 values (1, 10, repeat('p', 17000)); +insert into t1 values (2, 20, repeat('q', 25000)); +insert into t1 values (3, 30, repeat('r', 41000)); +insert into t1 values (4, 20, repeat('s', 19000)); +select a, b, length(c) from t1 where b=20 order by a; +select a, b, length(c) from t1 where b>=20 order by b, a; +drop table t1; + +--echo # +--echo # REPLACE with BLOB column +--echo # +create table t1 (a int not null, b blob, primary key(a)) engine=memory; +insert into t1 values (1, 'original'); +insert into t1 values (2, repeat('x', 30000)); +replace into t1 values (1, repeat('replaced', 5000)); +select a, length(b), left(b, 20) from t1 order by a; +replace into t1 values (2, 'short'); +select a, length(b), left(b, 20) from t1 order by a; +drop table t1; + +--echo # +--echo # INSERT ... SELECT with BLOB data +--echo # +create table t1 (a int not null, b blob, primary key(a)) engine=memory; +create table t2 (a int not null, b blob, primary key(a)) engine=memory; +insert into t1 values (1, repeat('m', 22000)), (2, repeat('n', 37000)); +insert into t2 select * from t1; +select a, length(b) from t2 order by a; +select a from t2 where b=repeat('m', 22000); +select a from t2 where b=repeat('n', 37000); +drop table t1, t2; + +--echo # +--echo # TINYBLOB NOT NULL edge case (reclength=9, minimal visible_offset) +--echo # +CREATE TABLE t_tiny (b TINYBLOB NOT NULL) ENGINE=MEMORY; +INSERT INTO t_tiny VALUES ('hello'), ('world'); +SELECT * FROM t_tiny; +DROP TABLE t_tiny; + +--echo # +--echo # TINYBLOB NULL edge case (reclength=10) +--echo # +CREATE TABLE t_tiny2 (b TINYBLOB) ENGINE=MEMORY; +INSERT INTO t_tiny2 VALUES ('foo'), ('bar'); +SELECT * FROM t_tiny2; +DROP TABLE t_tiny2; + +--echo # +--echo # Blob-only table with no primary key +--echo # +create table t1 (b blob) engine=memory; +insert into t1 values (repeat('A', 5000)), (repeat('B', 10000)); +select length(b), left(b, 3) from t1 order by b; +delete from t1; +insert into t1 values ('short1'), ('short2'); +select b from t1 order by b; +drop table t1; + +--echo # +--echo # Table-full error with blob data +--echo # +set @save_max= @@max_heap_table_size; +# Variable must be set before CREATE TABLE (limit is captured at creation). 
+set @@max_heap_table_size= 65536; +create table t1 (a int not null, b blob, primary key(a)) engine=memory; +# Insert until table is full; blob data consumes continuation slots +--disable_abort_on_error +insert into t1 values (1, repeat('x', 30000)); +insert into t1 values (2, repeat('y', 30000)); +insert into t1 values (3, repeat('z', 30000)); +--enable_abort_on_error +# At least the first row should be readable +select a, length(b) from t1 where a=1; +set @@max_heap_table_size= @save_max; +drop table t1; + +--echo # +--echo # Multiple blob columns with different sizes in same row +--echo # +create table t1 ( + a int not null, + b tinyblob, + c blob, + d mediumblob, + primary key(a) +) engine=memory; +insert into t1 values (1, repeat('p', 200), repeat('q', 30000), repeat('r', 60000)); +insert into t1 values (2, 'small', repeat('s', 15000), repeat('t', 45000)); +select a, length(b), length(c), length(d), left(b,3), left(c,3), left(d,3) from t1 order by a; +# Verify data integrity +select a from t1 where b=repeat('p', 200) and c=repeat('q', 30000) and d=repeat('r', 60000); +select a from t1 where b='small' and c=repeat('s', 15000) and d=repeat('t', 45000); +drop table t1; + +--echo # +--echo # UPDATE failure preserves old blob data (table-full during blob grow) +--echo # +set @save_max= @@max_heap_table_size; +# Size chosen so two rows with small blobs fit, but updating one to a +# large blob exhausts the table before the new chain is fully written. +# Variable must be set before CREATE TABLE (limit is captured at creation). +# Use LONGBLOB so the 200KB value is accepted by the column type. +set @@max_heap_table_size= 65536; +create table t1 (a int not null, b longblob, primary key(a)) engine=memory; +insert into t1 values (1, repeat('A', 5000)); +insert into t1 values (2, repeat('B', 5000)); +# This update should fail: the new blob is too large for the table +--error ER_RECORD_FILE_FULL +update t1 set b=repeat('X', 200000) where a=1; +# Old data must survive intact after the failed update +select a, length(b), left(b, 5), right(b, 5) from t1 order by a; +select a from t1 where b=repeat('A', 5000); +select a from t1 where b=repeat('B', 5000); +set @@max_heap_table_size= @save_max; +drop table t1; + +--echo # +--echo # Large blob exceeding uint16 run_rec_count cap (65535 records) +--echo # +--echo # With recbuffer=16, visible=15, a 1MB blob needs ~69906 records, +--echo # exceeding the uint16 max of 65535. The free list scavenging must +--echo # split into multiple runs at the cap boundary. +--echo # Delete-then-reinsert exercises scavenging of the freed chain. 
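(The "~69906" figure in the comment above is easy to verify. A standalone sketch of the arithmetic, assuming 15 payload bytes per record and ignoring the small per-run header overhead, which is why the comment carries a tilde:)

#include <stdio.h>
#include <stdint.h>

int main(void)
{
  const uint64_t blob_len = 1048576;   /* 1 MiB                    */
  const uint64_t visible  = 15;        /* payload bytes per record */
  uint64_t records = (blob_len + visible - 1) / visible;     /* ceil */
  uint64_t runs    = (records + UINT16_MAX - 1) / UINT16_MAX;
  /* 69906 records > 65535, so the chain needs at least 2 runs */
  printf("records=%llu runs=%llu\n",
         (unsigned long long) records, (unsigned long long) runs);
  return 0;
}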
+--echo # +set @save_max= @@max_heap_table_size; +set @@max_heap_table_size= 64 * 1024 * 1024; +create table t1 (a int not null, b longblob, primary key(a)) engine=memory; +insert into t1 values (1, repeat('A', 1048576)); +insert into t1 values (2, repeat('B', 1048576)); +select a, length(b), left(b, 5), right(b, 5) from t1 order by a; +select a from t1 where b = repeat('A', 1048576); +select a from t1 where b = repeat('B', 1048576); +# Delete both rows — puts ~140K contiguous records on free list +delete from t1 where a=1; +delete from t1 where a=2; +# Reinsert — scavenges from free list, must split at uint16 boundary +insert into t1 values (3, repeat('C', 1048576)); +insert into t1 values (4, repeat('D', 1048576)); +select a, length(b), left(b, 5), right(b, 5) from t1 order by a; +select a from t1 where b = repeat('C', 1048576); +select a from t1 where b = repeat('D', 1048576); +set @@max_heap_table_size= @save_max; +drop table t1; + +--echo # +--echo # Zero-copy Case A: tiny blobs fitting in rec 0 payload (wide table) +--echo # +create table t_casea (a int not null, b varchar(480), c blob, primary key(a)) engine=memory; +insert into t_casea values (1, repeat('v', 480), repeat('A', 400)); +insert into t_casea values (2, repeat('w', 480), repeat('B', 100)); +select a, length(c), left(c,3) from t_casea order by a; +select a from t_casea where c = repeat('A', 400); +select a from t_casea where c = repeat('B', 100); +drop table t_casea; + +--echo # +--echo # Zero-copy Case B: medium blobs (single run, multiple records) +--echo # +create table t_caseb (a int not null, b blob, primary key(a)) engine=memory; +insert into t_caseb values (1, repeat('M', 8000)); +insert into t_caseb values (2, repeat('N', 15000)); +select a, length(b), left(b,3), right(b,3) from t_caseb order by a; +select a from t_caseb where b = repeat('M', 8000); +select a from t_caseb where b = repeat('N', 15000); +# Delete and reinsert to exercise free list -> tail fallback +delete from t_caseb where a=1; +insert into t_caseb values (3, repeat('O', 12000)); +select a, length(b) from t_caseb order by a; +select a from t_caseb where b = repeat('O', 12000); +drop table t_caseb; + +--echo # +--echo # Zero-copy Case B->C boundary (large blobs forcing multi-run) +--echo # +create table t_boundary (a int not null, b blob, primary key(a)) engine=memory; +# Case B: single run, zero-copy +insert into t_boundary values (1, repeat('X', 15000)); +# Case C: large enough to span multiple leaf blocks +insert into t_boundary values (2, repeat('Y', 50000)); +select a, length(b), left(b,3) from t_boundary order by a; +select a from t_boundary where b = repeat('X', 15000); +select a from t_boundary where b = repeat('Y', 50000); +drop table t_boundary; + +--echo # +--echo # Non-blob table regression: ensure no behavioral change +--echo # +create table t1 (a int not null, b varchar(100), primary key(a)) engine=memory; +insert into t1 values (1, 'no blob here'), (2, 'still no blob'); +select * from t1 order by a; +update t1 set b='changed' where a=1; +select * from t1 order by a; +delete from t1 where a=2; +select * from t1 order by a; +drop table t1; diff --git a/mysql-test/suite/heap/heap_geometry.result b/mysql-test/suite/heap/heap_geometry.result new file mode 100644 index 0000000000000..6ff7e65e54428 --- /dev/null +++ b/mysql-test/suite/heap/heap_geometry.result @@ -0,0 +1,75 @@ +# +# Test GEOMETRY columns in MEMORY tables +# Reproduces blob data corruption during INSERT...SELECT doublings +# +set @save_max_heap_table_size= 
@@max_heap_table_size; +set max_heap_table_size= 128*1024*1024; +create table t1 (c1 int, c2 geometry not null) engine=MEMORY; +# Verify table is using MEMORY engine +select engine from information_schema.tables +where table_schema=database() and table_name='t1'; +engine +MEMORY +INSERT INTO t1 VALUES (1, ST_GeomFromText('LineString(2 2, 150 150)')); +INSERT INTO t1 VALUES (2, ST_GeomFromText('LineString(3 3, 160 160)')); +INSERT INTO t1 VALUES (3, ST_GeomFromText('LineString(4 4, 170 170)')); +INSERT INTO t1 VALUES (4, ST_GeomFromText('LineString(5 5, 180 180)')); +INSERT INTO t1 VALUES (5, ST_GeomFromText('LineString(6 6, 190 190)')); +INSERT INTO t1 VALUES (6, ST_GeomFromText('LineString(7 7, 200 200)')); +INSERT INTO t1 VALUES (7, ST_GeomFromText('LineString(8 8, 210 210)')); +# 7 rows, all valid +select count(*) from t1; +count(*) +7 +select count(*) as null_count from t1 where ST_AsText(c2) is null; +null_count +0 +# Doublings 1-8: no corruption expected +select count(*) as 'expect 1792' from t1; +expect 1792 +1792 +select count(*) as 'expect 0' from t1 where ST_AsText(c2) is null; +expect 0 +0 +# Doubling 9 +insert into t1 select * from t1; +select count(*) as 'expect 3584' from t1; +expect 3584 +3584 +select count(*) as 'expect 0' from t1 where ST_AsText(c2) is null; +expect 0 +0 +# Doubling 10 +insert into t1 select * from t1; +select count(*) as 'expect 7168' from t1; +expect 7168 +7168 +select count(*) as 'expect 0' from t1 where ST_AsText(c2) is null; +expect 0 +0 +# Doubling 11 +insert into t1 select * from t1; +select count(*) as 'expect 14336' from t1; +expect 14336 +14336 +select count(*) as 'expect 0' from t1 where ST_AsText(c2) is null; +expect 0 +0 +# Verify all geometry values present with correct counts +select ST_AsText(c2) as geom, count(*) as cnt from t1 +group by geom order by geom; +geom cnt +LINESTRING(2 2,150 150) 2048 +LINESTRING(3 3,160 160) 2048 +LINESTRING(4 4,170 170) 2048 +LINESTRING(5 5,180 180) 2048 +LINESTRING(6 6,190 190) 2048 +LINESTRING(7 7,200 200) 2048 +LINESTRING(8 8,210 210) 2048 +# MBRWithin check +set @g1 = ST_GeomFromText('Polygon((0 0,0 200,200 200,200 0,0 0))'); +select count(*) as 'expect 12288' from t1 where MBRWithin(t1.c2, @g1); +expect 12288 +12288 +drop table t1; +set max_heap_table_size= @save_max_heap_table_size; diff --git a/mysql-test/suite/heap/heap_geometry.test b/mysql-test/suite/heap/heap_geometry.test new file mode 100644 index 0000000000000..9d4fe38ff81b7 --- /dev/null +++ b/mysql-test/suite/heap/heap_geometry.test @@ -0,0 +1,65 @@ +--source include/have_geometry.inc + +--echo # +--echo # Test GEOMETRY columns in MEMORY tables +--echo # Reproduces blob data corruption during INSERT...SELECT doublings +--echo # + +set @save_max_heap_table_size= @@max_heap_table_size; +set max_heap_table_size= 128*1024*1024; + +create table t1 (c1 int, c2 geometry not null) engine=MEMORY; + +--echo # Verify table is using MEMORY engine +select engine from information_schema.tables + where table_schema=database() and table_name='t1'; + +INSERT INTO t1 VALUES (1, ST_GeomFromText('LineString(2 2, 150 150)')); +INSERT INTO t1 VALUES (2, ST_GeomFromText('LineString(3 3, 160 160)')); +INSERT INTO t1 VALUES (3, ST_GeomFromText('LineString(4 4, 170 170)')); +INSERT INTO t1 VALUES (4, ST_GeomFromText('LineString(5 5, 180 180)')); +INSERT INTO t1 VALUES (5, ST_GeomFromText('LineString(6 6, 190 190)')); +INSERT INTO t1 VALUES (6, ST_GeomFromText('LineString(7 7, 200 200)')); +INSERT INTO t1 VALUES (7, ST_GeomFromText('LineString(8 8, 210 210)')); + 
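(For reference, the expected counts that follow are just the seed rows doubled: the eight silent INSERT ... SELECT iterations turn 7 rows into 7 * 2^8 = 1792, and doublings 9 through 11 give 3584, 7168, and 14336. The MBRWithin check excludes only LineString(8 8, 210 210), whose endpoint lies outside the 200x200 polygon, leaving 14336 * 6/7 = 12288 rows.)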
+--echo # 7 rows, all valid +select count(*) from t1; +select count(*) as null_count from t1 where ST_AsText(c2) is null; + +--echo # Doublings 1-8: no corruption expected +--let $i= 8 +--disable_query_log +while ($i) +{ + insert into t1 select * from t1; + --dec $i +} +--enable_query_log +select count(*) as 'expect 1792' from t1; +select count(*) as 'expect 0' from t1 where ST_AsText(c2) is null; + +--echo # Doubling 9 +insert into t1 select * from t1; +select count(*) as 'expect 3584' from t1; +select count(*) as 'expect 0' from t1 where ST_AsText(c2) is null; + +--echo # Doubling 10 +insert into t1 select * from t1; +select count(*) as 'expect 7168' from t1; +select count(*) as 'expect 0' from t1 where ST_AsText(c2) is null; + +--echo # Doubling 11 +insert into t1 select * from t1; +select count(*) as 'expect 14336' from t1; +select count(*) as 'expect 0' from t1 where ST_AsText(c2) is null; + +--echo # Verify all geometry values present with correct counts +select ST_AsText(c2) as geom, count(*) as cnt from t1 + group by geom order by geom; + +--echo # MBRWithin check +set @g1 = ST_GeomFromText('Polygon((0 0,0 200,200 200,200 0,0 0))'); +select count(*) as 'expect 12288' from t1 where MBRWithin(t1.c2, @g1); + +drop table t1; +set max_heap_table_size= @save_max_heap_table_size; diff --git a/mysql-test/suite/innodb_fts/r/innodb-fts-ddl.result b/mysql-test/suite/innodb_fts/r/innodb-fts-ddl.result index fe7781a72f50f..eaf5b53f9c012 100644 --- a/mysql-test/suite/innodb_fts/r/innodb-fts-ddl.result +++ b/mysql-test/suite/innodb_fts/r/innodb-fts-ddl.result @@ -120,7 +120,7 @@ INSERT INTO fts_test (title, text) VALUES ANALYZE TABLE fts_test; set @@auto_increment_increment=1; select *, match(title, text) AGAINST ('database') as score -from fts_test order by score desc; +from fts_test order by score desc, FTS_DOC_ID; FTS_DOC_ID title text score 11 MySQL Tutorial DBMS stands for DataBase ... 0.22764469683170319 51 MySQL vs. YourSQL In the following database comparison ... 0.22764469683170319 diff --git a/mysql-test/suite/innodb_fts/r/misc.result b/mysql-test/suite/innodb_fts/r/misc.result index 4afd9bf1f7485..f290744085dd7 100644 --- a/mysql-test/suite/innodb_fts/r/misc.result +++ b/mysql-test/suite/innodb_fts/r/misc.result @@ -492,7 +492,7 @@ INSERT INTO t1 (a,b) VALUES ('aab MySQL vs. YourSQL','In the following database comparison ...'), ('aaa MySQL Security','When configured properly, MySQL ...'); ALTER TABLE t1 ADD FULLTEXT INDEX idx (a,b); -SELECT * FROM t1 ORDER BY MATCH(a,b) AGAINST ('aac') DESC; +SELECT * FROM t1 ORDER BY MATCH(a,b) AGAINST ('aac') DESC, id; id a b 3 aac Optimizing MySQL In this tutorial we will show ... 4 aac 1001 MySQL Tricks 1. Never run mysqld as root. 2. ... @@ -500,7 +500,7 @@ id a b 2 aas How To Use MySQL Well After you went through a ... 5 aab MySQL vs. YourSQL In the following database comparison ... 6 aaa MySQL Security When configured properly, MySQL ... -SELECT * FROM t1 ORDER BY MATCH(a,b) AGAINST ('aab') DESC; +SELECT * FROM t1 ORDER BY MATCH(a,b) AGAINST ('aab') DESC, id; id a b 1 aab` MySQL Tutorial DBMS stands for DataBase ... 5 aab MySQL vs. YourSQL In the following database comparison ... 
@@ -1395,7 +1395,7 @@ this year.'),('Peter Pan','Tis a kids story.'),('Test1','nada'),('Database database database','foo database database database'),('Database article title','body with lots of words.'),('myfulltext database', 'my test fulltext database'); -SELECT id, title, body FROM articles ORDER BY MATCH (title,body) AGAINST ('database' IN BOOLEAN MODE) DESC; +SELECT id, title, body FROM articles ORDER BY MATCH (title,body) AGAINST ('database' IN BOOLEAN MODE) DESC, id; id title body 6 Database database database foo database database database @@ -1412,7 +1412,7 @@ this year. 5 Test1 nada DELETE from articles WHERE title like "myfulltext database"; INSERT INTO articles (title,body) VALUES ('myfulltext database', 'my test fulltext database'); -SELECT id, title, body FROM articles ORDER BY MATCH (title,body) AGAINST ('database' IN BOOLEAN MODE) DESC; +SELECT id, title, body FROM articles ORDER BY MATCH (title,body) AGAINST ('database' IN BOOLEAN MODE) DESC, id; id title body 6 Database database database foo database database database @@ -1428,7 +1428,7 @@ this year. 5 Test1 nada DELETE from articles WHERE title like "myfulltext database"; INSERT INTO articles (title,body) VALUES ('myfulltext database', 'my test fulltext database'); -SELECT id, title, body FROM articles ORDER BY MATCH (title,body) AGAINST ('database' IN BOOLEAN MODE) DESC; +SELECT id, title, body FROM articles ORDER BY MATCH (title,body) AGAINST ('database' IN BOOLEAN MODE) DESC, id; id title body 6 Database database database foo database database database diff --git a/mysql-test/suite/innodb_fts/t/innodb-fts-ddl.test b/mysql-test/suite/innodb_fts/t/innodb-fts-ddl.test index 8d4cbe7b86c27..5efa2972205d9 100644 --- a/mysql-test/suite/innodb_fts/t/innodb-fts-ddl.test +++ b/mysql-test/suite/innodb_fts/t/innodb-fts-ddl.test @@ -187,7 +187,7 @@ ANALYZE TABLE fts_test; set @@auto_increment_increment=1; select *, match(title, text) AGAINST ('database') as score -from fts_test order by score desc; +from fts_test order by score desc, FTS_DOC_ID; drop index idx on fts_test; diff --git a/mysql-test/suite/innodb_fts/t/misc.test b/mysql-test/suite/innodb_fts/t/misc.test index f3c10d1620559..ad4d4e137cebc 100644 --- a/mysql-test/suite/innodb_fts/t/misc.test +++ b/mysql-test/suite/innodb_fts/t/misc.test @@ -493,8 +493,8 @@ ANALYZE TABLE t1; -- enable_result_log -- enable_query_log -SELECT * FROM t1 ORDER BY MATCH(a,b) AGAINST ('aac') DESC; -SELECT * FROM t1 ORDER BY MATCH(a,b) AGAINST ('aab') DESC; +SELECT * FROM t1 ORDER BY MATCH(a,b) AGAINST ('aac') DESC, id; +SELECT * FROM t1 ORDER BY MATCH(a,b) AGAINST ('aab') DESC, id; --echo "----------Test7---------" select * from t1 where match(a,b) against ('aaa') @@ -1347,17 +1347,17 @@ database database','foo database database database'),('Database article title','body with lots of words.'),('myfulltext database', 'my test fulltext database'); -SELECT id, title, body FROM articles ORDER BY MATCH (title,body) AGAINST ('database' IN BOOLEAN MODE) DESC; +SELECT id, title, body FROM articles ORDER BY MATCH (title,body) AGAINST ('database' IN BOOLEAN MODE) DESC, id; DELETE from articles WHERE title like "myfulltext database"; INSERT INTO articles (title,body) VALUES ('myfulltext database', 'my test fulltext database'); -SELECT id, title, body FROM articles ORDER BY MATCH (title,body) AGAINST ('database' IN BOOLEAN MODE) DESC; +SELECT id, title, body FROM articles ORDER BY MATCH (title,body) AGAINST ('database' IN BOOLEAN MODE) DESC, id; DELETE from articles WHERE title like "myfulltext database"; 
INSERT INTO articles (title,body) VALUES ('myfulltext database', 'my test fulltext database'); -SELECT id, title, body FROM articles ORDER BY MATCH (title,body) AGAINST ('database' IN BOOLEAN MODE) DESC; +SELECT id, title, body FROM articles ORDER BY MATCH (title,body) AGAINST ('database' IN BOOLEAN MODE) DESC, id; DROP TABLE articles; diff --git a/mysql-test/suite/perfschema/include/transaction_nested_events_verifier.inc b/mysql-test/suite/perfschema/include/transaction_nested_events_verifier.inc index baee5e840a563..faeef52341042 100644 --- a/mysql-test/suite/perfschema/include/transaction_nested_events_verifier.inc +++ b/mysql-test/suite/perfschema/include/transaction_nested_events_verifier.inc @@ -133,7 +133,7 @@ SELECT THREAD_ID, SQL_TEXT FROM performance_schema.events_statements_history_long s WHERE ((s.thread_id = @con1_thread_id) OR (@all_threads = 1)) -ORDER BY thread_id, r_event_id; +ORDER BY thread_id, r_event_id, r_end_event_id; --echo # --echo ### Clear statement and transaction history diff --git a/mysql-test/suite/perfschema/r/transaction_nested_events.result b/mysql-test/suite/perfschema/r/transaction_nested_events.result index 52fa3783a8bb5..699c807ced311 100644 --- a/mysql-test/suite/perfschema/r/transaction_nested_events.result +++ b/mysql-test/suite/perfschema/r/transaction_nested_events.result @@ -145,7 +145,7 @@ RPAD(IFNULL(NESTING_EVENT_TYPE, 'NULL'), 18, ' ') NESTING_EVENT_TYPE, SQL_TEXT FROM performance_schema.events_statements_history_long s WHERE ((s.thread_id = @con1_thread_id) OR (@all_threads = 1)) -ORDER BY thread_id, r_event_id; +ORDER BY thread_id, r_event_id, r_end_event_id; THREAD_ID R_EVENT_ID R_END_EVENT_ID EVENT_NAME R_NESTING_EVENT_ID NESTING_EVENT_TYPE SQL_TXT thread_id 1 2 statement/sql/insert NULL NULL INSERT INTO t1 VALUES (210, "INSERT 210") thread_id 2 2 transaction 1 STATEMENT @@ -265,7 +265,7 @@ RPAD(IFNULL(NESTING_EVENT_TYPE, 'NULL'), 18, ' ') NESTING_EVENT_TYPE, SQL_TEXT FROM performance_schema.events_statements_history_long s WHERE ((s.thread_id = @con1_thread_id) OR (@all_threads = 1)) -ORDER BY thread_id, r_event_id; +ORDER BY thread_id, r_event_id, r_end_event_id; THREAD_ID R_EVENT_ID R_END_EVENT_ID EVENT_NAME R_NESTING_EVENT_ID NESTING_EVENT_TYPE SQL_TXT thread_id 1 2 statement/sql/begin NULL NULL START TRANSACTION thread_id 2 5 transaction 1 STATEMENT @@ -397,7 +397,7 @@ RPAD(IFNULL(NESTING_EVENT_TYPE, 'NULL'), 18, ' ') NESTING_EVENT_TYPE, SQL_TEXT FROM performance_schema.events_statements_history_long s WHERE ((s.thread_id = @con1_thread_id) OR (@all_threads = 1)) -ORDER BY thread_id, r_event_id; +ORDER BY thread_id, r_event_id, r_end_event_id; THREAD_ID R_EVENT_ID R_END_EVENT_ID EVENT_NAME R_NESTING_EVENT_ID NESTING_EVENT_TYPE SQL_TXT thread_id 1 2 statement/sql/create_proc NULL NULL CREATE PROCEDURE tp_update() UPDATE t1 SET s1 = s1 + 1 thread_id 2 2 transaction 1 STATEMENT @@ -537,7 +537,7 @@ RPAD(IFNULL(NESTING_EVENT_TYPE, 'NULL'), 18, ' ') NESTING_EVENT_TYPE, SQL_TEXT FROM performance_schema.events_statements_history_long s WHERE ((s.thread_id = @con1_thread_id) OR (@all_threads = 1)) -ORDER BY thread_id, r_event_id; +ORDER BY thread_id, r_event_id, r_end_event_id; THREAD_ID R_EVENT_ID R_END_EVENT_ID EVENT_NAME R_NESTING_EVENT_ID NESTING_EVENT_TYPE SQL_TXT thread_id 1 2 statement/sql/create_proc NULL NULL CREATE PROCEDURE tp_start() START TRANSACTION thread_id 2 2 transaction 1 STATEMENT @@ -697,7 +697,7 @@ RPAD(IFNULL(NESTING_EVENT_TYPE, 'NULL'), 18, ' ') NESTING_EVENT_TYPE, SQL_TEXT FROM 
performance_schema.events_statements_history_long s WHERE ((s.thread_id = @con1_thread_id) OR (@all_threads = 1)) -ORDER BY thread_id, r_event_id; +ORDER BY thread_id, r_event_id, r_end_event_id; THREAD_ID R_EVENT_ID R_END_EVENT_ID EVENT_NAME R_NESTING_EVENT_ID NESTING_EVENT_TYPE SQL_TXT thread_id 1 2 statement/sql/create_proc NULL NULL CREATE PROCEDURE tp_rollback() ROLLBACK thread_id 2 2 transaction 1 STATEMENT @@ -871,7 +871,7 @@ RPAD(IFNULL(NESTING_EVENT_TYPE, 'NULL'), 18, ' ') NESTING_EVENT_TYPE, SQL_TEXT FROM performance_schema.events_statements_history_long s WHERE ((s.thread_id = @con1_thread_id) OR (@all_threads = 1)) -ORDER BY thread_id, r_event_id; +ORDER BY thread_id, r_event_id, r_end_event_id; THREAD_ID R_EVENT_ID R_END_EVENT_ID EVENT_NAME R_NESTING_EVENT_ID NESTING_EVENT_TYPE SQL_TXT thread_id 1 2 statement/sql/begin NULL NULL START TRANSACTION thread_id 3 3 statement/sql/insert 2 TRANSACTION INSERT INTO t1 VALUES (410, "INSERT 410") @@ -1005,7 +1005,7 @@ RPAD(IFNULL(NESTING_EVENT_TYPE, 'NULL'), 18, ' ') NESTING_EVENT_TYPE, SQL_TEXT FROM performance_schema.events_statements_history_long s WHERE ((s.thread_id = @con1_thread_id) OR (@all_threads = 1)) -ORDER BY thread_id, r_event_id; +ORDER BY thread_id, r_event_id, r_end_event_id; THREAD_ID R_EVENT_ID R_END_EVENT_ID EVENT_NAME R_NESTING_EVENT_ID NESTING_EVENT_TYPE SQL_TXT thread_id 1 2 statement/sql/begin NULL NULL START TRANSACTION thread_id 2 6 transaction 1 STATEMENT @@ -1238,7 +1238,7 @@ RPAD(IFNULL(NESTING_EVENT_TYPE, 'NULL'), 18, ' ') NESTING_EVENT_TYPE, SQL_TEXT FROM performance_schema.events_statements_history_long s WHERE ((s.thread_id = @con1_thread_id) OR (@all_threads = 1)) -ORDER BY thread_id, r_event_id; +ORDER BY thread_id, r_event_id, r_end_event_id; THREAD_ID R_EVENT_ID R_END_EVENT_ID EVENT_NAME R_NESTING_EVENT_ID NESTING_EVENT_TYPE SQL_TXT thread_id 1 2 statement/sql/begin NULL NULL START TRANSACTION thread_id 2 19 transaction 1 STATEMENT diff --git a/mysql-test/suite/plugins/r/sql_error_log_withdbinfo.result b/mysql-test/suite/plugins/r/sql_error_log_withdbinfo.result index 732e74d851662..8f9de3e5e82e5 100644 --- a/mysql-test/suite/plugins/r/sql_error_log_withdbinfo.result +++ b/mysql-test/suite/plugins/r/sql_error_log_withdbinfo.result @@ -31,9 +31,9 @@ CREATE DATABASE `NULL`; USE `NULL`; DROP DATABASE db; ERROR HY000: Can't drop database 'db'; database doesn't exist -TIME THREAD_ID HOSTNAME `mtr` WARNING 1286: Unknown storage engine 'InnoDB' : SELECT CONCAT(table_schema, '.', table_name) AS columns_in_mysql, column_name, ordinal_position, column_default, is_nullable, data_type, character_maximum_length, character_octet_length, numeric_precision, numeric_scale, character_set_name, collation_name, column_type, column_key, extra, column_comment FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema='mysql' ORDER BY columns_in_mysql -TIME THREAD_ID HOSTNAME `mtr` WARNING 1286: Unknown storage engine 'InnoDB' : SELECT CONCAT(table_schema, '.', table_name) AS columns_in_mysql, column_name, ordinal_position, column_default, is_nullable, data_type, character_maximum_length, character_octet_length, numeric_precision, numeric_scale, character_set_name, collation_name, column_type, column_key, extra, column_comment FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema='mysql' ORDER BY columns_in_mysql -TIME THREAD_ID HOSTNAME `mtr` WARNING 1286: Unknown storage engine 'InnoDB' : SELECT CONCAT(table_schema, '.', table_name) AS columns_in_mysql, column_name, ordinal_position, column_default, is_nullable, data_type, 
character_maximum_length, character_octet_length, numeric_precision, numeric_scale, character_set_name, collation_name, column_type, column_key, extra, column_comment FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema='mysql' ORDER BY columns_in_mysql +TIME THREAD_ID HOSTNAME `mtr` WARNING 1286: Unknown storage engine 'InnoDB' : SELECT CONCAT(table_schema, '.', table_name) AS columns_in_mysql, column_name, ordinal_position, column_default, is_nullable, data_type, character_maximum_length, character_octet_length, numeric_precision, numeric_scale, character_set_name, collation_name, column_type, column_key, extra, column_comment FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema='mysql' ORDER BY columns_in_mysql, ordinal_position +TIME THREAD_ID HOSTNAME `mtr` WARNING 1286: Unknown storage engine 'InnoDB' : SELECT CONCAT(table_schema, '.', table_name) AS columns_in_mysql, column_name, ordinal_position, column_default, is_nullable, data_type, character_maximum_length, character_octet_length, numeric_precision, numeric_scale, character_set_name, collation_name, column_type, column_key, extra, column_comment FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema='mysql' ORDER BY columns_in_mysql, ordinal_position +TIME THREAD_ID HOSTNAME `mtr` WARNING 1286: Unknown storage engine 'InnoDB' : SELECT CONCAT(table_schema, '.', table_name) AS columns_in_mysql, column_name, ordinal_position, column_default, is_nullable, data_type, character_maximum_length, character_octet_length, numeric_precision, numeric_scale, character_set_name, collation_name, column_type, column_key, extra, column_comment FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema='mysql' ORDER BY columns_in_mysql, ordinal_position TIME THREAD_ID HOSTNAME `test` ERROR 1238: Variable 'sql_error_log_with_db_and_thread_info' is a read only variable : SET sql_error_log_with_db_and_thread_info=OFF TIME THREAD_ID HOSTNAME `test` ERROR 1008: Can't drop database 'db'; database doesn't exist : DROP DATABASE db TIME THREAD_ID HOSTNAME NULL ERROR 1008: Can't drop database 'dbnodb'; database doesn't exist : DROP DATABASE dbnodb diff --git a/mysql-test/suite/sys_vars/r/tmp_disk_table_size_basic.result b/mysql-test/suite/sys_vars/r/tmp_disk_table_size_basic.result index 96314c64de4f8..0220d45e252af 100644 --- a/mysql-test/suite/sys_vars/r/tmp_disk_table_size_basic.result +++ b/mysql-test/suite/sys_vars/r/tmp_disk_table_size_basic.result @@ -151,7 +151,109 @@ ERROR 42S22: Unknown column 'tmp_disk_table_size' in 'SELECT' SET @@tmp_disk_table_size=16384; CREATE VIEW v AS SELECT 'a'; SELECT table_name FROM INFORMATION_SCHEMA.views; -ERROR HY000: The table '(temporary)' is full +table_name +host_summary +host_summary_by_file_io +host_summary_by_file_io_type +host_summary_by_stages +host_summary_by_statement_latency +host_summary_by_statement_type +innodb_buffer_stats_by_schema +innodb_buffer_stats_by_table +innodb_lock_waits +io_by_thread_by_latency +io_global_by_file_by_bytes +io_global_by_file_by_latency +io_global_by_wait_by_bytes +io_global_by_wait_by_latency +latest_file_io +memory_by_host_by_current_bytes +memory_by_thread_by_current_bytes +memory_by_user_by_current_bytes +memory_global_by_current_bytes +memory_global_total +metrics +processlist +ps_check_lost_instrumentation +schema_auto_increment_columns +schema_index_statistics +schema_object_overview +schema_redundant_indexes +schema_table_lock_waits +schema_table_statistics +schema_table_statistics_with_buffer +schema_tables_with_full_table_scans +schema_unused_indexes +session +session_ssl_status 
+statement_analysis +statements_with_errors_or_warnings +statements_with_full_table_scans +statements_with_runtimes_in_95th_percentile +statements_with_sorting +statements_with_temp_tables +user +user_summary +user_summary_by_file_io +user_summary_by_file_io_type +user_summary_by_stages +user_summary_by_statement_latency +user_summary_by_statement_type +v +version +wait_classes_global_by_avg_latency +wait_classes_global_by_latency +waits_by_host_by_latency +waits_by_user_by_latency +waits_global_by_latency +x$host_summary +x$host_summary_by_file_io +x$host_summary_by_file_io_type +x$host_summary_by_stages +x$host_summary_by_statement_latency +x$host_summary_by_statement_type +x$innodb_buffer_stats_by_schema +x$innodb_buffer_stats_by_table +x$innodb_lock_waits +x$io_by_thread_by_latency +x$io_global_by_file_by_bytes +x$io_global_by_file_by_latency +x$io_global_by_wait_by_bytes +x$io_global_by_wait_by_latency +x$latest_file_io +x$memory_by_host_by_current_bytes +x$memory_by_thread_by_current_bytes +x$memory_by_user_by_current_bytes +x$memory_global_by_current_bytes +x$memory_global_total +x$processlist +x$ps_digest_95th_percentile_by_avg_us +x$ps_digest_avg_latency_distribution +x$ps_schema_table_statistics_io +x$schema_flattened_keys +x$schema_index_statistics +x$schema_table_lock_waits +x$schema_table_statistics +x$schema_table_statistics_with_buffer +x$schema_tables_with_full_table_scans +x$session +x$statement_analysis +x$statements_with_errors_or_warnings +x$statements_with_full_table_scans +x$statements_with_runtimes_in_95th_percentile +x$statements_with_sorting +x$statements_with_temp_tables +x$user_summary +x$user_summary_by_file_io +x$user_summary_by_file_io_type +x$user_summary_by_stages +x$user_summary_by_statement_latency +x$user_summary_by_statement_type +x$wait_classes_global_by_avg_latency +x$wait_classes_global_by_latency +x$waits_by_host_by_latency +x$waits_by_user_by_latency +x$waits_global_by_latency DROP VIEW v; # End of 10.4 test SET @@global.tmp_disk_table_size = @start_global_value; diff --git a/mysql-test/suite/sys_vars/t/tmp_disk_table_size_basic.test b/mysql-test/suite/sys_vars/t/tmp_disk_table_size_basic.test index 099be3544865b..454e2cbd6edc6 100644 --- a/mysql-test/suite/sys_vars/t/tmp_disk_table_size_basic.test +++ b/mysql-test/suite/sys_vars/t/tmp_disk_table_size_basic.test @@ -203,7 +203,7 @@ SELECT tmp_disk_table_size = @@session.tmp_disk_table_size; SET @@tmp_disk_table_size=16384; CREATE VIEW v AS SELECT 'a'; ---error ER_RECORD_FILE_FULL +--sorted_result SELECT table_name FROM INFORMATION_SCHEMA.views; DROP VIEW v; diff --git a/mysql-test/suite/sysschema/r/v_schema_redundant_indexes.result b/mysql-test/suite/sysschema/r/v_schema_redundant_indexes.result index 2199c959cd029..8893726fe128a 100644 --- a/mysql-test/suite/sysschema/r/v_schema_redundant_indexes.result +++ b/mysql-test/suite/sysschema/r/v_schema_redundant_indexes.result @@ -32,6 +32,6 @@ KEY (i, j, k) ); SELECT * FROM sys.schema_redundant_indexes; table_schema table_name redundant_index_name redundant_index_columns redundant_index_non_unique dominant_index_name dominant_index_columns dominant_index_non_unique subpart_exists sql_drop_index -rkey rkey j j 1 j_2 j,k 1 0 ALTER TABLE `rkey`.`rkey` DROP INDEX `j` rkey rkey i i,j,k 1 PRIMARY i 0 0 ALTER TABLE `rkey`.`rkey` DROP INDEX `i` +rkey rkey j j 1 j_2 j,k 1 0 ALTER TABLE `rkey`.`rkey` DROP INDEX `j` DROP DATABASE rkey; diff --git a/sql/item_func.cc b/sql/item_func.cc index 37d63d984f3af..02ffe79ad7769 100644 --- a/sql/item_func.cc +++ 
b/sql/item_func.cc
@@ -6377,8 +6377,49 @@ bool Item_func_match::fix_fields(THD *thd, Item **ref)
   }
   if (!(table->file->ha_table_flags() & HA_CAN_FULLTEXT))
   {
-    my_error(ER_TABLE_CANT_HANDLE_FT, MYF(0), table->file->table_type());
-    return 1;
+    /*
+      If this is an in-memory tmp table that hasn't been opened yet
+      (e.g. a derived table being prepared), convert it to a disk-based
+      engine that supports FULLTEXT. This can happen when HEAP blob
+      support keeps a table in memory that would previously have been
+      forced to disk by blob columns alone.
+    */
+    if (table->s->tmp_table && !table->is_created() &&
+        table->s->db_type() == heap_hton)
+    {
+      /*
+        Replace the HEAP handler with a disk-based engine (Aria/MyISAM)
+        that supports FULLTEXT. The table has not been opened yet, so
+        only the handler object and plugin reference need to be swapped.
+        This follows the same pattern as
+        create_internal_tmp_table_from_heap() in sql_select.cc.
+      */
+      delete table->file;
+      table->file= NULL;
+      /* Reset ha_share — old HEAP handler already set it via finalize() */
+      table->s->ha_share= NULL;
+      plugin_unlock(0, table->s->db_plugin);
+      table->s->db_plugin= ha_lock_engine(0, TMP_ENGINE_HTON);
+      if (!(table->file= get_new_handler(table->s, &table->mem_root,
+                                         table->s->db_type())))
+      {
+        my_error(ER_OUTOFMEMORY, MYF(ME_FATAL),
+                 static_cast<int>(sizeof(handler)));
+        return 1;
+      }
+      if (table->file->set_ha_share_ref(&table->s->ha_share))
+      {
+        delete table->file;
+        table->file= NULL;
+        return 1;
+      }
+      table->file->set_table(table);
+    }
+    else
+    {
+      my_error(ER_TABLE_CANT_HANDLE_FT, MYF(0), table->file->table_type());
+      return 1;
+    }
   }
   table->fulltext_searched=1;
   return agg_arg_charsets_for_comparison(cmp_collation, args+1, arg_count-1);
diff --git a/sql/item_sum.cc b/sql/item_sum.cc
index 580a13e1c5445..3e56e801bf4b2 100644
--- a/sql/item_sum.cc
+++ b/sql/item_sum.cc
@@ -801,11 +801,14 @@ bool Aggregator_distinct::setup(THD *thd)
     table->file->extra(HA_EXTRA_NO_ROWS);		// Don't update rows
     table->no_rows=1;
 
-    if (table->s->db_type() == heap_hton)
+    if (table->s->db_type() == heap_hton && !table->s->blob_fields)
     {
       /*
-        No blobs, otherwise it would have been MyISAM: set up a compare
-        function and its arguments to use with Unique.
+        Unique tree compares raw record bytes (simple_raw_key_cmp or
+        composite_key_cmp). Blob fields store only a pointer in the
+        record, so raw comparison would compare pointer values, not
+        actual blob data. Skip the Unique tree path for blob tables
+        and fall through to the ha_write_tmp_row path below.
       */
       qsort_cmp2 compare_key;
       void* cmp_arg;
@@ -1002,9 +1005,21 @@ bool Aggregator_distinct::add()
       */
       return tree->unique_add(table->record[0] + table->s->null_bytes);
     }
-    if (unlikely((error= table->file->ha_write_tmp_row(table->record[0]))) &&
-        table->file->is_fatal_error(error, HA_CHECK_DUP))
-      return TRUE;
+    if (unlikely((error= table->file->ha_write_tmp_row(table->record[0]))))
+    {
+      if (!table->file->is_fatal_error(error, HA_CHECK_DUP))
+        return FALSE;                         // duplicate, not an error
+      /*
+        HEAP table full: convert to on-disk engine.
+        create_internal_tmp_table_from_heap() copies all existing rows
+        plus the overflow row (record[0]) to the new table.
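+        On success, the conversion leaves table->file pointing at the
+        disk-based handler, so later add() calls write through it
+        directly.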
+      */
+      if (create_internal_tmp_table_from_heap(table->in_use, table,
+                                              tmp_table_param->start_recinfo,
+                                              &tmp_table_param->recinfo,
+                                              error, 0, NULL))
+        return TRUE;
+    }
     return FALSE;
   }
   else
diff --git a/sql/sql_expression_cache.cc b/sql/sql_expression_cache.cc
index 34bc1e2b2ca8a..0e584cb0cc041 100644
--- a/sql/sql_expression_cache.cc
+++ b/sql/sql_expression_cache.cc
@@ -138,6 +138,23 @@ void Expression_cache_tmptable::init()
     goto error;
   }
 
+  /*
+    HEAP hash indexes on blob columns use a pointer-based key format
+    (4-byte length + data pointer). This is incompatible with the SQL
+    layer's key format (2-byte length + inline data) because
+    Field_blob::new_key_field() returns a Field_varstring.
+
+    This check is slightly conservative: a blob only in the result
+    value would not affect the key. However, it matches the pre-blob
+    behavior where blobs forced Aria, which failed the heap_hton check
+    above and disabled the cache anyway.
+  */
+  if (cache_table->s->blob_fields)
+  {
+    DBUG_PRINT("error", ("blob fields not supported in heap expression cache"));
+    goto error;
+  }
+
   field_counter= 1;
 
   if (cache_table->alloc_keys(1) ||
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index d30d3647b48c3..f7195642ffee4 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -21036,11 +21036,16 @@ bool Create_tmp_table::choose_engine(THD *thd, TABLE *table,
   TABLE_SHARE *share= table->s;
   DBUG_ENTER("Create_tmp_table::choose_engine");
   /*
-    If result table is small; use a heap, otherwise TMP_TABLE_HTON (Aria)
-    In the future we should try making storage engine selection more dynamic
+    If the result table is small, use a heap; otherwise TMP_TABLE_HTON (Aria).
+    HEAP now supports blob columns via continuation chains, so blob_fields
+    alone no longer forces a disk-based engine. We still fall back to disk
+    when reclength exceeds HA_MAX_REC_LENGTH (HEAP's fixed-width rows would
+    waste too much memory for very wide records).
+    In the future we should try making storage engine selection more dynamic.
   */
-  if (share->blob_fields || m_using_unique_constraint ||
+  if (m_using_unique_constraint ||
+      share->reclength > HA_MAX_REC_LENGTH ||
      (thd->variables.big_tables &&
       !(m_select_options & SELECT_SMALL_RESULT)) ||
      (m_select_options & TMP_TABLE_FORCE_MYISAM) ||
@@ -21104,9 +21109,14 @@ bool Create_tmp_table::finalize(THD *thd,
   if (!m_using_unique_constraint)
     share->reclength+= m_group_null_items; // null flag is stored separately
 
-  if (share->blob_fields == 0)
+  if (share->blob_fields == 0 || share->db_type() == heap_hton)
   {
-    /* We need to ensure that first byte is not 0 for the delete link */
+    /*
+      We need to ensure that first byte is not 0 for the delete link.
+      HEAP uses fixed-width rows even with blobs (blob data lives in
+      separate continuation records within the same HP_BLOCK, not
+      inline in the primary record), so it still needs this guard.
+    */
     if (m_field_count[other])
       m_null_count[other]++;
     else
@@ -21125,11 +21135,15 @@ bool Create_tmp_table::finalize(THD *thd,
   if (!share->reclength)
     share->reclength= 1;                // Dummy select
   share->stored_rec_length= share->reclength;
-  /* Use packed rows if there is blobs or a lot of space to gain */
-  if (share->blob_fields ||
-      (string_total_length() >= STRING_TOTAL_LENGTH_TO_PACK_ROWS &&
-       (share->reclength / string_total_length() <= RATIO_TO_PACK_ROWS ||
-        string_total_length() / string_count() >= AVG_STRING_LENGTH_TO_PACK_ROWS)))
+  /*
+    Use packed rows if there are blobs or a lot of space to gain.
+    HEAP requires fixed-width rows — it cannot use packed row format.
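+    (ha_heap::get_row_type() always reports ROW_TYPE_FIXED; blob data
+    lives in continuation records, never in a packed row body.)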
+ */ + if (share->db_type() != heap_hton && + (share->blob_fields || + (string_total_length() >= STRING_TOTAL_LENGTH_TO_PACK_ROWS && + (share->reclength / string_total_length() <= RATIO_TO_PACK_ROWS || + string_total_length() / string_count() >= AVG_STRING_LENGTH_TO_PACK_ROWS)))) use_packed_rows= 1; { @@ -21160,8 +21174,13 @@ bool Create_tmp_table::finalize(THD *thd, share->null_bytes= share->null_bytes_for_compare= whole_null_pack_length; } - if (share->blob_fields == 0) + if (share->blob_fields == 0 || share->db_type() == heap_hton) { + /* + Same first-byte guard as above: HEAP with blobs still uses + fixed-width rows and needs a non-zero first byte for the + delete-link mechanism. + */ null_counter[(m_field_count[other] ? other : distinct)]++; } @@ -26803,8 +26822,15 @@ JOIN_TAB::remove_duplicates() table->file->info(HA_STATUS_VARIABLE); table->reginfo.lock_type=TL_WRITE; - if (table->s->db_type() == heap_hton || - (!table->s->blob_fields && + /* + remove_dup_with_hash_index() copies field data into a flat key buffer + via field->make_sort_key() and compares with memcmp. Blob fields + store only a pointer in the record, so memcmp would compare pointer + values instead of blob content. Fall back to the row-by-row compare + path for tables with blobs. + */ + if (!table->s->blob_fields && + (table->s->db_type() == heap_hton || ((ALIGN_SIZE(keylength) + HASH_OVERHEAD) * table->file->stats.records < thd->variables.sortbuff_size))) error= remove_dup_with_hash_index(join->thd, table, field_count, @@ -31781,8 +31807,24 @@ test_if_cheaper_ordering(bool in_join_optimizer, and as result we'll choose an index scan when using ref/range access + filesort will be cheaper. */ - select_limit= (ha_rows) (select_limit < fanout ? - 1 : select_limit/fanout); + /* + fanout can be extremely small (close to 0) when + cond_selectivity values are tiny, making select_limit/fanout + overflow to infinity or a value exceeding HA_POS_ERROR. + Casting such a double to ha_rows (unsigned long long) is + undefined behavior. Cap at HA_POS_ERROR to avoid UB. + Note: (double) HA_POS_ERROR rounds up to 2^64 (double can't + represent 2^64-1 exactly), so the >= comparison is safe — + any double that reaches 2^64 is genuinely out of range. + */ + { + double adjusted= (select_limit < fanout) ? 
+ 1.0 : select_limit / fanout; + if (adjusted >= (double) HA_POS_ERROR) + select_limit= HA_POS_ERROR; + else + select_limit= (ha_rows) adjusted; + } /* refkey_rows_estimate is E(#rows) produced by the table access diff --git a/storage/heap/CMakeLists.txt b/storage/heap/CMakeLists.txt index a26124d0c1cae..7f4d53a787900 100644 --- a/storage/heap/CMakeLists.txt +++ b/storage/heap/CMakeLists.txt @@ -13,7 +13,7 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA -SET(HEAP_SOURCES _check.c _rectest.c hp_block.c hp_clear.c hp_close.c hp_create.c +SET(HEAP_SOURCES _check.c _rectest.c hp_block.c hp_blob.c hp_clear.c hp_close.c hp_create.c ha_heap.cc hp_delete.c hp_extra.c hp_hash.c hp_info.c hp_open.c hp_panic.c hp_rename.c hp_rfirst.c hp_rkey.c hp_rlast.c hp_rnext.c hp_rprev.c diff --git a/storage/heap/_check.c b/storage/heap/_check.c index 1a640fa13da86..c87eda3818121 100644 --- a/storage/heap/_check.c +++ b/storage/heap/_check.c @@ -42,7 +42,7 @@ int heap_check_heap(const HP_INFO *info, my_bool print_status) { int error; uint key; - ulong records=0, deleted=0, pos, next_block; + ulong records=0, deleted=0, cont_count=0, pos, next_block; HP_SHARE *share=info->s; uchar *current_ptr= info->current_ptr; DBUG_ENTER("heap_check_heap"); @@ -68,9 +68,9 @@ int heap_check_heap(const HP_INFO *info, my_bool print_status) else { next_block+= share->block.records_in_block; - if (next_block >= share->records+share->deleted) + if (next_block >= share->total_records+share->deleted) { - next_block= share->records+share->deleted; + next_block= share->total_records+share->deleted; if (pos >= next_block) break; /* End of file */ } @@ -79,6 +79,12 @@ int heap_check_heap(const HP_INFO *info, my_bool print_status) if (!current_ptr[share->visible]) deleted++; + else if (hp_is_cont(current_ptr, share->visible)) + { + uint16 run_rec_count= hp_cont_rec_count(current_ptr); + cont_count+= run_rec_count; + pos+= run_rec_count - 1; /* -1 because for-loop does pos++ */ + } else records++; } @@ -90,6 +96,13 @@ int heap_check_heap(const HP_INFO *info, my_bool print_status) deleted, (ulong) share->deleted)); error= 1; } + if (records + cont_count != share->total_records) + { + DBUG_PRINT("error",("total_records mismatch: primary %lu + cont %lu != %lu", + records, cont_count, + (ulong) share->total_records)); + error= 1; + } DBUG_RETURN(error); } diff --git a/storage/heap/ha_heap.cc b/storage/heap/ha_heap.cc index 3f4d1ab69efc5..0c8f4aa2a5491 100644 --- a/storage/heap/ha_heap.cc +++ b/storage/heap/ha_heap.cc @@ -25,6 +25,7 @@ #include "sql_plugin.h" #include "ha_heap.h" #include "sql_base.h" +#include "field.h" static handler *heap_create_handler(handlerton *, TABLE_SHARE *, MEM_ROOT *); static int heap_prepare_hp_create_info(TABLE *, bool, HP_CREATE_INFO *); @@ -103,6 +104,7 @@ int ha_heap::open(const char *name, int mode, uint test_if_locked) rc= heap_create(name, &create_info, &internal_share, &created_new_share); my_free(create_info.keydef); + my_free(create_info.blob_descs); if (rc) goto end; @@ -363,6 +365,45 @@ void ha_heap::position(const uchar *record) *(HEAP_PTR*) ref= heap_position(file); // Ref is aligned } +int ha_heap::remember_rnd_pos() +{ + saved_current_record= file->current_record; + position((uchar*) 0); + return 0; +} + +int ha_heap::restart_rnd_next(uchar *buf) +{ + /* + Restore the scan position saved by remember_rnd_pos(). 
+ + heap_scan() uses current_record as a sequential counter and next_block + as a cached upper bound for the current HP_BLOCK segment. Within one + segment, heap_scan() advances current_ptr by recbuffer without calling + hp_find_record(). heap_rrnd() (called via rnd_pos) doesn't update + these, so we restore them here. + + next_block is set to the next records_in_block-aligned boundary after + saved_current_record. We MUST then cap it at total_records + deleted + (== block.last_allocated), which is the number of actually allocated + slots in the HP_BLOCK. Without this cap, if the saved position falls + in the last block segment and rows have been deleted between + remember_rnd_pos() and restart_rnd_next() (e.g. by + remove_dup_with_compare), next_block can exceed the allocated range. + heap_scan() would then take the fast path (pos < next_block) and walk + current_ptr past the last allocated slot into unmapped memory, causing + a segfault. + */ + file->current_record= saved_current_record; + file->next_block= saved_current_record - + (saved_current_record % file->s->block.records_in_block) + + file->s->block.records_in_block; + ulong scan_end= file->s->total_records + file->s->deleted; + if (file->next_block > scan_end) + file->next_block= scan_end; + return rnd_pos(buf, ref); +} + int ha_heap::info(uint flag) { HEAPINFO hp_info; @@ -693,6 +734,47 @@ static int heap_prepare_hp_create_info(TABLE *table_arg, bool internal_table, keydef[share->next_number_index].flag|= HA_AUTO_KEY; found_real_auto_increment= share->next_number_key_offset == 0; } + + /* Populate blob column descriptors */ + if (share->blob_fields) + { + HP_BLOB_DESC *blob_descs; + blob_descs= (HP_BLOB_DESC*) my_malloc(hp_key_memory_HP_BLOB, + share->blob_fields * + sizeof(HP_BLOB_DESC), + MYF(MY_WME | MY_THREAD_SPECIFIC)); + if (!blob_descs) + { + my_free(keydef); + return my_errno; + } + { + uint real_blob_count= 0; + for (uint b= 0; b < share->blob_fields; b++) + { + Field *field= table_arg->field[share->blob_field[b]]; + /* + BLOB_FLAG may be set on non-Field_blob fields (e.g. long + Field_string in INFORMATION_SCHEMA temp tables). Only include + true Field_blob types in the HEAP blob descriptor array. + Field_geom (MYSQL_TYPE_GEOMETRY) extends Field_blob and must + also be included. 
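+            Non-Field_blob fields that carry BLOB_FLAG keep their data
+            inline in the record and therefore need no descriptor.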
+ */ + if (field->type() == MYSQL_TYPE_BLOB || + field->type() == MYSQL_TYPE_GEOMETRY) + { + Field_blob *blob= (Field_blob*) field; + blob_descs[real_blob_count].offset= + (uint) blob->offset(table_arg->record[0]); + blob_descs[real_blob_count].packlength= blob->pack_length_no_ptr(); + real_blob_count++; + } + } + hp_create_info->blob_descs= blob_descs; + hp_create_info->blob_count= real_blob_count; + } + } + hp_create_info->auto_key= auto_key; hp_create_info->auto_key_type= auto_key_type; hp_create_info->max_table_size= MY_MAX(current_thd->variables.max_heap_table_size, sizeof(HP_PTRS)); @@ -734,6 +816,7 @@ int ha_heap::create(const char *name, TABLE *table_arg, create_info->auto_increment_value - 1 : 0); error= heap_create(name, &hp_create_info, &internal_share, &created); my_free(hp_create_info.keydef); + my_free(hp_create_info.blob_descs); DBUG_ASSERT(file == 0); return (error); } @@ -800,7 +883,7 @@ int ha_heap::find_unique_row(uchar *record, uint unique_idx) share->blength, share->records)); do { - if (!hp_rec_key_cmp(keyinfo, pos->ptr_to_rec, record)) + if (!hp_rec_key_cmp(keyinfo, record, pos->ptr_to_rec, file)) { file->current_hash_ptr= pos; file->current_ptr= pos->ptr_to_rec; @@ -810,6 +893,8 @@ int ha_heap::find_unique_row(uchar *record, uint unique_idx) records. */ memcpy(record, file->current_ptr, (size_t) share->reclength); + if (share->blob_count && hp_read_blobs(file, record, file->current_ptr)) + DBUG_RETURN(-1); DBUG_RETURN(0); // found and position set } diff --git a/storage/heap/ha_heap.h b/storage/heap/ha_heap.h index c38ec325740d7..0d0eec530cde6 100644 --- a/storage/heap/ha_heap.h +++ b/storage/heap/ha_heap.h @@ -32,6 +32,7 @@ class ha_heap final : public handler key_map btree_keys; /* number of records changed since last statistics update */ ulong records_changed; + ulong saved_current_record; /* for remember_rnd_pos() / restart_rnd_next() */ uint key_stat_version; my_bool internal_table; public: @@ -47,11 +48,12 @@ class ha_heap final : public handler enum row_type get_row_type() const override { return ROW_TYPE_FIXED; } ulonglong table_flags() const override { - return (HA_FAST_KEY_READ | HA_NO_BLOBS | HA_NULL_IN_KEY | + return (HA_FAST_KEY_READ | HA_NULL_IN_KEY | HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE | HA_CAN_SQL_HANDLER | HA_CAN_ONLINE_BACKUPS | HA_REC_NOT_IN_SEQ | HA_CAN_INSERT_DELAYED | HA_NO_TRANSACTIONS | - HA_HAS_RECORDS | HA_STATS_RECORDS_IS_EXACT | HA_CAN_HASH_KEYS); + HA_HAS_RECORDS | HA_STATS_RECORDS_IS_EXACT | HA_CAN_HASH_KEYS | + HA_CAN_GEOMETRY); } ulong index_flags(uint inx, uint part, bool all_parts) const override { @@ -94,6 +96,8 @@ class ha_heap final : public handler int rnd_next(uchar *buf) override; int rnd_pos(uchar * buf, uchar *pos) override; void position(const uchar *record) override; + int remember_rnd_pos() override; + int restart_rnd_next(uchar *buf) override; int can_continue_handler_scan() override; int info(uint) override; int extra(enum ha_extra_function operation) override; diff --git a/storage/heap/heapdef.h b/storage/heap/heapdef.h index e51fe88d8e2b7..5ef2003d58085 100644 --- a/storage/heap/heapdef.h +++ b/storage/heap/heapdef.h @@ -33,6 +33,93 @@ C_MODE_START #define HP_MIN_RECORDS_IN_BLOCK 16 #define HP_MAX_RECORDS_IN_BLOCK 8192 +#define HP_ROW_ACTIVE 1 /* Bit 0: record is active (not deleted) */ +#define HP_ROW_HAS_CONT 2 /* Bit 1: primary record has continuation chain(s) */ +#define HP_ROW_IS_CONT 4 /* Bit 2: this record IS a continuation record */ +#define HP_ROW_CONT_ZEROCOPY 8 /* Bit 3: zero-copy layout (data in 
rec 1..N-1) */
+
+/*
+  Continuation run header: next_cont pointer + run_rec_count.
+  Stored at the beginning of the first record in each run.
+*/
+#define HP_CONT_NEXT_PTR_SIZE sizeof(uchar*)
+#define HP_CONT_REC_COUNT_SIZE sizeof(uint16)
+#define HP_CONT_HEADER_SIZE (HP_CONT_NEXT_PTR_SIZE + HP_CONT_REC_COUNT_SIZE)
+
+/*
+  Minimum contiguous run size parameters.
+  Runs smaller than this are not worth scavenging from the free list because
+  the per-run header overhead (10 bytes) becomes a significant fraction of
+  payload. Skip them and allocate from the tail instead.
+
+  HP_CONT_MIN_RUN_BYTES: absolute floor for minimum run payload.
+  HP_CONT_RUN_FRACTION_NUM/DEN: minimum run size as a fraction of blob size.
+  min_run_bytes = MAX(blob_length * NUM / DEN, HP_CONT_MIN_RUN_BYTES)
+*/
+#define HP_CONT_MIN_RUN_BYTES 128
+#define HP_CONT_RUN_FRACTION_NUM 1
+#define HP_CONT_RUN_FRACTION_DEN 10
+
+/*
+  Row flags byte predicates.
+  The flags byte is at offset 'visible' in each primary or run-header record.
+*/
+
+/* Record is active (not deleted) */
+static inline my_bool hp_is_active(const uchar *rec, uint visible)
+{
+  return (rec[visible] & HP_ROW_ACTIVE) != 0;
+}
+
+/* Primary record that owns blob continuation chain(s) */
+static inline my_bool hp_has_cont(const uchar *rec, uint visible)
+{
+  return (rec[visible] & HP_ROW_HAS_CONT) != 0;
+}
+
+/* This record IS a continuation run header (rec 0 of a run) */
+static inline my_bool hp_is_cont(const uchar *rec, uint visible)
+{
+  return (rec[visible] & HP_ROW_IS_CONT) != 0;
+}
+
+/*
+  Continuation run header accessors.
+  Read next_cont pointer and run_rec_count from the first record of a run.
+*/
+static inline const uchar *hp_cont_next(const uchar *chain)
+{
+  const uchar *next;
+  memcpy(&next, chain, HP_CONT_NEXT_PTR_SIZE);
+  return next;
+}
+
+static inline uint16 hp_cont_rec_count(const uchar *chain)
+{
+  return uint2korr(chain + HP_CONT_NEXT_PTR_SIZE);
+}
+
+/*
+  Zero-copy case detection for stored continuation chains.
+
+  Case A: single record, single run — data fits in rec 0 payload after header.
+    run_rec_count == 1 AND next_cont == NULL.
+    IMPORTANT: run_rec_count == 1 alone is NOT sufficient — a multi-run
+    blob can have run_rec_count == 1 in its first run when free-list
+    fragmentation produces a single-slot fragment.
+
+  Case B: single run, multiple records, zerocopy flag set — data in rec 1..N-1.
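+
+  Example with recbuffer R and the flags byte at offset V:
+    A Case A read returns chain + HP_CONT_HEADER_SIZE, i.e. the
+    (V - 10)-byte payload of rec 0 after the 10-byte header.
+    A Case B read returns chain + R; rec 1..N-1 carry full R-byte
+    payloads with no flags byte, so the data is contiguous.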
+*/
+static inline my_bool hp_is_case_a(const uchar *chain)
+{
+  return hp_cont_rec_count(chain) == 1 && hp_cont_next(chain) == NULL;
+}
+
+static inline my_bool hp_is_case_b(const uchar *chain, uint visible)
+{
+  return (chain[visible] & HP_ROW_CONT_ZEROCOPY) != 0;
+}
+
 /* Some extern variables */
 
 extern LIST *heap_open_list,*heap_share_list;
@@ -85,9 +172,12 @@ extern ulong hp_rec_hashnr(HP_KEYDEF *keyinfo,const uchar *rec);
 extern void hp_movelink(HASH_INFO *pos,HASH_INFO *next_link,
 			HASH_INFO *newlink);
 extern int hp_rec_key_cmp(HP_KEYDEF *keydef,const uchar *rec1,
-			  const uchar *rec2);
+			  const uchar *rec2, HP_INFO *info);
 extern int hp_key_cmp(HP_KEYDEF *keydef,const uchar *rec,
-		      const uchar *key);
+		      const uchar *key, HP_INFO *info);
+extern const uchar *hp_materialize_one_blob(HP_INFO *info,
+                                            const uchar *chain,
+                                            uint32 data_len);
 extern void hp_make_key(HP_KEYDEF *keydef,uchar *key,const uchar *rec);
 extern uint hp_rb_make_key(HP_KEYDEF *keydef, uchar *key,
 			   const uchar *rec, uchar *recpos);
@@ -104,12 +194,19 @@ extern ha_rows hp_rows_in_memory(size_t reclength, size_t index_size,
                                  size_t memory_limit);
 extern size_t hp_memory_needed_per_row(size_t reclength);
 
+extern uchar *next_free_record_pos(HP_SHARE *info);
+extern int hp_write_blobs(HP_INFO *info, const uchar *record, uchar *pos);
+extern int hp_read_blobs(HP_INFO *info, uchar *record, const uchar *pos);
+extern void hp_free_blobs(HP_SHARE *share, uchar *pos);
+extern void hp_free_run_chain(HP_SHARE *share, uchar *chain);
+
 extern mysql_mutex_t THR_LOCK_heap;
 
 extern PSI_memory_key hp_key_memory_HP_SHARE;
 extern PSI_memory_key hp_key_memory_HP_INFO;
 extern PSI_memory_key hp_key_memory_HP_PTRS;
 extern PSI_memory_key hp_key_memory_HP_KEYDEF;
+extern PSI_memory_key hp_key_memory_HP_BLOB;
 
 #ifdef HAVE_PSI_INTERFACE
 void init_heap_psi_keys();
diff --git a/storage/heap/hp_blob.c b/storage/heap/hp_blob.c
new file mode 100644
index 0000000000000..19c8068be5d14
--- /dev/null
+++ b/storage/heap/hp_blob.c
@@ -0,0 +1,885 @@
+/* Copyright (c) 2025, MariaDB Corporation.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/*
+  LOB (BLOB/TEXT) support for HEAP tables using variable-length
+  continuation runs.
+
+  Each blob column's data is stored as a chain of continuation "runs".
+  A run is a contiguous sequence of recbuffer-sized records in the same
+  HP_BLOCK. The first record of each run stores a header (next_cont
+  pointer + run_rec_count); subsequent records carry pure blob payload.
+  Runs are linked together via the next_cont pointer.
+
+  This design amortizes the per-run header overhead across many records,
+  giving near-100% space efficiency for typical blob sizes (150 KB and
+  above), even when recbuffer is very small (e.g. 16 bytes).
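+
+  For example, a maximal 65,535-record run of 16-byte records pays the
+  10-byte header once for roughly 1 MB of payload, where a per-record
+  header would instead consume over 60% of every record.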
+*/
+
+#include "heapdef.h"
+#include <my_stacktrace.h>
+#include <stdlib.h>
+
+
+/*
+  Read blob data length from the record buffer.
+*/
+
+static uint32 hp_blob_length(const HP_BLOB_DESC *desc, const uchar *record)
+{
+  switch (desc->packlength)
+  {
+  case 1:
+    return (uint32) record[desc->offset];
+  case 2:
+    return uint2korr(record + desc->offset);
+  case 3:
+    return uint3korr(record + desc->offset);
+  case 4:
+    return uint4korr(record + desc->offset);
+  default:
+    DBUG_ASSERT(0);
+    return 0;
+  }
+}
+
+
+/*
+  Allocate one record from the HP_BLOCK tail, bypassing the free list.
+  Same accounting as next_free_record_pos() but never uses del_link.
+
+  Maintains the scan-boundary invariant:
+    total_records + deleted == block.last_allocated
+  by incrementing both last_allocated and total_records together.
+  heap_scan() relies on this invariant to know when to stop scanning.
+*/
+
+static uchar *hp_alloc_from_tail(HP_SHARE *share)
+{
+  int block_pos;
+  size_t length;
+
+  if (!(block_pos= (share->block.last_allocated %
+                    share->block.records_in_block)))
+  {
+    if ((share->block.last_allocated > share->max_records &&
+         share->max_records) ||
+        (share->data_length + share->index_length >= share->max_table_size))
+    {
+      my_errno= HA_ERR_RECORD_FILE_FULL;
+      return NULL;
+    }
+    if (hp_get_new_block(share, &share->block, &length))
+      return NULL;
+    share->data_length+= length;
+  }
+  share->block.last_allocated++;
+  share->total_records++;
+  return (uchar*) share->block.level_info[0].last_blocks +
+         block_pos * share->block.recbuffer;
+}
+
+
+/*
+  Free one continuation chain of variable-length runs.
+
+  Walks from the first run, reads run_rec_count from each, frees all
+  records individually to the free list, then follows next_cont to the
+  next run.
+
+  Maintains the scan-boundary invariant:
+    total_records + deleted == block.last_allocated
+  Each freed slot does total_records-- and deleted++, keeping the sum
+  constant. heap_scan() relies on this sum to know when to stop.
+
+  @param share  Table share
+  @param chain  Pointer to first record of first run (or NULL)
+*/
+
+void hp_free_run_chain(HP_SHARE *share, uchar *chain)
+{
+  uint recbuffer= share->block.recbuffer;
+
+  while (chain)
+  {
+    uchar *next_run;
+    uint16 run_rec_count;
+    uint16 j;
+
+    memcpy(&next_run, chain, HP_CONT_NEXT_PTR_SIZE);
+    run_rec_count= uint2korr(chain + HP_CONT_NEXT_PTR_SIZE);
+
+    for (j= 0; j < run_rec_count; j++)
+    {
+      uchar *pos= chain + j * recbuffer;
+      *((uchar**) pos)= share->del_link;
+      share->del_link= pos;
+      pos[share->visible]= 0;
+      share->deleted++;
+      share->total_records--;
+    }
+
+    chain= next_run;
+  }
+}
+
+
+/*
+  Write blob data into a contiguous run of records.
+
+  Writes the run header (next_cont=NULL, run_rec_count) in the first
+  record, then copies blob data across all records in the run,
+  advancing *offset.
+
+  @param share          Table share
+  @param data           Source blob data
+  @param data_len       Total blob data length
+  @param run_start      Pointer to first record of the run
+  @param run_rec_count  Number of consecutive records in this run
+  @param zerocopy       If TRUE, use zero-copy layout:
+                        Case A (run_rec_count==1): data in rec 0 after header.
+                        Case B (run_rec_count>1): data in rec 1..N-1 only,
+                        rec 0 carries only the header (no data payload).
+  @param offset         [in/out] Current offset into blob data
+
+  @note Caller must link runs by overwriting next_cont in the previous run.
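+
+  Resulting layout for a Case C run with run_rec_count == 3:
+    rec 0: [next_cont][run_rec_count (2)][payload][flags at offset visible]
+    rec 1: [payload: full recbuffer bytes]
+    rec 2: [payload: full recbuffer bytes]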
+*/ + +static void hp_write_run_data(HP_SHARE *share, const uchar *data, + uint32 data_len, uchar *run_start, + uint16 run_rec_count, my_bool zerocopy, + uint32 *offset) +{ + uint visible= share->visible; + uint recbuffer= share->block.recbuffer; + uint32 off= *offset; + uint32 remaining= data_len - off; + uint32 chunk; + uint16 rec; + uchar *null_ptr= NULL; + + /* First record: run header + flags byte (always written) */ + memcpy(run_start, &null_ptr, HP_CONT_NEXT_PTR_SIZE); + int2store(run_start + HP_CONT_NEXT_PTR_SIZE, run_rec_count); + run_start[visible]= HP_ROW_ACTIVE | HP_ROW_IS_CONT | + (zerocopy && run_rec_count > 1 ? HP_ROW_CONT_ZEROCOPY : 0); + + /* + Case B (zerocopy && run_rec_count > 1): skip data copy in rec 0. + All data goes into rec 1..N-1 contiguously for zero-copy reads. + Case A (zerocopy && run_rec_count == 1): data fits in rec 0 payload. + Case C (!zerocopy): data starts in rec 0 as before. + */ + if (!zerocopy || run_rec_count == 1) + { + chunk= visible - HP_CONT_HEADER_SIZE; + if (chunk > remaining) + chunk= remaining; + memcpy(run_start + HP_CONT_HEADER_SIZE, data + off, chunk); + off+= chunk; + remaining-= chunk; + } + + /* + Inner records (rec 1..N-1): full recbuffer payload, no flags byte. + This makes data in inner records contiguous, enabling zero-copy reads + for single-run blobs (Case B). + */ + for (rec= 1; rec < run_rec_count && remaining > 0; rec++) + { + uchar *rec_ptr= run_start + rec * recbuffer; + chunk= recbuffer; + if (chunk > remaining) + chunk= remaining; + memcpy(rec_ptr, data + off, chunk); + off+= chunk; + remaining-= chunk; + } + + *offset= off; +} + + +/* + Unlink a contiguous group from the free list and write blob data into it. + + @param share Table share + @param data_ptr Blob data + @param data_len Total blob data length + @param run_start Lowest address of the contiguous group + @param run_count Number of contiguous records in the group + @param visible share->visible + @param recbuffer share->block.recbuffer + @param data_offset [in/out] Current offset into blob data + @param first_run [in/out] Pointer to first run (NULL initially) + @param prev_run_start [in/out] Pointer to previous run's start +*/ + +static void hp_unlink_and_write_run(HP_SHARE *share, const uchar *data_ptr, + uint32 data_len, uchar *run_start, + uint16 run_count, uint visible, + uint recbuffer, uint32 *data_offset, + uchar **first_run, uchar **prev_run_start) +{ + uint32 remaining= data_len - *data_offset; + uint32 records_needed; + uint16 records_to_use; + uint32 unlinked= 0; + uchar **prev_link= &share->del_link; + uchar *cur; + uint32 first_payload= visible - HP_CONT_HEADER_SIZE; + + if (remaining <= first_payload) + records_needed= 1; + else + records_needed= 1 + (remaining - first_payload + recbuffer - 1) / recbuffer; + records_to_use= (records_needed > run_count) ? run_count : + (uint16) records_needed; + + cur= share->del_link; + while (cur && unlinked < records_to_use) + { + uchar *next= *((uchar**) cur); + if (cur >= run_start && + cur < run_start + records_to_use * recbuffer) + { + *prev_link= next; + share->deleted--; + share->total_records++; + unlinked++; + } + else + prev_link= (uchar**) cur; + cur= next; + } + + hp_write_run_data(share, data_ptr, data_len, run_start, + records_to_use, FALSE, data_offset); + + if (*prev_run_start) + memcpy(*prev_run_start, &run_start, sizeof(run_start)); + else + *first_run= run_start; + *prev_run_start= run_start; +} + + +/* + Write one blob column's data into a chain of continuation runs. 
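+
+  For example, with recbuffer == 128 a 10 KB blob yields
+  min_run_bytes == MAX(10240 / 10, 128) == 1024, so only free-list
+  groups of at least 8 contiguous records are scavenged for it.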
+ + Allocates contiguous runs from the free list and/or block tail, + copies blob data into them, and returns the first run pointer. + On failure, frees any partially allocated chain. + + @param share Table share + @param data_ptr Blob data to write + @param data_len Blob data length (must be > 0) + @param first_run_out [out] Pointer to first run's first record + + @return 0 on success, my_errno on failure +*/ + +static int hp_write_one_blob(HP_SHARE *share, const uchar *data_ptr, + uint32 data_len, uchar **first_run_out) +{ + uint visible= share->visible; + uint recbuffer= share->block.recbuffer; + uint32 min_run_bytes; + uint32 min_run_records; + uchar *first_run= NULL; + uchar *prev_run_start= NULL; + uint32 data_offset= 0; + + /* Calculate minimum acceptable run size */ + min_run_bytes= data_len / HP_CONT_RUN_FRACTION_DEN * + HP_CONT_RUN_FRACTION_NUM; + if (min_run_bytes < HP_CONT_MIN_RUN_BYTES) + min_run_bytes= HP_CONT_MIN_RUN_BYTES; + min_run_records= (min_run_bytes + recbuffer - 1) / recbuffer; + if (min_run_records < 2) + min_run_records= 2; + + /* + Step 1: Try to allocate contiguous runs from the free list. + + Peek at free list records by walking next pointers without unlinking. + Track contiguous groups (descending addresses — LIFO order from + hp_free_run_chain). On discontinuity: if the group qualifies + (>= min_run_records), unlink and use it; if it doesn't, the free + list is too fragmented — stop and fall through to tail allocation. + */ + { + uchar *run_start= NULL; + uint16 run_count= 0; + uchar *prev_pos= NULL; + uchar *pos; + + for (pos= share->del_link; + pos && data_offset < data_len; + pos= *((uchar**) pos)) + { + /* + Only check descending direction: hp_free_run_chain() frees records + in ascending address order (j=0..N), so LIFO pushes them onto the + free list in reverse — consecutive free list entries have descending + addresses. Ascending adjacency from unrelated deletes is ignored + intentionally; we only recover runs that were freed together. + */ + if (prev_pos && pos == prev_pos - recbuffer && run_count < UINT_MAX16) + { + run_start= pos; + run_count++; + prev_pos= pos; + continue; + } + + /* + Discontinuity. If the accumulated group qualifies, use it. + If not, the free list is fragmented — give up entirely. + */ + if (run_count > 0) + { + if (run_count < min_run_records) + break; + hp_unlink_and_write_run(share, data_ptr, data_len, run_start, + run_count, visible, recbuffer, + &data_offset, &first_run, &prev_run_start); + } + + run_start= pos; + run_count= 1; + prev_pos= pos; + } + + /* Handle the last group after the loop ends */ + if (run_count >= min_run_records && data_offset < data_len) + hp_unlink_and_write_run(share, data_ptr, data_len, run_start, + run_count, visible, recbuffer, + &data_offset, &first_run, &prev_run_start); + } + + /* + Step 2: Allocate remaining data from the block tail. + + Tail allocation is always contiguous within a leaf block. + When we hit a block boundary, we start a new run. 
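+
+    Each leaf block is a single flat allocation of
+    records_in_block * recbuffer bytes; the next block lives at an
+    unrelated address, so a run can never cross a block boundary.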
+ */ + while (data_offset < data_len) + { + uchar *run_start; + uint16 run_rec_count; + uint32 remaining= data_len - data_offset; + uint32 run_payload; + my_bool is_only_run; + + run_start= hp_alloc_from_tail(share); + if (!run_start) + goto err; + run_rec_count= 1; + + /* Extend the run with consecutive tail records */ + for (;;) + { + uint block_pos; + + if (run_rec_count == 1) + run_payload= visible - HP_CONT_HEADER_SIZE; + else + run_payload= (visible - HP_CONT_HEADER_SIZE) + + (uint32)(run_rec_count - 1) * recbuffer; + if (run_payload >= remaining) + break; + + /* + Check if the next record would be in the same leaf block. + block_pos == 0 means last_allocated is at a block boundary + and the next allocation would start a new block. + */ + block_pos= share->block.last_allocated % + share->block.records_in_block; + if (block_pos == 0) + break; + + { + uchar *next_rec= hp_alloc_from_tail(share); + if (!next_rec) + break; + /* + Contiguity guard (active in all builds, not just debug). + + Blob continuation runs use pointer arithmetic (run_start + + i * recbuffer) to access inner records in the write, read, + zero-copy, scan-skip, and free paths. Today, contiguity + within a leaf block is guaranteed by hp_get_new_block() + allocating a single flat array of records_in_block * recbuffer + bytes, and hp_alloc_from_tail() handing them out sequentially. + But this is an implementation detail of HP_BLOCK, not a + documented contract. A future change (e.g. sub-block + allocation, memory pooling, or alignment padding between + records) could silently break this assumption, turning every + blob path into a source of data corruption. Abort here so + such a change is caught immediately by any test that exercises + blob writes. + */ + if (unlikely(next_rec != + run_start + (uint32) run_rec_count * recbuffer)) + { + my_safe_printf_stderr( + "HEAP blob: tail allocation not contiguous: " + "expected %p, got %p (run_start=%p, count=%u, recbuffer=%u)\n", + run_start + (uint32) run_rec_count * recbuffer, + next_rec, run_start, (uint) run_rec_count, recbuffer); + abort(); + } + run_rec_count++; + } + } + + is_only_run= (first_run == NULL && prev_run_start == NULL); + + if (is_only_run && run_payload >= remaining) + { + /* + Single-run blob — use zero-copy layout if possible. + Case A: data fits in rec 0 payload (run_rec_count == 1). + Case B: data in rec 1..N-1 only, contiguous for zero-copy reads. + */ + if (run_rec_count == 1) + { + /* Case A: data fits in rec 0 */ + hp_write_run_data(share, data_ptr, data_len, run_start, + run_rec_count, TRUE, &data_offset); + } + else + { + uint32 case_b_payload= (uint32)(run_rec_count - 1) * recbuffer; + if (case_b_payload >= remaining) + { + /* Case B: rec 1..N-1 alone hold all data */ + hp_write_run_data(share, data_ptr, data_len, run_start, + run_rec_count, TRUE, &data_offset); + } + else + { + /* + Case B needs one more record than Case C. Try to extend + if we're not at a block boundary. + */ + uint block_pos= share->block.last_allocated % + share->block.records_in_block; + if (block_pos != 0) + { + uchar *extra= hp_alloc_from_tail(share); + if (extra) + { + /* + Contiguity guard for the Case B extra record, same + rationale as the main extension loop ~60 lines above: + hp_get_new_block() today allocates flat arrays but this + is an HP_BLOCK implementation detail, not a contract. + A future change could break contiguity and silently + corrupt every blob read/write/free path that relies on + run_start + i * recbuffer arithmetic. 
+ */ + if (unlikely(extra != + run_start + (uint32) run_rec_count * recbuffer)) + { + my_safe_printf_stderr( + "HEAP blob: Case B extra allocation not contiguous: " + "expected %p, got %p " + "(run_start=%p, count=%u, recbuffer=%u)\n", + run_start + (uint32) run_rec_count * recbuffer, + extra, run_start, (uint) run_rec_count, recbuffer); + abort(); + } + run_rec_count++; + hp_write_run_data(share, data_ptr, data_len, run_start, + run_rec_count, TRUE, &data_offset); + } + else + hp_write_run_data(share, data_ptr, data_len, run_start, + run_rec_count, FALSE, &data_offset); + } + else + hp_write_run_data(share, data_ptr, data_len, run_start, + run_rec_count, FALSE, &data_offset); + } + } + } + else + { + /* Multi-run (Case C) or not the only run */ + hp_write_run_data(share, data_ptr, data_len, run_start, + run_rec_count, FALSE, &data_offset); + } + + if (prev_run_start) + memcpy(prev_run_start, &run_start, sizeof(run_start)); + else + first_run= run_start; + prev_run_start= run_start; + } + + *first_run_out= first_run; + return 0; + +err: + if (first_run) + hp_free_run_chain(share, first_run); + *first_run_out= NULL; + return my_errno; +} + + +/* + Write blob data from the record buffer into continuation runs. + + For each blob column, reads the (length, pointer) descriptor from + the caller's record buffer, allocates variable-length continuation + runs, copies blob data into them, and overwrites the pointer in + the stored row (pos) to point to the first continuation run. + + @param info Table handle + @param record Source record buffer (caller's data) + @param pos Destination row in HP_BLOCK (already has memcpy'd record) + + @return 0 on success, my_errno on failure +*/ + +int hp_write_blobs(HP_INFO *info, const uchar *record, uchar *pos) +{ + HP_SHARE *share= info->s; + uint i; + my_bool has_blob_data= FALSE; + DBUG_ENTER("hp_write_blobs"); + + for (i= 0; i < share->blob_count; i++) + { + HP_BLOB_DESC *desc= &share->blob_descs[i]; + uint32 data_len; + const uchar *data_ptr; + uchar *first_run; + + data_len= hp_blob_length(desc, record); + + if (data_len == 0) + { + uchar *null_ptr= NULL; + memcpy(pos + desc->offset + desc->packlength, &null_ptr, sizeof(null_ptr)); + continue; + } + + has_blob_data= TRUE; + memcpy(&data_ptr, record + desc->offset + desc->packlength, sizeof(data_ptr)); + + if (hp_write_one_blob(share, data_ptr, data_len, &first_run)) + { + /* Rollback: free all previously completed blob columns */ + uint j; + for (j= 0; j < i; j++) + { + HP_BLOB_DESC *rd= &share->blob_descs[j]; + uchar *chain; + memcpy(&chain, pos + rd->offset + rd->packlength, sizeof(chain)); + if (chain) + hp_free_run_chain(share, chain); + { + uchar *null_ptr= NULL; + memcpy(pos + rd->offset + rd->packlength, &null_ptr, sizeof(null_ptr)); + } + } + { + uchar *null_ptr= NULL; + memcpy(pos + desc->offset + desc->packlength, &null_ptr, + sizeof(null_ptr)); + } + DBUG_RETURN(my_errno); + } + + memcpy(pos + desc->offset + desc->packlength, &first_run, sizeof(first_run)); + } + + pos[share->visible]= has_blob_data ? + (HP_ROW_ACTIVE | HP_ROW_HAS_CONT) : HP_ROW_ACTIVE; + DBUG_RETURN(0); +} + + +/* + Read blob data from continuation runs into the reassembly buffer. + + After memcpy(record, pos, reclength), blob descriptor pointers in + record[] point into HP_BLOCK continuation run chains. This function + walks each chain, reassembles blob data into info->blob_buff, and + rewrites the pointers in record[] to point into blob_buff. 
+ + @param info Table handle + @param record Record buffer (already has memcpy'd row data) + @param pos Row pointer in HP_BLOCK + + @return 0 on success, my_errno on failure +*/ + +int hp_read_blobs(HP_INFO *info, uchar *record, const uchar *pos) +{ + HP_SHARE *share= info->s; + uint i; + uint visible= share->visible; + uint recbuffer= share->block.recbuffer; + uint32 total_copy_size= 0; + uchar *buff_ptr; + DBUG_ENTER("hp_read_blobs"); + + info->has_zerocopy_blobs= FALSE; + + if (!hp_has_cont(pos, share->visible)) + DBUG_RETURN(0); + + /* + Pass 1: sum data_len for blobs that need reassembly (not zero-copy). + Cases A and B (HP_ROW_CONT_ZEROCOPY set, or single-record run) use + zero-copy pointers into HP_BLOCK, no blob_buff needed. + */ + for (i= 0; i < share->blob_count; i++) + { + HP_BLOB_DESC *desc= &share->blob_descs[i]; + uint32 data_len; + const uchar *chain; + + data_len= hp_blob_length(desc, record); + if (data_len == 0) + continue; + + memcpy(&chain, record + desc->offset + desc->packlength, sizeof(chain)); + + /* Zero-copy cases (A or B) need no reassembly buffer space */ + if (hp_is_case_a(chain) || hp_is_case_b(chain, visible)) + { + info->has_zerocopy_blobs= TRUE; + continue; + } + total_copy_size+= data_len; + } + + /* Grow reassembly buffer for Case C blobs */ + if (total_copy_size > 0) + { + if (total_copy_size > info->blob_buff_len) + { + uchar *new_buff= (uchar*) my_realloc(hp_key_memory_HP_BLOB, + info->blob_buff, + total_copy_size, + MYF(MY_ALLOW_ZERO_PTR)); + if (!new_buff) + DBUG_RETURN(my_errno= HA_ERR_OUT_OF_MEM); + info->blob_buff= new_buff; + info->blob_buff_len= total_copy_size; + } + } + + /* Pass 2: process each blob column */ + buff_ptr= info->blob_buff; + for (i= 0; i < share->blob_count; i++) + { + HP_BLOB_DESC *desc= &share->blob_descs[i]; + uint32 data_len; + const uchar *chain; + + data_len= hp_blob_length(desc, record); + if (data_len == 0) + continue; + + memcpy(&chain, record + desc->offset + desc->packlength, sizeof(chain)); + + if (hp_is_case_a(chain)) + { + /* Case A: single-record single-run — zero-copy */ + const uchar *blob_data= chain + HP_CONT_HEADER_SIZE; + memcpy(record + desc->offset + desc->packlength, &blob_data, + sizeof(blob_data)); + } + else if (hp_is_case_b(chain, visible)) + { + /* Case B: data in rec 1..N-1, contiguous — zero-copy */ + const uchar *blob_data= chain + recbuffer; + memcpy(record + desc->offset + desc->packlength, &blob_data, + sizeof(blob_data)); + } + else + { + /* Case C: reassemble into blob_buff */ + uint32 remaining= data_len; + const uchar *next_cont; + while (chain && remaining > 0) + { + uint16 rec; + uint16 run_rec_count; + uint32 chunk; + + next_cont= hp_cont_next(chain); + run_rec_count= hp_cont_rec_count(chain); + + /* First record payload (after header) */ + chunk= visible - HP_CONT_HEADER_SIZE; + if (chunk > remaining) + chunk= remaining; + memcpy(buff_ptr, chain + HP_CONT_HEADER_SIZE, chunk); + buff_ptr+= chunk; + remaining-= chunk; + + /* Inner records: recbuffer stride, no flags byte */ + for (rec= 1; rec < run_rec_count && remaining > 0; rec++) + { + const uchar *rec_ptr= chain + rec * recbuffer; + chunk= recbuffer; + if (chunk > remaining) + chunk= remaining; + memcpy(buff_ptr, rec_ptr, chunk); + buff_ptr+= chunk; + remaining-= chunk; + } + + chain= next_cont; + } + + /* Update blob pointer to reassembly buffer */ + { + uchar *blob_data= buff_ptr - data_len; + memcpy(record + desc->offset + desc->packlength, &blob_data, + sizeof(blob_data)); + } + } + } + + DBUG_RETURN(0); +} + + +/* + Materialize 
a single blob column's data from a continuation chain + into info->blob_buff. + + Used by hash comparison functions when comparing a stored record + (where the blob data pointer has been overwritten with a continuation + chain pointer) against an input record. + + @param info Table handle (provides blob_buff) + @param chain Pointer to first run of the continuation chain + @param data_len Total blob data length (from record's packlength bytes) + + @return Pointer into info->blob_buff with contiguous blob data, + or NULL on allocation failure. +*/ + +const uchar *hp_materialize_one_blob(HP_INFO *info, + const uchar *chain, + uint32 data_len) +{ + HP_SHARE *share= info->s; + uint visible= share->visible; + uint recbuffer= share->block.recbuffer; + uint32 remaining; + uchar *buff_ptr; + const uchar *next_cont; + uint16 run_rec_count; + + if (data_len == 0 || !chain) + return chain; + + /* Check for zero-copy cases */ + if (hp_is_case_a(chain)) + return chain + HP_CONT_HEADER_SIZE; /* Case A */ + if (hp_is_case_b(chain, visible)) + return chain + recbuffer; /* Case B */ + + /* Case C: multiple runs, reassemble into blob_buff */ + if (data_len > info->blob_buff_len) + { + uchar *new_buff= (uchar*) my_realloc(hp_key_memory_HP_BLOB, + info->blob_buff, + data_len, + MYF(MY_ALLOW_ZERO_PTR)); + if (!new_buff) + return NULL; + info->blob_buff= new_buff; + info->blob_buff_len= data_len; + } + + buff_ptr= info->blob_buff; + remaining= data_len; + while (chain && remaining > 0) + { + uint16 rec; + uint32 chunk; + + next_cont= hp_cont_next(chain); + run_rec_count= hp_cont_rec_count(chain); + + /* First record payload (after header) */ + chunk= visible - HP_CONT_HEADER_SIZE; + if (chunk > remaining) + chunk= remaining; + memcpy(buff_ptr, chain + HP_CONT_HEADER_SIZE, chunk); + buff_ptr+= chunk; + remaining-= chunk; + + /* Inner records: recbuffer stride, no flags byte */ + for (rec= 1; rec < run_rec_count && remaining > 0; rec++) + { + const uchar *rec_ptr= chain + rec * recbuffer; + chunk= recbuffer; + if (chunk > remaining) + chunk= remaining; + memcpy(buff_ptr, rec_ptr, chunk); + buff_ptr+= chunk; + remaining-= chunk; + } + + chain= next_cont; + } + + return info->blob_buff; +} + + +/* + Free continuation run chains for all blob columns of a row. + + Walks each blob column's run chain and adds all records back to the + free list. 
+ + @param share Table share + @param pos Primary record pointer in HP_BLOCK +*/ + +void hp_free_blobs(HP_SHARE *share, uchar *pos) +{ + uint i; + DBUG_ENTER("hp_free_blobs"); + + if (!hp_has_cont(pos, share->visible)) + DBUG_VOID_RETURN; + + for (i= 0; i < share->blob_count; i++) + { + HP_BLOB_DESC *desc= &share->blob_descs[i]; + uchar *chain; + + memcpy(&chain, pos + desc->offset + desc->packlength, sizeof(chain)); + hp_free_run_chain(share, chain); + } + + DBUG_VOID_RETURN; +} diff --git a/storage/heap/hp_clear.c b/storage/heap/hp_clear.c index b0b263249a881..9efb4170792a7 100644 --- a/storage/heap/hp_clear.c +++ b/storage/heap/hp_clear.c @@ -35,8 +35,9 @@ void hp_clear(HP_SHARE *info) (void) hp_free_level(&info->block,info->block.levels,info->block.root, (uchar*) 0); info->block.levels=0; + info->block.last_allocated=0; hp_clear_keys(info); - info->records= info->deleted= 0; + info->records= info->deleted= info->total_records= 0; info->data_length= 0; info->blength=1; info->changed=0; diff --git a/storage/heap/hp_close.c b/storage/heap/hp_close.c index 82d6186340aa1..aa417f99b5a71 100644 --- a/storage/heap/hp_close.c +++ b/storage/heap/hp_close.c @@ -40,6 +40,7 @@ int hp_close(register HP_INFO *info) heap_open_list=list_delete(heap_open_list,&info->open_list); if (!--info->s->open_count && info->s->delete_on_close) hp_free(info->s); /* Table was deleted */ + my_free(info->blob_buff); my_free(info); DBUG_RETURN(error); } diff --git a/storage/heap/hp_create.c b/storage/heap/hp_create.c index f35e8e3fac9f8..6433b059605d0 100644 --- a/storage/heap/hp_create.c +++ b/storage/heap/hp_create.c @@ -74,7 +74,17 @@ int heap_create(const char *name, HP_CREATE_INFO *create_info, so the visible_offset must be least at sizeof(uchar*) */ visible_offset= MY_MAX(reclength, sizeof (char*)); - + /* + Blob tables store continuation run headers (next_cont pointer + + run_slots count = HP_CONT_HEADER_SIZE bytes) in each run's first + slot. Ensure at least 1 byte of payload beyond the header, + otherwise hp_write_run_data() underflows computing + chunk = visible - HP_CONT_HEADER_SIZE. Only matters for + pathological single-TINYBLOB tables (reclength as low as 9). + */ + if (create_info->blob_count) + visible_offset= MY_MAX(visible_offset, HP_CONT_HEADER_SIZE + 1); + for (i= key_segs= max_length= 0, keyinfo= keydef; i < keys; i++, keyinfo++) { bzero((char*) &keyinfo->block,sizeof(keyinfo->block)); @@ -110,6 +120,12 @@ int heap_create(const char *name, HP_CREATE_INFO *create_info, /* fall through */ case HA_KEYTYPE_VARTEXT1: keyinfo->flag|= HA_VAR_LENGTH_KEY; + /* + Real blob fields always enter as VARTEXT2/VARBINARY2, never + as VARTEXT1/VARBINARY1. Strip any spurious HA_BLOB_PART + (e.g. from uninitialized key_part_flag in SJ weedout tables). + */ + keyinfo->seg[j].flag&= ~HA_BLOB_PART; /* For BTREE algorithm, key length, greater than or equal to 255, is packed on 3 bytes. @@ -126,16 +142,78 @@ int heap_create(const char *name, HP_CREATE_INFO *create_info, /* fall_through */ case HA_KEYTYPE_VARTEXT2: keyinfo->flag|= HA_VAR_LENGTH_KEY; + /* + Strip HA_BLOB_PART for key segments that don't correspond + to actual blob fields. HA_BLOB_PART can appear spuriously + from uninitialized key_part_flag (SJ weedout tables) or + from BLOB_FLAG on non-Field_blob types (I_S temp tables). 
+ */ + if (keyinfo->seg[j].flag & HA_BLOB_PART) + { + my_bool real_blob= FALSE; + uint k; + for (k= 0; k < create_info->blob_count; k++) + { + if (create_info->blob_descs[k].offset == + keyinfo->seg[j].start) + { + real_blob= TRUE; + break; + } + } + if (!real_blob) + keyinfo->seg[j].flag&= ~HA_BLOB_PART; + } /* For BTREE algorithm, key length, greater than or equal to 255, is packed on 3 bytes. */ if (keyinfo->algorithm == HA_KEY_ALG_BTREE) length+= size_to_store_key_length(keyinfo->seg[j].length); + else if (keyinfo->seg[j].flag & HA_BLOB_PART) + length+= 4 + sizeof(uchar*); /* 4-byte len + data ptr in key */ else length+= 2; - /* Save number of bytes used to store length */ - keyinfo->seg[j].bit_start= 2; + /* + Save number of bytes used to store length. + For blob segments, bit_start holds the actual blob packlength + (1-4). Some SQL layer paths (DISTINCT) set it explicitly; + others (UNION) leave it 0 and set seg->length to pack_length + (= packlength + sizeof(uchar*)). Derive it when missing. + Also normalize seg->length to 0 ("whole blob") for blob + segments where the SQL layer set it to pack_length. + */ + if (!(keyinfo->seg[j].flag & HA_BLOB_PART)) + keyinfo->seg[j].bit_start= 2; + else + { + if (keyinfo->seg[j].bit_start == 0 && keyinfo->seg[j].length > 0) + keyinfo->seg[j].bit_start= + (uint8)(keyinfo->seg[j].length - sizeof(uchar*)); + keyinfo->seg[j].length= 0; /* "whole blob" */ + /* + Fallback: if bit_start is still 0 after the length-based + derivation above (which requires length > 0), look up the + actual packlength from the blob descriptor array. This + covers any SQL layer path that sets both bit_start=0 and + length=0 for a blob key segment. + */ + if (keyinfo->seg[j].bit_start == 0) + { + uint k; + for (k= 0; k < create_info->blob_count; k++) + { + if (create_info->blob_descs[k].offset == + keyinfo->seg[j].start) + { + keyinfo->seg[j].bit_start= + (uint8) create_info->blob_descs[k].packlength; + break; + } + } + DBUG_ASSERT(keyinfo->seg[j].bit_start > 0); + } + } /* Make future comparison simpler by only having to check for one type @@ -174,7 +252,8 @@ int heap_create(const char *name, HP_CREATE_INFO *create_info, if (!(share= (HP_SHARE*) my_malloc(hp_key_memory_HP_SHARE, sizeof(HP_SHARE)+ keys*sizeof(HP_KEYDEF)+ - key_segs*sizeof(HA_KEYSEG), + key_segs*sizeof(HA_KEYSEG)+ + create_info->blob_count*sizeof(HP_BLOB_DESC), MYF(MY_ZEROFILL | (create_info->internal_table ? MY_THREAD_SPECIFIC : 0))))) @@ -182,6 +261,13 @@ int heap_create(const char *name, HP_CREATE_INFO *create_info, share->keydef= (HP_KEYDEF*) (share + 1); share->key_stat_version= 1; keyseg= (HA_KEYSEG*) (share->keydef + keys); + if (create_info->blob_count) + { + share->blob_descs= (HP_BLOB_DESC*) (keyseg + key_segs); + memcpy(share->blob_descs, create_info->blob_descs, + create_info->blob_count * sizeof(HP_BLOB_DESC)); + share->blob_count= create_info->blob_count; + } init_block(&share->block, hp_memory_needed_per_row(reclength), min_records, max_records); /* Fix keys */ diff --git a/storage/heap/hp_delete.c b/storage/heap/hp_delete.c index 9579fb51a7918..1a4da1fff0e44 100644 --- a/storage/heap/hp_delete.c +++ b/storage/heap/hp_delete.c @@ -42,11 +42,21 @@ int heap_delete(HP_INFO *info, const uchar *record) goto err; } + /* + Free blob continuation chains first (if any), then free the head + record slot. Both hp_free_run_chain() and the code below maintain + the scan-boundary invariant: + total_records + deleted == block.last_allocated + by doing total_records-- and deleted++ for each freed slot. 
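+
+    Worked example (illustrative numbers, not from a test): deleting a
+    row whose single blob occupies a 7-slot continuation chain first
+    performs total_records -= 7 / deleted += 7 inside
+    hp_free_run_chain(), then total_records -= 1 / deleted += 1 for
+    the head slot below; last_allocated is untouched, so the sum is
+    preserved throughout.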
+ */ + if (share->blob_count) + hp_free_blobs(share, pos); info->update=HA_STATE_DELETED; *((uchar**) pos)=share->del_link; share->del_link=pos; pos[share->visible]=0; /* Record deleted */ share->deleted++; + share->total_records--; share->key_version++; #if !defined(DBUG_OFF) && defined(EXTRA_HEAP_DEBUG) DBUG_EXECUTE("check_heap",heap_check_heap(info, 0);); @@ -123,7 +133,7 @@ int hp_delete_key(HP_INFO *info, register HP_KEYDEF *keyinfo, while (pos->ptr_to_rec != recpos) { - if (flag && !hp_rec_key_cmp(keyinfo, record, pos->ptr_to_rec)) + if (flag && !hp_rec_key_cmp(keyinfo, record, pos->ptr_to_rec, info)) last_ptr=pos; /* Previous same key */ gpos=pos; if (!(pos=pos->next_key)) diff --git a/storage/heap/hp_extra.c b/storage/heap/hp_extra.c index 3c554fe98e780..b54281027032b 100644 --- a/storage/heap/hp_extra.c +++ b/storage/heap/hp_extra.c @@ -59,6 +59,12 @@ int heap_reset(HP_INFO *info) info->current_hash_ptr=0; info->update=0; info->next_block=0; + if (info->blob_buff) + { + my_free(info->blob_buff); + info->blob_buff= NULL; + info->blob_buff_len= 0; + } return 0; } diff --git a/storage/heap/hp_hash.c b/storage/heap/hp_hash.c index a013915173043..c06fa77b9e9d9 100644 --- a/storage/heap/hp_hash.c +++ b/storage/heap/hp_hash.c @@ -29,6 +29,25 @@ hp_charpos(CHARSET_INFO *cs, const uchar *b, const uchar *e, size_t num) static ulong hp_hashnr(HP_KEYDEF *keydef, const uchar *key); + + +/* + Read blob data length using actual packlength stored in seg->bit_start. +*/ + +/* Size of a pointer, for use in memcpy to avoid -Wsizeof-pointer-memaccess */ +#define HP_PTR_SIZE sizeof(void*) + +static size_t hp_blob_key_length(uint packlength, const uchar *pos) +{ + switch (packlength) { + case 1: return (size_t) pos[0]; + case 2: return uint2korr(pos); + case 3: return uint3korr(pos); + case 4: return uint4korr(pos); + } + return 0; +} /* Find out how many rows there is in the given range @@ -127,7 +146,7 @@ uchar *hp_search(HP_INFO *info, HP_KEYDEF *keyinfo, const uchar *key, goto not_found; /* Wrong link */ do { - if (!hp_key_cmp(keyinfo, pos->ptr_to_rec, key)) + if (!hp_key_cmp(keyinfo, pos->ptr_to_rec, key, info)) { switch (nextflag) { case 0: /* Search after key */ @@ -188,7 +207,7 @@ uchar *hp_search_next(HP_INFO *info, HP_KEYDEF *keyinfo, const uchar *key, while ((pos= pos->next_key)) { - if (! hp_key_cmp(keyinfo, pos->ptr_to_rec, key)) + if (! hp_key_cmp(keyinfo, pos->ptr_to_rec, key, info)) { info->current_hash_ptr=pos; DBUG_RETURN (info->current_ptr= pos->ptr_to_rec); @@ -238,9 +257,9 @@ static ulong hp_hashnr(HP_KEYDEF *keydef, const uchar *key) if (*pos) /* Found null */ { nr^= (nr << 1) | 1; - /* Add key pack length (2) to key for VARCHAR segments */ + /* Add key pack length to key for VARCHAR/BLOB segments */ if (seg->type == HA_KEYTYPE_VARTEXT1) - key+= 2; + key+= (seg->flag & HA_BLOB_PART) ? 
4 + sizeof(uchar*) : 2; continue; } pos++; @@ -257,6 +276,17 @@ static ulong hp_hashnr(HP_KEYDEF *keydef, const uchar *key) } my_ci_hash_sort(cs, pos, length, &nr, &nr2); } + else if (seg->type == HA_KEYTYPE_VARTEXT1 && (seg->flag & HA_BLOB_PART)) + { + /* Blob segment in pre-built key: 4-byte length + data pointer */ + CHARSET_INFO *cs= seg->charset; + uint32 blob_len= uint4korr(pos); + const uchar *blob_data; + memcpy(&blob_data, pos + 4, HP_PTR_SIZE); + if (blob_data && blob_len > 0) + my_ci_hash_sort(cs, blob_data, blob_len, &nr, &nr2); + key+= 4 + sizeof(uchar*); + } else if (seg->type == HA_KEYTYPE_VARTEXT1) /* Any VARCHAR segments */ { CHARSET_INFO *cs= seg->charset; @@ -318,6 +348,17 @@ ulong hp_rec_hashnr(register HP_KEYDEF *keydef, register const uchar *rec) } my_ci_hash_sort(cs, pos, char_length, &nr, &nr2); } + else if (seg->type == HA_KEYTYPE_VARTEXT1 && (seg->flag & HA_BLOB_PART)) + { + /* Blob segment in input record: dereference data pointer */ + CHARSET_INFO *cs= seg->charset; + uint packlength= seg->bit_start; + size_t blob_len= hp_blob_key_length(packlength, pos); + const uchar *blob_data; + memcpy(&blob_data, pos + packlength, HP_PTR_SIZE); + if (blob_data && blob_len > 0) + my_ci_hash_sort(cs, blob_data, blob_len, &nr, &nr2); + } else if (seg->type == HA_KEYTYPE_VARTEXT1) /* Any VARCHAR segments */ { CHARSET_INFO *cs= seg->charset; @@ -361,24 +402,23 @@ ulong hp_rec_hashnr(register HP_KEYDEF *keydef, register const uchar *rec) /* - Compare keys for two records. Returns 0 if they are identical - - SYNOPSIS - hp_rec_key_cmp() - keydef Key definition - rec1 Record to compare - rec2 Other record to compare - - NOTES - diff_if_only_endspace_difference is used to allow us to insert - 'a' and 'a ' when there is an an unique key. - - RETURN - 0 Key is identical - <> 0 Key differes + Compare two records using key segments. + + @param keydef Key definition + @param rec1 First record (input) — blob fields contain direct data + pointers to caller-owned memory + @param rec2 Second record — when @a info is non-NULL, blob fields + contain continuation chain pointers (stored format) that + are materialized via hp_materialize_one_blob(). + When @a info is NULL, treated same as rec1. + @param info When non-NULL, enables stored-blob materialization for rec2. + Must be NULL when both records are input records. + + @return 0 if records are equal by all key segments, 1 otherwise */ -int hp_rec_key_cmp(HP_KEYDEF *keydef, const uchar *rec1, const uchar *rec2) +int hp_rec_key_cmp(HP_KEYDEF *keydef, const uchar *rec1, const uchar *rec2, + HP_INFO *info) { HA_KEYSEG *seg,*endseg; @@ -416,6 +456,46 @@ int hp_rec_key_cmp(HP_KEYDEF *keydef, const uchar *rec1, const uchar *rec2) pos2, char_length2)) return 1; } + else if (seg->type == HA_KEYTYPE_VARTEXT1 && (seg->flag & HA_BLOB_PART)) + { + /* + Blob segment comparison. + rec1 always has valid blob pointers (input record). + rec2 may be stored (chain pointers) when info != NULL. 
+ */ + uint packlength= seg->bit_start; + uchar *pos1= (uchar*) rec1 + seg->start; + uchar *pos2= (uchar*) rec2 + seg->start; + size_t len1= hp_blob_key_length(packlength, pos1); + size_t len2= hp_blob_key_length(packlength, pos2); + const uchar *data1; + const uchar *data2; + + if (len1 != len2) + return 1; + if (len1 == 0) + continue; + + /* rec1: always input — dereference pointer */ + memcpy(&data1, pos1 + packlength, HP_PTR_SIZE); + + /* rec2: if info != NULL, it's stored — materialize from chain */ + if (info) + { + const uchar *chain2; + memcpy(&chain2, pos2 + packlength, HP_PTR_SIZE); + data2= hp_materialize_one_blob(info, chain2, (uint32) len2); + if (!data2) + return 1; + } + else + { + memcpy(&data2, pos2 + packlength, HP_PTR_SIZE); + } + + if (my_ci_strnncollsp(seg->charset, data1, len1, data2, len2)) + return 1; + } else if (seg->type == HA_KEYTYPE_VARTEXT1) /* Any VARCHAR segments */ { uchar *pos1= (uchar*) rec1 + seg->start; @@ -478,7 +558,8 @@ int hp_rec_key_cmp(HP_KEYDEF *keydef, const uchar *rec1, const uchar *rec2) /* Compare a key in a record to a whole key */ -int hp_key_cmp(HP_KEYDEF *keydef, const uchar *rec, const uchar *key) +int hp_key_cmp(HP_KEYDEF *keydef, const uchar *rec, const uchar *key, + HP_INFO *info) { HA_KEYSEG *seg,*endseg; @@ -493,9 +574,9 @@ int hp_key_cmp(HP_KEYDEF *keydef, const uchar *rec, const uchar *key) return 1; if (found_null) { - /* Add key pack length (2) to key for VARCHAR segments */ + /* Add key pack length to key for VARCHAR/BLOB segments */ if (seg->type == HA_KEYTYPE_VARTEXT1) - key+= 2; + key+= (seg->flag & HA_BLOB_PART) ? 4 + sizeof(uchar*) : 2; continue; } } @@ -518,12 +599,47 @@ int hp_key_cmp(HP_KEYDEF *keydef, const uchar *rec, const uchar *key) char_length_key= seg->length; char_length_rec= seg->length; } - + if (my_ci_strnncollsp(seg->charset, pos, char_length_rec, key, char_length_key)) return 1; } + else if (seg->type == HA_KEYTYPE_VARTEXT1 && (seg->flag & HA_BLOB_PART)) + { + /* + Blob segment: rec side is stored (chain pointers), key side has + 4-byte length + data pointer from hp_make_key. 
+ */ + uint packlength= seg->bit_start; + uchar *pos= (uchar*) rec + seg->start; + size_t rec_blob_len= hp_blob_key_length(packlength, pos); + uint32 key_blob_len= uint4korr(key); + const uchar *key_data; + const uchar *rec_data; + + memcpy(&key_data, key + 4, HP_PTR_SIZE); + key+= 4 + sizeof(uchar*); + + if (rec_blob_len != key_blob_len) + return 1; + if (rec_blob_len == 0) + continue; + + /* rec is stored — materialize from chain */ + { + const uchar *chain; + memcpy(&chain, pos + packlength, HP_PTR_SIZE); + rec_data= hp_materialize_one_blob(info, chain, (uint32) rec_blob_len); + if (!rec_data) + return 1; + } + + if (my_ci_strnncollsp(seg->charset, + rec_data, rec_blob_len, + key_data, key_blob_len)) + return 1; + } else if (seg->type == HA_KEYTYPE_VARTEXT1) /* Any VARCHAR segments */ { uchar *pos= (uchar*) rec + seg->start; @@ -538,7 +654,7 @@ int hp_key_cmp(HP_KEYDEF *keydef, const uchar *rec, const uchar *key) if (cs->mbmaxlen > 1) { size_t char_length1, char_length2; - char_length1= char_length2= seg->length / cs->mbmaxlen; + char_length1= char_length2= seg->length / cs->mbmaxlen; char_length1= hp_charpos(cs, key, key + char_length_key, char_length1); set_if_smaller(char_length_key, char_length1); char_length2= hp_charpos(cs, pos, pos + char_length_rec, char_length2); @@ -586,6 +702,21 @@ void hp_make_key(HP_KEYDEF *keydef, uchar *key, const uchar *rec) uchar *pos= (uchar*) rec + seg->start; if (seg->null_bit) *key++= MY_TEST(rec[seg->null_pos] & seg->null_bit); + if (seg->type == HA_KEYTYPE_VARTEXT1 && (seg->flag & HA_BLOB_PART)) + { + /* + Blob segment in input record: store 4-byte length + data pointer + in key buffer for later use by hp_hashnr/hp_key_cmp. + */ + uint packlength= seg->bit_start; + uint32 blob_len= (uint32) hp_blob_key_length(packlength, pos); + const uchar *blob_data; + memcpy(&blob_data, pos + packlength, HP_PTR_SIZE); + int4store(key, blob_len); + memcpy(key + 4, &blob_data, HP_PTR_SIZE); + key+= 4 + sizeof(uchar*); + continue; + } if (cs->mbmaxlen > 1) { char_length= hp_charpos(cs, pos, pos + seg->length, diff --git a/storage/heap/hp_rfirst.c b/storage/heap/hp_rfirst.c index 60596a2c650fd..903fd42a135ed 100644 --- a/storage/heap/hp_rfirst.c +++ b/storage/heap/hp_rfirst.c @@ -38,6 +38,8 @@ int heap_rfirst(HP_INFO *info, uchar *record, int inx) sizeof(uchar*)); info->current_ptr = pos; memcpy(record, pos, (size_t)share->reclength); + if (share->blob_count && hp_read_blobs(info, record, pos)) + DBUG_RETURN(my_errno); /* If we're performing index_first on a table that was taken from table cache, info->lastkey_len is initialized to previous query. 
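
For reference, a minimal standalone sketch (not code from the patch) of the Case C
chain walk that hp_read_blobs() and hp_materialize_one_blob() perform. The helper
name copy_chain and the header field order (next-run pointer followed by a 2-byte
record count) are assumptions; the real code hides the layout behind hp_cont_next()
and hp_cont_rec_count(). 'visible' and 'recbuffer' stand in for share->visible and
share->block.recbuffer.

    #include <string.h>
    #include <stdint.h>

    typedef unsigned char uchar;

    /* Assumed header layout: next-run pointer, then 2-byte record count */
    #define CONT_HEADER_SIZE (sizeof(uchar*) + sizeof(uint16_t))

    /* Copy one blob's data out of its continuation chain into 'out'.
       Returns the number of bytes copied. */
    static uint32_t copy_chain(const uchar *chain, uint32_t data_len,
                               uint32_t visible, uint32_t recbuffer,
                               uchar *out)
    {
      uint32_t remaining= data_len;
      while (chain && remaining > 0)
      {
        const uchar *next;
        uint16_t rec_count, rec;
        uint32_t chunk;

        memcpy(&next, chain, sizeof(next));              /* next_cont */
        memcpy(&rec_count, chain + sizeof(next), sizeof(rec_count));

        /* Record 0: payload starts after the header */
        chunk= visible - CONT_HEADER_SIZE;
        if (chunk > remaining)
          chunk= remaining;
        memcpy(out, chain + CONT_HEADER_SIZE, chunk);
        out+= chunk;
        remaining-= chunk;

        /* Inner records: full recbuffer-sized payloads, no header */
        for (rec= 1; rec < rec_count && remaining > 0; rec++)
        {
          chunk= recbuffer > remaining ? remaining : recbuffer;
          memcpy(out, chain + (uint32_t) rec * recbuffer, chunk);
          out+= chunk;
          remaining-= chunk;
        }
        chain= next;                                     /* next run */
      }
      return data_len - remaining;
    }

A single-run blob never reaches this loop in the patch: Cases A and B return
pointers straight into the chain, so the copy only runs for multi-run (Case C)
blobs.
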
diff --git a/storage/heap/hp_rkey.c b/storage/heap/hp_rkey.c index 2d9fae4c52097..bc03226f2ba15 100644 --- a/storage/heap/hp_rkey.c +++ b/storage/heap/hp_rkey.c @@ -69,6 +69,8 @@ int heap_rkey(HP_INFO *info, uchar *record, int inx, const uchar *key, memcpy(info->lastkey, key, (size_t) keyinfo->length); } memcpy(record, pos, (size_t) share->reclength); + if (share->blob_count && hp_read_blobs(info, record, pos)) + DBUG_RETURN(my_errno); info->update= HA_STATE_AKTIV; DBUG_RETURN(0); } diff --git a/storage/heap/hp_rlast.c b/storage/heap/hp_rlast.c index ed9c3499d5e84..5b31bfccf07c0 100644 --- a/storage/heap/hp_rlast.c +++ b/storage/heap/hp_rlast.c @@ -38,6 +38,8 @@ int heap_rlast(HP_INFO *info, uchar *record, int inx) sizeof(uchar*)); info->current_ptr = pos; memcpy(record, pos, (size_t)share->reclength); + if (share->blob_count && hp_read_blobs(info, record, pos)) + DBUG_RETURN(my_errno); info->update = HA_STATE_AKTIV; } else diff --git a/storage/heap/hp_rnext.c b/storage/heap/hp_rnext.c index ac21ed83da271..774731624fd96 100644 --- a/storage/heap/hp_rnext.c +++ b/storage/heap/hp_rnext.c @@ -127,6 +127,8 @@ int heap_rnext(HP_INFO *info, uchar *record) DBUG_RETURN(my_errno); } memcpy(record,pos,(size_t) share->reclength); + if (share->blob_count && hp_read_blobs(info, record, pos)) + DBUG_RETURN(my_errno); info->update=HA_STATE_AKTIV | HA_STATE_NEXT_FOUND; DBUG_RETURN(0); } diff --git a/storage/heap/hp_rprev.c b/storage/heap/hp_rprev.c index cc81d179570aa..948d1db15ec53 100644 --- a/storage/heap/hp_rprev.c +++ b/storage/heap/hp_rprev.c @@ -94,6 +94,8 @@ int heap_rprev(HP_INFO *info, uchar *record) DBUG_RETURN(my_errno); } memcpy(record,pos,(size_t) share->reclength); + if (share->blob_count && hp_read_blobs(info, record, pos)) + DBUG_RETURN(my_errno); info->update=HA_STATE_AKTIV | HA_STATE_PREV_FOUND; DBUG_RETURN(0); } diff --git a/storage/heap/hp_rrnd.c b/storage/heap/hp_rrnd.c index 3947946ce6706..045804a94afe7 100644 --- a/storage/heap/hp_rrnd.c +++ b/storage/heap/hp_rrnd.c @@ -44,6 +44,8 @@ int heap_rrnd(register HP_INFO *info, uchar *record, uchar *pos) } info->update=HA_STATE_PREV_FOUND | HA_STATE_NEXT_FOUND | HA_STATE_AKTIV; memcpy(record,info->current_ptr,(size_t) share->reclength); + if (share->blob_count && hp_read_blobs(info, record, info->current_ptr)) + DBUG_RETURN(my_errno); DBUG_PRINT("exit", ("found record at %p", info->current_ptr)); info->current_hash_ptr=0; /* Can't use rnext */ DBUG_RETURN(0); diff --git a/storage/heap/hp_rsame.c b/storage/heap/hp_rsame.c index 8bba4cd23a9c1..1ab2511d617ba 100644 --- a/storage/heap/hp_rsame.c +++ b/storage/heap/hp_rsame.c @@ -49,6 +49,8 @@ int heap_rsame(register HP_INFO *info, uchar *record, int inx) } } memcpy(record,info->current_ptr,(size_t) share->reclength); + if (share->blob_count && hp_read_blobs(info, record, info->current_ptr)) + DBUG_RETURN(my_errno); DBUG_RETURN(0); } info->update=0; diff --git a/storage/heap/hp_scan.c b/storage/heap/hp_scan.c index f07efe6cf671c..8ef3d348c8c6d 100644 --- a/storage/heap/hp_scan.c +++ b/storage/heap/hp_scan.c @@ -43,6 +43,26 @@ int heap_scan(register HP_INFO *info, uchar *record) ulong pos; DBUG_ENTER("heap_scan"); + /* + Scan boundary: total_records + deleted == block.last_allocated. + + Every slot in the HP_BLOCK data area is either a live record (counted in + total_records) or a deleted/free slot (counted in deleted). 
This + includes blob continuation records allocated by hp_alloc_from_tail() + and freed by hp_free_run_chain(), both of which maintain the invariant + total_records + deleted == block.last_allocated. + + next_block is a cached upper bound for the current HP_BLOCK segment: + within one segment, current_ptr can be advanced by recbuffer without + calling hp_find_record(). It MUST satisfy + next_block <= total_records + deleted + at all times, otherwise the scan will walk past the last allocated + slot into unmapped memory. + + The else branch below recomputes next_block and caps it. Any code + that manipulates next_block externally (e.g. restart_rnd_next) must + also enforce this cap. + */ pos= ++info->current_record; if (pos < info->next_block) { @@ -50,12 +70,18 @@ int heap_scan(register HP_INFO *info, uchar *record) } else { - /* increase next_block to the next records_in_block boundary */ + /* Advance next_block to the next records_in_block boundary */ ulong rem= info->next_block % share->block.records_in_block; info->next_block+=share->block.records_in_block - rem; - if (info->next_block >= share->records+share->deleted) + /* + Cap next_block at the scan end (total_records + deleted). This is + essential: rows may have been deleted since next_block was last set + (e.g. remove_dup_with_compare deletes duplicates mid-scan), and + block boundaries can extend well past the last allocated slot. + */ + if (info->next_block >= share->total_records+share->deleted) { - info->next_block= share->records+share->deleted; + info->next_block= share->total_records+share->deleted; if (pos >= info->next_block) { info->update= 0; @@ -70,8 +96,27 @@ int heap_scan(register HP_INFO *info, uchar *record) info->update= HA_STATE_PREV_FOUND | HA_STATE_NEXT_FOUND; DBUG_RETURN(my_errno=HA_ERR_RECORD_DELETED); } + /* + Skip blob continuation runs. Rec 0 of each run has the flags byte + with HP_ROW_IS_CONT set; inner records (rec 1..N-1) have no flags + byte. Read run_rec_count from the header and skip the entire run. 
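+
+    Illustrative numbers: with recbuffer=128 and a 40-record run,
+    rec 0 matches here, current_record advances by 39 and current_ptr
+    by 39 * 128 bytes, so the following heap_scan() call steps onto
+    the first slot after the run instead of visiting the 39 inner
+    records one by one.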
+ */ + if (hp_is_cont(info->current_ptr, share->visible)) + { + uint16 run_rec_count= hp_cont_rec_count(info->current_ptr); + if (run_rec_count > 1) + { + uint skip= run_rec_count - 1; + info->current_record+= skip; + info->current_ptr+= skip * share->block.recbuffer; + } + info->update= HA_STATE_PREV_FOUND | HA_STATE_NEXT_FOUND; + DBUG_RETURN(my_errno=HA_ERR_RECORD_DELETED); + } info->update= HA_STATE_PREV_FOUND | HA_STATE_NEXT_FOUND | HA_STATE_AKTIV; memcpy(record,info->current_ptr,(size_t) share->reclength); + if (share->blob_count && hp_read_blobs(info, record, info->current_ptr)) + DBUG_RETURN(my_errno); info->current_hash_ptr=0; /* Can't use read_next */ DBUG_RETURN(0); } /* heap_scan */ diff --git a/storage/heap/hp_static.c b/storage/heap/hp_static.c index 9a4410eead9ea..07c9f25597122 100644 --- a/storage/heap/hp_static.c +++ b/storage/heap/hp_static.c @@ -28,6 +28,7 @@ PSI_memory_key hp_key_memory_HP_SHARE; PSI_memory_key hp_key_memory_HP_INFO; PSI_memory_key hp_key_memory_HP_PTRS; PSI_memory_key hp_key_memory_HP_KEYDEF; +PSI_memory_key hp_key_memory_HP_BLOB; #ifdef HAVE_PSI_INTERFACE @@ -36,7 +37,8 @@ static PSI_memory_info all_heap_memory[]= { & hp_key_memory_HP_SHARE, "HP_SHARE", 0}, { & hp_key_memory_HP_INFO, "HP_INFO", 0}, { & hp_key_memory_HP_PTRS, "HP_PTRS", 0}, - { & hp_key_memory_HP_KEYDEF, "HP_KEYDEF", 0} + { & hp_key_memory_HP_KEYDEF, "HP_KEYDEF", 0}, + { & hp_key_memory_HP_BLOB, "HP_BLOB", 0} }; void init_heap_psi_keys() diff --git a/storage/heap/hp_update.c b/storage/heap/hp_update.c index ad56ca979deb6..9d885e2bb1b7e 100644 --- a/storage/heap/hp_update.c +++ b/storage/heap/hp_update.c @@ -42,7 +42,7 @@ int heap_update(HP_INFO *info, const uchar *old, const uchar *heap_new) p_lastinx= share->keydef + info->lastinx; for (keydef= share->keydef, end= keydef + share->keys; keydef < end; keydef++) { - if (hp_rec_key_cmp(keydef, old, heap_new)) + if (hp_rec_key_cmp(keydef, old, heap_new, NULL)) { if ((*keydef->delete_key)(info, keydef, old, pos, keydef == p_lastinx) || (*keydef->write_key)(info, keydef, heap_new, pos)) @@ -52,7 +52,99 @@ int heap_update(HP_INFO *info, const uchar *old, const uchar *heap_new) } } - memcpy(pos,heap_new,(size_t) share->reclength); + /* + Blob update strategy: write new chains before freeing old ones. + + We must not free old blob chains before the new ones are successfully + written, because hp_write_blobs() can fail (e.g. table full) and then + the old data would be unrecoverable. Instead: + 1. Save old chain head pointers (from pos) before memcpy overwrites them + 2. memcpy new record data into pos + 3. Write new blob chains (hp_write_blobs) + 4. On success: free old chains via saved pointers + On failure: restore old record from 'old' buffer, restore saved + chain pointers, re-set HP_ROW_HAS_CONT flag + */ + if (share->blob_count) + { + my_bool had_cont= hp_has_cont(pos, share->visible); + uchar **saved_chains= NULL; + + if (had_cont) + { + saved_chains= (uchar**) my_safe_alloca( + share->blob_count * sizeof(uchar*)); + for (uint i= 0; i < share->blob_count; i++) + { + HP_BLOB_DESC *desc= &share->blob_descs[i]; + memcpy(&saved_chains[i], pos + desc->offset + desc->packlength, + sizeof(saved_chains[i])); + } + } + memcpy(pos, heap_new, (size_t) share->reclength); + if (hp_write_blobs(info, heap_new, pos)) + { + /* New blobs cleaned up by hp_write_blobs rollback. Restore old record. 
*/ + memcpy(pos, old, (size_t) share->reclength); + if (had_cont) + { + for (uint i= 0; i < share->blob_count; i++) + { + HP_BLOB_DESC *desc= &share->blob_descs[i]; + memcpy(pos + desc->offset + desc->packlength, + &saved_chains[i], sizeof(saved_chains[i])); + } + pos[share->visible]|= HP_ROW_HAS_CONT; + } + my_safe_afree(saved_chains, + share->blob_count * sizeof(uchar*)); + goto err; + } + /* New blobs written — now safe to free old chains */ + if (had_cont) + { + for (uint i= 0; i < share->blob_count; i++) + hp_free_run_chain(share, saved_chains[i]); + my_safe_afree(saved_chains, + share->blob_count * sizeof(uchar*)); + } + /* + Refresh blob pointers in the caller's record buffer when zero-copy + pointers were used. + + hp_write_blobs() stored new chain head pointers in pos, but + heap_new may still have zero-copy pointers from the caller's last + hp_read_blobs() — those point into old chains that were just freed. + Copy new chain pointers from pos into heap_new, then call + hp_read_blobs() to replace them with materialized data pointers. + + Without this, callers that reuse heap_new after update (e.g., the + INTERSECT ALL unfold path in sql_union.cc) would follow dangling + pointers into freed HP_BLOCK records. + + Non-zero-copy blobs (Case C) have pointers into blob_buff which + is not affected by the chain free, so no refresh is needed. + */ + if (info->has_zerocopy_blobs) + { + uchar *new_rec= (uchar*) heap_new; + for (uint i= 0; i < share->blob_count; i++) + { + HP_BLOB_DESC *desc= &share->blob_descs[i]; + { + uchar *chain; + memcpy(&chain, pos + desc->offset + desc->packlength, sizeof(chain)); + memcpy(new_rec + desc->offset + desc->packlength, &chain, + sizeof(chain)); + } + } + hp_read_blobs(info, new_rec, pos); + } + } + else + { + memcpy(pos, heap_new, (size_t) share->reclength); + } if (++(share->records) == share->blength) share->blength+= share->blength; #if !defined(DBUG_OFF) && defined(EXTRA_HEAP_DEBUG) @@ -81,7 +173,7 @@ int heap_update(HP_INFO *info, const uchar *old, const uchar *heap_new) } while (keydef >= share->keydef) { - if (hp_rec_key_cmp(keydef, old, heap_new)) + if (hp_rec_key_cmp(keydef, old, heap_new, NULL)) { if ((*keydef->delete_key)(info, keydef, heap_new, pos, 0) || (*keydef->write_key)(info, keydef, old, pos)) diff --git a/storage/heap/hp_write.c b/storage/heap/hp_write.c index cb079eac75788..9a8b244307de6 100644 --- a/storage/heap/hp_write.c +++ b/storage/heap/hp_write.c @@ -26,7 +26,6 @@ #define HIGHFIND 4 #define HIGHUSED 8 -static uchar *next_free_record_pos(HP_SHARE *info); static HASH_INFO *hp_find_free_hash(HP_SHARE *info, HP_BLOCK *block, ulong records); @@ -54,7 +53,13 @@ int heap_write(HP_INFO *info, const uchar *record) } memcpy(pos,record,(size_t) share->reclength); - pos[share->visible]= 1; /* Mark record as not deleted */ + if (share->blob_count) + { + if (hp_write_blobs(info, record, pos)) + goto err_blob; + } + else + pos[share->visible]= 1; /* Mark record as not deleted */ if (++share->records == share->blength) share->blength+= share->blength; info->s->key_version++; @@ -66,6 +71,33 @@ int heap_write(HP_INFO *info, const uchar *record) heap_update_auto_increment(info, record); DBUG_RETURN(0); +err_blob: + /* + Blob write failed after all keys were written successfully. + Roll back all keys — unlike err: below, no key needs to be skipped. 
+ + Do NOT call hp_free_blobs() here: hp_write_blobs() is self-cleaning + on failure — hp_write_one_blob() frees its own partial chain, and + hp_write_blobs() frees all previously completed columns (0..i-1) and + NULLs every chain pointer in pos. Calling hp_free_blobs() after this + would be both redundant and dangerous: + - The visibility byte pos[share->visible] has not been set yet (it is + only written on hp_write_blobs() success at line 493), so it may + contain uninitialized data from tail allocation with HP_ROW_HAS_CONT + bit set. + - Blob columns after the failed one (i+1..blob_count-1) still have the + SQL layer's original data pointers in pos (from memcpy at line 55), + not continuation chain pointers. hp_free_run_chain() would interpret + those as chain headers and crash. + */ + info->errkey= -1; + for (keydef= end - 1; keydef >= share->keydef; keydef--) + { + if ((*keydef->delete_key)(info, keydef, record, pos, 0)) + break; + } + goto err_common; + err: if (my_errno == HA_ERR_FOUND_DUPP_KEY) DBUG_PRINT("info",("Duplicate key: %d", (int) (keydef - share->keydef))); @@ -85,9 +117,20 @@ int heap_write(HP_INFO *info, const uchar *record) if ((*keydef->delete_key)(info, keydef, record, pos, 0)) break; keydef--; - } + } + + /* + Do NOT call hp_free_blobs here: the err: label is reached when a key + write fails (line 52), which is BEFORE memcpy(pos, record, reclength) + and hp_write_blobs(). The slot at pos still contains stale data from the + free list, so hp_free_blobs would chase garbage chain pointers. + Only err_blob: (above) needs hp_free_blobs, since blobs may have been + partially written there. + */ +err_common: share->deleted++; + share->total_records--; *((uchar**) pos)=share->del_link; share->del_link=pos; pos[share->visible]= 0; /* Record deleted */ @@ -128,9 +171,18 @@ int hp_rb_write_key(HP_INFO *info, HP_KEYDEF *keyinfo, const uchar *record, return 0; } - /* Find where to place new record */ +/* + Find where to place a new record. + + Allocates from the free list (del_link) first; if empty, extends the + HP_BLOCK tail. Both paths maintain the scan-boundary invariant: + total_records + deleted == block.last_allocated + Free-list allocation does deleted-- + total_records++ (sum unchanged). + Tail allocation does last_allocated++ + total_records++ (sum grows by 1, + matching the new slot). heap_scan() relies on this sum to detect EOF. +*/ -static uchar *next_free_record_pos(HP_SHARE *info) +uchar *next_free_record_pos(HP_SHARE *info) { int block_pos; uchar *pos; @@ -142,19 +194,21 @@ static uchar *next_free_record_pos(HP_SHARE *info) pos=info->del_link; info->del_link= *((uchar**) pos); info->deleted--; + info->total_records++; DBUG_PRINT("exit",("Used old position: %p", pos)); DBUG_RETURN(pos); } - if (!(block_pos=(info->records % info->block.records_in_block))) + if (!(block_pos=(info->block.last_allocated % info->block.records_in_block))) { - if ((info->records > info->max_records && info->max_records) || + if ((info->block.last_allocated > info->max_records && + info->max_records) || (info->data_length + info->index_length >= info->max_table_size)) { DBUG_PRINT("error", - ("record file full. records: %lu max_records: %lu " + ("record file full. 
last_allocated: %lu max_records: %lu " "data_length: %llu index_length: %llu " "max_table_size: %llu", - info->records, info->max_records, + info->block.last_allocated, info->max_records, info->data_length, info->index_length, info->max_table_size)); my_errno=HA_ERR_RECORD_FILE_FULL; @@ -165,6 +219,8 @@ static uchar *next_free_record_pos(HP_SHARE *info) DBUG_RETURN(NULL); info->data_length+=length; } + info->block.last_allocated++; + info->total_records++; DBUG_PRINT("exit",("Used new position: %p", ((uchar*) info->block.level_info[0].last_blocks+ block_pos * info->block.recbuffer))); @@ -385,7 +441,7 @@ int hp_write_key(HP_INFO *info, HP_KEYDEF *keyinfo, do { if (pos->hash_of_key == hash_of_key && - ! hp_rec_key_cmp(keyinfo, record, pos->ptr_to_rec)) + ! hp_rec_key_cmp(keyinfo, record, pos->ptr_to_rec, info)) { DBUG_RETURN(my_errno=HA_ERR_FOUND_DUPP_KEY); } From c736807d95f9f2ca3da0d1d513aa9a28d5b87b5c Mon Sep 17 00:00:00 2001 From: Arcadiy Ivanov Date: Sun, 8 Mar 2026 20:25:38 -0400 Subject: [PATCH 02/27] Cap `min_run_records` for small blob free-list reuse The free-list allocator's minimum contiguous run threshold (`min_run_records`) could exceed the total records a small blob actually needs, making free-list reuse impossible on narrow tables. For example, with `recbuffer=16` the 128-byte floor produced `min_run_records=8`, but a 32-byte blob only needs 3 records. Any contiguous free-list group of 3 would be rejected, forcing unnecessary tail allocation. Cap both `min_run_bytes` at `data_len` and `min_run_records` at `total_records_needed` so small blobs can reuse free-list slots when a sufficient contiguous group exists. --- storage/heap/hp_blob.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/storage/heap/hp_blob.c b/storage/heap/hp_blob.c index 19c8068be5d14..776345b8b2034 100644 --- a/storage/heap/hp_blob.c +++ b/storage/heap/hp_blob.c @@ -302,14 +302,42 @@ static int hp_write_one_blob(HP_SHARE *share, const uchar *data_ptr, uchar *prev_run_start= NULL; uint32 data_offset= 0; - /* Calculate minimum acceptable run size */ + /* + Calculate minimum acceptable contiguous run size for free-list reuse. + + The free-list walk (Step 1 below) rejects contiguous groups smaller + than min_run_records, bailing to tail allocation instead. This + prevents excessive chain fragmentation for large blobs: accepting + tiny fragments would produce long chains of many short runs, each + with its own 10-byte header overhead and pointer dereference on read. + + The threshold is the larger of: + - 1/10 of the blob size (caps chain length at ~10 runs) + - 128 bytes absolute floor (HP_CONT_MIN_RUN_BYTES) + - 2 records minimum (a single-slot run is pure overhead) + + For small blobs whose total bytes or records needed is below this + threshold, the fragmentation concern doesn't apply — the entire blob + fits in one short run. Cap both min_run_bytes and min_run_records + so the free list can satisfy the allocation without falling through + to the tail unnecessarily. + */ min_run_bytes= data_len / HP_CONT_RUN_FRACTION_DEN * HP_CONT_RUN_FRACTION_NUM; if (min_run_bytes < HP_CONT_MIN_RUN_BYTES) min_run_bytes= HP_CONT_MIN_RUN_BYTES; + if (min_run_bytes > data_len) + min_run_bytes= data_len; min_run_records= (min_run_bytes + recbuffer - 1) / recbuffer; if (min_run_records < 2) min_run_records= 2; + { + uint32 first_payload= visible - HP_CONT_HEADER_SIZE; + uint32 total_records_needed= data_len <= first_payload ? 
1 : + 1 + (data_len - first_payload + recbuffer - 1) / recbuffer; + if (total_records_needed < min_run_records) + min_run_records= total_records_needed; + } /* Step 1: Try to allocate contiguous runs from the free list. From 005a981449175943293443dd5d5175d70f5586b6 Mon Sep 17 00:00:00 2001 From: Arcadiy Ivanov Date: Mon, 9 Mar 2026 23:11:34 -0400 Subject: [PATCH 03/27] MDEV-38975: Add hash pre-check to skip expensive blob materialization in hash chain traversal `hp_search()`, `hp_search_next()`, `hp_delete_key()`, and `find_unique_row()` walk hash chains calling `hp_key_cmp()` or `hp_rec_key_cmp()` for every entry. For blob key segments, each comparison triggers `hp_materialize_one_blob()` which reassembles blob data from continuation chain records. Since each `HASH_INFO` already stores `hash_of_key`, compare it against the search key's hash before the full key comparison. When hashes differ the keys are guaranteed different, skipping the expensive materialization. This pattern already existed in `hp_write_key()` for duplicate detection but was missing from the four read/delete paths. `HP_INFO::last_hash_of_key` is added so `hp_search_next()` can reuse the hash computed by `hp_search()` without recomputing it. --- include/heap.h | 1 + storage/heap/ha_heap.cc | 7 +++++-- storage/heap/hp_delete.c | 9 ++++++--- storage/heap/hp_hash.c | 15 ++++++++++++--- 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/include/heap.h b/include/heap.h index 633a33e53fd0f..439b437cf306f 100644 --- a/include/heap.h +++ b/include/heap.h @@ -193,6 +193,7 @@ typedef struct st_heap_info uchar *blob_buff; /* Reassembly buffer for blob reads */ uint32 blob_buff_len; /* Current allocated size of blob_buff */ my_bool has_zerocopy_blobs; /* Last hp_read_blobs produced zero-copy ptrs */ + ulong last_hash_of_key; /* Hash from last hp_search(), reused by hp_search_next() */ THR_LOCK_DATA lock; LIST open_list; } HP_INFO; diff --git a/storage/heap/ha_heap.cc b/storage/heap/ha_heap.cc index 0c8f4aa2a5491..59497dc2f437e 100644 --- a/storage/heap/ha_heap.cc +++ b/storage/heap/ha_heap.cc @@ -878,12 +878,15 @@ int ha_heap::find_unique_row(uchar *record, uint unique_idx) DBUG_ASSERT(keyinfo->flag & HA_NOSAME); if (!share->records) DBUG_RETURN(1); // not found + ulong rec_hash= hp_rec_hashnr(keyinfo, record); HASH_INFO *pos= hp_find_hash(&keyinfo->block, - hp_mask(hp_rec_hashnr(keyinfo, record), + hp_mask(rec_hash, share->blength, share->records)); do { - if (!hp_rec_key_cmp(keyinfo, record, pos->ptr_to_rec, file)) + /* Hash pre-check avoids expensive blob materialization for non-matching entries */ + if (pos->hash_of_key == rec_hash && + !hp_rec_key_cmp(keyinfo, record, pos->ptr_to_rec, file)) { file->current_hash_ptr= pos; file->current_ptr= pos->ptr_to_rec; diff --git a/storage/heap/hp_delete.c b/storage/heap/hp_delete.c index 1a4da1fff0e44..f7538843d6946 100644 --- a/storage/heap/hp_delete.c +++ b/storage/heap/hp_delete.c @@ -114,7 +114,7 @@ int hp_rb_delete_key(HP_INFO *info, register HP_KEYDEF *keyinfo, int hp_delete_key(HP_INFO *info, register HP_KEYDEF *keyinfo, const uchar *record, uchar *recpos, int flag) { - ulong blength, pos2, pos_hashnr, lastpos_hashnr, key_pos; + ulong blength, pos2, pos_hashnr, lastpos_hashnr, key_pos, rec_hash; HASH_INFO *lastpos,*gpos,*pos,*pos3,*empty,*last_ptr; HP_SHARE *share=info->s; DBUG_ENTER("hp_delete_key"); @@ -126,14 +126,17 @@ int hp_delete_key(HP_INFO *info, register HP_KEYDEF *keyinfo, last_ptr=0; /* Search after record with key */ - key_pos= 
hp_mask(hp_rec_hashnr(keyinfo, record), blength, share->records + 1); + rec_hash= hp_rec_hashnr(keyinfo, record); + key_pos= hp_mask(rec_hash, blength, share->records + 1); pos= hp_find_hash(&keyinfo->block, key_pos); gpos = pos3 = 0; while (pos->ptr_to_rec != recpos) { - if (flag && !hp_rec_key_cmp(keyinfo, record, pos->ptr_to_rec, info)) + /* Hash pre-check avoids expensive blob materialization for non-matching entries */ + if (flag && pos->hash_of_key == rec_hash && + !hp_rec_key_cmp(keyinfo, record, pos->ptr_to_rec, info)) last_ptr=pos; /* Previous same key */ gpos=pos; if (!(pos=pos->next_key)) diff --git a/storage/heap/hp_hash.c b/storage/heap/hp_hash.c index c06fa77b9e9d9..772f5307134b5 100644 --- a/storage/heap/hp_hash.c +++ b/storage/heap/hp_hash.c @@ -138,15 +138,23 @@ uchar *hp_search(HP_INFO *info, HP_KEYDEF *keyinfo, const uchar *key, if (share->records) { + ulong key_hash= hp_hashnr(keyinfo, key); ulong search_pos= - hp_mask(hp_hashnr(keyinfo, key), share->blength, share->records); + hp_mask(key_hash, share->blength, share->records); pos=hp_find_hash(&keyinfo->block, search_pos); if (search_pos != hp_mask(pos->hash_of_key, share->blength, share->records)) goto not_found; /* Wrong link */ + /* + Save hash for hp_search_next() to reuse without recomputing. + Pre-check hash_of_key before hp_key_cmp() to avoid expensive + blob materialization for non-matching entries. + */ + info->last_hash_of_key= key_hash; do { - if (!hp_key_cmp(keyinfo, pos->ptr_to_rec, key, info)) + if (pos->hash_of_key == key_hash && + !hp_key_cmp(keyinfo, pos->ptr_to_rec, key, info)) { switch (nextflag) { case 0: /* Search after key */ @@ -207,7 +215,8 @@ uchar *hp_search_next(HP_INFO *info, HP_KEYDEF *keyinfo, const uchar *key, while ((pos= pos->next_key)) { - if (! hp_key_cmp(keyinfo, pos->ptr_to_rec, key, info)) + if (pos->hash_of_key == info->last_hash_of_key && + ! hp_key_cmp(keyinfo, pos->ptr_to_rec, key, info)) { info->current_hash_ptr=pos; DBUG_RETURN (info->current_ptr= pos->ptr_to_rec); From 93f9f598516e5c368503970553eb4cf687d44a9e Mon Sep 17 00:00:00 2001 From: Arcadiy Ivanov Date: Wed, 11 Mar 2026 19:37:49 -0400 Subject: [PATCH 04/27] Set `key_part_flag` from field type in GROUP BY key setup Rebuild HEAP index key from `record[0]` when the index has blob key segments, because `Field_blob::new_key_field()` returns `Field_varstring` (2B length + inline data) while HEAP's `hp_hashnr`/`hp_key_cmp` expect `hp_make_key` format (4B length + data pointer). Precompute `HP_KEYDEF::has_blob_seg` flag during table creation to avoid per-call loop through key segments. 
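
For illustration, the two key-part encodings look like this (a minimal sketch
with invented function names, not code from this patch; the little-endian
length reads mirror what uint2korr/uint4korr produce):

    #include <string.h>
    #include <stdint.h>

    typedef unsigned char uchar;

    /* Field_varstring key part: 2-byte length, data inline in the key. */
    static const uchar *read_varstring_part(const uchar *key, uint32_t *len)
    {
      *len= (uint32_t) key[0] | ((uint32_t) key[1] << 8);
      return key + 2;                    /* data follows the length */
    }

    /* hp_make_key blob part: 4-byte length, then a raw pointer to data. */
    static const uchar *read_blob_part(const uchar *key, uint32_t *len)
    {
      const uchar *data;
      *len= (uint32_t) key[0] | ((uint32_t) key[1] << 8) |
            ((uint32_t) key[2] << 16) | ((uint32_t) key[3] << 24);
      memcpy(&data, key + 4, sizeof(data));   /* data lives elsewhere */
      return data;
    }

Feeding the first layout into code that expects the second would make
hp_hashnr() read two length bytes plus two data bytes as a 4-byte length and
then dereference inline key bytes as a pointer, which is why the key is
rebuilt from record[0] instead.
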
--- include/heap.h | 1 + .../suite/heap/heap_blob_groupby.result | 43 +++++++++++++++++++ mysql-test/suite/heap/heap_blob_groupby.test | 41 ++++++++++++++++++ sql/sql_select.cc | 2 +- storage/heap/ha_heap.cc | 26 +++++++++++ storage/heap/hp_create.c | 10 +++++ 6 files changed, 122 insertions(+), 1 deletion(-) create mode 100644 mysql-test/suite/heap/heap_blob_groupby.result create mode 100644 mysql-test/suite/heap/heap_blob_groupby.test diff --git a/include/heap.h b/include/heap.h index 439b437cf306f..54a78a7877cd4 100644 --- a/include/heap.h +++ b/include/heap.h @@ -116,6 +116,7 @@ typedef struct st_hp_keydef /* Key definition with open */ uint keysegs; /* Number of key-segment */ uint length; /* Length of key (automatic) */ uint8 algorithm; /* HASH / BTREE */ + my_bool has_blob_seg; /* Key has HA_BLOB_PART segments */ HA_KEYSEG *seg; HP_BLOCK block; /* Where keys are saved */ /* diff --git a/mysql-test/suite/heap/heap_blob_groupby.result b/mysql-test/suite/heap/heap_blob_groupby.result new file mode 100644 index 0000000000000..8a583bad03d28 --- /dev/null +++ b/mysql-test/suite/heap/heap_blob_groupby.result @@ -0,0 +1,43 @@ +# +# MDEV-38975: GROUP BY on blob columns in HEAP internal temp tables +# Verify that GROUP BY correctly groups by blob/text content, +# not by internal pointer representation. +# +CREATE TABLE t1 (a TEXT, b INT); +INSERT INTO t1 VALUES ('foo', 1), ('foo', 2), ('bar', 3), ('bar', 4), ('baz', 5); +# GROUP BY on TEXT column should group by content +SELECT a, COUNT(*), SUM(b) FROM t1 GROUP BY a ORDER BY a; +a COUNT(*) SUM(b) +bar 2 7 +baz 1 5 +foo 2 3 +# GROUP BY with HAVING +SELECT a, COUNT(*) AS cnt FROM t1 GROUP BY a HAVING cnt > 1 ORDER BY a; +a cnt +bar 2 +foo 2 +DROP TABLE t1; +# +# GROUP BY on multiple blob columns +# +CREATE TABLE t1 (a TEXT, b TEXT, c INT); +INSERT INTO t1 VALUES ('x', 'p', 1), ('x', 'p', 2), +('x', 'q', 3), ('y', 'p', 4); +SELECT a, b, COUNT(*), SUM(c) FROM t1 GROUP BY a, b ORDER BY a, b; +a b COUNT(*) SUM(c) +x p 2 3 +x q 1 3 +y p 1 4 +DROP TABLE t1; +# +# GROUP BY blob with NULL values +# +CREATE TABLE t1 (a TEXT, b INT); +INSERT INTO t1 VALUES (NULL, 1), (NULL, 2), ('foo', 3), ('foo', 4), ('bar', 5); +SELECT a, COUNT(*), SUM(b) FROM t1 GROUP BY a ORDER BY a; +a COUNT(*) SUM(b) +NULL 2 3 +bar 1 5 +foo 2 7 +DROP TABLE t1; +# End of MDEV-38975 GROUP BY blob tests diff --git a/mysql-test/suite/heap/heap_blob_groupby.test b/mysql-test/suite/heap/heap_blob_groupby.test new file mode 100644 index 0000000000000..9b935a0e6d48d --- /dev/null +++ b/mysql-test/suite/heap/heap_blob_groupby.test @@ -0,0 +1,41 @@ +--source include/have_sequence.inc + +--echo # +--echo # MDEV-38975: GROUP BY on blob columns in HEAP internal temp tables +--echo # Verify that GROUP BY correctly groups by blob/text content, +--echo # not by internal pointer representation. 
+--echo # + +CREATE TABLE t1 (a TEXT, b INT); +INSERT INTO t1 VALUES ('foo', 1), ('foo', 2), ('bar', 3), ('bar', 4), ('baz', 5); + +--echo # GROUP BY on TEXT column should group by content +SELECT a, COUNT(*), SUM(b) FROM t1 GROUP BY a ORDER BY a; + +--echo # GROUP BY with HAVING +SELECT a, COUNT(*) AS cnt FROM t1 GROUP BY a HAVING cnt > 1 ORDER BY a; + +DROP TABLE t1; + +--echo # +--echo # GROUP BY on multiple blob columns +--echo # +CREATE TABLE t1 (a TEXT, b TEXT, c INT); +INSERT INTO t1 VALUES ('x', 'p', 1), ('x', 'p', 2), + ('x', 'q', 3), ('y', 'p', 4); + +SELECT a, b, COUNT(*), SUM(c) FROM t1 GROUP BY a, b ORDER BY a, b; + +DROP TABLE t1; + +--echo # +--echo # GROUP BY blob with NULL values +--echo # +CREATE TABLE t1 (a TEXT, b INT); +INSERT INTO t1 VALUES (NULL, 1), (NULL, 2), ('foo', 3), ('foo', 4), ('bar', 5); + +SELECT a, COUNT(*), SUM(b) FROM t1 GROUP BY a ORDER BY a; + +DROP TABLE t1; + +--echo # End of MDEV-38975 GROUP BY blob tests diff --git a/sql/sql_select.cc b/sql/sql_select.cc index f7195642ffee4..273f9b345cef1 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -21334,7 +21334,7 @@ bool Create_tmp_table::finalize(THD *thd, (ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_VARTEXT1 || (ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_VARTEXT2) ? 0 : FIELDFLAG_BINARY; - m_key_part_info->key_part_flag= 0; + m_key_part_info->key_part_flag= field->key_part_flag(); if (!m_using_unique_constraint) { cur_group->buff=(char*) m_group_buff; diff --git a/storage/heap/ha_heap.cc b/storage/heap/ha_heap.cc index 59497dc2f437e..dbe0da432bbe3 100644 --- a/storage/heap/ha_heap.cc +++ b/storage/heap/ha_heap.cc @@ -291,6 +291,20 @@ int ha_heap::index_read_map(uchar *buf, const uchar *key, enum ha_rkey_function find_flag) { DBUG_ASSERT(inited==INDEX); + /* + When the index has blob key segments, the SQL layer's key buffer (e.g. + group_buff from end_update) uses Field_varstring format (2B length + + inline data) because Field_blob::new_key_field() returns Field_varstring. + But HEAP's hp_hashnr/hp_key_cmp expect hp_make_key format (4B length + + data pointer). Rebuild the key from record[0] which has the correct + blob field layout. 
+ */ + if (file->s->keydef[active_index].has_blob_seg) + { + hp_make_key(file->s->keydef + active_index, (uchar*) file->lastkey, + table->record[0]); + key= (const uchar*) file->lastkey; + } int error = heap_rkey(file,buf,active_index, key, keypart_map, find_flag); return error; } @@ -299,6 +313,12 @@ int ha_heap::index_read_last_map(uchar *buf, const uchar *key, key_part_map keypart_map) { DBUG_ASSERT(inited==INDEX); + if (file->s->keydef[active_index].has_blob_seg) + { + hp_make_key(file->s->keydef + active_index, (uchar*) file->lastkey, + table->record[0]); + key= (const uchar*) file->lastkey; + } int error= heap_rkey(file, buf, active_index, key, keypart_map, HA_READ_PREFIX_LAST); return error; @@ -308,6 +328,12 @@ int ha_heap::index_read_idx_map(uchar *buf, uint index, const uchar *key, key_part_map keypart_map, enum ha_rkey_function find_flag) { + if (file->s->keydef[index].has_blob_seg) + { + hp_make_key(file->s->keydef + index, (uchar*) file->lastkey, + table->record[0]); + key= (const uchar*) file->lastkey; + } int error = heap_rkey(file, buf, index, key, keypart_map, find_flag); return error; } diff --git a/storage/heap/hp_create.c b/storage/heap/hp_create.c index 6433b059605d0..18a4034fb4691 100644 --- a/storage/heap/hp_create.c +++ b/storage/heap/hp_create.c @@ -277,6 +277,16 @@ int heap_create(const char *name, HP_CREATE_INFO *create_info, keyinfo->seg= keyseg; memcpy(keyseg, keydef[i].seg, (size_t) (sizeof(keyseg[0]) * keydef[i].keysegs)); + keyinfo->has_blob_seg= FALSE; + { + uint j; + for (j= 0; j < keydef[i].keysegs; j++) + if (keyseg[j].flag & HA_BLOB_PART) + { + keyinfo->has_blob_seg= TRUE; + break; + } + } keyseg+= keydef[i].keysegs; if (keydef[i].algorithm == HA_KEY_ALG_BTREE) From f5da8132584e984359689e1f9cdde9f5daf3a9ef Mon Sep 17 00:00:00 2001 From: Arcadiy Ivanov Date: Sun, 15 Mar 2026 22:20:03 -0400 Subject: [PATCH 05/27] Fix PAD SPACE blob comparison and add blob key tests **Bug fix** (`padspace_early_exit`): `hp_rec_key_cmp()` and `hp_key_cmp()` in `hp_hash.c` had early-exit checks `if (len1 != len2) return 1` for blob key segments, which broke PAD SPACE collations (the default). With PAD SPACE, `'abc'` (len=3) and `'abc '` (len=6) must compare equal because trailing spaces are insignificant, but the length check rejected them before reaching `strnncollsp()`. Fix: only short-circuit on length mismatch for NO PAD collations (`MY_CS_NOPAD`). This bug was discovered during the VARCHAR-to-BLOB promotion work (Phase 1) and affects any HEAP table with blob key segments, manifesting in `COUNT(DISTINCT)` on TEXT columns returning inflated counts. **Test coverage** transferred from Phase 1: - `heap.heap_blob_ops` MTR test: exercises HEAP internal temp tables with explicit TEXT columns (GROUP BY, DISTINCT, IN-subquery, CTEs, window functions, ROLLUP). Includes a targeted PAD SPACE scenario that catches `padspace_early_exit`. - `hp_test_hash-t` unit test (43 tests): validates blob hash/compare functions including PAD SPACE collation, NULL/empty blobs, multi-segment keys, key format round-trips. - `hp_test_key_setup-t` unit test (9 tests): validates `heap_prepare_hp_create_info()` handling of blob key segments (`distinct_key_truncation`) and garbage `key_part_flag` (`garbage_key_part_flag`). Four Phase 1-specific assertions are deferred via `#if 0` (these bugs are compensated by `hp_create.c` runtime normalization in MDEV-38975 proper but will be needed when Phase 1 removes that safety net). 
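
A minimal model of the corrected guard (simplified types standing in for the
MariaDB charset API; blob_seg_equal and coll are invented names, not the
actual fix in hp_hash.c):

    #include <stddef.h>
    #include <stdbool.h>

    typedef unsigned char uchar;

    typedef struct
    {
      bool nopad;  /* models the MY_CS_NOPAD collation flag */
      int (*strnncollsp)(const uchar *a, size_t alen,
                         const uchar *b, size_t blen);
    } coll;

    static int blob_seg_equal(const coll *cs,
                              const uchar *a, size_t alen,
                              const uchar *b, size_t blen)
    {
      /*
        The fix short-circuits on a length mismatch only for NO PAD
        collations, where unequal lengths are treated as unequal keys.
        With PAD SPACE (the default), 'abc' (len 3) and 'abc   '
        (len 6) must still reach strnncollsp(), which ignores trailing
        spaces, so no early exit is allowed there.
      */
      if (cs->nopad && alen != blen)
        return 0;
      return cs->strnncollsp(a, alen, b, blen) == 0;
    }

The heap_blob_ops test below includes a PAD SPACE scenario that fails with the
old unconditional length check, surfacing as inflated COUNT(DISTINCT) on TEXT
columns.
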
--- mysql-test/suite/heap/heap_blob_ops.result | 149 ++++ mysql-test/suite/heap/heap_blob_ops.test | 93 +++ storage/heap/CMakeLists.txt | 21 +- storage/heap/ha_heap.cc | 12 + storage/heap/hp_hash.c | 17 +- storage/heap/hp_test_hash-t.c | 747 +++++++++++++++++++++ storage/heap/hp_test_key_setup-t.cc | 422 ++++++++++++ 7 files changed, 1451 insertions(+), 10 deletions(-) create mode 100644 mysql-test/suite/heap/heap_blob_ops.result create mode 100644 mysql-test/suite/heap/heap_blob_ops.test create mode 100644 storage/heap/hp_test_hash-t.c create mode 100644 storage/heap/hp_test_key_setup-t.cc diff --git a/mysql-test/suite/heap/heap_blob_ops.result b/mysql-test/suite/heap/heap_blob_ops.result new file mode 100644 index 0000000000000..98118a4fc5a6b --- /dev/null +++ b/mysql-test/suite/heap/heap_blob_ops.result @@ -0,0 +1,149 @@ +# +# MDEV-38975: Blob/text operations exercising HEAP internal temp tables +# +CREATE TABLE t1 (id INT AUTO_INCREMENT PRIMARY KEY, k INT, text_col TEXT); +INSERT INTO t1 (k, text_col) VALUES +(1, 'alpha'), (1, 'alpha'), (1, 'beta'), +(2, 'gamma'), (2, 'gamma'), (2, 'delta'), +(3, 'alpha'), (3, 'epsilon'), (3, NULL), +(4, NULL), (4, NULL), (4, 'beta'); +# +# COUNT(DISTINCT text_col) +# +SELECT COUNT(DISTINCT text_col) FROM t1; +COUNT(DISTINCT text_col) +5 +# +# COUNT(DISTINCT text_col) with GROUP BY +# +SELECT k, COUNT(DISTINCT text_col) FROM t1 GROUP BY k ORDER BY k; +k COUNT(DISTINCT text_col) +1 2 +2 2 +3 2 +4 1 +# +# IN-subquery with blob/text +# +CREATE TABLE t2 (text_col TEXT); +INSERT INTO t2 VALUES ('alpha'), ('delta'); +SELECT id, text_col FROM t1 WHERE text_col IN (SELECT text_col FROM t2) ORDER BY id; +id text_col +1 alpha +2 alpha +6 delta +7 alpha +# +# GROUP BY text_col ORDER BY aggregate LIMIT +# +SELECT text_col, COUNT(*) AS cnt FROM t1 GROUP BY text_col ORDER BY cnt DESC, text_col LIMIT 3; +text_col cnt +NULL 3 +alpha 3 +beta 2 +# +# GROUP BY text_col WITH ROLLUP +# +SELECT text_col, COUNT(*) FROM t1 GROUP BY text_col WITH ROLLUP; +text_col COUNT(*) +NULL 12 +NULL 3 +alpha 3 +beta 2 +delta 1 +epsilon 1 +gamma 2 +# +# GROUP BY multiple columns WITH ROLLUP +# +SELECT k, text_col, COUNT(*) FROM t1 GROUP BY k, text_col WITH ROLLUP; +k text_col COUNT(*) +1 NULL 3 +1 alpha 2 +1 beta 1 +2 NULL 3 +2 delta 1 +2 gamma 2 +3 NULL 1 +3 NULL 3 +3 alpha 1 +3 epsilon 1 +4 NULL 2 +4 NULL 3 +4 beta 1 +NULL NULL 12 +# +# Window function with blob in SELECT list +# +SELECT id, text_col, ROW_NUMBER() OVER (PARTITION BY k ORDER BY id) AS rn +FROM t1 WHERE k <= 2 ORDER BY id; +id text_col rn +1 alpha 1 +2 alpha 2 +3 beta 3 +4 gamma 1 +5 gamma 2 +6 delta 3 +# +# RANK with blob column +# +SELECT text_col, k, RANK() OVER (ORDER BY text_col) AS rnk +FROM t1 WHERE text_col IS NOT NULL ORDER BY text_col, k; +text_col k rnk +alpha 1 1 +alpha 1 1 +alpha 3 1 +beta 1 4 +beta 4 4 +delta 2 6 +epsilon 3 7 +gamma 2 8 +gamma 2 8 +# +# Non-recursive CTE materializing blob column +# +WITH cte AS (SELECT text_col, k FROM t1 WHERE k <= 2) +SELECT DISTINCT text_col FROM cte ORDER BY text_col; +text_col +alpha +beta +delta +gamma +# +# CTE with self-join on blob column +# +WITH cte AS (SELECT text_col, k FROM t1 WHERE text_col IS NOT NULL) +SELECT COUNT(*) FROM cte a JOIN cte b ON a.k = b.k AND a.text_col = b.text_col; +COUNT(*) +13 +# +# CTE referenced twice (forces materialization) +# +WITH cte AS (SELECT DISTINCT text_col FROM t1 WHERE text_col IS NOT NULL) +SELECT a.text_col, b.text_col +FROM cte a JOIN cte b ON a.text_col > b.text_col ORDER BY a.text_col, b.text_col; +text_col text_col +beta 
alpha +delta alpha +delta beta +epsilon alpha +epsilon beta +epsilon delta +gamma alpha +gamma beta +gamma delta +gamma epsilon +DROP TABLE t1, t2; +# +# PAD SPACE: COUNT(DISTINCT) on TEXT with trailing-space variants +# PAD SPACE collations (latin1 default) treat trailing spaces as +# insignificant, so 'abc' and 'abc ' are the same value. +# +CREATE TABLE t_pad (id INT AUTO_INCREMENT PRIMARY KEY, t TEXT); +INSERT INTO t_pad (t) VALUES ('abc'), ('abc '), ('abc '), ('def'), ('def '); +# Must return 2 (not 5): 'abc' variants = 1 group, 'def' variants = 1 group +SELECT COUNT(DISTINCT t) FROM t_pad; +COUNT(DISTINCT t) +2 +DROP TABLE t_pad; +# End of MDEV-38975 blob operations tests diff --git a/mysql-test/suite/heap/heap_blob_ops.test b/mysql-test/suite/heap/heap_blob_ops.test new file mode 100644 index 0000000000000..e1c81dadedd41 --- /dev/null +++ b/mysql-test/suite/heap/heap_blob_ops.test @@ -0,0 +1,93 @@ +--source include/have_sequence.inc +--source include/not_embedded.inc + +--echo # +--echo # MDEV-38975: Blob/text operations exercising HEAP internal temp tables +--echo # + +CREATE TABLE t1 (id INT AUTO_INCREMENT PRIMARY KEY, k INT, text_col TEXT); +INSERT INTO t1 (k, text_col) VALUES + (1, 'alpha'), (1, 'alpha'), (1, 'beta'), + (2, 'gamma'), (2, 'gamma'), (2, 'delta'), + (3, 'alpha'), (3, 'epsilon'), (3, NULL), + (4, NULL), (4, NULL), (4, 'beta'); + +--echo # +--echo # COUNT(DISTINCT text_col) +--echo # +SELECT COUNT(DISTINCT text_col) FROM t1; + +--echo # +--echo # COUNT(DISTINCT text_col) with GROUP BY +--echo # +SELECT k, COUNT(DISTINCT text_col) FROM t1 GROUP BY k ORDER BY k; + +--echo # +--echo # IN-subquery with blob/text +--echo # +CREATE TABLE t2 (text_col TEXT); +INSERT INTO t2 VALUES ('alpha'), ('delta'); +SELECT id, text_col FROM t1 WHERE text_col IN (SELECT text_col FROM t2) ORDER BY id; + +--echo # +--echo # GROUP BY text_col ORDER BY aggregate LIMIT +--echo # +SELECT text_col, COUNT(*) AS cnt FROM t1 GROUP BY text_col ORDER BY cnt DESC, text_col LIMIT 3; + +--echo # +--echo # GROUP BY text_col WITH ROLLUP +--echo # +--sorted_result +SELECT text_col, COUNT(*) FROM t1 GROUP BY text_col WITH ROLLUP; + +--echo # +--echo # GROUP BY multiple columns WITH ROLLUP +--echo # +--sorted_result +SELECT k, text_col, COUNT(*) FROM t1 GROUP BY k, text_col WITH ROLLUP; + +--echo # +--echo # Window function with blob in SELECT list +--echo # +SELECT id, text_col, ROW_NUMBER() OVER (PARTITION BY k ORDER BY id) AS rn +FROM t1 WHERE k <= 2 ORDER BY id; + +--echo # +--echo # RANK with blob column +--echo # +SELECT text_col, k, RANK() OVER (ORDER BY text_col) AS rnk +FROM t1 WHERE text_col IS NOT NULL ORDER BY text_col, k; + +--echo # +--echo # Non-recursive CTE materializing blob column +--echo # +WITH cte AS (SELECT text_col, k FROM t1 WHERE k <= 2) +SELECT DISTINCT text_col FROM cte ORDER BY text_col; + +--echo # +--echo # CTE with self-join on blob column +--echo # +WITH cte AS (SELECT text_col, k FROM t1 WHERE text_col IS NOT NULL) +SELECT COUNT(*) FROM cte a JOIN cte b ON a.k = b.k AND a.text_col = b.text_col; + +--echo # +--echo # CTE referenced twice (forces materialization) +--echo # +WITH cte AS (SELECT DISTINCT text_col FROM t1 WHERE text_col IS NOT NULL) +SELECT a.text_col, b.text_col +FROM cte a JOIN cte b ON a.text_col > b.text_col ORDER BY a.text_col, b.text_col; + +DROP TABLE t1, t2; + +--echo # +--echo # PAD SPACE: COUNT(DISTINCT) on TEXT with trailing-space variants +--echo # PAD SPACE collations (latin1 default) treat trailing spaces as +--echo # insignificant, so 'abc' and 
'abc ' are the same value. +--echo # +CREATE TABLE t_pad (id INT AUTO_INCREMENT PRIMARY KEY, t TEXT); +INSERT INTO t_pad (t) VALUES ('abc'), ('abc '), ('abc '), ('def'), ('def '); +--echo # Must return 2 (not 5): 'abc' variants = 1 group, 'def' variants = 1 group +SELECT COUNT(DISTINCT t) FROM t_pad; +DROP TABLE t_pad; + +--echo # End of MDEV-38975 blob operations tests diff --git a/storage/heap/CMakeLists.txt b/storage/heap/CMakeLists.txt index 7f4d53a787900..a2179bcdab75d 100644 --- a/storage/heap/CMakeLists.txt +++ b/storage/heap/CMakeLists.txt @@ -1,14 +1,14 @@ # Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. -# +# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; version 2 of the License. -# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# +# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA @@ -27,9 +27,22 @@ IF(CMAKE_SYSTEM_NAME MATCHES AIX AND CMAKE_BUILD_TYPE STREQUAL "DEBUG") ENDIF() IF(WITH_UNIT_TESTS) + TARGET_COMPILE_DEFINITIONS(heap PRIVATE HEAP_UNIT_TESTS) + ADD_EXECUTABLE(hp_test1 hp_test1.c) TARGET_LINK_LIBRARIES(hp_test1 heap mysys dbug strings) ADD_EXECUTABLE(hp_test2 hp_test2.c) TARGET_LINK_LIBRARIES(hp_test2 heap mysys dbug strings) -ENDIF() \ No newline at end of file + + MY_ADD_TESTS(hp_test_hash LINK_LIBRARIES heap mysys dbug strings) + + INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/sql + ${CMAKE_SOURCE_DIR}/include) + ADD_EXECUTABLE(hp_test_key_setup-t + hp_test_key_setup-t.cc + ${CMAKE_SOURCE_DIR}/unittest/sql/dummy_builtins.cc) + TARGET_COMPILE_DEFINITIONS(hp_test_key_setup-t PRIVATE MYSQL_SERVER) + TARGET_LINK_LIBRARIES(hp_test_key_setup-t heap sql mytap) + MY_ADD_TEST(hp_test_key_setup) +ENDIF() diff --git a/storage/heap/ha_heap.cc b/storage/heap/ha_heap.cc index dbe0da432bbe3..e902de1b5d4fb 100644 --- a/storage/heap/ha_heap.cc +++ b/storage/heap/ha_heap.cc @@ -952,3 +952,15 @@ maria_declare_plugin(heap) MariaDB_PLUGIN_MATURITY_STABLE /* maturity */ } maria_declare_plugin_end; + +#ifdef HEAP_UNIT_TESTS +/* + Public wrapper for unit tests — exposes the static + heap_prepare_hp_create_info() for direct testing. +*/ +int test_heap_prepare_hp_create_info(TABLE *table_arg, bool internal_table, + HP_CREATE_INFO *hp_create_info) +{ + return heap_prepare_hp_create_info(table_arg, internal_table, hp_create_info); +} +#endif diff --git a/storage/heap/hp_hash.c b/storage/heap/hp_hash.c index 772f5307134b5..7be77afec308b 100644 --- a/storage/heap/hp_hash.c +++ b/storage/heap/hp_hash.c @@ -480,10 +480,15 @@ int hp_rec_key_cmp(HP_KEYDEF *keydef, const uchar *rec1, const uchar *rec2, const uchar *data1; const uchar *data2; - if (len1 != len2) - return 1; - if (len1 == 0) + if (len1 == 0 && len2 == 0) continue; + /* + Only short-circuit on length mismatch for NO PAD collations. + PAD SPACE collations treat trailing spaces as insignificant, + so 'a' (len=1) and 'a ' (len=3) must compare equal. 
+ */ + if ((seg->charset->state & MY_CS_NOPAD) && len1 != len2) + return 1; /* rec1: always input — dereference pointer */ memcpy(&data1, pos1 + packlength, HP_PTR_SIZE); @@ -630,10 +635,10 @@ int hp_key_cmp(HP_KEYDEF *keydef, const uchar *rec, const uchar *key, memcpy(&key_data, key + 4, HP_PTR_SIZE); key+= 4 + sizeof(uchar*); - if (rec_blob_len != key_blob_len) - return 1; - if (rec_blob_len == 0) + if (rec_blob_len == 0 && key_blob_len == 0) continue; + if ((seg->charset->state & MY_CS_NOPAD) && rec_blob_len != key_blob_len) + return 1; /* rec is stored — materialize from chain */ { diff --git a/storage/heap/hp_test_hash-t.c b/storage/heap/hp_test_hash-t.c new file mode 100644 index 0000000000000..d831fc53fd88b --- /dev/null +++ b/storage/heap/hp_test_hash-t.c @@ -0,0 +1,747 @@ +/* + Unit tests for HEAP hash functions with blob key segments. + + Validates that hp_rec_hashnr() (hashes from a record) and hp_hashnr() + (hashes from a pre-built key via hp_make_key()) produce identical + results for blob data. Also validates hp_rec_key_cmp() and hp_key_cmp() + for blob segments. + + The three blob storage cases (A, B, C) refer to how blobs are stored + in continuation chains, but for hashing purposes what matters is the + record format: packlength bytes of length + sizeof(ptr) bytes of + data pointer. The hash functions read blob data via pointer + dereference, so the tests verify that the pointer dereference and + length handling are correct for various configurations. +*/ + +#include <my_global.h> +#include <my_sys.h> +#include <m_string.h> +#include <tap.h> +#include "heap.h" +#include "heapdef.h" + +/* + Record layout for a table (int4, blob(N)): + byte 0: null bitmap (1 byte, bit 2 = blob null) + bytes 1-4: int4 field (4 bytes) + bytes 5-6: blob packlength=2 (length, little-endian) + bytes 7-14: blob data pointer (8 bytes on x86_64) + byte 15: flags byte (at offset = visible = 15) + Total: recbuffer = ALIGN(MAX(16, 8) + 1, 8) = 24 +*/ + +#define REC_NULL_OFFSET 0 +#define REC_INT_OFFSET 1 +#define REC_BLOB_OFFSET 5 +#define REC_BLOB_PACKLEN 2 +#define REC_LENGTH 16 /* reclength: through end of blob descriptor */ +#define REC_VISIBLE 15 /* flags byte offset */ +#define REC_BUFFER 24 /* aligned recbuffer */ + +/* Key buffer: null_byte + 4B_blob_len + 8B_blob_ptr = 13 bytes max */ +#define KEY_BUF_SIZE 64 + +/* Avoids -Wsizeof-pointer-memaccess with sizeof(uchar*) */ +#define PTR_SIZE sizeof(void*) + + +static void setup_blob_keyseg(HA_KEYSEG *seg, my_bool nullable) +{ + memset(seg, 0, sizeof(*seg)); + seg->type= HA_KEYTYPE_VARTEXT1; + seg->flag= HA_BLOB_PART | HA_VAR_LENGTH_PART; + seg->start= REC_BLOB_OFFSET; + seg->length= 0; /* blob key segments must have length=0 */ + seg->bit_start= REC_BLOB_PACKLEN; /* actual packlength */ + seg->charset= &my_charset_latin1; + if (nullable) + { + seg->null_bit= 2; + seg->null_pos= REC_NULL_OFFSET; + } + else + { + seg->null_bit= 0; + } +} + + +static void setup_keydef(HP_KEYDEF *keydef, HA_KEYSEG *seg, uint keysegs) +{ + uint i; + memset(keydef, 0, sizeof(*keydef)); + keydef->keysegs= keysegs; + keydef->seg= seg; + keydef->algorithm= HA_KEY_ALG_HASH; + keydef->flag= HA_NOSAME; + keydef->length= 0; /* computed below */ + keydef->has_blob_seg= 1; + + /* Compute keydef->length: sum of key part sizes */ + for (i= 0; i < keysegs; i++) + { + if (seg[i].null_bit) + keydef->length++; + if (seg[i].flag & HA_BLOB_PART) + keydef->length+= 4 + PTR_SIZE; + else if (seg[i].flag & HA_VAR_LENGTH_PART) + keydef->length+= 2 + seg[i].length; + else + keydef->length+= seg[i].length; + } +} + + +/* + Build a record
with blob data. + rec must be at least REC_LENGTH bytes. + Sets the blob field to point to blob_data with blob_len bytes. +*/ +static void build_record(uchar *rec, int32 int_val, + const uchar *blob_data, uint16 blob_len, + my_bool blob_is_null) +{ + memset(rec, 0, REC_LENGTH); + + /* null bitmap */ + if (blob_is_null) + rec[REC_NULL_OFFSET]= 2; /* null_bit=2 for blob */ + else + rec[REC_NULL_OFFSET]= 0; + + /* int4 field */ + int4store(rec + REC_INT_OFFSET, int_val); + + /* blob field: packlength (2 bytes) + data pointer (8 bytes) */ + int2store(rec + REC_BLOB_OFFSET, blob_len); + memcpy(rec + REC_BLOB_OFFSET + REC_BLOB_PACKLEN, &blob_data, PTR_SIZE); +} + + +/* + Test 1: hp_rec_hashnr and hp_make_key + hp_hashnr produce same hash + for various blob data sizes. +*/ +static void test_hash_consistency(void) +{ + HA_KEYSEG seg; + HP_KEYDEF keydef; + uchar rec[REC_LENGTH]; + uchar key_buf[KEY_BUF_SIZE]; + ulong rec_hash_a, rec_hash_b, rec_hash_c; + + /* Case A: very small blob (fits in single record, <= visible - 10) */ + const uchar *data_a= (const uchar*) "Hi"; + uint16 len_a= 2; + + /* Case B: medium blob (fits in single run, zero-copy) */ + const uchar *data_b= (const uchar*) "Hello World! This is a medium blob."; + uint16 len_b= 35; + + /* Case C: larger blob data (would need multiple runs in real storage) */ + uchar data_c[200]; + uint16 len_c= sizeof(data_c); + memset(data_c, 'X', sizeof(data_c)); + /* Make it non-uniform so hash is more interesting */ + data_c[0]= 'A'; + data_c[50]= 'B'; + data_c[100]= 'C'; + data_c[199]= 'Z'; + + setup_blob_keyseg(&seg, FALSE); + setup_keydef(&keydef, &seg, 1); + + /* --- Case A: small blob --- */ + build_record(rec, 1, data_a, len_a, FALSE); + + rec_hash_a= hp_rec_hashnr(&keydef, rec); + hp_make_key(&keydef, key_buf, rec); + /* Now hash the pre-built key */ + { + /* hp_hashnr is static, so we test via hp_make_key + hp_rec_hashnr. + But we can verify the key format is correct. 
*/ + uint32 key_blob_len= uint4korr(key_buf); + const uchar *key_blob_data; + memcpy(&key_blob_data, key_buf + 4, PTR_SIZE); + ok(key_blob_len == len_a, + "Case A: hp_make_key blob length = %u (expected %u)", + (uint) key_blob_len, (uint) len_a); + ok(key_blob_data == data_a, + "Case A: hp_make_key blob pointer matches source data"); + ok(memcmp(key_blob_data, data_a, len_a) == 0, + "Case A: hp_make_key blob data content matches"); + } + + /* --- Case B: medium blob --- */ + build_record(rec, 2, data_b, len_b, FALSE); + + rec_hash_b= hp_rec_hashnr(&keydef, rec); + hp_make_key(&keydef, key_buf, rec); + { + uint32 key_blob_len= uint4korr(key_buf); + const uchar *key_blob_data; + memcpy(&key_blob_data, key_buf + 4, PTR_SIZE); + ok(key_blob_len == len_b, + "Case B: hp_make_key blob length = %u (expected %u)", + (uint) key_blob_len, (uint) len_b); + ok(key_blob_data == data_b, + "Case B: hp_make_key blob pointer matches source data"); + ok(memcmp(key_blob_data, data_b, len_b) == 0, + "Case B: hp_make_key blob data content matches"); + } + + /* --- Case C: large blob --- */ + build_record(rec, 3, data_c, len_c, FALSE); + + rec_hash_c= hp_rec_hashnr(&keydef, rec); + hp_make_key(&keydef, key_buf, rec); + { + uint32 key_blob_len= uint4korr(key_buf); + const uchar *key_blob_data; + memcpy(&key_blob_data, key_buf + 4, PTR_SIZE); + ok(key_blob_len == len_c, + "Case C: hp_make_key blob length = %u (expected %u)", + (uint) key_blob_len, (uint) len_c); + ok(key_blob_data == data_c, + "Case C: hp_make_key blob pointer matches source data"); + ok(memcmp(key_blob_data, data_c, len_c) == 0, + "Case C: hp_make_key blob data content matches"); + } + + /* Different data must produce different hashes */ + ok(rec_hash_a != rec_hash_b, + "Hash A (%lu) != Hash B (%lu)", rec_hash_a, rec_hash_b); + ok(rec_hash_a != rec_hash_c, + "Hash A (%lu) != Hash C (%lu)", rec_hash_a, rec_hash_c); + ok(rec_hash_b != rec_hash_c, + "Hash B (%lu) != Hash C (%lu)", rec_hash_b, rec_hash_c); +} + + +/* + Test 2: hp_rec_key_cmp with blob segments. + Two records with same blob data must compare equal. + Two records with different blob data must compare unequal. +*/ +static void test_rec_key_cmp(void) +{ + HA_KEYSEG seg; + HP_KEYDEF keydef; + uchar rec1[REC_LENGTH], rec2[REC_LENGTH]; + + const uchar *data1= (const uchar*) "same_data_value!"; + uint16 len1= 16; + const uchar *data2= (const uchar*) "different_value!"; + uint16 len2= 16; + const uchar *data3= (const uchar*) "short"; + uint16 len3= 5; + + setup_blob_keyseg(&seg, FALSE); + setup_keydef(&keydef, &seg, 1); + + /* Same data, same length */ + build_record(rec1, 1, data1, len1, FALSE); + build_record(rec2, 2, data1, len1, FALSE); /* different int, same blob */ + ok(hp_rec_key_cmp(&keydef, rec1, rec2, NULL) == 0, + "rec_key_cmp: same blob data compares equal"); + + /* Different data, same length */ + build_record(rec2, 2, data2, len2, FALSE); + ok(hp_rec_key_cmp(&keydef, rec1, rec2, NULL) != 0, + "rec_key_cmp: different blob data compares unequal"); + + /* Different length (PAD SPACE: "short" vs "short\0\0..." 
may differ) */ + build_record(rec2, 2, data3, len3, FALSE); + /* For binary charset, different lengths always means different */ + { + HA_KEYSEG seg_bin; + HP_KEYDEF keydef_bin; + setup_blob_keyseg(&seg_bin, FALSE); + seg_bin.charset= &my_charset_bin; + setup_keydef(&keydef_bin, &seg_bin, 1); + + build_record(rec1, 1, data1, len1, FALSE); + build_record(rec2, 2, data3, len3, FALSE); + ok(hp_rec_key_cmp(&keydef_bin, rec1, rec2, NULL) != 0, + "rec_key_cmp: different length blobs compare unequal (binary)"); + } +} + + +/* + Test 3: NULL blob handling. + Two NULL blobs must compare equal. + NULL vs non-NULL must compare unequal. + NULL blob must hash consistently. +*/ +static void test_null_blob(void) +{ + HA_KEYSEG seg; + HP_KEYDEF keydef; + uchar rec1[REC_LENGTH], rec2[REC_LENGTH]; + uchar key_buf[KEY_BUF_SIZE]; + ulong hash1, hash2; + + const uchar *data1= (const uchar*) "not_null_data"; + uint16 len1= 13; + + setup_blob_keyseg(&seg, TRUE); /* nullable */ + setup_keydef(&keydef, &seg, 1); + + /* Both NULL */ + build_record(rec1, 1, NULL, 0, TRUE); + build_record(rec2, 2, NULL, 0, TRUE); + ok(hp_rec_key_cmp(&keydef, rec1, rec2, NULL) == 0, + "null_blob: two NULLs compare equal"); + + /* NULL vs non-NULL */ + build_record(rec2, 2, data1, len1, FALSE); + ok(hp_rec_key_cmp(&keydef, rec1, rec2, NULL) != 0, + "null_blob: NULL vs non-NULL compares unequal"); + + /* NULL hash consistency */ + hash1= hp_rec_hashnr(&keydef, rec1); + hash2= hp_rec_hashnr(&keydef, rec1); + ok(hash1 == hash2, + "null_blob: NULL blob hashes consistently (%lu == %lu)", hash1, hash2); + + /* NULL hash differs from empty non-NULL */ + { + const uchar *empty= (const uchar*) ""; + ulong hash_empty; + build_record(rec2, 2, empty, 0, FALSE); + hash_empty= hp_rec_hashnr(&keydef, rec2); + ok(hash1 != hash_empty, + "null_blob: NULL hash (%lu) != empty non-NULL hash (%lu)", + hash1, hash_empty); + } + + /* hp_make_key for NULL blob */ + build_record(rec1, 1, NULL, 0, TRUE); + hp_make_key(&keydef, key_buf, rec1); + ok(key_buf[0] == 1, + "null_blob: hp_make_key sets null flag byte to 1 for NULL"); +} + + +/* + Test 4: empty blob (non-NULL, length=0). +*/ +static void test_empty_blob(void) +{ + HA_KEYSEG seg; + HP_KEYDEF keydef; + uchar rec1[REC_LENGTH], rec2[REC_LENGTH]; + ulong h1, h2; + + const uchar *empty= (const uchar*) ""; + const uchar *nonempty= (const uchar*) "x"; + + setup_blob_keyseg(&seg, FALSE); + setup_keydef(&keydef, &seg, 1); + + /* Two empty blobs */ + build_record(rec1, 1, empty, 0, FALSE); + build_record(rec2, 2, empty, 0, FALSE); + ok(hp_rec_key_cmp(&keydef, rec1, rec2, NULL) == 0, + "empty_blob: two empty blobs compare equal"); + + /* Empty vs non-empty */ + build_record(rec2, 2, nonempty, 1, FALSE); + ok(hp_rec_key_cmp(&keydef, rec1, rec2, NULL) != 0, + "empty_blob: empty vs non-empty compares unequal"); + + /* Hash consistency for empty */ + h1= hp_rec_hashnr(&keydef, rec1); + h2= hp_rec_hashnr(&keydef, rec1); + ok(h1 == h2, "empty_blob: empty blob hashes consistently"); +} + + +/* + Test 5: Multi-segment key with int + blob. + Verifies that key advancement works correctly when blob segments + have seg->length=0. 
+*/ +static void test_multi_segment_key(void) +{ + HA_KEYSEG segs[2]; + HP_KEYDEF keydef; + uchar rec1[REC_LENGTH], rec2[REC_LENGTH]; + uchar key_buf[KEY_BUF_SIZE]; + const uchar *blob_data= (const uchar*) "multi_seg_test_data"; + uint16 blob_len= 19; + const uchar *blob_data2= (const uchar*) "different_blob_data"; + uint16 blob_len2= 19; + + /* Segment 0: int4 at offset 1, length 4 */ + memset(&segs[0], 0, sizeof(segs[0])); + segs[0].type= HA_KEYTYPE_BINARY; + segs[0].start= REC_INT_OFFSET; + segs[0].length= 4; + segs[0].charset= &my_charset_bin; + segs[0].null_bit= 0; + + /* Segment 1: blob at offset 5, packlength 2 */ + setup_blob_keyseg(&segs[1], FALSE); + + setup_keydef(&keydef, segs, 2); + + /* Same int, same blob */ + build_record(rec1, 42, blob_data, blob_len, FALSE); + build_record(rec2, 42, blob_data, blob_len, FALSE); + ok(hp_rec_key_cmp(&keydef, rec1, rec2, NULL) == 0, + "multi_seg: same int + same blob compares equal"); + + /* Different int, same blob */ + build_record(rec2, 99, blob_data, blob_len, FALSE); + ok(hp_rec_key_cmp(&keydef, rec1, rec2, NULL) != 0, + "multi_seg: different int + same blob compares unequal"); + + /* Same int, different blob */ + build_record(rec2, 42, blob_data2, blob_len2, FALSE); + ok(hp_rec_key_cmp(&keydef, rec1, rec2, NULL) != 0, + "multi_seg: same int + different blob compares unequal"); + + /* Hash consistency: record hash matches after make_key round-trip */ + build_record(rec1, 42, blob_data, blob_len, FALSE); + (void) hp_rec_hashnr(&keydef, rec1); + + hp_make_key(&keydef, key_buf, rec1); + /* Verify the key contains int4 (4 bytes) + blob (4B len + 8B ptr) */ + { + int32 key_int= sint4korr(key_buf); + uint32 key_blob_len= uint4korr(key_buf + 4); + const uchar *key_blob_data; + memcpy(&key_blob_data, key_buf + 8, PTR_SIZE); + + ok(key_int == 42, + "multi_seg: hp_make_key int = %d (expected 42)", (int) key_int); + ok(key_blob_len == blob_len, + "multi_seg: hp_make_key blob length = %u (expected %u)", + (uint) key_blob_len, (uint) blob_len); + ok(key_blob_data == blob_data, + "multi_seg: hp_make_key blob pointer matches"); + } +} + + +/* + Test 6: PAD SPACE collation behavior. + With PAD SPACE (default for latin1), 'a' and 'a ' should + compare equal and produce the same hash. +*/ +static void test_pad_space(void) +{ + HA_KEYSEG seg; + HP_KEYDEF keydef; + uchar rec1[REC_LENGTH], rec2[REC_LENGTH]; + const uchar *data_no_pad= (const uchar*) "abc"; + const uchar *data_padded= (const uchar*) "abc "; + ulong h1, h2; + + setup_blob_keyseg(&seg, FALSE); + seg.charset= &my_charset_latin1; /* PAD SPACE */ + setup_keydef(&keydef, &seg, 1); + + build_record(rec1, 1, data_no_pad, 3, FALSE); + build_record(rec2, 2, data_padded, 6, FALSE); + + ok(hp_rec_key_cmp(&keydef, rec1, rec2, NULL) == 0, + "pad_space: 'abc' == 'abc ' with PAD SPACE collation"); + + /* Hashes should also match for PAD SPACE */ + h1= hp_rec_hashnr(&keydef, rec1); + h2= hp_rec_hashnr(&keydef, rec2); + ok(h1 == h2, + "pad_space: hash('abc') == hash('abc ') (%lu == %lu)", h1, h2); + + /* With binary charset (NO PAD), they should differ */ + seg.charset= &my_charset_bin; + ok(hp_rec_key_cmp(&keydef, rec1, rec2, NULL) != 0, + "pad_space: 'abc' != 'abc ' with binary charset"); +} + + +/* + Test 7: DISTINCT key path — varstring key format. + + The SQL layer builds lookup keys in varstring format (2B length prefix + + inline data) via Field_blob::new_key_field() -> Field_varstring. 
The HEAP + handler's rebuild_blob_key() converts this to record[0]'s blob descriptor + format, then hp_make_key() builds the hash key. + + This test simulates the full round-trip: + 1. Build a record with blob data (as at INSERT time) + 2. Compute hp_rec_hashnr() (stored in HASH_INFO at write time) + 3. Build a varstring-format key (as the SQL layer would for lookup) + 4. Parse the varstring key into a record's blob field (rebuild_blob_key) + 5. hp_make_key() from that record, then hp_rec_hashnr() on the record + 6. Verify the hashes match +*/ +static void test_distinct_key_format(void) +{ + HA_KEYSEG seg; + HP_KEYDEF keydef; + uchar rec_insert[REC_LENGTH]; /* record at INSERT time */ + uchar rec_lookup[REC_LENGTH]; /* record rebuilt from lookup key */ + ulong insert_hash, lookup_hash; + + const uchar *blob_data= (const uchar*) "1 - 01xxxxxxxxxx"; + uint16 blob_len= 16; + + /* + Step 3: Build varstring-format key (what SQL layer produces). + Format: null_flag(1) + uint2_length(2) + inline_data(blob_len) + */ + uchar varstring_key[1 + 2 + 256]; + + setup_blob_keyseg(&seg, TRUE); /* nullable, like real DISTINCT keys */ + setup_keydef(&keydef, &seg, 1); + + /* Step 1-2: INSERT-time record and hash */ + build_record(rec_insert, 1, blob_data, blob_len, FALSE); + insert_hash= hp_rec_hashnr(&keydef, rec_insert); + + varstring_key[0]= 0; /* not null */ + int2store(varstring_key + 1, blob_len); + memcpy(varstring_key + 3, blob_data, blob_len); + + /* + Step 4: Parse varstring key into rec_lookup's blob field. + This is what rebuild_blob_key() does. + */ + memset(rec_lookup, 0, REC_LENGTH); + { + const uchar *key_pos= varstring_key; + uint16 varchar_len; + const uchar *varchar_data; + uint32 bl; + /* skip null byte */ + key_pos++; + /* read varstring: 2B length + data */ + varchar_len= uint2korr(key_pos); + varchar_data= key_pos + 2; + + /* Write into rec_lookup's blob field */ + bl= (uint32) varchar_len; + memcpy(rec_lookup + REC_BLOB_OFFSET, &bl, REC_BLOB_PACKLEN); + memcpy(rec_lookup + REC_BLOB_OFFSET + REC_BLOB_PACKLEN, + &varchar_data, PTR_SIZE); + } + + /* Step 5: hp_make_key from rec_lookup, then hash the record */ + lookup_hash= hp_rec_hashnr(&keydef, rec_lookup); + + /* Step 6: hashes must match */ + ok(insert_hash == lookup_hash, + "distinct_key: INSERT hash (%lu) == lookup hash (%lu)", + insert_hash, lookup_hash); + + /* Also verify comparison works */ + ok(hp_rec_key_cmp(&keydef, rec_insert, rec_lookup, NULL) == 0, + "distinct_key: INSERT record == lookup record via rec_key_cmp"); +} + + +/* + Test 8: DISTINCT key truncation bug. + + When the DISTINCT key path sets key_part.length = pack_length() = 10 + (blob descriptor size), and new_key_field() creates Field_varstring(10), + the outer value (e.g. 16 bytes) gets truncated to 10 bytes. The lookup + key then has only 10 bytes but the stored record was hashed with 16 bytes. + This must produce different hashes — demonstrating the bug. 
+*/ +static void test_distinct_key_truncation(void) +{ + HA_KEYSEG seg; + HP_KEYDEF keydef; + uchar rec_full[REC_LENGTH]; + uchar rec_trunc[REC_LENGTH]; + ulong full_hash, trunc_hash; + + const uchar *full_data= (const uchar*) "1 - 01xxxxxxxxxx"; /* 16 bytes */ + uint16 full_len= 16; + uint16 trunc_len= 10; /* pack_length() = packlength(2) + sizeof(ptr)(8) */ + + setup_blob_keyseg(&seg, FALSE); + seg.charset= &my_charset_bin; /* binary: no PAD SPACE confusion */ + setup_keydef(&keydef, &seg, 1); + + /* Full record (as stored at INSERT time) */ + build_record(rec_full, 1, full_data, full_len, FALSE); + full_hash= hp_rec_hashnr(&keydef, rec_full); + + /* Truncated record (as rebuilt from truncated varstring key) */ + build_record(rec_trunc, 1, full_data, trunc_len, FALSE); + trunc_hash= hp_rec_hashnr(&keydef, rec_trunc); + + /* Hashes MUST differ — this is the bug: truncation causes lookup miss */ + ok(full_hash != trunc_hash, + "distinct_trunc: full hash (%lu) != truncated hash (%lu) — " + "truncation causes hash mismatch (the bug)", + full_hash, trunc_hash); + + /* Comparison must also differ */ + ok(hp_rec_key_cmp(&keydef, rec_full, rec_trunc, NULL) != 0, + "distinct_trunc: full vs truncated compares unequal"); +} + + +/* + Test 9: GROUP BY key format — varstring with key_length override. + + The GROUP BY path overrides the key field length to max_length (not + key_length() which is 0 for blobs). This means the varstring key + holds the full data. Verify hash consistency. +*/ +static void test_group_by_key_format(void) +{ + HA_KEYSEG seg; + HP_KEYDEF keydef; + uchar rec_insert[REC_LENGTH]; + uchar rec_lookup[REC_LENGTH]; + ulong insert_hash, lookup_hash; + + /* GROUP BY on group_concat result: blob data */ + const uchar *data= (const uchar*) "group_concat_result_data_here!!"; + uint16 data_len= 31; + + uchar varstring_key[1 + 2 + 256]; + + setup_blob_keyseg(&seg, FALSE); + setup_keydef(&keydef, &seg, 1); + + /* INSERT-time hash */ + build_record(rec_insert, 1, data, data_len, FALSE); + insert_hash= hp_rec_hashnr(&keydef, rec_insert); + + /* + Simulate rebuild_blob_key: parse varstring key, populate rec_lookup. + In GROUP BY, key_field_length = max_length (not 0, not pack_length). + */ + /* no null bit for this test */ + int2store(varstring_key, data_len); + memcpy(varstring_key + 2, data, data_len); + + memset(rec_lookup, 0, REC_LENGTH); + { + const uchar *key_pos= varstring_key; + uint16 varchar_len; + const uchar *varchar_data; + uint32 bl; + + varchar_len= uint2korr(key_pos); + varchar_data= key_pos + 2; + + bl= (uint32) varchar_len; + memcpy(rec_lookup + REC_BLOB_OFFSET, &bl, REC_BLOB_PACKLEN); + memcpy(rec_lookup + REC_BLOB_OFFSET + REC_BLOB_PACKLEN, + &varchar_data, PTR_SIZE); + } + + lookup_hash= hp_rec_hashnr(&keydef, rec_lookup); + + ok(insert_hash == lookup_hash, + "group_by_key: INSERT hash (%lu) == lookup hash (%lu)", + insert_hash, lookup_hash); + + ok(hp_rec_key_cmp(&keydef, rec_insert, rec_lookup, NULL) == 0, + "group_by_key: INSERT record == lookup record"); +} + + +/* + Test 10: Multi-segment DISTINCT key (varchar + blob). + + Tests the key advancement logic when a non-blob varchar segment + precedes a blob segment, both with seg->length handling. 
+*/ +static void test_multi_seg_distinct(void) +{ + HA_KEYSEG segs[2]; + HP_KEYDEF keydef; + uchar rec1[REC_LENGTH], rec2[REC_LENGTH]; + const uchar *blob1= (const uchar*) "sj_materialize_value_1"; + const uchar *blob2= (const uchar*) "sj_materialize_value_2"; + ulong h1, h2, h3; + + /* Segment 0: int4 at offset 1, length 4 */ + memset(&segs[0], 0, sizeof(segs[0])); + segs[0].type= HA_KEYTYPE_BINARY; + segs[0].start= REC_INT_OFFSET; + segs[0].length= 4; + segs[0].charset= &my_charset_bin; + segs[0].null_bit= 0; + + /* Segment 1: blob */ + setup_blob_keyseg(&segs[1], TRUE); /* nullable */ + setup_keydef(&keydef, segs, 2); + + /* Same int, same blob */ + build_record(rec1, 100, blob1, 22, FALSE); + build_record(rec2, 100, blob1, 22, FALSE); + + h1= hp_rec_hashnr(&keydef, rec1); + h2= hp_rec_hashnr(&keydef, rec2); + ok(h1 == h2, + "multi_distinct: same data hashes equal (%lu == %lu)", h1, h2); + ok(hp_rec_key_cmp(&keydef, rec1, rec2, NULL) == 0, + "multi_distinct: same data compares equal"); + + /* Same int, different blob */ + build_record(rec2, 100, blob2, 22, FALSE); + h3= hp_rec_hashnr(&keydef, rec2); + ok(h1 != h3, + "multi_distinct: different blob hashes differ (%lu != %lu)", h1, h3); + ok(hp_rec_key_cmp(&keydef, rec1, rec2, NULL) != 0, + "multi_distinct: different blob compares unequal"); + + /* Same int, NULL blob vs non-NULL blob */ + build_record(rec2, 100, NULL, 0, TRUE); + ok(hp_rec_key_cmp(&keydef, rec1, rec2, NULL) != 0, + "multi_distinct: non-NULL vs NULL blob compares unequal"); +} + + +int main(int argc __attribute__((unused)), + char **argv __attribute__((unused))) +{ + MY_INIT("hp_test_hash"); + plan(43); + + diag("Test 1: Hash consistency between record and key formats"); + test_hash_consistency(); + + diag("Test 2: Record-to-record comparison with blobs"); + test_rec_key_cmp(); + + diag("Test 3: NULL blob handling"); + test_null_blob(); + + diag("Test 4: Empty blob handling"); + test_empty_blob(); + + diag("Test 5: Multi-segment key (int + blob)"); + test_multi_segment_key(); + + diag("Test 6: PAD SPACE collation"); + test_pad_space(); + + diag("Test 7: DISTINCT key format (varstring round-trip)"); + test_distinct_key_format(); + + diag("Test 8: DISTINCT key truncation bug"); + test_distinct_key_truncation(); + + diag("Test 9: GROUP BY key format"); + test_group_by_key_format(); + + diag("Test 10: Multi-segment DISTINCT key (sj-materialize)"); + test_multi_seg_distinct(); + + my_end(0); + return exit_status(); +} diff --git a/storage/heap/hp_test_key_setup-t.cc b/storage/heap/hp_test_key_setup-t.cc new file mode 100644 index 0000000000000..82cd025bca756 --- /dev/null +++ b/storage/heap/hp_test_key_setup-t.cc @@ -0,0 +1,422 @@ +/* + Unit tests for HEAP blob key handling in heap_prepare_hp_create_info(). + + 1. distinct_key_truncation: heap_prepare_hp_create_info() must override + key_part->length for blob key parts from pack_length() to + max_data_length(). The DISTINCT key path sets key_part.length = + pack_length() = 10, and the SQL layer's new_key_field() then + creates Field_varstring(10), which truncates blob data. + + 2. garbage_key_part_flag: heap_prepare_hp_create_info() must use + field->key_part_flag() instead of key_part->key_part_flag, because + SJ weedout and expression cache paths leave key_part_flag + uninitialized. Garbage HA_BLOB_PART bits corrupt the hash index. 
+*/ + +#include <my_global.h> +#include <my_sys.h> +#include <m_string.h> +#include <tap.h> + +#include "sql_priv.h" +#include "sql_class.h" /* THD (full definition) */ +#include "ha_heap.h" +#include "heapdef.h" + +static const LEX_CSTRING test_field_name= {STRING_WITH_LEN("")}; + +/* Wrapper declared in ha_heap.cc */ +extern int test_heap_prepare_hp_create_info(TABLE *table_arg, + bool internal_table, + HP_CREATE_INFO *hp_create_info); + +/* + Record layout for test table (nullable tinyblob(16)): + byte 0: null bitmap (bit 2 = blob null) + bytes 1-2: blob packlength=2 (length, little-endian) + bytes 3-10: blob data pointer (8 bytes) + reclength = 11 +*/ +#define T_REC_NULL_OFFSET 0 +#define T_REC_BLOB_OFFSET 1 +#define T_REC_BLOB_PACKLEN 2 +#define T_REC_LENGTH 11 + + +/* + Helper: create a Field_blob using the full server constructor + (the same one make_table_field uses) via placement new. + Sets field_length = BLOB_PACK_LENGTH_TO_MAX_LENGH(packlength), + matching real server behavior. +*/ +static Field_blob * +make_test_field_blob(void *storage, uchar *ptr, uchar *null_ptr, + uchar null_bit, TABLE_SHARE *share, + uint packlength, CHARSET_INFO *cs) +{ + static const LEX_CSTRING fname= {STRING_WITH_LEN("")}; + return ::new (storage) Field_blob(ptr, null_ptr, null_bit, + Field::NONE, &fname, + share, packlength, + DTCollation(cs)); +} + + +/* + distinct_key_truncation: heap_prepare_hp_create_info must override + key_part->length for blob key parts from pack_length() to + max_data_length(). + + The DISTINCT key path sets key_part.length = pack_length() = 10. + The SQL layer's new_key_field() then creates Field_varstring(10), + which truncates blob data longer than 10 bytes. + + heap_prepare_hp_create_info must widen key_part->length to + max_data_length() (the maximum data the blob type can hold) + and update store_length/key_length accordingly, so that + new_key_field() creates a Field_varstring large enough for + the full blob data. + + FAILS when the override is missing (key_part.length stays at 10). + PASSES when heap_prepare_hp_create_info overrides to max_data_length().
+*/ +static void test_distinct_key_truncation() +{ + uchar local_rec[T_REC_LENGTH]; + memset(local_rec, 0, sizeof(local_rec)); + + TABLE_SHARE share; + memset(static_cast<void*>(&share), 0, sizeof(share)); + share.fields= 1; + share.blob_fields= 0; /* Field_blob constructor increments this */ + share.keys= 1; + share.reclength= T_REC_LENGTH; + share.rec_buff_length= T_REC_LENGTH; + share.db_record_offset= 1; + + alignas(Field_blob) char bf_storage[sizeof(Field_blob)]; + Field_blob *bfp= make_test_field_blob(bf_storage, + local_rec + T_REC_BLOB_OFFSET, + local_rec + T_REC_NULL_OFFSET, + 2, &share, + T_REC_BLOB_PACKLEN, + &my_charset_bin); + Field_blob &bf= *bfp; + bf.field_index= 0; + + Field *field_array[2]= { &bf, NULL }; + + KEY_PART_INFO local_kpi; + memset(&local_kpi, 0, sizeof(local_kpi)); + local_kpi.field= &bf; + local_kpi.offset= T_REC_BLOB_OFFSET; + local_kpi.length= (uint16) bf.pack_length(); /* = 10 (the bug) */ + local_kpi.key_part_flag= bf.key_part_flag(); + local_kpi.type= bf.key_type(); + + KEY local_sql_key; + memset(&local_sql_key, 0, sizeof(local_sql_key)); + local_sql_key.user_defined_key_parts= 1; + local_sql_key.usable_key_parts= 1; + local_sql_key.key_part= &local_kpi; + local_sql_key.algorithm= HA_KEY_ALG_HASH; + + TABLE test_table; + memset(static_cast<void*>(&test_table), 0, sizeof(test_table)); + test_table.record[0]= local_rec; + test_table.s= &share; + test_table.field= field_array; + test_table.key_info= &local_sql_key; + share.key_info= &local_sql_key; + + bf.table= &test_table; + + uint blob_offsets[1]= { 0 }; + share.blob_field= blob_offsets; + + /* + Simulate DISTINCT key path: set store_length and key_length + based on key_part.length = pack_length() = 10, same as finalize(). + */ + local_kpi.store_length= local_kpi.length; + if (bf.real_maybe_null()) + local_kpi.store_length+= HA_KEY_NULL_LENGTH; + local_kpi.store_length+= bf.key_part_length_bytes(); + local_sql_key.key_length= local_kpi.store_length; + + ok(local_kpi.length == bf.pack_length(), + "distinct_key_truncation setup: key_part.length = pack_length() = %u", + (uint) local_kpi.length); + + char *fake_thd= (char*) calloc(1, sizeof(THD)); + THD *real_thd= (THD*) fake_thd; + real_thd->variables.max_heap_table_size= 1024*1024; + set_current_thd(real_thd); + + HP_CREATE_INFO hp_ci; + memset(&hp_ci, 0, sizeof(hp_ci)); + hp_ci.max_table_size= 1024*1024; + hp_ci.keys= 1; + hp_ci.reclength= T_REC_LENGTH; + + int err= test_heap_prepare_hp_create_info(&test_table, TRUE, &hp_ci); + + set_current_thd(NULL); + free(fake_thd); + + ok(err == 0, + "distinct_key_truncation: heap_prepare succeeded (err=%d)", err); + + /* + Phase 1 tests: key_part.length widening to max_data_length(). + In MDEV-38975 proper (without varchar-to-blob promotion), + hp_create.c normalizes blob segments at runtime (zeroes + seg->length, derives bit_start from blob_descs), so this + widening is not needed. These assertions are deferred to + Phase 1 where they are exercised.
+ */ +#if 0 /* Phase 1: distinct_key_truncation assertions */ + uint32 expected_length= bf.max_data_length(); + ok(local_kpi.length == expected_length, + "distinct_key_truncation: key_part.length (%u) == max_data_length() (%u)", + (uint) local_kpi.length, (uint) expected_length); + + uint expected_store_length= expected_length; + if (bf.real_maybe_null()) + expected_store_length+= HA_KEY_NULL_LENGTH; + expected_store_length+= bf.key_part_length_bytes(); + ok(local_kpi.store_length == expected_store_length, + "distinct_key_truncation: store_length (%u) == expected (%u)", + (uint) local_kpi.store_length, (uint) expected_store_length); + ok(local_sql_key.key_length == expected_store_length, + "distinct_key_truncation: key_length (%u) == expected (%u)", + (uint) local_sql_key.key_length, (uint) expected_store_length); +#endif + + my_free(hp_ci.keydef); + my_free(hp_ci.blob_descs); + bf.~Field_blob(); +} + + +/* + garbage_key_part_flag: heap_prepare_hp_create_info uses + key_part->key_part_flag to decide whether a key segment is a blob. + Several SQL layer paths (SJ weedout, expression cache) leave + key_part_flag uninitialized. If the garbage value has HA_BLOB_PART + set, heap_prepare_hp_create_info zeroes seg->length and treats the + segment as a blob, corrupting the HEAP hash index for non-blob + VARCHAR/VARBINARY keys. + + This manifests as: + - Row loss in SJ lookups (HA_ERR_KEY_NOT_FOUND on non-blob keys) + - COUNT(*)=1 instead of thousands because every insert after the + first is rejected as a duplicate (all records hash identically + when seg->length=0) + + Test: create a TABLE with a non-blob Field_varstring key and set + key_part_flag to garbage containing HA_BLOB_PART. Call + test_heap_prepare_hp_create_info and verify the resulting HEAP key + segment has the correct length (not 0) and does not have HA_BLOB_PART. +*/ + +/* + Record layout for varchar test table (non-nullable varbinary(28)): + byte 0: null bitmap (all zero for NOT NULL) + byte 1: varchar length_bytes=1 (field_length=28 < 256) + bytes 2-29: varchar data (28 bytes max) + reclength = 30 +*/ +#define V_REC_NULL_OFFSET 0 +#define V_REC_VARCHAR_OFFSET 1 +#define V_REC_VARCHAR_LEN 28 +#define V_REC_LENGTH 30 + + +class Hp_test_varchar_key_flag +{ + alignas(Field_varstring) char vs_storage[sizeof(Field_varstring)]; + Field_varstring *vs_field; + TABLE_SHARE share; + TABLE test_table; + uchar rec_buf[V_REC_LENGTH]; + KEY_PART_INFO local_kpi; + KEY local_sql_key; + /* Members, not ctor-locals: test_table.field and share.blob_field + keep pointing at these after the constructor returns. */ + Field *field_array[2]; + uint blob_offsets[1]; + +public: + Hp_test_varchar_key_flag() + { + memset(rec_buf, 0, sizeof(rec_buf)); + memset(static_cast<void*>(&share), 0, sizeof(share)); + share.fields= 1; + share.keys= 1; + share.reclength= V_REC_LENGTH; + share.rec_buff_length= V_REC_LENGTH; + share.db_record_offset= 1; + + static const LEX_CSTRING fname= {STRING_WITH_LEN("")}; + vs_field= ::new (vs_storage) Field_varstring( + rec_buf + V_REC_VARCHAR_OFFSET, + V_REC_VARCHAR_LEN, + 1, /* length_bytes: 1 for field_length < 256 */ + (uchar*) 0, /* null_ptr: NOT NULL */ + 0, /* null_bit */ + Field::NONE, + &fname, + &share, + DTCollation(&my_charset_bin)); + + vs_field->field_index= 0; + + field_array[0]= vs_field; + field_array[1]= NULL; + + /* + Simulate SJ weedout: leave key_part_flag UNINITIALIZED. + We set it to garbage containing HA_BLOB_PART to reproduce + the exact failure condition.
+ */ + memset(&local_kpi, 0, sizeof(local_kpi)); + local_kpi.field= vs_field; + local_kpi.offset= V_REC_VARCHAR_OFFSET; + local_kpi.length= (uint16) vs_field->key_length(); + local_kpi.type= vs_field->key_type(); + /* Poison key_part_flag with garbage including HA_BLOB_PART (0x20) */ + local_kpi.key_part_flag= 0xA5A5; /* garbage from uninitialized memory */ + + memset(&local_sql_key, 0, sizeof(local_sql_key)); + local_sql_key.user_defined_key_parts= 1; + local_sql_key.usable_key_parts= 1; + local_sql_key.key_part= &local_kpi; + local_sql_key.algorithm= HA_KEY_ALG_HASH; + local_sql_key.key_length= local_kpi.length + 2; /* + varchar pack len */ + + memset(static_cast<void*>(&test_table), 0, sizeof(test_table)); + test_table.record[0]= rec_buf; + test_table.s= &share; + test_table.field= field_array; + test_table.key_info= &local_sql_key; + share.key_info= &local_sql_key; + + vs_field->table= &test_table; + + /* No blob fields */ + blob_offsets[0]= 0; + share.blob_field= blob_offsets; + share.blob_fields= 0; + } + + ~Hp_test_varchar_key_flag() + { + vs_field->~Field_varstring(); + } + + void test_garbage_key_part_flag() + { + /* Verify setup: key_part_flag has HA_BLOB_PART set (the poison) */ + ok((local_kpi.key_part_flag & HA_BLOB_PART) != 0, + "garbage_flag setup: key_part_flag has HA_BLOB_PART set (garbage)"); + ok(local_kpi.length == V_REC_VARCHAR_LEN, + "garbage_flag setup: key_part.length = %u (field_length)", + (uint) local_kpi.length); + + char *fake_thd= (char*) calloc(1, sizeof(THD)); + THD *real_thd= (THD*) fake_thd; + real_thd->variables.max_heap_table_size= 1024*1024; + set_current_thd(real_thd); + + HP_CREATE_INFO hp_ci; + memset(&hp_ci, 0, sizeof(hp_ci)); + hp_ci.max_table_size= 1024*1024; + hp_ci.keys= 1; + hp_ci.reclength= V_REC_LENGTH; + + int err= test_heap_prepare_hp_create_info(&test_table, TRUE, &hp_ci); + + set_current_thd(NULL); + free(fake_thd); + + ok(err == 0, + "garbage_flag: heap_prepare succeeded (err=%d)", err); + + HA_KEYSEG *seg= hp_ci.keydef[0].seg; + ok(seg->length == V_REC_VARCHAR_LEN, + "garbage_flag: seg->length = %u (expected %u, NOT 0)", + (uint) seg->length, (uint) V_REC_VARCHAR_LEN); + + /* + Phase 1 test: seg->flag must not have HA_BLOB_PART. + In MDEV-38975 proper, hp_create.c strips spurious HA_BLOB_PART + via blob_descs cross-check, so this is handled at runtime. + The heap_prepare_hp_create_info fix (field->key_part_flag() + instead of key_part->key_part_flag) is deferred to Phase 1.
+ */ +#if 0 /* Phase 1: garbage_key_part_flag assertion */ + ok(!(seg->flag & HA_BLOB_PART), + "garbage_flag: seg->flag (0x%x) does NOT have HA_BLOB_PART", + (uint) seg->flag); +#endif + + HP_KEYDEF *kd= &hp_ci.keydef[0]; + + { + uchar mk1[64], mk2[64]; + memset(mk1, 0, sizeof(mk1)); + memset(mk2, 0, sizeof(mk2)); + uchar mr1[V_REC_LENGTH], mr2[V_REC_LENGTH]; + memset(mr1, 0, sizeof(mr1)); + mr1[V_REC_VARCHAR_OFFSET]= 4; + memcpy(mr1 + V_REC_VARCHAR_OFFSET + 1, "XXXX", 4); + memset(mr2, 0, sizeof(mr2)); + mr2[V_REC_VARCHAR_OFFSET]= 4; + memcpy(mr2 + V_REC_VARCHAR_OFFSET + 1, "YYYY", 4); + hp_make_key(kd, mk1, mr1); + hp_make_key(kd, mk2, mr2); + ok(memcmp(mk1, mk2, 2 + V_REC_VARCHAR_LEN) != 0, + "garbage_flag: hp_make_key produces different keys for different values"); + } + + /* Record 1: "AAAA" */ + uchar r1[V_REC_LENGTH]; + memset(r1, 0, sizeof(r1)); + r1[V_REC_VARCHAR_OFFSET]= 4; /* length=4, 1-byte prefix */ + memcpy(r1 + V_REC_VARCHAR_OFFSET + 1, "AAAA", 4); + + /* Record 2: "BBBB" */ + uchar r2[V_REC_LENGTH]; + memset(r2, 0, sizeof(r2)); + r2[V_REC_VARCHAR_OFFSET]= 4; + memcpy(r2 + V_REC_VARCHAR_OFFSET + 1, "BBBB", 4); + + ulong rh1= hp_rec_hashnr(kd, r1); + ulong rh2= hp_rec_hashnr(kd, r2); + + ok(rh1 != rh2, + "garbage_flag: different records produce different hashes " + "(rh1=%lu, rh2=%lu)", rh1, rh2); + + ok(hp_rec_key_cmp(kd, r1, r2, NULL) != 0, + "garbage_flag: different records compare as different"); + + my_free(hp_ci.keydef); + } +}; + + +int main(int argc __attribute__((unused)), + char **argv __attribute__((unused))) +{ + MY_INIT("hp_test_key_setup"); + /* Field constructors reference system_charset_info via DTCollation */ + system_charset_info= &my_charset_latin1; + plan(9); + + diag("distinct_key_truncation: key_part->length widened for blob key parts"); + test_distinct_key_truncation(); + + diag("garbage_key_part_flag: uninitialized key_part_flag corrupts non-blob keys"); + Hp_test_varchar_key_flag t2; + t2.test_garbage_key_part_flag(); + + my_end(0); + return exit_status(); +} From e975275b09fcd1f218c875f51a8c85f193752323 Mon Sep 17 00:00:00 2001 From: Arcadiy Ivanov Date: Mon, 16 Mar 2026 20:13:22 -0400 Subject: [PATCH 06/27] HEAP GROUP BY / DISTINCT on TEXT/BLOB columns Enable GROUP BY and DISTINCT operations on TEXT/BLOB columns to use HEAP temp tables instead of falling back to Aria. 
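Conceptually, the HEAP-side key rebuild works as sketched below (shape only; rebuild_blob_part is a hypothetical name for illustration, the real logic lives in rebuild_key_from_group_buff() in ha_heap.cc, detailed in the change list that follows): the SQL layer's group buffer stores each blob key part in Field_varstring format, so the engine re-points record[0]'s blob descriptor at the inline data and then rebuilds the hash key with hp_make_key().

#include <my_global.h> /* uchar, uint2korr, int2store */

/* Sketch, assuming one nullable blob key part with a 2-byte packlength.
   'buff' points at this key part inside the SQL layer's group buffer:
   [1B null flag][2B little-endian length][inline data ...] */
static void rebuild_blob_part(uchar *rec, uint blob_offset, uint packlength,
                              const uchar *buff)
{
  uint32 len;
  const uchar *data;

  buff++;                            /* skip the null-indicator byte */
  len= uint2korr(buff);              /* varstring 2-byte length prefix */
  data= buff + 2;                    /* inline data follows the prefix */

  /* Rewrite record[0]'s blob descriptor: length, then data pointer */
  int2store(rec + blob_offset, len); /* packlength == 2 assumed */
  memcpy(rec + blob_offset + packlength, &data, sizeof(uchar*));
  /* hp_make_key() can now hash the blob through the descriptor */
}

This mirrors the varstring round-trip already exercised by test_distinct_key_format() in hp_test_hash-t.c.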
**SQL layer** (`sql_select.cc`, `create_tmp_table.h`, `field.h`): - Extract `pick_engine()` from `choose_engine()` for early HEAP detection - `m_heap_expected` flag gates blob-aware paths in GROUP BY key setup - Fix `calc_group_buffer()` blob subtype bug (TINY/MEDIUM/LONG_BLOB) - `is_any_blob_field_type()` helper (includes GEOMETRY) - GROUP BY key setup: `store_length` init, `key_field_length` cap, blob `store_length` override, `key_part_flag` deferred assignment - HEAP-specific: `end_update()` group key restoration after `copy_funcs()` - HEAP-specific: skip null-bits helper key part for DISTINCT - `empty_clex_str` for implicit key part field name (prevents SIGSEGV) **HEAP engine** (`ha_heap.cc`, `ha_heap.h`, `hp_hash.c`, `heap.h`): - `rebuild_key_from_group_buff()`: parses SQL-layer GROUP BY key buffer into `record[0]`, then rebuilds via `hp_make_key()` - `materialize_heap_key_if_needed()`: dispatches between group-buff rebuild and direct `hp_make_key(record[0])` for blob indexes - `needs_key_rebuild_from_group_buff` flag on `HP_KEYDEF` - `hp_keydef_has_blob_seg()` inline helper - `hp_make_key()`: normalize VARCHAR to 2-byte length prefix with zero-padding for sanitizer cleanliness - `hp_vartext_key_pack_size()` helper for key advancement - Endian-safe blob length write via `store_lowendian()` - Varchar bounds clamp in `rebuild_key_from_group_buff()` - Fix geometry GROUP BY key widening: skip widening when `key_part->length <= pack_length_no_ptr()` to prevent `store_length` overflow with `Field_geom::key_length()` = 4 (MSAN fix) - Pre-compute `has_blob_seg` in `heap_prepare_hp_create_info()` so callers can use it before `heap_create()` runs (MSAN fix) **Tests**: - `heap.heap_blob_ops`: COUNT(DISTINCT), IN-subquery, GROUP BY ROLLUP, window functions, CTE materialization, PAD SPACE scenarios - `hp_test_hash-t.c`: 43->56 TAP tests (hash consistency, mixed keys) - `hp_test_key_setup-t.cc`: 9->63 TAP tests with `Fake_thd_guard` RAII, geometry GROUP BY no-widening test - Result updates: `count_distinct`, `status`, `tmp_table_error` --- include/heap.h | 10 + mysql-test/main/count_distinct.result | 20 +- mysql-test/main/count_distinct.test | 14 +- mysql-test/main/status.result | 12 +- mysql-test/main/tmp_table_error.result | 3 +- mysql-test/main/tmp_table_error.test | 4 +- mysql-test/suite/heap/heap_blob_ops.result | 42 + mysql-test/suite/heap/heap_blob_ops.test | 28 + sql/create_tmp_table.h | 2 + sql/field.h | 11 + sql/sql_select.cc | 191 +++- storage/heap/ha_heap.cc | 249 ++++- storage/heap/ha_heap.h | 8 + storage/heap/hp_create.c | 11 +- storage/heap/hp_hash.c | 63 +- storage/heap/hp_test_hash-t.c | 208 +++- storage/heap/hp_test_key_setup-t.cc | 1061 +++++++++++++++++++- 17 files changed, 1794 insertions(+), 143 deletions(-) diff --git a/include/heap.h b/include/heap.h index 54a78a7877cd4..f696ee5c9ebf7 100644 --- a/include/heap.h +++ b/include/heap.h @@ -117,6 +117,7 @@ typedef struct st_hp_keydef /* Key definition with open */ uint length; /* Length of key (automatic) */ uint8 algorithm; /* HASH / BTREE */ my_bool has_blob_seg; /* Key has HA_BLOB_PART segments */ + my_bool needs_key_rebuild_from_group_buff; /* GROUP BY key must be rebuilt from group_buff */ HA_KEYSEG *seg; HP_BLOCK block; /* Where keys are saved */ /* @@ -132,6 +133,15 @@ typedef struct st_hp_keydef /* Key definition with open */ uint (*get_key_length)(struct st_hp_keydef *keydef, const uchar *key); } HP_KEYDEF; +static inline my_bool hp_keydef_has_blob_seg(const HP_KEYDEF *keydef) +{ + uint j; + for (j= 0; j < 
keydef->keysegs; j++) + if (keydef->seg[j].flag & HA_BLOB_PART) + return TRUE; + return FALSE; +} + typedef struct st_hp_blob_desc { uint offset; /* Byte offset of blob descriptor within record buffer */ diff --git a/mysql-test/main/count_distinct.result b/mysql-test/main/count_distinct.result index 30f24127982b6..0fa16cfb94169 100644 --- a/mysql-test/main/count_distinct.result +++ b/mysql-test/main/count_distinct.result @@ -31,34 +31,34 @@ isbn city libname a 000 New York New York Public Libra 6 001 New York NYC Lib 1 006 San Fran San Fransisco Public 1 -select t2.isbn,city,t1.libname,count(distinct t1.libname) as a from t3 left join t1 on t3.libname=t1.libname left join t2 on t3.isbn=t2.isbn group by city having count(distinct t1.libname) > 1; -isbn city libname a +select min(t2.isbn),city,min(t1.libname),count(distinct t1.libname) as a from t3 left join t1 on t3.libname=t1.libname left join t2 on t3.isbn=t2.isbn group by city having count(distinct t1.libname) > 1; +min(t2.isbn) city min(t1.libname) a 007 Berkeley Berkeley Public1 2 000 New York New York Public Libra 2 -select t2.isbn,city,t1.libname,count(distinct t1.libname) as a from t3 left join t1 on t3.libname=t1.libname left join t2 on t3.isbn=t2.isbn group by city having count(distinct concat(t1.libname,'a')) > 1; -isbn city libname a +select min(t2.isbn),city,min(t1.libname),count(distinct t1.libname) as a from t3 left join t1 on t3.libname=t1.libname left join t2 on t3.isbn=t2.isbn group by city having count(distinct concat(t1.libname,'a')) > 1; +min(t2.isbn) city min(t1.libname) a 007 Berkeley Berkeley Public1 2 000 New York New York Public Libra 2 -select t2.isbn,city,@bar:=t1.libname,count(distinct t1.libname) as a +select min(t2.isbn),city,@bar:=min(t1.libname),count(distinct t1.libname) as a from t3 left join t1 on t3.libname=t1.libname left join t2 on t3.isbn=t2.isbn group by city having count(distinct t1.libname) > 1; -isbn city @bar:=t1.libname a +min(t2.isbn) city @bar:=min(t1.libname) a 007 Berkeley Berkeley Public1 2 000 New York New York Public Libra 2 SELECT @bar; @bar -Berkeley Public2 -select t2.isbn,city,concat(@bar:=t1.libname),count(distinct t1.libname) as a +New York Public Libra +select min(t2.isbn),city,concat(@bar:=min(t1.libname)),count(distinct t1.libname) as a from t3 left join t1 on t3.libname=t1.libname left join t2 on t3.isbn=t2.isbn group by city having count(distinct t1.libname) > 1; -isbn city concat(@bar:=t1.libname) a +min(t2.isbn) city concat(@bar:=min(t1.libname)) a 007 Berkeley Berkeley Public1 2 000 New York New York Public Libra 2 SELECT @bar; @bar -Berkeley Public2 +New York Public Libra drop table t1, t2, t3; create table t1 (f1 int); insert into t1 values (1); diff --git a/mysql-test/main/count_distinct.test b/mysql-test/main/count_distinct.test index 9f682af3d6367..b141ffb379007 100644 --- a/mysql-test/main/count_distinct.test +++ b/mysql-test/main/count_distinct.test @@ -29,25 +29,19 @@ insert into t1 values ('Berkeley Public1','Berkeley'); insert into t1 values ('Berkeley Public2','Berkeley'); insert into t1 values ('NYC Lib','New York'); select t2.isbn,city,t1.libname,count(t1.libname) as a from t3 left join t1 on t3.libname=t1.libname left join t2 on t3.isbn=t2.isbn group by city,t1.libname; -select t2.isbn,city,t1.libname,count(distinct t1.libname) as a from t3 left join t1 on t3.libname=t1.libname left join t2 on t3.isbn=t2.isbn group by city having count(distinct t1.libname) > 1; -select t2.isbn,city,t1.libname,count(distinct t1.libname) as a from t3 left join t1 on 
t3.libname=t1.libname left join t2 on t3.isbn=t2.isbn group by city having count(distinct concat(t1.libname,'a')) > 1; +select min(t2.isbn),city,min(t1.libname),count(distinct t1.libname) as a from t3 left join t1 on t3.libname=t1.libname left join t2 on t3.isbn=t2.isbn group by city having count(distinct t1.libname) > 1; +select min(t2.isbn),city,min(t1.libname),count(distinct t1.libname) as a from t3 left join t1 on t3.libname=t1.libname left join t2 on t3.isbn=t2.isbn group by city having count(distinct concat(t1.libname,'a')) > 1; -select t2.isbn,city,@bar:=t1.libname,count(distinct t1.libname) as a +select min(t2.isbn),city,@bar:=min(t1.libname),count(distinct t1.libname) as a from t3 left join t1 on t3.libname=t1.libname left join t2 on t3.isbn=t2.isbn group by city having count(distinct t1.libname) > 1; -# -# Wrong result, see bug#49872 -# SELECT @bar; -select t2.isbn,city,concat(@bar:=t1.libname),count(distinct t1.libname) as a +select min(t2.isbn),city,concat(@bar:=min(t1.libname)),count(distinct t1.libname) as a from t3 left join t1 on t3.libname=t1.libname left join t2 on t3.isbn=t2.isbn group by city having count(distinct t1.libname) > 1; -# -# Wrong result, see bug#49872 -# SELECT @bar; drop table t1, t2, t3; diff --git a/mysql-test/main/status.result b/mysql-test/main/status.result index d17bd9c6a6154..47a5a45719829 100644 --- a/mysql-test/main/status.result +++ b/mysql-test/main/status.result @@ -324,30 +324,30 @@ Handler_mrr_key_refills 0 Handler_mrr_rowid_refills 0 Handler_prepare 0 Handler_read_first 0 -Handler_read_key 9 +Handler_read_key 13 Handler_read_last 0 Handler_read_next 0 Handler_read_prev 0 Handler_read_retry 0 -Handler_read_rnd 7 -Handler_read_rnd_deleted 0 +Handler_read_rnd 6 +Handler_read_rnd_deleted 3 Handler_read_rnd_next 23 Handler_rollback 0 Handler_savepoint 0 Handler_savepoint_rollback 0 Handler_tmp_delete 0 Handler_tmp_update 2 -Handler_tmp_write 7 +Handler_tmp_write 6 Handler_update 0 Handler_write 4 show status like '%tmp%'; Variable_name Value -Created_tmp_disk_tables 1 +Created_tmp_disk_tables 0 Created_tmp_files 0 Created_tmp_tables 2 Handler_tmp_delete 0 Handler_tmp_update 2 -Handler_tmp_write 7 +Handler_tmp_write 6 Rows_tmp_read 44 drop table t1; CREATE TABLE t1 (i int(11) DEFAULT NULL, KEY i (i) ) ENGINE=MyISAM; diff --git a/mysql-test/main/tmp_table_error.result b/mysql-test/main/tmp_table_error.result index 3a1a97250014b..43d3a448cfd43 100644 --- a/mysql-test/main/tmp_table_error.result +++ b/mysql-test/main/tmp_table_error.result @@ -2630,5 +2630,4 @@ b as c2624, b as c2626 from t1 ) as tt1; -ERROR 0A000: Aria table 'tmp' has too many columns and/or indexes and/or unique constraints. 
-drop table t1; +drop table t1, t2; diff --git a/mysql-test/main/tmp_table_error.test b/mysql-test/main/tmp_table_error.test index dbddaaaa4c794..b5a1ac3a47a85 100644 --- a/mysql-test/main/tmp_table_error.test +++ b/mysql-test/main/tmp_table_error.test @@ -5,8 +5,6 @@ create table t1 ( b text ) engine=innodb; ---replace_regex /'.*'/'tmp'/ ---error 140 create table t2 as select 1 @@ -2634,4 +2632,4 @@ select b as c2626 from t1 ) as tt1; -drop table t1; +drop table t1, t2; diff --git a/mysql-test/suite/heap/heap_blob_ops.result b/mysql-test/suite/heap/heap_blob_ops.result index 98118a4fc5a6b..e28dc20dd7ffd 100644 --- a/mysql-test/suite/heap/heap_blob_ops.result +++ b/mysql-test/suite/heap/heap_blob_ops.result @@ -146,4 +146,46 @@ SELECT COUNT(DISTINCT t) FROM t_pad; COUNT(DISTINCT t) 2 DROP TABLE t_pad; +# +# GROUP BY text_col must use HEAP (not Aria) for internal tmp tables +# +CREATE TABLE t_grp (id INT AUTO_INCREMENT PRIMARY KEY, t TEXT); +INSERT INTO t_grp (t) VALUES ('alpha'), ('alpha'), ('beta'), ('gamma'), ('gamma'); +FLUSH STATUS; +SELECT t, COUNT(*) AS cnt FROM t_grp GROUP BY t ORDER BY t; +t cnt +alpha 2 +beta 1 +gamma 2 +SHOW STATUS LIKE 'Created_tmp_disk_tables'; +Variable_name Value +Created_tmp_disk_tables 0 +# +# DISTINCT text_col must use HEAP (not Aria) for internal tmp tables +# +FLUSH STATUS; +SELECT DISTINCT t FROM t_grp ORDER BY t; +t +alpha +beta +gamma +SHOW STATUS LIKE 'Created_tmp_disk_tables'; +Variable_name Value +Created_tmp_disk_tables 0 +# +# GROUP BY text_col with PAD SPACE trailing-space data +# PAD SPACE collations collapse 'abc' and 'abc ' into one group +# +CREATE TABLE t_grp_pad (id INT AUTO_INCREMENT PRIMARY KEY, t TEXT); +INSERT INTO t_grp_pad (t) VALUES ('abc'), ('abc '), ('abc '), ('def'), ('def '); +FLUSH STATUS; +# Must return 2 groups, not 5 +SELECT t, COUNT(*) AS cnt FROM t_grp_pad GROUP BY t ORDER BY t; +t cnt +abc 3 +def 2 +SHOW STATUS LIKE 'Created_tmp_disk_tables'; +Variable_name Value +Created_tmp_disk_tables 0 +DROP TABLE t_grp, t_grp_pad; # End of MDEV-38975 blob operations tests diff --git a/mysql-test/suite/heap/heap_blob_ops.test b/mysql-test/suite/heap/heap_blob_ops.test index e1c81dadedd41..e596ea3e07122 100644 --- a/mysql-test/suite/heap/heap_blob_ops.test +++ b/mysql-test/suite/heap/heap_blob_ops.test @@ -90,4 +90,32 @@ INSERT INTO t_pad (t) VALUES ('abc'), ('abc '), ('abc '), ('def'), ('def SELECT COUNT(DISTINCT t) FROM t_pad; DROP TABLE t_pad; +--echo # +--echo # GROUP BY text_col must use HEAP (not Aria) for internal tmp tables +--echo # +CREATE TABLE t_grp (id INT AUTO_INCREMENT PRIMARY KEY, t TEXT); +INSERT INTO t_grp (t) VALUES ('alpha'), ('alpha'), ('beta'), ('gamma'), ('gamma'); +FLUSH STATUS; +SELECT t, COUNT(*) AS cnt FROM t_grp GROUP BY t ORDER BY t; +SHOW STATUS LIKE 'Created_tmp_disk_tables'; + +--echo # +--echo # DISTINCT text_col must use HEAP (not Aria) for internal tmp tables +--echo # +FLUSH STATUS; +SELECT DISTINCT t FROM t_grp ORDER BY t; +SHOW STATUS LIKE 'Created_tmp_disk_tables'; + +--echo # +--echo # GROUP BY text_col with PAD SPACE trailing-space data +--echo # PAD SPACE collations collapse 'abc' and 'abc ' into one group +--echo # +CREATE TABLE t_grp_pad (id INT AUTO_INCREMENT PRIMARY KEY, t TEXT); +INSERT INTO t_grp_pad (t) VALUES ('abc'), ('abc '), ('abc '), ('def'), ('def '); +FLUSH STATUS; +--echo # Must return 2 groups, not 5 +SELECT t, COUNT(*) AS cnt FROM t_grp_pad GROUP BY t ORDER BY t; +SHOW STATUS LIKE 'Created_tmp_disk_tables'; +DROP TABLE t_grp, t_grp_pad; + --echo # End of MDEV-38975 blob 
operations tests diff --git a/sql/create_tmp_table.h b/sql/create_tmp_table.h index ce86c9456e460..6a9f7b07d7e72 100644 --- a/sql/create_tmp_table.h +++ b/sql/create_tmp_table.h @@ -31,6 +31,7 @@ class Create_tmp_table: public Data_type_statistics // The following members are initialized in ctor uint m_alloced_field_count; bool m_using_unique_constraint; + bool m_heap_expected; uint m_temp_pool_slot; ORDER *m_group; bool m_distinct; @@ -59,6 +60,7 @@ class Create_tmp_table: public Data_type_statistics Create_tmp_table(ORDER *group, bool distinct, bool save_sum_fields, ulonglong select_options, ha_rows rows_limit); virtual ~Create_tmp_table() {} + handlerton *pick_engine(THD *thd, uint reclength); virtual bool choose_engine(THD *thd, TABLE *table, TMP_TABLE_PARAM *param); void add_field(TABLE *table, Field *field, uint fieldnr, bool force_not_null_cols); diff --git a/sql/field.h b/sql/field.h index c31b38d78e623..50d368885c4b0 100644 --- a/sql/field.h +++ b/sql/field.h @@ -522,6 +522,17 @@ inline bool is_temporal_type_with_date(enum_field_types type) } +/* + Check for blob field types, including GEOMETRY (which extends Field_blob). +*/ +static inline bool is_any_blob_field_type(enum_field_types type) +{ + return type == MYSQL_TYPE_BLOB || type == MYSQL_TYPE_TINY_BLOB || + type == MYSQL_TYPE_MEDIUM_BLOB || type == MYSQL_TYPE_LONG_BLOB || + type == MYSQL_TYPE_GEOMETRY; +} + + enum enum_vcol_info_type { VCOL_GENERATED_VIRTUAL, VCOL_GENERATED_STORED, diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 273f9b345cef1..1fab2851ffd1b 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -20561,6 +20561,7 @@ Create_tmp_table::Create_tmp_table(ORDER *group, bool distinct, ha_rows rows_limit) :m_alloced_field_count(0), m_using_unique_constraint(false), + m_heap_expected(false), m_temp_pool_slot(MY_BIT_NONE), m_group(group), m_distinct(distinct), @@ -20687,6 +20688,22 @@ TABLE *Create_tmp_table::start(THD *thd, */ fn_format(path, path, mysql_tmpdir, "", MY_REPLACE_EXT|MY_UNPACK_FILENAME); + /* + Early engine prediction: reclength is not known yet (fields haven't been + added), so pass 0 — this is safe because pick_engine()'s only reclength + check is "> HA_MAX_REC_LENGTH", which 0 never triggers. Returns + heap_hton unless session-level overrides (big_tables, + tmp_memory_table_size=0, etc.) force a disk-based engine. We use this + to avoid the too_big_for_varchar() / group_length >= MAX_BLOB_WIDTH + bail-outs that would force m_using_unique_constraint for HEAP tables + that natively support blob keys. + + Note: pick_engine() also reads m_using_unique_constraint, which is + false at this point. The guards below that set it to true are gated + by !m_heap_expected, so there is no circular dependency in practice. + */ + m_heap_expected= (pick_engine(thd, 0) == heap_hton); + if (m_group) { ORDER **prev= &m_group; @@ -20710,10 +20727,10 @@ TABLE *Create_tmp_table::start(THD *thd, can't index BIT fields. 
*/ (*tmp->item)->marker= MARKER_NULL_KEY; // Store null in key - if ((*tmp->item)->too_big_for_varchar()) + if (!m_heap_expected && (*tmp->item)->too_big_for_varchar()) m_using_unique_constraint= true; } - if (param->group_length >= MAX_BLOB_WIDTH) + if (!m_heap_expected && param->group_length >= MAX_BLOB_WIDTH) m_using_unique_constraint= true; if (m_group) m_distinct= 0; // Can't use distinct @@ -21030,41 +21047,41 @@ bool Create_tmp_table::add_fields(THD *thd, } -bool Create_tmp_table::choose_engine(THD *thd, TABLE *table, - TMP_TABLE_PARAM *param) -{ - TABLE_SHARE *share= table->s; - DBUG_ENTER("Create_tmp_table::choose_engine"); - /* - If result table is small; use a heap, otherwise TMP_TABLE_HTON (Aria). - HEAP now supports blob columns via continuation chains, so blob_fields - alone no longer forces a disk-based engine. We still fall back to disk - when reclength exceeds HA_MAX_REC_LENGTH (HEAP's fixed-width rows would - waste too much memory for very wide records). - In the future we should try making storage engine selection more dynamic. - */ +/* + Predict which engine a temporary table will use, based on session + variables and the current m_using_unique_constraint / m_select_options + state. Called early (before fields are added) with reclength=0 to set + m_heap_expected, and again from choose_engine() with the real reclength. +*/ +handlerton *Create_tmp_table::pick_engine(THD *thd, uint reclength) +{ if (m_using_unique_constraint || - share->reclength > HA_MAX_REC_LENGTH || + reclength > HA_MAX_REC_LENGTH || (thd->variables.big_tables && !(m_select_options & SELECT_SMALL_RESULT)) || (m_select_options & TMP_TABLE_FORCE_MYISAM) || thd->variables.tmp_memory_table_size == 0) - { - share->db_plugin= ha_lock_engine(0, TMP_ENGINE_HTON); - table->file= get_new_handler(share, &table->mem_root, - share->db_type()); - if (m_group && - (param->group_parts > table->file->max_key_parts() || - param->group_length > table->file->max_key_length())) - m_using_unique_constraint= true; - } - else - { - share->db_plugin= ha_lock_engine(0, heap_hton); - table->file= get_new_handler(share, &table->mem_root, - share->db_type()); - } + return TMP_ENGINE_HTON; + return heap_hton; +} + + +bool Create_tmp_table::choose_engine(THD *thd, TABLE *table, + TMP_TABLE_PARAM *param) +{ + TABLE_SHARE *share= table->s; + DBUG_ENTER("Create_tmp_table::choose_engine"); + + handlerton *engine= pick_engine(thd, share->reclength); + share->db_plugin= ha_lock_engine(0, engine); + table->file= get_new_handler(share, &table->mem_root, share->db_type()); + + if (engine == TMP_ENGINE_HTON && m_group && + (param->group_parts > table->file->max_key_parts() || + param->group_length > table->file->max_key_length())) + m_using_unique_constraint= true; + DBUG_RETURN(!table->file); } @@ -21136,8 +21153,10 @@ bool Create_tmp_table::finalize(THD *thd, share->reclength= 1; // Dummy select share->stored_rec_length= share->reclength; /* - Use packed rows if there is blobs or a lot of space to gain. - HEAP requires fixed-width rows — it cannot use packed row format. + HEAP-specific: skip packed row format. + HEAP uses fixed-width base records (blob data is stored separately + in continuation chains), so use packed rows only for disk-based + engines when there are blobs or enough space to gain. 
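
A minimal standalone sketch, not part of the patch (the function name and offsets are illustrative), of what a fixed-width base record means in practice: the row stores only a constant-size blob descriptor, packlength little-endian length bytes followed by a native data pointer, so the record size never depends on the blob value:

    #include <string.h>
    #include <stdint.h>
    typedef unsigned char uchar;

    /* Read a HEAP-style blob descriptor out of a fixed-width record:
       'packlength' little-endian length bytes, then a raw data pointer.
       Assumes a little-endian host, as the unit tests below do. */
    static void read_blob_desc(const uchar *rec, unsigned offset,
                               unsigned packlength,
                               uint32_t *len, const uchar **data)
    {
      uint32_t l= 0;
      memcpy(&l, rec + offset, packlength);
      memcpy(data, rec + offset + packlength, sizeof(uchar *));
      *len= l;
    }
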
*/ if (share->db_type() != heap_hton && (share->blob_fields || @@ -21334,7 +21353,6 @@ bool Create_tmp_table::finalize(THD *thd, (ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_VARTEXT1 || (ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_VARTEXT2) ? 0 : FIELDFLAG_BINARY; - m_key_part_info->key_part_flag= field->key_part_flag(); if (!m_using_unique_constraint) { cur_group->buff=(char*) m_group_buff; @@ -21352,14 +21370,59 @@ bool Create_tmp_table::finalize(THD *thd, (*cur_group->item)->base_flags&= ~item_base_t::MAYBE_NULL; } + /* + For blob/geometry GROUP BY keys, field->key_length() returns + 0 (blobs) or packlength (geometry), both too small to hold + actual data. Use the item's max_length capped to + MAX_BLOB_WIDTH so new_key_field gets a usable size. + */ + uint32 key_field_length= m_key_part_info->length; + if ((field->flags & BLOB_FLAG) && + key_field_length <= ((Field_blob*)field)->pack_length_no_ptr()) + { + key_field_length= MY_MIN((*cur_group->item)->max_length, + (uint32)(MAX_BLOB_WIDTH - HA_KEY_BLOB_LENGTH)); + /* + Check that the group buffer has room for this blob key field. + calc_group_buffer() may have sized the buffer before the field + was promoted to blob in the tmp table. If the promoted blob + doesn't fit, fall back to m_using_unique_constraint. + */ + uint32 need= key_field_length + 2 /* length_bytes */ + + MY_TEST(maybe_null); + if (m_group_buff + need > + param->group_buff + param->group_length) + { + m_using_unique_constraint= true; + break; + } + } + /* + Set key_part_flag from the actual field type AFTER the overflow + check. This ensures that if we break out due to a promoted + blob overflowing the group buffer, key_part_flag retains the + original SQL-layer value (HA_VAR_LENGTH_PART for varchar), + not HA_BLOB_PART. This prevents rebuild_key_from_group_buff() from being + called on a key buffer that has varchar format. + */ + m_key_part_info->key_part_flag= field->key_part_flag(); + if (!(cur_group->field= field->new_key_field(thd->mem_root,table, m_group_buff + MY_TEST(maybe_null), - m_key_part_info->length, + key_field_length, field->null_ptr, field->null_bit))) goto err; /* purecov: inspected */ + /* + Set store_length for all GROUP BY key parts so + rebuild_key_from_group_buff() can advance through the key buffer. + store_length = key field pack_length + null flag byte. + */ + m_key_part_info->store_length= + cur_group->field->pack_length() + MY_TEST(maybe_null); + if (maybe_null) { /* @@ -21409,8 +21472,17 @@ bool Create_tmp_table::finalize(THD *thd, */ share->uniques= 1; } + /* + HEAP-specific: skip null-bits helper key part. + HEAP handles NULLs per-segment in its hash index, so it does not + need the extra null-key-part that MyISAM/Aria use for unique blob + constraints. + */ + bool need_null_key_part= share->uniques && + share->db_type() != heap_hton && + null_pack_length[distinct]; keyinfo->user_defined_key_parts= m_field_count[distinct] + - (share->uniques ? MY_TEST(null_pack_length[distinct]) : 0); + MY_TEST(need_null_key_part); keyinfo->ext_key_parts= keyinfo->user_defined_key_parts; keyinfo->usable_key_parts= keyinfo->user_defined_key_parts; table->distinct= 1; @@ -21453,17 +21525,24 @@ bool Create_tmp_table::finalize(THD *thd, blobs can distinguish NULL from 0. This extra field is not needed when we do not use UNIQUE indexes for blobs. 
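
An illustrative layout, inferred from the surrounding code rather than quoted from it: with the helper part appended, the unique-hash key is effectively

    [user-visible key parts ...][null bitmap bytes (null_pack_length)]

so two rows whose blob parts compare equal but differ in NULLness still hash and compare as distinct. HEAP can skip the extra part because every HA_KEYSEG carries its own null_bit/null_pos and hp_key_cmp() already distinguishes NULL per segment.
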
*/ - if (null_pack_length[distinct] && share->uniques) + if (need_null_key_part) { m_key_part_info->null_bit=0; m_key_part_info->offset= null_pack_base[distinct]; m_key_part_info->length= null_pack_length[distinct]; + /* + Use empty_clex_str (not null_clex_str) for the field name: + HEAP keeps share->keys visible to EXPLAIN, which iterates + key parts and calls strlen() on the name. A NULL name from + null_clex_str causes SIGSEGV in Explain_index_use::set(). + Empty string keeps the implicit field invisible in output. + */ m_key_part_info->field= new Field_string(table->record[0], (uint32) m_key_part_info->length, (uchar*) 0, (uint) 0, Field::NONE, - &null_clex_str, &my_charset_bin); + &empty_clex_str, &my_charset_bin); if (!m_key_part_info->field) goto err; m_key_part_info->field->init(table); @@ -24772,6 +24851,38 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), if (unlikely(copy_funcs(join_tab->tmp_table_param->items_to_copy, join->thd))) DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ + /* + HEAP-specific: restore group key values after copy_funcs(). + For blob GROUP BY keys, copy_funcs() overwrites record[0]'s blob + pointers with new expression results, but the GROUP BY key was + built from the group buffer's Field_varstring (not record[0]). + Restore the group buffer values into record[0] so hp_make_key() + in ha_write_tmp_row() builds the correct key. + Non-blob fields are unaffected: copy_funcs() writes directly into + the record[0] field slots that hp_make_key() reads from. + */ + if (table->s->db_type() == heap_hton) + { + if (table->s->blob_fields) + { + String tmp_str; + for (group= table->group; group; group= group->next) + { + Field *tbl_field= (*group->item)->get_tmp_table_field(); + if (tbl_field && tbl_field != group->field) + { + if (group->field->is_null()) + tbl_field->set_null(); + else + { + String *val= group->field->val_str(&tmp_str); + tbl_field->set_notnull(); + tbl_field->store(val->ptr(), val->length(), val->charset()); + } + } + } + } + } if (unlikely((error= table->file->ha_write_tmp_row(table->record[0])))) { if (create_internal_tmp_table_from_heap(join->thd, table, @@ -27849,7 +27960,7 @@ void calc_group_buffer(TMP_TABLE_PARAM *param, ORDER *group) if (field) { enum_field_types type; - if ((type= field->type()) == MYSQL_TYPE_BLOB) + if (is_any_blob_field_type(type= field->type())) key_length+=MAX_BLOB_WIDTH; // Can't be used as a key else if (type == MYSQL_TYPE_VARCHAR || type == MYSQL_TYPE_VAR_STRING) key_length+= field->field_length + HA_KEY_BLOB_LENGTH; @@ -27888,7 +27999,7 @@ void calc_group_buffer(TMP_TABLE_PARAM *param, ORDER *group) case STRING_RESULT: { enum enum_field_types type= group_item->field_type(); - if (type == MYSQL_TYPE_BLOB) + if (is_any_blob_field_type(type)) key_length+= MAX_BLOB_WIDTH; // Can't be used as a key else { diff --git a/storage/heap/ha_heap.cc b/storage/heap/ha_heap.cc index e902de1b5d4fb..4b0c0828ef3aa 100644 --- a/storage/heap/ha_heap.cc +++ b/storage/heap/ha_heap.cc @@ -72,7 +72,7 @@ static handler *heap_create_handler(handlerton *hton, *****************************************************************************/ ha_heap::ha_heap(handlerton *hton, TABLE_SHARE *table_arg) - :handler(hton, table_arg), file(0), records_changed(0), key_stat_version(0), + :handler(hton, table_arg), file(0), records_changed(0), key_stat_version(0), internal_table(0) {} @@ -286,25 +286,144 @@ int ha_heap::delete_row(const uchar * buf) return res; } +/* + Rebuild GROUP BY key from group_buff into HEAP's hp_make_key 
format. + + The GROUP BY path (end_update) stores item values into the group + buffer's Field_varstring fields, NOT into record[0]'s Field_blob. + After copy_funcs(), record[0]'s blob fields may be stale. + + This method iterates all SQL-layer key parts (blob, varchar, and + fixed-length), using store_length for offset advancement. It copies + each part's data from the group buffer into record[0], then calls + hp_make_key() to build the HEAP-format key. + + Only called when needs_key_rebuild_from_group_buff is set (GROUP BY key 0 with + blob segments). DISTINCT/SJ-materialize keys use hp_make_key() + directly from record[0]. +*/ +void ha_heap::rebuild_key_from_group_buff(HP_KEYDEF *keydef, const uchar *&key, + uint active_key_index) +{ + KEY *sql_key= &table->key_info[active_key_index]; + const uchar *key_pos= key; + + for (uint i= 0; i < sql_key->user_defined_key_parts; i++) + { + KEY_PART_INFO *key_part= &sql_key->key_part[i]; + const uchar *data_pos= key_pos; + Field *field= key_part->field; + + bool is_null= false; + if (key_part->null_bit) + { + is_null= *data_pos != 0; + data_pos++; /* skip null flag byte */ + } + + if (is_null) + { + /* NULL: set the field's null bit in record[0] */ + if (field->null_ptr) + field->null_ptr[0] |= field->null_bit; + } + else + { + /* Non-NULL: clear null bit, then copy data into record[0] */ + if (key_part->null_bit && field->null_ptr) + field->null_ptr[0] &= ~field->null_bit; + + if (key_part->key_part_flag & HA_BLOB_PART) + { + /* + Blob GROUP BY key: stored as Field_varstring in group_buff + (2B length prefix + inline data). Copy into record[0]'s + blob field (packlength + data pointer) so hp_make_key() + can build the HEAP-format key (4B length + data pointer). + + The key field is always Field_varstring with 2B length + (key_field_length >= 256 from MAX_BLOB_WIDTH cap). + */ + uint16 data_len= uint2korr(data_pos); + const uchar *data_ptr= data_pos + 2; + + Field_blob *blob= (Field_blob*) field; + DBUG_ASSERT(blob->flags & BLOB_FLAG); + uint packlength= blob->pack_length_no_ptr(); + uchar *blob_field= table->record[0] + key_part->offset; + store_lowendian((ulonglong) data_len, blob_field, packlength); + memcpy(blob_field + packlength, &data_ptr, sizeof(void*)); + } + else if (key_part->key_part_flag & HA_VAR_LENGTH_PART) + { + /* + VARCHAR GROUP BY key: stored as Field_varstring in group_buff. + The key buffer always uses HA_KEY_BLOB_LENGTH (2) bytes for + the length prefix (Field_varstring::key_part_length_bytes()), + but record[0]'s Field_varstring may use 1 or 2 bytes + depending on field_length. Read 2B from key, write the + field's native length_bytes to record[0]. + */ + uint16 key_data_len= uint2korr(data_pos); + const uchar *key_data_ptr= data_pos + HA_KEY_BLOB_LENGTH; + + Field_varstring *vs= (Field_varstring*) field; + uint rec_length_bytes= vs->length_bytes; + uchar *rec_field= table->record[0] + key_part->offset; + set_if_smaller(key_data_len, vs->field_length); + if (rec_length_bytes == 1) + rec_field[0]= (uchar) key_data_len; + else + int2store(rec_field, key_data_len); + memcpy(rec_field + rec_length_bytes, key_data_ptr, key_data_len); + } + else + { + /* Fixed-length: copy from key directly into record[0] */ + memcpy(table->record[0] + key_part->offset, data_pos, + key_part->length); + } + } + + key_pos+= key_part->store_length; + } + hp_make_key(keydef, (uchar*) file->lastkey, table->record[0]); + key= (const uchar*) file->lastkey; +} + + +/* + Ensure the key is in HEAP's native format for blob indexes. 
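
For reference, a sketch of the three layouts involved (an illustrative fragment, assuming the usual int2store/int4store macros, packlength=2, and a little-endian host):

    const uchar *payload= (const uchar *) "New York";    /* 8 bytes */

    /* 1. SQL-layer key part (Field_varstring): 2B length + inline data */
    uchar sql_key_part[2 + 8];
    int2store(sql_key_part, 8);
    memcpy(sql_key_part + 2, payload, 8);

    /* 2. record[0] blob descriptor: packlength length bytes + data pointer */
    uchar rec_desc[2 + sizeof(uchar *)];
    int2store(rec_desc, 8);
    memcpy(rec_desc + 2, &payload, sizeof(uchar *));

    /* 3. hp_make_key() blob segment: 4B length + data pointer */
    uchar heap_seg[4 + sizeof(uchar *)];
    int4store(heap_seg, 8);
    memcpy(heap_seg + 4, &payload, sizeof(uchar *));

rebuild_key_from_group_buff() converts format 1 into format 2, and hp_make_key() then emits format 3.
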
+ + GROUP BY (needs_key_rebuild_from_group_buff): parse the SQL-layer group_buff + into record[0] and rebuild via hp_make_key(), because record[0]'s + blob fields may be stale after copy_funcs(). + + DISTINCT / SJ-materialize: record[0] already has correct blob + values; build the HEAP key directly from record[0]. +*/ +void ha_heap::materialize_heap_key_if_needed(uint key_index, const uchar *&key) +{ + HP_KEYDEF *keydef= file->s->keydef + key_index; + if (keydef->has_blob_seg) + { + if (keydef->needs_key_rebuild_from_group_buff) + rebuild_key_from_group_buff(keydef, key, key_index); + else + { + hp_make_key(keydef, (uchar*) file->lastkey, table->record[0]); + key= (const uchar*) file->lastkey; + } + } +} + + int ha_heap::index_read_map(uchar *buf, const uchar *key, key_part_map keypart_map, enum ha_rkey_function find_flag) { DBUG_ASSERT(inited==INDEX); - /* - When the index has blob key segments, the SQL layer's key buffer (e.g. - group_buff from end_update) uses Field_varstring format (2B length + - inline data) because Field_blob::new_key_field() returns Field_varstring. - But HEAP's hp_hashnr/hp_key_cmp expect hp_make_key format (4B length + - data pointer). Rebuild the key from record[0] which has the correct - blob field layout. - */ - if (file->s->keydef[active_index].has_blob_seg) - { - hp_make_key(file->s->keydef + active_index, (uchar*) file->lastkey, - table->record[0]); - key= (const uchar*) file->lastkey; - } + materialize_heap_key_if_needed(active_index, key); int error = heap_rkey(file,buf,active_index, key, keypart_map, find_flag); return error; } @@ -313,12 +432,7 @@ int ha_heap::index_read_last_map(uchar *buf, const uchar *key, key_part_map keypart_map) { DBUG_ASSERT(inited==INDEX); - if (file->s->keydef[active_index].has_blob_seg) - { - hp_make_key(file->s->keydef + active_index, (uchar*) file->lastkey, - table->record[0]); - key= (const uchar*) file->lastkey; - } + materialize_heap_key_if_needed(active_index, key); int error= heap_rkey(file, buf, active_index, key, keypart_map, HA_READ_PREFIX_LAST); return error; @@ -328,12 +442,7 @@ int ha_heap::index_read_idx_map(uchar *buf, uint index, const uchar *key, key_part_map keypart_map, enum ha_rkey_function find_flag) { - if (file->s->keydef[index].has_blob_seg) - { - hp_make_key(file->s->keydef + index, (uchar*) file->lastkey, - table->record[0]); - key= (const uchar*) file->lastkey; - } + materialize_heap_key_if_needed(index, key); int error = heap_rkey(file, buf, index, key, keypart_map, find_flag); return error; } @@ -714,7 +823,51 @@ static int heap_prepare_hp_create_info(TABLE *table_arg, bool internal_table, } seg->start= (uint) key_part->offset; seg->length= (uint) key_part->length; - seg->flag= key_part->key_part_flag; + /* + Use field->key_part_flag() instead of key_part->key_part_flag + because some SQL layer paths (SJ weedout, expression cache) + leave key_part_flag uninitialized. Garbage HA_BLOB_PART bits + cause seg->length to be zeroed (the blob convention), corrupting + hash/compare for non-blob VARCHAR/VARBINARY keys. + */ + seg->flag= field->key_part_flag(); + /* + HEAP blob key segments must have seg->length=0. hp_hashnr() + advances key by seg->length (fixed part) THEN by 4+sizeof(ptr) + (blob encoding); non-zero length double-counts the advance + and hashes wrong data. The SQL layer's key_part.length may be + pack_length() (e.g. DISTINCT key path) — override it here. 
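
A compact model of the per-segment key-cursor advance, distilled from the behavior described here (an assumption for illustration, not the hp_hashnr() code itself):

    /* How far the key cursor moves past one segment of a pre-built key. */
    static size_t key_advance(const HA_KEYSEG *seg)
    {
      size_t n= seg->length;           /* fixed part: must be 0 for blobs */
      if (seg->flag & HA_BLOB_PART)
        n+= 4 + sizeof(uchar *);       /* blob: 4B length + data pointer */
      else if (seg->flag & HA_VAR_LENGTH_PART)
        n+= 2;                         /* normalized 2B length prefix */
      return n;
    }

If seg->length were left at pack_length(), the blob advance would be pack_length() + 4 + sizeof(uchar *), shifting every later segment and hashing the wrong bytes.
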
+ + Also widen key_part->length to max_data_length() so the SQL + layer's new_key_field() creates a Field_varstring large enough + for the full blob data. Without this, DISTINCT/sj-materialize + lookup keys are truncated to pack_length() bytes. + */ + if (seg->flag & HA_BLOB_PART) + { + seg->length= 0; + uint32 blob_max= field->max_data_length(); + /* + Widen key_part->length for blob segments where the SQL layer + set it to pack_length() (e.g. DISTINCT key path). Skip when + key_part->length <= pack_length_no_ptr(), which covers: + - Regular blobs: key_length()=0 (GROUP BY path where + finalize() sizes the group buffer separately) + - Geometry blobs: key_length()=packlength=4 (GROUP BY path, + also sized separately by finalize()) + Without this guard, geometry GROUP BY triggers overflow in + store_length (len_delta ≈ 4 billion), causing + rebuild_key_from_group_buff() to jump to uninitialized memory. + */ + uint pack_no_ptr= ((Field_blob*)field)->pack_length_no_ptr(); + if (key_part->length > pack_no_ptr && key_part->length < blob_max) + { + uint len_delta= blob_max - key_part->length; + key_part->length= blob_max; + key_part->store_length+= len_delta; + pos->key_length+= len_delta; + } + } if (field->flags & (ENUM_FLAG | SET_FLAG)) seg->charset= &my_charset_bin; @@ -754,7 +907,25 @@ static int heap_prepare_hp_create_info(TABLE *table_arg, bool internal_table, seg->bit_pos= 0; } } + /* + Pre-compute has_blob_seg so callers (materialize_heap_key_if_needed, + unit tests) can use it before heap_create() runs. heap_create() + recomputes this from the normalized segments. + */ + keydef[key].has_blob_seg= hp_keydef_has_blob_seg(&keydef[key]); } + /* + Detect GROUP BY keys with blob segments that need rebuild_key_from_group_buff(). + When table->group is set, key 0 is the GROUP BY key. If it has + HA_BLOB_PART segments, finalize() set up the group buffer with + blob format and rebuild_key_from_group_buff() must parse it during lookups. + Without this flag, index_read_map() falls back to hp_make_key(record[0]) + which may use stale blob pointers after copy_funcs(). + */ + if (keys > 0) + keydef[0].needs_key_rebuild_from_group_buff= + (table_arg->group && hp_keydef_has_blob_seg(&keydef[0])); + if (table_arg->found_next_number_field) { keydef[share->next_number_index].flag|= HA_AUTO_KEY; @@ -954,13 +1125,27 @@ maria_declare_plugin(heap) maria_declare_plugin_end; #ifdef HEAP_UNIT_TESTS -/* - Public wrapper for unit tests — exposes the static - heap_prepare_hp_create_info() for direct testing. -*/ int test_heap_prepare_hp_create_info(TABLE *table_arg, bool internal_table, HP_CREATE_INFO *hp_create_info) { return heap_prepare_hp_create_info(table_arg, internal_table, hp_create_info); } + +/* + Test wrapper: rebuild_key_from_group_buff with a fake HP_INFO. + Sets up the handler's file pointer with the provided HP_INFO, + binds the handler to the given TABLE, then calls rebuild_key_from_group_buff. 
+*/ +void test_rebuild_key_from_group_buff(ha_heap *handler, TABLE *tbl, + HP_INFO *fake_file, HP_KEYDEF *keydef, + const uchar *key, uint key_index, + const uchar **rebuilt_key) +{ + handler->file= fake_file; + handler->set_table(tbl); + const uchar *k= key; + handler->rebuild_key_from_group_buff(keydef, k, key_index); + *rebuilt_key= k; +} + #endif diff --git a/storage/heap/ha_heap.h b/storage/heap/ha_heap.h index 0d0eec530cde6..c7a98b344432a 100644 --- a/storage/heap/ha_heap.h +++ b/storage/heap/ha_heap.h @@ -126,4 +126,12 @@ class ha_heap final : public handler int find_unique_row(uchar *record, uint unique_idx) override; private: void update_key_stats(); + void materialize_heap_key_if_needed(uint key_index, const uchar *&key); + void rebuild_key_from_group_buff(HP_KEYDEF *keydef, const uchar *&key, + uint active_key_index); +#ifdef HEAP_UNIT_TESTS + friend void test_rebuild_key_from_group_buff(ha_heap *, TABLE *, HP_INFO *, + HP_KEYDEF *, const uchar *, uint, + const uchar **); +#endif }; diff --git a/storage/heap/hp_create.c b/storage/heap/hp_create.c index 18a4034fb4691..fa15e441373e3 100644 --- a/storage/heap/hp_create.c +++ b/storage/heap/hp_create.c @@ -277,16 +277,7 @@ int heap_create(const char *name, HP_CREATE_INFO *create_info, keyinfo->seg= keyseg; memcpy(keyseg, keydef[i].seg, (size_t) (sizeof(keyseg[0]) * keydef[i].keysegs)); - keyinfo->has_blob_seg= FALSE; - { - uint j; - for (j= 0; j < keydef[i].keysegs; j++) - if (keyseg[j].flag & HA_BLOB_PART) - { - keyinfo->has_blob_seg= TRUE; - break; - } - } + keyinfo->has_blob_seg= hp_keydef_has_blob_seg(keyinfo); keyseg+= keydef[i].keysegs; if (keydef[i].algorithm == HA_KEY_ALG_BTREE) diff --git a/storage/heap/hp_hash.c b/storage/heap/hp_hash.c index 7be77afec308b..4bdfba659d46b 100644 --- a/storage/heap/hp_hash.c +++ b/storage/heap/hp_hash.c @@ -28,7 +28,11 @@ hp_charpos(CHARSET_INFO *cs, const uchar *b, const uchar *e, size_t num) } +#ifdef HEAP_UNIT_TESTS +ulong hp_hashnr(HP_KEYDEF *keydef, const uchar *key); +#else static ulong hp_hashnr(HP_KEYDEF *keydef, const uchar *key); +#endif /* @@ -48,6 +52,34 @@ static size_t hp_blob_key_length(uint packlength, const uchar *pos) } return 0; } + + +/* + Compute the key-buffer byte size of the variable-length portion of a + VARTEXT or BLOB segment in a pre-built hash key. + + Used by hp_hashnr() and hp_key_cmp() to advance past a VARCHAR or + BLOB segment (both null and non-null) in the key buffer. + + All VARCHAR key segments use a 2-byte length prefix — this is the + canonical key format shared between SQL-layer group_buff keys and + hp_make_key() output. hp_make_key() normalizes 1-byte record + prefixes to 2-byte key prefixes to maintain this invariant. + + Blob segments use a fixed 4-byte length + pointer layout. + + @param seg Key segment descriptor + @return Number of bytes to skip in the key buffer for the variable- + length portion (does NOT include the null flag byte, which + the caller handles separately) +*/ + +static inline size_t hp_vartext_key_pack_size(const HA_KEYSEG *seg) +{ + return (seg->flag & HA_BLOB_PART) ? 
4 + sizeof(uchar *) : 2; +} + + /* Find out how many rows there is in the given range @@ -250,7 +282,11 @@ void hp_movelink(HASH_INFO *pos, HASH_INFO *next_link, HASH_INFO *newlink) /* Calc hashvalue for a key */ +#ifdef HEAP_UNIT_TESTS +ulong hp_hashnr(HP_KEYDEF *keydef, const uchar *key) +#else static ulong hp_hashnr(HP_KEYDEF *keydef, const uchar *key) +#endif { /*register*/ ulong nr=1, nr2=4; @@ -266,9 +302,8 @@ static ulong hp_hashnr(HP_KEYDEF *keydef, const uchar *key) if (*pos) /* Found null */ { nr^= (nr << 1) | 1; - /* Add key pack length to key for VARCHAR/BLOB segments */ if (seg->type == HA_KEYTYPE_VARTEXT1) - key+= (seg->flag & HA_BLOB_PART) ? 4 + sizeof(uchar*) : 2; + key+= hp_vartext_key_pack_size(seg); continue; } pos++; @@ -514,9 +549,9 @@ int hp_rec_key_cmp(HP_KEYDEF *keydef, const uchar *rec1, const uchar *rec2, { uchar *pos1= (uchar*) rec1 + seg->start; uchar *pos2= (uchar*) rec2 + seg->start; - size_t char_length1, char_length2; size_t pack_length= seg->bit_start; CHARSET_INFO *cs= seg->charset; + size_t char_length1, char_length2; if (pack_length == 1) { char_length1= (size_t) *(uchar*) pos1++; @@ -588,9 +623,8 @@ int hp_key_cmp(HP_KEYDEF *keydef, const uchar *rec, const uchar *key, return 1; if (found_null) { - /* Add key pack length to key for VARCHAR/BLOB segments */ if (seg->type == HA_KEYTYPE_VARTEXT1) - key+= (seg->flag & HA_BLOB_PART) ? 4 + sizeof(uchar*) : 2; + key+= hp_vartext_key_pack_size(seg); continue; } } @@ -738,7 +772,24 @@ void hp_make_key(HP_KEYDEF *keydef, uchar *key, const uchar *rec) set_if_smaller(char_length, seg->length); /* QQ: ok to remove? */ } if (seg->type == HA_KEYTYPE_VARTEXT1) - char_length+= seg->bit_start; /* Copy also length */ + { + /* + Normalize VARCHAR to always use a 2-byte length prefix in the key + buffer, regardless of whether the record uses 1-byte or 2-byte + packing. This keeps the key format consistent with what + hp_hashnr() and hp_key_cmp() expect (they always read 2 bytes). + */ + uint native_pack= seg->bit_start; + size_t data_len= (native_pack == 1 ? (size_t) *(uchar*) pos + : uint2korr(pos)); + set_if_smaller(data_len, char_length); + int2store(key, (uint16) data_len); + memcpy(key + 2, pos + native_pack, data_len); + if (data_len < char_length) + bzero(key + 2 + data_len, char_length - data_len); + key+= 2 + char_length; + continue; + } else if (seg->type == HA_KEYTYPE_BIT && seg->bit_length) { *key++= get_rec_bits(rec + seg->bit_pos, diff --git a/storage/heap/hp_test_hash-t.c b/storage/heap/hp_test_hash-t.c index d831fc53fd88b..8de1d8542884c 100644 --- a/storage/heap/hp_test_hash-t.c +++ b/storage/heap/hp_test_hash-t.c @@ -469,14 +469,14 @@ static void test_pad_space(void) The SQL layer builds lookup keys in varstring format (2B length prefix + inline data) via Field_blob::new_key_field() -> Field_varstring. The HEAP - handler's rebuild_blob_key() converts this to record[0]'s blob descriptor + handler's rebuild_key_from_group_buff() converts this to record[0]'s blob descriptor format, then hp_make_key() builds the hash key. This test simulates the full round-trip: 1. Build a record with blob data (as at INSERT time) 2. Compute hp_rec_hashnr() (stored in HASH_INFO at write time) 3. Build a varstring-format key (as the SQL layer would for lookup) - 4. Parse the varstring key into a record's blob field (rebuild_blob_key) + 4. Parse the varstring key into a record's blob field (rebuild_key_from_group_buff) 5. hp_make_key() from that record, then hp_rec_hashnr() on the record 6. 
Verify the hashes match
*/
@@ -510,7 +510,7 @@ static void test_distinct_key_format(void)

   /*
     Step 4: Parse varstring key into rec_lookup's blob field.
-    This is what rebuild_blob_key() does.
+    This is what rebuild_key_from_group_buff() does.
   */
   memset(rec_lookup, 0, REC_LENGTH);
   {
@@ -619,7 +619,7 @@ static void test_group_by_key_format(void)
   insert_hash= hp_rec_hashnr(&keydef, rec_insert);

   /*
-    Simulate rebuild_blob_key: parse varstring key, populate rec_lookup.
+    Simulate rebuild_key_from_group_buff: parse varstring key, populate rec_lookup.
     In GROUP BY, key_field_length = max_length (not 0, not pack_length).
   */
   /* no null bit for this test */
@@ -706,11 +706,206 @@ static void test_multi_seg_distinct(void)
 }


+/*
+  Test 11: hp_hashnr (key-based) must equal hp_rec_hashnr (record-based).
+
+  This directly tests that building a key via hp_make_key() and then
+  hashing it with hp_hashnr() produces the same hash as hp_rec_hashnr()
+  on the original record. This catches divergence bugs where the two
+  functions process segments differently (e.g. VARCHAR pack_length
+  hardcoded to 2 in hp_hashnr but read from seg->bit_start in
+  hp_rec_hashnr).
+*/
+
+/* hp_hashnr is static by default; exposed via HEAP_UNIT_TESTS */
+extern ulong hp_hashnr(HP_KEYDEF *keydef, const uchar *key);
+
+/*
+  Record layout for mixed varchar + blob table:
+    byte 0:      null bitmap (bit 2 = city null, bit 3 = libname null)
+    byte 1:      varchar length_bytes=1 (libname: VARCHAR(21))
+    bytes 2-22:  varchar data (21 bytes)
+    bytes 23-24: blob packlength=2 (city: TEXT)
+    bytes 25-32: blob data pointer (8 bytes on x86_64)
+    byte 33:     flags byte (visible offset)
+  Total reclength: 34, recbuffer: ALIGN(MAX(34,8)+1, 8) = 40
+*/
+#define MIX_NULL_OFFSET 0
+#define MIX_VARCHAR_OFFSET 1
+#define MIX_VARCHAR_LEN 21
+#define MIX_VARCHAR_LENBYTES 1
+#define MIX_BLOB_OFFSET 23
+#define MIX_BLOB_PACKLEN 2
+#define MIX_REC_LENGTH 34
+#define MIX_KEY_BUF_SIZE 256
+
+
+static void setup_mixed_keydef(HP_KEYDEF *keydef, HA_KEYSEG *segs)
+{
+  /* Segment 0: blob (city TEXT) at offset 23 */
+  memset(&segs[0], 0, sizeof(segs[0]));
+  segs[0].type= HA_KEYTYPE_VARTEXT1;
+  segs[0].flag= HA_BLOB_PART | HA_VAR_LENGTH_PART;
+  segs[0].start= MIX_BLOB_OFFSET;
+  segs[0].length= 0; /* blob key segments must have length=0 */
+  segs[0].bit_start= MIX_BLOB_PACKLEN;
+  segs[0].charset= &my_charset_latin1;
+  segs[0].null_bit= 4; /* bit 2 in null bitmap */
+  segs[0].null_pos= MIX_NULL_OFFSET;
+
+  /* Segment 1: varchar (libname VARCHAR(21)) at offset 1 */
+  memset(&segs[1], 0, sizeof(segs[1]));
+  segs[1].type= HA_KEYTYPE_VARTEXT1;
+  segs[1].flag= HA_VAR_LENGTH_PART;
+  segs[1].start= MIX_VARCHAR_OFFSET;
+  segs[1].length= MIX_VARCHAR_LEN;
+  segs[1].bit_start= MIX_VARCHAR_LENBYTES; /* 1-byte length prefix */
+  segs[1].charset= &my_charset_latin1;
+  segs[1].null_bit= 8; /* bit 3 in null bitmap */
+  segs[1].null_pos= MIX_NULL_OFFSET;
+
+  setup_keydef(keydef, segs, 2);
+  keydef->has_blob_seg= 1;
+}
+
+
+static void build_mixed_record(uchar *rec, const uchar *blob_data,
+                               uint16 blob_len, const uchar *varchar_data,
+                               uint8 varchar_len,
+                               my_bool blob_null, my_bool varchar_null)
+{
+  memset(rec, 0, MIX_REC_LENGTH);
+
+  /* null bitmap */
+  if (blob_null)
+    rec[MIX_NULL_OFFSET] |= 4;
+  if (varchar_null)
+    rec[MIX_NULL_OFFSET] |= 8;
+
+  /* varchar: 1-byte length prefix + data */
+  rec[MIX_VARCHAR_OFFSET]= varchar_len;
+  if (varchar_data && varchar_len > 0)
+    memcpy(rec + MIX_VARCHAR_OFFSET + MIX_VARCHAR_LENBYTES,
+           varchar_data, varchar_len);
+
+  /* blob: packlength(2) + data pointer
*/ + int2store(rec + MIX_BLOB_OFFSET, blob_len); + memcpy(rec + MIX_BLOB_OFFSET + MIX_BLOB_PACKLEN, &blob_data, PTR_SIZE); +} + + +static void test_key_vs_rec_hash_consistency(void) +{ + HA_KEYSEG segs[2]; + HP_KEYDEF keydef; + uchar rec[MIX_REC_LENGTH]; + uchar key_buf[MIX_KEY_BUF_SIZE]; + ulong rec_hash, key_hash; + + const uchar *city= (const uchar *) "New York"; + uint16 city_len= 8; + const uchar *libname= (const uchar *) "New York Public Libra"; + uint8 libname_len= 21; + + setup_mixed_keydef(&keydef, segs); + + /* Build record and compute record-based hash (used at INSERT time) */ + build_mixed_record(rec, city, city_len, libname, libname_len, + FALSE, FALSE); + rec_hash= hp_rec_hashnr(&keydef, rec); + + /* Build key via hp_make_key and compute key-based hash (used at LOOKUP) */ + hp_make_key(&keydef, key_buf, rec); + key_hash= hp_hashnr(&keydef, key_buf); + + ok(rec_hash == key_hash, + "key_vs_rec_hash: rec_hash (%lu) == key_hash (%lu) " + "for mixed blob+varchar(1B) key", + rec_hash, key_hash); + + /* Second test: different data to ensure it's not a coincidence */ + { + const uchar *city2= (const uchar *) "San Fran"; + uint16 city2_len= 8; + const uchar *libname2= (const uchar *) "SF Public Library"; + uint8 libname2_len= 17; + + build_mixed_record(rec, city2, city2_len, libname2, libname2_len, + FALSE, FALSE); + rec_hash= hp_rec_hashnr(&keydef, rec); + hp_make_key(&keydef, key_buf, rec); + key_hash= hp_hashnr(&keydef, key_buf); + + ok(rec_hash == key_hash, + "key_vs_rec_hash: rec_hash (%lu) == key_hash (%lu) " + "for second mixed blob+varchar(1B) data", + rec_hash, key_hash); + } + + /* Third test: varchar with 2-byte length prefix (field_length >= 256) */ + { + HA_KEYSEG segs2b[2]; + HP_KEYDEF keydef2b; + uchar rec2b[MIX_REC_LENGTH + 256]; + uchar key2b[MIX_KEY_BUF_SIZE + 256]; + + /* + Copy the setup but change varchar to 2-byte length prefix. + This should always work because hp_hashnr already hardcodes 2. 
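
A worked example of that invariant (illustrative; int2store is the usual byte-store macro): the same 7-byte value "NYC Lib" in a VARCHAR(21) column with a 1-byte record prefix, first in record format, then as hp_make_key() emits it:

    uchar rec_fmt[1 + 21];                /* record: 1B length prefix */
    uchar key_fmt[2 + 21];                /* key: always 2B prefix + padding */

    memset(rec_fmt, 0, sizeof(rec_fmt));
    rec_fmt[0]= 7;
    memcpy(rec_fmt + 1, "NYC Lib", 7);

    memset(key_fmt, 0, sizeof(key_fmt));  /* tail beyond the data stays zero */
    int2store(key_fmt, 7);
    memcpy(key_fmt + 2, "NYC Lib", 7);

hp_hashnr() and hp_key_cmp() can then read an unconditional 2-byte prefix regardless of the record packing.
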
+    */
+    memcpy(segs2b, segs, sizeof(segs));
+    segs2b[1].bit_start= 2; /* 2-byte length prefix */
+    segs2b[1].length= 256;
+    setup_keydef(&keydef2b, segs2b, 2);
+    keydef2b.has_blob_seg= 1;
+
+    memset(rec2b, 0, sizeof(rec2b));
+    /* blob */
+    rec2b[MIX_NULL_OFFSET]= 0;
+    int2store(rec2b + MIX_BLOB_OFFSET, city_len);
+    memcpy(rec2b + MIX_BLOB_OFFSET + MIX_BLOB_PACKLEN, &city, PTR_SIZE);
+    /*
+      varchar with 2B length prefix. Store only 20 of libname's 21
+      bytes: the data starts at offset 3, so a 21-byte copy would end
+      at offset 23 and overwrite the low length byte of the blob
+      descriptor stored at MIX_BLOB_OFFSET above, making
+      hp_rec_hashnr() read far past the end of the 8-byte blob value.
+    */
+    int2store(rec2b + MIX_VARCHAR_OFFSET, 20);
+    memcpy(rec2b + MIX_VARCHAR_OFFSET + 2, libname, 20);
+
+    rec_hash= hp_rec_hashnr(&keydef2b, rec2b);
+    hp_make_key(&keydef2b, key2b, rec2b);
+    key_hash= hp_hashnr(&keydef2b, key2b);
+
+    ok(rec_hash == key_hash,
+       "key_vs_rec_hash: rec_hash (%lu) == key_hash (%lu) "
+       "for mixed blob+varchar(2B) key",
+       rec_hash, key_hash);
+  }
+
+  /* Fourth test: blob-only key (no varchar) — should always match */
+  {
+    HA_KEYSEG seg_blob;
+    HP_KEYDEF kd_blob;
+    uchar rec_b[REC_LENGTH];
+    uchar key_b[KEY_BUF_SIZE];
+
+    setup_blob_keyseg(&seg_blob, TRUE);
+    setup_keydef(&kd_blob, &seg_blob, 1);
+
+    build_record(rec_b, 1, city, city_len, FALSE);
+    rec_hash= hp_rec_hashnr(&kd_blob, rec_b);
+    hp_make_key(&kd_blob, key_b, rec_b);
+    key_hash= hp_hashnr(&kd_blob, key_b);
+
+    ok(rec_hash == key_hash,
+       "key_vs_rec_hash: rec_hash (%lu) == key_hash (%lu) "
+       "for blob-only key",
+       rec_hash, key_hash);
+  }
+}
+
+
 int main(int argc __attribute__((unused)),
          char **argv __attribute__((unused)))
 {
   MY_INIT("hp_test_hash");
-  plan(43);
+  plan(47);

   diag("Test 1: Hash consistency between record and key formats");
   test_hash_consistency();
@@ -742,6 +937,9 @@ int main(int argc __attribute__((unused)),
   diag("Test 10: Multi-segment DISTINCT key (sj-materialize)");
   test_multi_seg_distinct();

+  diag("Test 11: hp_hashnr vs hp_rec_hashnr consistency");
+  test_key_vs_rec_hash_consistency();
+
   my_end(0);
   return exit_status();
 }
diff --git a/storage/heap/hp_test_key_setup-t.cc b/storage/heap/hp_test_key_setup-t.cc
index 82cd025bca756..c933a76c05491 100644
--- a/storage/heap/hp_test_key_setup-t.cc
+++ b/storage/heap/hp_test_key_setup-t.cc
@@ -23,6 +23,32 @@
 #include "ha_heap.h"
 #include "heapdef.h"

+/*
+  RAII guard for the fake THD used by unit tests.
+  Allocates a zero-initialized THD (without calling the constructor),
+  sets max_heap_table_size, installs it as current_thd, and tears it
+  down on destruction. This is technically UB (no C++ construction),
+  but works because heap_prepare_hp_create_info only reads
+  thd->variables.max_heap_table_size from the zeroed memory.
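
Typical use, shown only to make the guard's scope explicit (mirrors the tests that follow):

    {
      Fake_thd_guard thd_guard;           /* installs the zeroed THD */
      HP_CREATE_INFO hp_ci;
      memset(&hp_ci, 0, sizeof(hp_ci));
      /* ... fill hp_ci, call test_heap_prepare_hp_create_info() ... */
    }                                      /* THD uninstalled and freed here */
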
+*/ +class Fake_thd_guard +{ + char *m_buf; +public: + Fake_thd_guard(ulonglong max_heap_size= 1024*1024) + { + m_buf= (char*) calloc(1, sizeof(THD)); + THD *thd= (THD*) m_buf; + thd->variables.max_heap_table_size= max_heap_size; + set_current_thd(thd); + } + ~Fake_thd_guard() + { + set_current_thd(NULL); + free(m_buf); + } +}; + static const LEX_CSTRING test_field_name= {STRING_WITH_LEN("")}; /* Wrapper declared in ha_heap.cc */ @@ -148,10 +174,7 @@ static void test_distinct_key_truncation() "distinct_key_truncation setup: key_part.length = pack_length() = %u", (uint) local_kpi.length); - char *fake_thd= (char*) calloc(1, sizeof(THD)); - THD *real_thd= (THD*) fake_thd; - real_thd->variables.max_heap_table_size= 1024*1024; - set_current_thd(real_thd); + Fake_thd_guard thd_guard; HP_CREATE_INFO hp_ci; memset(&hp_ci, 0, sizeof(hp_ci)); @@ -161,9 +184,6 @@ static void test_distinct_key_truncation() int err= test_heap_prepare_hp_create_info(&test_table, TRUE, &hp_ci); - set_current_thd(NULL); - free(fake_thd); - ok(err == 0, "distinct_key_truncation: heap_prepare succeeded (err=%d)", err); @@ -175,7 +195,6 @@ static void test_distinct_key_truncation() widening is not needed. These assertions are deferred to Phase 1 where they are exercised. */ -#if 0 /* Phase 1: distinct_key_truncation assertions */ uint32 expected_length= bf.max_data_length(); ok(local_kpi.length == expected_length, "distinct_key_truncation: key_part.length (%u) == max_data_length() (%u)", @@ -191,7 +210,6 @@ static void test_distinct_key_truncation() ok(local_sql_key.key_length == expected_store_length, "distinct_key_truncation: key_length (%u) == expected (%u)", (uint) local_sql_key.key_length, (uint) expected_store_length); -#endif my_free(hp_ci.keydef); my_free(hp_ci.blob_descs); @@ -319,10 +337,7 @@ class Hp_test_varchar_key_flag "garbage_flag setup: key_part.length = %u (field_length)", (uint) local_kpi.length); - char *fake_thd= (char*) calloc(1, sizeof(THD)); - THD *real_thd= (THD*) fake_thd; - real_thd->variables.max_heap_table_size= 1024*1024; - set_current_thd(real_thd); + Fake_thd_guard thd_guard; HP_CREATE_INFO hp_ci; memset(&hp_ci, 0, sizeof(hp_ci)); @@ -332,9 +347,6 @@ class Hp_test_varchar_key_flag int err= test_heap_prepare_hp_create_info(&test_table, TRUE, &hp_ci); - set_current_thd(NULL); - free(fake_thd); - ok(err == 0, "garbage_flag: heap_prepare succeeded (err=%d)", err); @@ -350,11 +362,9 @@ class Hp_test_varchar_key_flag The heap_prepare_hp_create_info fix (field->key_part_flag() instead of key_part->key_part_flag) is deferred to Phase 1. */ -#if 0 /* Phase 1: garbage_key_part_flag assertion */ ok(!(seg->flag & HA_BLOB_PART), "garbage_flag: seg->flag (0x%x) does NOT have HA_BLOB_PART", (uint) seg->flag); -#endif HP_KEYDEF *kd= &hp_ci.keydef[0]; @@ -402,13 +412,1002 @@ class Hp_test_varchar_key_flag }; +/* + rebuild_key_from_group_buff: mixed blob + varchar GROUP BY key. + + Simulates the GROUP BY key format for: + GROUP BY city (TEXT), libname (VARCHAR(21)) + The GROUP BY buffer uses Field_varstring format (2B length + data) + for all parts, with store_length advancing by fixed amounts. + rebuild_key_from_group_buff must correctly parse the key buffer and populate + record[0]'s blob field (packlength + pointer) and varchar field + (length_bytes + data). 
+ + Test wrapper in ha_heap.cc: +*/ +extern void test_rebuild_key_from_group_buff(ha_heap *handler, TABLE *tbl, + HP_INFO *fake_file, HP_KEYDEF *keydef, + const uchar *key, uint key_index, + const uchar **rebuilt_key); + +/* + Record layout for mixed blob+varchar GROUP BY test: + byte 0: null bitmap + bytes 1-2: blob packlength=2 (length, little-endian) + bytes 3-10: blob data pointer (8 bytes) + byte 11: varchar length_bytes=1 (field_length=21 < 256) + bytes 12-32: varchar data (21 bytes max) + reclength = 33 +*/ +#define MIX_REC_NULL_OFFSET 0 +#define MIX_BLOB_OFFSET 1 +#define MIX_BLOB_PACKLEN 2 +#define MIX_VARCHAR_OFFSET 11 +#define MIX_VARCHAR_FIELD_LEN 21 +#define MIX_REC_LENGTH 33 + +static void test_rebuild_key_from_group_buff_mixed() +{ + uchar rec[MIX_REC_LENGTH]; + memset(rec, 0xA5, sizeof(rec)); /* poison with known pattern */ + + TABLE_SHARE share; + memset(static_cast(&share), 0, sizeof(share)); + share.fields= 2; + share.blob_fields= 0; + share.keys= 1; + share.reclength= MIX_REC_LENGTH; + share.rec_buff_length= MIX_REC_LENGTH; + share.db_record_offset= 1; + + /* Create blob field: city TEXT (packlength=2) */ + alignas(Field_blob) char bf_storage[sizeof(Field_blob)]; + Field_blob *bfp= make_test_field_blob(bf_storage, + rec + MIX_BLOB_OFFSET, + rec + MIX_REC_NULL_OFFSET, + 2, &share, + MIX_BLOB_PACKLEN, + &my_charset_latin1); + bfp->field_index= 0; + + /* Create varchar field: libname VARCHAR(21) */ + static const LEX_CSTRING vs_name= {STRING_WITH_LEN("")}; + alignas(Field_varstring) char vs_storage[sizeof(Field_varstring)]; + Field_varstring *vfp= ::new (vs_storage) Field_varstring( + rec + MIX_VARCHAR_OFFSET, + MIX_VARCHAR_FIELD_LEN, + 1, /* length_bytes: 1 for field_length < 256 */ + rec + MIX_REC_NULL_OFFSET, + 4, /* null_bit */ + Field::NONE, + &vs_name, + &share, + DTCollation(&my_charset_latin1)); + vfp->field_index= 1; + + Field *field_array[3]= { bfp, vfp, NULL }; + + /* + GROUP BY key: two parts. + Part 0: blob (city) — null_bit=2, key_part_flag=HA_BLOB_PART, length=0 + Part 1: varchar (libname) — null_bit=4, key_part_flag=HA_VAR_LENGTH_PART, length=21 + */ + KEY_PART_INFO kpi[2]; + memset(kpi, 0, sizeof(kpi)); + + /* Blob key part */ + kpi[0].field= bfp; + kpi[0].offset= MIX_BLOB_OFFSET; + kpi[0].length= 0; /* Field_blob::key_length() */ + kpi[0].key_part_flag= HA_BLOB_PART; + kpi[0].null_bit= 2; + kpi[0].null_offset= 0; + kpi[0].type= bfp->key_type(); + /* + GROUP BY store_length: computed from group buffer Field_varstring. + For blob with key_field_length=16382: + Field_varstring(16382).pack_length() = 16384 + + 1 (maybe_null) = 16385 + For this test use a smaller key_field_length = 100 for simplicity. 
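
The buffer arithmetic used below, spelled out (values specific to this test; the enum names are illustrative):

    /* store_length = key field length + 2 length bytes + 1 null byte */
    enum {
      BLOB_STORE_LEN    = 100 + 2 + 1,                        /* = 103 */
      VARCHAR_STORE_LEN = 21 + 2 + 1,                         /* =  24 */
      KEY_BUF_LEN       = BLOB_STORE_LEN + VARCHAR_STORE_LEN  /* = 127 */
    };
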
+ */ + uint blob_key_field_len= 100; + kpi[0].store_length= blob_key_field_len + 2 /* len_bytes */ + 1 /* null */; + + /* Varchar key part */ + kpi[1].field= vfp; + kpi[1].offset= MIX_VARCHAR_OFFSET; + kpi[1].length= MIX_VARCHAR_FIELD_LEN; + kpi[1].key_part_flag= HA_VAR_LENGTH_PART; + kpi[1].null_bit= 4; + kpi[1].null_offset= 0; + kpi[1].type= vfp->key_type(); + /* + VARCHAR store_length in GROUP BY buffer: + Field_varstring(21).pack_length() = 21 + 2 (key_part_length_bytes always 2) + + 1 (maybe_null) = 24 + */ + kpi[1].store_length= MIX_VARCHAR_FIELD_LEN + 2 /* len_bytes */ + 1 /* null */; + + KEY sql_key; + memset(&sql_key, 0, sizeof(sql_key)); + sql_key.user_defined_key_parts= 2; + sql_key.usable_key_parts= 2; + sql_key.key_part= kpi; + sql_key.algorithm= HA_KEY_ALG_HASH; + + TABLE test_table; + memset(static_cast(&test_table), 0, sizeof(test_table)); + test_table.record[0]= rec; + test_table.s= &share; + test_table.field= field_array; + test_table.key_info= &sql_key; + share.key_info= &sql_key; + bfp->table= &test_table; + vfp->table= &test_table; + + uint blob_offsets[1]= { 0 }; + share.blob_field= blob_offsets; + + /* + Build a GROUP BY key buffer in the same format as end_update(). + Layout per key part: [null_flag_byte] [2B_length] [data...] + padded to store_length. + + Part 0 (blob "New York"): + byte 0: null=0 + bytes 1-2: length=8 (LE) + bytes 3-10: "New York" + bytes 11-102: padding (zero) + Part 1 (varchar "NYC Lib"): + byte 103: null=0 + bytes 104-105: length=7 (LE) + bytes 106-112: "NYC Lib" + bytes 113-126: padding (zero) + */ + uint key_buf_len= kpi[0].store_length + kpi[1].store_length; + uchar *key_buf= (uchar*) calloc(1, key_buf_len); + + /* Part 0: blob "New York" */ + uchar *p= key_buf; + p[0]= 0; /* not null */ + int2store(p + 1, 8); /* length = 8 */ + memcpy(p + 3, "New York", 8); + + /* Part 1: varchar "NYC Lib" */ + p= key_buf + kpi[0].store_length; + p[0]= 0; /* not null */ + int2store(p + 1, 7); /* length = 7 */ + memcpy(p + 3, "NYC Lib", 7); + + /* + Now set up HEAP structures for hp_make_key. + Use heap_prepare_hp_create_info to create them. + */ + Fake_thd_guard thd_guard; + + HP_CREATE_INFO hp_ci; + memset(&hp_ci, 0, sizeof(hp_ci)); + hp_ci.max_table_size= 1024*1024; + hp_ci.keys= 1; + hp_ci.reclength= MIX_REC_LENGTH; + + int err= test_heap_prepare_hp_create_info(&test_table, TRUE, &hp_ci); + ok(err == 0, "rebuild_key_from_group_buff_mixed: heap_prepare succeeded (err=%d)", err); + + /* Verify blob segment */ + HP_KEYDEF *kd= &hp_ci.keydef[0]; + ok(kd->keysegs == 2, + "rebuild_key_from_group_buff_mixed: keysegs=%u (expected 2)", kd->keysegs); + ok(kd->has_blob_seg != 0, + "rebuild_key_from_group_buff_mixed: has_blob_seg is set"); + + /* + Create a minimal ha_heap + fake HP_INFO for rebuild_key_from_group_buff. + rebuild_key_from_group_buff reads from table->key_info (SQL-layer) and + writes to table->record[0], then calls hp_make_key into + file->lastkey. 
+ */ + uchar lastkey_buf[512]; + memset(lastkey_buf, 0, sizeof(lastkey_buf)); + HP_INFO fake_file; + memset(&fake_file, 0, sizeof(fake_file)); + fake_file.lastkey= (uchar*) lastkey_buf; + + ha_heap handler(heap_hton, &share); + + /* Reset record[0] to poison to detect unwritten bytes */ + memset(rec, 0xA5, sizeof(rec)); + + const uchar *rebuilt= NULL; + test_rebuild_key_from_group_buff(&handler, &test_table, &fake_file, + kd, key_buf, 0, &rebuilt); + + /* Verify record[0] was populated correctly */ + /* Blob field: packlength=2 bytes of length + 8 bytes of pointer */ + uint16 blob_len_in_rec; + memcpy(&blob_len_in_rec, rec + MIX_BLOB_OFFSET, 2); + ok(blob_len_in_rec == 8, + "rebuild_key_from_group_buff_mixed: blob length in record[0] = %u (expected 8)", + (uint) blob_len_in_rec); + + const uchar *blob_ptr_in_rec; + memcpy(&blob_ptr_in_rec, rec + MIX_BLOB_OFFSET + 2, sizeof(void*)); + ok(memcmp(blob_ptr_in_rec, "New York", 8) == 0, + "rebuild_key_from_group_buff_mixed: blob data = 'New York'"); + + /* Varchar field: length_bytes=1 byte of length + data */ + uint8 varchar_len_in_rec= rec[MIX_VARCHAR_OFFSET]; + ok(varchar_len_in_rec == 7, + "rebuild_key_from_group_buff_mixed: varchar length in record[0] = %u (expected 7)", + (uint) varchar_len_in_rec); + ok(memcmp(rec + MIX_VARCHAR_OFFSET + 1, "NYC Lib", 7) == 0, + "rebuild_key_from_group_buff_mixed: varchar data = 'NYC Lib'"); + + free(key_buf); + my_free(hp_ci.keydef); + my_free(hp_ci.blob_descs); + vfp->~Field_varstring(); + bfp->~Field_blob(); +} + + +/* + Test: heap_prepare_hp_create_info for various non-blob key types. + + Verifies that has_blob_seg is false and seg->flag does not contain + HA_BLOB_PART for: + - VARCHAR-only keys (Field_varstring, length_bytes=1) + - Fixed-length keys (Field_long = INT) + - ENUM keys (Field_enum) + - Mixed VARCHAR + INT keys + + Also verifies seg->length, seg->type, seg->bit_start are correct. 
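
For reference, the concrete values the four tests assert (summarized from the ok() checks below):

    VARCHAR(28): seg->type=HA_KEYTYPE_VARTEXT1, seg->length=28,
                 HA_VAR_LENGTH_PART set, no HA_BLOB_PART, has_blob_seg FALSE
    INT:         seg->type=HA_KEYTYPE_BINARY, seg->length=4,
                 neither var-length nor blob flags, has_blob_seg FALSE
    ENUM:        seg->type=HA_KEYTYPE_BINARY, seg->length=1 (packlength),
                 no HA_BLOB_PART, has_blob_seg FALSE
    INT+VARCHAR: seg[0].length=4; seg[1].length=20 with HA_VAR_LENGTH_PART;
                 no segment has HA_BLOB_PART
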
+*/ + +/* Helper: set up a single-field TABLE + KEY for heap_prepare testing */ +struct Hp_test_single_key +{ + TABLE_SHARE share; + TABLE test_table; + KEY_PART_INFO kpi; + KEY sql_key; + Field *field_array[2]; + uchar rec_buf[64]; + uint blob_offsets[1]; + + void init(Field *field, uint offset, uint rec_length) + { + memset(rec_buf, 0, sizeof(rec_buf)); + memset(static_cast(&share), 0, sizeof(share)); + share.fields= 1; + share.keys= 1; + share.reclength= rec_length; + share.rec_buff_length= rec_length; + share.db_record_offset= 1; + share.blob_fields= 0; + blob_offsets[0]= 0; + share.blob_field= blob_offsets; + + field_array[0]= field; + field_array[1]= NULL; + + memset(&kpi, 0, sizeof(kpi)); + kpi.field= field; + kpi.offset= offset; + kpi.length= (uint16) field->key_length(); + kpi.key_part_flag= field->key_part_flag(); + kpi.type= field->key_type(); + kpi.store_length= kpi.length; + if (field->real_maybe_null()) + kpi.store_length+= HA_KEY_NULL_LENGTH; + if (field->key_part_flag() & HA_VAR_LENGTH_PART) + kpi.store_length+= field->key_part_length_bytes(); + + memset(&sql_key, 0, sizeof(sql_key)); + sql_key.user_defined_key_parts= 1; + sql_key.usable_key_parts= 1; + sql_key.key_part= &kpi; + sql_key.algorithm= HA_KEY_ALG_HASH; + sql_key.key_length= kpi.store_length; + + memset(static_cast(&test_table), 0, sizeof(test_table)); + test_table.record[0]= rec_buf; + test_table.s= &share; + test_table.field= field_array; + test_table.key_info= &sql_key; + share.key_info= &sql_key; + + field->table= &test_table; + } + + int run_hp_create(HP_CREATE_INFO *hp_ci) + { + Fake_thd_guard thd_guard; + + memset(hp_ci, 0, sizeof(*hp_ci)); + hp_ci->max_table_size= 1024*1024; + hp_ci->keys= 1; + hp_ci->reclength= share.reclength; + + return test_heap_prepare_hp_create_info(&test_table, TRUE, hp_ci); + } +}; + + +static void test_varchar_only_key() +{ + /* VARCHAR(28) NOT NULL, length_bytes=1 */ + static const LEX_CSTRING fname= {STRING_WITH_LEN("v1")}; + TABLE_SHARE dummy_share; + memset(static_cast(&dummy_share), 0, sizeof(dummy_share)); + alignas(Field_varstring) char vs_storage[sizeof(Field_varstring)]; + Field_varstring *vs= ::new (vs_storage) Field_varstring( + (uchar*) NULL + 1, 28, 1, (uchar*) 0, 0, + Field::NONE, &fname, &dummy_share, + DTCollation(&my_charset_latin1)); + vs->field_index= 0; + + Hp_test_single_key ctx; + ctx.init(vs, 1, 30); + + HP_CREATE_INFO hp_ci; + int err= ctx.run_hp_create(&hp_ci); + ok(err == 0, "varchar_only: heap_prepare succeeded (err=%d)", err); + + HA_KEYSEG *seg= hp_ci.keydef[0].seg; + ok(seg->length == 28, + "varchar_only: seg->length = %u (expected 28)", (uint) seg->length); + ok(seg->type == HA_KEYTYPE_VARTEXT1, + "varchar_only: seg->type = %d (expected VARTEXT1=%d)", + (int) seg->type, (int) HA_KEYTYPE_VARTEXT1); + /* + bit_start for varchar is set by hp_create(), not + heap_prepare_hp_create_info(). After prepare it's 0. 
+ */ + ok(seg->bit_start == 0, + "varchar_only: seg->bit_start = %u (expected 0 — set later by hp_create)", + (uint) seg->bit_start); + ok(!(seg->flag & HA_BLOB_PART), + "varchar_only: seg->flag (0x%x) has NO HA_BLOB_PART", + (uint) seg->flag); + ok((seg->flag & HA_VAR_LENGTH_PART), + "varchar_only: seg->flag (0x%x) has HA_VAR_LENGTH_PART", + (uint) seg->flag); + ok(!hp_ci.keydef[0].has_blob_seg, + "varchar_only: has_blob_seg is FALSE (no blob segments)"); + + my_free(hp_ci.keydef); + vs->~Field_varstring(); +} + + +static void test_int_only_key() +{ + /* INT NOT NULL */ + static const LEX_CSTRING fname= {STRING_WITH_LEN("i1")}; + TABLE_SHARE dummy_share; + memset(static_cast(&dummy_share), 0, sizeof(dummy_share)); + alignas(Field_long) char fl_storage[sizeof(Field_long)]; + Field_long *fl= ::new (fl_storage) Field_long( + (uchar*) NULL + 1, 11, (uchar*) 0, 0, + Field::NONE, &fname, false, false); + fl->field_index= 0; + + Hp_test_single_key ctx; + ctx.init(fl, 1, 5); + + HP_CREATE_INFO hp_ci; + int err= ctx.run_hp_create(&hp_ci); + ok(err == 0, "int_only: heap_prepare succeeded (err=%d)", err); + + HA_KEYSEG *seg= hp_ci.keydef[0].seg; + ok(seg->length == 4, + "int_only: seg->length = %u (expected 4)", (uint) seg->length); + ok(seg->type == HA_KEYTYPE_BINARY, + "int_only: seg->type = %d (expected BINARY=%d)", + (int) seg->type, (int) HA_KEYTYPE_BINARY); + ok(!(seg->flag & HA_BLOB_PART), + "int_only: seg->flag (0x%x) has NO HA_BLOB_PART", + (uint) seg->flag); + ok(!(seg->flag & HA_VAR_LENGTH_PART), + "int_only: seg->flag (0x%x) has NO HA_VAR_LENGTH_PART", + (uint) seg->flag); + ok(!hp_ci.keydef[0].has_blob_seg, + "int_only: has_blob_seg is FALSE"); + + my_free(hp_ci.keydef); + fl->~Field_long(); +} + + +static void test_enum_key() +{ + /* ENUM('a','','b') NULLABLE */ + static const LEX_CSTRING fname= {STRING_WITH_LEN("e1")}; + static const char *enum_names[]= { "a", "", "b", NULL }; + static unsigned int enum_lengths[]= { 1, 0, 1 }; + TYPELIB enum_typelib= { 3, "", enum_names, enum_lengths }; + TABLE_SHARE dummy_share; + memset(static_cast(&dummy_share), 0, sizeof(dummy_share)); + alignas(Field_enum) char fe_storage[sizeof(Field_enum)]; + /* + Field_enum(ptr, len, null_ptr, null_bit, unireg, name, + packlength, typelib, collation) + */ + Field_enum *fe= ::new (fe_storage) Field_enum( + (uchar*) NULL + 1, 1, (uchar*) NULL, 2, + Field::NONE, &fname, 1, &enum_typelib, + &my_charset_latin1); + fe->field_index= 0; + + Hp_test_single_key ctx; + ctx.init(fe, 1, 3); + + HP_CREATE_INFO hp_ci; + int err= ctx.run_hp_create(&hp_ci); + ok(err == 0, "enum: heap_prepare succeeded (err=%d)", err); + + HA_KEYSEG *seg= hp_ci.keydef[0].seg; + ok(seg->length == 1, + "enum: seg->length = %u (expected 1 = packlength)", (uint) seg->length); + ok(seg->type == HA_KEYTYPE_BINARY, + "enum: seg->type = %d (expected BINARY=%d)", + (int) seg->type, (int) HA_KEYTYPE_BINARY); + ok(!(seg->flag & HA_BLOB_PART), + "enum: seg->flag (0x%x) has NO HA_BLOB_PART", (uint) seg->flag); + ok(!hp_ci.keydef[0].has_blob_seg, + "enum: has_blob_seg is FALSE"); + + my_free(hp_ci.keydef); + fe->~Field_enum(); +} + + +static void test_mixed_int_varchar_key() +{ + /* + Two-part key: INT(4 bytes) + VARCHAR(20), simulating the + main.having GROUP BY (bigint, varchar(20)). 
+ */ + static const LEX_CSTRING fname_i= {STRING_WITH_LEN("id")}; + static const LEX_CSTRING fname_v= {STRING_WITH_LEN("description")}; + TABLE_SHARE dummy_share; + memset(static_cast(&dummy_share), 0, sizeof(dummy_share)); + dummy_share.fields= 2; + dummy_share.keys= 1; + dummy_share.reclength= 26; /* 1 null + 4 int + 1 len + 20 varchar */ + dummy_share.rec_buff_length= 26; + dummy_share.db_record_offset= 1; + dummy_share.blob_fields= 0; + uint blob_offsets[1]= { 0 }; + dummy_share.blob_field= blob_offsets; + + alignas(Field_long) char fl_storage[sizeof(Field_long)]; + Field_long *fl= ::new (fl_storage) Field_long( + (uchar*) NULL + 1, 11, (uchar*) 0, 0, + Field::NONE, &fname_i, false, false); + fl->field_index= 0; + + alignas(Field_varstring) char vs_storage[sizeof(Field_varstring)]; + Field_varstring *vs= ::new (vs_storage) Field_varstring( + (uchar*) NULL + 5, 20, 1, (uchar*) 0, 0, + Field::NONE, &fname_v, &dummy_share, + DTCollation(&my_charset_latin1)); + vs->field_index= 1; + + Field *field_array[3]= { fl, vs, NULL }; + + KEY_PART_INFO kpis[2]; + memset(kpis, 0, sizeof(kpis)); + kpis[0].field= fl; + kpis[0].offset= 1; + kpis[0].length= 4; + kpis[0].key_part_flag= fl->key_part_flag(); + kpis[0].type= fl->key_type(); + kpis[0].store_length= 4; + + kpis[1].field= vs; + kpis[1].offset= 5; + kpis[1].length= 20; + kpis[1].key_part_flag= vs->key_part_flag(); + kpis[1].type= vs->key_type(); + kpis[1].store_length= 20 + 2; /* + key_part_length_bytes */ + + KEY sql_key; + memset(&sql_key, 0, sizeof(sql_key)); + sql_key.user_defined_key_parts= 2; + sql_key.usable_key_parts= 2; + sql_key.key_part= kpis; + sql_key.algorithm= HA_KEY_ALG_HASH; + sql_key.key_length= 4 + 20 + 2; + + TABLE test_table; + uchar rec_buf[26]; + memset(rec_buf, 0, sizeof(rec_buf)); + memset(static_cast(&test_table), 0, sizeof(test_table)); + test_table.record[0]= rec_buf; + test_table.s= &dummy_share; + test_table.field= field_array; + test_table.key_info= &sql_key; + dummy_share.key_info= &sql_key; + + fl->table= &test_table; + vs->table= &test_table; + + Fake_thd_guard thd_guard; + + HP_CREATE_INFO hp_ci; + memset(&hp_ci, 0, sizeof(hp_ci)); + hp_ci.max_table_size= 1024*1024; + hp_ci.keys= 1; + hp_ci.reclength= 26; + + int err= test_heap_prepare_hp_create_info(&test_table, TRUE, &hp_ci); + + ok(err == 0, "int_varchar: heap_prepare succeeded (err=%d)", err); + + HP_KEYDEF *kd= &hp_ci.keydef[0]; + ok(kd->keysegs == 2, + "int_varchar: keysegs = %u (expected 2)", kd->keysegs); + { + my_bool any_blob= FALSE; + uint j; + for (j= 0; j < kd->keysegs; j++) + if (kd->seg[j].flag & HA_BLOB_PART) + any_blob= TRUE; + ok(!any_blob, + "int_varchar: no keydef seg has HA_BLOB_PART"); + } + + HA_KEYSEG *seg0= &kd->seg[0]; + ok(seg0->length == 4, + "int_varchar: seg[0].length = %u (expected 4)", (uint) seg0->length); + ok(!(seg0->flag & HA_BLOB_PART), + "int_varchar: seg[0] has NO HA_BLOB_PART"); + + HA_KEYSEG *seg1= &kd->seg[1]; + ok(seg1->length == 20, + "int_varchar: seg[1].length = %u (expected 20)", (uint) seg1->length); + ok(!(seg1->flag & HA_BLOB_PART), + "int_varchar: seg[1] has NO HA_BLOB_PART"); + ok((seg1->flag & HA_VAR_LENGTH_PART), + "int_varchar: seg[1] has HA_VAR_LENGTH_PART"); + + my_free(hp_ci.keydef); + vs->~Field_varstring(); + fl->~Field_long(); +} + + +/* + Test: varchar→blob promotion in tmp table (main.having scenario). + + Simulates the case where: + 1. The SQL layer sets up KEY_PART_INFO with length=20 (varchar-sized) + 2. create_tmp_field promotes the field to Field_blob in the tmp table + 3. 
heap_prepare_hp_create_info is called with this mismatch + + The key_part has varchar-like setup (non-zero length, HA_VAR_LENGTH_PART + flag), but the actual field is a blob. heap_prepare_hp_create_info + must detect this via field->key_part_flag() and set seg->flag to + HA_BLOB_PART, seg->length to 0, and widen key_part->length. +*/ +static void test_varchar_promoted_to_blob() +{ + static const LEX_CSTRING fname_i= {STRING_WITH_LEN("id")}; + + /* + Record layout (mimics the tmp table after promotion): + byte 0: null bitmap + bytes 1-8: bigint (8 bytes) + bytes 9-10: blob packlength=2 + bytes 11-18: blob data pointer + reclength = 19 + */ + TABLE_SHARE share; + memset(static_cast(&share), 0, sizeof(share)); + share.fields= 2; + share.keys= 1; + share.reclength= 19; + share.rec_buff_length= 19; + share.db_record_offset= 1; + share.blob_fields= 0; /* Field_blob ctor increments */ + + uchar rec_buf[24]; + memset(rec_buf, 0, sizeof(rec_buf)); + + /* Field 0: bigint at offset 1 */ + alignas(Field_long) char fl_storage[sizeof(Field_long)]; + Field_long *fl= ::new (fl_storage) Field_long( + rec_buf + 1, 11, (uchar*) 0, 0, + Field::NONE, &fname_i, false, false); + fl->field_index= 0; + + /* Field 1: Field_blob at offset 9 (promoted from varchar(20)) */ + alignas(Field_blob) char bf_storage[sizeof(Field_blob)]; + Field_blob *bf= make_test_field_blob(bf_storage, + rec_buf + 9, + (uchar*) 0, 0, + &share, 2, + &my_charset_latin1); + bf->field_index= 1; + + Field *field_array[3]= { fl, bf, NULL }; + + uint blob_offsets[1]= { 1 }; + share.blob_field= blob_offsets; + + /* + KEY_PART_INFO: set up as if the SQL layer still thinks it's varchar. + key_part[1].length = 20 (varchar-like, non-zero). + key_part[1].key_part_flag = HA_VAR_LENGTH_PART (from original varchar). + But key_part[1].field = Field_blob (the promoted field). 
+ */ + KEY_PART_INFO kpis[2]; + memset(kpis, 0, sizeof(kpis)); + + kpis[0].field= fl; + kpis[0].offset= 1; + kpis[0].length= 8; + kpis[0].key_part_flag= 0; + kpis[0].type= fl->key_type(); + kpis[0].store_length= 8; + + kpis[1].field= bf; /* promoted blob */ + kpis[1].offset= 9; + kpis[1].length= 20; /* varchar-like, NOT 0 */ + kpis[1].key_part_flag= HA_VAR_LENGTH_PART; /* stale from varchar setup */ + kpis[1].type= HA_KEYTYPE_VARTEXT1; + kpis[1].store_length= 20 + 2; /* varchar store_length */ + + KEY sql_key; + memset(&sql_key, 0, sizeof(sql_key)); + sql_key.user_defined_key_parts= 2; + sql_key.usable_key_parts= 2; + sql_key.key_part= kpis; + sql_key.algorithm= HA_KEY_ALG_HASH; + sql_key.key_length= 8 + 20 + 2; + + TABLE test_table; + memset(static_cast(&test_table), 0, sizeof(test_table)); + test_table.record[0]= rec_buf; + test_table.s= &share; + test_table.field= field_array; + test_table.key_info= &sql_key; + share.key_info= &sql_key; + + fl->table= &test_table; + bf->table= &test_table; + + Fake_thd_guard thd_guard; + + HP_CREATE_INFO hp_ci; + memset(&hp_ci, 0, sizeof(hp_ci)); + hp_ci.max_table_size= 1024*1024; + hp_ci.keys= 1; + hp_ci.reclength= 19; + + int err= test_heap_prepare_hp_create_info(&test_table, TRUE, &hp_ci); + + ok(err == 0, + "promoted_blob: heap_prepare succeeded (err=%d)", err); + + HP_KEYDEF *kd= &hp_ci.keydef[0]; + ok(kd->keysegs == 2, + "promoted_blob: keysegs = %u (expected 2)", kd->keysegs); + + /* seg[0]: bigint — should be untouched */ + ok(kd->seg[0].length == 8, + "promoted_blob: seg[0].length = %u (expected 8)", (uint) kd->seg[0].length); + ok(!(kd->seg[0].flag & HA_BLOB_PART), + "promoted_blob: seg[0] has NO HA_BLOB_PART"); + + /* + seg[1]: the promoted blob. + heap_prepare_hp_create_info uses field->key_part_flag() which returns + HA_BLOB_PART for Field_blob. It must: + - set seg->flag to HA_BLOB_PART (not HA_VAR_LENGTH_PART) + - set seg->length to 0 (blob convention) + - widen key_part->length to max_data_length() + */ + ok(kd->seg[1].flag & HA_BLOB_PART, + "promoted_blob: seg[1].flag (0x%x) has HA_BLOB_PART", + (uint) kd->seg[1].flag); + ok(kd->seg[1].length == 0, + "promoted_blob: seg[1].length = %u (expected 0 = blob convention)", + (uint) kd->seg[1].length); + ok(kpis[1].length == bf->max_data_length(), + "promoted_blob: key_part.length widened to %u (expected %u)", + (uint) kpis[1].length, (uint) bf->max_data_length()); + + my_free(hp_ci.keydef); + my_free(hp_ci.blob_descs); + bf->~Field_blob(); + fl->~Field_long(); +} + + + + +/* + Test: needs_key_rebuild_from_group_buff flag on HP_KEYDEF. + + Verifies that heap_prepare_hp_create_info sets needs_key_rebuild_from_group_buff=TRUE + only when table->group is set and key 0 has blob segments (GROUP BY path). + Without table->group (DISTINCT/sj-materialize), the flag is FALSE even + if the key has blob segments. +*/ +static void test_needs_key_rebuild_from_group_buff() +{ + /* + Reuse the mixed blob+varchar layout from test_rebuild_key_from_group_buff_mixed. + Two key parts: blob (city TEXT) + varchar (libname VARCHAR(21)). 
+ */ + uchar rec[MIX_REC_LENGTH]; + memset(rec, 0, sizeof(rec)); + + TABLE_SHARE share; + memset(static_cast(&share), 0, sizeof(share)); + share.fields= 2; + share.blob_fields= 0; + share.keys= 1; + share.reclength= MIX_REC_LENGTH; + share.rec_buff_length= MIX_REC_LENGTH; + share.db_record_offset= 1; + + alignas(Field_blob) char bf_storage[sizeof(Field_blob)]; + Field_blob *bfp= make_test_field_blob(bf_storage, + rec + MIX_BLOB_OFFSET, + rec + MIX_REC_NULL_OFFSET, + 2, &share, + MIX_BLOB_PACKLEN, + &my_charset_latin1); + bfp->field_index= 0; + + static const LEX_CSTRING vs_name= {STRING_WITH_LEN("")}; + alignas(Field_varstring) char vs_storage[sizeof(Field_varstring)]; + Field_varstring *vfp= ::new (vs_storage) Field_varstring( + rec + MIX_VARCHAR_OFFSET, + MIX_VARCHAR_FIELD_LEN, + 1, + rec + MIX_REC_NULL_OFFSET, + 4, + Field::NONE, + &vs_name, + &share, + DTCollation(&my_charset_latin1)); + vfp->field_index= 1; + + Field *field_array[3]= { bfp, vfp, NULL }; + + KEY_PART_INFO kpi[2]; + memset(kpi, 0, sizeof(kpi)); + kpi[0].field= bfp; + kpi[0].offset= MIX_BLOB_OFFSET; + kpi[0].length= 0; + kpi[0].key_part_flag= HA_BLOB_PART; + kpi[0].null_bit= 2; + kpi[0].type= bfp->key_type(); + kpi[0].store_length= 103; + + kpi[1].field= vfp; + kpi[1].offset= MIX_VARCHAR_OFFSET; + kpi[1].length= MIX_VARCHAR_FIELD_LEN; + kpi[1].key_part_flag= HA_VAR_LENGTH_PART; + kpi[1].null_bit= 4; + kpi[1].type= vfp->key_type(); + kpi[1].store_length= MIX_VARCHAR_FIELD_LEN + 2 + 1; + + KEY sql_key; + memset(&sql_key, 0, sizeof(sql_key)); + sql_key.user_defined_key_parts= 2; + sql_key.usable_key_parts= 2; + sql_key.key_part= kpi; + sql_key.algorithm= HA_KEY_ALG_HASH; + + TABLE test_table; + memset(static_cast(&test_table), 0, sizeof(test_table)); + test_table.record[0]= rec; + test_table.s= &share; + test_table.field= field_array; + test_table.key_info= &sql_key; + share.key_info= &sql_key; + bfp->table= &test_table; + vfp->table= &test_table; + + uint blob_offsets[1]= { 0 }; + share.blob_field= blob_offsets; + + /* + A minimal ORDER group list (just needs to be non-NULL for detection). + We don't actually traverse it — only test_table.group != NULL matters. + */ + ORDER group_item; + memset(&group_item, 0, sizeof(group_item)); + + /* Test 1: with table->group set → needs_key_rebuild_from_group_buff = TRUE */ + test_table.group= &group_item; + + Fake_thd_guard thd_guard; + + HP_CREATE_INFO hp_ci; + memset(&hp_ci, 0, sizeof(hp_ci)); + hp_ci.max_table_size= 1024*1024; + hp_ci.keys= 1; + hp_ci.reclength= MIX_REC_LENGTH; + + int err= test_heap_prepare_hp_create_info(&test_table, TRUE, &hp_ci); + ok(err == 0, "needs_rebuild: with group, heap_prepare succeeded (err=%d)", err); + ok(hp_ci.keydef[0].needs_key_rebuild_from_group_buff != 0, + "needs_rebuild: with group + blob seg, flag is TRUE"); + + my_free(hp_ci.keydef); + my_free(hp_ci.blob_descs); + + /* Test 2: without table->group → needs_key_rebuild_from_group_buff = FALSE */ + test_table.group= NULL; + + memset(&hp_ci, 0, sizeof(hp_ci)); + hp_ci.max_table_size= 1024*1024; + hp_ci.keys= 1; + hp_ci.reclength= MIX_REC_LENGTH; + + err= test_heap_prepare_hp_create_info(&test_table, TRUE, &hp_ci); + ok(err == 0, "needs_rebuild: no group, heap_prepare succeeded (err=%d)", err); + ok(hp_ci.keydef[0].needs_key_rebuild_from_group_buff == 0, + "needs_rebuild: no group + blob seg, flag is FALSE"); + + my_free(hp_ci.keydef); + my_free(hp_ci.blob_descs); + vfp->~Field_varstring(); + bfp->~Field_blob(); +} + + +/* + Test: geometry GROUP BY key must NOT trigger blob key widening. 
+ + Field_geom::key_length() returns packlength (4), not 0 like Field_blob. + The widening condition in heap_prepare_hp_create_info must skip when + key_part->length <= pack_length_no_ptr(). Without this, len_delta + overflows (~4 billion), corrupting store_length and key_length, which + causes rebuild_key_from_group_buff() to read uninitialized memory. + + This test simulates a GROUP BY on a GEOMETRY(POINT) column: + - key_part->length = 4 (from Field_geom::key_length() = packlength) + - key_part->store_length = small (from GROUP BY buffer sizing) + After heap_prepare, key_part->length must still be 4 (not widened), + and store_length must not overflow. +*/ +static void test_geometry_group_by_no_widening() +{ + /* + Record layout: nullable geometry (POINT, packlength=4) + byte 0: null bitmap + bytes 1-4: blob packlength=4 + bytes 5-12: blob data pointer + reclength = 13 + */ + uchar rec[16]; + memset(rec, 0, sizeof(rec)); + + TABLE_SHARE share; + memset(static_cast(&share), 0, sizeof(share)); + share.fields= 1; + share.blob_fields= 0; + share.keys= 1; + share.reclength= 13; + share.rec_buff_length= 13; + share.db_record_offset= 1; + + /* GEOMETRY is a LONGBLOB (packlength=4) */ + alignas(Field_blob) char bf_storage[sizeof(Field_blob)]; + Field_blob *bfp= make_test_field_blob(bf_storage, + rec + 1, + rec + 0, + 2, &share, + 4 /* packlength for LONGBLOB */, + &my_charset_bin); + bfp->field_index= 0; + + Field *field_array[2]= { bfp, NULL }; + + KEY_PART_INFO kpi; + memset(&kpi, 0, sizeof(kpi)); + kpi.field= bfp; + kpi.offset= 1; + /* + GROUP BY path: Field_geom::key_length() returns packlength = 4. + finalize() sets m_key_part_info->length = field->key_length() = 4. + */ + kpi.length= 4; + kpi.key_part_flag= HA_BLOB_PART; + kpi.null_bit= 2; + kpi.null_offset= 0; + kpi.type= bfp->key_type(); + /* + GROUP BY store_length: set by finalize() from the group buffer + Field_varstring. Use a reasonable value (e.g. 100 + 2 + 1 = 103). 
+ */ + kpi.store_length= 103; + + KEY sql_key; + memset(&sql_key, 0, sizeof(sql_key)); + sql_key.user_defined_key_parts= 1; + sql_key.usable_key_parts= 1; + sql_key.key_part= &kpi; + sql_key.algorithm= HA_KEY_ALG_HASH; + sql_key.key_length= kpi.store_length; + + TABLE test_table; + memset(static_cast(&test_table), 0, sizeof(test_table)); + test_table.record[0]= rec; + test_table.s= &share; + test_table.field= field_array; + test_table.key_info= &sql_key; + share.key_info= &sql_key; + bfp->table= &test_table; + + uint blob_offsets[1]= { 0 }; + share.blob_field= blob_offsets; + + /* Set group to simulate GROUP BY path */ + ORDER group_item; + memset(&group_item, 0, sizeof(group_item)); + test_table.group= &group_item; + + Fake_thd_guard thd_guard; + + HP_CREATE_INFO hp_ci; + memset(&hp_ci, 0, sizeof(hp_ci)); + hp_ci.max_table_size= 1024*1024; + hp_ci.keys= 1; + hp_ci.reclength= 13; + + uint16 orig_length= kpi.length; + uint orig_store_length= kpi.store_length; + uint orig_key_length= sql_key.key_length; + + int err= test_heap_prepare_hp_create_info(&test_table, TRUE, &hp_ci); + ok(err == 0, "geom_group_by: heap_prepare succeeded (err=%d)", err); + + /* key_part->length must NOT be widened — must stay at packlength (4) */ + ok(kpi.length == orig_length, + "geom_group_by: key_part.length = %u (expected %u, NOT widened)", + (uint) kpi.length, (uint) orig_length); + + /* store_length must not overflow */ + ok(kpi.store_length == orig_store_length, + "geom_group_by: store_length = %u (expected %u, NOT overflowed)", + (uint) kpi.store_length, (uint) orig_store_length); + + /* key_length must not overflow */ + ok(sql_key.key_length == orig_key_length, + "geom_group_by: key_length = %u (expected %u, NOT overflowed)", + (uint) sql_key.key_length, (uint) orig_key_length); + + /* seg->length must be 0 (blob convention) */ + ok(hp_ci.keydef[0].seg[0].length == 0, + "geom_group_by: seg->length = %u (expected 0 = blob convention)", + (uint) hp_ci.keydef[0].seg[0].length); + + /* has_blob_seg must be set */ + ok(hp_ci.keydef[0].has_blob_seg != 0, + "geom_group_by: has_blob_seg is set"); + + my_free(hp_ci.keydef); + my_free(hp_ci.blob_descs); + bfp->~Field_blob(); +} + + int main(int argc __attribute__((unused)), char **argv __attribute__((unused))) { MY_INIT("hp_test_key_setup"); /* Field constructors reference system_charset_info via DTCollation */ system_charset_info= &my_charset_latin1; - plan(9); + plan(63); diag("distinct_key_truncation: key_part->length widened for blob key parts"); test_distinct_key_truncation(); @@ -417,6 +1416,30 @@ int main(int argc __attribute__((unused)), Hp_test_varchar_key_flag t2; t2.test_garbage_key_part_flag(); + diag("rebuild_key_from_group_buff: mixed blob + varchar GROUP BY key"); + test_rebuild_key_from_group_buff_mixed(); + + diag("varchar_only: VARCHAR key has no blob flag"); + test_varchar_only_key(); + + diag("int_only: INT key has no blob flag"); + test_int_only_key(); + + diag("enum: ENUM key has no blob flag"); + test_enum_key(); + + diag("int_varchar: mixed INT+VARCHAR key has no blob flag"); + test_mixed_int_varchar_key(); + + diag("promoted_blob: varchar promoted to blob in tmp table"); + test_varchar_promoted_to_blob(); + + diag("needs_rebuild: needs_key_rebuild_from_group_buff flag with/without table->group"); + test_needs_key_rebuild_from_group_buff(); + + diag("geom_group_by: geometry GROUP BY key must not trigger blob key widening"); + test_geometry_group_by_no_widening(); + my_end(0); return exit_status(); } From 1f82904109c175ffc379a7405c6a454f1d40e5ba 
Mon Sep 17 00:00:00 2001 From: Arcadiy Ivanov Date: Wed, 18 Mar 2026 15:14:47 -0400 Subject: [PATCH 07/27] Skip run header for single-record blob continuation runs When a blob fits entirely within a single continuation record (`data_len <= visible`), skip the 10-byte run header (`next_cont` pointer + `run_rec_count`) and store data starting at offset 0. This reclaims 10 bytes of payload per small blob, which matters for tables with small `recbuffer` (e.g. 16 bytes: payload increases from 5 to 15 bytes, avoiding a second record for blobs up to 15 bytes). **`HP_ROW_SINGLE_REC` flag** (bit 4 in the flags byte) signals that the continuation record has no run header. The reader gets `visible` bytes of contiguous data starting at the chain pointer (zero-copy). **`enum hp_blob_format`** replaces ad-hoc boolean/flag checks with a single vocabulary for blob storage format detection: - `HP_BLOB_CASE_A_SINGLE_REC`: no header, data at offset 0 (new) - `HP_BLOB_CASE_B_ZEROCOPY`: header in rec 0, data in rec 1..N-1 - `HP_BLOB_CASE_C_MULTI_RUN`: header + data in each run, linked **`hp_blob_run_format()`** is the single decoder used by all paths: write (`hp_write_run_data`), read (`hp_materialize_blobs`, `hp_materialize_one_blob`), free (`hp_free_run_chain`), scan (`heap_scan`), and integrity check (`heap_check_heap`). Files changed: - `storage/heap/heapdef.h`: flag, enum, decoder function - `storage/heap/hp_blob.c`: write/read/free paths - `storage/heap/hp_scan.c`: scan skip logic - `storage/heap/_check.c`: integrity check --- storage/heap/_check.c | 18 +++++- storage/heap/heapdef.h | 42 +++++++++----- storage/heap/hp_blob.c | 122 ++++++++++++++++++++++++++++------------- storage/heap/hp_scan.c | 19 +++++-- 4 files changed, 140 insertions(+), 61 deletions(-) diff --git a/storage/heap/_check.c b/storage/heap/_check.c index c87eda3818121..bb2c5d6f0794f 100644 --- a/storage/heap/_check.c +++ b/storage/heap/_check.c @@ -81,9 +81,21 @@ int heap_check_heap(const HP_INFO *info, my_bool print_status) deleted++; else if (hp_is_cont(current_ptr, share->visible)) { - uint16 run_rec_count= hp_cont_rec_count(current_ptr); - cont_count+= run_rec_count; - pos+= run_rec_count - 1; /* -1 because for-loop does pos++ */ + /* + Case A (HP_BLOB_CASE_A_SINGLE_REC): single record, no header. + Case B/C: read run_rec_count from header and skip the entire run. + */ + if (hp_blob_run_format(current_ptr, share->visible) + == HP_BLOB_CASE_A_SINGLE_REC) + { + cont_count++; + } + else + { + uint16 run_rec_count= hp_cont_rec_count(current_ptr); + cont_count+= run_rec_count; + pos+= run_rec_count - 1; /* -1 because for-loop does pos++ */ + } } else records++; diff --git a/storage/heap/heapdef.h b/storage/heap/heapdef.h index 5ef2003d58085..ef2925eef7934 100644 --- a/storage/heap/heapdef.h +++ b/storage/heap/heapdef.h @@ -37,6 +37,7 @@ C_MODE_START #define HP_ROW_HAS_CONT 2 /* Bit 1: primary record has continuation chain(s) */ #define HP_ROW_IS_CONT 4 /* Bit 2: this record IS a continuation record */ #define HP_ROW_CONT_ZEROCOPY 8 /* Bit 3: zero-copy layout (data in rec 1..N-1) */ +#define HP_ROW_SINGLE_REC 16 /* Bit 4: single-record run, no header — data at offset 0 */ /* Continuation run header: next_cont pointer + run_rec_count. @@ -96,24 +97,37 @@ static inline uint16 hp_cont_rec_count(const uchar *chain) } /* - Zero-copy case detection for stored continuation chains. + Blob continuation run storage format. - Case A: single record, single run — data fits in rec 0 payload after header. - run_rec_count == 1 AND next_cont == NULL. 
- IMPORTANT: run_rec_count == 1 alone is NOT sufficient — a multi-run - blob can have run_rec_count == 1 in its first run when free-list - fragmentation produces a single-slot fragment. + Case A (HP_BLOB_CASE_A_SINGLE_REC): Single-record run, no header. + Data starts at offset 0, full `visible` bytes available for + payload. Detected by HP_ROW_SINGLE_REC flag. + Zero-copy: blob pointer → chain. - Case B: single run, multiple records, zerocopy flag set — data in rec 1..N-1. -*/ -static inline my_bool hp_is_case_a(const uchar *chain) -{ - return hp_cont_rec_count(chain) == 1 && hp_cont_next(chain) == NULL; -} + Case B (HP_BLOB_CASE_B_ZEROCOPY): Single run, multiple records. + Header in rec 0, data contiguous in rec 1..N-1. Detected by + HP_ROW_CONT_ZEROCOPY flag. + Zero-copy: blob pointer → chain + recbuffer. -static inline my_bool hp_is_case_b(const uchar *chain, uint visible) + Case C (HP_BLOB_CASE_C_MULTI_RUN): One or more runs linked via + next_cont. Header in each run's rec 0, data in rec 0 (after + header) + rec 1..N-1. Requires reassembly into blob_buff. +*/ +enum hp_blob_format { + HP_BLOB_CASE_A_SINGLE_REC, + HP_BLOB_CASE_B_ZEROCOPY, + HP_BLOB_CASE_C_MULTI_RUN +}; + +static inline enum hp_blob_format hp_blob_run_format(const uchar *chain, + uint visible) { - return (chain[visible] & HP_ROW_CONT_ZEROCOPY) != 0; + uchar flags= chain[visible]; + if (flags & HP_ROW_SINGLE_REC) + return HP_BLOB_CASE_A_SINGLE_REC; + if (flags & HP_ROW_CONT_ZEROCOPY) + return HP_BLOB_CASE_B_ZEROCOPY; + return HP_BLOB_CASE_C_MULTI_RUN; } #define HP_CONT_MIN_RUN_BYTES 128 diff --git a/storage/heap/hp_blob.c b/storage/heap/hp_blob.c index 776345b8b2034..ff14ac1e1178f 100644 --- a/storage/heap/hp_blob.c +++ b/storage/heap/hp_blob.c @@ -111,6 +111,7 @@ static uchar *hp_alloc_from_tail(HP_SHARE *share) void hp_free_run_chain(HP_SHARE *share, uchar *chain) { uint recbuffer= share->block.recbuffer; + uint visible= share->visible; while (chain) { @@ -118,15 +119,25 @@ void hp_free_run_chain(HP_SHARE *share, uchar *chain) uint16 run_rec_count; uint16 j; - memcpy(&next_run, chain, HP_CONT_NEXT_PTR_SIZE); - run_rec_count= uint2korr(chain + HP_CONT_NEXT_PTR_SIZE); + if (hp_blob_run_format(chain, visible) == HP_BLOB_CASE_A_SINGLE_REC) + { + /* Case A: single record, no header */ + next_run= NULL; + run_rec_count= 1; + } + else + { + /* Case B/C: header present with next_cont and run_rec_count */ + memcpy(&next_run, chain, HP_CONT_NEXT_PTR_SIZE); + run_rec_count= uint2korr(chain + HP_CONT_NEXT_PTR_SIZE); + } for (j= 0; j < run_rec_count; j++) { uchar *pos= chain + j * recbuffer; *((uchar**) pos)= share->del_link; share->del_link= pos; - pos[share->visible]= 0; + pos[visible]= 0; share->deleted++; share->total_records--; } @@ -148,10 +159,7 @@ void hp_free_run_chain(HP_SHARE *share, uchar *chain) @param data_len Total blob data length @param run_start Pointer to first record of the run @param run_rec_count Number of consecutive records in this run - @param zerocopy If TRUE, use zero-copy layout: - Case A (run_rec_count==1): data in rec 0 after header. - Case B (run_rec_count>1): data in rec 1..N-1 only, - rec 0 carries only the header (no data payload). + @param format Storage format (Case A / Case B / Case C) @param offset [in/out] Current offset into blob data @note Caller must link runs by overwriting next_cont in the previous run. 
@@ -159,7 +167,8 @@ void hp_free_run_chain(HP_SHARE *share, uchar *chain) static void hp_write_run_data(HP_SHARE *share, const uchar *data, uint32 data_len, uchar *run_start, - uint16 run_rec_count, my_bool zerocopy, + uint16 run_rec_count, + enum hp_blob_format format, uint32 *offset) { uint visible= share->visible; @@ -168,21 +177,38 @@ static void hp_write_run_data(HP_SHARE *share, const uchar *data, uint32 remaining= data_len - off; uint32 chunk; uint16 rec; - uchar *null_ptr= NULL; - /* First record: run header + flags byte (always written) */ - memcpy(run_start, &null_ptr, HP_CONT_NEXT_PTR_SIZE); - int2store(run_start + HP_CONT_NEXT_PTR_SIZE, run_rec_count); - run_start[visible]= HP_ROW_ACTIVE | HP_ROW_IS_CONT | - (zerocopy && run_rec_count > 1 ? HP_ROW_CONT_ZEROCOPY : 0); + if (format == HP_BLOB_CASE_A_SINGLE_REC) + { + /* + Case A: single-record run, no header. Data starts at offset 0, + full `visible` bytes available. HP_ROW_SINGLE_REC signals the + reader that there is no run header to parse. + */ + DBUG_ASSERT(run_rec_count == 1); + DBUG_ASSERT(remaining <= visible); + run_start[visible]= HP_ROW_ACTIVE | HP_ROW_IS_CONT | HP_ROW_SINGLE_REC; + memcpy(run_start, data + off, remaining); + *offset= off + remaining; + return; + } + + { + uchar *null_ptr= NULL; + /* First record: run header + flags byte */ + memcpy(run_start, &null_ptr, HP_CONT_NEXT_PTR_SIZE); + int2store(run_start + HP_CONT_NEXT_PTR_SIZE, run_rec_count); + run_start[visible]= HP_ROW_ACTIVE | HP_ROW_IS_CONT | + (format == HP_BLOB_CASE_B_ZEROCOPY + ? HP_ROW_CONT_ZEROCOPY : 0); + } /* - Case B (zerocopy && run_rec_count > 1): skip data copy in rec 0. + Case B: skip data copy in rec 0. All data goes into rec 1..N-1 contiguously for zero-copy reads. - Case A (zerocopy && run_rec_count == 1): data fits in rec 0 payload. - Case C (!zerocopy): data starts in rec 0 as before. + Case C: data starts in rec 0 after header. 
*/ - if (!zerocopy || run_rec_count == 1) + if (format == HP_BLOB_CASE_C_MULTI_RUN) { chunk= visible - HP_CONT_HEADER_SIZE; if (chunk > remaining) @@ -266,7 +292,7 @@ static void hp_unlink_and_write_run(HP_SHARE *share, const uchar *data_ptr, } hp_write_run_data(share, data_ptr, data_len, run_start, - records_to_use, FALSE, data_offset); + records_to_use, HP_BLOB_CASE_C_MULTI_RUN, data_offset); if (*prev_run_start) memcpy(*prev_run_start, &run_start, sizeof(run_start)); @@ -423,7 +449,7 @@ static int hp_write_one_blob(HP_SHARE *share, const uchar *data_ptr, uint block_pos; if (run_rec_count == 1) - run_payload= visible - HP_CONT_HEADER_SIZE; + run_payload= visible; /* HP_ROW_SINGLE_REC: no header */ else run_payload= (visible - HP_CONT_HEADER_SIZE) + (uint32)(run_rec_count - 1) * recbuffer; @@ -488,7 +514,8 @@ static int hp_write_one_blob(HP_SHARE *share, const uchar *data_ptr, { /* Case A: data fits in rec 0 */ hp_write_run_data(share, data_ptr, data_len, run_start, - run_rec_count, TRUE, &data_offset); + run_rec_count, HP_BLOB_CASE_A_SINGLE_REC, + &data_offset); } else { @@ -497,7 +524,8 @@ static int hp_write_one_blob(HP_SHARE *share, const uchar *data_ptr, { /* Case B: rec 1..N-1 alone hold all data */ hp_write_run_data(share, data_ptr, data_len, run_start, - run_rec_count, TRUE, &data_offset); + run_rec_count, HP_BLOB_CASE_B_ZEROCOPY, + &data_offset); } else { @@ -534,23 +562,29 @@ static int hp_write_one_blob(HP_SHARE *share, const uchar *data_ptr, } run_rec_count++; hp_write_run_data(share, data_ptr, data_len, run_start, - run_rec_count, TRUE, &data_offset); + run_rec_count, HP_BLOB_CASE_B_ZEROCOPY, + &data_offset); } else + /* Case B extension failed — fall back to Case C */ hp_write_run_data(share, data_ptr, data_len, run_start, - run_rec_count, FALSE, &data_offset); + run_rec_count, HP_BLOB_CASE_C_MULTI_RUN, + &data_offset); } else + /* At block boundary — Case C */ hp_write_run_data(share, data_ptr, data_len, run_start, - run_rec_count, FALSE, &data_offset); + run_rec_count, HP_BLOB_CASE_C_MULTI_RUN, + &data_offset); } } } else { - /* Multi-run (Case C) or not the only run */ + /* Case C: multi-run or not the only run */ hp_write_run_data(share, data_ptr, data_len, run_start, - run_rec_count, FALSE, &data_offset); + run_rec_count, HP_BLOB_CASE_C_MULTI_RUN, + &data_offset); } if (prev_run_start) @@ -692,8 +726,8 @@ int hp_read_blobs(HP_INFO *info, uchar *record, const uchar *pos) memcpy(&chain, record + desc->offset + desc->packlength, sizeof(chain)); - /* Zero-copy cases (A or B) need no reassembly buffer space */ - if (hp_is_case_a(chain) || hp_is_case_b(chain, visible)) + /* Case A and Case B are zero-copy — need no reassembly buffer space */ + if (hp_blob_run_format(chain, visible) != HP_BLOB_CASE_C_MULTI_RUN) { info->has_zerocopy_blobs= TRUE; continue; @@ -731,21 +765,25 @@ int hp_read_blobs(HP_INFO *info, uchar *record, const uchar *pos) memcpy(&chain, record + desc->offset + desc->packlength, sizeof(chain)); - if (hp_is_case_a(chain)) + switch (hp_blob_run_format(chain, visible)) { - /* Case A: single-record single-run — zero-copy */ - const uchar *blob_data= chain + HP_CONT_HEADER_SIZE; + case HP_BLOB_CASE_A_SINGLE_REC: + { + /* Case A: single-record single-run, no header — zero-copy */ + const uchar *blob_data= chain; memcpy(record + desc->offset + desc->packlength, &blob_data, sizeof(blob_data)); + continue; } - else if (hp_is_case_b(chain, visible)) + case HP_BLOB_CASE_B_ZEROCOPY: { /* Case B: data in rec 1..N-1, contiguous — zero-copy */ const uchar *blob_data= chain 
+ recbuffer; memcpy(record + desc->offset + desc->packlength, &blob_data, sizeof(blob_data)); + continue; } - else + case HP_BLOB_CASE_C_MULTI_RUN: { /* Case C: reassemble into blob_buff */ uint32 remaining= data_len; @@ -788,7 +826,9 @@ int hp_read_blobs(HP_INFO *info, uchar *record, const uchar *pos) memcpy(record + desc->offset + desc->packlength, &blob_data, sizeof(blob_data)); } + break; } + } /* switch */ } DBUG_RETURN(0); @@ -826,11 +866,15 @@ const uchar *hp_materialize_one_blob(HP_INFO *info, if (data_len == 0 || !chain) return chain; - /* Check for zero-copy cases */ - if (hp_is_case_a(chain)) - return chain + HP_CONT_HEADER_SIZE; /* Case A */ - if (hp_is_case_b(chain, visible)) - return chain + recbuffer; /* Case B */ + switch (hp_blob_run_format(chain, visible)) + { + case HP_BLOB_CASE_A_SINGLE_REC: + return chain; /* Case A: no header, data at offset 0 */ + case HP_BLOB_CASE_B_ZEROCOPY: + return chain + recbuffer; /* Case B: data in rec 1..N-1 */ + case HP_BLOB_CASE_C_MULTI_RUN: + break; /* Case C: fall through to reassembly */ + } /* Case C: multiple runs, reassemble into blob_buff */ if (data_len > info->blob_buff_len) diff --git a/storage/heap/hp_scan.c b/storage/heap/hp_scan.c index 8ef3d348c8c6d..19914da73d86e 100644 --- a/storage/heap/hp_scan.c +++ b/storage/heap/hp_scan.c @@ -103,12 +103,21 @@ int heap_scan(register HP_INFO *info, uchar *record) */ if (hp_is_cont(info->current_ptr, share->visible)) { - uint16 run_rec_count= hp_cont_rec_count(info->current_ptr); - if (run_rec_count > 1) + /* + Case A (HP_BLOB_CASE_A_SINGLE_REC): single record, no header — skip + just this one record. + Case B/C: read run_rec_count from header and skip the entire run. + */ + if (hp_blob_run_format(info->current_ptr, share->visible) + != HP_BLOB_CASE_A_SINGLE_REC) { - uint skip= run_rec_count - 1; - info->current_record+= skip; - info->current_ptr+= skip * share->block.recbuffer; + uint16 run_rec_count= hp_cont_rec_count(info->current_ptr); + if (run_rec_count > 1) + { + uint skip= run_rec_count - 1; + info->current_record+= skip; + info->current_ptr+= skip * share->block.recbuffer; + } } info->update= HA_STATE_PREV_FOUND | HA_STATE_NEXT_FOUND; DBUG_RETURN(my_errno=HA_ERR_RECORD_DELETED); From b276a555c1a84c6d202508672f427f90e349f22d Mon Sep 17 00:00:00 2001 From: Arcadiy Ivanov Date: Wed, 18 Mar 2026 17:12:46 -0400 Subject: [PATCH 08/27] Skip unchanged blobs in `heap_update()` Per-column blob change detection in `heap_update()`: compare each blob column's old and new values before rewriting continuation chains. Detection order (cheapest first): length comparison (O(1)), data pointer comparison (O(1)), `memcmp` fallback (O(n) with early exit). Unchanged blobs keep their existing chains with no allocation, copy, or free. Only changed blobs get new chains written (write-before-free for crash safety) and old chains freed. This avoids unnecessary chain churn for common patterns like `UPDATE t SET non_blob_col = x`, `INSERT ... ON DUPLICATE KEY UPDATE` with unchanged blob values, and `REPLACE` with identical blob data. `hp_blob_length()` and `hp_write_one_blob()` made non-static for use by `heap_update()` directly (bypassing `hp_write_blobs()` which always rewrites all columns). 
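A minimal sketch of the detection order as a standalone predicate (a
hypothetical helper named `hp_blob_unchanged` for illustration only; in
the patch the logic is inlined in `heap_update()`, and the typedefs
below stand in for MariaDB's own):

    #include <string.h>

    typedef char my_bool;            /* MariaDB-style typedefs, so the */
    typedef unsigned char uchar;     /* sketch compiles standalone     */
    typedef unsigned int uint32;
    #define TRUE 1
    #define FALSE 0

    static my_bool hp_blob_unchanged(uint32 old_len, const uchar *old_data,
                                     uint32 new_len, const uchar *new_data)
    {
      if (old_len != new_len)
        return FALSE;                /* O(1): lengths differ => changed  */
      if (old_len == 0)
        return TRUE;                 /* both empty => unchanged          */
      if (old_data == new_data)
        return TRUE;                 /* O(1): same buffer => unchanged   */
      /* O(n) fallback; memcmp stops at the first differing byte */
      return memcmp(old_data, new_data, old_len) == 0;
    }

Only blobs for which this predicate is false get a new chain written
(write-before-free) and their old chain released.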
--- mysql-test/suite/heap/heap_blob.result | 34 +++++ mysql-test/suite/heap/heap_blob.test | 21 +++ storage/heap/heapdef.h | 3 + storage/heap/hp_blob.c | 6 +- storage/heap/hp_update.c | 175 +++++++++++++++++-------- 5 files changed, 184 insertions(+), 55 deletions(-) diff --git a/mysql-test/suite/heap/heap_blob.result b/mysql-test/suite/heap/heap_blob.result index 83b1c97203774..9cbf95c18f956 100644 --- a/mysql-test/suite/heap/heap_blob.result +++ b/mysql-test/suite/heap/heap_blob.result @@ -483,6 +483,40 @@ a set @@max_heap_table_size= @save_max; drop table t1; # +# UPDATE of non-blob column preserves unchanged blob chains +# +create table t1 (a int not null, b blob, c blob, d int, primary key(a)) engine=memory; +insert into t1 values (1, repeat('X', 5000), repeat('Y', 3000), 10); +insert into t1 values (2, repeat('A', 200), repeat('B', 400), 20); +update t1 set d=99 where a=1; +select a, length(b), left(b,3), length(c), left(c,3), d from t1 order by a; +a length(b) left(b,3) length(c) left(c,3) d +1 5000 XXX 3000 YYY 99 +2 200 AAA 400 BBB 20 +select a from t1 where b=repeat('X', 5000) and c=repeat('Y', 3000); +a +1 +update t1 set b='changed' where a=1; +select a, length(b), b, length(c), left(c,3), d from t1 order by a; +a length(b) b length(c) left(c,3) d +1 7 changed 3000 YYY 99 +2 200 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 400 BBB 20 +select a from t1 where c=repeat('Y', 3000); +a +1 +update t1 set b=repeat('A', 200) where a=2; +select a, length(b), left(b,3), length(c), left(c,3), d from t1 order by a; +a length(b) left(b,3) length(c) left(c,3) d +1 7 cha 3000 YYY 99 +2 200 AAA 400 BBB 20 +select a from t1 where b=repeat('A', 200) and c=repeat('B', 400); +a +2 +check table t1; +Table Op Msg_type Msg_text +test.t1 check note The storage engine for the table doesn't support check +drop table t1; +# # Large blob exceeding uint16 run_rec_count cap (65535 records) # # With recbuffer=16, visible=15, a 1MB blob needs ~69906 records, diff --git a/mysql-test/suite/heap/heap_blob.test b/mysql-test/suite/heap/heap_blob.test index b29611f2c8cef..d203d75caad42 100644 --- a/mysql-test/suite/heap/heap_blob.test +++ b/mysql-test/suite/heap/heap_blob.test @@ -358,6 +358,27 @@ select a from t1 where b=repeat('B', 5000); set @@max_heap_table_size= @save_max; drop table t1; +--echo # +--echo # UPDATE of non-blob column preserves unchanged blob chains +--echo # +create table t1 (a int not null, b blob, c blob, d int, primary key(a)) engine=memory; +insert into t1 values (1, repeat('X', 5000), repeat('Y', 3000), 10); +insert into t1 values (2, repeat('A', 200), repeat('B', 400), 20); +# Update only the non-blob column — blobs must not be rewritten +update t1 set d=99 where a=1; +select a, length(b), left(b,3), length(c), left(c,3), d from t1 order by a; +select a from t1 where b=repeat('X', 5000) and c=repeat('Y', 3000); +# Update one blob, leave the other unchanged +update t1 set b='changed' where a=1; +select a, length(b), b, length(c), left(c,3), d from t1 order by a; +select a from t1 where c=repeat('Y', 3000); +# SET blob_col = same_value (different pointer, same data — INSERT ON DUP KEY pattern) +update t1 set b=repeat('A', 200) where a=2; +select a, length(b), left(b,3), length(c), left(c,3), d from t1 order by a; +select a from t1 where b=repeat('A', 200) and c=repeat('B', 400); +check table t1; +drop table t1; + --echo # --echo # Large 
blob exceeding uint16 run_rec_count cap (65535 records) --echo # diff --git a/storage/heap/heapdef.h b/storage/heap/heapdef.h index ef2925eef7934..273b65cd5772d 100644 --- a/storage/heap/heapdef.h +++ b/storage/heap/heapdef.h @@ -209,6 +209,9 @@ extern ha_rows hp_rows_in_memory(size_t reclength, size_t index_size, extern size_t hp_memory_needed_per_row(size_t reclength); extern uchar *next_free_record_pos(HP_SHARE *info); +extern uint32 hp_blob_length(const HP_BLOB_DESC *desc, const uchar *record); +extern int hp_write_one_blob(HP_SHARE *share, const uchar *data_ptr, + uint32 data_len, uchar **first_run_out); extern int hp_write_blobs(HP_INFO *info, const uchar *record, uchar *pos); extern int hp_read_blobs(HP_INFO *info, uchar *record, const uchar *pos); extern void hp_free_blobs(HP_SHARE *share, uchar *pos); diff --git a/storage/heap/hp_blob.c b/storage/heap/hp_blob.c index ff14ac1e1178f..f7f44af28fe3e 100644 --- a/storage/heap/hp_blob.c +++ b/storage/heap/hp_blob.c @@ -37,7 +37,7 @@ Read blob data length from the record buffer. */ -static uint32 hp_blob_length(const HP_BLOB_DESC *desc, const uchar *record) +uint32 hp_blob_length(const HP_BLOB_DESC *desc, const uchar *record) { switch (desc->packlength) { @@ -317,8 +317,8 @@ static void hp_unlink_and_write_run(HP_SHARE *share, const uchar *data_ptr, @return 0 on success, my_errno on failure */ -static int hp_write_one_blob(HP_SHARE *share, const uchar *data_ptr, - uint32 data_len, uchar **first_run_out) +int hp_write_one_blob(HP_SHARE *share, const uchar *data_ptr, + uint32 data_len, uchar **first_run_out) { uint visible= share->visible; uint recbuffer= share->block.recbuffer; diff --git a/storage/heap/hp_update.c b/storage/heap/hp_update.c index 9d885e2bb1b7e..09b4fb8d438fa 100644 --- a/storage/heap/hp_update.c +++ b/storage/heap/hp_update.c @@ -53,93 +53,164 @@ int heap_update(HP_INFO *info, const uchar *old, const uchar *heap_new) } /* - Blob update strategy: write new chains before freeing old ones. - - We must not free old blob chains before the new ones are successfully - written, because hp_write_blobs() can fail (e.g. table full) and then - the old data would be unrecoverable. Instead: - 1. Save old chain head pointers (from pos) before memcpy overwrites them - 2. memcpy new record data into pos - 3. Write new blob chains (hp_write_blobs) - 4. On success: free old chains via saved pointers - On failure: restore old record from 'old' buffer, restore saved - chain pointers, re-set HP_ROW_HAS_CONT flag + Blob update strategy: skip unchanged blobs, write-before-free for + changed ones. + + Compare each blob column (length, then pointer, then memcmp) to + detect changes. Unchanged blobs keep their existing chains. + Changed blobs get new chains written before old ones are freed. + + The bulk memcpy of heap_new into pos overwrites blob chain pointers + with SQL-layer data pointers, so we save old chain pointers first + and restore them for unchanged blobs afterward. 
*/ if (share->blob_count) { my_bool had_cont= hp_has_cont(pos, share->visible); - uchar **saved_chains= NULL; + uint alloc_size= share->blob_count * (sizeof(uchar*) + sizeof(my_bool)); + uchar **saved_chains= (uchar**) my_safe_alloca(alloc_size); + my_bool *blob_changed= (my_bool*)(saved_chains + share->blob_count); + my_bool any_changed= FALSE; + my_bool has_blob_data= FALSE; + uint i; - if (had_cont) + /* Save old chain pointers and detect which blobs changed */ + for (i= 0; i < share->blob_count; i++) { - saved_chains= (uchar**) my_safe_alloca( - share->blob_count * sizeof(uchar*)); - for (uint i= 0; i < share->blob_count; i++) - { - HP_BLOB_DESC *desc= &share->blob_descs[i]; + HP_BLOB_DESC *desc= &share->blob_descs[i]; + uint32 old_len, new_len; + + saved_chains[i]= NULL; + if (had_cont) memcpy(&saved_chains[i], pos + desc->offset + desc->packlength, sizeof(saved_chains[i])); + + old_len= hp_blob_length(desc, old); + new_len= hp_blob_length(desc, heap_new); + + if (old_len != new_len) + blob_changed[i]= TRUE; + else if (old_len == 0) + blob_changed[i]= FALSE; + else + { + const uchar *old_data, *new_data; + memcpy(&old_data, old + desc->offset + desc->packlength, + sizeof(old_data)); + memcpy(&new_data, heap_new + desc->offset + desc->packlength, + sizeof(new_data)); + blob_changed[i]= (old_data != new_data && + memcmp(old_data, new_data, old_len) != 0); } + if (blob_changed[i]) + any_changed= TRUE; } + memcpy(pos, heap_new, (size_t) share->reclength); - if (hp_write_blobs(info, heap_new, pos)) + + /* Write new chains for changed blobs, restore old pointers for unchanged */ + for (i= 0; i < share->blob_count; i++) { - /* New blobs cleaned up by hp_write_blobs rollback. Restore old record. */ - memcpy(pos, old, (size_t) share->reclength); - if (had_cont) + HP_BLOB_DESC *desc= &share->blob_descs[i]; + + if (!blob_changed[i]) { - for (uint i= 0; i < share->blob_count; i++) + /* Restore old chain pointer that memcpy overwrote */ + if (saved_chains[i]) { - HP_BLOB_DESC *desc= &share->blob_descs[i]; memcpy(pos + desc->offset + desc->packlength, &saved_chains[i], sizeof(saved_chains[i])); + has_blob_data= TRUE; + } + continue; + } + + { + uint32 new_len= hp_blob_length(desc, heap_new); + if (new_len == 0) + { + uchar *null_ptr= NULL; + memcpy(pos + desc->offset + desc->packlength, + &null_ptr, sizeof(null_ptr)); + } + else + { + const uchar *data_ptr; + uchar *first_run; + + has_blob_data= TRUE; + memcpy(&data_ptr, heap_new + desc->offset + desc->packlength, + sizeof(data_ptr)); + + if (hp_write_one_blob(share, data_ptr, new_len, &first_run)) + { + /* Rollback: free new chains already written, restore old record */ + uint j; + for (j= 0; j < i; j++) + if (blob_changed[j]) + { + uchar *chain; + memcpy(&chain, pos + share->blob_descs[j].offset + + share->blob_descs[j].packlength, sizeof(chain)); + if (chain) + hp_free_run_chain(share, chain); + } + memcpy(pos, old, (size_t) share->reclength); + if (had_cont) + { + for (j= 0; j < share->blob_count; j++) + memcpy(pos + share->blob_descs[j].offset + + share->blob_descs[j].packlength, + &saved_chains[j], sizeof(saved_chains[j])); + pos[share->visible]|= HP_ROW_HAS_CONT; + } + my_safe_afree(saved_chains, alloc_size); + goto err; + } + memcpy(pos + desc->offset + desc->packlength, + &first_run, sizeof(first_run)); } - pos[share->visible]|= HP_ROW_HAS_CONT; } - my_safe_afree(saved_chains, - share->blob_count * sizeof(uchar*)); - goto err; } - /* New blobs written — now safe to free old chains */ - if (had_cont) + + if (any_changed) { - for (uint i= 0; 
i < share->blob_count; i++) - hp_free_run_chain(share, saved_chains[i]); - my_safe_afree(saved_chains, - share->blob_count * sizeof(uchar*)); + /* Set flags and free old chains for changed blobs */ + pos[share->visible]= has_blob_data ? + (HP_ROW_ACTIVE | HP_ROW_HAS_CONT) : HP_ROW_ACTIVE; + for (i= 0; i < share->blob_count; i++) + if (blob_changed[i] && saved_chains[i]) + hp_free_run_chain(share, saved_chains[i]); } + else if (had_cont) + pos[share->visible]|= HP_ROW_HAS_CONT; + /* - Refresh blob pointers in the caller's record buffer when zero-copy - pointers were used. + Refresh blob pointers in the caller's record buffer. - hp_write_blobs() stored new chain head pointers in pos, but - heap_new may still have zero-copy pointers from the caller's last - hp_read_blobs() — those point into old chains that were just freed. - Copy new chain pointers from pos into heap_new, then call - hp_read_blobs() to replace them with materialized data pointers. + For changed blobs, pos has new chain pointers that heap_new + doesn't know about yet. Copy all chain pointers from pos into + heap_new and call hp_read_blobs() to re-materialize. Without this, callers that reuse heap_new after update (e.g., the INTERSECT ALL unfold path in sql_union.cc) would follow dangling pointers into freed HP_BLOCK records. - - Non-zero-copy blobs (Case C) have pointers into blob_buff which - is not affected by the chain free, so no refresh is needed. */ - if (info->has_zerocopy_blobs) + if (any_changed || info->has_zerocopy_blobs) { uchar *new_rec= (uchar*) heap_new; - for (uint i= 0; i < share->blob_count; i++) + for (i= 0; i < share->blob_count; i++) { HP_BLOB_DESC *desc= &share->blob_descs[i]; - { - uchar *chain; - memcpy(&chain, pos + desc->offset + desc->packlength, sizeof(chain)); - memcpy(new_rec + desc->offset + desc->packlength, &chain, - sizeof(chain)); - } + uchar *chain; + memcpy(&chain, pos + desc->offset + desc->packlength, sizeof(chain)); + memcpy(new_rec + desc->offset + desc->packlength, &chain, + sizeof(chain)); } hp_read_blobs(info, new_rec, pos); } + + my_safe_afree(saved_chains, alloc_size); } else { From 764f85c6da38d25f439f300b944ccd5cea7079f8 Mon Sep 17 00:00:00 2001 From: Arcadiy Ivanov Date: Wed, 18 Mar 2026 17:47:21 -0400 Subject: [PATCH 09/27] Consistent `hp_rec_key_cmp()` argument order in `heap_update()` Swap rec1/rec2 arguments to match the API convention: rec1 = input record (direct data pointers), rec2 = potentially stored record (chain pointers when info != NULL). Both calls pass info=NULL so the swap is a no-op for behavior, but makes the argument order consistent with all other call sites (`hp_write.c`, `hp_delete.c`, `ha_heap.cc`). 
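Roughly, each updated call site now reads as follows (a simplified
excerpt of the hunk below, with error handling elided):

    /*
      rec1 = input record with direct data pointers (heap_new),
      rec2 = record that may carry chain pointers when a non-NULL
      info is passed (old). Both calls here pass info == NULL,
      so the result is identical either way.
    */
    if (hp_rec_key_cmp(keydef, heap_new, old, NULL))
    {
      /* Key value changed: delete the old key entry, write the new one */
    }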
--- storage/heap/hp_update.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/storage/heap/hp_update.c b/storage/heap/hp_update.c index 09b4fb8d438fa..ef772f806b83e 100644 --- a/storage/heap/hp_update.c +++ b/storage/heap/hp_update.c @@ -42,7 +42,7 @@ int heap_update(HP_INFO *info, const uchar *old, const uchar *heap_new) p_lastinx= share->keydef + info->lastinx; for (keydef= share->keydef, end= keydef + share->keys; keydef < end; keydef++) { - if (hp_rec_key_cmp(keydef, old, heap_new, NULL)) + if (hp_rec_key_cmp(keydef, heap_new, old, NULL)) { if ((*keydef->delete_key)(info, keydef, old, pos, keydef == p_lastinx) || (*keydef->write_key)(info, keydef, heap_new, pos)) @@ -244,7 +244,7 @@ int heap_update(HP_INFO *info, const uchar *old, const uchar *heap_new) } while (keydef >= share->keydef) { - if (hp_rec_key_cmp(keydef, old, heap_new, NULL)) + if (hp_rec_key_cmp(keydef, heap_new, old, NULL)) { if ((*keydef->delete_key)(info, keydef, heap_new, pos, 0) || (*keydef->write_key)(info, keydef, old, pos)) From ed385514c8f487dfd61f821681749a2b3254967e Mon Sep 17 00:00:00 2001 From: Arcadiy Ivanov Date: Wed, 18 Mar 2026 20:37:56 -0400 Subject: [PATCH 10/27] Early FULLTEXT detection for derived table engine choice When a derived table is used in a query with FULLTEXT functions, detect this in `mysql_derived_prepare()` and force a disk-based tmp engine (`TMP_TABLE_FORCE_MYISAM`) before the result table is created. This avoids creating a HEAP handler and then swapping it for Aria/MyISAM later in `Item_func_match::fix_fields()`. The check uses `derived->select_lex->ftfunc_list->elements` to detect FULLTEXT in the outer query, following the same approach as `st_select_lex_unit::prepare()` in `sql_union.cc`. The handler swap block in `Item_func_match::fix_fields()` is replaced with a simple `ER_TABLE_CANT_HANDLE_FT` error, which now serves only as a safety net for engines that genuinely lack FULLTEXT support. --- sql/item_func.cc | 45 ++------------------------------------------- sql/sql_derived.cc | 33 ++++++++++++++++++++++----------- 2 files changed, 24 insertions(+), 54 deletions(-) diff --git a/sql/item_func.cc b/sql/item_func.cc index 02ffe79ad7769..37d63d984f3af 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -6377,49 +6377,8 @@ bool Item_func_match::fix_fields(THD *thd, Item **ref) } if (!(table->file->ha_table_flags() & HA_CAN_FULLTEXT)) { - /* - If this is an in-memory tmp table that hasn't been opened yet - (e.g. a derived table being prepared), convert it to a disk-based - engine that supports FULLTEXT. This can happen when HEAP blob - support keeps a table in memory that would previously have been - forced to disk by blob columns alone. - */ - if (table->s->tmp_table && !table->is_created() && - table->s->db_type() == heap_hton) - { - /* - Replace the HEAP handler with a disk-based engine (Aria/MyISAM) - that supports FULLTEXT. The table has not been opened yet, so - only the handler object and plugin reference need to be swapped. - This follows the same pattern as - create_internal_tmp_table_from_heap() in sql_select.cc. 
- */ - delete table->file; - table->file= NULL; - /* Reset ha_share — old HEAP handler already set it via finalize() */ - table->s->ha_share= NULL; - plugin_unlock(0, table->s->db_plugin); - table->s->db_plugin= ha_lock_engine(0, TMP_ENGINE_HTON); - if (!(table->file= get_new_handler(table->s, &table->mem_root, - table->s->db_type()))) - { - my_error(ER_OUTOFMEMORY, MYF(ME_FATAL), - static_cast(sizeof(handler))); - return 1; - } - if (table->file->set_ha_share_ref(&table->s->ha_share)) - { - delete table->file; - table->file= NULL; - return 1; - } - table->file->set_table(table); - } - else - { - my_error(ER_TABLE_CANT_HANDLE_FT, MYF(0), table->file->table_type()); - return 1; - } + my_error(ER_TABLE_CANT_HANDLE_FT, MYF(0), table->file->table_type()); + return 1; } table->fulltext_searched=1; return agg_arg_charsets_for_comparison(cmp_collation, args+1, arg_count-1); diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc index 74ee8141e1346..8250df422d44b 100644 --- a/sql/sql_derived.cc +++ b/sql/sql_derived.cc @@ -868,17 +868,28 @@ bool mysql_derived_prepare(THD *thd, LEX *lex, TABLE_LIST *derived) SELECT is last SELECT of UNION). */ thd->create_tmp_table_for_derived= TRUE; - if (!(derived->table) && - derived->derived_result->create_result_table(thd, &unit->types, FALSE, - (first_select->options | - thd->variables.option_bits | - TMP_TABLE_ALL_COLUMNS), - &derived->alias, - FALSE, FALSE, keep_row_order, - 0)) - { - thd->create_tmp_table_for_derived= FALSE; - goto exit; + { + ulonglong create_options= (first_select->options | + thd->variables.option_bits | + TMP_TABLE_ALL_COLUMNS); + /* + Force a disk-based engine when the outer query uses FULLTEXT + functions, since HEAP does not support FULLTEXT indexes. + */ + if (derived->select_lex && + derived->select_lex->ftfunc_list->elements) + create_options= create_options | TMP_TABLE_FORCE_MYISAM; + + if (!(derived->table) && + derived->derived_result->create_result_table(thd, &unit->types, FALSE, + create_options, + &derived->alias, + FALSE, FALSE, + keep_row_order, 0)) + { + thd->create_tmp_table_for_derived= FALSE; + goto exit; + } } thd->create_tmp_table_for_derived= FALSE; From 72f4d9a82135713c6442407821e96b2125436c80 Mon Sep 17 00:00:00 2001 From: Arcadiy Ivanov Date: Wed, 18 Mar 2026 23:27:16 -0400 Subject: [PATCH 11/27] Clarify comments for HEAP blob continuation and tmp table overflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - `item_sum.cc`: fix misleading "HEAP table full" comment — the error type is unknown at this point; `create_internal_tmp_table_from_heap()` determines whether it is a convertible HEAP overflow or a fatal error - `sql_select.cc`: document why `choose_engine()` re-checks key limits after picking a disk engine for non-key-limit reasons - `heapdef.h`: add descriptive comments to `HP_CONT_MIN_RUN_BYTES`, `HP_CONT_RUN_FRACTION_NUM`, `HP_CONT_RUN_FRACTION_DEN` --- sql/item_sum.cc | 8 +++++--- sql/sql_select.cc | 6 ++++++ storage/heap/heapdef.h | 2 ++ 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/sql/item_sum.cc b/sql/item_sum.cc index 3e56e801bf4b2..0931312e4baf1 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -1010,9 +1010,11 @@ bool Aggregator_distinct::add() if (!table->file->is_fatal_error(error, HA_CHECK_DUP)) return FALSE; // duplicate, not an error /* - HEAP table full: convert to on-disk engine. - create_internal_tmp_table_from_heap() copies all existing rows - plus the overflow row (record[0]) to the new table. + Non-duplicate write error. 
If the table is HEAP and the error
+    is HA_ERR_RECORD_FILE_FULL, create_internal_tmp_table_from_heap()
+    converts it to an on-disk engine and copies all rows plus the
+    overflow row (record[0]). For any other error it reports a
+    fatal error and returns 1.
   */
   if (create_internal_tmp_table_from_heap(table->in_use, table,
                                           tmp_table_param->start_recinfo,
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 1fab2851ffd1b..080f0af4910e5 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -21077,6 +21077,12 @@ bool Create_tmp_table::choose_engine(THD *thd, TABLE *table,
     share->db_plugin= ha_lock_engine(0, engine);
     table->file= get_new_handler(share, &table->mem_root,
                                  share->db_type());
+  /*
+    When a disk engine was chosen for reasons other than key limits
+    (e.g. big_tables, TMP_TABLE_FORCE_MYISAM, tmp_memory_table_size=0),
+    the GROUP BY key may still exceed the disk engine's max_key_parts()
+    or max_key_length(). Fall back to unique constraint hash dedup.
+  */
   if (engine == TMP_ENGINE_HTON && m_group &&
       (param->group_parts > table->file->max_key_parts() ||
        param->group_length > table->file->max_key_length()))
diff --git a/storage/heap/heapdef.h b/storage/heap/heapdef.h
index 273b65cd5772d..076c9b9fd794d 100644
--- a/storage/heap/heapdef.h
+++ b/storage/heap/heapdef.h
@@ -130,7 +130,9 @@ static inline enum hp_blob_format hp_blob_run_format(const uchar *chain,
   return HP_BLOB_CASE_C_MULTI_RUN;
 }
 
+/* Minimum acceptable contiguous run size in bytes for free list reuse */
 #define HP_CONT_MIN_RUN_BYTES 128
+/* Minimum run size as a fraction of blob size: NUM/DEN = 1/10 */
 #define HP_CONT_RUN_FRACTION_NUM 1
 #define HP_CONT_RUN_FRACTION_DEN 10
 
From 304158464023d315a5c574c2b57dd71a7da03714 Mon Sep 17 00:00:00 2001
From: Monty
Date: Wed, 8 Apr 2026 12:13:28 +0300
Subject: [PATCH 12/27] Introduce Field_blob_key for handling keys on blobs
 for temporary tables

Field_blob_key is a new blob variant stored as [4-byte length][data
pointer] that can be used as a sort/distinct key in optimizer temporary
tables. Previously, plain Field_blob was used in GROUP_CONCAT and UNION
DISTINCT contexts, but it could not be properly compared as a key.

Field_blob_key allows removing the blob key re-packing in HEAP that was
introduced for HEAP GROUP BY / DISTINCT on TEXT/BLOB columns.

Implementation: Pass a new Tmp_field_param argument through all
make_new_field() and create_tmp_field() overrides so that field
creation knows whether the resulting field will be part of a
unique/distinct key. This partly replaces the earlier overloading of
TABLE::group_concat.

Other things:
- Fix Field_blob_compressed::make_new_field() to correctly handle two
  distinct cases:
  - When part of a unique/distinct key: substitute with Field_blob_key
    so key comparisons work correctly.
  - When placed in any optimizer tmp table (e.g. GROUP_CONCAT with
    ORDER BY): substitute with a plain uncompressed Field_blob, fixing
    wrong results caused by the compressed field's internal value
    buffer being overwritten across rows.
- Fix UNIQUE_KEY_FLAG in the client protocol so it is also set for
  columns that are part of a UNION DISTINCT, not only for columns from
  unique indexes.
- Mark internal temporary tables created by create_tmp_table /
  Create_tmp_table with type RESULT_TMP_TABLE instead of
  INTERNAL_TMP_TABLE. This makes it easier to differentiate between
  these and temporary tables created as placeholders for normal tables,
  as in CREATE .. SELECT, ALTER TABLE, and derived tables.
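A minimal sketch of the [4-byte length][data pointer] layout described
above (hypothetical helpers, not the actual Field_blob_key methods;
assumes the length is stored in native byte order, which is plausible
since these values live only in in-memory tmp tables):

    #include <string.h>

    typedef unsigned char uchar;
    typedef unsigned int uint32;

    /* Read the 4-byte length prefix of a blob key value */
    static uint32 blob_key_length(const uchar *ptr)
    {
      uint32 len;
      memcpy(&len, ptr, 4);
      return len;
    }

    /* Read the data pointer stored right after the length
       (sizeof(uchar*); the my_base.h comment below documents 8 bytes) */
    static const uchar *blob_key_data(const uchar *ptr)
    {
      const uchar *data;
      memcpy(&data, ptr + 4, sizeof(data));
      return data;
    }

Comparison for the new HA_KEYTYPE_VARTEXT4 / HA_KEYTYPE_VARBINARY4 key
types can then operate on the dereferenced (length, bytes) pair instead
of a truncated inline copy of the blob value.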
--- .gitignore | 1 + include/heap.h | 10 - include/my_base.h | 5 +- include/my_compare.h | 16 +- mysql-test/main/func_group.result | 2 +- mysql-test/main/max_session_mem_used.test | 2 +- mysql-test/main/metadata.result | 14 +- mysql-test/main/mysql_client_test.result | 2 +- mysql-test/main/type_bit.result | 4 +- mysql-test/main/type_enum.result | 2 +- mysql-test/main/type_set.result | 2 +- .../perfschema/t/misc_session_status.test | 2 +- mysys/my_compare.c | 40 +++ sql/field.cc | 147 ++++++++-- sql/field.h | 102 +++++-- sql/item.cc | 3 +- sql/item.h | 21 +- sql/item_func.h | 2 +- sql/item_sum.cc | 9 +- sql/item_timefunc.cc | 3 +- sql/mysqld.cc | 1 + sql/opt_subselect.cc | 3 +- sql/records.cc | 3 +- sql/sql_delete.cc | 4 +- sql/sql_insert.cc | 5 +- sql/sql_parse.cc | 1 + sql/sql_select.cc | 179 ++++++++---- sql/sql_show.cc | 33 +++ sql/sql_table.cc | 2 +- sql/sql_trigger.cc | 4 +- sql/sql_type.cc | 55 +++- sql/sql_type.h | 70 +++-- sql/sql_type_fixedbin.h | 2 +- sql/sql_update.cc | 17 +- sql/table.cc | 5 +- sql/table.h | 10 +- storage/heap/CMakeLists.txt | 6 - storage/heap/ha_heap.cc | 274 +++--------------- storage/heap/ha_heap.h | 8 - storage/heap/hp_blob.c | 136 ++++----- storage/heap/hp_create.c | 132 ++++----- storage/heap/hp_hash.c | 103 +++---- storage/heap/hp_test_hash-t.c | 13 +- storage/heap/hp_test_key_setup-t.cc | 11 +- storage/maria/ma_unique.c | 2 +- ...ulltext_order_boolean_mode_no_where.result | 2 +- ...tural_language_mode_different_match.result | 12 +- ...rder_natural_language_mode_no_where.result | 8 +- .../fulltext_order_boolean_mode_no_where.test | 2 +- ...natural_language_mode_different_match.test | 7 +- ..._order_natural_language_mode_no_where.test | 5 +- 51 files changed, 777 insertions(+), 727 deletions(-) diff --git a/.gitignore b/.gitignore index 415f2b5207706..4edb5162a7db3 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ *.mri.tpl /.cproject /.project +/.serena .gdb_history .vs/ /.settings/ diff --git a/include/heap.h b/include/heap.h index f696ee5c9ebf7..54a78a7877cd4 100644 --- a/include/heap.h +++ b/include/heap.h @@ -117,7 +117,6 @@ typedef struct st_hp_keydef /* Key definition with open */ uint length; /* Length of key (automatic) */ uint8 algorithm; /* HASH / BTREE */ my_bool has_blob_seg; /* Key has HA_BLOB_PART segments */ - my_bool needs_key_rebuild_from_group_buff; /* GROUP BY key must be rebuilt from group_buff */ HA_KEYSEG *seg; HP_BLOCK block; /* Where keys are saved */ /* @@ -133,15 +132,6 @@ typedef struct st_hp_keydef /* Key definition with open */ uint (*get_key_length)(struct st_hp_keydef *keydef, const uchar *key); } HP_KEYDEF; -static inline my_bool hp_keydef_has_blob_seg(const HP_KEYDEF *keydef) -{ - uint j; - for (j= 0; j < keydef->keysegs; j++) - if (keydef->seg[j].flag & HA_BLOB_PART) - return TRUE; - return FALSE; -} - typedef struct st_hp_blob_desc { uint offset; /* Byte offset of blob descriptor within record buffer */ diff --git a/include/my_base.h b/include/my_base.h index 050ce8755f98f..f30ea673ae8e9 100644 --- a/include/my_base.h +++ b/include/my_base.h @@ -268,7 +268,10 @@ enum ha_base_keytype { /* Varchar (0-65535 bytes) with length packed with 2 bytes */ HA_KEYTYPE_VARTEXT2=17, /* Key is sorted as letters */ HA_KEYTYPE_VARBINARY2=18, /* Key is sorted as unsigned chars */ - HA_KEYTYPE_BIT=19 + HA_KEYTYPE_BIT=19, + /* blob (length 4 bytes, pointer 8 bytes) used for internal tmp tables */ + HA_KEYTYPE_VARTEXT4=20, /* Key is sorted as letters */ + HA_KEYTYPE_VARBINARY4=21, /* Key is sorted as unsigned chars */ }; #define 
HA_MAX_KEYTYPE 31 /* Must be log2-1 */ diff --git a/include/my_compare.h b/include/my_compare.h index 048e679e70c8e..bc871345f6d4d 100644 --- a/include/my_compare.h +++ b/include/my_compare.h @@ -49,16 +49,16 @@ extern "C" { typedef struct st_HA_KEYSEG /* Key-portion */ { CHARSET_INFO *charset; - uint32 start; /* Start of key in record */ - uint32 null_pos; /* position to NULL indicator */ - uint16 bit_pos; /* Position to bit part */ + uint32 start; /* Start of key in record */ + uint32 null_pos; /* position to NULL indicator */ + uint16 bit_pos; /* Position to bit part */ uint16 flag; - uint16 length; /* Keylength */ + uint16 length; /* Keylength */ uint16 language; - uint8 type; /* Type of key (for sort) */ - uint8 null_bit; /* bitmask to test for NULL */ - uint8 bit_start; - uint8 bit_length; /* Length of bit part */ + uint8 type; /* Type of key (for sort) */ + uint8 null_bit; /* bitmask to test for NULL */ + uint8 bit_start; /* Start of bit or length of record packing */ + uint8 bit_length; /* Length of bit part or length of key packing */ } HA_KEYSEG; #define get_key_length(length,key) \ diff --git a/mysql-test/main/func_group.result b/mysql-test/main/func_group.result index 6c5e3571342d8..a1fca1dac6d22 100644 --- a/mysql-test/main/func_group.result +++ b/mysql-test/main/func_group.result @@ -1815,7 +1815,7 @@ CREATE TABLE t1(f1 YEAR(4)); INSERT INTO t1 VALUES (0000),(2001); (SELECT MAX(f1) FROM t1) UNION (SELECT MAX(f1) FROM t1); Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr -def MAX(f1) MAX(f1) 13 4 4 Y 32864 0 63 +def MAX(f1) MAX(f1) 13 4 4 Y 32868 0 63 MAX(f1) 2001 DROP TABLE t1; diff --git a/mysql-test/main/max_session_mem_used.test b/mysql-test/main/max_session_mem_used.test index 92b882cf0797c..0c995f4492a78 100644 --- a/mysql-test/main/max_session_mem_used.test +++ b/mysql-test/main/max_session_mem_used.test @@ -1,6 +1,6 @@ # memory usage is sensitive to valgrind/ps-protocol/embedded source include/not_msan.inc; -source include/not_valgrind.inc; +source include/not_valgrind_build.inc; source include/no_protocol.inc; source include/not_embedded.inc; source include/have_64bit.inc; diff --git a/mysql-test/main/metadata.result b/mysql-test/main/metadata.result index 8c4ccf9d4d765..f37f72b13f520 100644 --- a/mysql-test/main/metadata.result +++ b/mysql-test/main/metadata.result @@ -146,7 +146,7 @@ id data data 2 female no select t1.id from t1 union select t2.id from t2; Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr -def id id 246 4 1 Y 32768 0 63 +def id id 246 4 1 Y 32772 0 63 id 1 2 @@ -157,7 +157,7 @@ insert into t1 values (2,'two'); set @arg00=1 ; select @arg00 FROM t1 where a=1 union distinct select 1 FROM t1 where a=1; Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr -def @arg00 @arg00 8 20 1 Y 32768 0 63 +def @arg00 @arg00 8 20 1 Y 32772 0 63 @arg00 1 select * from (select @arg00) aaa; @@ -167,7 +167,7 @@ def aaa @arg00 @arg00 8 20 1 Y 32768 0 63 1 select 1 union select 1; Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr -def 1 1 3 1 1 N 32769 0 63 +def 1 1 3 1 1 N 32773 0 63 1 1 select * from (select 1 union select 1) aaa; @@ -259,16 +259,16 @@ c1 c2 2 2 SELECT v1.c1, v2.c2 FROM v1 JOIN v2 ON c1=c2 GROUP BY v1.c1; Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr 
-def test t1 v1 c1 c1 254 1 1 Y 32768 0 8 -def test t2 v2 c2 c2 254 1 1 Y 0 0 8 +def test t1 v1 c1 c1 254 1 1 Y 32772 0 8 +def test t2 v2 c2 c2 254 1 1 Y 4 0 8 c1 c2 1 1 2 2 3 3 SELECT v1.c1, v2.c2 FROM v1 JOIN v2 ON c1=c2 GROUP BY v1.c1 ORDER BY v2.c2; Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr -def test t1 v1 c1 c1 254 1 1 Y 32768 0 8 -def test t2 v2 c2 c2 254 1 1 Y 0 0 8 +def test t1 v1 c1 c1 254 1 1 Y 32772 0 8 +def test t2 v2 c2 c2 254 1 1 Y 4 0 8 c1 c2 1 1 2 2 diff --git a/mysql-test/main/mysql_client_test.result b/mysql-test/main/mysql_client_test.result index 4c7b20314c05d..33de23bd4dbc0 100644 --- a/mysql-test/main/mysql_client_test.result +++ b/mysql-test/main/mysql_client_test.result @@ -130,7 +130,7 @@ mysql_stmt_next_result(): 0; field_count: 0 # cat MYSQL_TMP_DIR/test_mdev26145.out.log # ------------------------------------ Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr -def MAX(a) MAX(a) 3 11 0 Y 32768 0 63 +def MAX(a) MAX(a) 3 11 0 Y 32772 0 63 # ------------------------------------ diff --git a/mysql-test/main/type_bit.result b/mysql-test/main/type_bit.result index 24d96d80ef3e7..f8055a536c94a 100644 --- a/mysql-test/main/type_bit.result +++ b/mysql-test/main/type_bit.result @@ -648,13 +648,13 @@ CREATE TABLE t1 (b BIT); INSERT INTO t1 (b) VALUES (1), (0); SELECT DISTINCT b FROM t1; Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr -def test t1 t1 b b 16 1 1 Y 32 0 63 +def test t1 t1 b b 16 1 1 Y 36 0 63 b # # SELECT b FROM t1 GROUP BY b; Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr -def test t1 t1 b b 16 1 1 Y 32 0 63 +def test t1 t1 b b 16 1 1 Y 36 0 63 b # # diff --git a/mysql-test/main/type_enum.result b/mysql-test/main/type_enum.result index 3a3a8bc2a17a0..23a7430134b9e 100644 --- a/mysql-test/main/type_enum.result +++ b/mysql-test/main/type_enum.result @@ -2593,7 +2593,7 @@ t2 CREATE TABLE `t2` ( DROP TABLE t2; SELECT c_int FROM t1 UNION SELECT c_enum FROM t1; Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr -def c_int c_int 253 11 0 Y 0 0 8 +def c_int c_int 253 11 0 Y 4 0 8 c_int SELECT COALESCE(c_int, c_enum) FROM t1; Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr diff --git a/mysql-test/main/type_set.result b/mysql-test/main/type_set.result index e35aa66f9b6cc..faf5165c9acb6 100644 --- a/mysql-test/main/type_set.result +++ b/mysql-test/main/type_set.result @@ -697,7 +697,7 @@ t2 CREATE TABLE `t2` ( DROP TABLE t2; SELECT c_int FROM t1 UNION SELECT c_set FROM t1; Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr -def c_int c_int 253 33 0 Y 0 0 33 +def c_int c_int 253 33 0 Y 4 0 33 c_int SELECT COALESCE(c_int, c_set) FROM t1; Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr diff --git a/mysql-test/suite/perfschema/t/misc_session_status.test b/mysql-test/suite/perfschema/t/misc_session_status.test index f32896d6077ff..6d7d0c0ac1ab6 100644 --- a/mysql-test/suite/perfschema/t/misc_session_status.test +++ b/mysql-test/suite/perfschema/t/misc_session_status.test @@ -1,7 +1,7 @@ --source include/not_embedded.inc --source include/have_perfschema.inc --source 
include/not_msan.inc ---source include/not_valgrind.inc +--source include/not_valgrind_build.inc # This does not crash on 32 bit because of less memory used --source include/have_64bit.inc --echo # diff --git a/mysys/my_compare.c b/mysys/my_compare.c index d1326dc9d04e9..95391fe19d49c 100644 --- a/mysys/my_compare.c +++ b/mysys/my_compare.c @@ -266,6 +266,44 @@ int ha_key_cmp(HA_KEYSEG *keyseg, const uchar *a, b+=b_length; } break; + case HA_KEYTYPE_VARTEXT4: + { + /* Only used for internal temporary tables */ + int a_length,b_length; + uchar *a_key, *b_key; + DBUG_ASSERT(!(nextflag & SEARCH_PREFIX)); + + a_length= uint4korr(a); + b_length= uint4korr(b); + memcpy(&a_key, a, sizeof(char*)); + memcpy(&b_key, b, sizeof(char*)); + if (piks && + (flag= ha_compare_char_varying(keyseg->charset, + a_key, a_length, + b_key, b_length, 0))) + return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); + a+= 4 + portable_sizeof_char_ptr; + b+= 4 + portable_sizeof_char_ptr; + break; + } + case HA_KEYTYPE_VARBINARY4: + { + /* Only used for internal temporary tables */ + int a_length,b_length; + uchar *a_key, *b_key; + DBUG_ASSERT(!(nextflag & SEARCH_PREFIX)); + + a_length= uint4korr(a); + b_length= uint4korr(b); + memcpy(&a_key, a, sizeof(char*)); + memcpy(&b_key, b, sizeof(char*)); + if (piks && + (flag= compare_bin(a_key, a_length, b_key, b_length, 0, 0))) + return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); + a+= 4 + portable_sizeof_char_ptr; + b+= 4 + portable_sizeof_char_ptr; + break; + } case HA_KEYTYPE_INT8: { int i_1= (int) *((signed char*) a); @@ -620,6 +658,8 @@ HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, const uchar *a) #endif case HA_KEYTYPE_FLOAT: case HA_KEYTYPE_DOUBLE: + case HA_KEYTYPE_VARTEXT4: + case HA_KEYTYPE_VARBINARY4: a= end; break; case HA_KEYTYPE_END: /* purecov: inspected */ diff --git a/sql/field.cc b/sql/field.cc index 552dc7383817c..e273ec5d10d34 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -54,8 +54,8 @@ const char field_separator=','; #define DOUBLE_TO_STRING_CONVERSION_BUFFER_SIZE FLOATING_POINT_BUFFER #define LONGLONG_TO_STRING_CONVERSION_BUFFER_SIZE 128 #define DECIMAL_TO_STRING_CONVERSION_BUFFER_SIZE 128 -#define BLOB_PACK_LENGTH_TO_MAX_LENGH(arg) \ - ((ulong) ((1LL << MY_MIN(arg, 4) * 8) - 1)) +#define BLOB_PACK_LENGTH_TO_MAX_LENGTH(arg) \ + ((ulong) ((1ULL << MY_MIN(arg, 4) * 8) - 1)) // Column marked for read or the field set to read out of record[0] bool Field::marked_for_read() const @@ -2092,6 +2092,11 @@ int Field_blob::store_from_statistical_minmax_field(Field *stat_field, return 0; } +void Field_blob::set_pack_length(uint32 packlength_arg) +{ + packlength= packlength_arg; + field_length= BLOB_PACK_LENGTH_TO_MAX_LENGTH(packlength); +} /** Pack the field into a format suitable for storage and transfer. 
@@ -2582,7 +2587,9 @@ bool Field::optimize_range(uint idx, uint part) const Field *Field::make_new_field(MEM_ROOT *root, TABLE *new_table, - bool keep_type __attribute__((unused))) + bool keep_type __attribute__((unused)), + const Tmp_field_param *param + __attribute__((unused))) { Field *tmp; if (!(tmp= (Field*) memdup_root(root,(char*) this,size_of()))) @@ -2613,7 +2620,7 @@ Field *Field::new_key_field(MEM_ROOT *root, TABLE *new_table, uchar *new_null_ptr, uint new_null_bit) { Field *tmp; - if ((tmp= make_new_field(root, new_table, table == new_table))) + if ((tmp= make_new_field(root, new_table, table == new_table, 0))) { tmp->ptr= new_ptr; tmp->null_ptr= new_null_ptr; @@ -2638,11 +2645,13 @@ Field *Field::new_key_field(MEM_ROOT *root, TABLE *new_table, */ Field *Field::create_tmp_field(MEM_ROOT *mem_root, TABLE *new_table, - bool maybe_null_arg) + bool maybe_null_arg, + const Tmp_field_param *param) { Field *new_field; - if ((new_field= make_new_field(mem_root, new_table, new_table == table))) + if ((new_field= make_new_field(mem_root, new_table, new_table == table, + param))) { new_field->init_for_tmp_table(this, new_table); new_field->flags|= flags & NO_DEFAULT_VALUE_FLAG; @@ -3379,10 +3388,11 @@ void Field_decimal::sql_type(String &res) const Field *Field_decimal::make_new_field(MEM_ROOT *root, TABLE *new_table, - bool keep_type) + bool keep_type, + const Tmp_field_param *param) { if (keep_type) - return Field_real::make_new_field(root, new_table, keep_type); + return Field_real::make_new_field(root, new_table, keep_type, param); Field *field= new (root) Field_new_decimal(NULL, field_length, maybe_null() ? (uchar*) "" : 0, 0, @@ -7909,11 +7919,12 @@ uint Field_string::get_key_image(uchar *buff, uint length, const uchar *ptr_arg, Field *Field_string::make_new_field(MEM_ROOT *root, TABLE *new_table, - bool keep_type) + bool keep_type, + const Tmp_field_param *param) { Field *field; if (type() != MYSQL_TYPE_VAR_STRING || keep_type) - field= Field::make_new_field(root, new_table, keep_type); + field= Field::make_new_field(root, new_table, keep_type, param); else if ((field= new (root) Field_varstring(field_length, maybe_null(), &field_name, new_table->s, charset()))) @@ -7921,7 +7932,7 @@ Field *Field_string::make_new_field(MEM_ROOT *root, TABLE *new_table, /* Old VARCHAR field which should be modified to a VARCHAR on copy This is done to ensure that ALTER TABLE will convert old VARCHAR fields - to now VARCHAR fields. + to new VARCHAR fields. */ field->init_for_make_new_field(new_table, orig_table); } @@ -8417,22 +8428,27 @@ int Field_varstring::cmp_binary(const uchar *a_ptr, const uchar *b_ptr, Field *Field_varstring::make_new_field(MEM_ROOT *root, TABLE *new_table, - bool keep_type) + bool keep_type, + const Tmp_field_param *param) { Field_varstring *res= (Field_varstring*) Field::make_new_field(root, new_table, - keep_type); + keep_type, + param); if (res) res->length_bytes= length_bytes; return res; } -Field *Field_varstring_compressed::make_new_field(MEM_ROOT *root, TABLE *new_table, - bool keep_type) +Field *Field_varstring_compressed::make_new_field(MEM_ROOT *root, + TABLE *new_table, + bool keep_type, + const Tmp_field_param *param) { Field_varstring *res; - if (new_table->s->is_optimizer_tmp_table()) + if (new_table->s->is_optimizer_tmp_table() || + (param && param->part_of_unique_key())) { /* Compressed field cannot be part of a key. 
For optimizer temporary @@ -8449,24 +8465,34 @@ Field *Field_varstring_compressed::make_new_field(MEM_ROOT *root, TABLE *new_tab } } else - res= (Field_varstring*) Field::make_new_field(root, new_table, keep_type); + res= (Field_varstring*) Field::make_new_field(root, new_table, keep_type, + param); if (res) res->length_bytes= length_bytes; return res; } Field *Field_blob_compressed::make_new_field(MEM_ROOT *root, TABLE *new_table, - bool keep_type) + bool keep_type, + const Tmp_field_param *param) { Field_blob *res; - if (new_table->s->is_optimizer_tmp_table()) + if (new_table->s->is_optimizer_tmp_table() || + (param && param->part_of_unique_key())) { /* Compressed field cannot be part of a key. For optimizer temporary table we create uncompressed substitute. + Compressed fields also cannot be part of GROUP_CONCAT(). */ - res= new (root) Field_blob(ptr, null_ptr, null_bit, Field::NONE, &field_name, - new_table->s, packlength, charset()); + if (param && param->part_of_unique_key()) + res= new (root) Field_blob_key(ptr, null_ptr, null_bit, Field::NONE, + &field_name, + new_table->s, packlength, charset()); + else + res= new (root) Field_blob(ptr, null_ptr, null_bit, Field::NONE, + &field_name, + new_table->s, packlength, charset()); if (res) { res->init_for_make_new_field(new_table, orig_table); @@ -8475,7 +8501,8 @@ Field *Field_blob_compressed::make_new_field(MEM_ROOT *root, TABLE *new_table, } } else - res= (Field_blob *) Field::make_new_field(root, new_table, keep_type); + res= (Field_blob *) Field_blob::make_new_field(root, new_table, keep_type, + param); return res; }
Field_blob_key(field_length, maybe_null(), &field_name, + charset()); + if (res) + res->init_for_make_new_field(newt, orig_table); + return res; + } + return Field::make_new_field(root, newt, keep_type, param); } @@ -9394,6 +9428,55 @@ Binlog_type_info Field_blob_compressed::binlog_type_info() const pack_length_no_ptr(), 1, charset()); } + +/**************************************************************************** +** Field_blob_key +** Used for blob keys in internal temporary tables +****************************************************************************/ + +void Field_blob_key::set_key_image(const uchar *data,uint length) +{ + store_length(length); + memcpy(ptr+packlength, &data, sizeof(char*)); +} + + +int Field_blob_key::key_cmp(const uchar *key_ptr, uint max_key_length) const +{ + uchar *blob1; + uint32 blob_length= get_length(ptr); + memcpy(&blob1, ptr + packlength, sizeof(char*)); + return Field_blob_key::cmp(blob1, (uint32) blob_length, + key_ptr + 4, uint4korr(key_ptr)); +} + +int Field_blob_key::key_cmp(const uchar *a,const uchar *b) const +{ + return Field_blob_key::cmp(a + 4, uint4korr(a), b+ 4, uint4korr(b)); +} + + +Field *Field_blob_key::new_key_field(MEM_ROOT *root, TABLE *new_table, + uchar *new_ptr, uint32 length, + uchar *new_null_ptr, uint new_null_bit) +{ + Field_blob_key *res; + /* + length comes from blob->key_length which includes portable_sizeof_char_ptr + */ + length-= portable_sizeof_char_ptr; + DBUG_ASSERT(length > 0 && length <= 4); + + res= new (root) Field_blob_key(new_ptr, new_null_ptr, new_null_bit, + Field::NONE, &field_name, + table->s, length, charset()); + res->init(new_table); + /* key_fields are not stored in the table. Don't count this one */ + table->s->blob_fields--; + return res; +} + + /**************************************************************************** ** enum type. ** This is a string which only can have a selection of different values. @@ -9592,10 +9675,10 @@ void Field_enum::sql_type(String &res) const Field *Field_enum::make_new_field(MEM_ROOT *root, TABLE *new_table, - bool keep_type) + bool keep_type, const Tmp_field_param *param) { Field_enum *res= (Field_enum*) Field::make_new_field(root, new_table, - keep_type); + keep_type, param); if (res) res->typelib= copy_typelib(root, typelib); return res; diff --git a/sql/field.h b/sql/field.h index 50d368885c4b0..34a74b4d328b6 100644 --- a/sql/field.h +++ b/sql/field.h @@ -1133,7 +1133,8 @@ class Field: public Value_source which is located in RAM). */ virtual uint32 pack_length() const { return (uint32) field_length; } - + /* Length to store a key in a key buffer */ + virtual uint32 key_pack_length() const { return pack_length(); } /* pack_length_in_rec() returns size (in bytes) used to store field data on storage (i.e. it returns the maximal size of the field in a row of the @@ -1147,11 +1148,10 @@ class Field: public Value_source DBUG_ENTER("Field::pack_length_from_metadata"); DBUG_RETURN(field_metadata); } + /* Length of row data in the record, not including packed length */ virtual uint row_pack_length() const { return 0; } - /* - data_length() return the "real size" of the data in memory. 
- */ + /* Return the current size of data stored in the record */ virtual uint32 data_length() { return pack_length(); } virtual uint32 sort_length() const { return pack_length(); } @@ -1526,15 +1526,16 @@ class Field: public Value_source virtual bool optimize_range(uint idx, uint part) const; virtual void free() {} virtual Field *make_new_field(MEM_ROOT *root, TABLE *new_table, - bool keep_type); + bool keep_type, const Tmp_field_param *param); virtual Field *new_key_field(MEM_ROOT *root, TABLE *new_table, uchar *new_ptr, uint32 length, uchar *new_null_ptr, uint new_null_bit); Field *create_tmp_field(MEM_ROOT *root, TABLE *new_table, - bool maybe_null_arg); - Field *create_tmp_field(MEM_ROOT *root, TABLE *new_table) + bool maybe_null_arg, const Tmp_field_param *param); + Field *create_tmp_field(MEM_ROOT *root, TABLE *new_table, + const Tmp_field_param *param) { - return create_tmp_field(root, new_table, maybe_null()); + return create_tmp_field(root, new_table, maybe_null(), param); } Field *clone(MEM_ROOT *mem_root, TABLE *new_table); Field *clone(MEM_ROOT *mem_root, TABLE *new_table, my_ptrdiff_t diff); @@ -2389,8 +2390,8 @@ class Field_decimal final :public Field_real { unireg_check_arg, field_name_arg, dec_arg, zero_arg, unsigned_arg) {} - Field *make_new_field(MEM_ROOT *root, TABLE *new_table, bool keep_type) - override; + Field *make_new_field(MEM_ROOT *root, TABLE *new_table, bool keep_type, + const Tmp_field_param *param) override; const Type_handler *type_handler() const override { return &type_handler_olddecimal; } enum ha_base_keytype key_type() const override @@ -4146,8 +4147,8 @@ class Field_string final :public Field_longstr { uint max_packed_col_length(uint max_length) override; uint size_of() const override { return sizeof *this; } bool has_charset() const override { return charset() != &my_charset_bin; } - Field *make_new_field(MEM_ROOT *root, TABLE *new_table, bool keep_type) - override; + Field *make_new_field(MEM_ROOT *root, TABLE *new_table, bool keep_type, + const Tmp_field_param *param) override; uint get_key_image(uchar *buff, uint length, const uchar *ptr_arg, imagetype type) const override; sql_mode_t value_depends_on_sql_mode() const override; @@ -4274,8 +4275,8 @@ class Field_varstring :public Field_longstr { uint size_of() const override { return sizeof *this; } bool has_charset() const override { return charset() == &my_charset_bin ? 
FALSE : TRUE; } - Field *make_new_field(MEM_ROOT *root, TABLE *new_table, bool keep_type) - override; + Field *make_new_field(MEM_ROOT *root, TABLE *new_table, bool keep_type, + const Tmp_field_param *param) override; Field *new_key_field(MEM_ROOT *root, TABLE *new_table, uchar *new_ptr, uint32 length, uchar *new_null_ptr, uint new_null_bit) override; @@ -4344,7 +4345,8 @@ class Field_varstring_compressed final :public Field_varstring { { DBUG_ASSERT(0); return 0; } using Field_varstring::key_cmp; Binlog_type_info binlog_type_info() const override; - Field *make_new_field(MEM_ROOT *root, TABLE *new_table, bool keep_type) override; + Field *make_new_field(MEM_ROOT *root, TABLE *new_table, bool keep_type, + const Tmp_field_param *param) override; }; @@ -4438,7 +4440,8 @@ class Field_blob :public Field_longstr { enum utype unireg_check_arg, const LEX_CSTRING *field_name_arg, TABLE_SHARE *share, uint blob_pack_length, const DTCollation &collation); - Field_blob(uint32 len_arg,bool maybe_null_arg, const LEX_CSTRING *field_name_arg, + Field_blob(uint32 len_arg,bool maybe_null_arg, + const LEX_CSTRING *field_name_arg, const DTCollation &collation) :Field_longstr((uchar*) 0, len_arg, maybe_null_arg ? (uchar*) "": 0, 0, NONE, field_name_arg, collation), @@ -4554,7 +4557,7 @@ class Field_blob :public Field_longstr { int cmp(const uchar *a, uint32 a_length, const uchar *b, uint32 b_length) const; int cmp_binary(const uchar *a,const uchar *b, uint32 max_length=~0U) const - override; + override; int key_cmp(const uchar *,const uchar*) const override; int key_cmp(const uchar *str, uint length) const override; /* Never update the value of min_val for a blob field */ @@ -4576,6 +4579,7 @@ class Field_blob :public Field_longstr { */ uint32 pack_length_no_ptr() const { return (uint32) (packlength); } + void set_pack_length(uint32 packlength_arg); uint row_pack_length() const override { return pack_length_no_ptr(); } uint32 sort_length() const override; uint32 sort_suffix_length() const override; @@ -4592,16 +4596,16 @@ class Field_blob :public Field_longstr { } uint32 get_field_buffer_size() { return value.alloced_length(); } void store_length(uchar *i_ptr, uint i_packlength, uint32 i_number); - void store_length(size_t number) + inline void store_length(size_t number) { DBUG_ASSERT(number < UINT_MAX32); store_length(ptr, packlength, (uint32)number); } inline uint32 get_length(my_ptrdiff_t row_offset= 0) const - { return get_length(ptr+row_offset, this->packlength); } + { return get_length(ptr+row_offset, packlength); } uint32 get_length(const uchar *ptr, uint packlength) const; uint32 get_length(const uchar *ptr_arg) const - { return get_length(ptr_arg, this->packlength); } + { return get_length(ptr_arg, packlength); } inline uchar *get_ptr() const { return get_ptr(ptr); } inline uchar *get_ptr(const uchar *ptr_arg) const { @@ -4632,7 +4636,8 @@ class Field_blob :public Field_longstr { return get_key_image_itRAW(ptr_arg, buff, length); } void set_key_image(const uchar *buff,uint length) override; - Field *make_new_field(MEM_ROOT *, TABLE *new_table, bool keep_type) override; + Field *make_new_field(MEM_ROOT *, TABLE *new_table, bool keep_type, + const Tmp_field_param *param) override; Field *new_key_field(MEM_ROOT *root, TABLE *new_table, uchar *new_ptr, uint32 length, uchar *new_null_ptr, uint new_null_bit) override; @@ -4786,10 +4791,59 @@ class Field_blob_compressed final :public Field_blob { override { DBUG_ASSERT(0); return 0; } Binlog_type_info binlog_type_info() const override; - Field 
*make_new_field(MEM_ROOT *root, TABLE *new_table, bool keep_type) override; + Field *make_new_field(MEM_ROOT *root, TABLE *new_table, bool keep_type, + const Tmp_field_param *param) override; +}; + + +/* + Class for using blob keys for internal temporary tables. + + The difference from Field_blob is that the blob key is stored as + [length (4 bytes)] [pointer to data (8 bytes)] + This allows us to use the whole blob as a key and also avoids copying + the blob value to the key. +*/ + +class Field_blob_key final :public Field_blob { +public: + Field_blob_key(uchar *ptr_arg, uchar *null_ptr_arg, + uchar null_bit_arg, enum utype unireg_check_arg, + const LEX_CSTRING *field_name_arg, TABLE_SHARE *share, + uint blob_pack_length, const DTCollation &collation) : + Field_blob(ptr_arg, null_ptr_arg, null_bit_arg, unireg_check_arg, + field_name_arg, share, blob_pack_length, collation) + {} + Field_blob_key(uint32 len_arg, bool maybe_null_arg, + const LEX_CSTRING *field_name_arg, + const DTCollation &collation) + :Field_blob(len_arg, maybe_null_arg, field_name_arg, collation) + {} + + /* Blob key fields always have a 4-byte length and HA_KEYTYPE_XXX4 */ + uint32 key_pack_length() const override + { return (uint32) (4 + portable_sizeof_char_ptr); } + uint16 key_part_length_bytes() const override { return 4; } + int key_cmp(const uchar *,const uchar*) const override; + int key_cmp(const uchar *str, uint length) const override; + uint get_key_image(uchar *buff, uint length, + const uchar *ptr_arg, imagetype type) const override + { + /* Internal temporary tables don't use key-only-reads */ + DBUG_ASSERT(0); + return 0; + } + void set_key_image(const uchar *buff,uint length) override; + enum ha_base_keytype key_type() const override + { return binary() ? HA_KEYTYPE_VARBINARY4 : HA_KEYTYPE_VARTEXT4; } + uint32 key_length() const override { return 4 + portable_sizeof_char_ptr; } + Field *new_key_field(MEM_ROOT *root, TABLE *new_table, + uchar *new_ptr, uint32 length, + uchar *new_null_ptr, uint new_null_bit) override; }; + class Field_enum :public Field_str { static void do_field_enum(Copy_field *copy_field); longlong val_int(const uchar *) const; @@ -4812,8 +4866,8 @@ class Field_enum :public Field_str { { flags|=ENUM_FLAG; } - Field *make_new_field(MEM_ROOT *root, TABLE *new_table, bool keep_type) - override; + Field *make_new_field(MEM_ROOT *root, TABLE *new_table, bool keep_type, + const Tmp_field_param *param) override; const Type_handler *type_handler() const override { return &type_handler_enum; } enum ha_base_keytype key_type() const override; diff --git a/sql/item.cc b/sql/item.cc index a3ec31ceea8b0..f3a0166f50649 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -3226,7 +3226,8 @@ void Item_field::set_field(Field *field_par) any_privileges= 0; if (field->table->s->tmp_table == SYSTEM_TMP_TABLE || - field->table->s->tmp_table == INTERNAL_TMP_TABLE) + field->table->s->tmp_table == INTERNAL_TMP_TABLE || + field->table->s->tmp_table == RESULT_TMP_TABLE) set_refers_to_temp_table(); } diff --git a/sql/item.h b/sql/item.h index 089d440bad1be..865cd232c5fa3 100644 --- a/sql/item.h +++ b/sql/item.h @@ -725,21 +725,25 @@ class Tmp_field_param bool m_modify_item; bool m_table_cant_handle_bit_fields; bool m_make_copy_field; + bool m_part_of_unique_key; public: Tmp_field_param(bool group, bool modify_item, bool table_cant_handle_bit_fields, - bool make_copy_field) + bool make_copy_field, + bool part_of_unique_key) :m_group(group), m_modify_item(modify_item), 
m_table_cant_handle_bit_fields(table_cant_handle_bit_fields), - m_make_copy_field(make_copy_field) + m_make_copy_field(make_copy_field), + m_part_of_unique_key(part_of_unique_key) { } bool group() const { return m_group; } bool modify_item() const { return m_modify_item; } bool table_cant_handle_bit_fields() const { return m_table_cant_handle_bit_fields; } bool make_copy_field() const { return m_make_copy_field; } + bool part_of_unique_key() const { return m_part_of_unique_key; } void set_modify_item(bool to) { m_modify_item= to; } }; @@ -930,7 +934,8 @@ class Item :public Value_source, Field *tmp_table_field_from_field_type(MEM_ROOT *root, TABLE *table) { DBUG_ASSERT(fixed()); - const Type_handler *h= type_handler()->type_handler_for_tmp_table(this); + + const Type_handler *h= type_handler()->type_handler_for_tmp_table(this, 0); return h->make_and_init_table_field(root, &name, Record_addr(maybe_null()), *this, table); @@ -3601,7 +3606,8 @@ class Item_result_field :public Item_fixed_hybrid /* Item with result field */ const Tmp_field_param *param) override { DBUG_ASSERT(fixed()); - const Type_handler *h= type_handler()->type_handler_for_tmp_table(this); + const Type_handler *h= type_handler()->type_handler_for_tmp_table(this, + param); return create_tmp_field_ex_from_handler(root, table, src, param, h); } void get_tmp_field_src(Tmp_field_src *src, const Tmp_field_param *param); @@ -8184,12 +8190,7 @@ class Item_type_holder: public Item, public Type_handler_hybrid_field_type String *val_str(String*) override; bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override; Field *create_tmp_field_ex(MEM_ROOT *root, TABLE *table, Tmp_field_src *src, - const Tmp_field_param *param) override - { - return Item_type_holder::real_type_handler()-> - make_and_init_table_field(root, &name, Record_addr(maybe_null()), - *this, table); - } + const Tmp_field_param *param) override; protected: Item *shallow_copy(THD *) const override { return nullptr; } Item *deep_copy(THD *) const override { return nullptr; } diff --git a/sql/item_func.h b/sql/item_func.h index 90a8cfcc9a21d..dd7aa88adce19 100644 --- a/sql/item_func.h +++ b/sql/item_func.h @@ -629,7 +629,7 @@ class Item_handled_func: public Item_func const Type_handler * type_handler_for_create_select(const Item_handled_func *item) const override { - return return_type_handler(item)->type_handler_for_tmp_table(item); + return return_type_handler(item)->type_handler_for_tmp_table(item, 0); } double val_real(Item_handled_func *item) const override { diff --git a/sql/item_sum.cc b/sql/item_sum.cc index 0931312e4baf1..18262c20953d5 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -1327,7 +1327,7 @@ Field *Item_sum_min_max::create_tmp_field(MEM_ROOT *root, if (args[0]->type() == Item::FIELD_ITEM) { Field *field= ((Item_field*) args[0])->field; - if ((field= field->create_tmp_field(root, table, true))) + if ((field= field->create_tmp_field(root, table, true, 0))) { DBUG_ASSERT((field->flags & NOT_NULL_FLAG) == 0); field->field_name= name; @@ -3685,12 +3685,6 @@ extern "C" int group_concat_key_cmp_with_order(void *arg, const void *key1, order_item++) { Item *item= *(*order_item)->item; - /* - If field_item is a const item then either get_tmp_table_field returns 0 - or it is an item over a const table. - */ - if (item->const_item()) - continue; /* If item is a const item then either get_tmp_table_field returns 0 or it is an item over a const table. 
@@ -3884,6 +3878,7 @@ int dump_leaf_key(void* key_arg, element_count count __attribute__((unused)), Field *field= (*arg)->get_tmp_table_field(); if (field) { + /* Note that field->table can be a different table! */ uint offset= (field->offset(field->table->record[0]) - table->s->null_bytes); DBUG_ASSERT(offset < table->s->reclength); diff --git a/sql/item_timefunc.cc b/sql/item_timefunc.cc index 8db1c08b58297..8c80b310a27e2 100644 --- a/sql/item_timefunc.cc +++ b/sql/item_timefunc.cc @@ -3268,7 +3268,8 @@ class Item_char_typecast_func_handler: public Item_handled_func::Handler_str const Type_handler * type_handler_for_create_select(const Item_handled_func *item) const override { - return return_type_handler(item)->type_handler_for_tmp_table(item); + return return_type_handler(item)->type_handler_for_tmp_table(item, + 0); } bool fix_length_and_dec(Item_handled_func *item) const override diff --git a/sql/mysqld.cc b/sql/mysqld.cc index dc89f0a07a800..7dd4253c565a1 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -1933,6 +1933,7 @@ static void mysqld_exit(int exit_code) shutdown_performance_schema(); // we do it as late as possible #endif set_malloc_size_cb(NULL); + OPENSSL_cleanup(); if (global_status_var.global_memory_used) { fprintf(stderr, "Warning: Memory not freed: %lld\n", diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index d6ff9d2eb1e52..968bc547c9954 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -4804,7 +4804,8 @@ SJ_TMP_TABLE::create_sj_weedout_tmp_table(THD *thd) key_part_info->offset= field->offset(table->record[0]); key_part_info->length= (uint16) field->key_length(); key_part_info->type= (uint8) field->key_type(); - key_part_info->key_type = FIELDFLAG_BINARY; + key_part_info->key_type= FIELDFLAG_BINARY; + key_part_info->key_part_flag= field->key_part_flag(); if (!using_unique_constraint) { if (!(key_field= field->new_key_field(thd->mem_root, table, diff --git a/sql/records.cc b/sql/records.cc index 77eab502230c4..c403c044af5cf 100644 --- a/sql/records.cc +++ b/sql/records.cc @@ -197,8 +197,7 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table, info->table=table; info->sort_info= filesort; - if ((table->s->tmp_table == INTERNAL_TMP_TABLE) && - !using_addon_fields) + if ((table->s->tmp_table == RESULT_TMP_TABLE) && !using_addon_fields) (void) table->file->extra(HA_EXTRA_MMAP); if (using_addon_fields) diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index c63125249d30f..45d592a61527b 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -128,7 +128,9 @@ bool Update_plan::save_explain_data_intern(THD *thd, explain->table_tracker.set_gap_tracker(&explain->extra_time_tracker); table->file->set_time_tracker(&explain->table_tracker); - if (table->file->handler_stats && table->s->tmp_table != INTERNAL_TMP_TABLE) + if (table->file->handler_stats && + table->s->tmp_table != RESULT_TMP_TABLE && + table->s->tmp_table != INTERNAL_TMP_TABLE) explain->handler_for_stats= table->file; } diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc index 60b4bdc88afd9..978064c8439c2 100644 --- a/sql/sql_insert.cc +++ b/sql/sql_insert.cc @@ -3056,7 +3056,8 @@ TABLE *Delayed_insert::get_local_table(THD* client_thd) found_next_number_field= table->found_next_number_field; for (org_field= table->field; *org_field; org_field++, field++) { - if (!(*field= (*org_field)->make_new_field(client_thd->mem_root, copy, 1))) + if (!(*field= (*org_field)->make_new_field(client_thd->mem_root, copy, 1, + 0))) goto error; (*field)->unireg_check= (*org_field)->unireg_check; 
(*field)->invisible= (*org_field)->invisible; @@ -4752,7 +4753,7 @@ void select_insert::abort_result_set() Field *Item::create_field_for_create_select(MEM_ROOT *root, TABLE *table) { - static Tmp_field_param param(false, false, false, false); + static Tmp_field_param param(false, false, false, false, false); Tmp_field_src src; return create_tmp_field_ex(root, table, &src, &param); } diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index d89a4d4d4ca27..2688084c07eb2 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -8332,6 +8332,7 @@ bool add_to_list(THD *thd, SQL_I_List<ORDER> &list, Item *item,bool asc) order->used=0; order->counter_used= 0; order->fast_field_copier_setup= 0; + order->field= 0; list.link_in_list(order, &order->next); DBUG_RETURN(0); } diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 080f0af4910e5..eee0a390b17bf 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -20295,7 +20295,7 @@ Item_field::create_tmp_field_from_item_field(MEM_ROOT *root, TABLE *new_table, */ Record_addr rec(orig_item ? orig_item->maybe_null() : maybe_null()); const Type_handler *handler= type_handler()-> - type_handler_for_tmp_table(this); + type_handler_for_tmp_table(this, param); result= handler->make_and_init_table_field(root, new_name, rec, *this, new_table); } @@ -20312,7 +20312,7 @@ Item_field::create_tmp_field_from_item_field(MEM_ROOT *root, TABLE *new_table, { bool tmp_maybe_null= param->modify_item() ? maybe_null() : field->maybe_null(); - result= field->create_tmp_field(root, new_table, tmp_maybe_null); + result= field->create_tmp_field(root, new_table, tmp_maybe_null, param); if (result && ! param->modify_item()) result->field_name= *new_name; } @@ -20387,6 +20387,33 @@ Field *Item_ref::create_tmp_field_ex(MEM_ROOT *root, TABLE *table, } + + +Field *Item_type_holder::create_tmp_field_ex(MEM_ROOT *root, TABLE *table, + Tmp_field_src *src, + const Tmp_field_param *param) +{ + Type_handler const *type_handler= Item_type_holder::real_type_handler(); + Type_handler_blob_common const *blob_handler; + if (param->part_of_unique_key() && + (blob_handler= + dynamic_cast<const Type_handler_blob_common*>(type_handler))) + { + Field_blob *blob_field= (Field_blob*) type_handler_blob_key. + make_and_init_table_field(root, &name, Record_addr(maybe_null()), + *this, table); + if (blob_field) + { + /* Fix length of blob to be able to return the original blob type */ + blob_field->set_pack_length(blob_handler->length_bytes()); + } + return blob_field; + } + return type_handler-> + make_and_init_table_field(root, &name, Record_addr(maybe_null()), + *this, table); +} + + void Item_result_field::get_tmp_field_src(Tmp_field_src *src, const Tmp_field_param *param) { @@ -20408,7 +20435,7 @@ Item_result_field::create_tmp_field_ex_from_handler( TABLE *table, Tmp_field_src *src, const Tmp_field_param *param, - const Type_handler *h) + const Type_handler *type_handler) { /* Possible Item types: @@ -20421,10 +20448,26 @@ Item_result_field::create_tmp_field_ex_from_handler( DBUG_ASSERT(type() != NULL_ITEM); get_tmp_field_src(src, param); Field *result; - if ((result= h->make_and_init_table_field(root, &name, - Record_addr(maybe_null()), - *this, table)) && - param->modify_item()) + Type_handler_blob_common const *blob_handler; + + if (param->part_of_unique_key() && + (blob_handler= + dynamic_cast<const Type_handler_blob_common*>(type_handler))) + { + result= type_handler_blob_key. 
+ make_and_init_table_field(root, &name, Record_addr(maybe_null()), + *this, table); + if (result) + { + /* Fix length of blob to be able to return the original blob type */ + ((Field_blob*) result)->set_pack_length(blob_handler->length_bytes()); + } + } + else + result= type_handler->make_and_init_table_field(root, &name, + Record_addr(maybe_null()), + *this, table); + if (result && param->modify_item()) result_field= result; return result; } @@ -20436,7 +20479,7 @@ Field *Item_func_sp::create_tmp_field_ex(MEM_ROOT *root, TABLE *table, { Field *result; get_tmp_field_src(src, param); - if ((result= sp_result_field->create_tmp_field(root, table))) + if ((result= sp_result_field->create_tmp_field(root, table, param))) { result->field_name= name; if (param->modify_item()) @@ -20490,6 +20533,8 @@ static bool make_json_valid_expr(TABLE *table, Field *field) @param make_copy_field Set when using with rollup when we want to have an exact copy of the field. + @param part_of_unique_key + Field is part of unique key @retval 0 on error @retval @@ -20502,11 +20547,12 @@ Field *create_tmp_field(TABLE *table, Item *item, Field **default_field, bool group, bool modify_item, bool table_cant_handle_bit_fields, - bool make_copy_field) + bool make_copy_field, + bool part_of_unique_key) { Tmp_field_src src; Tmp_field_param prm(group, modify_item, table_cant_handle_bit_fields, - make_copy_field); + make_copy_field, part_of_unique_key || group); Field *result= item->create_tmp_field_ex(table->in_use->mem_root, table, &src, &prm); if (is_json_type(item) && make_json_valid_expr(table, result)) @@ -20516,6 +20562,10 @@ Field *create_tmp_field(TABLE *table, Item *item, *default_field= src.default_field(); if (src.item_result_field()) *((*copy_func)++)= src.item_result_field(); + if (part_of_unique_key) + result->flags|= FIELD_PART_OF_TMP_UNIQUE | UNIQUE_KEY_FLAG; + if (group) + result->flags|= UNIQUE_KEY_FLAG; return result; } @@ -20718,10 +20768,9 @@ TABLE *Create_tmp_table::start(THD *thd, param->group_parts--; continue; } - else - prev= &(tmp->next); + prev= &(tmp->next); /* - marker == 4 means two things: + marker == MARKER_NULL_KEY means two things: - store NULLs in the key, and - convert BIT fields to 64-bit long, needed because MEMORY tables can't index BIT fields. 
@@ -20812,6 +20861,7 @@ TABLE *Create_tmp_table::start(THD *thd, table->s= share; init_tmp_table_share(thd, share, "", 0, "(temporary)", tmpname); + share->tmp_table= RESULT_TMP_TABLE; share->blob_field= blob_field; share->table_charset= param->table_charset; share->primary_key= MAX_KEY; // Indicate no primary key @@ -20913,9 +20963,11 @@ bool Create_tmp_table::add_fields(THD *thd, create_tmp_field(table, arg, ©_func, tmp_from_field, &m_default_field[fieldnr], m_group != 0, not_all_columns, - distinct_record_structure , false); + distinct_record_structure, false, + (current_counter == distinct)); if (!new_field) goto err; // Should be OOM + tmp_from_field++; thd->mem_root= mem_root_save; @@ -20939,8 +20991,6 @@ bool Create_tmp_table::add_fields(THD *thd, */ arg->set_maybe_null(); } - if (current_counter == distinct) - new_field->flags|= FIELD_PART_OF_TMP_UNIQUE; } } } @@ -20977,13 +21027,15 @@ bool Create_tmp_table::add_fields(THD *thd, */ item->marker == MARKER_NULL_KEY || param->bit_fields_as_long, - param->force_copy_fields); + param->force_copy_fields, + (current_counter == distinct)); if (unlikely(!new_field)) { if (unlikely(thd->is_fatal_error)) goto err; // Got OOM continue; // Some kind of const item } + if (type == Item::SUM_FUNC_ITEM) { Item_sum *agg_item= (Item_sum *) item; @@ -21021,8 +21073,6 @@ bool Create_tmp_table::add_fields(THD *thd, m_group_null_items++; new_field->flags|= GROUP_FLAG; } - if (current_counter == distinct) - new_field->flags|= FIELD_PART_OF_TMP_UNIQUE; } } @@ -21091,6 +21141,13 @@ bool Create_tmp_table::choose_engine(THD *thd, TABLE *table, DBUG_RETURN(!table->file); } +bool is_text_key_segment(KEY_PART_INFO *m_key_part_info) +{ + return ((ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_TEXT || + (ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_VARTEXT1 || + (ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_VARTEXT2 || + (ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_VARTEXT4); +}; bool Create_tmp_table::finalize(THD *thd, TABLE *table, @@ -21321,7 +21378,7 @@ bool Create_tmp_table::finalize(THD *thd, if (m_group) { DBUG_PRINT("info",("Creating group key in temporary table")); - table->group= m_group; /* Table is grouped by key */ + table->group= m_group; /* Table is grouped by key */ param->group_buff= m_group_buff; share->keys=1; share->uniques= MY_TEST(m_using_unique_constraint); @@ -21331,7 +21388,8 @@ bool Create_tmp_table::finalize(THD *thd, keyinfo->key_part= m_key_part_info; keyinfo->flags=HA_NOSAME | HA_BINARY_PACK_KEY | HA_PACK_KEY; keyinfo->ext_key_flags= keyinfo->flags; - keyinfo->usable_key_parts=keyinfo->user_defined_key_parts= param->group_parts; + keyinfo->usable_key_parts=keyinfo->user_defined_key_parts= + param->group_parts; keyinfo->ext_key_parts= keyinfo->user_defined_key_parts; keyinfo->key_length=0; keyinfo->rec_per_key=NULL; @@ -21354,11 +21412,9 @@ bool Create_tmp_table::finalize(THD *thd, m_key_part_info->offset= field->offset(table->record[0]); m_key_part_info->length= (uint16) field->key_length(); m_key_part_info->type= (uint8) field->key_type(); - m_key_part_info->key_type = - ((ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_TEXT || - (ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_VARTEXT1 || - (ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_VARTEXT2) ? - 0 : FIELDFLAG_BINARY; + m_key_part_info->key_type= (is_text_key_segment(m_key_part_info) ? 
+ 0 : FIELDFLAG_BINARY); + m_key_part_info->key_part_flag= field->key_part_flag(); if (!m_using_unique_constraint) { cur_group->buff=(char*) m_group_buff; @@ -21387,14 +21443,15 @@ bool Create_tmp_table::finalize(THD *thd, key_field_length <= ((Field_blob*)field)->pack_length_no_ptr()) { key_field_length= MY_MIN((*cur_group->item)->max_length, - (uint32)(MAX_BLOB_WIDTH - HA_KEY_BLOB_LENGTH)); + (uint32)(MAX_BLOB_WIDTH - + HA_KEY_BLOB_LENGTH)); /* Check that the group buffer has room for this blob key field. calc_group_buffer() may have sized the buffer before the field was promoted to blob in the tmp table. If the promoted blob doesn't fit, fall back to m_using_unique_constraint. */ - uint32 need= key_field_length + 2 /* length_bytes */ + + uint32 need= key_field_length + 4 /* length_bytes */ + MY_TEST(maybe_null); if (m_group_buff + need > param->group_buff + param->group_length) @@ -21408,11 +21465,12 @@ bool Create_tmp_table::finalize(THD *thd, check. This ensures that if we break out due to a promoted blob overflowing the group buffer, key_part_flag retains the original SQL-layer value (HA_VAR_LENGTH_PART for varchar), - not HA_BLOB_PART. This prevents rebuild_key_from_group_buff() from being - called on a key buffer that has varchar format. + not HA_BLOB_PART. This prevents rebuild_key_from_group_buff() + from being called on a key buffer that has varchar format. */ m_key_part_info->key_part_flag= field->key_part_flag(); + /* Create a new field which value is stored in the group buffer */ if (!(cur_group->field= field->new_key_field(thd->mem_root,table, m_group_buff + MY_TEST(maybe_null), @@ -21427,7 +21485,7 @@ bool Create_tmp_table::finalize(THD *thd, store_length = key field pack_length + null flag byte. */ m_key_part_info->store_length= - cur_group->field->pack_length() + MY_TEST(maybe_null); + cur_group->field->key_pack_length() + MY_TEST(maybe_null); if (maybe_null) { @@ -21444,7 +21502,7 @@ bool Create_tmp_table::finalize(THD *thd, cur_group->buff++; // Pointer to field data m_group_buff++; // Skipp null flag } - m_group_buff+= cur_group->field->pack_length(); + m_group_buff+= cur_group->field->key_pack_length(); } keyinfo->key_length+= m_key_part_info->length; } @@ -21555,6 +21613,7 @@ bool Create_tmp_table::finalize(THD *thd, m_key_part_info->key_type=FIELDFLAG_BINARY; m_key_part_info->type= HA_KEYTYPE_BINARY; m_key_part_info->fieldnr= m_key_part_info->field->field_index + 1; + m_key_part_info->key_part_flag= m_key_part_info->field->key_part_flag(); m_key_part_info++; } /* Create a distinct key over the columns we are going to return */ @@ -21562,19 +21621,21 @@ bool Create_tmp_table::finalize(THD *thd, i < share->fields; i++, reg_field++) { - if (!((*reg_field)->flags & FIELD_PART_OF_TMP_UNIQUE)) + Field *field= *reg_field; + if (!(field->flags & FIELD_PART_OF_TMP_UNIQUE)) continue; - m_key_part_info->field= *reg_field; - (*reg_field)->flags |= PART_KEY_FLAG; + m_key_part_info->field= field; + field->flags |= PART_KEY_FLAG; if (m_key_part_info == keyinfo->key_part) - (*reg_field)->key_start.set_bit(0); - m_key_part_info->null_bit= (*reg_field)->null_bit; - m_key_part_info->null_offset= (uint) ((*reg_field)->null_ptr - + field->key_start.set_bit(0); + m_key_part_info->null_bit= field->null_bit; + m_key_part_info->null_offset= (uint) (field->null_ptr - (uchar*) table->record[0]); - m_key_part_info->offset= (*reg_field)->offset(table->record[0]); - m_key_part_info->length= (uint16) (*reg_field)->pack_length(); - m_key_part_info->fieldnr= (*reg_field)->field_index + 1; + 
m_key_part_info->offset= field->offset(table->record[0]); + m_key_part_info->length= (uint16) field->key_pack_length(); + m_key_part_info->fieldnr= field->field_index + 1; + m_key_part_info->key_part_flag= field->key_part_flag(); /* TODO: The below method of computing the key format length of the key part is a copy/paste from opt_range.cc, and table.cc. @@ -21585,22 +21646,17 @@ bool Create_tmp_table::finalize(THD *thd, */ m_key_part_info->store_length= m_key_part_info->length; - if ((*reg_field)->real_maybe_null()) + if (field->real_maybe_null()) { m_key_part_info->store_length+= HA_KEY_NULL_LENGTH; m_key_part_info->key_part_flag |= HA_NULL_PART; } - m_key_part_info->key_part_flag|= (*reg_field)->key_part_flag(); - m_key_part_info->store_length+= (*reg_field)->key_part_length_bytes(); + m_key_part_info->store_length+= field->key_part_length_bytes(); keyinfo->key_length+= m_key_part_info->store_length; - m_key_part_info->type= (uint8) (*reg_field)->key_type(); - m_key_part_info->key_type = - ((ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_TEXT || - (ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_VARTEXT1 || - (ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_VARTEXT2) ? - 0 : FIELDFLAG_BINARY; - + m_key_part_info->type= (uint8) field->key_type(); + m_key_part_info->key_type= (is_text_key_segment(m_key_part_info) ? + 0 : FIELDFLAG_BINARY); m_key_part_info++; } } @@ -22032,8 +22088,7 @@ bool create_internal_tmp_table(TABLE *table, KEY *keyinfo, seg->type= ((keyinfo->key_part[i].key_type & FIELDFLAG_BINARY) ? HA_KEYTYPE_VARBINARY2 : HA_KEYTYPE_VARTEXT2); - seg->bit_start= (uint8)(field->pack_length() - - portable_sizeof_char_ptr); + seg->bit_start= (uint8) ((Field_blob*) field)->pack_length_no_ptr(); seg->flag= HA_BLOB_PART; seg->length=0; // Whole blob in unique constraint } @@ -22226,7 +22281,7 @@ bool create_internal_tmp_table(TABLE *table, KEY *keyinfo, seg->type= ((keyinfo->key_part[i].key_type & FIELDFLAG_BINARY) ? HA_KEYTYPE_VARBINARY2 : HA_KEYTYPE_VARTEXT2); - seg->bit_start= (uint8)(field->pack_length() - portable_sizeof_char_ptr); + seg->bit_start= (uint8) ((Field_blob*) field)->pack_length_no_ptr(); seg->flag= HA_BLOB_PART; seg->length=0; // Whole blob in unique constraint } @@ -22451,6 +22506,9 @@ free_tmp_table(THD *thd, TABLE *entry) /* free blobs */ for (Field **ptr=entry->field ; *ptr ; ptr++) (*ptr)->free(); + for (ORDER *group= entry->group ; group ; group= group->next) + if (group->field) + group->field->free(); if (entry->temp_pool_slot != MY_BIT_NONE) temp_pool_clear_bit(entry->temp_pool_slot); @@ -24867,7 +24925,7 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), Non-blob fields are unaffected: copy_funcs() writes directly into the record[0] field slots that hp_make_key() reads from. */ - if (table->s->db_type() == heap_hton) + if (table->s->db_type() == heap_hton && 0) { if (table->s->blob_fields) { @@ -24875,6 +24933,7 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), for (group= table->group; group; group= group->next) { Field *tbl_field= (*group->item)->get_tmp_table_field(); + DBUG_ASSERT(tbl_field); if (tbl_field && tbl_field != group->field) { if (group->field->is_null()) @@ -24932,7 +24991,7 @@ end_unique_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), DBUG_RETURN(NESTED_LOOP_OK); init_tmptable_sum_functions(join->sum_funcs); - copy_fields(join_tab->tmp_table_param); // Groups are copied twice. + copy_fields(join_tab->tmp_table_param); // Groups are copied twice. 
if (copy_funcs(join_tab->tmp_table_param->items_to_copy, join->thd)) DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ @@ -27976,7 +28035,7 @@ void calc_group_buffer(TMP_TABLE_PARAM *param, ORDER *group) key_length+= 8; // Big enough } else - key_length+= field->pack_length(); + key_length+= field->key_pack_length(); } else { @@ -28010,7 +28069,7 @@ void calc_group_buffer(TMP_TABLE_PARAM *param, ORDER *group) else { /* - Group strings are taken as varstrings and require an length field. + Group strings are taken as varstrings and require a length field. A field is not yet created by create_tmp_field_ex() and the sizes should match up. */ @@ -28263,7 +28322,7 @@ setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param, */ field= item->field; item->result_field=field->make_new_field(thd->mem_root, - field->table, 1); + field->table, 1, 0); /* We need to allocate one extra byte for null handling and another extra byte to not get warnings from purify in @@ -29587,7 +29646,7 @@ bool JOIN_TAB::save_explain_data(Explain_table_access *eta, be able to print the engine statistics. */ if (table->file->handler_stats && - table->s->tmp_table != INTERNAL_TMP_TABLE) + table->s->tmp_table != RESULT_TMP_TABLE) eta->handler_for_stats= table->file; if (likely(thd->lex->analyze_stmt)) diff --git a/sql/sql_show.cc b/sql/sql_show.cc index 6c845bcabe2da..fdd66371058a9 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -80,6 +80,12 @@ extern size_t sql_functions_length; extern Native_func_registry_array native_func_registry_array; +/* + This is needed for gcc 15.1.1 as it also counts static structures in + the limits +*/ +PRAGMA_DISABLE_CHECK_STACK_FRAME; + enum enum_i_s_events_fields { ISE_EVENT_CATALOG= 0, @@ -9045,6 +9051,30 @@ static bool optimize_for_get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond } +/* + Remove field from blob list if we replaced it with a varchar field +*/ + +static void remove_field_from_blob_list(TABLE *table, uint fieldnr) +{ + uint *blob_field= table->s->blob_field; + uint *end= blob_field+ table->s->blob_fields; + + for (; blob_field < end ; blob_field++) + { + if (*blob_field == fieldnr) + { + if (blob_field+1 < end) + bmove(blob_field, blob_field+1, + (char*) end - (char*) blob_field - sizeof(*blob_field)); + table->s->blob_fields--; + return; + } + } + DBUG_ASSERT(0); // Field not found +} + + bool optimize_schema_tables_memory_usage(List<TABLE_LIST> &tables) { DBUG_ENTER("optimize_schema_tables_memory_usage"); @@ -9087,6 +9117,7 @@ bool optimize_schema_tables_memory_usage(List<TABLE_LIST> &tables) } else { + bool was_blob= field->flags & BLOB_FLAG; field= new (thd->mem_root) Field_string(cur, 0, field->null_ptr, field->null_bit, Field::NONE, &field->field_name, field->dtcollation()); @@ -9094,6 +9125,8 @@ bool optimize_schema_tables_memory_usage(List<TABLE_LIST> &tables) field->field_index= i; DBUG_ASSERT(field->pack_length_in_rec() == 0); table->field[i]= field; + if (was_blob) + remove_field_from_blob_list(table, i); } } if ((table->s->reclength= (ulong)(cur - table->record[0])) == 0) diff --git a/sql/sql_table.cc b/sql/sql_table.cc index ceea02c1c3933..29bafb492fb6e 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -4173,7 +4173,7 @@ bool Column_definition::prepare_blob_field(THD *thd) real_field_type() == FIELD_TYPE_MEDIUM_BLOB) { /* The user has given a length to the blob column */ - set_handler(Type_handler::blob_type_handler((uint) length)); + set_handler(Type_handler::blob_type_handler((uint) length, 0)); pack_length= type_handler()->calc_pack_length(0); } length= 0; diff --git 
a/sql/sql_trigger.cc b/sql/sql_trigger.cc index b251334d1a65d..b2c0ef057659a 100644 --- a/sql/sql_trigger.cc +++ b/sql/sql_trigger.cc @@ -1454,7 +1454,7 @@ bool Table_triggers_list::prepare_record_accessors(TABLE *table) { Field *f; if (!(f= *trg_fld= (*fld)->make_new_field(&table->mem_root, table, - table == (*fld)->table))) + table == (*fld)->table,0))) return 1; f->flags= (*fld)->flags; @@ -1495,7 +1495,7 @@ bool Table_triggers_list::prepare_record_accessors(TABLE *table) for (fld= table->field, trg_fld= record1_field; *fld; fld++, trg_fld++) { if (!(*trg_fld= (*fld)->make_new_field(&table->mem_root, table, - table == (*fld)->table))) + table == (*fld)->table, 0))) return 1; (*trg_fld)->move_field_offset((my_ptrdiff_t)(table->record[1] - table->record[0])); diff --git a/sql/sql_type.cc b/sql/sql_type.cc index 752e4e4bc1e79..b517bcc5e0bac 100644 --- a/sql/sql_type.cc +++ b/sql/sql_type.cc @@ -162,6 +162,7 @@ Named_type_handler type_handler_varchar_compres Named_type_handler type_handler_tiny_blob("tinyblob"); Named_type_handler type_handler_medium_blob("mediumblob"); Named_type_handler type_handler_long_blob("longblob"); +Named_type_handler type_handler_blob_key("blob_key"); Named_type_handler type_handler_blob("blob"); Named_type_handler type_handler_blob_compressed("blob"); @@ -1447,19 +1448,23 @@ Type_handler::string_type_handler(uint max_octet_length) const Type_handler * -Type_handler::varstring_type_handler(const Item *item) +Type_handler::varstring_type_handler(const Item *item, + const Tmp_field_param *param) { if (!item->max_length) return &type_handler_string; if (item->too_big_for_varchar()) - return blob_type_handler(item->max_length); + return blob_type_handler(item->max_length, param); return &type_handler_varchar; } const Type_handler * -Type_handler::blob_type_handler(uint max_octet_length) +Type_handler::blob_type_handler(uint max_octet_length, + const Tmp_field_param *param) { + if (param && param->part_of_unique_key()) + return &type_handler_blob_key; if (max_octet_length <= 255) return &type_handler_tiny_blob; if (max_octet_length <= 65535) @@ -1471,9 +1476,15 @@ Type_handler::blob_type_handler(uint max_octet_length) const Type_handler * -Type_handler::blob_type_handler(const Item *item) +Type_handler::blob_type_handler(const Item *item, const Tmp_field_param *param) { - return blob_type_handler(item->max_length); + return blob_type_handler(item->max_length, param); +} + +const Type_handler * +Type_handler::blob_key_type_handler() +{ + return &type_handler_blob_key; } /** @@ -4000,6 +4011,19 @@ Field *Type_handler_long_blob::make_table_field(MEM_ROOT *root, 4, attr.collation); } +Field *Type_handler_blob_key::make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const + +{ + /* Note that the pack length (4) will be fixed by the caller */ + return new (root) + Field_blob_key(addr.ptr(), addr.null_ptr(), addr.null_bit(), + Field::NONE, name, share, 4, attr.collation); +} + Field *Type_handler_enum::make_table_field(MEM_ROOT *root, const LEX_CSTRING *name, @@ -4833,7 +4857,7 @@ bool Type_handler_blob_common:: { if (func->aggregate_attributes_string(func_name, items, nitems)) return true; - handler->set_handler(blob_type_handler(func->max_length)); + handler->set_handler(blob_type_handler(func->max_length, 0)); return false; } @@ -7489,7 +7513,9 @@ bool Type_handler_temporal_result:: const Type_handler * -Type_handler_null::type_handler_for_tmp_table(const Item *item) const 
+Type_handler_null::type_handler_for_tmp_table(const Item *item, + const Tmp_field_param *param) + const { return &type_handler_string; } @@ -7502,12 +7528,14 @@ Type_handler_null::type_handler_for_union(const Item *item) const } -const Type_handler * -Type_handler_olddecimal::type_handler_for_tmp_table(const Item *item) const +const Type_handler *Type_handler_olddecimal:: +type_handler_for_tmp_table(const Item *item, + const Tmp_field_param *param) const { return &type_handler_newdecimal; } + const Type_handler * Type_handler_olddecimal::type_handler_for_union(const Item *item) const { @@ -7515,6 +7543,15 @@ Type_handler_olddecimal::type_handler_for_union(const Item *item) const } +const Type_handler *Type_handler_blob_common:: +type_handler_for_tmp_table(const Item *item, const Tmp_field_param *param) + const +{ + return (param && param->part_of_unique_key() ? + blob_key_type_handler() : + blob_type_handler(item, 0)); +} + /***************************************************************************/ bool Type_handler::check_null(const Item *item, st_value *value) const diff --git a/sql/sql_type.h b/sql/sql_type.h index 7d543f400d2da..ea491327bb642 100644 --- a/sql/sql_type.h +++ b/sql/sql_type.h @@ -98,6 +98,7 @@ class Conv_source; class ST_FIELD_INFO; class Type_collection; class Create_func; +class Tmp_field_param; #define my_charset_numeric my_charset_latin1 @@ -3704,7 +3705,8 @@ class Type_handler THD *thd, const Log_event_data_type &type); static const Type_handler *odbc_literal_type_handler(const LEX_CSTRING *str); - static const Type_handler *blob_type_handler(uint max_octet_length); + static const Type_handler *blob_type_handler(uint max_octet_length, + const Tmp_field_param *param); static const Type_handler *string_type_handler(uint max_octet_length); static const Type_handler *bit_and_int_mixture_handler(uint max_char_len); static const Type_handler *type_handler_long_or_longlong(uint max_char_len, @@ -3716,8 +3718,11 @@ class Type_handler If max_length == 0 create a CHAR(0) @param item - the Item to get the handler to. 
*/ - static const Type_handler *varstring_type_handler(const Item *item); - static const Type_handler *blob_type_handler(const Item *item); + static const Type_handler + *varstring_type_handler(const Item *item, const Tmp_field_param *param); + static const Type_handler *blob_type_handler(const Item *item, + const Tmp_field_param *param); + static const Type_handler *blob_key_type_handler(); static const Type_handler *get_handler_by_field_type(enum_field_types type); static const Type_handler *get_handler_by_real_type(enum_field_types type); static const Type_collection * @@ -3880,7 +3885,9 @@ class Type_handler { return this; } - virtual const Type_handler *type_handler_for_tmp_table(const Item *) const + virtual const Type_handler + *type_handler_for_tmp_table(const Item *, + const Tmp_field_param *param) const { return this; } @@ -6876,7 +6883,9 @@ class Type_handler_olddecimal: public Type_handler_decimal_result enum_field_types field_type() const override { return MYSQL_TYPE_DECIMAL; } uint32 max_display_length_for_field(const Conv_source &src) const override; uint32 calc_pack_length(uint32 length) const override { return length; } - const Type_handler *type_handler_for_tmp_table(const Item *item) const override; + const Type_handler + *type_handler_for_tmp_table(const Item *item, + const Tmp_field_param *param) const override; const Type_handler *type_handler_for_union(const Item *item) const override; void show_binlog_type(const Conv_source &src, const Field &, String *str) const override; @@ -6957,7 +6966,9 @@ class Type_handler_null: public Type_handler_general_purpose_string return DYN_COL_NULL; } const Type_handler *type_handler_for_comparison() const override; - const Type_handler *type_handler_for_tmp_table(const Item *item) const override; + const Type_handler *type_handler_for_tmp_table(const Item *item, + const Tmp_field_param *param) + const override; const Type_handler *type_handler_for_union(const Item *) const override; uint32 max_display_length(const Item *item) const override { return 0; } uint32 max_display_length_for_field(const Conv_source &src) const override @@ -7031,10 +7042,11 @@ class Type_handler_string: public Type_handler_longstr bool is_param_long_data_type() const override { return true; } uint32 max_display_length_for_field(const Conv_source &src) const override; uint32 calc_pack_length(uint32 length) const override { return length; } - const Type_handler *type_handler_for_tmp_table(const Item *item) const - override + const Type_handler *type_handler_for_tmp_table(const Item *item, + const Tmp_field_param *param) + const override { - return varstring_type_handler(item); + return varstring_type_handler(item, param); } bool partition_field_check(const LEX_CSTRING &, Item *item_expr) const override @@ -7084,9 +7096,11 @@ class Type_handler_var_string: public Type_handler_string return MYSQL_TYPE_VARCHAR; } const Type_handler *type_handler_for_implicit_upgrade() const override; - const Type_handler *type_handler_for_tmp_table(const Item *item) const override + const Type_handler *type_handler_for_tmp_table(const Item *item, + const Tmp_field_param *param) + const override { - return varstring_type_handler(item); + return varstring_type_handler(item, param); } uint32 max_display_length_for_field(const Conv_source &src) const override; void show_binlog_type(const Conv_source &src, const Field &dst, String *str) @@ -7098,7 +7112,7 @@ class Type_handler_var_string: public Type_handler_string { return Column_definition_prepare_stage2_legacy_num(c, 
MYSQL_TYPE_STRING); } const Type_handler *type_handler_for_union(const Item *item) const override { - return varstring_type_handler(item); + return varstring_type_handler(item, 0); } }; @@ -7123,14 +7137,15 @@ class Type_handler_varchar: public Type_handler_longstr { return (length + (length < 256 ? 1: 2)); } - const Type_handler *type_handler_for_tmp_table(const Item *item) const - override + const Type_handler *type_handler_for_tmp_table(const Item *item, + const Tmp_field_param *param) + const override { - return varstring_type_handler(item); + return varstring_type_handler(item, param); } const Type_handler *type_handler_for_union(const Item *item) const override { - return varstring_type_handler(item); + return varstring_type_handler(item, 0); } bool is_param_long_data_type() const override { return true; } bool partition_field_check(const LEX_CSTRING &, Item *item_expr) @@ -7231,14 +7246,12 @@ class Type_handler_blob_common: public Type_handler_longstr Field *make_conversion_table_field(MEM_ROOT *root, TABLE *table, uint metadata, const Field *target) const override; - const Type_handler *type_handler_for_tmp_table(const Item *item) const - override - { - return blob_type_handler(item); - } + const Type_handler *type_handler_for_tmp_table(const Item *item, + const Tmp_field_param *param) + const override; const Type_handler *type_handler_for_union(const Item *item) const override { - return blob_type_handler(item); + return blob_type_handler(item, 0); } bool subquery_type_allows_materialization(const Item *, const Item *, bool) const override @@ -7348,6 +7361,18 @@ class Type_handler_long_blob: public Type_handler_blob_common }; +class Type_handler_blob_key: public Type_handler_long_blob +{ +public: + virtual ~Type_handler_blob_key() = default; + Field *make_table_field(MEM_ROOT *root, + const LEX_CSTRING *name, + const Record_addr &addr, + const Type_all_attributes &attr, + TABLE_SHARE *share) const override; +}; + + class Type_handler_blob: public Type_handler_blob_common { public: @@ -7682,6 +7707,7 @@ extern Named_type_handler<Type_handler_hex_hybrid> type_handler_hex_hybrid; extern Named_type_handler<Type_handler_tiny_blob> type_handler_tiny_blob; extern Named_type_handler<Type_handler_medium_blob> type_handler_medium_blob; extern MYSQL_PLUGIN_IMPORT Named_type_handler<Type_handler_long_blob> type_handler_long_blob; +extern MYSQL_PLUGIN_IMPORT Named_type_handler<Type_handler_blob_key> type_handler_blob_key; extern Named_type_handler<Type_handler_blob> type_handler_blob; extern Named_type_handler<Type_handler_blob_compressed> type_handler_blob_compressed; diff --git a/sql/sql_type_fixedbin.h b/sql/sql_type_fixedbin.h index 54063b2cb4c46..03245955caf3c 100644 --- a/sql/sql_type_fixedbin.h +++ b/sql/sql_type_fixedbin.h @@ -897,7 +897,7 @@ class Type_handler_fbt: public Type_handler const override { if (item->max_length > MAX_FIELD_VARCHARLENGTH) - return Type_handler::blob_type_handler(item->max_length); + return Type_handler::blob_type_handler(item->max_length, 0); if (item->max_length > 255) return &type_handler_varchar; return &type_handler_string; diff --git a/sql/sql_update.cc b/sql/sql_update.cc index 11a27fc190846..c20258b83d58e 100644 --- a/sql/sql_update.cc +++ b/sql/sql_update.cc @@ -2393,8 +2393,10 @@ multi_update::initialize_tables(JOIN *join) TABLE *table=table_ref->table; uint cnt= table_ref->shared; List<Item> temp_fields; - ORDER group; TMP_TABLE_PARAM *tmp_param; + ORDER *group= (ORDER*) alloc_root(thd->mem_root, sizeof(*group)); + if (!group) + DBUG_RETURN(1); if (ignore) table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); @@ -2490,21 +2492,22 @@ multi_update::initialize_tables(JOIN *join) temp_fields.append(fields_for_table[cnt]); /* Make an
unique key over the first field to avoid duplicated updates */ - bzero((char*) &group, sizeof(group)); - group.direction= ORDER::ORDER_ASC; - group.item= (Item**) temp_fields.head_ref(); + bzero((char*) group, sizeof(*group)); + group->direction= ORDER::ORDER_ASC; + group->item= (Item**) temp_fields.head_ref(); tmp_param->init(); tmp_param->tmp_name="update"; tmp_param->field_count= temp_fields.elements; tmp_param->func_count= temp_fields.elements - 1; - calc_group_buffer(tmp_param, &group); + calc_group_buffer(tmp_param, group); /* small table, ignore @@big_tables */ my_bool save_big_tables= thd->variables.big_tables; thd->variables.big_tables= FALSE; tmp_tables[cnt]=create_tmp_table(thd, tmp_param, temp_fields, - (ORDER*) &group, 0, 0, - TMP_TABLE_ALL_COLUMNS, HA_POS_ERROR, &empty_clex_str); + (ORDER*) group, 0, 0, + TMP_TABLE_ALL_COLUMNS, HA_POS_ERROR, + &empty_clex_str); thd->variables.big_tables= save_big_tables; if (!tmp_tables[cnt]) DBUG_RETURN(1); diff --git a/sql/table.cc b/sql/table.cc index cbd4c4e1eabbf..bcf72a4d0e13a 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -4108,7 +4108,7 @@ bool copy_keys_from_share(TABLE *outparam, MEM_ROOT *root) We are using only a prefix of the column as a key: Create a new field for the key part that matches the index */ - field= key_part->field=field->make_new_field(root, outparam, 0); + field= key_part->field=field->make_new_field(root, outparam, 0, 0); field->field_length= key_part->length; } } @@ -8331,7 +8331,8 @@ bool TABLE::alloc_keys(uint key_count) { KEY *new_key_info; key_part_map *new_const_key_parts; - DBUG_ASSERT(s->tmp_table == INTERNAL_TMP_TABLE); + DBUG_ASSERT(s->tmp_table == INTERNAL_TMP_TABLE || + s->tmp_table == RESULT_TMP_TABLE); if (!multi_alloc_root(&mem_root, &new_key_info, sizeof(*key_info)*(s->keys+key_count), diff --git a/sql/table.h b/sql/table.h index 3ec90ca778aff..2a91cde4026e1 100644 --- a/sql/table.h +++ b/sql/table.h @@ -365,8 +365,12 @@ typedef struct st_grant_info enum tmp_table_type { - NO_TMP_TABLE= 0, NON_TRANSACTIONAL_TMP_TABLE, TRANSACTIONAL_TMP_TABLE, - INTERNAL_TMP_TABLE, SYSTEM_TMP_TABLE + NO_TMP_TABLE= 0, // Normal table + NON_TRANSACTIONAL_TMP_TABLE, // CREATE TEMPORARY ... TRANSACTIONAL=0 + TRANSACTIONAL_TMP_TABLE, // CREATE TEMPORARY ... 
TRANSACTIONAL=1 + INTERNAL_TMP_TABLE, // Table created for different purposes + RESULT_TMP_TABLE, // Holds intermediate SELECT results + SYSTEM_TMP_TABLE // Created by mysql_schema_table }; enum release_type { RELEASE_NORMAL, RELEASE_WAIT_FOR_DROP }; @@ -1111,7 +1115,7 @@ struct TABLE_SHARE bool is_optimizer_tmp_table() { - return tmp_table == INTERNAL_TMP_TABLE && !db.length && table_name.length; + return tmp_table == RESULT_TMP_TABLE; } bool visit_subgraph(Wait_for_flush *waiting_ticket, diff --git a/storage/heap/CMakeLists.txt b/storage/heap/CMakeLists.txt index a2179bcdab75d..81aa682aeabb1 100644 --- a/storage/heap/CMakeLists.txt +++ b/storage/heap/CMakeLists.txt @@ -39,10 +39,4 @@ IF(WITH_UNIT_TESTS) INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/sql ${CMAKE_SOURCE_DIR}/include) - ADD_EXECUTABLE(hp_test_key_setup-t - hp_test_key_setup-t.cc - ${CMAKE_SOURCE_DIR}/unittest/sql/dummy_builtins.cc) - TARGET_COMPILE_DEFINITIONS(hp_test_key_setup-t PRIVATE MYSQL_SERVER) - TARGET_LINK_LIBRARIES(hp_test_key_setup-t heap sql mytap) - MY_ADD_TEST(hp_test_key_setup) ENDIF() diff --git a/storage/heap/ha_heap.cc b/storage/heap/ha_heap.cc index 4b0c0828ef3aa..20fa2098cc568 100644 --- a/storage/heap/ha_heap.cc +++ b/storage/heap/ha_heap.cc @@ -286,144 +286,12 @@ int ha_heap::delete_row(const uchar * buf) return res; } -/* - Rebuild GROUP BY key from group_buff into HEAP's hp_make_key format. - - The GROUP BY path (end_update) stores item values into the group - buffer's Field_varstring fields, NOT into record[0]'s Field_blob. - After copy_funcs(), record[0]'s blob fields may be stale. - - This method iterates all SQL-layer key parts (blob, varchar, and - fixed-length), using store_length for offset advancement. It copies - each part's data from the group buffer into record[0], then calls - hp_make_key() to build the HEAP-format key. - - Only called when needs_key_rebuild_from_group_buff is set (GROUP BY key 0 with - blob segments). DISTINCT/SJ-materialize keys use hp_make_key() - directly from record[0]. -*/ -void ha_heap::rebuild_key_from_group_buff(HP_KEYDEF *keydef, const uchar *&key, - uint active_key_index) -{ - KEY *sql_key= &table->key_info[active_key_index]; - const uchar *key_pos= key; - - for (uint i= 0; i < sql_key->user_defined_key_parts; i++) - { - KEY_PART_INFO *key_part= &sql_key->key_part[i]; - const uchar *data_pos= key_pos; - Field *field= key_part->field; - - bool is_null= false; - if (key_part->null_bit) - { - is_null= *data_pos != 0; - data_pos++; /* skip null flag byte */ - } - - if (is_null) - { - /* NULL: set the field's null bit in record[0] */ - if (field->null_ptr) - field->null_ptr[0] |= field->null_bit; - } - else - { - /* Non-NULL: clear null bit, then copy data into record[0] */ - if (key_part->null_bit && field->null_ptr) - field->null_ptr[0] &= ~field->null_bit; - - if (key_part->key_part_flag & HA_BLOB_PART) - { - /* - Blob GROUP BY key: stored as Field_varstring in group_buff - (2B length prefix + inline data). Copy into record[0]'s - blob field (packlength + data pointer) so hp_make_key() - can build the HEAP-format key (4B length + data pointer). - - The key field is always Field_varstring with 2B length - (key_field_length >= 256 from MAX_BLOB_WIDTH cap). 
- */ - uint16 data_len= uint2korr(data_pos); - const uchar *data_ptr= data_pos + 2; - - Field_blob *blob= (Field_blob*) field; - DBUG_ASSERT(blob->flags & BLOB_FLAG); - uint packlength= blob->pack_length_no_ptr(); - uchar *blob_field= table->record[0] + key_part->offset; - store_lowendian((ulonglong) data_len, blob_field, packlength); - memcpy(blob_field + packlength, &data_ptr, sizeof(void*)); - } - else if (key_part->key_part_flag & HA_VAR_LENGTH_PART) - { - /* - VARCHAR GROUP BY key: stored as Field_varstring in group_buff. - The key buffer always uses HA_KEY_BLOB_LENGTH (2) bytes for - the length prefix (Field_varstring::key_part_length_bytes()), - but record[0]'s Field_varstring may use 1 or 2 bytes - depending on field_length. Read 2B from key, write the - field's native length_bytes to record[0]. - */ - uint16 key_data_len= uint2korr(data_pos); - const uchar *key_data_ptr= data_pos + HA_KEY_BLOB_LENGTH; - - Field_varstring *vs= (Field_varstring*) field; - uint rec_length_bytes= vs->length_bytes; - uchar *rec_field= table->record[0] + key_part->offset; - set_if_smaller(key_data_len, vs->field_length); - if (rec_length_bytes == 1) - rec_field[0]= (uchar) key_data_len; - else - int2store(rec_field, key_data_len); - memcpy(rec_field + rec_length_bytes, key_data_ptr, key_data_len); - } - else - { - /* Fixed-length: copy from key directly into record[0] */ - memcpy(table->record[0] + key_part->offset, data_pos, - key_part->length); - } - } - - key_pos+= key_part->store_length; - } - hp_make_key(keydef, (uchar*) file->lastkey, table->record[0]); - key= (const uchar*) file->lastkey; -} - - -/* - Ensure the key is in HEAP's native format for blob indexes. - - GROUP BY (needs_key_rebuild_from_group_buff): parse the SQL-layer group_buff - into record[0] and rebuild via hp_make_key(), because record[0]'s - blob fields may be stale after copy_funcs(). - - DISTINCT / SJ-materialize: record[0] already has correct blob - values; build the HEAP key directly from record[0]. 
-*/ -void ha_heap::materialize_heap_key_if_needed(uint key_index, const uchar *&key) -{ - HP_KEYDEF *keydef= file->s->keydef + key_index; - if (keydef->has_blob_seg) - { - if (keydef->needs_key_rebuild_from_group_buff) - rebuild_key_from_group_buff(keydef, key, key_index); - else - { - hp_make_key(keydef, (uchar*) file->lastkey, table->record[0]); - key= (const uchar*) file->lastkey; - } - } -} - int ha_heap::index_read_map(uchar *buf, const uchar *key, key_part_map keypart_map, enum ha_rkey_function find_flag) { DBUG_ASSERT(inited==INDEX); - materialize_heap_key_if_needed(active_index, key); int error = heap_rkey(file,buf,active_index, key, keypart_map, find_flag); return error; } @@ -432,7 +300,6 @@ int ha_heap::index_read_last_map(uchar *buf, const uchar *key, key_part_map keypart_map) { DBUG_ASSERT(inited==INDEX); - materialize_heap_key_if_needed(active_index, key); int error= heap_rkey(file, buf, active_index, key, keypart_map, HA_READ_PREFIX_LAST); return error; @@ -442,7 +309,6 @@ int ha_heap::index_read_idx_map(uchar *buf, uint index, const uchar *key, key_part_map keypart_map, enum ha_rkey_function find_flag) { - materialize_heap_key_if_needed(index, key); int error = heap_rkey(file, buf, index, key, keypart_map, find_flag); return error; } @@ -816,56 +682,37 @@ static int heap_prepare_hp_create_info(TABLE *table_arg, bool internal_table, if ((seg->type = field->key_type()) != (int) HA_KEYTYPE_TEXT && seg->type != HA_KEYTYPE_VARTEXT1 && seg->type != HA_KEYTYPE_VARTEXT2 && + seg->type != HA_KEYTYPE_VARTEXT4 && seg->type != HA_KEYTYPE_VARBINARY1 && seg->type != HA_KEYTYPE_VARBINARY2 && + seg->type != HA_KEYTYPE_VARBINARY4 && seg->type != HA_KEYTYPE_BIT) seg->type= HA_KEYTYPE_BINARY; } seg->start= (uint) key_part->offset; seg->length= (uint) key_part->length; - /* - Use field->key_part_flag() instead of key_part->key_part_flag - because some SQL layer paths (SJ weedout, expression cache) - leave key_part_flag uninitialized. Garbage HA_BLOB_PART bits - cause seg->length to be zeroed (the blob convention), corrupting - hash/compare for non-blob VARCHAR/VARBINARY keys. - */ - seg->flag= field->key_part_flag(); - /* - HEAP blob key segments must have seg->length=0. hp_hashnr() - advances key by seg->length (fixed part) THEN by 4+sizeof(ptr) - (blob encoding); non-zero length double-counts the advance - and hashes wrong data. The SQL layer's key_part.length may be - pack_length() (e.g. DISTINCT key path) — override it here. - - Also widen key_part->length to max_data_length() so the SQL - layer's new_key_field() creates a Field_varstring large enough - for the full blob data. Without this, DISTINCT/sj-materialize - lookup keys are truncated to pack_length() bytes. - */ + seg->flag= key_part->key_part_flag; + seg->bit_length= seg->bit_start= 0; + seg->bit_pos= 0; + + DBUG_ASSERT((seg->flag & HA_BLOB_PART) == + (field->key_part_flag() & HA_BLOB_PART)); + if (seg->flag & HA_BLOB_PART) { - seg->length= 0; - uint32 blob_max= field->max_data_length(); - /* - Widen key_part->length for blob segments where the SQL layer - set it to pack_length() (e.g. DISTINCT key path). 
Skip when - key_part->length <= pack_length_no_ptr(), which covers: - - Regular blobs: key_length()=0 (GROUP BY path where - finalize() sizes the group buffer separately) - - Geometry blobs: key_length()=packlength=4 (GROUP BY path, - also sized separately by finalize()) - Without this guard, geometry GROUP BY triggers overflow in - store_length (len_delta ≈ 4 billion), causing - rebuild_key_from_group_buff() to jump to uninitialized memory. - */ - uint pack_no_ptr= ((Field_blob*)field)->pack_length_no_ptr(); - if (key_part->length > pack_no_ptr && key_part->length < blob_max) + if (seg->type == HA_KEYTYPE_VARBINARY2 || + seg->type == HA_KEYTYPE_VARTEXT2) { - uint len_delta= blob_max - key_part->length; - key_part->length= blob_max; - key_part->store_length+= len_delta; - pos->key_length+= len_delta; + /* This is a geometry field using VARCHAR packing */ + seg->flag&= ~HA_BLOB_PART; + } + else + { + /* Blob key with a 4 byte length and a pointer to data */ + DBUG_ASSERT(seg->length == 4 + portable_sizeof_char_ptr); + DBUG_ASSERT(field->key_type() == HA_KEYTYPE_VARBINARY4 || + field->key_type() == HA_KEYTYPE_VARTEXT4); + seg->bit_start= ((Field_blob*) field)->pack_length_no_ptr(); } } @@ -901,30 +748,8 @@ static int heap_prepare_hp_create_info(TABLE *table_arg, bool internal_table, seg->bit_pos= (uint) (((Field_bit *) field)->bit_ptr - (uchar*) table_arg->record[0]); } - else - { - seg->bit_length= seg->bit_start= 0; - seg->bit_pos= 0; - } } - /* - Pre-compute has_blob_seg so callers (materialize_heap_key_if_needed, - unit tests) can use it before heap_create() runs. heap_create() - recomputes this from the normalized segments. - */ - keydef[key].has_blob_seg= hp_keydef_has_blob_seg(&keydef[key]); } - /* - Detect GROUP BY keys with blob segments that need rebuild_key_from_group_buff(). - When table->group is set, key 0 is the GROUP BY key. If it has - HA_BLOB_PART segments, finalize() set up the group buffer with - blob format and rebuild_key_from_group_buff() must parse it during lookups. - Without this flag, index_read_map() falls back to hp_make_key(record[0]) - which may use stale blob pointers after copy_funcs(). - */ - if (keys > 0) - keydef[0].needs_key_rebuild_from_group_buff= - (table_arg->group && hp_keydef_has_blob_seg(&keydef[0])); if (table_arg->found_next_number_field) { @@ -946,35 +771,30 @@ static int heap_prepare_hp_create_info(TABLE *table_arg, bool internal_table, return my_errno; } { - uint real_blob_count= 0; + uint blob_count= 0; for (uint b= 0; b < share->blob_fields; b++) { Field *field= table_arg->field[share->blob_field[b]]; - /* - BLOB_FLAG may be set on non-Field_blob fields (e.g. long - Field_string in INFORMATION_SCHEMA temp tables). Only include - true Field_blob types in the HEAP blob descriptor array. - Field_geom (MYSQL_TYPE_GEOMETRY) extends Field_blob and must - also be included.
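To make the new blob key-segment convention above concrete (seg->length fixed at 4 + portable_sizeof_char_ptr, seg->bit_start carrying the record's blob packlength), here is a hedged sketch of how one HA_KEYTYPE_VARTEXT4 key part is materialized from a record, mirroring the hp_make_key() change later in this patch:

  /* Sketch only: the key image holds the blob's length and address,
     never the blob data itself */
  const uchar *pos= rec + seg->start;            /* blob field in record */
  uint32 blob_len= (uint32) hp_blob_key_length(seg->bit_start, pos);
  uchar *blob_data;
  memcpy(&blob_data, pos + seg->bit_start, sizeof(uchar*));
  int4store(key, blob_len);                      /* 4-byte length ... */
  memcpy(key + 4, &blob_data, sizeof(uchar*));   /* ... then data pointer */
  key+= 4 + portable_sizeof_char_ptr;

Because only a pointer crosses into the key buffer, hashing and comparison must dereference it, which is what the hp_hash.c changes below implement.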
- */ - if (field->type() == MYSQL_TYPE_BLOB || - field->type() == MYSQL_TYPE_GEOMETRY) - { - Field_blob *blob= (Field_blob*) field; - blob_descs[real_blob_count].offset= + Field_blob *blob= (Field_blob*) field; + + DBUG_ASSERT(field->type() == MYSQL_TYPE_BLOB || + field->type() == MYSQL_TYPE_GEOMETRY); + + blob_descs[blob_count].offset= (uint) blob->offset(table_arg->record[0]); - blob_descs[real_blob_count].packlength= blob->pack_length_no_ptr(); - real_blob_count++; - } + blob_descs[blob_count].packlength= blob->pack_length_no_ptr(); + blob_count++; } hp_create_info->blob_descs= blob_descs; - hp_create_info->blob_count= real_blob_count; + hp_create_info->blob_count= blob_count; } } hp_create_info->auto_key= auto_key; hp_create_info->auto_key_type= auto_key_type; - hp_create_info->max_table_size= MY_MAX(current_thd->variables.max_heap_table_size, sizeof(HP_PTRS)); + hp_create_info->max_table_size= + MY_MAX(current_thd->variables.max_heap_table_size, + sizeof(HP_PTRS)); hp_create_info->with_auto_increment= found_real_auto_increment; hp_create_info->internal_table= internal_table; @@ -1123,29 +943,3 @@ maria_declare_plugin(heap) MariaDB_PLUGIN_MATURITY_STABLE /* maturity */ } maria_declare_plugin_end; - -#ifdef HEAP_UNIT_TESTS -int test_heap_prepare_hp_create_info(TABLE *table_arg, bool internal_table, - HP_CREATE_INFO *hp_create_info) -{ - return heap_prepare_hp_create_info(table_arg, internal_table, hp_create_info); -} - -/* - Test wrapper: rebuild_key_from_group_buff with a fake HP_INFO. - Sets up the handler's file pointer with the provided HP_INFO, - binds the handler to the given TABLE, then calls rebuild_key_from_group_buff. -*/ -void test_rebuild_key_from_group_buff(ha_heap *handler, TABLE *tbl, - HP_INFO *fake_file, HP_KEYDEF *keydef, - const uchar *key, uint key_index, - const uchar **rebuilt_key) -{ - handler->file= fake_file; - handler->set_table(tbl); - const uchar *k= key; - handler->rebuild_key_from_group_buff(keydef, k, key_index); - *rebuilt_key= k; -} - -#endif diff --git a/storage/heap/ha_heap.h b/storage/heap/ha_heap.h index c7a98b344432a..0d0eec530cde6 100644 --- a/storage/heap/ha_heap.h +++ b/storage/heap/ha_heap.h @@ -126,12 +126,4 @@ class ha_heap final : public handler int find_unique_row(uchar *record, uint unique_idx) override; private: void update_key_stats(); - void materialize_heap_key_if_needed(uint key_index, const uchar *&key); - void rebuild_key_from_group_buff(HP_KEYDEF *keydef, const uchar *&key, - uint active_key_index); -#ifdef HEAP_UNIT_TESTS - friend void test_rebuild_key_from_group_buff(ha_heap *, TABLE *, HP_INFO *, - HP_KEYDEF *, const uchar *, uint, - const uchar **); -#endif }; diff --git a/storage/heap/hp_blob.c b/storage/heap/hp_blob.c index f7f44af28fe3e..c9bd238b965e9 100644 --- a/storage/heap/hp_blob.c +++ b/storage/heap/hp_blob.c @@ -259,40 +259,17 @@ static void hp_unlink_and_write_run(HP_SHARE *share, const uchar *data_ptr, uint recbuffer, uint32 *data_offset, uchar **first_run, uchar **prev_run_start) { - uint32 remaining= data_len - *data_offset; - uint32 records_needed; - uint16 records_to_use; - uint32 unlinked= 0; - uchar **prev_link= &share->del_link; - uchar *cur; - uint32 first_payload= visible - HP_CONT_HEADER_SIZE; + DBUG_ASSERT(share->del_link == run_start + (run_count-1) * recbuffer); + DBUG_ASSERT(share->del_link >= run_start && + share->del_link < run_start + run_count * recbuffer); - if (remaining <= first_payload) - records_needed= 1; - else - records_needed= 1 + (remaining - first_payload + recbuffer - 1) / recbuffer; - 
records_to_use= (records_needed > run_count) ? run_count : - (uint16) records_needed; - - cur= share->del_link; - while (cur && unlinked < records_to_use) - { - uchar *next= *((uchar**) cur); - if (cur >= run_start && - cur < run_start + records_to_use * recbuffer) - { - *prev_link= next; - share->deleted--; - share->total_records++; - unlinked++; - } - else - prev_link= (uchar**) cur; - cur= next; - } + share->del_link= *(uchar**) (share->del_link - + (run_count-1) * recbuffer); + share->deleted-= run_count; + share->total_records+= run_count; hp_write_run_data(share, data_ptr, data_len, run_start, - records_to_use, HP_BLOB_CASE_C_MULTI_RUN, data_offset); + run_count, HP_BLOB_CASE_C_MULTI_RUN, data_offset); if (*prev_run_start) memcpy(*prev_run_start, &run_start, sizeof(run_start)); @@ -327,6 +304,10 @@ int hp_write_one_blob(HP_SHARE *share, const uchar *data_ptr, uchar *first_run= NULL; uchar *prev_run_start= NULL; uint32 data_offset= 0; + uint32 first_payload= visible - HP_CONT_HEADER_SIZE; + uint32 total_records_needed= + (data_len <= first_payload ? 1 : + 1 + (data_len - first_payload + recbuffer - 1) / recbuffer); /* Calculate minimum acceptable contiguous run size for free-list reuse. @@ -357,71 +338,82 @@ int hp_write_one_blob(HP_SHARE *share, const uchar *data_ptr, min_run_records= (min_run_bytes + recbuffer - 1) / recbuffer; if (min_run_records < 2) min_run_records= 2; - { - uint32 first_payload= visible - HP_CONT_HEADER_SIZE; - uint32 total_records_needed= data_len <= first_payload ? 1 : - 1 + (data_len - first_payload + recbuffer - 1) / recbuffer; - if (total_records_needed < min_run_records) - min_run_records= total_records_needed; - } + + if (total_records_needed < min_run_records) + min_run_records= total_records_needed; /* - Step 1: Try to allocate contiguous runs from the free list. + Step 1: Try to allocate contiguous runs from the top of the free list. Peek at free list records by walking next pointers without unlinking. Track contiguous groups (descending addresses — LIFO order from hp_free_run_chain). On discontinuity: if the group qualifies - (>= min_run_records), unlink and use it; if it doesn't, the free - list is too fragmented — stop and fall through to tail allocation. + (>= min_run_records), unlink and use it; if it doesn't, the tail + of the del_link free list is too small. Instead of continuing to + search for a larger block, we stop searching. */ { - uchar *run_start= NULL; - uint16 run_count= 0; - uchar *prev_pos= NULL; + uchar *run_start; + uint16 run_count= 1; + uchar *prev_pos; uchar *pos; + uint32 max_run= MY_MIN(total_records_needed, UINT_MAX16); - for (pos= share->del_link; - pos && data_offset < data_len; - pos= *((uchar**) pos)) + if ((run_start= share->del_link)) { - /* - Only check descending direction: hp_free_run_chain() frees records - in ascending address order (j=0..N), so LIFO pushes them onto the - free list in reverse — consecutive free list entries have descending - addresses. Ascending adjacency from unrelated deletes is ignored - intentionally; we only recover runs that were freed together.
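The contiguity rule this scan relies on is worth spelling out: because hp_free_run_chain() frees a run's records in ascending address order, the LIFO free list holds them as consecutive entries with strictly descending addresses, each exactly recbuffer below its predecessor. A sketch of counting one such group at the head of the list:

  /* Sketch: count one descending-contiguous group at the list head */
  uchar *prev= share->del_link;
  uint16 run= 0;
  if (prev)
  {
    uchar *pos;
    run= 1;
    for (pos= *(uchar**) prev; pos; pos= *(uchar**) pos)
    {
      if (pos != prev - recbuffer || run == UINT_MAX16)
        break;                         /* discontinuity or uint16 limit */
      run++;
      prev= pos;
    }
  }
  /* 'prev' is now the lowest address of a 'run'-record group */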
- */ - if (prev_pos && pos == prev_pos - recbuffer && run_count < UINT_MAX16) + prev_pos= run_start; + pos= *((uchar**) run_start); + run_count= 1; + for (; pos ; pos= *((uchar**) pos)) { - run_start= pos; - run_count++; - prev_pos= pos; - continue; - } + /* + Only check descending direction: hp_free_run_chain() frees records + in ascending address order (j=0..N), so LIFO pushes them onto the + free list in reverse — consecutive free list entries have descending + addresses. Ascending adjacency from unrelated deletes is ignored + intentionally; we only recover runs that were freed together. + */ + if (run_count == total_records_needed) + break; /* Use this run */ + + if (prev_pos && pos == prev_pos - recbuffer) + { + run_start= pos; + run_count++; + if (run_count < max_run) + continue; + if (run_count == total_records_needed) + break; /* Use this run */ + /* run_count is now UINT_MAX16 */ + } /* Discontinuity. If the accumulated group qualifies, use it. - If not, the free list is fragmented — give up entirely. + If not, the top of the free list is fragmented — give up entirely. */ - if (run_count > 0) - { if (run_count < min_run_records) break; hp_unlink_and_write_run(share, data_ptr, data_len, run_start, run_count, visible, recbuffer, &data_offset, &first_run, &prev_run_start); + + pos= share->del_link; + total_records_needed-= run_count; + + /* This cannot be last run */ + DBUG_ASSERT(data_offset < data_len && pos); + DBUG_ASSERT(total_records_needed != 0); + + run_start= pos; + run_count= 1; } - run_start= pos; - run_count= 1; - prev_pos= pos; + /* Handle the last group after the loop ends */ + if (run_count >= min_run_records && data_offset < data_len) + hp_unlink_and_write_run(share, data_ptr, data_len, run_start, + run_count, visible, recbuffer, + &data_offset, &first_run, &prev_run_start); } - - /* Handle the last group after the loop ends */ - if (run_count >= min_run_records && data_offset < data_len) - hp_unlink_and_write_run(share, data_ptr, data_len, run_start, - run_count, visible, recbuffer, - &data_offset, &first_run, &prev_run_start); } /* diff --git a/storage/heap/hp_create.c b/storage/heap/hp_create.c index fa15e441373e3..a4693f9679ba4 100644 --- a/storage/heap/hp_create.c +++ b/storage/heap/hp_create.c @@ -37,7 +37,7 @@ static const ulong heap_min_allocation_block= 16384; int heap_create(const char *name, HP_CREATE_INFO *create_info, HP_SHARE **res, my_bool *created_new_share) { - uint i, j, key_segs, max_length, length; + uint i, key_segs, max_length, length; HP_SHARE *share= 0; HA_KEYSEG *keyseg; HP_KEYDEF *keydef= create_info->keydef; @@ -87,12 +87,16 @@ int heap_create(const char *name, HP_CREATE_INFO *create_info, for (i= key_segs= max_length= 0, keyinfo= keydef; i < keys; i++, keyinfo++) { + HA_KEYSEG *keyseg, *keyseg_end; + bzero((char*) &keyinfo->block,sizeof(keyinfo->block)); bzero((char*) &keyinfo->rb_tree ,sizeof(keyinfo->rb_tree)); - for (j= length= 0; j < keyinfo->keysegs; j++) + for (keyseg= keyinfo->seg, keyseg_end= keyseg+ keyinfo->keysegs, length=0; + keyseg < keyseg_end ; + keyseg++) { - length+= keyinfo->seg[j].length; - if (keyinfo->seg[j].null_bit) + length+= keyseg->length; + if (keyseg->null_bit) { length++; if (!(keyinfo->flag & HA_NULL_ARE_EQUAL)) @@ -100,7 +104,7 @@ int heap_create(const char *name, HP_CREATE_INFO *create_info, if (keyinfo->algorithm == HA_KEY_ALG_BTREE) keyinfo->rb_tree.size_of_element++; } - switch (keyinfo->seg[j].type) { + switch (keyseg->type) { case HA_KEYTYPE_SHORT_INT: case HA_KEYTYPE_LONG_INT: case HA_KEYTYPE_FLOAT: @@ 
-112,11 +116,11 @@ int heap_create(const char *name, HP_CREATE_INFO *create_info, case HA_KEYTYPE_INT24: case HA_KEYTYPE_UINT24: case HA_KEYTYPE_INT8: - keyinfo->seg[j].flag|= HA_SWAP_KEY; + keyseg->flag|= HA_SWAP_KEY; break; case HA_KEYTYPE_VARBINARY1: /* Case-insensitiveness is handled in hash_sort */ - keyinfo->seg[j].type= HA_KEYTYPE_VARTEXT1; + keyseg->type= HA_KEYTYPE_VARTEXT1; /* fall through */ case HA_KEYTYPE_VARTEXT1: keyinfo->flag|= HA_VAR_LENGTH_KEY; @@ -125,55 +129,29 @@ int heap_create(const char *name, HP_CREATE_INFO *create_info, as VARTEXT1/VARBINARY1. Strip any spurious HA_BLOB_PART (e.g. from uninitialized key_part_flag in SJ weedout tables). */ - keyinfo->seg[j].flag&= ~HA_BLOB_PART; + DBUG_ASSERT(!(keyseg->flag & HA_BLOB_PART)); + keyseg->flag&= ~HA_BLOB_PART; /* For BTREE algorithm, key length, greater than or equal to 255, is packed on 3 bytes. */ if (keyinfo->algorithm == HA_KEY_ALG_BTREE) - length+= size_to_store_key_length(keyinfo->seg[j].length); + length+= size_to_store_key_length(keyseg->length); else length+= 2; - /* Save number of bytes used to store length */ - keyinfo->seg[j].bit_start= 1; + keyseg->bit_start= 1; /* Packlength for records */ + keyseg->bit_length= 2; /* Packlength for key */ break; - case HA_KEYTYPE_VARBINARY2: - /* Case-insensitiveness is handled in hash_sort */ - /* fall_through */ - case HA_KEYTYPE_VARTEXT2: - keyinfo->flag|= HA_VAR_LENGTH_KEY; - /* - Strip HA_BLOB_PART for key segments that don't correspond - to actual blob fields. HA_BLOB_PART can appear spuriously - from uninitialized key_part_flag (SJ weedout tables) or - from BLOB_FLAG on non-Field_blob types (I_S temp tables). - */ - if (keyinfo->seg[j].flag & HA_BLOB_PART) - { - my_bool real_blob= FALSE; - uint k; - for (k= 0; k < create_info->blob_count; k++) - { - if (create_info->blob_descs[k].offset == - keyinfo->seg[j].start) - { - real_blob= TRUE; - break; - } - } - if (!real_blob) - keyinfo->seg[j].flag&= ~HA_BLOB_PART; - } - /* - For BTREE algorithm, key length, greater than or equal - to 255, is packed on 3 bytes. - */ - if (keyinfo->algorithm == HA_KEY_ALG_BTREE) - length+= size_to_store_key_length(keyinfo->seg[j].length); - else if (keyinfo->seg[j].flag & HA_BLOB_PART) - length+= 4 + sizeof(uchar*); /* 4-byte len + data ptr in key */ - else - length+= 2; + case HA_KEYTYPE_VARBINARY4: + /* fall through */ + case HA_KEYTYPE_VARTEXT4: + /* Key is stored as 4 byte length + pointer to data */ + DBUG_ASSERT(keyseg->flag & HA_BLOB_PART); + DBUG_ASSERT(keyinfo->algorithm != HA_KEY_ALG_BTREE); + DBUG_ASSERT(keyseg->length == 4+portable_sizeof_char_ptr); + DBUG_ASSERT(keyseg->bit_start >= 1 && keyseg->bit_start <= 4); + DBUG_ASSERT(keyseg->bit_length == 0); + /* Save number of bytes used to store length. For blob segments, bit_start holds the actual blob packlength @@ -183,42 +161,32 @@ int heap_create(const char *name, HP_CREATE_INFO *create_info, Also normalize seg->length to 0 ("whole blob") for blob segments where the SQL layer set it to pack_length. */ - if (!(keyinfo->seg[j].flag & HA_BLOB_PART)) - keyinfo->seg[j].bit_start= 2; - else - { - if (keyinfo->seg[j].bit_start == 0 && keyinfo->seg[j].length > 0) - keyinfo->seg[j].bit_start= - (uint8)(keyinfo->seg[j].length - sizeof(uchar*)); - keyinfo->seg[j].length= 0; /* "whole blob" */ - /* - Fallback: if bit_start is still 0 after the length-based - derivation above (which requires length > 0), look up the
This - covers any SQL layer path that sets both bit_start=0 and - length=0 for a blob key segment. - */ - if (keyinfo->seg[j].bit_start == 0) - { - uint k; - for (k= 0; k < create_info->blob_count; k++) - { - if (create_info->blob_descs[k].offset == - keyinfo->seg[j].start) - { - keyinfo->seg[j].bit_start= - (uint8) create_info->blob_descs[k].packlength; - break; - } - } - DBUG_ASSERT(keyinfo->seg[j].bit_start > 0); - } - } + keyinfo->flag|= HA_VAR_LENGTH_KEY; + keyseg->type= HA_KEYTYPE_VARTEXT4; + break; + + case HA_KEYTYPE_VARBINARY2: + /* Case-insensitiveness is handled in hash_sort */ + /* fall through */ + case HA_KEYTYPE_VARTEXT2: + keyinfo->flag|= HA_VAR_LENGTH_KEY; + /* key is stored as [length] + data */ + keyseg->bit_start= 2; + keyseg->bit_length= 2; /* Make future comparison simpler by only having to check for one type */ - keyinfo->seg[j].type= HA_KEYTYPE_VARTEXT1; + keyseg->type= HA_KEYTYPE_VARTEXT1; + + /* + For BTREE algorithm, key length, greater than or equal + to 255, is packed on 3 bytes. + */ + if (keyinfo->algorithm == HA_KEY_ALG_BTREE) + length+= size_to_store_key_length(keyseg->length); + else + length+= keyseg->bit_start; break; case HA_KEYTYPE_BIT: /* @@ -253,7 +221,8 @@ int heap_create(const char *name, HP_CREATE_INFO *create_info, sizeof(HP_SHARE)+ keys*sizeof(HP_KEYDEF)+ key_segs*sizeof(HA_KEYSEG)+ - create_info->blob_count*sizeof(HP_BLOB_DESC), + create_info->blob_count* + sizeof(HP_BLOB_DESC), MYF(MY_ZEROFILL | (create_info->internal_table ? MY_THREAD_SPECIFIC : 0))))) @@ -277,7 +246,6 @@ int heap_create(const char *name, HP_CREATE_INFO *create_info, keyinfo->seg= keyseg; memcpy(keyseg, keydef[i].seg, (size_t) (sizeof(keyseg[0]) * keydef[i].keysegs)); - keyinfo->has_blob_seg= hp_keydef_has_blob_seg(keyinfo); keyseg+= keydef[i].keysegs; if (keydef[i].algorithm == HA_KEY_ALG_BTREE) diff --git a/storage/heap/hp_hash.c b/storage/heap/hp_hash.c index 4bdfba659d46b..4e83157c01aa8 100644 --- a/storage/heap/hp_hash.c +++ b/storage/heap/hp_hash.c @@ -54,32 +54,6 @@ static size_t hp_blob_key_length(uint packlength, const uchar *pos) } -/* - Compute the key-buffer byte size of the variable-length portion of a - VARTEXT or BLOB segment in a pre-built hash key. - - Used by hp_hashnr() and hp_key_cmp() to advance past a VARCHAR or - BLOB segment (both null and non-null) in the key buffer. - - All VARCHAR key segments use a 2-byte length prefix — this is the - canonical key format shared between SQL-layer group_buff keys and - hp_make_key() output. hp_make_key() normalizes 1-byte record - prefixes to 2-byte key prefixes to maintain this invariant. - - Blob segments use a fixed 4-byte length + pointer layout. - - @param seg Key segment descriptor - @return Number of bytes to skip in the key buffer for the variable- - length portion (does NOT include the null flag byte, which - the caller handles separately) -*/ - -static inline size_t hp_vartext_key_pack_size(const HA_KEYSEG *seg) -{ - return (seg->flag & HA_BLOB_PART) ? 
4 + sizeof(uchar *) : 2; -} - - /* Find out how many rows there is in the given range @@ -295,15 +269,15 @@ static ulong hp_hashnr(HP_KEYDEF *keydef, const uchar *key) for (seg=keydef->seg,endseg=seg+keydef->keysegs ; seg < endseg ; seg++) { uchar *pos=(uchar*) key; - key+=seg->length; + key+= seg->length; if (seg->null_bit) { key++; /* Skip null byte */ if (*pos) /* Found null */ { nr^= (nr << 1) | 1; - if (seg->type == HA_KEYTYPE_VARTEXT1) - key+= hp_vartext_key_pack_size(seg); + if (seg->type != HA_KEYTYPE_BIT) + key+= seg->bit_length; continue; } pos++; @@ -320,17 +294,6 @@ static ulong hp_hashnr(HP_KEYDEF *keydef, const uchar *key) } my_ci_hash_sort(cs, pos, length, &nr, &nr2); } - else if (seg->type == HA_KEYTYPE_VARTEXT1 && (seg->flag & HA_BLOB_PART)) - { - /* Blob segment in pre-built key: 4-byte length + data pointer */ - CHARSET_INFO *cs= seg->charset; - uint32 blob_len= uint4korr(pos); - const uchar *blob_data; - memcpy(&blob_data, pos + 4, HP_PTR_SIZE); - if (blob_data && blob_len > 0) - my_ci_hash_sort(cs, blob_data, blob_len, &nr, &nr2); - key+= 4 + sizeof(uchar*); - } else if (seg->type == HA_KEYTYPE_VARTEXT1) /* Any VARCHAR segments */ { CHARSET_INFO *cs= seg->charset; @@ -345,7 +308,17 @@ static ulong hp_hashnr(HP_KEYDEF *keydef, const uchar *key) set_if_smaller(length, char_length); } my_ci_hash_sort(cs, pos+pack_length, length, &nr, &nr2); - key+= pack_length; + key+= seg->bit_length; + } + else if (seg->type == HA_KEYTYPE_VARTEXT4) /* All blob segments */ + { + /* Blob segment in pre-built key: 4-byte length + data pointer */ + CHARSET_INFO *cs= seg->charset; + uint32 blob_len= uint4korr(pos); + const uchar *blob_data; + memcpy(&blob_data, pos + 4, HP_PTR_SIZE); + if (blob_data && blob_len > 0) + my_ci_hash_sort(cs, blob_data, blob_len, &nr, &nr2); } else { @@ -371,7 +344,7 @@ ulong hp_rec_hashnr(register HP_KEYDEF *keydef, register const uchar *rec) for (seg=keydef->seg,endseg=seg+keydef->keysegs ; seg < endseg ; seg++) { - uchar *pos=(uchar*) rec+seg->start,*end=pos+seg->length; + const uchar *pos=(uchar*) rec+seg->start, *end=pos+seg->length; if (seg->null_bit) { if (rec[seg->null_pos] & seg->null_bit) @@ -392,22 +365,15 @@ ulong hp_rec_hashnr(register HP_KEYDEF *keydef, register const uchar *rec) } my_ci_hash_sort(cs, pos, char_length, &nr, &nr2); } - else if (seg->type == HA_KEYTYPE_VARTEXT1 && (seg->flag & HA_BLOB_PART)) - { - /* Blob segment in input record: dereference data pointer */ - CHARSET_INFO *cs= seg->charset; - uint packlength= seg->bit_start; - size_t blob_len= hp_blob_key_length(packlength, pos); - const uchar *blob_data; - memcpy(&blob_data, pos + packlength, HP_PTR_SIZE); - if (blob_data && blob_len > 0) - my_ci_hash_sort(cs, blob_data, blob_len, &nr, &nr2); - } else if (seg->type == HA_KEYTYPE_VARTEXT1) /* Any VARCHAR segments */ { CHARSET_INFO *cs= seg->charset; size_t pack_length= seg->bit_start; - size_t length= (pack_length == 1 ? (size_t) *(uchar*) pos : uint2korr(pos)); + size_t length= (pack_length == 1 ? 
+ (size_t) *(uchar*) pos : + uint2korr(pos)); + DBUG_ASSERT(!(seg->flag & HA_BLOB_PART)); + if (cs->mbmaxlen > 1) { size_t char_length; @@ -420,6 +386,17 @@ ulong hp_rec_hashnr(register HP_KEYDEF *keydef, register const uchar *rec) set_if_smaller(length, seg->length); my_ci_hash_sort(cs, pos+pack_length, length, &nr, &nr2); } + else if (seg->type == HA_KEYTYPE_VARTEXT4) /* All blob segments */ + { + /* Blob segment in input record: dereference data pointer */ + CHARSET_INFO *cs= seg->charset; + const uint packlength= seg->bit_start; + size_t blob_len= hp_blob_key_length(packlength, pos); + const uchar *blob_data; + memcpy(&blob_data, pos + packlength, HP_PTR_SIZE); + if (blob_data && blob_len > 0) + my_ci_hash_sort(cs, blob_data, blob_len, &nr, &nr2); + } else { if (seg->type == HA_KEYTYPE_BIT && seg->bit_length) @@ -500,7 +477,7 @@ int hp_rec_key_cmp(HP_KEYDEF *keydef, const uchar *rec1, const uchar *rec2, pos2, char_length2)) return 1; } - else if (seg->type == HA_KEYTYPE_VARTEXT1 && (seg->flag & HA_BLOB_PART)) + else if (seg->type == HA_KEYTYPE_VARTEXT4) /* All blob segments */ { /* Blob segment comparison. @@ -518,7 +495,7 @@ int hp_rec_key_cmp(HP_KEYDEF *keydef, const uchar *rec1, const uchar *rec2, if (len1 == 0 && len2 == 0) continue; /* - Only short-circuit on length mismatch for NO PAD collations. + Only short-circuit on length mismatch for NO PAD collations. PAD SPACE collations treat trailing spaces as insignificant, so 'a' (len=1) and 'a ' (len=3) must compare equal. */ @@ -623,8 +600,8 @@ int hp_key_cmp(HP_KEYDEF *keydef, const uchar *rec, const uchar *key, return 1; if (found_null) { - if (seg->type == HA_KEYTYPE_VARTEXT1) - key+= hp_vartext_key_pack_size(seg); + if (seg->type != HA_KEYTYPE_BIT) + key+= seg->bit_length; continue; } } @@ -653,13 +630,13 @@ int hp_key_cmp(HP_KEYDEF *keydef, const uchar *rec, const uchar *key, key, char_length_key)) return 1; } - else if (seg->type == HA_KEYTYPE_VARTEXT1 && (seg->flag & HA_BLOB_PART)) + else if (seg->type == HA_KEYTYPE_VARTEXT4) { /* Blob segment: rec side is stored (chain pointers), key side has 4-byte length + data pointer from hp_make_key. 
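The PAD SPACE subtlety above is easy to get wrong, so here is a hedged sketch of the blob-compare rule both hp_rec_key_cmp() and hp_key_cmp() follow (my_ci_strnncollsp() being the collation-aware compare that treats trailing spaces as insignificant for PAD SPACE collations):

  /* Sketch: compare two blob values under the segment collation 'cs' */
  if (len1 == 0 && len2 == 0)
    return 0;                           /* both empty: equal */
  if ((cs->state & MY_CS_NOPAD) && len1 != len2)
    return 1;                           /* NO PAD: lengths must match */
  return my_ci_strnncollsp(cs, data1, len1, data2, len2) ? 1 : 0;

Under a PAD SPACE collation such as latin1_swedish_ci, 'a' and 'a  ' compare equal here even though their lengths differ, exactly as the comment above requires.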
*/ - uint packlength= seg->bit_start; + uint packlength= 4; uchar *pos= (uchar*) rec + seg->start; size_t rec_blob_len= hp_blob_key_length(packlength, pos); uint32 key_blob_len= uint4korr(key); @@ -667,7 +644,6 @@ int hp_key_cmp(HP_KEYDEF *keydef, const uchar *rec, const uchar *key, const uchar *rec_data; memcpy(&key_data, key + 4, HP_PTR_SIZE); - key+= 4 + sizeof(uchar*); if (rec_blob_len == 0 && key_blob_len == 0) continue; @@ -750,7 +726,7 @@ void hp_make_key(HP_KEYDEF *keydef, uchar *key, const uchar *rec) uchar *pos= (uchar*) rec + seg->start; if (seg->null_bit) *key++= MY_TEST(rec[seg->null_pos] & seg->null_bit); - if (seg->type == HA_KEYTYPE_VARTEXT1 && (seg->flag & HA_BLOB_PART)) + if (seg->type == HA_KEYTYPE_VARTEXT4) { /* Blob segment in input record: store 4-byte length + data pointer @@ -762,7 +738,7 @@ void hp_make_key(HP_KEYDEF *keydef, uchar *key, const uchar *rec) memcpy(&blob_data, pos + packlength, HP_PTR_SIZE); int4store(key, blob_len); memcpy(key + 4, &blob_data, HP_PTR_SIZE); - key+= 4 + sizeof(uchar*); + key+= 4 + portable_sizeof_char_ptr; continue; } if (cs->mbmaxlen > 1) @@ -937,6 +913,7 @@ uint hp_rb_pack_key(HP_KEYDEF *keydef, uchar *key, const uchar *old, } continue; } + DBUG_ASSERT(!(seg->flag & HA_BLOB_PART)); if (seg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART)) { /* Length of key-part used with heap_rkey() always 2 */ diff --git a/storage/heap/hp_test_hash-t.c b/storage/heap/hp_test_hash-t.c index 8de1d8542884c..9852ec1c66bfb 100644 --- a/storage/heap/hp_test_hash-t.c +++ b/storage/heap/hp_test_hash-t.c @@ -43,16 +43,16 @@ #define KEY_BUF_SIZE 64 /* Avoids -Wsizeof-pointer-memaccess with sizeof(uchar*) */ -#define PTR_SIZE sizeof(void*) +#define PTR_SIZE portable_sizeof_char_ptr static void setup_blob_keyseg(HA_KEYSEG *seg, my_bool nullable) { memset(seg, 0, sizeof(*seg)); - seg->type= HA_KEYTYPE_VARTEXT1; + seg->type= HA_KEYTYPE_VARTEXT4; seg->flag= HA_BLOB_PART | HA_VAR_LENGTH_PART; seg->start= REC_BLOB_OFFSET; - seg->length= 0; /* blob key segments must have length=0 */ + seg->length= 4+portable_sizeof_char_ptr; /* Length of blob key */ seg->bit_start= REC_BLOB_PACKLEN; /* actual packlength */ seg->charset= &my_charset_latin1; if (nullable) @@ -76,7 +76,6 @@ static void setup_keydef(HP_KEYDEF *keydef, HA_KEYSEG *seg, uint keysegs) keydef->algorithm= HA_KEY_ALG_HASH; keydef->flag= HA_NOSAME; keydef->length= 0; /* computed below */ - keydef->has_blob_seg= 1; /* Compute keydef->length: sum of key part sizes */ for (i= 0; i < keysegs; i++) @@ -744,10 +743,10 @@ static void setup_mixed_keydef(HP_KEYDEF *keydef, HA_KEYSEG *segs) { /* Segment 0: blob (city TEXT) at offset 23 */ memset(&segs[0], 0, sizeof(segs[0])); - segs[0].type= HA_KEYTYPE_VARTEXT1; + segs[0].type= HA_KEYTYPE_VARTEXT4; segs[0].flag= HA_BLOB_PART | HA_VAR_LENGTH_PART; segs[0].start= MIX_BLOB_OFFSET; - segs[0].length= 0; /* blob key segments must have length=0 */ + segs[0].length= 4+portable_sizeof_char_ptr; /* Length of blob key */ segs[0].bit_start= MIX_BLOB_PACKLEN; segs[0].charset= &my_charset_latin1; segs[0].null_bit= 4; /* bit 2 in null bitmap */ @@ -765,7 +764,6 @@ static void setup_mixed_keydef(HP_KEYDEF *keydef, HA_KEYSEG *segs) segs[1].null_pos= MIX_NULL_OFFSET; setup_keydef(keydef, segs, 2); - keydef->has_blob_seg= 1; } @@ -857,7 +855,6 @@ static void test_key_vs_rec_hash_consistency(void) segs2b[1].bit_start= 2; /* 2-byte length prefix */ segs2b[1].length= 256; setup_keydef(&keydef2b, segs2b, 2); - keydef2b.has_blob_seg= 1; memset(rec2b, 0, sizeof(rec2b)); /* blob */ diff --git 
a/storage/heap/hp_test_key_setup-t.cc b/storage/heap/hp_test_key_setup-t.cc index c933a76c05491..7316b3482bd86 100644 --- a/storage/heap/hp_test_key_setup-t.cc +++ b/storage/heap/hp_test_key_setup-t.cc @@ -6,11 +6,6 @@ max_data_length(). The DISTINCT key path sets key_part.length = pack_length() = 10, and the SQL layer's new_key_field() then creates Field_varstring(10), which truncates blob data. - - 2. garbage_key_part_flag: heap_prepare_hp_create_info() must use - field->key_part_flag() instead of key_part->key_part_flag, because - SJ weedout and expression cache paths leave key_part_flag - uninitialized. Garbage HA_BLOB_PART bits corrupt the hash index. */ #include @@ -298,8 +293,7 @@ class Hp_test_varchar_key_flag local_kpi.offset= V_REC_VARCHAR_OFFSET; local_kpi.length= (uint16) vs_field->key_length(); local_kpi.type= vs_field->key_type(); - /* Poison key_part_flag with garbage including HA_BLOB_PART (0x20) */ - local_kpi.key_part_flag= 0xA5A5; /* garbage from uninitialized memory */ + local_kpi.key_part_flag= 0; memset(&local_sql_key, 0, sizeof(local_sql_key)); local_sql_key.user_defined_key_parts= 1; @@ -330,9 +324,6 @@ class Hp_test_varchar_key_flag void test_garbage_key_part_flag() { - /* Verify setup: key_part_flag has HA_BLOB_PART set (the poison) */ - ok((local_kpi.key_part_flag & HA_BLOB_PART) != 0, - "garbage_flag setup: key_part_flag has HA_BLOB_PART set (garbage)"); ok(local_kpi.length == V_REC_VARCHAR_LEN, "garbage_flag setup: key_part.length = %u (field_length)", (uint) local_kpi.length); diff --git a/storage/maria/ma_unique.c b/storage/maria/ma_unique.c index 215afc7e063bf..0c463d355f96f 100644 --- a/storage/maria/ma_unique.c +++ b/storage/maria/ma_unique.c @@ -146,7 +146,7 @@ ha_checksum _ma_unique_hash(MARIA_UNIQUEDEF *def, const uchar *record) } end= pos+length; if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT1 || - type == HA_KEYTYPE_VARTEXT2) + type == HA_KEYTYPE_VARTEXT2 || type == HA_KEYTYPE_VARTEXT4) { my_ci_hash_sort(keyseg->charset, (const uchar*) pos, length, diff --git a/storage/mroonga/mysql-test/mroonga/storage/r/fulltext_order_boolean_mode_no_where.result b/storage/mroonga/mysql-test/mroonga/storage/r/fulltext_order_boolean_mode_no_where.result index 125b35fb96e23..eb9cab648ba96 100644 --- a/storage/mroonga/mysql-test/mroonga/storage/r/fulltext_order_boolean_mode_no_where.result +++ b/storage/mroonga/mysql-test/mroonga/storage/r/fulltext_order_boolean_mode_no_where.result @@ -14,7 +14,7 @@ Start mroonga Start groonga and Ruby SELECT *, MATCH(title) AGAINST("groonga" IN BOOLEAN MODE) AS score FROM diaries -ORDER BY MATCH(title) AGAINST("groonga" IN BOOLEAN MODE); +ORDER BY MATCH(title) AGAINST("groonga" IN BOOLEAN MODE), title; title score Start mroonga 0 Start groonga 1 diff --git a/storage/mroonga/mysql-test/mroonga/storage/r/fulltext_order_natural_language_mode_different_match.result b/storage/mroonga/mysql-test/mroonga/storage/r/fulltext_order_natural_language_mode_different_match.result index 76abb660c19ef..a59a3ffb3c6b0 100644 --- a/storage/mroonga/mysql-test/mroonga/storage/r/fulltext_order_natural_language_mode_different_match.result +++ b/storage/mroonga/mysql-test/mroonga/storage/r/fulltext_order_natural_language_mode_different_match.result @@ -24,13 +24,19 @@ Error host2 Warning host2 Error Error Error Error host2 Warning Warning Warning Warning host2 -SELECT *, MATCH(host) AGAINST("host2" IN NATURAL LANGUAGE MODE) AS score +SELECT *, MATCH(host) AGAINST("host2" IN NATURAL LANGUAGE MODE) AS score FROM logs WHERE MATCH(message) 
AGAINST("Error" IN NATURAL LANGUAGE MODE) -ORDER BY MATCH(host) AGAINST("host2" IN NATURAL LANGUAGE MODE); +ORDER BY MATCH(host) AGAINST("host2" IN NATURAL LANGUAGE MODE), +message; message host score Error Error Error host1 0 -Error Error host2 116509 Error host2 116509 +Error Error host2 116509 Error Error Error Error host2 116509 +show status like "Created_tmp%"; +Variable_name Value +Created_tmp_disk_tables 0 +Created_tmp_files 0 +Created_tmp_tables 6 DROP TABLE logs; diff --git a/storage/mroonga/mysql-test/mroonga/storage/r/fulltext_order_natural_language_mode_no_where.result b/storage/mroonga/mysql-test/mroonga/storage/r/fulltext_order_natural_language_mode_no_where.result index 30130e6c85ba2..0d387954fc112 100644 --- a/storage/mroonga/mysql-test/mroonga/storage/r/fulltext_order_natural_language_mode_no_where.result +++ b/storage/mroonga/mysql-test/mroonga/storage/r/fulltext_order_natural_language_mode_no_where.result @@ -22,13 +22,13 @@ Error Warning Error Error Error Error Warning Warning Warning Warning -SELECT *, MATCH(message) AGAINST("Error" IN NATURAL LANGUAGE MODE) AS score +SELECT *, MATCH(message) AGAINST("Error" IN NATURAL LANGUAGE MODE) AS score FROM logs -ORDER BY MATCH(message) AGAINST("Error" IN NATURAL LANGUAGE MODE); +ORDER BY MATCH(message) AGAINST("Error" IN NATURAL LANGUAGE MODE), message; message score -Warning Warning Warning 0 -Warning Warning 0 Warning 0 +Warning Warning 0 +Warning Warning Warning 0 Warning Warning Warning Warning 0 Error 174763 Error Error 349526 diff --git a/storage/mroonga/mysql-test/mroonga/storage/t/fulltext_order_boolean_mode_no_where.test b/storage/mroonga/mysql-test/mroonga/storage/t/fulltext_order_boolean_mode_no_where.test index a421a31b160c7..e46d0f5bf7efc 100644 --- a/storage/mroonga/mysql-test/mroonga/storage/t/fulltext_order_boolean_mode_no_where.test +++ b/storage/mroonga/mysql-test/mroonga/storage/t/fulltext_order_boolean_mode_no_where.test @@ -34,7 +34,7 @@ SELECT * FROM diaries; SELECT *, MATCH(title) AGAINST("groonga" IN BOOLEAN MODE) AS score FROM diaries - ORDER BY MATCH(title) AGAINST("groonga" IN BOOLEAN MODE); + ORDER BY MATCH(title) AGAINST("groonga" IN BOOLEAN MODE), title; DROP TABLE diaries; diff --git a/storage/mroonga/mysql-test/mroonga/storage/t/fulltext_order_natural_language_mode_different_match.test b/storage/mroonga/mysql-test/mroonga/storage/t/fulltext_order_natural_language_mode_different_match.test index 6c7eb0a64e3a0..e895a9bd75a89 100644 --- a/storage/mroonga/mysql-test/mroonga/storage/t/fulltext_order_natural_language_mode_different_match.test +++ b/storage/mroonga/mysql-test/mroonga/storage/t/fulltext_order_natural_language_mode_different_match.test @@ -39,10 +39,13 @@ INSERT INTO logs VALUES("Warning Warning Warning Warning", "host2"); SELECT * FROM logs; -SELECT *, MATCH(host) AGAINST("host2" IN NATURAL LANGUAGE MODE) AS score +SELECT *, MATCH(host) AGAINST("host2" IN NATURAL LANGUAGE MODE) AS score FROM logs WHERE MATCH(message) AGAINST("Error" IN NATURAL LANGUAGE MODE) - ORDER BY MATCH(host) AGAINST("host2" IN NATURAL LANGUAGE MODE); + ORDER BY MATCH(host) AGAINST("host2" IN NATURAL LANGUAGE MODE), + message; + +show status like "Created_tmp%"; DROP TABLE logs; diff --git a/storage/mroonga/mysql-test/mroonga/storage/t/fulltext_order_natural_language_mode_no_where.test b/storage/mroonga/mysql-test/mroonga/storage/t/fulltext_order_natural_language_mode_no_where.test index 917d437d0e9b9..492478b9326cf 100644 --- 
a/storage/mroonga/mysql-test/mroonga/storage/t/fulltext_order_natural_language_mode_no_where.test +++ b/storage/mroonga/mysql-test/mroonga/storage/t/fulltext_order_natural_language_mode_no_where.test @@ -37,10 +37,9 @@ INSERT INTO logs VALUES("Warning Warning Warning Warning"); SELECT * FROM logs; -SELECT *, MATCH(message) AGAINST("Error" IN NATURAL LANGUAGE MODE) AS score +SELECT *, MATCH(message) AGAINST("Error" IN NATURAL LANGUAGE MODE) AS score FROM logs - ORDER BY MATCH(message) AGAINST("Error" IN NATURAL LANGUAGE MODE); - + ORDER BY MATCH(message) AGAINST("Error" IN NATURAL LANGUAGE MODE), message; DROP TABLE logs; --source ../../include/mroonga/have_mroonga_deinit.inc From 0f65f73cf546c602cb0fe2755dee17c5d5274e0a Mon Sep 17 00:00:00 2001 From: Monty Date: Fri, 17 Apr 2026 15:23:12 +0300 Subject: [PATCH 13/27] Fixed that Field_blob_compressed can be used in internal temporary tables group_concat needs special code in store() for storing the result in table->blob_storage. This was implemented in Field_blob::store() but not in Field_blob_compressed::store(). This was temporarily solved by not using Field_blob_compressed when table->s->is_optimizer_tmp_table() would be set. This however disabled Field_blob_compressed for temporary tables that did not need a key for the blob field. Fixed by ensuring that Field_blob::store() and Field_blob_compressed::store() handle group_concat identically. As this handling is only done for internal temporary tables, we store the data uncompressed for faster usage by group_concat().
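A sketch of the resulting store path for compressed blobs under GROUP_CONCAT, assuming the Blob_mem_storage::store_with_zero_prefix() helper added below (the leading 0x00 byte is the "no compression" method marker that Field_blob_compressed::val_str() checks):

  /* Sketch: store the value uncompressed for GROUP_CONCAT sort/dedup */
  char *stored= table->blob_storage->store_with_zero_prefix(from, length);
  if (stored)
  {
    Field_blob::store_length(length + 1);       /* count the 0x00 prefix */
    bmove(ptr + packlength, (uchar*) &stored, sizeof(char*));
  }

--- mysql-test/main/column_compression.result | 8 +++ mysql-test/main/column_compression.test | 1 + sql/field.cc | 75 ++++++++++++++++------- sql/field.h | 2 + sql/table.h | 14 +++++ 5 files changed, 77 insertions(+), 23 deletions(-) diff --git a/mysql-test/main/column_compression.result b/mysql-test/main/column_compression.result index 7f176290637e9..3316b4c66ffdb 100644 --- a/mysql-test/main/column_compression.result +++ b/mysql-test/main/column_compression.result @@ -2968,6 +2968,14 @@ FROM (t5 JOIN t5 AS tt ON (tt.pk != t5.pk)); DROP TABLE t5; create table t1 (pk int not null, b1 blob compressed, v1 varbinary(100))engine=innodb; insert into t1 values (1,'ufhjdtv','f'),(2,'jdt',''),(3,'d','tvs'); +select t1.v1, t1.b1 from t1 join t1 as tt on (tt.pk != t1.pk) order by t1.v1; +v1 b1 + jdt + jdt +f ufhjdtv +f ufhjdtv +tvs d +tvs d select group_concat(t1.v1, t1.b1 order by 1) from (t1 join t1 as tt on (tt.pk != t1.pk)); group_concat(t1.v1, t1.b1 order by 1) jdt,jdt,fufhjdtv,fufhjdtv,tvsd,tvsd diff --git a/mysql-test/main/column_compression.test b/mysql-test/main/column_compression.test index 874f3c3580bf2..283e8112abf46 100644 --- a/mysql-test/main/column_compression.test +++ b/mysql-test/main/column_compression.test @@ -511,6 +511,7 @@ DROP TABLE t5; create table t1 (pk int not null, b1 blob compressed, v1 varbinary(100))engine=innodb; insert into t1 values (1,'ufhjdtv','f'),(2,'jdt',''),(3,'d','tvs'); +select t1.v1, t1.b1 from t1 join t1 as tt on (tt.pk != t1.pk) order by t1.v1; select group_concat(t1.v1, t1.b1 order by 1) from (t1 join t1 as tt on (tt.pk != t1.pk)); drop table t1; diff --git a/sql/field.cc b/sql/field.cc index e273ec5d10d34..a1f042b7bcdbf 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -8472,12 +8472,13 @@ Field *Field_varstring_compressed::make_new_field(MEM_ROOT *root, return res; } + Field *Field_blob_compressed::make_new_field(MEM_ROOT *root, TABLE *new_table, bool keep_type, const Tmp_field_param *param) { Field_blob *res; - if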
(new_table->s->is_optimizer_tmp_table() || + if (new_table->s->is_optimizer_tmp_table() && (param && param->part_of_unique_key())) { /* @@ -8816,6 +8817,50 @@ int Field_blob::copy_value(Field_blob *from) } +/* + Store blob value into table->blob_storage + Used with GROUP_CONCAT with ORDER BY/DISTINCT + + If with_zero_prefix is true, prepend a 0x00 byte so that + Field_blob_compressed::val_str() treats the value as uncompressed. +*/ + +int Field_blob::handle_group_concat(const char *from, size_t length, + CHARSET_INFO *cs, bool with_zero_prefix) +{ + DBUG_ASSERT(!f_is_hex_escape(flags)); + DBUG_ASSERT(field_charset() == cs); + DBUG_ASSERT(length <= max_data_length()); + + size_t new_length= length; + size_t copy_length= table->in_use->variables.group_concat_max_len; + if (new_length > copy_length) + { + new_length= Well_formed_prefix(cs, from, copy_length, new_length).length(); + table->blob_storage->set_truncated_value(true); + } + + char *tmp; + if (with_zero_prefix) + { + tmp= table->blob_storage->store_with_zero_prefix(from, new_length); + new_length++; // Count the extra 0 + } + else + tmp= table->blob_storage->store(from, new_length); + + if (!tmp) + goto oom_error; + Field_blob::store_length(new_length); + bmove(ptr + packlength, (uchar*) &tmp, sizeof(char*)); + return 0; + +oom_error: + reset(); + return -1; +} + + int Field_blob::store(const char *from,size_t length,CHARSET_INFO *cs) { DBUG_ASSERT(marked_for_write_or_computed()); @@ -8839,27 +8884,7 @@ int Field_blob::store(const char *from,size_t length,CHARSET_INFO *cs) */ if (table && table->blob_storage) // GROUP_CONCAT with ORDER BY | DISTINCT - { - DBUG_ASSERT(!f_is_hex_escape(flags)); - DBUG_ASSERT(field_charset() == cs); - DBUG_ASSERT(length <= max_data_length()); - - new_length= length; - copy_length= table->in_use->variables.group_concat_max_len; - if (new_length > copy_length) - { - new_length= Well_formed_prefix(cs, - from, copy_length, new_length).length(); - table->blob_storage->set_truncated_value(true); - } - if (!(tmp= table->blob_storage->store(from, new_length))) - goto oom_error; - - Field_blob::store_length(new_length); - bmove(ptr + packlength, (uchar*) &tmp, sizeof(char*)); - return 0; - } - + return handle_group_concat(from, length, cs, false); /* If the 'from' address is in the range of the temporary 'value'- object we need to copy the content to a different location or it will be @@ -8908,7 +8933,7 @@ int Field_blob::store(const char *from,size_t length,CHARSET_INFO *cs) oom_error: /* Fatal OOM error */ - bzero(ptr,Field_blob::pack_length()); + reset(); return -1; } @@ -9371,6 +9396,10 @@ int Field_blob_compressed::store(const char *from, size_t length, CHARSET_INFO *cs) { DBUG_ASSERT(marked_for_write_or_computed()); + + if (table && table->blob_storage) // GROUP_CONCAT with ORDER BY | DISTINCT + return handle_group_concat(from, length, cs, true); + uint compressed_length; uint max_length= max_data_length(); uint to_length= (uint) MY_MIN(max_length, mbmaxlen() * length + 1); diff --git a/sql/field.h b/sql/field.h index 34a74b4d328b6..7fcb2c859aabd 100644 --- a/sql/field.h +++ b/sql/field.h @@ -4435,6 +4435,8 @@ class Field_blob :public Field_longstr { static void do_copy_blob(Copy_field *copy); static void do_conv_blob(Copy_field *copy); uint get_key_image_itRAW(const uchar *ptr_arg, uchar *buff, uint length) const; + int handle_group_concat(const char *from, size_t length, + CHARSET_INFO *cs, bool with_zero_prefix); public: Field_blob(uchar *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg, enum utype 
unireg_check_arg, const LEX_CSTRING *field_name_arg, diff --git a/sql/table.h b/sql/table.h index 2a91cde4026e1..5f771d904cf74 100644 --- a/sql/table.h +++ b/sql/table.h @@ -1242,6 +1242,20 @@ class Blob_mem_storage: public Sql_alloc { return (char*) memdup_root(&storage, from, length); } + /* + Store string with a 0 prefix. This is used for storing + Field_blob_compressed fields in a not compressed format. + */ + char *store_with_zero_prefix(const char *from, size_t length) + { + char *res= (char*) alloc_root(&storage, length+1); + if (res) + { + res[0]= 0; + memcpy(res+1, from, length); + } + return res; + } void set_truncated_value(bool is_truncated_value) { truncated_value= is_truncated_value; From 7d201a4f854ed3f158b3ae7c5ecc241afa594e95 Mon Sep 17 00:00:00 2001 From: Monty Date: Sat, 18 Apr 2026 13:55:14 +0300 Subject: [PATCH 14/27] Removed duplicate versions of Field::row_pack_length() --- sql/field.h | 11 +---------- sql/sql_type_fixedbin.h | 1 - 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/sql/field.h b/sql/field.h index 7fcb2c859aabd..c70fb8c1b1cf3 100644 --- a/sql/field.h +++ b/sql/field.h @@ -1149,7 +1149,7 @@ class Field: public Value_source DBUG_RETURN(field_metadata); } /* Length of row data inc record not including packed length */ - virtual uint row_pack_length() const { return 0; } + virtual uint row_pack_length() const { return pack_length(); } /* Return the current size of data stored in the record */ virtual uint32 data_length() { return pack_length(); } @@ -2141,7 +2141,6 @@ class Field_num :public Field { return to->store(val_int(), MY_TEST(flags & UNSIGNED_FLAG)); } bool is_equal(const Column_definition &new_field) const override; - uint row_pack_length() const override { return pack_length(); } uint32 pack_length_from_metadata(uint field_metadata) const override { uint32 length= pack_length(); @@ -2518,7 +2517,6 @@ class Field_new_decimal final :public Field_num { uint size_of() const override { return sizeof *this; } uint32 pack_length() const override { return bin_size; } uint pack_length_from_metadata(uint field_metadata) const override; - uint row_pack_length() const override { return pack_length(); } bool compatible_field_size(uint field_metadata, const Relay_log_info *rli, uint16 mflags, int *order_var) const override; bool is_equal(const Column_definition &new_field) const override; @@ -2952,7 +2950,6 @@ class Field_float final :public Field_real { int cmp(const uchar *,const uchar *) const override; void sort_string(uchar *buff, uint length) override; uint32 pack_length() const override { return sizeof(float); } - uint row_pack_length() const override { return pack_length(); } ulonglong get_max_int_value() const override { /* @@ -3008,7 +3005,6 @@ class Field_double :public Field_real { int cmp(const uchar *,const uchar *) const override final; void sort_string(uchar *buff, uint length) override final; uint32 pack_length() const override final { return sizeof(double); } - uint row_pack_length() const override final { return pack_length(); } ulonglong get_max_int_value() const override final { /* @@ -3435,7 +3431,6 @@ class Field_timestampf :public Field_timestamp_with_dec { { return my_timestamp_binary_length(dec); } - uint row_pack_length() const override { return pack_length(); } uint pack_length_from_metadata(uint field_metadata) const override { DBUG_ENTER("Field_timestampf::pack_length_from_metadata"); @@ -3794,7 +3789,6 @@ class Field_timef final :public Field_time_with_dec { { return my_time_binary_length(dec); } - uint 
row_pack_length() const override { return pack_length(); } uint pack_length_from_metadata(uint field_metadata) const override { DBUG_ENTER("Field_timef::pack_length_from_metadata"); @@ -4002,7 +3996,6 @@ class Field_datetimef final :public Field_datetime_with_dec { { return my_datetime_binary_length(dec); } - uint row_pack_length() const override { return pack_length(); } uint pack_length_from_metadata(uint field_metadata) const override { DBUG_ENTER("Field_datetimef::pack_length_from_metadata"); @@ -4142,7 +4135,6 @@ class Field_string final :public Field_longstr { } bool compatible_field_size(uint field_metadata, const Relay_log_info *rli, uint16 mflags, int *order_var) const override; - uint row_pack_length() const override { return field_length; } uint packed_col_length(const uchar *to, uint length) override; uint max_packed_col_length(uint max_length) override; uint size_of() const override { return sizeof *this; } @@ -4929,7 +4921,6 @@ class Field_enum :public Field_str { uint size_of() const override { return sizeof *this; } uint pack_length_from_metadata(uint field_metadata) const override { return (field_metadata & 0x00ff); } - uint row_pack_length() const override { return pack_length(); } bool zero_pack() const override { return false; } bool optimize_range(uint, uint) const override { return false; } bool eq_def(const Field *field) const override; diff --git a/sql/sql_type_fixedbin.h b/sql/sql_type_fixedbin.h index 03245955caf3c..e1acbc637b4e1 100644 --- a/sql/sql_type_fixedbin.h +++ b/sql/sql_type_fixedbin.h @@ -715,7 +715,6 @@ class Type_handler_fbt: public Type_handler return Data_type_compatibility::OK; } - uint row_pack_length() const override { return pack_length(); } Binlog_type_info binlog_type_info() const override { From e32433e1241bc74a0bc109f367c0406df1996fcd Mon Sep 17 00:00:00 2001 From: Monty Date: Sun, 19 Apr 2026 17:42:34 +0300 Subject: [PATCH 15/27] Fixed duplicate key error when converting HEAP table to Aria The problem was that create_internal_tmp_table() tries to create a normal key for the blob, which does not work. The fix is to force a unique key if BLOB keys were used for the original HEAP table.
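For orientation, the decision this patch extends can be summarized as the sketch below. This is a minimal, self-contained illustration rather than the real code: the actual check lives in create_internal_tmp_table() (see the sql/sql_select.cc hunk further down) and reads its limits from the handler; the struct, helper name, and scalar parameters here are hypothetical stand-ins.

```c
#include <stdbool.h>

#define HA_BLOB_PART_KEY 65536  /* new flag, value as defined in include/my_base.h */

struct key_sketch            /* hypothetical stand-in for the server's KEY */
{
  unsigned key_length;
  unsigned user_defined_key_parts;
  unsigned flags;
};

/*
  Returns true when an internal tmp-table key cannot be created as a
  normal engine key and must be converted to a unique constraint.
  The last term is what this patch adds: a key with blob parts cannot
  become a normal Aria key, so it is forced onto the unique path.
*/
static bool needs_unique_constraint(const struct key_sketch *key,
                                    unsigned engine_max_key_length,
                                    unsigned engine_max_key_parts,
                                    unsigned uniques)
{
  return key->key_length > engine_max_key_length ||
         key->user_defined_key_parts > engine_max_key_parts ||
         uniques != 0 ||
         (key->flags & HA_BLOB_PART_KEY) != 0;
}
```

The HA_BLOB_PART_KEY flag itself is set at tmp-table creation time whenever a GROUP BY or DISTINCT key part is a blob field, as the include/my_base.h and sql/sql_select.cc hunks below show.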
--- include/my_base.h | 1 + mysql-test/suite/heap/heap_blob_big.inc | 145 +++++++++++++++ mysql-test/suite/heap/heap_blob_big1.result | 195 ++++++++++++++++++++ mysql-test/suite/heap/heap_blob_big1.test | 8 + mysql-test/suite/heap/heap_blob_big2.result | 195 ++++++++++++++++++++ mysql-test/suite/heap/heap_blob_big2.test | 9 + mysql-test/suite/heap/heap_blob_big3.result | 195 ++++++++++++++++++++ mysql-test/suite/heap/heap_blob_big3.test | 10 + sql/sql_select.cc | 13 +- storage/maria/ma_create.c | 5 + 10 files changed, 775 insertions(+), 1 deletion(-) create mode 100644 mysql-test/suite/heap/heap_blob_big.inc create mode 100644 mysql-test/suite/heap/heap_blob_big1.result create mode 100644 mysql-test/suite/heap/heap_blob_big1.test create mode 100644 mysql-test/suite/heap/heap_blob_big2.result create mode 100644 mysql-test/suite/heap/heap_blob_big2.test create mode 100644 mysql-test/suite/heap/heap_blob_big3.result create mode 100644 mysql-test/suite/heap/heap_blob_big3.test diff --git a/include/my_base.h b/include/my_base.h index f30ea673ae8e9..0ab388a449402 100644 --- a/include/my_base.h +++ b/include/my_base.h @@ -316,6 +316,7 @@ enum ha_base_keytype { #define HA_USES_PARSER 16384 /* Fulltext index uses [pre]parser */ #define HA_USES_BLOCK_SIZE ((uint) 32768) #define HA_SORT_ALLOWS_SAME 512 /* Intern bit when sorting records */ +#define HA_BLOB_PART_KEY 65536 /* Some key parts are blobs */ /* This flag can be used only in KEY::ext_key_flags */ #define HA_EXT_NOSAME 131072 diff --git a/mysql-test/suite/heap/heap_blob_big.inc b/mysql-test/suite/heap/heap_blob_big.inc new file mode 100644 index 0000000000000..0aec2e302009a --- /dev/null +++ b/mysql-test/suite/heap/heap_blob_big.inc @@ -0,0 +1,145 @@ +--source include/have_sequence.inc + +--echo # +--echo # Test create_internal_tmp_table_from_heap() with blob/text columns. +--echo # +--echo # Field_blob_key is used for blob/text columns that appear as key columns +--echo # (GROUP BY, SELECT DISTINCT, UNION DISTINCT) in internal tmp tables +--echo # created by class Create_tmp_table. +--echo # + +# SHOW STATUS output differs across protocol variants; disable them globally. +--disable_cursor_protocol +--disable_ps2_protocol + +--echo # +--echo # Setup: 50 distinct text/blob values of ~1500 bytes each. +--echo # Row count: 100 (50 original + 50 duplicate rows for aggregation). +--echo # Blob data in the tmp table: ~75 KB (50 * 1500 bytes), +--echo # which is between 64 K and 1 M. +--echo # + +CREATE TABLE t1 ( + pk INT NOT NULL AUTO_INCREMENT PRIMARY KEY, + a TEXT NOT NULL, + b BLOB, + v INT NOT NULL +); + +# Each value is a 1500-char string uniquely identifying the row. +# LPAD(seq,4,'0') gives '0001'..'0050' so all 50 values are distinct. +INSERT INTO t1 (a, b, v) + SELECT REPEAT(LPAD(seq, 4, '0'), 375), + REPEAT(LPAD(seq, 4, '0'), 375), + seq + FROM seq_1_to_50; + +# Duplicate rows: same a/b values, v offset by 100 (v=101..150). 
+INSERT INTO t1 (a, b, v) + SELECT a, b, v + 100 FROM t1 ORDER BY pk; + +# Sanity: 100 rows, 50 distinct a values, total v = 1+...+50 + 101+...+150 = 7550 +SELECT COUNT(*), COUNT(DISTINCT a), SUM(v) FROM t1; + +set @@max_sort_length=65536; +set @@sort_buffer_size=1024*1024*128; + +--echo # +--echo # ================================================================ +--echo # Run 1: tmp_memory_table_size=1M — tmp table stays in HEAP +--echo # ================================================================ +--echo # + +--echo # --- GROUP BY on TEXT column (end_update path) --- +--echo # Tmp table: 50 groups x ~1500-byte TEXT key = ~75 KB. Fits in 1 M HEAP. +FLUSH STATUS; +SELECT COUNT(*), SUM(gsum) FROM ( + SELECT a, SUM(v) AS gsum FROM t1 GROUP BY a +) dt; +SHOW STATUS LIKE 'Created_tmp%'; + +--echo # --- SELECT DISTINCT on TEXT column (end_write path) --- +FLUSH STATUS; +SELECT COUNT(*) AS cnt_distinct FROM (SELECT DISTINCT a FROM t1) dt; +SHOW STATUS LIKE 'Created_tmp%'; + +--echo # --- SELECT DISTINCT on BLOB column --- +FLUSH STATUS; +SELECT COUNT(*) AS cnt_distinct FROM (SELECT DISTINCT b FROM t1) dt; +SHOW STATUS LIKE 'Created_tmp%'; + +--echo # --- SELECT DISTINCT on TEXT + INT columns --- +FLUSH STATUS; +SELECT COUNT(*) AS cnt_distinct FROM (SELECT DISTINCT a, v FROM t1) dt; +SHOW STATUS LIKE 'Created_tmp%'; + +--echo # --- GROUP BY TEXT column + HAVING --- +FLUSH STATUS; +SELECT COUNT(*) AS groups_with_2rows FROM ( + SELECT a FROM t1 GROUP BY a HAVING COUNT(*) = 2 +) dt; +SHOW STATUS LIKE 'Created_tmp%'; + +--echo # --- GROUP BY on TEXT + BLOB columns (two blob key columns) --- +FLUSH STATUS; +SELECT COUNT(*) AS groups FROM ( + SELECT a, b, COUNT(*) AS cnt FROM t1 GROUP BY a, b +) dt; +SHOW STATUS LIKE 'Created_tmp%'; + +--echo # --- GROUP BY TEXT WITH ROLLUP --- +--echo # 50 groups + 1 NULL summary row = 51 +FLUSH STATUS; +SELECT COUNT(*) AS rows_incl_rollup FROM ( + SELECT a, SUM(v) AS sv FROM t1 GROUP BY a WITH ROLLUP +) dt; +SHOW STATUS LIKE 'Created_tmp%'; + +--echo # --- UNION DISTINCT on TEXT column (end_write path) --- +--echo # Both halves share the same 50 distinct 'a' values -> 50 rows after UNION. +FLUSH STATUS; +SELECT COUNT(*) AS union_rows FROM ( + SELECT a FROM t1 WHERE v <= 50 + UNION + SELECT a FROM t1 WHERE v > 100 +) dt; +SHOW STATUS LIKE 'Created_tmp%'; + +--echo # --- UNION DISTINCT: same table both sides (all 50 distinct values) --- +FLUSH STATUS; +SELECT COUNT(*) AS distinct_rows FROM ( + SELECT a FROM t1 UNION SELECT a FROM t1 +) dt; +SHOW STATUS LIKE 'Created_tmp%'; + +--echo # --- GROUP_CONCAT(DISTINCT text_col) --- +--echo # GROUP_CONCAT uses blob_storage for blob values; the HEAP key part is +--echo # tiny (12 bytes/row). Expect Created_tmp_disk_tables=0 in both runs. +FLUSH STATUS; +SELECT LENGTH(GROUP_CONCAT(DISTINCT LEFT(a, 4) ORDER BY a)) AS gc_len FROM t1; +SHOW STATUS LIKE 'Created_tmp%'; + +--echo # --- GROUP_CONCAT(text_col ORDER BY ...) — blob_storage path, no DISTINCT --- +--echo # No HEAP key table is needed; blob data goes to blob_storage (MEM_ROOT). 
+FLUSH STATUS; +SELECT LENGTH(GROUP_CONCAT(LEFT(a, 4) ORDER BY v)) AS gc_len FROM t1; +SHOW STATUS LIKE 'Created_tmp%'; + +--echo # --- ORDER BY on TEXT column (DISTINCT subquery + ORDER BY) --- +FLUSH STATUS; +SELECT COUNT(*) AS cnt FROM ( + SELECT DISTINCT a FROM t1 ORDER BY a +) dt; +SHOW STATUS LIKE 'Created_tmp%'; + +--echo # --- GROUP BY with aggregate ORDER BY (two-pass: group then sort) --- +FLUSH STATUS; +SELECT COUNT(*), SUM(gv) FROM ( + SELECT a, SUM(v) AS gv FROM t1 GROUP BY a ORDER BY SUM(v) +) dt; +SHOW STATUS LIKE 'Created_tmp%'; + +DROP TABLE t1; + +--enable_ps2_protocol +--enable_cursor_protocol diff --git a/mysql-test/suite/heap/heap_blob_big1.result b/mysql-test/suite/heap/heap_blob_big1.result new file mode 100644 index 0000000000000..d4b4f43cd8ca9 --- /dev/null +++ b/mysql-test/suite/heap/heap_blob_big1.result @@ -0,0 +1,195 @@ +# +# Test create_internal_tmp_table_from_heap() with blob/text columns. +# +SET @@tmp_memory_table_size = 1024*1024; +# +# Test create_internal_tmp_table_from_heap() with blob/text columns. +# +# Field_blob_key is used for blob/text columns that appear as key columns +# (GROUP BY, SELECT DISTINCT, UNION DISTINCT) in internal tmp tables +# created by class Create_tmp_table. +# +# +# Setup: 50 distinct text/blob values of ~1500 bytes each. +# Row count: 100 (50 original + 50 duplicate rows for aggregation). +# Blob data in the tmp table: ~75 KB (50 * 1500 bytes), +# which is between 64 K and 1 M. +# +CREATE TABLE t1 ( +pk INT NOT NULL AUTO_INCREMENT PRIMARY KEY, +a TEXT NOT NULL, +b BLOB, +v INT NOT NULL +); +INSERT INTO t1 (a, b, v) +SELECT REPEAT(LPAD(seq, 4, '0'), 375), +REPEAT(LPAD(seq, 4, '0'), 375), +seq +FROM seq_1_to_50; +INSERT INTO t1 (a, b, v) +SELECT a, b, v + 100 FROM t1 ORDER BY pk; +SELECT COUNT(*), COUNT(DISTINCT a), SUM(v) FROM t1; +COUNT(*) COUNT(DISTINCT a) SUM(v) +100 50 7550 +set @@max_sort_length=65536; +set @@sort_buffer_size=1024*1024*128; +# +# ================================================================ +# Run 1: tmp_memory_table_size=1M — tmp table stays in HEAP +# ================================================================ +# +# --- GROUP BY on TEXT column (end_update path) --- +# Tmp table: 50 groups x ~1500-byte TEXT key = ~75 KB. Fits in 1 M HEAP. 
+FLUSH STATUS; +SELECT COUNT(*), SUM(gsum) FROM ( +SELECT a, SUM(v) AS gsum FROM t1 GROUP BY a +) dt; +COUNT(*) SUM(gsum) +50 7550 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 0 +Created_tmp_files 0 +Created_tmp_tables 2 +# --- SELECT DISTINCT on TEXT column (end_write path) --- +FLUSH STATUS; +SELECT COUNT(*) AS cnt_distinct FROM (SELECT DISTINCT a FROM t1) dt; +cnt_distinct +50 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 0 +Created_tmp_files 0 +Created_tmp_tables 2 +# --- SELECT DISTINCT on BLOB column --- +FLUSH STATUS; +SELECT COUNT(*) AS cnt_distinct FROM (SELECT DISTINCT b FROM t1) dt; +cnt_distinct +50 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 0 +Created_tmp_files 0 +Created_tmp_tables 2 +# --- SELECT DISTINCT on TEXT + INT columns --- +FLUSH STATUS; +SELECT COUNT(*) AS cnt_distinct FROM (SELECT DISTINCT a, v FROM t1) dt; +cnt_distinct +100 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 0 +Created_tmp_files 0 +Created_tmp_tables 2 +# --- GROUP BY TEXT column + HAVING --- +FLUSH STATUS; +SELECT COUNT(*) AS groups_with_2rows FROM ( +SELECT a FROM t1 GROUP BY a HAVING COUNT(*) = 2 +) dt; +groups_with_2rows +50 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 0 +Created_tmp_files 0 +Created_tmp_tables 2 +# --- GROUP BY on TEXT + BLOB columns (two blob key columns) --- +FLUSH STATUS; +SELECT COUNT(*) AS groups FROM ( +SELECT a, b, COUNT(*) AS cnt FROM t1 GROUP BY a, b +) dt; +groups +50 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 0 +Created_tmp_files 0 +Created_tmp_tables 2 +# --- GROUP BY TEXT WITH ROLLUP --- +# 50 groups + 1 NULL summary row = 51 +FLUSH STATUS; +SELECT COUNT(*) AS rows_incl_rollup FROM ( +SELECT a, SUM(v) AS sv FROM t1 GROUP BY a WITH ROLLUP +) dt; +rows_incl_rollup +51 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 0 +Created_tmp_files 0 +Created_tmp_tables 1 +# --- UNION DISTINCT on TEXT column (end_write path) --- +# Both halves share the same 50 distinct 'a' values -> 50 rows after UNION. +FLUSH STATUS; +SELECT COUNT(*) AS union_rows FROM ( +SELECT a FROM t1 WHERE v <= 50 +UNION +SELECT a FROM t1 WHERE v > 100 +) dt; +union_rows +50 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 0 +Created_tmp_files 0 +Created_tmp_tables 2 +# --- UNION DISTINCT: same table both sides (all 50 distinct values) --- +FLUSH STATUS; +SELECT COUNT(*) AS distinct_rows FROM ( +SELECT a FROM t1 UNION SELECT a FROM t1 +) dt; +distinct_rows +50 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 0 +Created_tmp_files 0 +Created_tmp_tables 2 +# --- GROUP_CONCAT(DISTINCT text_col) --- +# GROUP_CONCAT uses blob_storage for blob values; the HEAP key part is +# tiny (12 bytes/row). Expect Created_tmp_disk_tables=0 in both runs. +FLUSH STATUS; +SELECT LENGTH(GROUP_CONCAT(DISTINCT LEFT(a, 4) ORDER BY a)) AS gc_len FROM t1; +gc_len +249 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 0 +Created_tmp_files 0 +Created_tmp_tables 1 +# --- GROUP_CONCAT(text_col ORDER BY ...) — blob_storage path, no DISTINCT --- +# No HEAP key table is needed; blob data goes to blob_storage (MEM_ROOT). 
+FLUSH STATUS; +SELECT LENGTH(GROUP_CONCAT(LEFT(a, 4) ORDER BY v)) AS gc_len FROM t1; +gc_len +499 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 0 +Created_tmp_files 0 +Created_tmp_tables 1 +# --- ORDER BY on TEXT column (DISTINCT subquery + ORDER BY) --- +FLUSH STATUS; +SELECT COUNT(*) AS cnt FROM ( +SELECT DISTINCT a FROM t1 ORDER BY a +) dt; +cnt +50 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 0 +Created_tmp_files 0 +Created_tmp_tables 2 +# --- GROUP BY with aggregate ORDER BY (two-pass: group then sort) --- +FLUSH STATUS; +SELECT COUNT(*), SUM(gv) FROM ( +SELECT a, SUM(v) AS gv FROM t1 GROUP BY a ORDER BY SUM(v) +) dt; +COUNT(*) SUM(gv) +50 7550 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 0 +Created_tmp_files 0 +Created_tmp_tables 2 +DROP TABLE t1; diff --git a/mysql-test/suite/heap/heap_blob_big1.test b/mysql-test/suite/heap/heap_blob_big1.test new file mode 100644 index 0000000000000..19b7fb67eb6b7 --- /dev/null +++ b/mysql-test/suite/heap/heap_blob_big1.test @@ -0,0 +1,8 @@ +--echo # +--echo # Test create_internal_tmp_table_from_heap() with blob/text columns. +--echo # + +# Test with all data fitting in memory tables + +SET @@tmp_memory_table_size = 1024*1024; +--source heap_blob_big.inc diff --git a/mysql-test/suite/heap/heap_blob_big2.result b/mysql-test/suite/heap/heap_blob_big2.result new file mode 100644 index 0000000000000..6909c04b9f6de --- /dev/null +++ b/mysql-test/suite/heap/heap_blob_big2.result @@ -0,0 +1,195 @@ +# +# Test create_internal_tmp_table_from_heap() with blob/text columns. +# +SET @@tmp_memory_table_size = 65536; +# +# Test create_internal_tmp_table_from_heap() with blob/text columns. +# +# Field_blob_key is used for blob/text columns that appear as key columns +# (GROUP BY, SELECT DISTINCT, UNION DISTINCT) in internal tmp tables +# created by class Create_tmp_table. +# +# +# Setup: 50 distinct text/blob values of ~1500 bytes each. +# Row count: 100 (50 original + 50 duplicate rows for aggregation). +# Blob data in the tmp table: ~75 KB (50 * 1500 bytes), +# which is between 64 K and 1 M. +# +CREATE TABLE t1 ( +pk INT NOT NULL AUTO_INCREMENT PRIMARY KEY, +a TEXT NOT NULL, +b BLOB, +v INT NOT NULL +); +INSERT INTO t1 (a, b, v) +SELECT REPEAT(LPAD(seq, 4, '0'), 375), +REPEAT(LPAD(seq, 4, '0'), 375), +seq +FROM seq_1_to_50; +INSERT INTO t1 (a, b, v) +SELECT a, b, v + 100 FROM t1 ORDER BY pk; +SELECT COUNT(*), COUNT(DISTINCT a), SUM(v) FROM t1; +COUNT(*) COUNT(DISTINCT a) SUM(v) +100 50 7550 +set @@max_sort_length=65536; +set @@sort_buffer_size=1024*1024*128; +# +# ================================================================ +# Run 1: tmp_memory_table_size=1M — tmp table stays in HEAP +# ================================================================ +# +# --- GROUP BY on TEXT column (end_update path) --- +# Tmp table: 50 groups x ~1500-byte TEXT key = ~75 KB. Fits in 1 M HEAP. 
+FLUSH STATUS; +SELECT COUNT(*), SUM(gsum) FROM ( +SELECT a, SUM(v) AS gsum FROM t1 GROUP BY a +) dt; +COUNT(*) SUM(gsum) +50 7550 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 2 +Created_tmp_files 0 +Created_tmp_tables 4 +# --- SELECT DISTINCT on TEXT column (end_write path) --- +FLUSH STATUS; +SELECT COUNT(*) AS cnt_distinct FROM (SELECT DISTINCT a FROM t1) dt; +cnt_distinct +50 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 2 +Created_tmp_files 0 +Created_tmp_tables 4 +# --- SELECT DISTINCT on BLOB column --- +FLUSH STATUS; +SELECT COUNT(*) AS cnt_distinct FROM (SELECT DISTINCT b FROM t1) dt; +cnt_distinct +50 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 2 +Created_tmp_files 0 +Created_tmp_tables 4 +# --- SELECT DISTINCT on TEXT + INT columns --- +FLUSH STATUS; +SELECT COUNT(*) AS cnt_distinct FROM (SELECT DISTINCT a, v FROM t1) dt; +cnt_distinct +100 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 2 +Created_tmp_files 0 +Created_tmp_tables 4 +# --- GROUP BY TEXT column + HAVING --- +FLUSH STATUS; +SELECT COUNT(*) AS groups_with_2rows FROM ( +SELECT a FROM t1 GROUP BY a HAVING COUNT(*) = 2 +) dt; +groups_with_2rows +50 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 2 +Created_tmp_files 0 +Created_tmp_tables 4 +# --- GROUP BY on TEXT + BLOB columns (two blob key columns) --- +FLUSH STATUS; +SELECT COUNT(*) AS groups FROM ( +SELECT a, b, COUNT(*) AS cnt FROM t1 GROUP BY a, b +) dt; +groups +50 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 2 +Created_tmp_files 0 +Created_tmp_tables 4 +# --- GROUP BY TEXT WITH ROLLUP --- +# 50 groups + 1 NULL summary row = 51 +FLUSH STATUS; +SELECT COUNT(*) AS rows_incl_rollup FROM ( +SELECT a, SUM(v) AS sv FROM t1 GROUP BY a WITH ROLLUP +) dt; +rows_incl_rollup +51 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 1 +Created_tmp_files 0 +Created_tmp_tables 2 +# --- UNION DISTINCT on TEXT column (end_write path) --- +# Both halves share the same 50 distinct 'a' values -> 50 rows after UNION. +FLUSH STATUS; +SELECT COUNT(*) AS union_rows FROM ( +SELECT a FROM t1 WHERE v <= 50 +UNION +SELECT a FROM t1 WHERE v > 100 +) dt; +union_rows +50 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 2 +Created_tmp_files 0 +Created_tmp_tables 4 +# --- UNION DISTINCT: same table both sides (all 50 distinct values) --- +FLUSH STATUS; +SELECT COUNT(*) AS distinct_rows FROM ( +SELECT a FROM t1 UNION SELECT a FROM t1 +) dt; +distinct_rows +50 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 2 +Created_tmp_files 0 +Created_tmp_tables 4 +# --- GROUP_CONCAT(DISTINCT text_col) --- +# GROUP_CONCAT uses blob_storage for blob values; the HEAP key part is +# tiny (12 bytes/row). Expect Created_tmp_disk_tables=0 in both runs. +FLUSH STATUS; +SELECT LENGTH(GROUP_CONCAT(DISTINCT LEFT(a, 4) ORDER BY a)) AS gc_len FROM t1; +gc_len +249 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 0 +Created_tmp_files 0 +Created_tmp_tables 1 +# --- GROUP_CONCAT(text_col ORDER BY ...) — blob_storage path, no DISTINCT --- +# No HEAP key table is needed; blob data goes to blob_storage (MEM_ROOT). 
+FLUSH STATUS; +SELECT LENGTH(GROUP_CONCAT(LEFT(a, 4) ORDER BY v)) AS gc_len FROM t1; +gc_len +499 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 0 +Created_tmp_files 0 +Created_tmp_tables 1 +# --- ORDER BY on TEXT column (DISTINCT subquery + ORDER BY) --- +FLUSH STATUS; +SELECT COUNT(*) AS cnt FROM ( +SELECT DISTINCT a FROM t1 ORDER BY a +) dt; +cnt +50 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 2 +Created_tmp_files 0 +Created_tmp_tables 4 +# --- GROUP BY with aggregate ORDER BY (two-pass: group then sort) --- +FLUSH STATUS; +SELECT COUNT(*), SUM(gv) FROM ( +SELECT a, SUM(v) AS gv FROM t1 GROUP BY a ORDER BY SUM(v) +) dt; +COUNT(*) SUM(gv) +50 7550 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 2 +Created_tmp_files 0 +Created_tmp_tables 4 +DROP TABLE t1; diff --git a/mysql-test/suite/heap/heap_blob_big2.test b/mysql-test/suite/heap/heap_blob_big2.test new file mode 100644 index 0000000000000..20e50ade97b9d --- /dev/null +++ b/mysql-test/suite/heap/heap_blob_big2.test @@ -0,0 +1,9 @@ +--echo # +--echo # Test create_internal_tmp_table_from_heap() with blob/text columns. +--echo # + +# Test with forcing conversion of temporary table to Aria + +SET @@tmp_memory_table_size = 65536; +--source heap_blob_big.inc + diff --git a/mysql-test/suite/heap/heap_blob_big3.result b/mysql-test/suite/heap/heap_blob_big3.result new file mode 100644 index 0000000000000..be1013d24d081 --- /dev/null +++ b/mysql-test/suite/heap/heap_blob_big3.result @@ -0,0 +1,195 @@ +# +# Test create_internal_tmp_table_from_heap() with blob/text columns. +# +SET @@tmp_memory_table_size= 0; +# +# Test create_internal_tmp_table_from_heap() with blob/text columns. +# +# Field_blob_key is used for blob/text columns that appear as key columns +# (GROUP BY, SELECT DISTINCT, UNION DISTINCT) in internal tmp tables +# created by class Create_tmp_table. +# +# +# Setup: 50 distinct text/blob values of ~1500 bytes each. +# Row count: 100 (50 original + 50 duplicate rows for aggregation). +# Blob data in the tmp table: ~75 KB (50 * 1500 bytes), +# which is between 64 K and 1 M. +# +CREATE TABLE t1 ( +pk INT NOT NULL AUTO_INCREMENT PRIMARY KEY, +a TEXT NOT NULL, +b BLOB, +v INT NOT NULL +); +INSERT INTO t1 (a, b, v) +SELECT REPEAT(LPAD(seq, 4, '0'), 375), +REPEAT(LPAD(seq, 4, '0'), 375), +seq +FROM seq_1_to_50; +INSERT INTO t1 (a, b, v) +SELECT a, b, v + 100 FROM t1 ORDER BY pk; +SELECT COUNT(*), COUNT(DISTINCT a), SUM(v) FROM t1; +COUNT(*) COUNT(DISTINCT a) SUM(v) +100 50 7550 +set @@max_sort_length=65536; +set @@sort_buffer_size=1024*1024*128; +# +# ================================================================ +# Run 1: tmp_memory_table_size=1M — tmp table stays in HEAP +# ================================================================ +# +# --- GROUP BY on TEXT column (end_update path) --- +# Tmp table: 50 groups x ~1500-byte TEXT key = ~75 KB. Fits in 1 M HEAP. 
+FLUSH STATUS; +SELECT COUNT(*), SUM(gsum) FROM ( +SELECT a, SUM(v) AS gsum FROM t1 GROUP BY a +) dt; +COUNT(*) SUM(gsum) +50 7550 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 2 +Created_tmp_files 0 +Created_tmp_tables 2 +# --- SELECT DISTINCT on TEXT column (end_write path) --- +FLUSH STATUS; +SELECT COUNT(*) AS cnt_distinct FROM (SELECT DISTINCT a FROM t1) dt; +cnt_distinct +50 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 2 +Created_tmp_files 0 +Created_tmp_tables 2 +# --- SELECT DISTINCT on BLOB column --- +FLUSH STATUS; +SELECT COUNT(*) AS cnt_distinct FROM (SELECT DISTINCT b FROM t1) dt; +cnt_distinct +50 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 2 +Created_tmp_files 0 +Created_tmp_tables 2 +# --- SELECT DISTINCT on TEXT + INT columns --- +FLUSH STATUS; +SELECT COUNT(*) AS cnt_distinct FROM (SELECT DISTINCT a, v FROM t1) dt; +cnt_distinct +100 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 2 +Created_tmp_files 0 +Created_tmp_tables 2 +# --- GROUP BY TEXT column + HAVING --- +FLUSH STATUS; +SELECT COUNT(*) AS groups_with_2rows FROM ( +SELECT a FROM t1 GROUP BY a HAVING COUNT(*) = 2 +) dt; +groups_with_2rows +50 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 2 +Created_tmp_files 0 +Created_tmp_tables 2 +# --- GROUP BY on TEXT + BLOB columns (two blob key columns) --- +FLUSH STATUS; +SELECT COUNT(*) AS groups FROM ( +SELECT a, b, COUNT(*) AS cnt FROM t1 GROUP BY a, b +) dt; +groups +50 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 2 +Created_tmp_files 0 +Created_tmp_tables 2 +# --- GROUP BY TEXT WITH ROLLUP --- +# 50 groups + 1 NULL summary row = 51 +FLUSH STATUS; +SELECT COUNT(*) AS rows_incl_rollup FROM ( +SELECT a, SUM(v) AS sv FROM t1 GROUP BY a WITH ROLLUP +) dt; +rows_incl_rollup +51 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 1 +Created_tmp_files 0 +Created_tmp_tables 1 +# --- UNION DISTINCT on TEXT column (end_write path) --- +# Both halves share the same 50 distinct 'a' values -> 50 rows after UNION. +FLUSH STATUS; +SELECT COUNT(*) AS union_rows FROM ( +SELECT a FROM t1 WHERE v <= 50 +UNION +SELECT a FROM t1 WHERE v > 100 +) dt; +union_rows +50 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 2 +Created_tmp_files 0 +Created_tmp_tables 2 +# --- UNION DISTINCT: same table both sides (all 50 distinct values) --- +FLUSH STATUS; +SELECT COUNT(*) AS distinct_rows FROM ( +SELECT a FROM t1 UNION SELECT a FROM t1 +) dt; +distinct_rows +50 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 2 +Created_tmp_files 0 +Created_tmp_tables 2 +# --- GROUP_CONCAT(DISTINCT text_col) --- +# GROUP_CONCAT uses blob_storage for blob values; the HEAP key part is +# tiny (12 bytes/row). Expect Created_tmp_disk_tables=0 in both runs. +FLUSH STATUS; +SELECT LENGTH(GROUP_CONCAT(DISTINCT LEFT(a, 4) ORDER BY a)) AS gc_len FROM t1; +gc_len +499 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 1 +Created_tmp_files 0 +Created_tmp_tables 1 +# --- GROUP_CONCAT(text_col ORDER BY ...) — blob_storage path, no DISTINCT --- +# No HEAP key table is needed; blob data goes to blob_storage (MEM_ROOT). 
+FLUSH STATUS; +SELECT LENGTH(GROUP_CONCAT(LEFT(a, 4) ORDER BY v)) AS gc_len FROM t1; +gc_len +499 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 1 +Created_tmp_files 0 +Created_tmp_tables 1 +# --- ORDER BY on TEXT column (DISTINCT subquery + ORDER BY) --- +FLUSH STATUS; +SELECT COUNT(*) AS cnt FROM ( +SELECT DISTINCT a FROM t1 ORDER BY a +) dt; +cnt +50 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 2 +Created_tmp_files 0 +Created_tmp_tables 2 +# --- GROUP BY with aggregate ORDER BY (two-pass: group then sort) --- +FLUSH STATUS; +SELECT COUNT(*), SUM(gv) FROM ( +SELECT a, SUM(v) AS gv FROM t1 GROUP BY a ORDER BY SUM(v) +) dt; +COUNT(*) SUM(gv) +50 7550 +SHOW STATUS LIKE 'Created_tmp%'; +Variable_name Value +Created_tmp_disk_tables 2 +Created_tmp_files 0 +Created_tmp_tables 2 +DROP TABLE t1; diff --git a/mysql-test/suite/heap/heap_blob_big3.test b/mysql-test/suite/heap/heap_blob_big3.test new file mode 100644 index 0000000000000..ffc973ff77c63 --- /dev/null +++ b/mysql-test/suite/heap/heap_blob_big3.test @@ -0,0 +1,10 @@ +--echo # +--echo # Test create_internal_tmp_table_from_heap() with blob/text columns. +--echo # + +# Test without all temporary tables being dicrectly created in Aria + +SET @@tmp_memory_table_size= 0; +--source heap_blob_big.inc + + diff --git a/sql/sql_select.cc b/sql/sql_select.cc index eee0a390b17bf..baac7853d4b79 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -21479,6 +21479,14 @@ bool Create_tmp_table::finalize(THD *thd, field->null_bit))) goto err; /* purecov: inspected */ + /* + Mark that a key segment is a blob. Needed for + create_internal_tmp_table_from_heap to convert keys with blobs + to unique. + */ + if (cur_group->field->flags & BLOB_FLAG) + keyinfo->flags|= HA_BLOB_PART_KEY; + /* Set store_length for all GROUP BY key parts so rebuild_key_from_group_buff() can advance through the key buffer. @@ -21651,6 +21659,9 @@ bool Create_tmp_table::finalize(THD *thd, m_key_part_info->store_length+= HA_KEY_NULL_LENGTH; m_key_part_info->key_part_flag |= HA_NULL_PART; } + if (field->flags & BLOB_FLAG) + keyinfo->flags|= HA_BLOB_PART_KEY; + m_key_part_info->store_length+= field->key_part_length_bytes(); keyinfo->key_length+= m_key_part_info->store_length; @@ -22039,7 +22050,7 @@ bool create_internal_tmp_table(TABLE *table, KEY *keyinfo, */ if (keyinfo->key_length > table->file->max_key_length() || keyinfo->user_defined_key_parts > table->file->max_key_parts() || - share->uniques) + share->uniques || (keyinfo->flags & HA_BLOB_PART_KEY)) { if (!share->uniques && !(keyinfo->flags & HA_NOSAME)) { diff --git a/storage/maria/ma_create.c b/storage/maria/ma_create.c index 655c7798dc1b1..a9e1a068f954c 100644 --- a/storage/maria/ma_create.c +++ b/storage/maria/ma_create.c @@ -598,6 +598,11 @@ int maria_create(const char *name, enum data_file_type datafile_type, keyseg->type == HA_KEYTYPE_VARBINARY1) ? 
1 : 2); } + else + { + /* Ensure that the blob has a fixed max length */ + DBUG_ASSERT(keyseg->length > 0); + } break; default: break; From efc15c3ae7cb32ed0620fd546908cdee5db9ac44 Mon Sep 17 00:00:00 2001 From: Arcadiy Ivanov Date: Thu, 30 Apr 2026 21:06:30 -0400 Subject: [PATCH 16/27] Code review feedback: `hp_update.c` cleanup, test renames, style fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apply Monty's review feedback across the HEAP blob implementation: **`hp_update.c`:** - Hoist `HP_BLOB_DESC *desc` to block scope, use `desc++` in all three blob loops instead of re-indexing `&share->blob_descs[i]` each iteration - Move `new_len` declaration to block scope, remove inner `{ }` wrapper block, dedent the write-new-chains loop body by one level - Replace `if (blob_changed[i]) any_changed= TRUE` with branchless `any_changed|= blob_changed[i]` - Add braces to rollback `for (j= 0; j < i; j++)` loop body - Update chain pointer restoration comment to explain `pos` vs `old` pointer semantics for segmented blobs - Rename inner loop `new_len` to `cur_len` to avoid shadowing block-scope `new_len` **`read_lowendian()` move (F33/F63):** - Move `read_lowendian()` from `sql/field.h` to `include/my_base.h` so pure-C storage engines can use it - Convert `hp_blob_length()` from standalone function in `hp_blob.c` to `static inline` wrapper in `heapdef.h` calling `read_lowendian()` - Convert `hp_blob_key_length()` in `hp_hash.c` to `static inline` wrapper calling `read_lowendian()` **Test renames** (Monty's naming convention — drop `heap_` prefix): - `heap_blob.test` → `blob.test` - `heap_blob_big{1,2,3}.test` → `blob_big{1,2,3}.test` - `heap_blob_big.inc` → `blob_big.inc` - `heap_blob_groupby.test` → `blob_group_by.test` - `heap_blob_ops.test` → `blob_ops.test` **Other files:** Style fixes from earlier feedback items applied across `hp_blob.c`, `hp_write.c`, `hp_hash.c`, `hp_scan.c`, `hp_delete.c`, `ha_heap.cc`, `heapdef.h`, `_check.c`, `heap.h`, `field.h`, `sql_select.cc`, and test files.
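As a rough illustration of the `read_lowendian()` refactoring above: the heap-engine blob-length readers stop owning any byte-decoding logic and become thin wrappers. The sketch below is written under assumptions (the actual wrapper signature and the HP_BLOB_DESC member semantics in heapdef.h may differ); read_lowendian() itself is the helper this patch adds to include/my_base.h, shown verbatim in the diff below.

```c
#include <my_global.h>   /* uchar, uint, ulong */
#include <my_base.h>     /* read_lowendian(), available after this patch */

/*
  Sketch of the heapdef.h wrapper: a HEAP blob field starts with 1-4
  little-endian length bytes (typically followed by the data pointer,
  as in Field_blob), so reading the stored blob length reduces to a
  single read_lowendian() call with the column's length-byte count.
*/
static inline ulong hp_blob_length_sketch(const uchar *field_pos,
                                          uint length_bytes)
{
  return (ulong) read_lowendian(field_pos, length_bytes);
}
```

The same pattern applies to hp_blob_key_length() in hp_hash.c; centralizing the decoding in my_base.h keeps the C storage engine free of duplicated korr-macro switches.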
--- include/heap.h | 17 +- include/my_base.h | 12 + mysql-test/main/blob_sj_test.test | 6 + mysql-test/main/status.result | 2 +- mysql-test/main/temp_table_symlink.result | 3 + mysql-test/main/temp_table_symlink.test | 7 + mysql-test/main/tmp_table_error.result | 6 +- mysql-test/main/tmp_table_error.test | 7 +- .../heap/{heap_blob.result => blob.result} | 0 .../suite/heap/{heap_blob.test => blob.test} | 7 +- .../heap/{heap_blob_big.inc => blob_big.inc} | 0 ...heap_blob_big1.result => blob_big1.result} | 0 .../{heap_blob_big1.test => blob_big1.test} | 2 +- ...heap_blob_big2.result => blob_big2.result} | 0 .../{heap_blob_big2.test => blob_big2.test} | 2 +- ...heap_blob_big3.result => blob_big3.result} | 0 .../{heap_blob_big3.test => blob_big3.test} | 2 +- mysql-test/suite/heap/blob_dedup.result | 8 +- mysql-test/suite/heap/blob_dedup.test | 14 +- ...ob_groupby.result => blob_group_by.result} | 0 ...p_blob_groupby.test => blob_group_by.test} | 0 .../{heap_blob_ops.result => blob_ops.result} | 16 +- .../{heap_blob_ops.test => blob_ops.test} | 6 +- mysql-test/suite/heap/heap_geometry.result | 28 +- mysql-test/suite/heap/heap_geometry.test | 19 +- .../r/tmp_disk_table_size_basic.result | 105 +---- .../sys_vars/t/tmp_disk_table_size_basic.test | 3 +- sql/field.h | 16 +- sql/sql_select.cc | 22 +- storage/heap/CMakeLists.txt | 4 +- storage/heap/_check.c | 4 +- storage/heap/ha_heap.cc | 48 ++- storage/heap/heapdef.h | 51 ++- storage/heap/hp_blob.c | 319 ++++++--------- storage/heap/hp_delete.c | 5 +- storage/heap/hp_hash.c | 28 +- storage/heap/hp_scan.c | 18 +- storage/heap/hp_test_hash-t.c | 181 +++++---- storage/heap/hp_test_key_setup-t.cc | 368 +----------------- storage/heap/hp_test_scan-t.c | 335 ++++++++++++++++ storage/heap/hp_update.c | 102 +++-- storage/heap/hp_write.c | 97 ++--- 42 files changed, 864 insertions(+), 1006 deletions(-) rename mysql-test/suite/heap/{heap_blob.result => blob.result} (100%) rename mysql-test/suite/heap/{heap_blob.test => blob.test} (98%) rename mysql-test/suite/heap/{heap_blob_big.inc => blob_big.inc} (100%) rename mysql-test/suite/heap/{heap_blob_big1.result => blob_big1.result} (100%) rename mysql-test/suite/heap/{heap_blob_big1.test => blob_big1.test} (87%) rename mysql-test/suite/heap/{heap_blob_big2.result => blob_big2.result} (100%) rename mysql-test/suite/heap/{heap_blob_big2.test => blob_big2.test} (87%) rename mysql-test/suite/heap/{heap_blob_big3.result => blob_big3.result} (100%) rename mysql-test/suite/heap/{heap_blob_big3.test => blob_big3.test} (88%) rename mysql-test/suite/heap/{heap_blob_groupby.result => blob_group_by.result} (100%) rename mysql-test/suite/heap/{heap_blob_groupby.test => blob_group_by.test} (100%) rename mysql-test/suite/heap/{heap_blob_ops.result => blob_ops.result} (95%) rename mysql-test/suite/heap/{heap_blob_ops.test => blob_ops.test} (96%) create mode 100644 storage/heap/hp_test_scan-t.c diff --git a/include/heap.h b/include/heap.h index 54a78a7877cd4..79943b428dff6 100644 --- a/include/heap.h +++ b/include/heap.h @@ -116,7 +116,6 @@ typedef struct st_hp_keydef /* Key definition with open */ uint keysegs; /* Number of key-segment */ uint length; /* Length of key (automatic) */ uint8 algorithm; /* HASH / BTREE */ - my_bool has_blob_seg; /* Key has HA_BLOB_PART segments */ HA_KEYSEG *seg; HP_BLOCK block; /* Where keys are saved */ /* @@ -146,6 +145,7 @@ typedef struct st_heap_share ulonglong auto_increment; ulong min_records,max_records; /* Params to open */ ulong records; /* Logical (primary) record count */ + ulong 
total_records; /* All active records (primary + blob continuation) */ ulong blength; /* records rounded up to 2^n */ ulong deleted; /* Deleted records in database */ uint key_stat_version; /* version to indicate insert/delete */ @@ -157,15 +157,14 @@ typedef struct st_heap_share uint keys,max_key_length; uint currently_disabled_keys; /* saved value from "keys" when disabled */ uint open_count; + uint blob_count; /* Number of blob columns */ uchar *del_link; /* Link to next block with del. rec */ + HP_BLOB_DESC *blob_descs; /* Array of blob column descriptors */ char * name; /* Name of "memory-file" */ time_t create_time; THR_LOCK lock; my_bool delete_on_close; my_bool internal; /* Internal temporary table */ - HP_BLOB_DESC *blob_descs; /* Array of blob column descriptors */ - uint blob_count; /* Number of blob columns */ - ulong total_records; /* All active records (primary + blob continuation) */ LIST open_list; uint auto_key; uint auto_key_type; /* real type of the auto key segment */ @@ -190,11 +189,11 @@ typedef struct st_heap_info uint key_version; /* Version at last read */ uint file_version; /* Version at scan */ uint lastkey_len; - my_bool implicit_emptied; - uchar *blob_buff; /* Reassembly buffer for blob reads */ uint32 blob_buff_len; /* Current allocated size of blob_buff */ + my_bool implicit_emptied; my_bool has_zerocopy_blobs; /* Last hp_read_blobs produced zero-copy ptrs */ - ulong last_hash_of_key; /* Hash from last hp_search(), reused by hp_search_next() */ + uchar *blob_buff; /* Reassembly buffer for blob reads */ + ulong last_hash_of_key; /* Hash from last hp_search(), reused by hp_search_next() */ THR_LOCK_DATA lock; LIST open_list; } HP_INFO; @@ -203,10 +202,12 @@ typedef struct st_heap_info typedef struct st_heap_create_info { HP_KEYDEF *keydef; + HP_BLOB_DESC *blob_descs; uint auto_key; /* keynr [1 - maxkey] for auto key */ uint auto_key_type; uint keys; uint reclength; + uint blob_count; ulong max_records; ulong min_records; ulonglong max_table_size; @@ -218,8 +219,6 @@ typedef struct st_heap_create_info open_count to 1. Is only looked at if not internal_table. */ my_bool pin_share; - HP_BLOB_DESC *blob_descs; - uint blob_count; } HP_CREATE_INFO; /* Prototypes for heap-functions */ diff --git a/include/my_base.h b/include/my_base.h index 0ab388a449402..e29ebcf2265a5 100644 --- a/include/my_base.h +++ b/include/my_base.h @@ -706,4 +706,16 @@ C_MODE_START typedef void (* invalidator_by_filename)(const char * filename); C_MODE_END +static inline longlong read_lowendian(const uchar *from, uint bytes) +{ + switch(bytes) { + case 1: return from[0]; + case 2: return uint2korr(from); + case 3: return uint3korr(from); + case 4: return uint4korr(from); + case 8: return sint8korr(from); + default: DBUG_ASSERT(0); return 0; + } +} + #endif /* _my_base_h */ diff --git a/mysql-test/main/blob_sj_test.test b/mysql-test/main/blob_sj_test.test index 447d856adce4e..3ace1abfe7906 100644 --- a/mysql-test/main/blob_sj_test.test +++ b/mysql-test/main/blob_sj_test.test @@ -1,3 +1,9 @@ +# +# MDEV-38975: Test semi-join materialization with BLOB columns in HEAP +# temp tables. Added to cover a bug where HA_BLOB_PART in +# key_part_flag was uninitialized in SJ weedout temp tables, causing +# incorrect blob key handling. 
+# set optimizer_switch='materialization=on,in_to_exists=off,semijoin=off'; set @blob_len = 16; set @prefix_len = 6; diff --git a/mysql-test/main/status.result b/mysql-test/main/status.result index 47a5a45719829..0c94d589ded10 100644 --- a/mysql-test/main/status.result +++ b/mysql-test/main/status.result @@ -330,7 +330,7 @@ Handler_read_next 0 Handler_read_prev 0 Handler_read_retry 0 Handler_read_rnd 6 -Handler_read_rnd_deleted 3 +Handler_read_rnd_deleted 0 Handler_read_rnd_next 23 Handler_rollback 0 Handler_savepoint 0 diff --git a/mysql-test/main/temp_table_symlink.result b/mysql-test/main/temp_table_symlink.result index 6add9191b0478..f87a46aba0cf8 100644 --- a/mysql-test/main/temp_table_symlink.result +++ b/mysql-test/main/temp_table_symlink.result @@ -1,9 +1,12 @@ create table d1 (a int); create temporary table t1 (a int); +set @@max_heap_table_size=16384; create temporary table t2 (a int); Got one of the listed errors create temporary table t3 (a int) engine=Aria; Got one of the listed errors +select * from information_schema.columns; +Got one of the listed errors flush tables; select * from d1; a diff --git a/mysql-test/main/temp_table_symlink.test b/mysql-test/main/temp_table_symlink.test index 2428d137dd5ed..9ada8cd8be1ba 100644 --- a/mysql-test/main/temp_table_symlink.test +++ b/mysql-test/main/temp_table_symlink.test @@ -19,10 +19,17 @@ for (<#sql*.MYI>) { } EOF +# Force Aria usage when selecting from information_schema +set @@max_heap_table_size=16384; + error 1,1030; create temporary table t2 (a int); error 1,1030; create temporary table t3 (a int) engine=Aria; +--disable_view_protocol +error 1,1030; +select * from information_schema.columns; +--enable_view_protocol flush tables; select * from d1; drop temporary table t1; diff --git a/mysql-test/main/tmp_table_error.result b/mysql-test/main/tmp_table_error.result index 43d3a448cfd43..2016070b5fcb8 100644 --- a/mysql-test/main/tmp_table_error.result +++ b/mysql-test/main/tmp_table_error.result @@ -3,6 +3,8 @@ create table t1 ( a int primary key, b text ) engine=innodb; +SET @save_tmp_memory_table_size=@@tmp_memory_table_size; +SET tmp_memory_table_size=0; create table t2 as select 1 @@ -2630,4 +2632,6 @@ b as c2624, b as c2626 from t1 ) as tt1; -drop table t1, t2; +ERROR 0A000: Aria table 'tmp' has too many columns and/or indexes and/or unique constraints. 
+SET tmp_memory_table_size=@save_tmp_memory_table_size; +drop table t1; diff --git a/mysql-test/main/tmp_table_error.test b/mysql-test/main/tmp_table_error.test index b5a1ac3a47a85..3e3cea4674993 100644 --- a/mysql-test/main/tmp_table_error.test +++ b/mysql-test/main/tmp_table_error.test @@ -5,6 +5,10 @@ create table t1 ( b text ) engine=innodb; +SET @save_tmp_memory_table_size=@@tmp_memory_table_size; +SET tmp_memory_table_size=0; +--replace_regex /'.*'/'tmp'/ +--error 140 create table t2 as select 1 @@ -2632,4 +2636,5 @@ select b as c2626 from t1 ) as tt1; -drop table t1, t2; +SET tmp_memory_table_size=@save_tmp_memory_table_size; +drop table t1; diff --git a/mysql-test/suite/heap/heap_blob.result b/mysql-test/suite/heap/blob.result similarity index 100% rename from mysql-test/suite/heap/heap_blob.result rename to mysql-test/suite/heap/blob.result diff --git a/mysql-test/suite/heap/heap_blob.test b/mysql-test/suite/heap/blob.test similarity index 98% rename from mysql-test/suite/heap/heap_blob.test rename to mysql-test/suite/heap/blob.test index d203d75caad42..4d5715bcee732 100644 --- a/mysql-test/suite/heap/heap_blob.test +++ b/mysql-test/suite/heap/blob.test @@ -1,5 +1,10 @@ # -# Test BLOB/TEXT column support in HEAP (MEMORY) tables. +# MDEV-38975: HEAP engine BLOB/TEXT/JSON/GEOMETRY column support +# +# Tests basic CRUD, multiple blob types, large blobs, free list reuse, +# fragmentation, max_heap_table_size overflow, zero-copy edge cases +# (Case A single-rec, Case B zerocopy, Case B->C transition), and +# blob preservation during non-blob column updates. # --disable_warnings diff --git a/mysql-test/suite/heap/heap_blob_big.inc b/mysql-test/suite/heap/blob_big.inc similarity index 100% rename from mysql-test/suite/heap/heap_blob_big.inc rename to mysql-test/suite/heap/blob_big.inc diff --git a/mysql-test/suite/heap/heap_blob_big1.result b/mysql-test/suite/heap/blob_big1.result similarity index 100% rename from mysql-test/suite/heap/heap_blob_big1.result rename to mysql-test/suite/heap/blob_big1.result diff --git a/mysql-test/suite/heap/heap_blob_big1.test b/mysql-test/suite/heap/blob_big1.test similarity index 87% rename from mysql-test/suite/heap/heap_blob_big1.test rename to mysql-test/suite/heap/blob_big1.test index 19b7fb67eb6b7..2ca2815c900a8 100644 --- a/mysql-test/suite/heap/heap_blob_big1.test +++ b/mysql-test/suite/heap/blob_big1.test @@ -5,4 +5,4 @@ # Test with all data fitting in memory tables SET @@tmp_memory_table_size = 1024*1024; ---source heap_blob_big.inc +--source blob_big.inc diff --git a/mysql-test/suite/heap/heap_blob_big2.result b/mysql-test/suite/heap/blob_big2.result similarity index 100% rename from mysql-test/suite/heap/heap_blob_big2.result rename to mysql-test/suite/heap/blob_big2.result diff --git a/mysql-test/suite/heap/heap_blob_big2.test b/mysql-test/suite/heap/blob_big2.test similarity index 87% rename from mysql-test/suite/heap/heap_blob_big2.test rename to mysql-test/suite/heap/blob_big2.test index 20e50ade97b9d..d431bb44053c3 100644 --- a/mysql-test/suite/heap/heap_blob_big2.test +++ b/mysql-test/suite/heap/blob_big2.test @@ -5,5 +5,5 @@ # Test with forcing conversion of temporary table to Aria SET @@tmp_memory_table_size = 65536; ---source heap_blob_big.inc +--source blob_big.inc diff --git a/mysql-test/suite/heap/heap_blob_big3.result b/mysql-test/suite/heap/blob_big3.result similarity index 100% rename from mysql-test/suite/heap/heap_blob_big3.result rename to mysql-test/suite/heap/blob_big3.result diff --git 
a/mysql-test/suite/heap/heap_blob_big3.test b/mysql-test/suite/heap/blob_big3.test similarity index 88% rename from mysql-test/suite/heap/heap_blob_big3.test rename to mysql-test/suite/heap/blob_big3.test index ffc973ff77c63..943c3f5a69753 100644 --- a/mysql-test/suite/heap/heap_blob_big3.test +++ b/mysql-test/suite/heap/blob_big3.test @@ -5,6 +5,6 @@ # Test without all temporary tables being dicrectly created in Aria SET @@tmp_memory_table_size= 0; ---source heap_blob_big.inc +--source blob_big.inc diff --git a/mysql-test/suite/heap/blob_dedup.result b/mysql-test/suite/heap/blob_dedup.result index 66149ce791a3e..293b050b5e710 100644 --- a/mysql-test/suite/heap/blob_dedup.result +++ b/mysql-test/suite/heap/blob_dedup.result @@ -1,15 +1,19 @@ CREATE TABLE t1 (a mediumtext) ENGINE=HEAP; -INSERT INTO t1 VALUES ('abc'),('def'); +INSERT INTO t1 VALUES ('abc'),('def'),('abc'),('abc'),('def'),('abcd'); SELECT DISTINCT a FROM t1; a abc +abcd def DROP TABLE t1; CREATE TABLE t1 (a mediumtext); CREATE TABLE t2 (b varchar(20)); -INSERT INTO t1 VALUES ('a'),('b'); +INSERT INTO t1 VALUES ('a'),('b'),('a'),('c'); +INSERT INTO t2 VALUES ('b'),('d'); SELECT left(a,100000000) FROM t1 UNION SELECT b FROM t2; left(a,100000000) a b +c +d DROP TABLE t1, t2; diff --git a/mysql-test/suite/heap/blob_dedup.test b/mysql-test/suite/heap/blob_dedup.test index 16892d92ef4b7..810501db3430a 100644 --- a/mysql-test/suite/heap/blob_dedup.test +++ b/mysql-test/suite/heap/blob_dedup.test @@ -1,10 +1,20 @@ +# +# MDEV-38975: Test DISTINCT and UNION deduplication with BLOB columns +# in HEAP temporary tables. Verifies that hash-based deduplication +# correctly compares blob data (not pointer values) by using duplicate +# values that must be deduplicated. +# + CREATE TABLE t1 (a mediumtext) ENGINE=HEAP; -INSERT INTO t1 VALUES ('abc'),('def'); +INSERT INTO t1 VALUES ('abc'),('def'),('abc'),('abc'),('def'),('abcd'); +--sorted_result SELECT DISTINCT a FROM t1; DROP TABLE t1; CREATE TABLE t1 (a mediumtext); CREATE TABLE t2 (b varchar(20)); -INSERT INTO t1 VALUES ('a'),('b'); +INSERT INTO t1 VALUES ('a'),('b'),('a'),('c'); +INSERT INTO t2 VALUES ('b'),('d'); +--sorted_result SELECT left(a,100000000) FROM t1 UNION SELECT b FROM t2; DROP TABLE t1, t2; diff --git a/mysql-test/suite/heap/heap_blob_groupby.result b/mysql-test/suite/heap/blob_group_by.result similarity index 100% rename from mysql-test/suite/heap/heap_blob_groupby.result rename to mysql-test/suite/heap/blob_group_by.result diff --git a/mysql-test/suite/heap/heap_blob_groupby.test b/mysql-test/suite/heap/blob_group_by.test similarity index 100% rename from mysql-test/suite/heap/heap_blob_groupby.test rename to mysql-test/suite/heap/blob_group_by.test diff --git a/mysql-test/suite/heap/heap_blob_ops.result b/mysql-test/suite/heap/blob_ops.result similarity index 95% rename from mysql-test/suite/heap/heap_blob_ops.result rename to mysql-test/suite/heap/blob_ops.result index e28dc20dd7ffd..a137d27861d1b 100644 --- a/mysql-test/suite/heap/heap_blob_ops.result +++ b/mysql-test/suite/heap/blob_ops.result @@ -3,8 +3,8 @@ # CREATE TABLE t1 (id INT AUTO_INCREMENT PRIMARY KEY, k INT, text_col TEXT); INSERT INTO t1 (k, text_col) VALUES -(1, 'alpha'), (1, 'alpha'), (1, 'beta'), -(2, 'gamma'), (2, 'gamma'), (2, 'delta'), +(1, 'alpha'), (1, 'alpha '), (1, 'beta'), +(2, 'gamma'), (2, 'gamma '), (2, 'delta'), (3, 'alpha'), (3, 'epsilon'), (3, NULL), (4, NULL), (4, NULL), (4, 'beta'); # @@ -30,7 +30,7 @@ INSERT INTO t2 VALUES ('alpha'), ('delta'); SELECT id, text_col FROM t1 WHERE text_col 
IN (SELECT text_col FROM t2) ORDER BY id; id text_col 1 alpha -2 alpha +2 alpha 6 delta 7 alpha # @@ -79,26 +79,26 @@ SELECT id, text_col, ROW_NUMBER() OVER (PARTITION BY k ORDER BY id) AS rn FROM t1 WHERE k <= 2 ORDER BY id; id text_col rn 1 alpha 1 -2 alpha 2 +2 alpha 2 3 beta 3 4 gamma 1 -5 gamma 2 +5 gamma 2 6 delta 3 # # RANK with blob column # SELECT text_col, k, RANK() OVER (ORDER BY text_col) AS rnk -FROM t1 WHERE text_col IS NOT NULL ORDER BY text_col, k; +FROM t1 WHERE text_col IS NOT NULL ORDER BY text_col, k, id; text_col k rnk alpha 1 1 -alpha 1 1 +alpha 1 1 alpha 3 1 beta 1 4 beta 4 4 delta 2 6 epsilon 3 7 gamma 2 8 -gamma 2 8 +gamma 2 8 # # Non-recursive CTE materializing blob column # diff --git a/mysql-test/suite/heap/heap_blob_ops.test b/mysql-test/suite/heap/blob_ops.test similarity index 96% rename from mysql-test/suite/heap/heap_blob_ops.test rename to mysql-test/suite/heap/blob_ops.test index e596ea3e07122..3877da2c3bd6b 100644 --- a/mysql-test/suite/heap/heap_blob_ops.test +++ b/mysql-test/suite/heap/blob_ops.test @@ -7,8 +7,8 @@ CREATE TABLE t1 (id INT AUTO_INCREMENT PRIMARY KEY, k INT, text_col TEXT); INSERT INTO t1 (k, text_col) VALUES - (1, 'alpha'), (1, 'alpha'), (1, 'beta'), - (2, 'gamma'), (2, 'gamma'), (2, 'delta'), + (1, 'alpha'), (1, 'alpha '), (1, 'beta'), + (2, 'gamma'), (2, 'gamma '), (2, 'delta'), (3, 'alpha'), (3, 'epsilon'), (3, NULL), (4, NULL), (4, NULL), (4, 'beta'); @@ -56,7 +56,7 @@ FROM t1 WHERE k <= 2 ORDER BY id; --echo # RANK with blob column --echo # SELECT text_col, k, RANK() OVER (ORDER BY text_col) AS rnk -FROM t1 WHERE text_col IS NOT NULL ORDER BY text_col, k; +FROM t1 WHERE text_col IS NOT NULL ORDER BY text_col, k, id; --echo # --echo # Non-recursive CTE materializing blob column diff --git a/mysql-test/suite/heap/heap_geometry.result b/mysql-test/suite/heap/heap_geometry.result index 6ff7e65e54428..daf1cdf79a763 100644 --- a/mysql-test/suite/heap/heap_geometry.result +++ b/mysql-test/suite/heap/heap_geometry.result @@ -10,13 +10,7 @@ select engine from information_schema.tables where table_schema=database() and table_name='t1'; engine MEMORY -INSERT INTO t1 VALUES (1, ST_GeomFromText('LineString(2 2, 150 150)')); -INSERT INTO t1 VALUES (2, ST_GeomFromText('LineString(3 3, 160 160)')); -INSERT INTO t1 VALUES (3, ST_GeomFromText('LineString(4 4, 170 170)')); -INSERT INTO t1 VALUES (4, ST_GeomFromText('LineString(5 5, 180 180)')); -INSERT INTO t1 VALUES (5, ST_GeomFromText('LineString(6 6, 190 190)')); -INSERT INTO t1 VALUES (6, ST_GeomFromText('LineString(7 7, 200 200)')); -INSERT INTO t1 VALUES (7, ST_GeomFromText('LineString(8 8, 210 210)')); +INSERT INTO t1 SELECT seq, ST_GeomFromText(concat('LineString(',seq,' ',seq,',',(mod(seq,10)*10+100),' ',mod(seq,10)*10+100,')')) from seq_1_to_7; # 7 rows, all valid select count(*) from t1; count(*) @@ -59,17 +53,17 @@ expect 0 select ST_AsText(c2) as geom, count(*) as cnt from t1 group by geom order by geom; geom cnt -LINESTRING(2 2,150 150) 2048 -LINESTRING(3 3,160 160) 2048 -LINESTRING(4 4,170 170) 2048 -LINESTRING(5 5,180 180) 2048 -LINESTRING(6 6,190 190) 2048 -LINESTRING(7 7,200 200) 2048 -LINESTRING(8 8,210 210) 2048 +LINESTRING(1 1,110 110) 2048 +LINESTRING(2 2,120 120) 2048 +LINESTRING(3 3,130 130) 2048 +LINESTRING(4 4,140 140) 2048 +LINESTRING(5 5,150 150) 2048 +LINESTRING(6 6,160 160) 2048 +LINESTRING(7 7,170 170) 2048 # MBRWithin check set @g1 = ST_GeomFromText('Polygon((0 0,0 200,200 200,200 0,0 0))'); -select count(*) as 'expect 12288' from t1 where MBRWithin(t1.c2, @g1); 
-expect 12288 -12288 +select count(*) as 'expect 14336' from t1 where MBRWithin(t1.c2, @g1); +expect 14336 +14336 drop table t1; set max_heap_table_size= @save_max_heap_table_size; diff --git a/mysql-test/suite/heap/heap_geometry.test b/mysql-test/suite/heap/heap_geometry.test index 9d4fe38ff81b7..5d0cfb217b8da 100644 --- a/mysql-test/suite/heap/heap_geometry.test +++ b/mysql-test/suite/heap/heap_geometry.test @@ -1,4 +1,13 @@ --source include/have_geometry.inc +--source include/have_sequence.inc + +# +# MDEV-38975: Test GEOMETRY columns in MEMORY tables. +# GEOMETRY columns are internally Field_blob, so they exercise the +# same continuation record chains as BLOB/TEXT. The INSERT...SELECT +# doublings stress free-list reuse and the Case A next_cont bug that +# corrupted geometry data during large-scale inserts. +# --echo # --echo # Test GEOMETRY columns in MEMORY tables @@ -14,13 +23,7 @@ create table t1 (c1 int, c2 geometry not null) engine=MEMORY; select engine from information_schema.tables where table_schema=database() and table_name='t1'; -INSERT INTO t1 VALUES (1, ST_GeomFromText('LineString(2 2, 150 150)')); -INSERT INTO t1 VALUES (2, ST_GeomFromText('LineString(3 3, 160 160)')); -INSERT INTO t1 VALUES (3, ST_GeomFromText('LineString(4 4, 170 170)')); -INSERT INTO t1 VALUES (4, ST_GeomFromText('LineString(5 5, 180 180)')); -INSERT INTO t1 VALUES (5, ST_GeomFromText('LineString(6 6, 190 190)')); -INSERT INTO t1 VALUES (6, ST_GeomFromText('LineString(7 7, 200 200)')); -INSERT INTO t1 VALUES (7, ST_GeomFromText('LineString(8 8, 210 210)')); +INSERT INTO t1 SELECT seq, ST_GeomFromText(concat('LineString(',seq,' ',seq,',',(mod(seq,10)*10+100),' ',mod(seq,10)*10+100,')')) from seq_1_to_7; --echo # 7 rows, all valid select count(*) from t1; @@ -59,7 +62,7 @@ select ST_AsText(c2) as geom, count(*) as cnt from t1 --echo # MBRWithin check set @g1 = ST_GeomFromText('Polygon((0 0,0 200,200 200,200 0,0 0))'); -select count(*) as 'expect 12288' from t1 where MBRWithin(t1.c2, @g1); +select count(*) as 'expect 14336' from t1 where MBRWithin(t1.c2, @g1); drop table t1; set max_heap_table_size= @save_max_heap_table_size; diff --git a/mysql-test/suite/sys_vars/r/tmp_disk_table_size_basic.result b/mysql-test/suite/sys_vars/r/tmp_disk_table_size_basic.result index 0220d45e252af..49d3f8b3e0046 100644 --- a/mysql-test/suite/sys_vars/r/tmp_disk_table_size_basic.result +++ b/mysql-test/suite/sys_vars/r/tmp_disk_table_size_basic.result @@ -149,111 +149,10 @@ ERROR 42S22: Unknown column 'tmp_disk_table_size' in 'SELECT' # failed on SELECT after setting tmp_disk_table_size. 
# SET @@tmp_disk_table_size=16384; +set @@max_heap_table_size=16384; CREATE VIEW v AS SELECT 'a'; SELECT table_name FROM INFORMATION_SCHEMA.views; -table_name -host_summary -host_summary_by_file_io -host_summary_by_file_io_type -host_summary_by_stages -host_summary_by_statement_latency -host_summary_by_statement_type -innodb_buffer_stats_by_schema -innodb_buffer_stats_by_table -innodb_lock_waits -io_by_thread_by_latency -io_global_by_file_by_bytes -io_global_by_file_by_latency -io_global_by_wait_by_bytes -io_global_by_wait_by_latency -latest_file_io -memory_by_host_by_current_bytes -memory_by_thread_by_current_bytes -memory_by_user_by_current_bytes -memory_global_by_current_bytes -memory_global_total -metrics -processlist -ps_check_lost_instrumentation -schema_auto_increment_columns -schema_index_statistics -schema_object_overview -schema_redundant_indexes -schema_table_lock_waits -schema_table_statistics -schema_table_statistics_with_buffer -schema_tables_with_full_table_scans -schema_unused_indexes -session -session_ssl_status -statement_analysis -statements_with_errors_or_warnings -statements_with_full_table_scans -statements_with_runtimes_in_95th_percentile -statements_with_sorting -statements_with_temp_tables -user -user_summary -user_summary_by_file_io -user_summary_by_file_io_type -user_summary_by_stages -user_summary_by_statement_latency -user_summary_by_statement_type -v -version -wait_classes_global_by_avg_latency -wait_classes_global_by_latency -waits_by_host_by_latency -waits_by_user_by_latency -waits_global_by_latency -x$host_summary -x$host_summary_by_file_io -x$host_summary_by_file_io_type -x$host_summary_by_stages -x$host_summary_by_statement_latency -x$host_summary_by_statement_type -x$innodb_buffer_stats_by_schema -x$innodb_buffer_stats_by_table -x$innodb_lock_waits -x$io_by_thread_by_latency -x$io_global_by_file_by_bytes -x$io_global_by_file_by_latency -x$io_global_by_wait_by_bytes -x$io_global_by_wait_by_latency -x$latest_file_io -x$memory_by_host_by_current_bytes -x$memory_by_thread_by_current_bytes -x$memory_by_user_by_current_bytes -x$memory_global_by_current_bytes -x$memory_global_total -x$processlist -x$ps_digest_95th_percentile_by_avg_us -x$ps_digest_avg_latency_distribution -x$ps_schema_table_statistics_io -x$schema_flattened_keys -x$schema_index_statistics -x$schema_table_lock_waits -x$schema_table_statistics -x$schema_table_statistics_with_buffer -x$schema_tables_with_full_table_scans -x$session -x$statement_analysis -x$statements_with_errors_or_warnings -x$statements_with_full_table_scans -x$statements_with_runtimes_in_95th_percentile -x$statements_with_sorting -x$statements_with_temp_tables -x$user_summary -x$user_summary_by_file_io -x$user_summary_by_file_io_type -x$user_summary_by_stages -x$user_summary_by_statement_latency -x$user_summary_by_statement_type -x$wait_classes_global_by_avg_latency -x$wait_classes_global_by_latency -x$waits_by_host_by_latency -x$waits_by_user_by_latency -x$waits_global_by_latency +ERROR HY000: The table '(temporary)' is full DROP VIEW v; # End of 10.4 test SET @@global.tmp_disk_table_size = @start_global_value; diff --git a/mysql-test/suite/sys_vars/t/tmp_disk_table_size_basic.test b/mysql-test/suite/sys_vars/t/tmp_disk_table_size_basic.test index 454e2cbd6edc6..b74220614b0cf 100644 --- a/mysql-test/suite/sys_vars/t/tmp_disk_table_size_basic.test +++ b/mysql-test/suite/sys_vars/t/tmp_disk_table_size_basic.test @@ -201,9 +201,10 @@ SELECT tmp_disk_table_size = @@session.tmp_disk_table_size; --echo # SET 
@@tmp_disk_table_size=16384; +set @@max_heap_table_size=16384; CREATE VIEW v AS SELECT 'a'; ---sorted_result +--error ER_RECORD_FILE_FULL SELECT table_name FROM INFORMATION_SCHEMA.views; DROP VIEW v; diff --git a/sql/field.h b/sql/field.h index c70fb8c1b1cf3..3652aeddec122 100644 --- a/sql/field.h +++ b/sql/field.h @@ -523,7 +523,9 @@ inline bool is_temporal_type_with_date(enum_field_types type) /* - Check for blob field types, including GEOMETRY (which extends Field_blob). + Only needed for calc_group_buffer(), where we have an + enum_field_types but no Field object. + In all other cases use field->flags & BLOB_FLAG. */ static inline bool is_any_blob_field_type(enum_field_types type) { @@ -4391,18 +4393,6 @@ static inline void store_lowendian(ulonglong num, uchar *to, uint bytes) } } -static inline longlong read_lowendian(const uchar *from, uint bytes) -{ - switch(bytes) { - case 1: return from[0]; - case 2: return uint2korr(from); - case 3: return uint3korr(from); - case 4: return uint4korr(from); - case 8: return sint8korr(from); - default: DBUG_ASSERT(0); return 0; - } -} - extern LEX_CSTRING temp_lex_str; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index baac7853d4b79..69f6cac8302ec 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -21217,12 +21217,10 @@ bool Create_tmp_table::finalize(THD *thd, share->stored_rec_length= share->reclength; /* HEAP-specific: skip packed row format. - HEAP uses fixed-width base records (blob data is stored separately - in continuation chains), so use packed rows only for disk-based - engines when there are blobs or enough space to gain. + HEAP does not use recinfo->type, so this is harmless for HEAP + and ensures correct packing when converting to Aria. */ - if (share->db_type() != heap_hton && - (share->blob_fields || + if ((share->blob_fields || (string_total_length() >= STRING_TOTAL_LENGTH_TO_PACK_ROWS && (share->reclength / string_total_length() <= RATIO_TO_PACK_ROWS || string_total_length() / string_count() >= AVG_STRING_LENGTH_TO_PACK_ROWS)))) @@ -21433,10 +21431,12 @@ bool Create_tmp_table::finalize(THD *thd, } /* - For blob/geometry GROUP BY keys, field->key_length() returns - 0 (blobs) or packlength (geometry), both too small to hold - actual data. Use the item's max_length capped to - MAX_BLOB_WIDTH so new_key_field gets a usable size. + For blob/geometry GROUP BY keys, + m_key_part_info->length, set from field->key_length(), + contains 0 (blobs) or packlength (geometry), both too + small to hold actual data. Use the item's max_length + capped to MAX_BLOB_WIDTH so new_key_field gets a + usable size. */ uint32 key_field_length= m_key_part_info->length; if ((field->flags & BLOB_FLAG) && @@ -21447,7 +21447,7 @@ bool Create_tmp_table::finalize(THD *thd, HA_KEY_BLOB_LENGTH)); /* Check that the group buffer has room for this blob key field. - calc_group_buffer() may have sized the buffer before the field + calc_group_buffer() may have calculated the size of the buffer before the field was promoted to blob in the tmp table. If the promoted blob doesn't fit, fall back to m_using_unique_constraint. 
*/ @@ -28036,6 +28036,8 @@ void calc_group_buffer(TMP_TABLE_PARAM *param, ORDER *group) if (field) { enum_field_types type; + DBUG_ASSERT((bool) (field->flags & BLOB_FLAG) == + is_any_blob_field_type(field->type())); if (is_any_blob_field_type(type= field->type())) key_length+=MAX_BLOB_WIDTH; // Can't be used as a key else if (type == MYSQL_TYPE_VARCHAR || type == MYSQL_TYPE_VAR_STRING) diff --git a/storage/heap/CMakeLists.txt b/storage/heap/CMakeLists.txt index 81aa682aeabb1..c053551c6debb 100644 --- a/storage/heap/CMakeLists.txt +++ b/storage/heap/CMakeLists.txt @@ -27,15 +27,13 @@ IF(CMAKE_SYSTEM_NAME MATCHES AIX AND CMAKE_BUILD_TYPE STREQUAL "DEBUG") ENDIF() IF(WITH_UNIT_TESTS) - TARGET_COMPILE_DEFINITIONS(heap PRIVATE HEAP_UNIT_TESTS) - ADD_EXECUTABLE(hp_test1 hp_test1.c) TARGET_LINK_LIBRARIES(hp_test1 heap mysys dbug strings) ADD_EXECUTABLE(hp_test2 hp_test2.c) TARGET_LINK_LIBRARIES(hp_test2 heap mysys dbug strings) - MY_ADD_TESTS(hp_test_hash LINK_LIBRARIES heap mysys dbug strings) + MY_ADD_TESTS(hp_test_hash hp_test_scan LINK_LIBRARIES heap mysys dbug strings) INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/sql ${CMAKE_SOURCE_DIR}/include) diff --git a/storage/heap/_check.c b/storage/heap/_check.c index bb2c5d6f0794f..f107346765f74 100644 --- a/storage/heap/_check.c +++ b/storage/heap/_check.c @@ -85,8 +85,8 @@ int heap_check_heap(const HP_INFO *info, my_bool print_status) Case A (HP_BLOB_CASE_A_SINGLE_REC): single record, no header. Case B/C: read run_rec_count from header and skip the entire run. */ - if (hp_blob_run_format(current_ptr, share->visible) - == HP_BLOB_CASE_A_SINGLE_REC) + if (hp_blob_run_format(current_ptr, share->visible) == + HP_BLOB_CASE_A_SINGLE_REC) { cont_count++; } diff --git a/storage/heap/ha_heap.cc b/storage/heap/ha_heap.cc index 20fa2098cc568..ef783e4416df8 100644 --- a/storage/heap/ha_heap.cc +++ b/storage/heap/ha_heap.cc @@ -28,7 +28,7 @@ #include "field.h" static handler *heap_create_handler(handlerton *, TABLE_SHARE *, MEM_ROOT *); -static int heap_prepare_hp_create_info(TABLE *, bool, HP_CREATE_INFO *); +int heap_prepare_hp_create_info(TABLE *, bool, HP_CREATE_INFO *); static int heap_panic(handlerton *hton, ha_panic_function flag) @@ -72,7 +72,7 @@ static handler *heap_create_handler(handlerton *hton, *****************************************************************************/ ha_heap::ha_heap(handlerton *hton, TABLE_SHARE *table_arg) - :handler(hton, table_arg), file(0), records_changed(0), key_stat_version(0), + :handler(hton, table_arg), file(0), records_changed(0), key_stat_version(0), internal_table(0) {} @@ -366,6 +366,7 @@ void ha_heap::position(const uchar *record) *(HEAP_PTR*) ref= heap_position(file); // Ref is aligned } + int ha_heap::remember_rnd_pos() { saved_current_record= file->current_record; @@ -373,6 +374,7 @@ int ha_heap::remember_rnd_pos() return 0; } + int ha_heap::restart_rnd_next(uchar *buf) { /* @@ -625,12 +627,12 @@ ha_rows ha_heap::records_in_range(uint inx, const key_range *min_key, } -static int heap_prepare_hp_create_info(TABLE *table_arg, bool internal_table, - HP_CREATE_INFO *hp_create_info) +int heap_prepare_hp_create_info(TABLE *table_arg, bool internal_table, + HP_CREATE_INFO *hp_create_info) { TABLE_SHARE *share= table_arg->s; uint key, parts, mem_per_row= 0, keys= share->keys; - uint auto_key= 0, auto_key_type= 0; + uint auto_key= 0, auto_key_type= 0, blob_count= 0; ha_rows max_rows; HP_KEYDEF *keydef; HA_KEYSEG *seg; @@ -770,24 +772,21 @@ static int heap_prepare_hp_create_info(TABLE *table_arg, bool internal_table, 
my_free(keydef); return my_errno; } + for (uint blob_index= 0; blob_index < share->blob_fields; blob_index++) { - uint blob_count= 0; - for (uint b= 0; b < share->blob_fields; b++) - { - Field *field= table_arg->field[share->blob_field[b]]; - Field_blob *blob= (Field_blob*) field; + Field *field= table_arg->field[share->blob_field[blob_index]]; + Field_blob *blob= (Field_blob*) field; - DBUG_ASSERT(field->type() == MYSQL_TYPE_BLOB || - field->type() == MYSQL_TYPE_GEOMETRY); + DBUG_ASSERT(field->type() == MYSQL_TYPE_BLOB || + field->type() == MYSQL_TYPE_GEOMETRY); - blob_descs[blob_count].offset= - (uint) blob->offset(table_arg->record[0]); - blob_descs[blob_count].packlength= blob->pack_length_no_ptr(); - blob_count++; - } - hp_create_info->blob_descs= blob_descs; - hp_create_info->blob_count= blob_count; + blob_descs[blob_count].offset= + (uint) blob->offset(table_arg->record[0]); + blob_descs[blob_count].packlength= blob->pack_length_no_ptr(); + blob_count++; } + hp_create_info->blob_descs= blob_descs; + hp_create_info->blob_count= blob_count; } hp_create_info->auto_key= auto_key; @@ -901,7 +900,10 @@ int ha_heap::find_unique_row(uchar *record, uint unique_idx) share->blength, share->records)); do { - /* Hash pre-check avoids expensive blob materialization for non-matching entries */ + /* + Hash pre-check avoids expensive blob materialization + for non-matching entries. + */ if (pos->hash_of_key == rec_hash && !hp_rec_key_cmp(keyinfo, record, pos->ptr_to_rec, file)) { @@ -913,6 +915,12 @@ int ha_heap::find_unique_row(uchar *record, uint unique_idx) records. */ memcpy(record, file->current_ptr, (size_t) share->reclength); + /* + TODO: hp_rec_key_cmp() above already materialized blobs + via hp_materialize_one_blob(). A future optimization could + concatenate all non-zero-copy blobs into blob_buff during + comparison, avoiding this second materialization pass. + */ if (share->blob_count && hp_read_blobs(file, record, file->current_ptr)) DBUG_RETURN(-1); diff --git a/storage/heap/heapdef.h b/storage/heap/heapdef.h index 076c9b9fd794d..42fc0a5a8466b 100644 --- a/storage/heap/heapdef.h +++ b/storage/heap/heapdef.h @@ -33,6 +33,7 @@ C_MODE_START #define HP_MIN_RECORDS_IN_BLOCK 16 #define HP_MAX_RECORDS_IN_BLOCK 8192 +/* Flags stored in the 'visible' byte at end of each record */ #define HP_ROW_ACTIVE 1 /* Bit 0: record is active (not deleted) */ #define HP_ROW_HAS_CONT 2 /* Bit 1: primary record has continuation chain(s) */ #define HP_ROW_IS_CONT 4 /* Bit 2: this record IS a continuation record */ @@ -41,22 +42,11 @@ C_MODE_START /* Continuation run header: next_cont pointer + run_rec_count. - Stored at the beginning of the first record in each run. + Stored at the beginning of the first blob segment in each run. */ -#define HP_CONT_NEXT_PTR_SIZE sizeof(uchar*) #define HP_CONT_REC_COUNT_SIZE sizeof(uint16) -#define HP_CONT_HEADER_SIZE (HP_CONT_NEXT_PTR_SIZE + HP_CONT_REC_COUNT_SIZE) +#define HP_CONT_HEADER_SIZE (sizeof(uchar*) + HP_CONT_REC_COUNT_SIZE) -/* - Minimum contiguous run size parameters. - Runs smaller than this are not worth scavenging from the free list because - the per-run header overhead (10 bytes) becomes a significant fraction of - payload. Skip them and allocate from the tail instead. - - HP_CONT_MIN_RUN_BYTES: absolute floor for minimum run payload. - HP_CONT_RUN_FRACTION_NUM/DEN: minimum run size as a fraction of blob size. - min_run_bytes = MAX(blob_length * NUM / DEN, HP_CONT_MIN_RUN_BYTES) -*/ /* Row flags byte predicates. 
The flags byte is at offset 'visible' in each primary or run-header record. @@ -87,13 +77,13 @@ static inline my_bool hp_is_cont(const uchar *rec, uint visible) static inline const uchar *hp_cont_next(const uchar *chain) { const uchar *next; - memcpy(&next, chain, HP_CONT_NEXT_PTR_SIZE); + memcpy(&next, chain, sizeof(uchar*)); return next; } static inline uint16 hp_cont_rec_count(const uchar *chain) { - return uint2korr(chain + HP_CONT_NEXT_PTR_SIZE); + return uint2korr(chain + sizeof(uchar*)); } /* @@ -130,12 +120,32 @@ static inline enum hp_blob_format hp_blob_run_format(const uchar *chain, return HP_BLOB_CASE_C_MULTI_RUN; } -/* Minimum acceptable contiguous run size in bytes for free list reuse */ +/* + Minimum contiguous run size parameters. + Runs smaller than this are not worth scavenging from the + delete list because the per-run header overhead (10 bytes) + becomes a significant fraction of payload. Skip them and + allocate from the tail instead. + + HP_CONT_MIN_RUN_BYTES: absolute floor for minimum run payload. + HP_CONT_RUN_FRACTION_NUM/DEN: minimum run size as a fraction + of blob size. + min_run_bytes = MAX(blob_length * NUM / DEN, + HP_CONT_MIN_RUN_BYTES) +*/ #define HP_CONT_MIN_RUN_BYTES 128 -/* Minimum run size as a fraction of blob size: NUM/DEN = 1/10 */ #define HP_CONT_RUN_FRACTION_NUM 1 #define HP_CONT_RUN_FRACTION_DEN 10 +static inline uint32 hp_blob_min_run_bytes(uint32 blob_length) +{ + uint32 length= (blob_length / HP_CONT_RUN_FRACTION_DEN * + HP_CONT_RUN_FRACTION_NUM); + set_if_bigger(length, HP_CONT_MIN_RUN_BYTES); + set_if_smaller(length, blob_length); + return length; +} + /* Some extern variables */ extern LIST *heap_open_list,*heap_share_list; @@ -210,8 +220,13 @@ extern ha_rows hp_rows_in_memory(size_t reclength, size_t index_size, size_t memory_limit); extern size_t hp_memory_needed_per_row(size_t reclength); +extern uchar *hp_alloc_from_tail(HP_SHARE *info); extern uchar *next_free_record_pos(HP_SHARE *info); -extern uint32 hp_blob_length(const HP_BLOB_DESC *desc, const uchar *record); +static inline uint32 hp_blob_length(const HP_BLOB_DESC *desc, + const uchar *record) +{ + return (uint32) read_lowendian(record + desc->offset, desc->packlength); +} extern int hp_write_one_blob(HP_SHARE *share, const uchar *data_ptr, uint32 data_len, uchar **first_run_out); extern int hp_write_blobs(HP_INFO *info, const uchar *record, uchar *pos); diff --git a/storage/heap/hp_blob.c b/storage/heap/hp_blob.c index c9bd238b965e9..b82333a53d326 100644 --- a/storage/heap/hp_blob.c +++ b/storage/heap/hp_blob.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2025, MariaDB Corporation. +/* Copyright (c) 2026, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -33,70 +33,14 @@ #include -/* - Read blob data length from the record buffer. -*/ -uint32 hp_blob_length(const HP_BLOB_DESC *desc, const uchar *record) -{ - switch (desc->packlength) - { - case 1: - return (uint32) record[desc->offset]; - case 2: - return uint2korr(record + desc->offset); - case 3: - return uint3korr(record + desc->offset); - case 4: - return uint4korr(record + desc->offset); - default: - DBUG_ASSERT(0); - return 0; - } -} - - -/* - Allocate one record from the HP_BLOCK tail, bypassing the free list. - Same accounting as next_free_record_pos() but never uses del_link. 
- - Maintains the scan-boundary invariant: - total_records + deleted == block.last_allocated - by incrementing both last_allocated and total_records together. - heap_scan() relies on this invariant to know when to stop scanning. -*/ - -static uchar *hp_alloc_from_tail(HP_SHARE *share) -{ - int block_pos; - size_t length; - - if (!(block_pos= (share->block.last_allocated % - share->block.records_in_block))) - { - if ((share->block.last_allocated > share->max_records && - share->max_records) || - (share->data_length + share->index_length >= share->max_table_size)) - { - my_errno= HA_ERR_RECORD_FILE_FULL; - return NULL; - } - if (hp_get_new_block(share, &share->block, &length)) - return NULL; - share->data_length+= length; - } - share->block.last_allocated++; - share->total_records++; - return (uchar*) share->block.level_info[0].last_blocks + - block_pos * share->block.recbuffer; -} /* Free one continuation chain of variable-length runs. Walks from the first run, reads run_rec_count from each, frees all - records individually to the free list, then follows next_cont to the + records individually to the delete list, then follows next_cont to the next run. Maintains the scan-boundary invariant: @@ -128,8 +72,8 @@ void hp_free_run_chain(HP_SHARE *share, uchar *chain) else { /* Case B/C: header present with next_cont and run_rec_count */ - memcpy(&next_run, chain, HP_CONT_NEXT_PTR_SIZE); - run_rec_count= uint2korr(chain + HP_CONT_NEXT_PTR_SIZE); + memcpy(&next_run, chain, sizeof(uchar*)); + run_rec_count= uint2korr(chain + sizeof(uchar*)); } for (j= 0; j < run_rec_count; j++) @@ -194,19 +138,17 @@ static void hp_write_run_data(HP_SHARE *share, const uchar *data, } { - uchar *null_ptr= NULL; /* First record: run header + flags byte */ - memcpy(run_start, &null_ptr, HP_CONT_NEXT_PTR_SIZE); - int2store(run_start + HP_CONT_NEXT_PTR_SIZE, run_rec_count); + *((uchar**) run_start)= NULL; + int2store(run_start + sizeof(uchar*), run_rec_count); run_start[visible]= HP_ROW_ACTIVE | HP_ROW_IS_CONT | (format == HP_BLOB_CASE_B_ZEROCOPY ? HP_ROW_CONT_ZEROCOPY : 0); } /* - Case B: skip data copy in rec 0. - All data goes into rec 1..N-1 contiguously for zero-copy reads. - Case C: data starts in rec 0 after header. + We come here when we need data in the initial run block. + In other words, we are not writing a multi-row zerocopy block. */ if (format == HP_BLOB_CASE_C_MULTI_RUN) { @@ -223,13 +165,19 @@ static void hp_write_run_data(HP_SHARE *share, const uchar *data, This makes data in inner records contiguous, enabling zero-copy reads for single-run blobs (Case B). */ - for (rec= 1; rec < run_rec_count && remaining > 0; rec++) + run_start+= recbuffer; + for (rec= 1; rec < run_rec_count - 1; rec++, run_start+= recbuffer) { - uchar *rec_ptr= run_start + rec * recbuffer; - chunk= recbuffer; - if (chunk > remaining) - chunk= remaining; - memcpy(rec_ptr, data + off, chunk); + DBUG_ASSERT(remaining > recbuffer); + memcpy(run_start, data + off, recbuffer); + off+= recbuffer; + remaining-= recbuffer; + } + if (rec < run_rec_count) + { + DBUG_ASSERT(remaining != 0); + chunk= remaining < recbuffer ? remaining : recbuffer; + memcpy(run_start, data + off, chunk); off+= chunk; remaining-= chunk; } @@ -239,26 +187,25 @@ static void hp_write_run_data(HP_SHARE *share, const uchar *data, /* - Unlink a contiguous group from the free list and write blob data into it. + Unlink a contiguous group from the delete list and write blob data into it. + Does not support zerocopy (always uses HP_BLOB_CASE_C_MULTI_RUN). 
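(Aside on the run layout that hp_write_run_data() above and hp_unlink_and_write_run() below both produce. A minimal read-side sketch, illustrative only: it expands the heapdef.h accessors hp_cont_next() and hp_cont_rec_count() from this patch, with chain pointing at rec 0 of a run and visible/recbuffer as in HP_SHARE; the pointer field is 8 bytes on 64-bit builds:

    uchar *next_run;
    uint16 run_rec_count;
    memcpy(&next_run, chain, sizeof(uchar*));          /* next_cont link; NULL ends the chain */
    run_rec_count= uint2korr(chain + sizeof(uchar*));  /* records in this run */
    /* chain[visible] is the rec-0 flags byte: HP_ROW_ACTIVE | HP_ROW_IS_CONT,
       plus HP_ROW_CONT_ZEROCOPY for Case B; inner records 1..run_rec_count-1
       carry a full recbuffer of payload and no flags byte. */
)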
@param share Table share @param data_ptr Blob data @param data_len Total blob data length @param run_start Lowest address of the contiguous group @param run_count Number of contiguous records in the group - @param visible share->visible - @param recbuffer share->block.recbuffer @param data_offset [in/out] Current offset into blob data - @param first_run [in/out] Pointer to first run (NULL initially) + @param first_run [out] Pointer to first run; undefined until return @param prev_run_start [in/out] Pointer to previous run's start */ static void hp_unlink_and_write_run(HP_SHARE *share, const uchar *data_ptr, uint32 data_len, uchar *run_start, - uint16 run_count, uint visible, - uint recbuffer, uint32 *data_offset, + uint16 run_count, uint32 *data_offset, uchar **first_run, uchar **prev_run_start) { + uint recbuffer= share->block.recbuffer; DBUG_ASSERT(share->del_link == run_start + (run_count-1) * recbuffer); DBUG_ASSERT(share->del_link >= run_start && share->del_link < run_start + run_count * recbuffer); @@ -282,7 +229,7 @@ static void hp_unlink_and_write_run(HP_SHARE *share, const uchar *data_ptr, /* Write one blob column's data into a chain of continuation runs. - Allocates contiguous runs from the free list and/or block tail, + Allocates contiguous runs from the delete list and/or block tail, copies blob data into them, and returns the first run pointer. On failure, frees any partially allocated chain. @@ -310,9 +257,9 @@ int hp_write_one_blob(HP_SHARE *share, const uchar *data_ptr, 1 + (data_len - first_payload + recbuffer - 1) / recbuffer); /* - Calculate minimum acceptable contiguous run size for free-list reuse. + Calculate minimum acceptable contiguous run size for delete-list reuse. - The free-list walk (Step 1 below) rejects contiguous groups smaller + The delete-list walk (Step 1 below) rejects contiguous groups smaller than min_run_records, bailing to tail allocation instead. This prevents excessive chain fragmentation for large blobs: accepting tiny fragments would produce long chains of many short runs, each @@ -324,17 +271,12 @@ int hp_write_one_blob(HP_SHARE *share, const uchar *data_ptr, - 2 records minimum (a single-slot run is pure overhead) For small blobs whose total bytes or records needed is below this - threshold, the fragmentation concern doesn't apply — the entire blob + threshold, the fragmentation concern doesn't apply - the entire blob fits in one short run. Cap both min_run_bytes and min_run_records - so the free list can satisfy the allocation without falling through + so the delete list can satisfy the allocation without falling through to the tail unnecessarily. */ - min_run_bytes= data_len / HP_CONT_RUN_FRACTION_DEN * - HP_CONT_RUN_FRACTION_NUM; - if (min_run_bytes < HP_CONT_MIN_RUN_BYTES) - min_run_bytes= HP_CONT_MIN_RUN_BYTES; - if (min_run_bytes > data_len) - min_run_bytes= data_len; + min_run_bytes= hp_blob_min_run_bytes(data_len); min_run_records= (min_run_bytes + recbuffer - 1) / recbuffer; if (min_run_records < 2) min_run_records= 2; @@ -343,10 +285,10 @@ int hp_write_one_blob(HP_SHARE *share, const uchar *data_ptr, min_run_records= total_records_needed; /* - Step 1: Try to allocate contiguous runs from the top of the free list. + Step 1: Try to allocate contiguous runs from the top of the delete list. - Peek at free list records by walking next pointers without unlinking. - Track contiguous groups (descending addresses — LIFO order from + Peek at delete list records by walking next pointers without unlinking. 
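(A worked example of the thresholds computed a few lines up, using assumed sizes that are not from the patch: with data_len= 100000 and recbuffer= 1024, min_run_bytes= MAX(100000 / 10 * 1, 128)= 10000 and min_run_records= (10000 + 1023) / 1024= 10, so any contiguous delete-list group shorter than 10 records is rejected and allocation falls through to the block tail.)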
+ Track contiguous groups (descending addresses - LIFO order from hp_free_run_chain). On discontinuity: if the group qualifies (>= min_run_records), unlink and use it; if it doesn't, the tail of the delete_link is too small. Instead of continuing to search @@ -369,7 +311,7 @@ int hp_write_one_blob(HP_SHARE *share, const uchar *data_ptr, /* Only check descending direction: hp_free_run_chain() frees records in ascending address order (j=0..N), so LIFO pushes them onto the - free list in reverse — consecutive free list entries have descending + delete list in reverse - consecutive delete list entries have descending addresses. Ascending adjacency from unrelated deletes is ignored intentionally; we only recover runs that were freed together. */ @@ -389,13 +331,13 @@ int hp_write_one_blob(HP_SHARE *share, const uchar *data_ptr, /* Discontinuity. If the accumulated group qualifies, use it. - If not, the top of the free list is fragmented — give up entirely. + If not, the top of the delete list is fragmented - give up entirely. */ if (run_count < min_run_records) break; hp_unlink_and_write_run(share, data_ptr, data_len, run_start, - run_count, visible, recbuffer, - &data_offset, &first_run, &prev_run_start); + run_count, &data_offset, + &first_run, &prev_run_start); pos= share->del_link; total_records_needed-= run_count; @@ -411,8 +353,8 @@ int hp_write_one_blob(HP_SHARE *share, const uchar *data_ptr, /* Handle the last group after the loop ends */ if (run_count >= min_run_records && data_offset < data_len) hp_unlink_and_write_run(share, data_ptr, data_len, run_start, - run_count, visible, recbuffer, - &data_offset, &first_run, &prev_run_start); + run_count, &data_offset, + &first_run, &prev_run_start); } } @@ -498,7 +440,7 @@ int hp_write_one_blob(HP_SHARE *share, const uchar *data_ptr, if (is_only_run && run_payload >= remaining) { /* - Single-run blob — use zero-copy layout if possible. + Single-run blob - use zero-copy layout if possible. Case A: data fits in rec 0 payload (run_rec_count == 1). Case B: data in rec 1..N-1 only, contiguous for zero-copy reads.
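(The Case A/B/C split in the comment above, seen from the read side. A minimal sketch, illustrative only, using the format dispatcher and case names this patch defines in heapdef.h; the Case C reassembly path is elided:

    static const uchar *sketch_blob_bytes(const uchar *chain, uint visible,
                                          uint recbuffer)
    {
      switch (hp_blob_run_format(chain, visible))
      {
      case HP_BLOB_CASE_A_SINGLE_REC:   /* no header: data starts at rec 0 */
        return chain;
      case HP_BLOB_CASE_B_ZEROCOPY:     /* header in rec 0, data from rec 1 */
        return chain + recbuffer;
      default:                          /* Case C: reassemble into blob_buff */
        return NULL;
      }
    }
)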
*/ @@ -558,13 +500,13 @@ int hp_write_one_blob(HP_SHARE *share, const uchar *data_ptr, &data_offset); } else - /* Case B extension failed — fall back to Case C */ + /* Case B extension failed - fall back to Case C */ hp_write_run_data(share, data_ptr, data_len, run_start, run_rec_count, HP_BLOB_CASE_C_MULTI_RUN, &data_offset); } else - /* At block boundary — Case C */ + /* At block boundary - Case C */ hp_write_run_data(share, data_ptr, data_len, run_start, run_rec_count, HP_BLOB_CASE_C_MULTI_RUN, &data_offset); @@ -615,13 +557,13 @@ int hp_write_one_blob(HP_SHARE *share, const uchar *data_ptr, int hp_write_blobs(HP_INFO *info, const uchar *record, uchar *pos) { HP_SHARE *share= info->s; - uint i; + HP_BLOB_DESC *desc, *desc_end; my_bool has_blob_data= FALSE; DBUG_ENTER("hp_write_blobs"); - for (i= 0; i < share->blob_count; i++) + for (desc= share->blob_descs, desc_end= desc + share->blob_count; + desc < desc_end; desc++) { - HP_BLOB_DESC *desc= &share->blob_descs[i]; uint32 data_len; const uchar *data_ptr; uchar *first_run; @@ -630,8 +572,7 @@ int hp_write_blobs(HP_INFO *info, const uchar *record, uchar *pos) if (data_len == 0) { - uchar *null_ptr= NULL; - memcpy(pos + desc->offset + desc->packlength, &null_ptr, sizeof(null_ptr)); + *((uchar**) (pos + desc->offset + desc->packlength))= NULL; continue; } @@ -641,24 +582,16 @@ int hp_write_blobs(HP_INFO *info, const uchar *record, uchar *pos) if (hp_write_one_blob(share, data_ptr, data_len, &first_run)) { /* Rollback: free all previously completed blob columns */ - uint j; - for (j= 0; j < i; j++) + HP_BLOB_DESC *rd; + for (rd= share->blob_descs; rd < desc; rd++) { - HP_BLOB_DESC *rd= &share->blob_descs[j]; uchar *chain; memcpy(&chain, pos + rd->offset + rd->packlength, sizeof(chain)); if (chain) hp_free_run_chain(share, chain); - { - uchar *null_ptr= NULL; - memcpy(pos + rd->offset + rd->packlength, &null_ptr, sizeof(null_ptr)); - } - } - { - uchar *null_ptr= NULL; - memcpy(pos + desc->offset + desc->packlength, &null_ptr, - sizeof(null_ptr)); + *((uchar**) (pos + rd->offset + rd->packlength))= NULL; } + *((uchar**) (pos + desc->offset + desc->packlength))= NULL; DBUG_RETURN(my_errno); } @@ -671,6 +604,57 @@ int hp_write_blobs(HP_INFO *info, const uchar *record, uchar *pos) } +/* + Reassemble blob data from a Case C multi-run continuation chain + into a contiguous output buffer. 
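(A hypothetical caller of the reassembly function described below, for illustration only; the real call sites in this patch pass info->blob_buff, and my_malloc()/PSI_NOT_INSTRUMENTED here merely stand in for whatever buffer management the caller uses:

    uchar *buf= my_malloc(PSI_NOT_INSTRUMENTED, data_len, MYF(MY_WME));
    if (buf)
      hp_reassemble_chain(chain, data_len, buf,
                          share->visible, share->block.recbuffer);
)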
+ + @param chain First run pointer + @param data_len Total blob data length + @param out Output buffer (must be >= data_len bytes) + @param visible share->visible + @param recbuffer share->block.recbuffer +*/ + +static void hp_reassemble_chain(const uchar *chain, uint32 data_len, + uchar *out, uint visible, uint recbuffer) +{ + uint32 remaining= data_len; + while (chain && remaining > 0) + { + uint16 rec; + uint16 run_rec_count; + uint32 chunk; + const uchar *next_cont; + + next_cont= hp_cont_next(chain); + run_rec_count= hp_cont_rec_count(chain); + + /* First record payload (after header) */ + chunk= visible - HP_CONT_HEADER_SIZE; + if (chunk > remaining) + chunk= remaining; + memcpy(out, chain + HP_CONT_HEADER_SIZE, chunk); + out+= chunk; + remaining-= chunk; + + /* Inner records: recbuffer stride, no flags byte */ + for (rec= 1; rec < run_rec_count; rec++) + { + const uchar *rec_ptr= chain + rec * recbuffer; + DBUG_ASSERT(remaining != 0); + chunk= recbuffer; + if (chunk > remaining) + chunk= remaining; + memcpy(out, rec_ptr, chunk); + out+= chunk; + remaining-= chunk; + } + + chain= next_cont; + } +} + + /* Read blob data from continuation runs into the reassembly buffer. @@ -689,7 +673,7 @@ int hp_write_blobs(HP_INFO *info, const uchar *record, uchar *pos) int hp_read_blobs(HP_INFO *info, uchar *record, const uchar *pos) { HP_SHARE *share= info->s; - uint i; + HP_BLOB_DESC *desc, *desc_end; uint visible= share->visible; uint recbuffer= share->block.recbuffer; uint32 total_copy_size= 0; @@ -701,14 +685,15 @@ int hp_read_blobs(HP_INFO *info, uchar *record, const uchar *pos) if (!hp_has_cont(pos, share->visible)) DBUG_RETURN(0); + desc_end= share->blob_descs + share->blob_count; + /* Pass 1: sum data_len for blobs that need reassembly (not zero-copy). Cases A and B (HP_ROW_CONT_ZEROCOPY set, or single-record run) use zero-copy pointers into HP_BLOCK, no blob_buff needed. 
*/ - for (i= 0; i < share->blob_count; i++) + for (desc= share->blob_descs; desc < desc_end; desc++) { - HP_BLOB_DESC *desc= &share->blob_descs[i]; uint32 data_len; const uchar *chain; @@ -718,7 +703,7 @@ int hp_read_blobs(HP_INFO *info, uchar *record, const uchar *pos) memcpy(&chain, record + desc->offset + desc->packlength, sizeof(chain)); - /* Case A and Case B are zero-copy — need no reassembly buffer space */ + /* Case A and Case B are zero-copy - need no reassembly buffer space */ if (hp_blob_run_format(chain, visible) != HP_BLOB_CASE_C_MULTI_RUN) { info->has_zerocopy_blobs= TRUE; @@ -745,9 +730,8 @@ int hp_read_blobs(HP_INFO *info, uchar *record, const uchar *pos) /* Pass 2: process each blob column */ buff_ptr= info->blob_buff; - for (i= 0; i < share->blob_count; i++) + for (desc= share->blob_descs; desc < desc_end; desc++) { - HP_BLOB_DESC *desc= &share->blob_descs[i]; uint32 data_len; const uchar *chain; @@ -761,7 +745,7 @@ int hp_read_blobs(HP_INFO *info, uchar *record, const uchar *pos) { case HP_BLOB_CASE_A_SINGLE_REC: { - /* Case A: single-record single-run, no header — zero-copy */ + /* Case A: single-record single-run, no header - zero-copy */ const uchar *blob_data= chain; memcpy(record + desc->offset + desc->packlength, &blob_data, sizeof(blob_data)); @@ -769,7 +753,7 @@ int hp_read_blobs(HP_INFO *info, uchar *record, const uchar *pos) } case HP_BLOB_CASE_B_ZEROCOPY: { - /* Case B: data in rec 1..N-1, contiguous — zero-copy */ + /* Case B: data in rec 1..N-1, contiguous - zero-copy */ const uchar *blob_data= chain + recbuffer; memcpy(record + desc->offset + desc->packlength, &blob_data, sizeof(blob_data)); @@ -778,43 +762,12 @@ int hp_read_blobs(HP_INFO *info, uchar *record, const uchar *pos) case HP_BLOB_CASE_C_MULTI_RUN: { /* Case C: reassemble into blob_buff */ - uint32 remaining= data_len; - const uchar *next_cont; - while (chain && remaining > 0) - { - uint16 rec; - uint16 run_rec_count; - uint32 chunk; - - next_cont= hp_cont_next(chain); - run_rec_count= hp_cont_rec_count(chain); - - /* First record payload (after header) */ - chunk= visible - HP_CONT_HEADER_SIZE; - if (chunk > remaining) - chunk= remaining; - memcpy(buff_ptr, chain + HP_CONT_HEADER_SIZE, chunk); - buff_ptr+= chunk; - remaining-= chunk; - - /* Inner records: recbuffer stride, no flags byte */ - for (rec= 1; rec < run_rec_count && remaining > 0; rec++) - { - const uchar *rec_ptr= chain + rec * recbuffer; - chunk= recbuffer; - if (chunk > remaining) - chunk= remaining; - memcpy(buff_ptr, rec_ptr, chunk); - buff_ptr+= chunk; - remaining-= chunk; - } - - chain= next_cont; - } + uchar *blob_data= buff_ptr; + hp_reassemble_chain(chain, data_len, buff_ptr, visible, recbuffer); + buff_ptr+= data_len; /* Update blob pointer to reassembly buffer */ { - uchar *blob_data= buff_ptr - data_len; memcpy(record + desc->offset + desc->packlength, &blob_data, sizeof(blob_data)); } @@ -850,10 +803,6 @@ const uchar *hp_materialize_one_blob(HP_INFO *info, HP_SHARE *share= info->s; uint visible= share->visible; uint recbuffer= share->block.recbuffer; - uint32 remaining; - uchar *buff_ptr; - const uchar *next_cont; - uint16 run_rec_count; if (data_len == 0 || !chain) return chain; @@ -881,39 +830,7 @@ const uchar *hp_materialize_one_blob(HP_INFO *info, info->blob_buff_len= data_len; } - buff_ptr= info->blob_buff; - remaining= data_len; - while (chain && remaining > 0) - { - uint16 rec; - uint32 chunk; - - next_cont= hp_cont_next(chain); - run_rec_count= hp_cont_rec_count(chain); - - /* First record payload (after header) 
*/ - chunk= visible - HP_CONT_HEADER_SIZE; - if (chunk > remaining) - chunk= remaining; - memcpy(buff_ptr, chain + HP_CONT_HEADER_SIZE, chunk); - buff_ptr+= chunk; - remaining-= chunk; - - /* Inner records: recbuffer stride, no flags byte */ - for (rec= 1; rec < run_rec_count && remaining > 0; rec++) - { - const uchar *rec_ptr= chain + rec * recbuffer; - chunk= recbuffer; - if (chunk > remaining) - chunk= remaining; - memcpy(buff_ptr, rec_ptr, chunk); - buff_ptr+= chunk; - remaining-= chunk; - } - - chain= next_cont; - } - + hp_reassemble_chain(chain, data_len, info->blob_buff, visible, recbuffer); return info->blob_buff; } @@ -922,7 +839,7 @@ const uchar *hp_materialize_one_blob(HP_INFO *info, Free continuation run chains for all blob columns of a row. Walks each blob column's run chain and adds all records back to the - free list. + delete list. @param share Table share @param pos Primary record pointer in HP_BLOCK @@ -930,15 +847,15 @@ const uchar *hp_materialize_one_blob(HP_INFO *info, void hp_free_blobs(HP_SHARE *share, uchar *pos) { - uint i; + HP_BLOB_DESC *desc, *desc_end; DBUG_ENTER("hp_free_blobs"); if (!hp_has_cont(pos, share->visible)) DBUG_VOID_RETURN; - for (i= 0; i < share->blob_count; i++) + for (desc= share->blob_descs, desc_end= desc + share->blob_count; + desc < desc_end; desc++) { - HP_BLOB_DESC *desc= &share->blob_descs[i]; uchar *chain; memcpy(&chain, pos + desc->offset + desc->packlength, sizeof(chain)); diff --git a/storage/heap/hp_delete.c b/storage/heap/hp_delete.c index f7538843d6946..88746e64dcfe9 100644 --- a/storage/heap/hp_delete.c +++ b/storage/heap/hp_delete.c @@ -134,7 +134,10 @@ int hp_delete_key(HP_INFO *info, register HP_KEYDEF *keyinfo, while (pos->ptr_to_rec != recpos) { - /* Hash pre-check avoids expensive blob materialization for non-matching entries */ + /* + Hash pre-check avoids expensive blob materialization + for non-matching entries. 
+ */ if (flag && pos->hash_of_key == rec_hash && !hp_rec_key_cmp(keyinfo, record, pos->ptr_to_rec, info)) last_ptr=pos; /* Previous same key */ diff --git a/storage/heap/hp_hash.c b/storage/heap/hp_hash.c index 4e83157c01aa8..fa3146cc476c6 100644 --- a/storage/heap/hp_hash.c +++ b/storage/heap/hp_hash.c @@ -28,11 +28,7 @@ hp_charpos(CHARSET_INFO *cs, const uchar *b, const uchar *e, size_t num) } -#ifdef HEAP_UNIT_TESTS ulong hp_hashnr(HP_KEYDEF *keydef, const uchar *key); -#else -static ulong hp_hashnr(HP_KEYDEF *keydef, const uchar *key); -#endif /* @@ -42,15 +38,9 @@ static ulong hp_hashnr(HP_KEYDEF *keydef, const uchar *key); /* Size of a pointer, for use in memcpy to avoid -Wsizeof-pointer-memaccess */ #define HP_PTR_SIZE sizeof(void*) -static size_t hp_blob_key_length(uint packlength, const uchar *pos) +static inline size_t hp_blob_key_length(uint packlength, const uchar *pos) { - switch (packlength) { - case 1: return (size_t) pos[0]; - case 2: return uint2korr(pos); - case 3: return uint3korr(pos); - case 4: return uint4korr(pos); - } - return 0; + return (size_t) read_lowendian(pos, packlength); } @@ -256,11 +246,7 @@ void hp_movelink(HASH_INFO *pos, HASH_INFO *next_link, HASH_INFO *newlink) /* Calc hashvalue for a key */ -#ifdef HEAP_UNIT_TESTS ulong hp_hashnr(HP_KEYDEF *keydef, const uchar *key) -#else -static ulong hp_hashnr(HP_KEYDEF *keydef, const uchar *key) -#endif { /*register*/ ulong nr=1, nr2=4; @@ -317,7 +303,7 @@ static ulong hp_hashnr(HP_KEYDEF *keydef, const uchar *key) uint32 blob_len= uint4korr(pos); const uchar *blob_data; memcpy(&blob_data, pos + 4, HP_PTR_SIZE); - if (blob_data && blob_len > 0) + if (blob_len > 0) my_ci_hash_sort(cs, blob_data, blob_len, &nr, &nr2); } else @@ -394,7 +380,7 @@ ulong hp_rec_hashnr(register HP_KEYDEF *keydef, register const uchar *rec) size_t blob_len= hp_blob_key_length(packlength, pos); const uchar *blob_data; memcpy(&blob_data, pos + packlength, HP_PTR_SIZE); - if (blob_data && blob_len > 0) + if (blob_len > 0) my_ci_hash_sort(cs, blob_data, blob_len, &nr, &nr2); } else @@ -426,12 +412,12 @@ ulong hp_rec_hashnr(register HP_KEYDEF *keydef, register const uchar *rec) Compare two records using key segments. @param keydef Key definition - @param rec1 First record (input) — blob fields contain direct data + @param rec1 First record (input) - blob fields contain direct data pointers to caller-owned memory - @param rec2 Second record — when @a info is non-NULL, blob fields + @param rec2 Second record - when info is non-NULL, blob fields contain continuation chain pointers (stored format) that are materialized via hp_materialize_one_blob(). - When @a info is NULL, treated same as rec1. + When info is NULL, treated same as rec1. @param info When non-NULL, enables stored-blob materialization for rec2. Must be NULL when both records are input records. diff --git a/storage/heap/hp_scan.c b/storage/heap/hp_scan.c index 19914da73d86e..98dd6ea4bc771 100644 --- a/storage/heap/hp_scan.c +++ b/storage/heap/hp_scan.c @@ -63,6 +63,7 @@ int heap_scan(register HP_INFO *info, uchar *record) that manipulates next_block externally (e.g. restart_rnd_next) must also enforce this cap. */ +retry: pos= ++info->current_record; if (pos < info->next_block) { @@ -97,17 +98,15 @@ int heap_scan(register HP_INFO *info, uchar *record) DBUG_RETURN(my_errno=HA_ERR_RECORD_DELETED); } /* - Skip blob continuation runs. Rec 0 of each run has the flags byte - with HP_ROW_IS_CONT set; inner records (rec 1..N-1) have no flags - byte. 
Read run_rec_count from the header and skip the entire run. + Skip blob continuation runs internally — advance past the entire run + and retry from the top rather than returning to the caller. + + Rec 0 of each run has the flags byte with HP_ROW_IS_CONT set; inner + records (rec 1..N-1) have no flags byte. Read run_rec_count from the + header and skip the entire run. */ if (hp_is_cont(info->current_ptr, share->visible)) { - /* - Case A (HP_BLOB_CASE_A_SINGLE_REC): single record, no header — skip - just this one record. - Case B/C: read run_rec_count from header and skip the entire run. - */ if (hp_blob_run_format(info->current_ptr, share->visible) != HP_BLOB_CASE_A_SINGLE_REC) { @@ -119,8 +118,7 @@ int heap_scan(register HP_INFO *info, uchar *record) info->current_ptr+= skip * share->block.recbuffer; } } - info->update= HA_STATE_PREV_FOUND | HA_STATE_NEXT_FOUND; - DBUG_RETURN(my_errno=HA_ERR_RECORD_DELETED); + goto retry; } info->update= HA_STATE_PREV_FOUND | HA_STATE_NEXT_FOUND | HA_STATE_AKTIV; memcpy(record,info->current_ptr,(size_t) share->reclength); diff --git a/storage/heap/hp_test_hash-t.c b/storage/heap/hp_test_hash-t.c index 9852ec1c66bfb..49e2380cadeae 100644 --- a/storage/heap/hp_test_hash-t.c +++ b/storage/heap/hp_test_hash-t.c @@ -131,12 +131,10 @@ static void test_hash_consistency(void) ulong rec_hash_a, rec_hash_b, rec_hash_c; /* Case A: very small blob (fits in single record, <= visible - 10) */ - const uchar *data_a= (const uchar*) "Hi"; - uint16 len_a= 2; + LEX_CUSTRING data_a= { USTRING_WITH_LEN("Hi") }; /* Case B: medium blob (fits in single run, zero-copy) */ - const uchar *data_b= (const uchar*) "Hello World! This is a medium blob."; - uint16 len_b= 35; + LEX_CUSTRING data_b= { USTRING_WITH_LEN("Hello World! This is a medium blob.") }; /* Case C: larger blob data (would need multiple runs in real storage) */ uchar data_c[200]; @@ -152,7 +150,7 @@ static void test_hash_consistency(void) setup_keydef(&keydef, &seg, 1); /* --- Case A: small blob --- */ - build_record(rec, 1, data_a, len_a, FALSE); + build_record(rec, 1, data_a.str, data_a.length, FALSE); rec_hash_a= hp_rec_hashnr(&keydef, rec); hp_make_key(&keydef, key_buf, rec); @@ -163,17 +161,17 @@ static void test_hash_consistency(void) uint32 key_blob_len= uint4korr(key_buf); const uchar *key_blob_data; memcpy(&key_blob_data, key_buf + 4, PTR_SIZE); - ok(key_blob_len == len_a, + ok(key_blob_len == data_a.length, "Case A: hp_make_key blob length = %u (expected %u)", - (uint) key_blob_len, (uint) len_a); - ok(key_blob_data == data_a, + (uint) key_blob_len, (uint) data_a.length); + ok(key_blob_data == data_a.str, "Case A: hp_make_key blob pointer matches source data"); - ok(memcmp(key_blob_data, data_a, len_a) == 0, + ok(memcmp(key_blob_data, data_a.str, data_a.length) == 0, "Case A: hp_make_key blob data content matches"); } /* --- Case B: medium blob --- */ - build_record(rec, 2, data_b, len_b, FALSE); + build_record(rec, 2, data_b.str, data_b.length, FALSE); rec_hash_b= hp_rec_hashnr(&keydef, rec); hp_make_key(&keydef, key_buf, rec); @@ -181,12 +179,12 @@ static void test_hash_consistency(void) uint32 key_blob_len= uint4korr(key_buf); const uchar *key_blob_data; memcpy(&key_blob_data, key_buf + 4, PTR_SIZE); - ok(key_blob_len == len_b, + ok(key_blob_len == data_b.length, "Case B: hp_make_key blob length = %u (expected %u)", - (uint) key_blob_len, (uint) len_b); - ok(key_blob_data == data_b, + (uint) key_blob_len, (uint) data_b.length); + ok(key_blob_data == data_b.str, "Case B: hp_make_key blob pointer matches 
source data"); - ok(memcmp(key_blob_data, data_b, len_b) == 0, + ok(memcmp(key_blob_data, data_b.str, data_b.length) == 0, "Case B: hp_make_key blob data content matches"); } @@ -229,29 +227,26 @@ static void test_rec_key_cmp(void) HP_KEYDEF keydef; uchar rec1[REC_LENGTH], rec2[REC_LENGTH]; - const uchar *data1= (const uchar*) "same_data_value!"; - uint16 len1= 16; - const uchar *data2= (const uchar*) "different_value!"; - uint16 len2= 16; - const uchar *data3= (const uchar*) "short"; - uint16 len3= 5; + LEX_CUSTRING data1= { USTRING_WITH_LEN("same_data_value!") }; + LEX_CUSTRING data2= { USTRING_WITH_LEN("different_value!") }; + LEX_CUSTRING data3= { USTRING_WITH_LEN("short") }; setup_blob_keyseg(&seg, FALSE); setup_keydef(&keydef, &seg, 1); /* Same data, same length */ - build_record(rec1, 1, data1, len1, FALSE); - build_record(rec2, 2, data1, len1, FALSE); /* different int, same blob */ + build_record(rec1, 1, data1.str, data1.length, FALSE); + build_record(rec2, 2, data1.str, data1.length, FALSE); ok(hp_rec_key_cmp(&keydef, rec1, rec2, NULL) == 0, "rec_key_cmp: same blob data compares equal"); /* Different data, same length */ - build_record(rec2, 2, data2, len2, FALSE); + build_record(rec2, 2, data2.str, data2.length, FALSE); ok(hp_rec_key_cmp(&keydef, rec1, rec2, NULL) != 0, "rec_key_cmp: different blob data compares unequal"); /* Different length (PAD SPACE: "short" vs "short\0\0..." may differ) */ - build_record(rec2, 2, data3, len3, FALSE); + build_record(rec2, 2, data3.str, data3.length, FALSE); /* For binary charset, different lengths always means different */ { HA_KEYSEG seg_bin; @@ -260,8 +255,8 @@ static void test_rec_key_cmp(void) seg_bin.charset= &my_charset_bin; setup_keydef(&keydef_bin, &seg_bin, 1); - build_record(rec1, 1, data1, len1, FALSE); - build_record(rec2, 2, data3, len3, FALSE); + build_record(rec1, 1, data1.str, data1.length, FALSE); + build_record(rec2, 2, data3.str, data3.length, FALSE); ok(hp_rec_key_cmp(&keydef_bin, rec1, rec2, NULL) != 0, "rec_key_cmp: different length blobs compare unequal (binary)"); } @@ -282,8 +277,7 @@ static void test_null_blob(void) uchar key_buf[KEY_BUF_SIZE]; ulong hash1, hash2; - const uchar *data1= (const uchar*) "not_null_data"; - uint16 len1= 13; + LEX_CUSTRING data1= { USTRING_WITH_LEN("not_null_data") }; setup_blob_keyseg(&seg, TRUE); /* nullable */ setup_keydef(&keydef, &seg, 1); @@ -295,7 +289,7 @@ static void test_null_blob(void) "null_blob: two NULLs compare equal"); /* NULL vs non-NULL */ - build_record(rec2, 2, data1, len1, FALSE); + build_record(rec2, 2, data1.str, data1.length, FALSE); ok(hp_rec_key_cmp(&keydef, rec1, rec2, NULL) != 0, "null_blob: NULL vs non-NULL compares unequal"); @@ -307,9 +301,9 @@ static void test_null_blob(void) /* NULL hash differs from empty non-NULL */ { - const uchar *empty= (const uchar*) ""; + LEX_CUSTRING empty= { USTRING_WITH_LEN("") }; ulong hash_empty; - build_record(rec2, 2, empty, 0, FALSE); + build_record(rec2, 2, empty.str, empty.length, FALSE); hash_empty= hp_rec_hashnr(&keydef, rec2); ok(hash1 != hash_empty, "null_blob: NULL hash (%lu) != empty non-NULL hash (%lu)", @@ -334,20 +328,20 @@ static void test_empty_blob(void) uchar rec1[REC_LENGTH], rec2[REC_LENGTH]; ulong h1, h2; - const uchar *empty= (const uchar*) ""; - const uchar *nonempty= (const uchar*) "x"; + LEX_CUSTRING empty= { USTRING_WITH_LEN("") }; + LEX_CUSTRING nonempty= { USTRING_WITH_LEN("x") }; setup_blob_keyseg(&seg, FALSE); setup_keydef(&keydef, &seg, 1); /* Two empty blobs */ - build_record(rec1, 1, empty, 0, 
FALSE); - build_record(rec2, 2, empty, 0, FALSE); + build_record(rec1, 1, empty.str, empty.length, FALSE); + build_record(rec2, 2, empty.str, empty.length, FALSE); ok(hp_rec_key_cmp(&keydef, rec1, rec2, NULL) == 0, "empty_blob: two empty blobs compare equal"); /* Empty vs non-empty */ - build_record(rec2, 2, nonempty, 1, FALSE); + build_record(rec2, 2, nonempty.str, nonempty.length, FALSE); ok(hp_rec_key_cmp(&keydef, rec1, rec2, NULL) != 0, "empty_blob: empty vs non-empty compares unequal"); @@ -369,10 +363,8 @@ static void test_multi_segment_key(void) HP_KEYDEF keydef; uchar rec1[REC_LENGTH], rec2[REC_LENGTH]; uchar key_buf[KEY_BUF_SIZE]; - const uchar *blob_data= (const uchar*) "multi_seg_test_data"; - uint16 blob_len= 19; - const uchar *blob_data2= (const uchar*) "different_blob_data"; - uint16 blob_len2= 19; + LEX_CUSTRING blob_data= { USTRING_WITH_LEN("multi_seg_test_data") }; + LEX_CUSTRING blob_data2= { USTRING_WITH_LEN("different_blob_data") }; /* Segment 0: int4 at offset 1, length 4 */ memset(&segs[0], 0, sizeof(segs[0])); @@ -388,23 +380,23 @@ static void test_multi_segment_key(void) setup_keydef(&keydef, segs, 2); /* Same int, same blob */ - build_record(rec1, 42, blob_data, blob_len, FALSE); - build_record(rec2, 42, blob_data, blob_len, FALSE); + build_record(rec1, 42, blob_data.str, blob_data.length, FALSE); + build_record(rec2, 42, blob_data.str, blob_data.length, FALSE); ok(hp_rec_key_cmp(&keydef, rec1, rec2, NULL) == 0, "multi_seg: same int + same blob compares equal"); /* Different int, same blob */ - build_record(rec2, 99, blob_data, blob_len, FALSE); + build_record(rec2, 99, blob_data.str, blob_data.length, FALSE); ok(hp_rec_key_cmp(&keydef, rec1, rec2, NULL) != 0, "multi_seg: different int + same blob compares unequal"); /* Same int, different blob */ - build_record(rec2, 42, blob_data2, blob_len2, FALSE); + build_record(rec2, 42, blob_data2.str, blob_data2.length, FALSE); ok(hp_rec_key_cmp(&keydef, rec1, rec2, NULL) != 0, "multi_seg: same int + different blob compares unequal"); /* Hash consistency: record hash matches after make_key round-trip */ - build_record(rec1, 42, blob_data, blob_len, FALSE); + build_record(rec1, 42, blob_data.str, blob_data.length, FALSE); (void) hp_rec_hashnr(&keydef, rec1); hp_make_key(&keydef, key_buf, rec1); @@ -417,10 +409,10 @@ static void test_multi_segment_key(void) ok(key_int == 42, "multi_seg: hp_make_key int = %d (expected 42)", (int) key_int); - ok(key_blob_len == blob_len, + ok(key_blob_len == blob_data.length, "multi_seg: hp_make_key blob length = %u (expected %u)", - (uint) key_blob_len, (uint) blob_len); - ok(key_blob_data == blob_data, + (uint) key_blob_len, (uint) blob_data.length); + ok(key_blob_data == blob_data.str, "multi_seg: hp_make_key blob pointer matches"); } } @@ -436,16 +428,16 @@ static void test_pad_space(void) HA_KEYSEG seg; HP_KEYDEF keydef; uchar rec1[REC_LENGTH], rec2[REC_LENGTH]; - const uchar *data_no_pad= (const uchar*) "abc"; - const uchar *data_padded= (const uchar*) "abc "; + LEX_CUSTRING data_no_pad= { USTRING_WITH_LEN("abc") }; + LEX_CUSTRING data_padded= { USTRING_WITH_LEN("abc ") }; ulong h1, h2; setup_blob_keyseg(&seg, FALSE); seg.charset= &my_charset_latin1; /* PAD SPACE */ setup_keydef(&keydef, &seg, 1); - build_record(rec1, 1, data_no_pad, 3, FALSE); - build_record(rec2, 2, data_padded, 6, FALSE); + build_record(rec1, 1, data_no_pad.str, data_no_pad.length, FALSE); + build_record(rec2, 2, data_padded.str, data_padded.length, FALSE); ok(hp_rec_key_cmp(&keydef, rec1, rec2, NULL) == 0, "pad_space: 
'abc' == 'abc ' with PAD SPACE collation"); @@ -468,14 +460,16 @@ static void test_pad_space(void) The SQL layer builds lookup keys in varstring format (2B length prefix + inline data) via Field_blob::new_key_field() -> Field_varstring. The HEAP - handler's rebuild_key_from_group_buff() converts this to record[0]'s blob descriptor - format, then hp_make_key() builds the hash key. + handler's rebuild_key_from_group_buff() converts this to + record[0]'s blob descriptor format, then hp_make_key() + builds the hash key. This test simulates the full round-trip: 1. Build a record with blob data (as at INSERT time) 2. Compute hp_rec_hashnr() (stored in HASH_INFO at write time) 3. Build a varstring-format key (as the SQL layer would for lookup) - 4. Parse the varstring key into a record's blob field (rebuild_key_from_group_buff) + 4. Parse the varstring key into a record's blob field + (rebuild_key_from_group_buff) 5. hp_make_key() from that record, then hp_rec_hashnr() on the record 6. Verify the hashes match */ @@ -487,8 +481,7 @@ static void test_distinct_key_format(void) uchar rec_lookup[REC_LENGTH]; /* record rebuilt from lookup key */ ulong insert_hash, lookup_hash; - const uchar *blob_data= (const uchar*) "1 - 01xxxxxxxxxx"; - uint16 blob_len= 16; + LEX_CUSTRING blob_data= { USTRING_WITH_LEN("1 - 01xxxxxxxxxx") }; /* Step 3: Build varstring-format key (what SQL layer produces). @@ -500,12 +493,12 @@ static void test_distinct_key_format(void) setup_keydef(&keydef, &seg, 1); /* Step 1-2: INSERT-time record and hash */ - build_record(rec_insert, 1, blob_data, blob_len, FALSE); + build_record(rec_insert, 1, blob_data.str, blob_data.length, FALSE); insert_hash= hp_rec_hashnr(&keydef, rec_insert); varstring_key[0]= 0; /* not null */ - int2store(varstring_key + 1, blob_len); - memcpy(varstring_key + 3, blob_data, blob_len); + int2store(varstring_key + 1, blob_data.length); + memcpy(varstring_key + 3, blob_data.str, blob_data.length); /* Step 4: Parse varstring key into rec_lookup's blob field. 
@@ -561,8 +554,7 @@ static void test_distinct_key_truncation(void) uchar rec_trunc[REC_LENGTH]; ulong full_hash, trunc_hash; - const uchar *full_data= (const uchar*) "1 - 01xxxxxxxxxx"; /* 16 bytes */ - uint16 full_len= 16; + LEX_CUSTRING full_data= { USTRING_WITH_LEN("1 - 01xxxxxxxxxx") }; uint16 trunc_len= 10; /* pack_length() = packlength(2) + sizeof(ptr)(8) */ setup_blob_keyseg(&seg, FALSE); @@ -570,11 +562,11 @@ static void test_distinct_key_truncation(void) setup_keydef(&keydef, &seg, 1); /* Full record (as stored at INSERT time) */ - build_record(rec_full, 1, full_data, full_len, FALSE); + build_record(rec_full, 1, full_data.str, full_data.length, FALSE); full_hash= hp_rec_hashnr(&keydef, rec_full); /* Truncated record (as rebuilt from truncated varstring key) */ - build_record(rec_trunc, 1, full_data, trunc_len, FALSE); + build_record(rec_trunc, 1, full_data.str, trunc_len, FALSE); trunc_hash= hp_rec_hashnr(&keydef, rec_trunc); /* Hashes MUST differ — this is the bug: truncation causes lookup miss */ @@ -605,8 +597,7 @@ static void test_group_by_key_format(void) ulong insert_hash, lookup_hash; /* GROUP BY on group_concat result: blob data */ - const uchar *data= (const uchar*) "group_concat_result_data_here!!"; - uint16 data_len= 31; + LEX_CUSTRING data= { USTRING_WITH_LEN("group_concat_result_data_here!!") }; uchar varstring_key[1 + 2 + 256]; @@ -614,16 +605,17 @@ static void test_group_by_key_format(void) setup_keydef(&keydef, &seg, 1); /* INSERT-time hash */ - build_record(rec_insert, 1, data, data_len, FALSE); + build_record(rec_insert, 1, data.str, data.length, FALSE); insert_hash= hp_rec_hashnr(&keydef, rec_insert); /* - Simulate rebuild_key_from_group_buff: parse varstring key, populate rec_lookup. + Simulate rebuild_key_from_group_buff: parse varstring + key, populate rec_lookup. In GROUP BY, key_field_length = max_length (not 0, not pack_length). 
*/ /* no null bit for this test */ - int2store(varstring_key, data_len); - memcpy(varstring_key + 2, data, data_len); + int2store(varstring_key, data.length); + memcpy(varstring_key + 2, data.str, data.length); memset(rec_lookup, 0, REC_LENGTH); { @@ -663,8 +655,8 @@ static void test_multi_seg_distinct(void) HA_KEYSEG segs[2]; HP_KEYDEF keydef; uchar rec1[REC_LENGTH], rec2[REC_LENGTH]; - const uchar *blob1= (const uchar*) "sj_materialize_value_1"; - const uchar *blob2= (const uchar*) "sj_materialize_value_2"; + LEX_CUSTRING blob1= { USTRING_WITH_LEN("sj_materialize_value_1") }; + LEX_CUSTRING blob2= { USTRING_WITH_LEN("sj_materialize_value_2") }; ulong h1, h2, h3; /* Segment 0: int4 at offset 1, length 4 */ @@ -680,8 +672,8 @@ static void test_multi_seg_distinct(void) setup_keydef(&keydef, segs, 2); /* Same int, same blob */ - build_record(rec1, 100, blob1, 22, FALSE); - build_record(rec2, 100, blob1, 22, FALSE); + build_record(rec1, 100, blob1.str, blob1.length, FALSE); + build_record(rec2, 100, blob1.str, blob1.length, FALSE); h1= hp_rec_hashnr(&keydef, rec1); h2= hp_rec_hashnr(&keydef, rec2); @@ -690,8 +682,16 @@ static void test_multi_seg_distinct(void) ok(hp_rec_key_cmp(&keydef, rec1, rec2, NULL) == 0, "multi_distinct: same data compares equal"); + /* Same blob, different int */ + build_record(rec2, 200, blob1.str, blob1.length, FALSE); + h3= hp_rec_hashnr(&keydef, rec2); + ok(h1 != h3, + "multi_distinct: different int hashes differ (%lu != %lu)", h1, h3); + ok(hp_rec_key_cmp(&keydef, rec1, rec2, NULL) != 0, + "multi_distinct: same blob + different int compares unequal"); + /* Same int, different blob */ - build_record(rec2, 100, blob2, 22, FALSE); + build_record(rec2, 100, blob2.str, blob2.length, FALSE); h3= hp_rec_hashnr(&keydef, rec2); ok(h1 != h3, "multi_distinct: different blob hashes differ (%lu != %lu)", h1, h3); @@ -716,7 +716,6 @@ static void test_multi_seg_distinct(void) hp_rec_hashnr). 
*/ -/* hp_hashnr is static by default; exposed via HEAP_UNIT_TESTS */ extern ulong hp_hashnr(HP_KEYDEF *keydef, const uchar *key); /* @@ -800,15 +799,13 @@ static void test_key_vs_rec_hash_consistency(void) uchar key_buf[MIX_KEY_BUF_SIZE]; ulong rec_hash, key_hash; - const uchar *city= (const uchar *) "New York"; - uint16 city_len= 8; - const uchar *libname= (const uchar *) "New York Public Libra"; - uint8 libname_len= 21; + LEX_CUSTRING city= { USTRING_WITH_LEN("New York") }; + LEX_CUSTRING libname= { USTRING_WITH_LEN("New York Public Libra") }; setup_mixed_keydef(&keydef, segs); /* Build record and compute record-based hash (used at INSERT time) */ - build_mixed_record(rec, city, city_len, libname, libname_len, + build_mixed_record(rec, city.str, city.length, libname.str, libname.length, FALSE, FALSE); rec_hash= hp_rec_hashnr(&keydef, rec); @@ -823,13 +820,11 @@ static void test_key_vs_rec_hash_consistency(void) /* Second test: different data to ensure it's not a coincidence */ { - const uchar *city2= (const uchar *) "San Fran"; - uint16 city2_len= 8; - const uchar *libname2= (const uchar *) "SF Public Library"; - uint8 libname2_len= 17; + LEX_CUSTRING city2= { USTRING_WITH_LEN("San Fran") }; + LEX_CUSTRING libname2= { USTRING_WITH_LEN("SF Public Library") }; - build_mixed_record(rec, city2, city2_len, libname2, libname2_len, - FALSE, FALSE); + build_mixed_record(rec, city2.str, city2.length, + libname2.str, libname2.length, FALSE, FALSE); rec_hash= hp_rec_hashnr(&keydef, rec); hp_make_key(&keydef, key_buf, rec); key_hash= hp_hashnr(&keydef, key_buf); @@ -859,11 +854,11 @@ static void test_key_vs_rec_hash_consistency(void) memset(rec2b, 0, sizeof(rec2b)); /* blob */ rec2b[MIX_NULL_OFFSET]= 0; - int2store(rec2b + MIX_BLOB_OFFSET, city_len); - memcpy(rec2b + MIX_BLOB_OFFSET + MIX_BLOB_PACKLEN, &city, PTR_SIZE); + int2store(rec2b + MIX_BLOB_OFFSET, city.length); + memcpy(rec2b + MIX_BLOB_OFFSET + MIX_BLOB_PACKLEN, &city.str, PTR_SIZE); /* varchar with 2B length prefix */ - int2store(rec2b + MIX_VARCHAR_OFFSET, libname_len); - memcpy(rec2b + MIX_VARCHAR_OFFSET + 2, libname, libname_len); + int2store(rec2b + MIX_VARCHAR_OFFSET, libname.length); + memcpy(rec2b + MIX_VARCHAR_OFFSET + 2, libname.str, libname.length); rec_hash= hp_rec_hashnr(&keydef2b, rec2b); hp_make_key(&keydef2b, key2b, rec2b); @@ -885,7 +880,7 @@ static void test_key_vs_rec_hash_consistency(void) setup_blob_keyseg(&seg_blob, TRUE); setup_keydef(&kd_blob, &seg_blob, 1); - build_record(rec_b, 1, city, city_len, FALSE); + build_record(rec_b, 1, city.str, city.length, FALSE); rec_hash= hp_rec_hashnr(&kd_blob, rec_b); hp_make_key(&kd_blob, key_b, rec_b); key_hash= hp_hashnr(&kd_blob, key_b); @@ -902,7 +897,7 @@ int main(int argc __attribute__((unused)), char **argv __attribute__((unused))) { MY_INIT("hp_test_hash"); - plan(47); + plan(49); diag("Test 1: Hash consistency between record and key formats"); test_hash_consistency(); diff --git a/storage/heap/hp_test_key_setup-t.cc b/storage/heap/hp_test_key_setup-t.cc index 7316b3482bd86..08679e7034904 100644 --- a/storage/heap/hp_test_key_setup-t.cc +++ b/storage/heap/hp_test_key_setup-t.cc @@ -46,10 +46,9 @@ class Fake_thd_guard static const LEX_CSTRING test_field_name= {STRING_WITH_LEN("")}; -/* Wrapper declared in ha_heap.cc */ -extern int test_heap_prepare_hp_create_info(TABLE *table_arg, - bool internal_table, - HP_CREATE_INFO *hp_create_info); +extern int heap_prepare_hp_create_info(TABLE *table_arg, + bool internal_table, + HP_CREATE_INFO *hp_create_info); /* Record layout for 
test table (nullable tinyblob(16)): @@ -177,7 +176,7 @@ static void test_distinct_key_truncation() hp_ci.keys= 1; hp_ci.reclength= T_REC_LENGTH; - int err= test_heap_prepare_hp_create_info(&test_table, TRUE, &hp_ci); + int err= heap_prepare_hp_create_info(&test_table, TRUE, &hp_ci); ok(err == 0, "distinct_key_truncation: heap_prepare succeeded (err=%d)", err); @@ -212,197 +211,6 @@ static void test_distinct_key_truncation() } -/* - garbage_key_part_flag: heap_prepare_hp_create_info uses - key_part->key_part_flag to decide whether a key segment is a blob. - Several SQL layer paths (SJ weedout, expression cache) leave - key_part_flag uninitialized. If the garbage value has HA_BLOB_PART - set, heap_prepare_hp_create_info zeroes seg->length and treats the - segment as a blob, corrupting the HEAP hash index for non-blob - VARCHAR/VARBINARY keys. - - This manifests as: - - Row loss in SJ lookups (HA_ERR_KEY_NOT_FOUND on non-blob keys) - - COUNT(*)=1 instead of thousands because every insert after the - first is rejected as a duplicate (all records hash identically - when seg->length=0) - - Test: create a TABLE with a non-blob Field_varstring key and set - key_part_flag to garbage containing HA_BLOB_PART. Call - test_heap_prepare_hp_create_info and verify the resulting HEAP key - segment has the correct length (not 0) and does not have HA_BLOB_PART. -*/ - -/* - Record layout for varchar test table (non-nullable varbinary(28)): - byte 0: null bitmap (all zero for NOT NULL) - byte 1: varchar length_bytes=1 (field_length=28 < 256) - bytes 2-29: varchar data (28 bytes max) - reclength = 30 -*/ -#define V_REC_NULL_OFFSET 0 -#define V_REC_VARCHAR_OFFSET 1 -#define V_REC_VARCHAR_LEN 28 -#define V_REC_LENGTH 30 - - -class Hp_test_varchar_key_flag -{ - alignas(Field_varstring) char vs_storage[sizeof(Field_varstring)]; - Field_varstring *vs_field; - TABLE_SHARE share; - TABLE test_table; - uchar rec_buf[V_REC_LENGTH]; - KEY_PART_INFO local_kpi; - KEY local_sql_key; - -public: - Hp_test_varchar_key_flag() - { - memset(rec_buf, 0, sizeof(rec_buf)); - memset(static_cast(&share), 0, sizeof(share)); - share.fields= 1; - share.keys= 1; - share.reclength= V_REC_LENGTH; - share.rec_buff_length= V_REC_LENGTH; - share.db_record_offset= 1; - - static const LEX_CSTRING fname= {STRING_WITH_LEN("")}; - vs_field= ::new (vs_storage) Field_varstring( - rec_buf + V_REC_VARCHAR_OFFSET, - V_REC_VARCHAR_LEN, - 1, /* length_bytes: 1 for field_length < 256 */ - (uchar*) 0, /* null_ptr: NOT NULL */ - 0, /* null_bit */ - Field::NONE, - &fname, - &share, - DTCollation(&my_charset_bin)); - - vs_field->field_index= 0; - - Field *field_array[2]= { vs_field, NULL }; - - /* - Simulate SJ weedout: leave key_part_flag UNINITIALIZED. - We set it to garbage containing HA_BLOB_PART to reproduce - the exact failure condition. 
- */ - memset(&local_kpi, 0, sizeof(local_kpi)); - local_kpi.field= vs_field; - local_kpi.offset= V_REC_VARCHAR_OFFSET; - local_kpi.length= (uint16) vs_field->key_length(); - local_kpi.type= vs_field->key_type(); - local_kpi.key_part_flag= 0; - - memset(&local_sql_key, 0, sizeof(local_sql_key)); - local_sql_key.user_defined_key_parts= 1; - local_sql_key.usable_key_parts= 1; - local_sql_key.key_part= &local_kpi; - local_sql_key.algorithm= HA_KEY_ALG_HASH; - local_sql_key.key_length= local_kpi.length + 2; /* + varchar pack len */ - - memset(static_cast(&test_table), 0, sizeof(test_table)); - test_table.record[0]= rec_buf; - test_table.s= &share; - test_table.field= field_array; - test_table.key_info= &local_sql_key; - share.key_info= &local_sql_key; - - vs_field->table= &test_table; - - /* No blob fields */ - uint blob_offsets[1]= { 0 }; - share.blob_field= blob_offsets; - share.blob_fields= 0; - } - - ~Hp_test_varchar_key_flag() - { - vs_field->~Field_varstring(); - } - - void test_garbage_key_part_flag() - { - ok(local_kpi.length == V_REC_VARCHAR_LEN, - "garbage_flag setup: key_part.length = %u (field_length)", - (uint) local_kpi.length); - - Fake_thd_guard thd_guard; - - HP_CREATE_INFO hp_ci; - memset(&hp_ci, 0, sizeof(hp_ci)); - hp_ci.max_table_size= 1024*1024; - hp_ci.keys= 1; - hp_ci.reclength= V_REC_LENGTH; - - int err= test_heap_prepare_hp_create_info(&test_table, TRUE, &hp_ci); - - ok(err == 0, - "garbage_flag: heap_prepare succeeded (err=%d)", err); - - HA_KEYSEG *seg= hp_ci.keydef[0].seg; - ok(seg->length == V_REC_VARCHAR_LEN, - "garbage_flag: seg->length = %u (expected %u, NOT 0)", - (uint) seg->length, (uint) V_REC_VARCHAR_LEN); - - /* - Phase 1 test: seg->flag must not have HA_BLOB_PART. - In MDEV-38975 proper, hp_create.c strips spurious HA_BLOB_PART - via blob_descs cross-check, so this is handled at runtime. - The heap_prepare_hp_create_info fix (field->key_part_flag() - instead of key_part->key_part_flag) is deferred to Phase 1. - */ - ok(!(seg->flag & HA_BLOB_PART), - "garbage_flag: seg->flag (0x%x) does NOT have HA_BLOB_PART", - (uint) seg->flag); - - HP_KEYDEF *kd= &hp_ci.keydef[0]; - - { - uchar mk1[64], mk2[64]; - memset(mk1, 0, sizeof(mk1)); - memset(mk2, 0, sizeof(mk2)); - uchar mr1[V_REC_LENGTH], mr2[V_REC_LENGTH]; - memset(mr1, 0, sizeof(mr1)); - mr1[V_REC_VARCHAR_OFFSET]= 4; - memcpy(mr1 + V_REC_VARCHAR_OFFSET + 1, "XXXX", 4); - memset(mr2, 0, sizeof(mr2)); - mr2[V_REC_VARCHAR_OFFSET]= 4; - memcpy(mr2 + V_REC_VARCHAR_OFFSET + 1, "YYYY", 4); - hp_make_key(kd, mk1, mr1); - hp_make_key(kd, mk2, mr2); - ok(memcmp(mk1, mk2, 2 + V_REC_VARCHAR_LEN) != 0, - "garbage_flag: hp_make_key produces different keys for different values"); - } - - /* Record 1: "AAAA" */ - uchar r1[V_REC_LENGTH]; - memset(r1, 0, sizeof(r1)); - r1[V_REC_VARCHAR_OFFSET]= 4; /* length=4, 1-byte prefix */ - memcpy(r1 + V_REC_VARCHAR_OFFSET + 1, "AAAA", 4); - - /* Record 2: "BBBB" */ - uchar r2[V_REC_LENGTH]; - memset(r2, 0, sizeof(r2)); - r2[V_REC_VARCHAR_OFFSET]= 4; - memcpy(r2 + V_REC_VARCHAR_OFFSET + 1, "BBBB", 4); - - ulong rh1= hp_rec_hashnr(kd, r1); - ulong rh2= hp_rec_hashnr(kd, r2); - - ok(rh1 != rh2, - "garbage_flag: different records produce different hashes " - "(rh1=%lu, rh2=%lu)", rh1, rh2); - - ok(hp_rec_key_cmp(kd, r1, r2, NULL) != 0, - "garbage_flag: different records compare as different"); - - my_free(hp_ci.keydef); - } -}; - - /* rebuild_key_from_group_buff: mixed blob + varchar GROUP BY key. 
@@ -582,15 +390,13 @@ static void test_rebuild_key_from_group_buff_mixed() hp_ci.keys= 1; hp_ci.reclength= MIX_REC_LENGTH; - int err= test_heap_prepare_hp_create_info(&test_table, TRUE, &hp_ci); + int err= heap_prepare_hp_create_info(&test_table, TRUE, &hp_ci); ok(err == 0, "rebuild_key_from_group_buff_mixed: heap_prepare succeeded (err=%d)", err); /* Verify blob segment */ HP_KEYDEF *kd= &hp_ci.keydef[0]; ok(kd->keysegs == 2, "rebuild_key_from_group_buff_mixed: keysegs=%u (expected 2)", kd->keysegs); - ok(kd->has_blob_seg != 0, - "rebuild_key_from_group_buff_mixed: has_blob_seg is set"); /* Create a minimal ha_heap + fake HP_INFO for rebuild_key_from_group_buff. @@ -645,8 +451,7 @@ static void test_rebuild_key_from_group_buff_mixed() /* Test: heap_prepare_hp_create_info for various non-blob key types. - Verifies that has_blob_seg is false and seg->flag does not contain - HA_BLOB_PART for: + Verifies that seg->flag does not contain HA_BLOB_PART for: - VARCHAR-only keys (Field_varstring, length_bytes=1) - Fixed-length keys (Field_long = INT) - ENUM keys (Field_enum) @@ -720,7 +525,7 @@ struct Hp_test_single_key hp_ci->keys= 1; hp_ci->reclength= share.reclength; - return test_heap_prepare_hp_create_info(&test_table, TRUE, hp_ci); + return heap_prepare_hp_create_info(&test_table, TRUE, hp_ci); } }; @@ -764,8 +569,6 @@ static void test_varchar_only_key() ok((seg->flag & HA_VAR_LENGTH_PART), "varchar_only: seg->flag (0x%x) has HA_VAR_LENGTH_PART", (uint) seg->flag); - ok(!hp_ci.keydef[0].has_blob_seg, - "varchar_only: has_blob_seg is FALSE (no blob segments)"); my_free(hp_ci.keydef); vs->~Field_varstring(); @@ -803,8 +606,6 @@ static void test_int_only_key() ok(!(seg->flag & HA_VAR_LENGTH_PART), "int_only: seg->flag (0x%x) has NO HA_VAR_LENGTH_PART", (uint) seg->flag); - ok(!hp_ci.keydef[0].has_blob_seg, - "int_only: has_blob_seg is FALSE"); my_free(hp_ci.keydef); fl->~Field_long(); @@ -846,8 +647,6 @@ static void test_enum_key() (int) seg->type, (int) HA_KEYTYPE_BINARY); ok(!(seg->flag & HA_BLOB_PART), "enum: seg->flag (0x%x) has NO HA_BLOB_PART", (uint) seg->flag); - ok(!hp_ci.keydef[0].has_blob_seg, - "enum: has_blob_seg is FALSE"); my_free(hp_ci.keydef); fe->~Field_enum(); @@ -933,7 +732,7 @@ static void test_mixed_int_varchar_key() hp_ci.keys= 1; hp_ci.reclength= 26; - int err= test_heap_prepare_hp_create_info(&test_table, TRUE, &hp_ci); + int err= heap_prepare_hp_create_info(&test_table, TRUE, &hp_ci); ok(err == 0, "int_varchar: heap_prepare succeeded (err=%d)", err); @@ -1078,7 +877,7 @@ static void test_varchar_promoted_to_blob() hp_ci.keys= 1; hp_ci.reclength= 19; - int err= test_heap_prepare_hp_create_info(&test_table, TRUE, &hp_ci); + int err= heap_prepare_hp_create_info(&test_table, TRUE, &hp_ci); ok(err == 0, "promoted_blob: heap_prepare succeeded (err=%d)", err); @@ -1120,141 +919,6 @@ static void test_varchar_promoted_to_blob() -/* - Test: needs_key_rebuild_from_group_buff flag on HP_KEYDEF. - - Verifies that heap_prepare_hp_create_info sets needs_key_rebuild_from_group_buff=TRUE - only when table->group is set and key 0 has blob segments (GROUP BY path). - Without table->group (DISTINCT/sj-materialize), the flag is FALSE even - if the key has blob segments. -*/ -static void test_needs_key_rebuild_from_group_buff() -{ - /* - Reuse the mixed blob+varchar layout from test_rebuild_key_from_group_buff_mixed. - Two key parts: blob (city TEXT) + varchar (libname VARCHAR(21)). 
- */ - uchar rec[MIX_REC_LENGTH]; - memset(rec, 0, sizeof(rec)); - - TABLE_SHARE share; - memset(static_cast(&share), 0, sizeof(share)); - share.fields= 2; - share.blob_fields= 0; - share.keys= 1; - share.reclength= MIX_REC_LENGTH; - share.rec_buff_length= MIX_REC_LENGTH; - share.db_record_offset= 1; - - alignas(Field_blob) char bf_storage[sizeof(Field_blob)]; - Field_blob *bfp= make_test_field_blob(bf_storage, - rec + MIX_BLOB_OFFSET, - rec + MIX_REC_NULL_OFFSET, - 2, &share, - MIX_BLOB_PACKLEN, - &my_charset_latin1); - bfp->field_index= 0; - - static const LEX_CSTRING vs_name= {STRING_WITH_LEN("")}; - alignas(Field_varstring) char vs_storage[sizeof(Field_varstring)]; - Field_varstring *vfp= ::new (vs_storage) Field_varstring( - rec + MIX_VARCHAR_OFFSET, - MIX_VARCHAR_FIELD_LEN, - 1, - rec + MIX_REC_NULL_OFFSET, - 4, - Field::NONE, - &vs_name, - &share, - DTCollation(&my_charset_latin1)); - vfp->field_index= 1; - - Field *field_array[3]= { bfp, vfp, NULL }; - - KEY_PART_INFO kpi[2]; - memset(kpi, 0, sizeof(kpi)); - kpi[0].field= bfp; - kpi[0].offset= MIX_BLOB_OFFSET; - kpi[0].length= 0; - kpi[0].key_part_flag= HA_BLOB_PART; - kpi[0].null_bit= 2; - kpi[0].type= bfp->key_type(); - kpi[0].store_length= 103; - - kpi[1].field= vfp; - kpi[1].offset= MIX_VARCHAR_OFFSET; - kpi[1].length= MIX_VARCHAR_FIELD_LEN; - kpi[1].key_part_flag= HA_VAR_LENGTH_PART; - kpi[1].null_bit= 4; - kpi[1].type= vfp->key_type(); - kpi[1].store_length= MIX_VARCHAR_FIELD_LEN + 2 + 1; - - KEY sql_key; - memset(&sql_key, 0, sizeof(sql_key)); - sql_key.user_defined_key_parts= 2; - sql_key.usable_key_parts= 2; - sql_key.key_part= kpi; - sql_key.algorithm= HA_KEY_ALG_HASH; - - TABLE test_table; - memset(static_cast(&test_table), 0, sizeof(test_table)); - test_table.record[0]= rec; - test_table.s= &share; - test_table.field= field_array; - test_table.key_info= &sql_key; - share.key_info= &sql_key; - bfp->table= &test_table; - vfp->table= &test_table; - - uint blob_offsets[1]= { 0 }; - share.blob_field= blob_offsets; - - /* - A minimal ORDER group list (just needs to be non-NULL for detection). - We don't actually traverse it — only test_table.group != NULL matters. - */ - ORDER group_item; - memset(&group_item, 0, sizeof(group_item)); - - /* Test 1: with table->group set → needs_key_rebuild_from_group_buff = TRUE */ - test_table.group= &group_item; - - Fake_thd_guard thd_guard; - - HP_CREATE_INFO hp_ci; - memset(&hp_ci, 0, sizeof(hp_ci)); - hp_ci.max_table_size= 1024*1024; - hp_ci.keys= 1; - hp_ci.reclength= MIX_REC_LENGTH; - - int err= test_heap_prepare_hp_create_info(&test_table, TRUE, &hp_ci); - ok(err == 0, "needs_rebuild: with group, heap_prepare succeeded (err=%d)", err); - ok(hp_ci.keydef[0].needs_key_rebuild_from_group_buff != 0, - "needs_rebuild: with group + blob seg, flag is TRUE"); - - my_free(hp_ci.keydef); - my_free(hp_ci.blob_descs); - - /* Test 2: without table->group → needs_key_rebuild_from_group_buff = FALSE */ - test_table.group= NULL; - - memset(&hp_ci, 0, sizeof(hp_ci)); - hp_ci.max_table_size= 1024*1024; - hp_ci.keys= 1; - hp_ci.reclength= MIX_REC_LENGTH; - - err= test_heap_prepare_hp_create_info(&test_table, TRUE, &hp_ci); - ok(err == 0, "needs_rebuild: no group, heap_prepare succeeded (err=%d)", err); - ok(hp_ci.keydef[0].needs_key_rebuild_from_group_buff == 0, - "needs_rebuild: no group + blob seg, flag is FALSE"); - - my_free(hp_ci.keydef); - my_free(hp_ci.blob_descs); - vfp->~Field_varstring(); - bfp->~Field_blob(); -} - - /* Test: geometry GROUP BY key must NOT trigger blob key widening. 
@@ -1359,7 +1023,7 @@ static void test_geometry_group_by_no_widening() uint orig_store_length= kpi.store_length; uint orig_key_length= sql_key.key_length; - int err= test_heap_prepare_hp_create_info(&test_table, TRUE, &hp_ci); + int err= heap_prepare_hp_create_info(&test_table, TRUE, &hp_ci); ok(err == 0, "geom_group_by: heap_prepare succeeded (err=%d)", err); /* key_part->length must NOT be widened — must stay at packlength (4) */ @@ -1382,9 +1046,6 @@ static void test_geometry_group_by_no_widening() "geom_group_by: seg->length = %u (expected 0 = blob convention)", (uint) hp_ci.keydef[0].seg[0].length); - /* has_blob_seg must be set */ - ok(hp_ci.keydef[0].has_blob_seg != 0, - "geom_group_by: has_blob_seg is set"); my_free(hp_ci.keydef); my_free(hp_ci.blob_descs); @@ -1398,15 +1059,11 @@ int main(int argc __attribute__((unused)), MY_INIT("hp_test_key_setup"); /* Field constructors reference system_charset_info via DTCollation */ system_charset_info= &my_charset_latin1; - plan(63); + plan(47); diag("distinct_key_truncation: key_part->length widened for blob key parts"); test_distinct_key_truncation(); - diag("garbage_key_part_flag: uninitialized key_part_flag corrupts non-blob keys"); - Hp_test_varchar_key_flag t2; - t2.test_garbage_key_part_flag(); - diag("rebuild_key_from_group_buff: mixed blob + varchar GROUP BY key"); test_rebuild_key_from_group_buff_mixed(); @@ -1425,9 +1082,6 @@ int main(int argc __attribute__((unused)), diag("promoted_blob: varchar promoted to blob in tmp table"); test_varchar_promoted_to_blob(); - diag("needs_rebuild: needs_key_rebuild_from_group_buff flag with/without table->group"); - test_needs_key_rebuild_from_group_buff(); - diag("geom_group_by: geometry GROUP BY key must not trigger blob key widening"); test_geometry_group_by_no_widening(); diff --git a/storage/heap/hp_test_scan-t.c b/storage/heap/hp_test_scan-t.c new file mode 100644 index 0000000000000..e60e15f8eb9da --- /dev/null +++ b/storage/heap/hp_test_scan-t.c @@ -0,0 +1,335 @@ +/* + Unit tests for heap_scan() internal continuation record skipping. + + Verifies that heap_scan() skips blob continuation records internally + (via goto retry) rather than returning HA_ERR_RECORD_DELETED to the + caller. Uses real HEAP tables with blob columns. 
+*/ + +#include +#include +#include +#include +#include "heap.h" +#include "heapdef.h" + +/* + Record layout for a table (int4, blob(packlength=2)): + byte 0: null bitmap (1 byte) + bytes 1-4: int4 field (4 bytes) + bytes 5-6: blob packlength=2 (length, little-endian) + bytes 7-14: blob data pointer (8 bytes on x86_64) + reclength = 15 + visible_offset = MAX(15, 8) = 15 + recbuffer = ALIGN(15 + 1, 8) = 16 +*/ + +#define REC_LENGTH 15 +#define INT_OFFSET 1 +#define BLOB_OFFSET 5 +#define BLOB_PACKLEN 2 + +#define PTR_SIZE portable_sizeof_char_ptr + + +static void build_record(uchar *rec, int32 int_val, + const uchar *blob_data, uint16 blob_len) +{ + memset(rec, 0, REC_LENGTH); + int4store(rec + INT_OFFSET, int_val); + int2store(rec + BLOB_OFFSET, blob_len); + memcpy(rec + BLOB_OFFSET + BLOB_PACKLEN, &blob_data, PTR_SIZE); +} + + +static int create_and_open(const char *name, HP_SHARE **share, HP_INFO **info) +{ + HP_KEYDEF keydef; + HA_KEYSEG keyseg; + HP_CREATE_INFO ci; + HP_BLOB_DESC blob_desc; + my_bool unused; + + memset(&keyseg, 0, sizeof(keyseg)); + keyseg.type= HA_KEYTYPE_BINARY; + keyseg.start= INT_OFFSET; + keyseg.length= 4; + keyseg.charset= &my_charset_bin; + + memset(&keydef, 0, sizeof(keydef)); + keydef.keysegs= 1; + keydef.seg= &keyseg; + keydef.algorithm= HA_KEY_ALG_HASH; + keydef.flag= HA_NOSAME; + keydef.length= 4; + + blob_desc.offset= BLOB_OFFSET; + blob_desc.packlength= BLOB_PACKLEN; + + memset(&ci, 0, sizeof(ci)); + ci.keys= 1; + ci.keydef= &keydef; + ci.reclength= REC_LENGTH; + ci.max_records= 1000; + ci.min_records= 10; + ci.max_table_size= 1024 * 1024; + ci.blob_descs= &blob_desc; + ci.blob_count= 1; + + if (heap_create(name, &ci, share, &unused)) + return 1; + *info= heap_open(name, 2); + if (!*info) + return 1; + heap_extra(*info, HA_EXTRA_NO_READCHECK); + return 0; +} + + +/* + Test 1: scan with continuation records never returns HA_ERR_RECORD_DELETED. + + Inserts rows with blobs large enough to create continuation chains + (recbuffer=16, so >5 bytes needs continuations), then scans and + verifies that heap_scan returns only 0 or HA_ERR_END_OF_FILE. 
+*/
+static void test_scan_skips_continuations(void)
+{
+  HP_SHARE *share;
+  HP_INFO *info;
+  uchar rec[REC_LENGTH];
+  uchar scan_buf[REC_LENGTH];
+  int error;
+  uint row_count= 0;
+  my_bool got_record_deleted= FALSE;
+
+  uchar blob1[50], blob2[80];
+  memset(blob1, 'A', sizeof(blob1));
+  memset(blob2, 'B', sizeof(blob2));
+  blob1[0]= '1'; blob1[49]= 'Z';
+  blob2[0]= '2'; blob2[79]= 'Z';
+
+  if (create_and_open("test_scan_cont", &share, &info))
+  {
+    ok(0, "setup failed: %d", my_errno);
+    skip(5, "setup failed");
+    return;
+  }
+
+  build_record(rec, 1, blob1, sizeof(blob1));
+  ok(heap_write(info, rec) == 0, "insert row 1 (50-byte blob)");
+
+  build_record(rec, 2, blob2, sizeof(blob2));
+  ok(heap_write(info, rec) == 0, "insert row 2 (80-byte blob)");
+
+  ok(share->records == 2,
+     "records == 2 (got %lu)", (ulong) share->records);
+  ok(share->total_records > share->records,
+     "total_records (%lu) > records (%lu)",
+     (ulong) share->total_records, (ulong) share->records);
+
+  heap_scan_init(info);
+  while ((error= heap_scan(info, scan_buf)) != HA_ERR_END_OF_FILE)
+  {
+    if (error == HA_ERR_RECORD_DELETED)
+      got_record_deleted= TRUE;
+    else if (error == 0)
+      row_count++;
+    else
+      break;
+  }
+
+  ok(!got_record_deleted,
+     "heap_scan never returned HA_ERR_RECORD_DELETED for continuations");
+  ok(row_count == 2,
+     "scan returned 2 rows (got %u)", row_count);
+
+  heap_drop_table(info);
+  heap_close(info);
+}
+
+
+/*
+  Test 2: scan with deleted rows AND continuation records.
+
+  Inserts 3 rows with blobs, deletes the middle one, then scans.
+  Deleted rows should still return HA_ERR_RECORD_DELETED (existing
+  behavior), but continuation records must be skipped internally.
+*/
+static void test_scan_deleted_plus_continuations(void)
+{
+  HP_SHARE *share;
+  HP_INFO *info;
+  uchar rec[REC_LENGTH];
+  uchar scan_buf[REC_LENGTH];
+  int error;
+  uint row_count= 0;
+  uint deleted_count= 0;
+
+  uchar blob1[40], blob2[60], blob3[45];
+  memset(blob1, 'X', sizeof(blob1));
+  memset(blob2, 'Y', sizeof(blob2));
+  memset(blob3, 'Z', sizeof(blob3));
+
+  if (create_and_open("test_scan_del", &share, &info))
+  {
+    ok(0, "setup failed: %d", my_errno);
+    skip(7, "setup failed");  /* 7 remaining test points in this function */
+    return;
+  }
+
+  build_record(rec, 10, blob1, sizeof(blob1));
+  ok(heap_write(info, rec) == 0, "insert row 10");
+
+  build_record(rec, 20, blob2, sizeof(blob2));
+  ok(heap_write(info, rec) == 0, "insert row 20");
+
+  build_record(rec, 30, blob3, sizeof(blob3));
+  ok(heap_write(info, rec) == 0, "insert row 30");
+
+  /* Delete row 20 via key lookup */
+  {
+    uchar key[4];
+    int4store(key, 20);
+    ok(heap_rkey(info, rec, 0, key, 4, HA_READ_KEY_EXACT) == 0,
+       "found row 20 for deletion");
+    ok(heap_delete(info, rec) == 0, "deleted row 20");
+  }
+
+  ok(share->records == 2,
+     "records == 2 after delete (got %lu)", (ulong) share->records);
+
+  heap_scan_init(info);
+  while ((error= heap_scan(info, scan_buf)) != HA_ERR_END_OF_FILE)
+  {
+    if (error == HA_ERR_RECORD_DELETED)
+      deleted_count++;
+    else if (error == 0)
+      row_count++;
+    else
+      break;
+  }
+
+  ok(row_count == 2, "scan returned 2 live rows (got %u)", row_count);
+  ok(deleted_count > 0,
+     "scan returned HA_ERR_RECORD_DELETED for deleted slots (%u times)",
+     deleted_count);
+
+  heap_drop_table(info);
+  heap_close(info);
+}
+
+
+/*
+  Test 3: scan a non-blob table is unaffected.
+
+  Inserts rows without blobs, scans, verifies existing behavior is
+  unchanged (deleted records still return HA_ERR_RECORD_DELETED).
+*/ +static void test_scan_no_blobs(void) +{ + HP_KEYDEF keydef; + HA_KEYSEG keyseg; + HP_CREATE_INFO ci; + HP_SHARE *share; + HP_INFO *info; + my_bool unused; + uchar rec[REC_LENGTH]; + uchar scan_buf[REC_LENGTH]; + int error; + uint row_count= 0; + uint deleted_count= 0; + + memset(&keyseg, 0, sizeof(keyseg)); + keyseg.type= HA_KEYTYPE_BINARY; + keyseg.start= INT_OFFSET; + keyseg.length= 4; + keyseg.charset= &my_charset_bin; + + memset(&keydef, 0, sizeof(keydef)); + keydef.keysegs= 1; + keydef.seg= &keyseg; + keydef.algorithm= HA_KEY_ALG_HASH; + keydef.flag= HA_NOSAME; + keydef.length= 4; + + memset(&ci, 0, sizeof(ci)); + ci.keys= 1; + ci.keydef= &keydef; + ci.reclength= REC_LENGTH; + ci.max_records= 1000; + ci.min_records= 10; + ci.max_table_size= 1024 * 1024; + + if (heap_create("test_scan_noblob", &ci, &share, &unused)) + { + ok(0, "setup failed: %d", my_errno); + skip(4, "setup failed"); + return; + } + info= heap_open("test_scan_noblob", 2); + if (!info) + { + ok(0, "open failed: %d", my_errno); + skip(4, "open failed"); + return; + } + + /* Insert 3 rows (no blob data, just int) */ + memset(rec, 0, REC_LENGTH); + int4store(rec + INT_OFFSET, 100); + ok(heap_write(info, rec) == 0, "insert row 100"); + + int4store(rec + INT_OFFSET, 200); + ok(heap_write(info, rec) == 0, "insert row 200"); + + int4store(rec + INT_OFFSET, 300); + ok(heap_write(info, rec) == 0, "insert row 300"); + + /* Delete middle row */ + { + uchar key[4]; + int4store(key, 200); + heap_rkey(info, rec, 0, key, 4, HA_READ_KEY_EXACT); + heap_delete(info, rec); + } + + heap_scan_init(info); + while ((error= heap_scan(info, scan_buf)) != HA_ERR_END_OF_FILE) + { + if (error == HA_ERR_RECORD_DELETED) + deleted_count++; + else if (error == 0) + row_count++; + else + break; + } + + ok(row_count == 2, "no-blob scan returned 2 live rows (got %u)", row_count); + ok(deleted_count > 0, + "no-blob scan returned HA_ERR_RECORD_DELETED for deleted slots (%u)", + deleted_count); + + heap_drop_table(info); + heap_close(info); +} + + +int main(int argc __attribute__((unused)), + char **argv __attribute__((unused))) +{ + MY_INIT("hp_test_scan"); + plan(19); + + diag("Test 1: scan skips continuation records internally"); + test_scan_skips_continuations(); + + diag("Test 2: deleted rows + continuations"); + test_scan_deleted_plus_continuations(); + + diag("Test 3: non-blob table scan unchanged"); + test_scan_no_blobs(); + + my_end(0); + return exit_status(); +} diff --git a/storage/heap/hp_update.c b/storage/heap/hp_update.c index ef772f806b83e..00b2461cf1bb0 100644 --- a/storage/heap/hp_update.c +++ b/storage/heap/hp_update.c @@ -72,13 +72,14 @@ int heap_update(HP_INFO *info, const uchar *old, const uchar *heap_new) my_bool *blob_changed= (my_bool*)(saved_chains + share->blob_count); my_bool any_changed= FALSE; my_bool has_blob_data= FALSE; + HP_BLOB_DESC *desc; + uint32 new_len; uint i; /* Save old chain pointers and detect which blobs changed */ - for (i= 0; i < share->blob_count; i++) + for (i= 0, desc= share->blob_descs; i < share->blob_count; i++, desc++) { - HP_BLOB_DESC *desc= &share->blob_descs[i]; - uint32 old_len, new_len; + uint32 old_len, cur_len; saved_chains[i]= NULL; if (had_cont) @@ -86,9 +87,9 @@ int heap_update(HP_INFO *info, const uchar *old, const uchar *heap_new) sizeof(saved_chains[i])); old_len= hp_blob_length(desc, old); - new_len= hp_blob_length(desc, heap_new); + cur_len= hp_blob_length(desc, heap_new); - if (old_len != new_len) + if (old_len != cur_len) blob_changed[i]= TRUE; else if (old_len == 0) 
blob_changed[i]= FALSE; @@ -102,20 +103,21 @@ int heap_update(HP_INFO *info, const uchar *old, const uchar *heap_new) blob_changed[i]= (old_data != new_data && memcmp(old_data, new_data, old_len) != 0); } - if (blob_changed[i]) - any_changed= TRUE; + any_changed|= blob_changed[i]; } memcpy(pos, heap_new, (size_t) share->reclength); /* Write new chains for changed blobs, restore old pointers for unchanged */ - for (i= 0; i < share->blob_count; i++) + for (i= 0, desc= share->blob_descs; i < share->blob_count; i++, desc++) { - HP_BLOB_DESC *desc= &share->blob_descs[i]; - if (!blob_changed[i]) { - /* Restore old chain pointer that memcpy overwrote */ + /* + Restore old chain pointer, from the old current_ptr, where the blob + data is in heap memory. This is not the same as the pointer in 'old' + as this may have been allocated from a segmented blob. + */ if (saved_chains[i]) { memcpy(pos + desc->offset + desc->packlength, @@ -125,51 +127,47 @@ int heap_update(HP_INFO *info, const uchar *old, const uchar *heap_new) continue; } + new_len= hp_blob_length(desc, heap_new); + if (new_len == 0) + *((uchar**) (pos + desc->offset + desc->packlength))= NULL; + else { - uint32 new_len= hp_blob_length(desc, heap_new); - if (new_len == 0) - { - uchar *null_ptr= NULL; - memcpy(pos + desc->offset + desc->packlength, - &null_ptr, sizeof(null_ptr)); - } - else - { - const uchar *data_ptr; - uchar *first_run; + const uchar *data_ptr; + uchar *first_run; - has_blob_data= TRUE; - memcpy(&data_ptr, heap_new + desc->offset + desc->packlength, - sizeof(data_ptr)); + has_blob_data= TRUE; + memcpy(&data_ptr, heap_new + desc->offset + desc->packlength, + sizeof(data_ptr)); - if (hp_write_one_blob(share, data_ptr, new_len, &first_run)) + if (hp_write_one_blob(share, data_ptr, new_len, &first_run)) + { + /* Rollback: free new chains already written, restore old record */ + uint j; + for (j= 0; j < i; j++) { - /* Rollback: free new chains already written, restore old record */ - uint j; - for (j= 0; j < i; j++) - if (blob_changed[j]) - { - uchar *chain; - memcpy(&chain, pos + share->blob_descs[j].offset + - share->blob_descs[j].packlength, sizeof(chain)); - if (chain) - hp_free_run_chain(share, chain); - } - memcpy(pos, old, (size_t) share->reclength); - if (had_cont) + if (blob_changed[j]) { - for (j= 0; j < share->blob_count; j++) - memcpy(pos + share->blob_descs[j].offset + - share->blob_descs[j].packlength, - &saved_chains[j], sizeof(saved_chains[j])); - pos[share->visible]|= HP_ROW_HAS_CONT; + uchar *chain; + memcpy(&chain, pos + share->blob_descs[j].offset + + share->blob_descs[j].packlength, sizeof(chain)); + if (chain) + hp_free_run_chain(share, chain); } - my_safe_afree(saved_chains, alloc_size); - goto err; } - memcpy(pos + desc->offset + desc->packlength, - &first_run, sizeof(first_run)); + memcpy(pos, old, (size_t) share->reclength); + if (had_cont) + { + for (j= 0; j < share->blob_count; j++) + memcpy(pos + share->blob_descs[j].offset + + share->blob_descs[j].packlength, + &saved_chains[j], sizeof(saved_chains[j])); + pos[share->visible]|= HP_ROW_HAS_CONT; + } + my_safe_afree(saved_chains, alloc_size); + goto err; } + memcpy(pos + desc->offset + desc->packlength, + &first_run, sizeof(first_run)); } } @@ -198,16 +196,14 @@ int heap_update(HP_INFO *info, const uchar *old, const uchar *heap_new) */ if (any_changed || info->has_zerocopy_blobs) { - uchar *new_rec= (uchar*) heap_new; - for (i= 0; i < share->blob_count; i++) + for (i= 0, desc= share->blob_descs; i < share->blob_count; i++, desc++) { - HP_BLOB_DESC 
*desc= &share->blob_descs[i]; uchar *chain; memcpy(&chain, pos + desc->offset + desc->packlength, sizeof(chain)); - memcpy(new_rec + desc->offset + desc->packlength, &chain, + memcpy((uchar*) heap_new + desc->offset + desc->packlength, &chain, sizeof(chain)); } - hp_read_blobs(info, new_rec, pos); + hp_read_blobs(info, (uchar*) heap_new, pos); } my_safe_afree(saved_chains, alloc_size); diff --git a/storage/heap/hp_write.c b/storage/heap/hp_write.c index 9a8b244307de6..d3ffc4b41d80c 100644 --- a/storage/heap/hp_write.c +++ b/storage/heap/hp_write.c @@ -56,7 +56,24 @@ int heap_write(HP_INFO *info, const uchar *record) if (share->blob_count) { if (hp_write_blobs(info, record, pos)) - goto err_blob; + { + /* + Blob write failed after all keys were written successfully. + Roll back all keys - unlike err: below, no key needs to be skipped. + + Do NOT call hp_free_blobs() here: hp_write_blobs() is self-cleaning + on failure - hp_write_one_blob() frees its own partial chain, and + hp_write_blobs() frees all previously completed columns (0..i-1) and + NULLs every chain pointer in pos. + */ + info->errkey= -1; + for (keydef= end - 1; keydef >= share->keydef; keydef--) + { + if ((*keydef->delete_key)(info, keydef, record, pos, 0)) + break; + } + goto err_common; + } } else pos[share->visible]= 1; /* Mark record as not deleted */ @@ -71,33 +88,6 @@ int heap_write(HP_INFO *info, const uchar *record) heap_update_auto_increment(info, record); DBUG_RETURN(0); -err_blob: - /* - Blob write failed after all keys were written successfully. - Roll back all keys — unlike err: below, no key needs to be skipped. - - Do NOT call hp_free_blobs() here: hp_write_blobs() is self-cleaning - on failure — hp_write_one_blob() frees its own partial chain, and - hp_write_blobs() frees all previously completed columns (0..i-1) and - NULLs every chain pointer in pos. Calling hp_free_blobs() after this - would be both redundant and dangerous: - - The visibility byte pos[share->visible] has not been set yet (it is - only written on hp_write_blobs() success at line 493), so it may - contain uninitialized data from tail allocation with HP_ROW_HAS_CONT - bit set. - - Blob columns after the failed one (i+1..blob_count-1) still have the - SQL layer's original data pointers in pos (from memcpy at line 55), - not continuation chain pointers. hp_free_run_chain() would interpret - those as chain headers and crash. - */ - info->errkey= -1; - for (keydef= end - 1; keydef >= share->keydef; keydef--) - { - if ((*keydef->delete_key)(info, keydef, record, pos, 0)) - break; - } - goto err_common; - err: if (my_errno == HA_ERR_FOUND_DUPP_KEY) DBUG_PRINT("info",("Duplicate key: %d", (int) (keydef - share->keydef))); @@ -123,9 +113,7 @@ int heap_write(HP_INFO *info, const uchar *record) Do NOT call hp_free_blobs here: the err: label is reached when a key write fails (line 52), which is BEFORE memcpy(pos, record, reclength) and hp_write_blobs(). The slot at pos still contains stale data from the - free list, so hp_free_blobs would chase garbage chain pointers. - Only err_blob: (above) needs hp_free_blobs, since blobs may have been - partially written there. + delete list, so hp_free_blobs would chase garbage chain pointers. */ err_common: @@ -182,23 +170,26 @@ int hp_rb_write_key(HP_INFO *info, HP_KEYDEF *keyinfo, const uchar *record, matching the new slot). heap_scan() relies on this sum to detect EOF. */ -uchar *next_free_record_pos(HP_SHARE *info) +/* + Allocate one record from the HP_BLOCK tail, bypassing the + delete list. 
Used by next_free_record_pos() when no deleted + records are available, and by hp_write_one_blob() for blob + continuation chain allocation. + + Maintains the scan-boundary invariant: + total_records + deleted == block.last_allocated + by incrementing both last_allocated and total_records together. + heap_scan() relies on this invariant to know when to stop. +*/ + +uchar *hp_alloc_from_tail(HP_SHARE *info) { int block_pos; - uchar *pos; size_t length; - DBUG_ENTER("next_free_record_pos"); + DBUG_ENTER("hp_alloc_from_tail"); - if (info->del_link) - { - pos=info->del_link; - info->del_link= *((uchar**) pos); - info->deleted--; - info->total_records++; - DBUG_PRINT("exit",("Used old position: %p", pos)); - DBUG_RETURN(pos); - } - if (!(block_pos=(info->block.last_allocated % info->block.records_in_block))) + if (!(block_pos=(info->block.last_allocated % + info->block.records_in_block))) { if ((info->block.last_allocated > info->max_records && info->max_records) || @@ -229,6 +220,24 @@ uchar *next_free_record_pos(HP_SHARE *info) } +uchar *next_free_record_pos(HP_SHARE *info) +{ + uchar *pos; + DBUG_ENTER("next_free_record_pos"); + + if (info->del_link) + { + pos=info->del_link; + info->del_link= *((uchar**) pos); + info->deleted--; + info->total_records++; + DBUG_PRINT("exit",("Used old position: %p", pos)); + DBUG_RETURN(pos); + } + DBUG_RETURN(hp_alloc_from_tail(info)); +} + + /* Write a hash-key to the hash-index SYNOPSIS From 20721cb2864516ea50575a7027dfd4b1c88ac725 Mon Sep 17 00:00:00 2001 From: Arcadiy Ivanov Date: Fri, 1 May 2026 02:23:00 -0400 Subject: [PATCH 17/27] Batch tail allocation for blob continuation chains `hp_alloc_from_tail()` now takes `uint *blocks` (in/out) and allocates a contiguous batch of records from the current leaf block in one call, replacing the per-record inner loop in `hp_write_one_blob()` Step 2. The caller pre-computes the record count needed for the chosen storage format (Case B for `is_only_run`, Case C otherwise), and the function returns however many are available up to the request. The flat if/else-if/else then selects Case A, B, or C based on the actual count. This eliminates the record-by-record extension loop, both contiguity guards with `abort()`, and the Case B extra-record allocation logic, reducing Step 2 from ~170 lines to ~60. --- storage/heap/heapdef.h | 2 +- storage/heap/hp_blob.c | 184 ++++++++++------------------------------ storage/heap/hp_write.c | 52 ++++++++---- 3 files changed, 80 insertions(+), 158 deletions(-) diff --git a/storage/heap/heapdef.h b/storage/heap/heapdef.h index 42fc0a5a8466b..018c6f6dc7f09 100644 --- a/storage/heap/heapdef.h +++ b/storage/heap/heapdef.h @@ -220,7 +220,7 @@ extern ha_rows hp_rows_in_memory(size_t reclength, size_t index_size, size_t memory_limit); extern size_t hp_memory_needed_per_row(size_t reclength); -extern uchar *hp_alloc_from_tail(HP_SHARE *info); +extern uchar *hp_alloc_from_tail(HP_SHARE *info, uint *blocks); extern uchar *next_free_record_pos(HP_SHARE *info); static inline uint32 hp_blob_length(const HP_BLOB_DESC *desc, const uchar *record) diff --git a/storage/heap/hp_blob.c b/storage/heap/hp_blob.c index b82333a53d326..8aaf4ce542797 100644 --- a/storage/heap/hp_blob.c +++ b/storage/heap/hp_blob.c @@ -29,7 +29,6 @@ */ #include "heapdef.h" -#include #include @@ -361,163 +360,68 @@ int hp_write_one_blob(HP_SHARE *share, const uchar *data_ptr, /* Step 2: Allocate remaining data from the block tail. - Tail allocation is always contiguous within a leaf block. 
- When we hit a block boundary, we start a new run. + Batch allocation: hp_alloc_from_tail() returns a contiguous + batch of records within a single leaf block in one call. + When we hit a block boundary, a new run starts. */ while (data_offset < data_len) { uchar *run_start; - uint16 run_rec_count; + uint run_rec_count; uint32 remaining= data_len - data_offset; - uint32 run_payload; - my_bool is_only_run; + my_bool is_only_run= (first_run == NULL && prev_run_start == NULL); - run_start= hp_alloc_from_tail(share); - if (!run_start) - goto err; - run_rec_count= 1; - - /* Extend the run with consecutive tail records */ - for (;;) + /* + Compute the number of records to request. + Case B (zero-copy) needs the most records per data byte, so + request that amount for is_only_run to give zero-copy the best + chance. hp_alloc_from_tail() caps at the remaining slots in + the current leaf block. + */ + if (is_only_run && remaining <= visible) + run_rec_count= 1; + else if (is_only_run) { - uint block_pos; - - if (run_rec_count == 1) - run_payload= visible; /* HP_ROW_SINGLE_REC: no header */ + uint64 needed= ((uint64) remaining + recbuffer - 1) / recbuffer + 1; + run_rec_count= (uint) MY_MIN(needed, UINT_MAX16); + } + else + { + if (remaining <= first_payload) + run_rec_count= 1; else - run_payload= (visible - HP_CONT_HEADER_SIZE) + - (uint32)(run_rec_count - 1) * recbuffer; - if (run_payload >= remaining) - break; - - /* - Check if the next record would be in the same leaf block. - block_pos == 0 means last_allocated is at a block boundary - and the next allocation would start a new block. - */ - block_pos= share->block.last_allocated % - share->block.records_in_block; - if (block_pos == 0) - break; - { - uchar *next_rec= hp_alloc_from_tail(share); - if (!next_rec) - break; - /* - Contiguity guard (active in all builds, not just debug). - - Blob continuation runs use pointer arithmetic (run_start + - i * recbuffer) to access inner records in the write, read, - zero-copy, scan-skip, and free paths. Today, contiguity - within a leaf block is guaranteed by hp_get_new_block() - allocating a single flat array of records_in_block * recbuffer - bytes, and hp_alloc_from_tail() handing them out sequentially. - But this is an implementation detail of HP_BLOCK, not a - documented contract. A future change (e.g. sub-block - allocation, memory pooling, or alignment padding between - records) could silently break this assumption, turning every - blob path into a source of data corruption. Abort here so - such a change is caught immediately by any test that exercises - blob writes. - */ - if (unlikely(next_rec != - run_start + (uint32) run_rec_count * recbuffer)) - { - my_safe_printf_stderr( - "HEAP blob: tail allocation not contiguous: " - "expected %p, got %p (run_start=%p, count=%u, recbuffer=%u)\n", - run_start + (uint32) run_rec_count * recbuffer, - next_rec, run_start, (uint) run_rec_count, recbuffer); - abort(); - } - run_rec_count++; + uint64 needed= 1 + ((uint64)(remaining - first_payload) + + recbuffer - 1) / recbuffer; + run_rec_count= (uint) MY_MIN(needed, UINT_MAX16); } } - is_only_run= (first_run == NULL && prev_run_start == NULL); + run_start= hp_alloc_from_tail(share, &run_rec_count); + if (!run_start) + goto err; + DBUG_ASSERT(run_rec_count >= 1); - if (is_only_run && run_payload >= remaining) + if (is_only_run && run_rec_count == 1 && remaining <= visible) { - /* - Single-run blob - use zero-copy layout if possible. - Case A: data fits in rec 0 payload (run_rec_count == 1). 
- Case B: data in rec 1..N-1 only, contiguous for zero-copy reads. - */ - if (run_rec_count == 1) - { - /* Case A: data fits in rec 0 */ - hp_write_run_data(share, data_ptr, data_len, run_start, - run_rec_count, HP_BLOB_CASE_A_SINGLE_REC, - &data_offset); - } - else - { - uint32 case_b_payload= (uint32)(run_rec_count - 1) * recbuffer; - if (case_b_payload >= remaining) - { - /* Case B: rec 1..N-1 alone hold all data */ - hp_write_run_data(share, data_ptr, data_len, run_start, - run_rec_count, HP_BLOB_CASE_B_ZEROCOPY, - &data_offset); - } - else - { - /* - Case B needs one more record than Case C. Try to extend - if we're not at a block boundary. - */ - uint block_pos= share->block.last_allocated % - share->block.records_in_block; - if (block_pos != 0) - { - uchar *extra= hp_alloc_from_tail(share); - if (extra) - { - /* - Contiguity guard for the Case B extra record, same - rationale as the main extension loop ~60 lines above: - hp_get_new_block() today allocates flat arrays but this - is an HP_BLOCK implementation detail, not a contract. - A future change could break contiguity and silently - corrupt every blob read/write/free path that relies on - run_start + i * recbuffer arithmetic. - */ - if (unlikely(extra != - run_start + (uint32) run_rec_count * recbuffer)) - { - my_safe_printf_stderr( - "HEAP blob: Case B extra allocation not contiguous: " - "expected %p, got %p " - "(run_start=%p, count=%u, recbuffer=%u)\n", - run_start + (uint32) run_rec_count * recbuffer, - extra, run_start, (uint) run_rec_count, recbuffer); - abort(); - } - run_rec_count++; - hp_write_run_data(share, data_ptr, data_len, run_start, - run_rec_count, HP_BLOB_CASE_B_ZEROCOPY, - &data_offset); - } - else - /* Case B extension failed - fall back to Case C */ - hp_write_run_data(share, data_ptr, data_len, run_start, - run_rec_count, HP_BLOB_CASE_C_MULTI_RUN, - &data_offset); - } - else - /* At block boundary - Case C */ - hp_write_run_data(share, data_ptr, data_len, run_start, - run_rec_count, HP_BLOB_CASE_C_MULTI_RUN, - &data_offset); - } - } + /* Case A: single record, no header */ + hp_write_run_data(share, data_ptr, data_len, run_start, + (uint16) run_rec_count, HP_BLOB_CASE_A_SINGLE_REC, + &data_offset); + } + else if (is_only_run && + (uint32)(run_rec_count - 1) * recbuffer >= remaining) + { + /* Case B: data in rec 1..N-1, contiguous for zero-copy reads */ + hp_write_run_data(share, data_ptr, data_len, run_start, + (uint16) run_rec_count, HP_BLOB_CASE_B_ZEROCOPY, + &data_offset); } else { - /* Case C: multi-run or not the only run */ + /* Case C: multi-run or partial run */ hp_write_run_data(share, data_ptr, data_len, run_start, - run_rec_count, HP_BLOB_CASE_C_MULTI_RUN, + (uint16) run_rec_count, HP_BLOB_CASE_C_MULTI_RUN, &data_offset); } diff --git a/storage/heap/hp_write.c b/storage/heap/hp_write.c index d3ffc4b41d80c..1765433082a12 100644 --- a/storage/heap/hp_write.c +++ b/storage/heap/hp_write.c @@ -171,25 +171,33 @@ int hp_rb_write_key(HP_INFO *info, HP_KEYDEF *keyinfo, const uchar *record, */ /* - Allocate one record from the HP_BLOCK tail, bypassing the - delete list. Used by next_free_record_pos() when no deleted - records are available, and by hp_write_one_blob() for blob - continuation chain allocation. + Allocate a contiguous batch of records from the HP_BLOCK tail. + + *blocks is in/out: on entry, the maximum number of records to + allocate; on return, the actual count (capped at the remaining + slots in the current leaf block). 
All returned records are + contiguous in memory (run_start + i * recbuffer). + + Used by next_free_record_pos() (blocks=1) when no deleted records + are available, and by hp_write_one_blob() for blob continuation + chain allocation. Maintains the scan-boundary invariant: total_records + deleted == block.last_allocated - by incrementing both last_allocated and total_records together. - heap_scan() relies on this invariant to know when to stop. + by incrementing both last_allocated and total_records by the + allocated count. heap_scan() relies on this invariant. */ -uchar *hp_alloc_from_tail(HP_SHARE *info) +uchar *hp_alloc_from_tail(HP_SHARE *info, uint *blocks) { - int block_pos; + uint block_pos; + uint available, requested; size_t length; DBUG_ENTER("hp_alloc_from_tail"); - if (!(block_pos=(info->block.last_allocated % - info->block.records_in_block))) + DBUG_ASSERT(*blocks > 0); + if (!(block_pos= (uint)(info->block.last_allocated % + info->block.records_in_block))) { if ((info->block.last_allocated > info->max_records && info->max_records) || @@ -206,17 +214,24 @@ uchar *hp_alloc_from_tail(HP_SHARE *info) DBUG_RETURN(NULL); } - if (hp_get_new_block(info, &info->block,&length)) + if (hp_get_new_block(info, &info->block, &length)) DBUG_RETURN(NULL); info->data_length+=length; } - info->block.last_allocated++; - info->total_records++; - DBUG_PRINT("exit",("Used new position: %p", + available= (uint)(info->block.records_in_block - block_pos); + requested= *blocks; + if (requested > available) + requested= available; + DBUG_ASSERT(block_pos + requested <= info->block.records_in_block); + info->block.last_allocated+= requested; + info->total_records+= requested; + *blocks= requested; + DBUG_PRINT("exit",("Used new position: %p blocks: %u", ((uchar*) info->block.level_info[0].last_blocks+ - block_pos * info->block.recbuffer))); + block_pos * info->block.recbuffer), + requested)); DBUG_RETURN((uchar*) info->block.level_info[0].last_blocks+ - block_pos*info->block.recbuffer); + block_pos * info->block.recbuffer); } @@ -234,7 +249,10 @@ uchar *next_free_record_pos(HP_SHARE *info) DBUG_PRINT("exit",("Used old position: %p", pos)); DBUG_RETURN(pos); } - DBUG_RETURN(hp_alloc_from_tail(info)); + { + uint blocks= 1; + DBUG_RETURN(hp_alloc_from_tail(info, &blocks)); + } } From 5582b3e27045a8d906ebd4116656d8947531f459 Mon Sep 17 00:00:00 2001 From: Arcadiy Ivanov Date: Fri, 1 May 2026 03:54:02 -0400 Subject: [PATCH 18/27] Free-list scavenge fallback + contiguity fix for blob allocation Two fixes in `hp_write_one_blob()`: **Bug fix**: Step 1 free-list contiguity detection failed to update `prev_pos` inside the contiguity branch, so the check `pos == prev_pos - recbuffer` could only detect 2-record groups. The third record was always compared against the original `prev_pos` (2 recbuffers away), causing a false discontinuity. Fix: add `prev_pos = pos` after `run_start = pos`. **Deficiency #2**: When Step 2 (tail allocation) fails with `HA_ERR_RECORD_FILE_FULL` and there are still deleted records on the free list, a new Step 3 walks the entire free list accepting any contiguous group (even single slots). Each group is written as a Case C run via `hp_unlink_and_write_run()`. This produces maximally fragmented chains, which are slower to read but correct. Failing with table-full when free slots exist is worse than a fragmented chain. 
Tests: - `hp_test_freelist-t.c`: 38 unit tests covering contiguity detection (prev_pos bug guard), repeated delete-reinsert cycles, Step 3 scavenge fallback, and true capacity exhaustion - `heap/blob_fallback.test`: MTR test exercising the fallback at SQL level with fragmented free list - Extracted shared `hp_test_helpers.h` from duplicate code in `hp_test_scan-t.c` and `hp_test_freelist-t.c` --- mysql-test/suite/heap/blob_fallback.result | 54 +++ mysql-test/suite/heap/blob_fallback.test | 82 +++++ storage/heap/CMakeLists.txt | 2 +- storage/heap/hp_blob.c | 46 ++- storage/heap/hp_test_freelist-t.c | 382 +++++++++++++++++++++ storage/heap/hp_test_helpers.h | 85 +++++ storage/heap/hp_test_scan-t.c | 79 +---- 7 files changed, 650 insertions(+), 80 deletions(-) create mode 100644 mysql-test/suite/heap/blob_fallback.result create mode 100644 mysql-test/suite/heap/blob_fallback.test create mode 100644 storage/heap/hp_test_freelist-t.c create mode 100644 storage/heap/hp_test_helpers.h diff --git a/mysql-test/suite/heap/blob_fallback.result b/mysql-test/suite/heap/blob_fallback.result new file mode 100644 index 0000000000000..9a1fc68b854cc --- /dev/null +++ b/mysql-test/suite/heap/blob_fallback.result @@ -0,0 +1,54 @@ +drop table if exists t1; +# +# Setup: constrained MEMORY table with blob column +# +set @save_max_heap_table_size=@@max_heap_table_size; +set max_heap_table_size=1*1024*1024; +create table t1 (a int not null, b blob, primary key(a)) engine=memory; +# +# Fill table with short-blob rows to consume capacity +# +select count(*) as total_rows from t1; +total_rows +500 +# +# Delete every other row to create fragmentation +# +select count(*) as rows_after_delete from t1; +rows_after_delete +250 +# +# Insert new rows with blobs — exercises free-list scavenge +# +insert into t1 values (10000, repeat('A', 200)); +insert into t1 values (10001, repeat('B', 500)); +insert into t1 values (10002, repeat('C', 50)); +# +# Verify data integrity +# +select a, length(b) as blob_len, left(b, 5) as prefix from t1 +where a >= 10000 order by a; +a blob_len prefix +10000 200 AAAAA +10001 500 BBBBB +10002 50 CCCCC +select a, md5(b) from t1 where a=10000; +a md5(b) +10000 16bf06b3717d1f238252870e699c2a2e +select a, md5(b) from t1 where a=10001; +a md5(b) +10001 f05df5898d087fc01bd25737b2b85788 +select a, md5(b) from t1 where a=10002; +a md5(b) +10002 4aec7e49aeea73fdd3e1e5c565fc126a +# +# Verify scan still works correctly +# +select count(*) as scan_count from t1; +scan_count +253 +# +# Cleanup +# +drop table t1; +set max_heap_table_size=@save_max_heap_table_size; diff --git a/mysql-test/suite/heap/blob_fallback.test b/mysql-test/suite/heap/blob_fallback.test new file mode 100644 index 0000000000000..362c6d610768a --- /dev/null +++ b/mysql-test/suite/heap/blob_fallback.test @@ -0,0 +1,82 @@ +# +# MDEV-38975: Free-list scavenge fallback for blob allocation +# +# Tests that blob inserts succeed via the Step 3 free-list scavenge +# fallback when the table is near max_heap_table_size and has a +# heavily fragmented free list (alternating used/deleted rows). +# +# Without the fallback, blob allocation fails with table-full even +# though free slots exist — they're just too scattered for normal +# free-list reuse (Step 1) and the tail is at capacity (Step 2). 
+# + +--disable_warnings +drop table if exists t1; +--enable_warnings + +--echo # +--echo # Setup: constrained MEMORY table with blob column +--echo # +set @save_max_heap_table_size=@@max_heap_table_size; +set max_heap_table_size=1*1024*1024; + +create table t1 (a int not null, b blob, primary key(a)) engine=memory; + +--echo # +--echo # Fill table with short-blob rows to consume capacity +--echo # + +--disable_query_log +--let $i=0 +while ($i < 500) +{ + eval insert into t1 values ($i, repeat('x', 100)); + --inc $i +} +--enable_query_log + +select count(*) as total_rows from t1; + +--echo # +--echo # Delete every other row to create fragmentation +--echo # + +--disable_query_log +--let $i=0 +while ($i < 500) +{ + eval delete from t1 where a=$i; + --inc $i + --inc $i +} +--enable_query_log + +select count(*) as rows_after_delete from t1; + +--echo # +--echo # Insert new rows with blobs — exercises free-list scavenge +--echo # +insert into t1 values (10000, repeat('A', 200)); +insert into t1 values (10001, repeat('B', 500)); +insert into t1 values (10002, repeat('C', 50)); + +--echo # +--echo # Verify data integrity +--echo # +select a, length(b) as blob_len, left(b, 5) as prefix from t1 + where a >= 10000 order by a; + +select a, md5(b) from t1 where a=10000; +select a, md5(b) from t1 where a=10001; +select a, md5(b) from t1 where a=10002; + +--echo # +--echo # Verify scan still works correctly +--echo # +select count(*) as scan_count from t1; + +--echo # +--echo # Cleanup +--echo # +drop table t1; +set max_heap_table_size=@save_max_heap_table_size; diff --git a/storage/heap/CMakeLists.txt b/storage/heap/CMakeLists.txt index c053551c6debb..361d849eb120b 100644 --- a/storage/heap/CMakeLists.txt +++ b/storage/heap/CMakeLists.txt @@ -33,7 +33,7 @@ IF(WITH_UNIT_TESTS) ADD_EXECUTABLE(hp_test2 hp_test2.c) TARGET_LINK_LIBRARIES(hp_test2 heap mysys dbug strings) - MY_ADD_TESTS(hp_test_hash hp_test_scan LINK_LIBRARIES heap mysys dbug strings) + MY_ADD_TESTS(hp_test_hash hp_test_scan hp_test_freelist LINK_LIBRARIES heap mysys dbug strings) INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/sql ${CMAKE_SOURCE_DIR}/include) diff --git a/storage/heap/hp_blob.c b/storage/heap/hp_blob.c index 8aaf4ce542797..e75fd6fa9f87a 100644 --- a/storage/heap/hp_blob.c +++ b/storage/heap/hp_blob.c @@ -320,6 +320,7 @@ int hp_write_one_blob(HP_SHARE *share, const uchar *data_ptr, if (prev_pos && pos == prev_pos - recbuffer) { run_start= pos; + prev_pos= pos; run_count++; if (run_count < max_run) continue; @@ -399,7 +400,7 @@ int hp_write_one_blob(HP_SHARE *share, const uchar *data_ptr, run_start= hp_alloc_from_tail(share, &run_rec_count); if (!run_start) - goto err; + break; DBUG_ASSERT(run_rec_count >= 1); if (is_only_run && run_rec_count == 1 && remaining <= visible) @@ -432,6 +433,49 @@ int hp_write_one_blob(HP_SHARE *share, const uchar *data_ptr, prev_run_start= run_start; } + /* + Step 3: Free-list scavenge fallback. + + When the tail is full but there are deleted records on the free list, + walk the entire free list accepting any contiguous group (even a + single slot). This produces maximally fragmented chains (many short + runs, Case C), which are slower to read but correct. Failing with + table-full when free slots exist is worse than a fragmented chain. 
+ */ + while (data_offset < data_len && share->del_link) + { + uchar *run_start= share->del_link; + uchar *prev_pos= run_start; + uchar *pos= *((uchar**) run_start); + uint16 run_count= 1; + uint32 remaining= data_len - data_offset; + uint32 remaining_records= + (remaining <= first_payload ? 1 : + 1 + (remaining - first_payload + recbuffer - 1) / recbuffer); + uint32 max_run= MY_MIN(remaining_records, UINT_MAX16); + + for (; pos; pos= *((uchar**) pos)) + { + if (run_count >= max_run) + break; + if (pos == prev_pos - recbuffer) + { + run_start= pos; + prev_pos= pos; + run_count++; + continue; + } + break; + } + + hp_unlink_and_write_run(share, data_ptr, data_len, run_start, + run_count, &data_offset, + &first_run, &prev_run_start); + } + + if (data_offset < data_len) + goto err; + *first_run_out= first_run; return 0; diff --git a/storage/heap/hp_test_freelist-t.c b/storage/heap/hp_test_freelist-t.c new file mode 100644 index 0000000000000..b296b16e4b19e --- /dev/null +++ b/storage/heap/hp_test_freelist-t.c @@ -0,0 +1,382 @@ +/* + Unit tests for free-list contiguity detection in hp_write_one_blob(). + + Verifies that Step 1 (free-list peek) correctly identifies contiguous + groups of 3+ records, not just pairs. +*/ + +#include "hp_test_helpers.h" + + +/* + Test: free-list contiguity detection finds groups > 2 records. + + Scenario: + 1. Insert a row with a 100-byte blob (needs 8 continuation records + in Case B format: 1 header + 7 data records, recbuffer=16). + 2. Delete the row. The continuation chain's 8 records form a + contiguous group on the free list (pushed in ascending address + order by hp_free_run_chain, so LIFO yields descending addresses). + The primary record is pushed on top. + 3. Insert a new row with the same blob size. The primary record + reuses the old primary from the free list head. The blob + allocation (Step 1) should detect the remaining 8 contiguous + continuation records as a single group and reuse them. + 4. Assert that block.last_allocated did NOT grow: all records + came from the free list, nothing from the tail. + + With the prev_pos bug (prev_pos not updated in the contiguity loop), + Step 1 only detects 2-record groups. For a 100-byte blob: + total_records_needed = 7 + min_run_records = min(7, max(ceil(128/16), 2)) = 7 + A 2-record group < 7 causes Step 1 to give up, falling to tail + allocation, which grows last_allocated. 
+*/ + +static void test_freelist_contiguity_multirecord(void) +{ + HP_SHARE *share; + HP_INFO *info; + uchar rec[REC_LENGTH]; + ulong last_alloc_after_first_insert, last_alloc_after_delete; + + uchar blob_data[100]; + memset(blob_data, 'Q', sizeof(blob_data)); + blob_data[0]= '!'; + blob_data[99]= '?'; + + if (create_and_open("test_freelist_cont", &share, &info)) + { + ok(0, "setup failed: %d", my_errno); + skip(10, "setup failed"); + return; + } + + /* Insert row with 100-byte blob */ + build_record(rec, 1, blob_data, sizeof(blob_data)); + ok(heap_write(info, rec) == 0, "insert row 1 (100-byte blob)"); + + last_alloc_after_first_insert= (ulong) share->block.last_allocated; + ok(last_alloc_after_first_insert >= 9, + "allocated >= 9 records: 1 primary + 8 continuation (got %lu)", + last_alloc_after_first_insert); + + /* Delete row 1 */ + { + uchar key[4]; + int4store(key, 1); + ok(heap_rkey(info, rec, 0, key, 4, HA_READ_KEY_EXACT) == 0, + "found row 1 for deletion"); + ok(heap_delete(info, rec) == 0, "deleted row 1"); + } + + last_alloc_after_delete= (ulong) share->block.last_allocated; + ok(last_alloc_after_delete == last_alloc_after_first_insert, + "last_allocated unchanged after delete (%lu)", + last_alloc_after_delete); + + ok(share->deleted == last_alloc_after_delete, + "all %lu records on free list (deleted=%lu)", + last_alloc_after_delete, (ulong) share->deleted); + + /* Insert row 2 with same blob size - should fully reuse free list */ + build_record(rec, 2, blob_data, sizeof(blob_data)); + ok(heap_write(info, rec) == 0, "insert row 2 (100-byte blob, free-list reuse)"); + + /* + Key assertion: if contiguity detection works for groups > 2, + the entire continuation chain is recovered from the free list + without any tail allocation. + */ + ok(share->block.last_allocated == last_alloc_after_delete, + "last_allocated unchanged after reinsert: free list fully reused " + "(before=%lu, after=%lu)", + last_alloc_after_delete, (ulong) share->block.last_allocated); + + /* Verify data integrity of the reinserted row */ + { + uchar key[4]; + uchar read_buf[REC_LENGTH]; + uint32 read_len; + const uchar *read_ptr; + + int4store(key, 2); + ok(heap_rkey(info, read_buf, 0, key, 4, HA_READ_KEY_EXACT) == 0, + "found row 2 for verification"); + read_len= uint2korr(read_buf + BLOB_OFFSET); + memcpy(&read_ptr, read_buf + BLOB_OFFSET + BLOB_PACKLEN, sizeof(read_ptr)); + ok(read_len == 100, "blob length == 100 (got %u)", read_len); + ok(memcmp(read_ptr, blob_data, 100) == 0, "blob data matches after free-list reuse"); + } + + heap_drop_table(info); + heap_close(info); +} + + +/* + Test: free-list contiguity across multiple delete-reinsert cycles. + + Performs 3 rounds of insert-delete-reinsert with a 200-byte blob. + In each round, last_allocated must not grow, proving that contiguity + detection consistently reuses the freed continuation chain. 
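+
+  (With recbuffer=16, a 200-byte blob needs 1 + ceil((200 - 6) / 16)
+  = 14 records per chain, assuming the same 6-byte first-record
+  payload as above, so any failure to reuse the freed chain would
+  grow last_allocated by 14 per round and be caught immediately.)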
+*/ + +static void test_freelist_contiguity_repeated_cycles(void) +{ + HP_SHARE *share; + HP_INFO *info; + uchar rec[REC_LENGTH]; + int round; + + uchar blob_data[200]; + memset(blob_data, 'R', sizeof(blob_data)); + + if (create_and_open("test_freelist_cycles", &share, &info)) + { + ok(0, "setup failed: %d", my_errno); + skip(12, "setup failed"); + return; + } + + /* Initial insert to establish baseline */ + build_record(rec, 1, blob_data, sizeof(blob_data)); + ok(heap_write(info, rec) == 0, "initial insert (200-byte blob)"); + + for (round= 0; round < 3; round++) + { + uchar key[4]; + ulong alloc_before; + + /* Delete current row */ + int4store(key, round + 1); + ok(heap_rkey(info, rec, 0, key, 4, HA_READ_KEY_EXACT) == 0, + "round %d: found row for deletion", round); + ok(heap_delete(info, rec) == 0, + "round %d: deleted row", round); + alloc_before= (ulong) share->block.last_allocated; + + /* Reinsert with new key */ + build_record(rec, round + 2, blob_data, sizeof(blob_data)); + ok(heap_write(info, rec) == 0, + "round %d: reinserted (free-list reuse)", round); + ok(share->block.last_allocated == alloc_before, + "round %d: last_allocated stable (before=%lu, after=%lu)", + round, alloc_before, (ulong) share->block.last_allocated); + } + + heap_drop_table(info); + heap_close(info); +} + + +/* + Test: Step 3 free-list scavenge fallback when tail is full. + + Fills the entire first HP_BLOCK leaf block with 0-byte-blob rows, + then deletes every other row to create a heavily fragmented free + list of non-contiguous individual slots. Locks out tail allocation + by setting max_table_size tight. + + Insert a row with a 50-byte blob. Step 1 gives up (1-slot groups + < min_run_records=4). Step 2 fails (tail at block boundary with + tight max_table_size). Step 3 scavenges individual free-list + slots, writing maximally fragmented Case C chains. +*/ + +static void test_freelist_scavenge_fallback(void) +{ + HP_SHARE *share; + HP_INFO *info; + uchar rec[REC_LENGTH]; + int32 i; + int32 inserted, deleted_count; + int32 records_in_block; + ulong last_alloc_after_fill; + + uchar blob_data[50]; + memset(blob_data, 'F', sizeof(blob_data)); + blob_data[0]= '<'; + blob_data[49]= '>'; + + if (create_and_open("test_scavenge", &share, &info)) + { + ok(0, "setup failed: %d", my_errno); + skip(8, "setup failed"); + return; + } + + records_in_block= (int32) share->block.records_in_block; + + /* Fill the first leaf block with 0-byte blob rows (1 record each) */ + for (i= 0, inserted= 0; i < records_in_block; i++) + { + build_record(rec, i, (const uchar*) "", 0); + if (heap_write(info, rec) != 0) + break; + inserted++; + } + + last_alloc_after_fill= (ulong) share->block.last_allocated; + ok(inserted == records_in_block, + "filled block: %d of %d rows inserted", + inserted, records_in_block); + + /* Delete every other row — non-contiguous fragmentation */ + deleted_count= 0; + for (i= 0; i < inserted; i+= 2) + { + uchar key[4]; + int4store(key, i); + if (heap_rkey(info, rec, 0, key, 4, HA_READ_KEY_EXACT) == 0 && + heap_delete(info, rec) == 0) + deleted_count++; + } + ok(deleted_count == inserted / 2, + "deleted %d rows (every other)", deleted_count); + ok(share->deleted == (ulong) deleted_count, + "share->deleted == %d", deleted_count); + + /* Lock out tail allocation */ + share->max_table_size= share->data_length + share->index_length; + + ok(share->block.last_allocated == last_alloc_after_fill, + "last_allocated at block boundary (%lu)", last_alloc_after_fill); + + /* + Insert row with 50-byte blob. 
+    total_records_needed = 1 + ceil((50 - 5) / 16) = 4
+    min_run_records = min(4, max(ceil(128/16), 2)) = 4
+    Step 1: free list has non-contiguous singles → 1 < 4 → gives up
+    Step 2: block boundary + tight max_table_size → fails
+    Step 3: scavenges individual free-list slots → succeeds
+  */
+  build_record(rec, 99999, blob_data, sizeof(blob_data));
+  ok(heap_write(info, rec) == 0,
+     "insert with 50-byte blob via Step 3 scavenge fallback");
+
+  ok(share->block.last_allocated == last_alloc_after_fill,
+     "last_allocated unchanged: all records from free list (%lu)",
+     (ulong) share->block.last_allocated);
+
+  /* Verify data integrity */
+  {
+    uchar key[4];
+    uchar read_buf[REC_LENGTH];
+    uint32 read_len;
+    const uchar *read_ptr;
+
+    int4store(key, 99999);
+    ok(heap_rkey(info, read_buf, 0, key, 4, HA_READ_KEY_EXACT) == 0,
+       "found row via key lookup after scavenge insert");
+    read_len= uint2korr(read_buf + BLOB_OFFSET);
+    memcpy(&read_ptr, read_buf + BLOB_OFFSET + BLOB_PACKLEN, sizeof(read_ptr));
+    ok(read_len == 50, "blob length == 50 (got %u)", read_len);
+    ok(memcmp(read_ptr, blob_data, 50) == 0, "blob data matches");
+  }
+
+  heap_drop_table(info);
+  heap_close(info);
+}
+
+
+/*
+  Test: true capacity exhaustion fails correctly.
+
+  Same setup as test_freelist_scavenge_fallback, but insert a blob
+  large enough to exhaust BOTH the tail AND the free list. The
+  insert must fail with HA_ERR_RECORD_FILE_FULL.
+*/
+
+static void test_true_capacity_exhaustion(void)
+{
+  HP_SHARE *share;
+  HP_INFO *info;
+  uchar rec[REC_LENGTH];
+  int32 i;
+  int32 inserted, deleted_count;
+  int32 records_in_block;
+
+  /*
+    Blob large enough to need more records than the free list can
+    supply: at 16 bytes per record, ~5000 bytes of blob data needs
+    roughly 315 continuation records.  We delete only 10 rows, so
+    with tail allocation locked out the insert faces a ~315-record
+    demand against 10 free slots and must fail.
+ */ + uchar blob_data[5000]; + memset(blob_data, 'X', sizeof(blob_data)); + + if (create_and_open("test_exhaust", &share, &info)) + { + ok(0, "setup failed: %d", my_errno); + skip(4, "setup failed"); + return; + } + + records_in_block= (int32) share->block.records_in_block; + + /* Fill the block */ + for (i= 0, inserted= 0; i < records_in_block; i++) + { + build_record(rec, i, (const uchar*) "", 0); + if (heap_write(info, rec) != 0) + break; + inserted++; + } + ok(inserted == records_in_block, "filled block: %d rows", inserted); + + /* Delete only 10 rows — not enough free slots for the large blob */ + deleted_count= 0; + for (i= 0; i < 20 && i < inserted; i+= 2) + { + uchar key[4]; + int4store(key, i); + if (heap_rkey(info, rec, 0, key, 4, HA_READ_KEY_EXACT) == 0 && + heap_delete(info, rec) == 0) + deleted_count++; + } + ok(deleted_count == 10, "deleted 10 rows"); + + /* Lock out tail */ + share->max_table_size= share->data_length + share->index_length; + + /* Try to insert a 5000-byte blob — should fail */ + build_record(rec, 99998, blob_data, sizeof(blob_data)); + ok(heap_write(info, rec) != 0, + "insert with 5000-byte blob correctly fails (not enough free slots)"); + ok(my_errno == HA_ERR_RECORD_FILE_FULL, + "error is HA_ERR_RECORD_FILE_FULL (got %d)", my_errno); + + /* Verify table is still consistent (no corruption from partial rollback) */ + ok(share->records == (ulong)(inserted - deleted_count), + "records count consistent after failed insert (%lu)", + (ulong) share->records); + + heap_drop_table(info); + heap_close(info); +} + + +int main(int argc __attribute__((unused)), + char **argv __attribute__((unused))) +{ + MY_INIT("hp_test_freelist"); + plan(38); + + diag("Test 1: free-list contiguity detects groups > 2 records"); + test_freelist_contiguity_multirecord(); + + diag("Test 2: contiguity across repeated delete-reinsert cycles"); + test_freelist_contiguity_repeated_cycles(); + + diag("Test 3: Step 3 free-list scavenge fallback"); + test_freelist_scavenge_fallback(); + + diag("Test 4: true capacity exhaustion fails correctly"); + test_true_capacity_exhaustion(); + + my_end(0); + return exit_status(); +} diff --git a/storage/heap/hp_test_helpers.h b/storage/heap/hp_test_helpers.h new file mode 100644 index 0000000000000..165459b137946 --- /dev/null +++ b/storage/heap/hp_test_helpers.h @@ -0,0 +1,85 @@ +/* + Shared helpers for HEAP blob unit tests. 
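+  Included by hp_test_freelist-t.c and hp_test_scan-t.c, which build
+  identical (int4, blob) test tables.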
+
+  Record layout: (int4, blob(packlength=2))
+  byte 0: null bitmap (1 byte)
+  bytes 1-4: int4 field (4 bytes)
+  bytes 5-6: blob packlength=2 (length, little-endian)
+  bytes 7-14: blob data pointer (8 bytes on x86_64)
+  reclength = 15
+  visible_offset = MAX(15, 8) = 15
+  recbuffer = ALIGN(15 + 1, 8) = 16
+*/
+
+#ifndef HP_TEST_HELPERS_H
+#define HP_TEST_HELPERS_H
+
+#include <my_global.h>
+#include <my_sys.h>
+#include <m_string.h>
+#include <tap.h>
+#include "heap.h"
+#include "heapdef.h"
+
+#define REC_LENGTH 15
+#define INT_OFFSET 1
+#define BLOB_OFFSET 5
+#define BLOB_PACKLEN 2
+
+#define PTR_SIZE portable_sizeof_char_ptr
+
+
+static void build_record(uchar *rec, int32 int_val,
+                         const uchar *blob_data, uint16 blob_len)
+{
+  memset(rec, 0, REC_LENGTH);
+  int4store(rec + INT_OFFSET, int_val);
+  int2store(rec + BLOB_OFFSET, blob_len);
+  memcpy(rec + BLOB_OFFSET + BLOB_PACKLEN, &blob_data, PTR_SIZE);
+}
+
+
+static int create_and_open(const char *name, HP_SHARE **share, HP_INFO **info)
+{
+  HP_KEYDEF keydef;
+  HA_KEYSEG keyseg;
+  HP_CREATE_INFO ci;
+  HP_BLOB_DESC blob_desc;
+  my_bool unused;
+
+  memset(&keyseg, 0, sizeof(keyseg));
+  keyseg.type= HA_KEYTYPE_BINARY;
+  keyseg.start= INT_OFFSET;
+  keyseg.length= 4;
+  keyseg.charset= &my_charset_bin;
+
+  memset(&keydef, 0, sizeof(keydef));
+  keydef.keysegs= 1;
+  keydef.seg= &keyseg;
+  keydef.algorithm= HA_KEY_ALG_HASH;
+  keydef.flag= HA_NOSAME;
+  keydef.length= 4;
+
+  blob_desc.offset= BLOB_OFFSET;
+  blob_desc.packlength= BLOB_PACKLEN;
+
+  memset(&ci, 0, sizeof(ci));
+  ci.keys= 1;
+  ci.keydef= &keydef;
+  ci.reclength= REC_LENGTH;
+  ci.max_records= 1000;
+  ci.min_records= 10;
+  ci.max_table_size= 1024 * 1024;
+  ci.blob_descs= &blob_desc;
+  ci.blob_count= 1;
+
+  if (heap_create(name, &ci, share, &unused))
+    return 1;
+  *info= heap_open(name, 2);
+  if (!*info)
+    return 1;
+  heap_extra(*info, HA_EXTRA_NO_READCHECK);
+  return 0;
+}
+
+#endif /* HP_TEST_HELPERS_H */
diff --git a/storage/heap/hp_test_scan-t.c b/storage/heap/hp_test_scan-t.c
index e60e15f8eb9da..ff24739556f50 100644
--- a/storage/heap/hp_test_scan-t.c
+++ b/storage/heap/hp_test_scan-t.c
@@ -6,84 +6,7 @@
   caller. Uses real HEAP tables with blob columns.
 */
 
-#include <my_global.h>
-#include <my_sys.h>
-#include <m_string.h>
-#include <tap.h>
-#include "heap.h"
-#include "heapdef.h"
-
-/*
-  Record layout for a table (int4, blob(packlength=2)):
-  byte 0: null bitmap (1 byte)
-  bytes 1-4: int4 field (4 bytes)
-  bytes 5-6: blob packlength=2 (length, little-endian)
-  bytes 7-14: blob data pointer (8 bytes on x86_64)
-  reclength = 15
-  visible_offset = MAX(15, 8) = 15
-  recbuffer = ALIGN(15 + 1, 8) = 16
-*/
-
-#define REC_LENGTH 15
-#define INT_OFFSET 1
-#define BLOB_OFFSET 5
-#define BLOB_PACKLEN 2
-
-#define PTR_SIZE portable_sizeof_char_ptr
-
-
-static void build_record(uchar *rec, int32 int_val,
-                         const uchar *blob_data, uint16 blob_len)
-{
-  memset(rec, 0, REC_LENGTH);
-  int4store(rec + INT_OFFSET, int_val);
-  int2store(rec + BLOB_OFFSET, blob_len);
-  memcpy(rec + BLOB_OFFSET + BLOB_PACKLEN, &blob_data, PTR_SIZE);
-}
-
-
-static int create_and_open(const char *name, HP_SHARE **share, HP_INFO **info)
-{
-  HP_KEYDEF keydef;
-  HA_KEYSEG keyseg;
-  HP_CREATE_INFO ci;
-  HP_BLOB_DESC blob_desc;
-  my_bool unused;
-
-  memset(&keyseg, 0, sizeof(keyseg));
-  keyseg.type= HA_KEYTYPE_BINARY;
-  keyseg.start= INT_OFFSET;
-  keyseg.length= 4;
-  keyseg.charset= &my_charset_bin;
-
-  memset(&keydef, 0, sizeof(keydef));
-  keydef.keysegs= 1;
-  keydef.seg= &keyseg;
-  keydef.algorithm= HA_KEY_ALG_HASH;
-  keydef.flag= HA_NOSAME;
-  keydef.length= 4;
-
-  blob_desc.offset= BLOB_OFFSET;
-  blob_desc.packlength= BLOB_PACKLEN;
-
-  memset(&ci, 0, sizeof(ci));
-  ci.keys= 1;
-  ci.keydef= &keydef;
-  ci.reclength= REC_LENGTH;
-  ci.max_records= 1000;
-  ci.min_records= 10;
-  ci.max_table_size= 1024 * 1024;
-  ci.blob_descs= &blob_desc;
-  ci.blob_count= 1;
-
-  if (heap_create(name, &ci, share, &unused))
-    return 1;
-  *info= heap_open(name, 2);
-  if (!*info)
-    return 1;
-  heap_extra(*info, HA_EXTRA_NO_READCHECK);
-  return 0;
-}
+#include "hp_test_helpers.h"
 
 
 /*

From 4eaeb55f945e33d888fa28e5eb2292a877b3bcfa Mon Sep 17 00:00:00 2001
From: Arcadiy Ivanov
Date: Fri, 1 May 2026 04:52:22 -0400
Subject: [PATCH 19/27] Disable `ps_protocol` in `blob_big` tests

`SHOW STATUS LIKE 'Created_tmp%'` counts include the extra temp table
created by prepared statement re-execution under `--ps-protocol`. The
test already disabled `cursor_protocol` and `ps2_protocol` but missed
`ps_protocol`, causing `blob_big1`/`blob_big2`/`blob_big3` to fail on
CI builders that run with `--ps-protocol`.
---
 mysql-test/suite/heap/blob_big.inc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mysql-test/suite/heap/blob_big.inc b/mysql-test/suite/heap/blob_big.inc
index 0aec2e302009a..b9ef2f33b4c9f 100644
--- a/mysql-test/suite/heap/blob_big.inc
+++ b/mysql-test/suite/heap/blob_big.inc
@@ -10,6 +10,7 @@
 
 # SHOW STATUS output differs across protocol variants; disable them globally.
 --disable_cursor_protocol
+--disable_ps_protocol
 --disable_ps2_protocol
 
 --echo #
@@ -142,4 +143,5 @@ SHOW STATUS LIKE 'Created_tmp%';
 DROP TABLE t1;
 
 --enable_ps2_protocol
+--enable_ps_protocol
 --enable_cursor_protocol

From 44264bf95f742bf4ad95db4706e4789007dcfdd7 Mon Sep 17 00:00:00 2001
From: Arcadiy Ivanov
Date: Fri, 1 May 2026 14:03:00 -0400
Subject: [PATCH 20/27] Fix MSAN crash in `hp_rec_hashnr` for geometry/blob
 DISTINCT keys
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`heap_prepare_hp_create_info()` mishandled blob key segments whose
field type is `Field_blob` or `Field_geom` (as opposed to
`Field_blob_key`). `Field_blob::key_type()` returns
`HA_KEYTYPE_VARBINARY2` (geometry) or `HA_KEYTYPE_VARTEXT2` (text
blobs).

Commit `30415846402` ("Introduce Field_blob_key") added logic that stripped `HA_BLOB_PART` from these segments, assuming they use "VARCHAR packing." This is wrong: DISTINCT/UNION key fields are `Field_blob` (not `Field_blob_key`), and their record format is a blob descriptor (`packlength` bytes of length + 8-byte data pointer), not a varchar (2-byte length prefix + inline data). After `HA_BLOB_PART` was stripped, `hp_create.c` normalized `VARBINARY2` → `VARTEXT1`. The hash function `hp_rec_hashnr()` then entered the `VARTEXT1` branch, which reads the first 2 bytes as a varchar length and hashes that many bytes starting at offset 2. For a geometry blob descriptor, the first 2 bytes are the low bytes of the WKB data length (e.g. ~100 for a simple polygon), so the hash read ~100 bytes starting inside the 12-byte descriptor — overshooting into adjacent fields or uninitialized record buffer memory. This caused MSAN "use-of-uninitialized-value" crashes in `innodb_gis.1`, `innodb_gis.point_basic`, `main.gis`, and `innodb_gis.gis` on the `amd64-msan-clang-20` CI builder. Beyond the MSAN crash, it was also a functional bug: hashing the raw pointer bytes meant two rows with identical geometry data but different memory addresses would hash differently, breaking UNION DISTINCT deduplication. **Fix**: when `HA_BLOB_PART` is set and the key type is `VARBINARY2`/`VARTEXT2`, promote to `VARBINARY4`/`VARTEXT4` instead of stripping the flag. Set `bit_start` to the actual `packlength` and `length` to `4 + sizeof(pointer)`. `hp_create.c` then normalizes to `VARTEXT4` and the hash/compare functions use the blob path: dereference the pointer and operate on the actual data. --- sql/field.cc | 9 +++++++++ storage/heap/ha_heap.cc | 23 +++++++++++------------ 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/sql/field.cc b/sql/field.cc index 6369834cb665d..3e6c7a3a60d0d 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -9159,6 +9159,15 @@ Field *Field_blob::make_new_field(MEM_ROOT *root, TABLE *newt, bool keep_type, res->init_for_make_new_field(newt, orig_table); return res; } + /* + MDEV-16699: Field_geom::store() lacks Blob_mem_storage support, + so GROUP_CONCAT with ORDER BY/DISTINCT on geometry columns would + read freed memory. Downgrade to plain Field_blob whose store() + routes data through table->blob_storage. + */ + if (newt->group_concat) + return new (root) Field_blob(field_length, maybe_null(), &field_name, + charset()); return Field::make_new_field(root, newt, keep_type, param); } diff --git a/storage/heap/ha_heap.cc b/storage/heap/ha_heap.cc index ef783e4416df8..d5de6ce0933bc 100644 --- a/storage/heap/ha_heap.cc +++ b/storage/heap/ha_heap.cc @@ -702,20 +702,19 @@ int heap_prepare_hp_create_info(TABLE *table_arg, bool internal_table, if (seg->flag & HA_BLOB_PART) { + /* + Blob key segment: 4-byte length + pointer to data. + Field_blob_key (GROUP BY) returns VARTEXT4/VARBINARY4. + Field_blob (DISTINCT/UNION) returns VARTEXT2/VARBINARY2. + Promote the latter to VARTEXT4 format so hp_create.c and the + hash functions handle them identically. 
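+
+        After promotion the segment describes (mirroring the
+        assignments below):
+          seg->type      HA_KEYTYPE_VARBINARY4 / HA_KEYTYPE_VARTEXT4
+          seg->bit_start pack_length_no_ptr(), i.e. 1..4 length bytes
+          seg->length    4 + portable_sizeof_char_ptr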
+ */ if (seg->type == HA_KEYTYPE_VARBINARY2 || seg->type == HA_KEYTYPE_VARTEXT2) - { - /* This is is a geometry field using VARCHAR packing */ - seg->flag&= ~HA_BLOB_PART; - } - else - { - /* Blob key with a 4 byte length and a pointer to data */ - DBUG_ASSERT(seg->length == 4 + portable_sizeof_char_ptr); - DBUG_ASSERT(field->key_type() == HA_KEYTYPE_VARBINARY4 || - field->key_type() == HA_KEYTYPE_VARTEXT4); - seg->bit_start= ((Field_blob*) field)->pack_length_no_ptr(); - } + seg->type= (seg->type == HA_KEYTYPE_VARBINARY2) ? + HA_KEYTYPE_VARBINARY4 : HA_KEYTYPE_VARTEXT4; + seg->bit_start= ((Field_blob*) field)->pack_length_no_ptr(); + seg->length= 4 + portable_sizeof_char_ptr; } if (field->flags & (ENUM_FLAG | SET_FLAG)) From 53ad2eee7a78be4dcb9d89d14921973ee67e6bea Mon Sep 17 00:00:00 2001 From: Arcadiy Ivanov Date: Fri, 1 May 2026 14:27:23 -0400 Subject: [PATCH 21/27] Add stress test for HEAP blob insert/delete/update cycles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 3-phase `heap/blob_stress` MTR test exercising free-list fragmentation, continuation chain reuse, and data integrity under sustained mixed DML: - Phase 1: 200-cycle stored procedure — 5 inserts, 2 deletes, 3 updates per cycle with blob sizes cycling through Case A/B/C; shadow table row count verification with `SIGNAL` on mismatch - Phase 2: near-capacity (2 MB) fill/fragment/refill with free-list scavenging, then full-delete reinsert - Phase 3: 20 grow/shrink `UPDATE` cycles (even rows 10-18 KB, odd rows 5-20 bytes) All phases verify blob content integrity via single-character `REPEAT` pattern check. Addresses Monty's F14 feedback. --- mysql-test/suite/heap/blob_stress.result | 183 ++++++++++++++ mysql-test/suite/heap/blob_stress.test | 297 +++++++++++++++++++++++ 2 files changed, 480 insertions(+) create mode 100644 mysql-test/suite/heap/blob_stress.result create mode 100644 mysql-test/suite/heap/blob_stress.test diff --git a/mysql-test/suite/heap/blob_stress.result b/mysql-test/suite/heap/blob_stress.result new file mode 100644 index 0000000000000..f735a6ad26e0b --- /dev/null +++ b/mysql-test/suite/heap/blob_stress.result @@ -0,0 +1,183 @@ +drop table if exists t1, t_verify; +# +# Phase 1: Sustained insert/delete/update churn (200 cycles) +# +set @save_max_heap= @@max_heap_table_size; +set max_heap_table_size= 64*1024*1024; +create table t1 ( +id int not null primary key, +b1 blob, +b2 blob, +v int not null default 0 +) engine=memory; +create table t_verify ( +id int not null primary key +) engine=memory; +create procedure blob_stress(in cycles int) +begin +declare i int default 0; +declare base_id int; +declare sz1 int; +declare sz2 int; +declare del_id int; +declare heap_cnt int; +declare verify_cnt int; +while i < cycles do +set base_id = i * 5; +# Vary blob sizes: cycle through Case A (tiny), B (medium), C (large) +set sz1 = case i % 6 +when 0 then 5 + (i % 15) +when 1 then 2000 + (i % 10) * 500 +when 2 then 15000 + (i % 7) * 1000 +when 3 then 4000 + (i % 8) * 500 +when 4 then 18000 + (i % 5) * 1000 +else 10 + (i % 10) +end; +set sz2 = case (i + 3) % 6 +when 0 then 8 + (i % 12) +when 1 then 3000 + (i % 9) * 400 +when 2 then 14000 + (i % 6) * 800 +when 3 then 5000 + (i % 11) * 300 +when 4 then 16000 + (i % 4) * 1000 +else 15 + (i % 8) +end; +# INSERT 5 rows per iteration with different blob patterns +insert into t1 values +(base_id, repeat(char(65 + (i % 26)), sz1), +repeat(char(97 + (i % 26)), sz2), i); +insert into t_verify values (base_id); +insert into t1 values 
+(base_id + 1, repeat(char(66 + (i % 26)), sz2), null, i); +insert into t_verify values (base_id + 1); +insert into t1 values +(base_id + 2, null, repeat(char(67 + (i % 26)), sz1), i); +insert into t_verify values (base_id + 2); +insert into t1 values +(base_id + 3, repeat(char(68 + (i % 26)), greatest(sz1, sz2)), +repeat(char(69 + (i % 26)), least(sz1, sz2)), i); +insert into t_verify values (base_id + 3); +insert into t1 values +(base_id + 4, repeat(char(70 + (i % 26)), sz1 div 2), +repeat(char(71 + (i % 26)), sz2 div 2), i); +insert into t_verify values (base_id + 4); +# DELETE rows from earlier iterations (free-list churn) +if i >= 3 then +set del_id = (i - 3) * 5 + (i % 3); +delete from t1 where id = del_id; +delete from t_verify where id = del_id; +set del_id = (i - 2) * 5 + ((i + 1) % 5); +delete from t1 where id = del_id; +delete from t_verify where id = del_id; +end if; +# UPDATE: grow small blobs to large, shrink large to small, +# update non-blob column (blob chains must survive) +if i >= 2 then +update t1 set b1 = repeat(char(72 + (i % 26)), 20000 + (i % 3) * 2000) +where id = (i - 2) * 5 and b1 is not null; +update t1 set b2 = repeat(char(73 + (i % 26)), 8 + (i % 10)) +where id = (i - 2) * 5 + 3; +update t1 set v = v + 100 where id = (i - 1) * 5 + 1; +end if; +# Inline integrity check every 50 iterations +if i % 50 = 49 then +set heap_cnt = (select count(*) from t1); +set verify_cnt = (select count(*) from t_verify); +if heap_cnt != verify_cnt then +signal sqlstate '45000' + set message_text = 'Row count mismatch during stress'; +end if; +end if; +set i = i + 1; +end while; +end | +# Phase 1 verification +check table t1; +Table Op Msg_type Msg_text +test.t1 check note The storage engine for the table doesn't support check +select (select count(*) from t1) = (select count(*) from t_verify) +as row_counts_match; +row_counts_match +1 +select count(*) as missing_from_heap +from t_verify where id not in (select id from t1); +missing_from_heap +0 +select count(*) as extra_in_heap +from t1 where id not in (select id from t_verify); +extra_in_heap +0 +select count(*) as corrupted_b1 from t1 +where b1 is not null and length(b1) > 0 +and b1 != repeat(left(b1, 1), length(b1)); +corrupted_b1 +0 +select count(*) as corrupted_b2 from t1 +where b2 is not null and length(b2) > 0 +and b2 != repeat(left(b2, 1), length(b2)); +corrupted_b2 +0 +drop procedure blob_stress; +drop table t1, t_verify; +set max_heap_table_size= @save_max_heap; +# +# Phase 2: Near-capacity stress with fragmentation recovery +# +set @save_max_heap= @@max_heap_table_size; +set max_heap_table_size= 2*1024*1024; +create table t1 ( +id int not null primary key, +b blob +) engine=memory; +select count(*) as rows_after_fill from t1; +rows_after_fill +100 +select count(*) as rows_after_fragment from t1; +rows_after_fragment +66 +# Phase 2 verification after fragmented reinsert +check table t1; +Table Op Msg_type Msg_text +test.t1 check note The storage engine for the table doesn't support check +select count(*) as corrupted from t1 +where b is not null and length(b) > 0 +and b != repeat(left(b, 1), length(b)); +corrupted +0 +delete from t1; +# Phase 2 verification after full-delete reinsert +check table t1; +Table Op Msg_type Msg_text +test.t1 check note The storage engine for the table doesn't support check +select count(*) as final_rows from t1; +final_rows +80 +select count(*) as corrupted from t1 +where b is not null and length(b) > 0 +and b != repeat(left(b, 1), length(b)); +corrupted +0 +drop table t1; +set 
max_heap_table_size= @save_max_heap; +# +# Phase 3: Repeated UPDATE grow/shrink cycles +# +set @save_max_heap= @@max_heap_table_size; +set max_heap_table_size= 2*1024*1024; +create table t1 ( +id int not null primary key, +b longblob +) engine=memory; +# Phase 3 verification +check table t1; +Table Op Msg_type Msg_text +test.t1 check note The storage engine for the table doesn't support check +select count(*) as row_count from t1; +row_count +50 +select count(*) as corrupted from t1 +where b is not null and length(b) > 0 +and b != repeat(left(b, 1), length(b)); +corrupted +0 +drop table t1; +set max_heap_table_size= @save_max_heap; diff --git a/mysql-test/suite/heap/blob_stress.test b/mysql-test/suite/heap/blob_stress.test new file mode 100644 index 0000000000000..2245c6ecab908 --- /dev/null +++ b/mysql-test/suite/heap/blob_stress.test @@ -0,0 +1,297 @@ +# +# MDEV-38975: Stress test for HEAP blob insert/delete/update cycles +# +# Exercises free-list fragmentation, continuation chain reuse, +# allocation/deallocation balance, and data integrity under +# sustained mixed DML workload with varying blob sizes. +# +# Phase 1: 200-cycle stored procedure with mixed INSERT/DELETE/UPDATE, +# varying blob sizes (Case A/B/C), shadow table verification. +# Phase 2: Near-capacity fill/fragment/refill with free-list scavenging. +# Phase 3: Repeated UPDATE grow/shrink cycles. +# + +--disable_warnings +drop table if exists t1, t_verify; +--enable_warnings + +--echo # +--echo # Phase 1: Sustained insert/delete/update churn (200 cycles) +--echo # + +set @save_max_heap= @@max_heap_table_size; +set max_heap_table_size= 64*1024*1024; + +create table t1 ( + id int not null primary key, + b1 blob, + b2 blob, + v int not null default 0 +) engine=memory; + +create table t_verify ( + id int not null primary key +) engine=memory; + +delimiter |; + +create procedure blob_stress(in cycles int) +begin + declare i int default 0; + declare base_id int; + declare sz1 int; + declare sz2 int; + declare del_id int; + declare heap_cnt int; + declare verify_cnt int; + + while i < cycles do + set base_id = i * 5; + + # Vary blob sizes: cycle through Case A (tiny), B (medium), C (large) + set sz1 = case i % 6 + when 0 then 5 + (i % 15) + when 1 then 2000 + (i % 10) * 500 + when 2 then 15000 + (i % 7) * 1000 + when 3 then 4000 + (i % 8) * 500 + when 4 then 18000 + (i % 5) * 1000 + else 10 + (i % 10) + end; + + set sz2 = case (i + 3) % 6 + when 0 then 8 + (i % 12) + when 1 then 3000 + (i % 9) * 400 + when 2 then 14000 + (i % 6) * 800 + when 3 then 5000 + (i % 11) * 300 + when 4 then 16000 + (i % 4) * 1000 + else 15 + (i % 8) + end; + + # INSERT 5 rows per iteration with different blob patterns + insert into t1 values + (base_id, repeat(char(65 + (i % 26)), sz1), + repeat(char(97 + (i % 26)), sz2), i); + insert into t_verify values (base_id); + + insert into t1 values + (base_id + 1, repeat(char(66 + (i % 26)), sz2), null, i); + insert into t_verify values (base_id + 1); + + insert into t1 values + (base_id + 2, null, repeat(char(67 + (i % 26)), sz1), i); + insert into t_verify values (base_id + 2); + + insert into t1 values + (base_id + 3, repeat(char(68 + (i % 26)), greatest(sz1, sz2)), + repeat(char(69 + (i % 26)), least(sz1, sz2)), i); + insert into t_verify values (base_id + 3); + + insert into t1 values + (base_id + 4, repeat(char(70 + (i % 26)), sz1 div 2), + repeat(char(71 + (i % 26)), sz2 div 2), i); + insert into t_verify values (base_id + 4); + + # DELETE rows from earlier iterations (free-list churn) + if i >= 3 then + 
set del_id = (i - 3) * 5 + (i % 3); + delete from t1 where id = del_id; + delete from t_verify where id = del_id; + + set del_id = (i - 2) * 5 + ((i + 1) % 5); + delete from t1 where id = del_id; + delete from t_verify where id = del_id; + end if; + + # UPDATE: grow small blobs to large, shrink large to small, + # update non-blob column (blob chains must survive) + if i >= 2 then + update t1 set b1 = repeat(char(72 + (i % 26)), 20000 + (i % 3) * 2000) + where id = (i - 2) * 5 and b1 is not null; + + update t1 set b2 = repeat(char(73 + (i % 26)), 8 + (i % 10)) + where id = (i - 2) * 5 + 3; + + update t1 set v = v + 100 where id = (i - 1) * 5 + 1; + end if; + + # Inline integrity check every 50 iterations + if i % 50 = 49 then + set heap_cnt = (select count(*) from t1); + set verify_cnt = (select count(*) from t_verify); + if heap_cnt != verify_cnt then + signal sqlstate '45000' + set message_text = 'Row count mismatch during stress'; + end if; + end if; + + set i = i + 1; + end while; +end | + +delimiter ;| + +--disable_query_log +call blob_stress(200); +--enable_query_log + +--echo # Phase 1 verification +check table t1; + +select (select count(*) from t1) = (select count(*) from t_verify) + as row_counts_match; + +select count(*) as missing_from_heap + from t_verify where id not in (select id from t1); + +select count(*) as extra_in_heap + from t1 where id not in (select id from t_verify); + +# Every non-NULL blob must be a repeat() of a single character. +# Corruption would produce mixed characters. +select count(*) as corrupted_b1 from t1 + where b1 is not null and length(b1) > 0 + and b1 != repeat(left(b1, 1), length(b1)); + +select count(*) as corrupted_b2 from t1 + where b2 is not null and length(b2) > 0 + and b2 != repeat(left(b2, 1), length(b2)); + +drop procedure blob_stress; +drop table t1, t_verify; +set max_heap_table_size= @save_max_heap; + +--echo # +--echo # Phase 2: Near-capacity stress with fragmentation recovery +--echo # + +set @save_max_heap= @@max_heap_table_size; +set max_heap_table_size= 2*1024*1024; + +create table t1 ( + id int not null primary key, + b blob +) engine=memory; + +# Fill table with medium blobs to approach capacity +--disable_query_log +--let $i= 0 +while ($i < 100) +{ + eval insert into t1 values ($i, repeat(char(65 + ($i % 26)), 5000 + ($i % 10) * 500)); + --inc $i +} +--enable_query_log + +select count(*) as rows_after_fill from t1; + +# Delete every third row to fragment the free list +--disable_query_log +--let $i= 0 +while ($i < 100) +{ + eval delete from t1 where id= $i; + --inc $i + --inc $i + --inc $i +} +--enable_query_log + +select count(*) as rows_after_fragment from t1; + +# Reinsert with different sizes — exercises free-list scavenging +--disable_query_log +--let $i= 200 +while ($i < 260) +{ + eval insert into t1 values ($i, repeat(char(65 + ($i % 26)), 4000 + ($i % 7) * 500)); + --inc $i +} +--enable_query_log + +--echo # Phase 2 verification after fragmented reinsert +check table t1; + +select count(*) as corrupted from t1 + where b is not null and length(b) > 0 + and b != repeat(left(b, 1), length(b)); + +# Delete all and reinsert — full free-list reuse +delete from t1; + +--disable_query_log +--let $i= 300 +while ($i < 380) +{ + eval insert into t1 values ($i, repeat(char(65 + ($i % 26)), 3000 + ($i % 12) * 500)); + --inc $i +} +--enable_query_log + +--echo # Phase 2 verification after full-delete reinsert +check table t1; + +select count(*) as final_rows from t1; + +select count(*) as corrupted from t1 + where b is not null and 
length(b) > 0 + and b != repeat(left(b, 1), length(b)); + +drop table t1; +set max_heap_table_size= @save_max_heap; + +--echo # +--echo # Phase 3: Repeated UPDATE grow/shrink cycles +--echo # + +set @save_max_heap= @@max_heap_table_size; +set max_heap_table_size= 2*1024*1024; + +create table t1 ( + id int not null primary key, + b longblob +) engine=memory; + +# Seed with small blobs +--disable_query_log +--let $i= 0 +while ($i < 50) +{ + eval insert into t1 values ($i, repeat(char(65 + ($i % 26)), 100)); + --inc $i +} +--enable_query_log + +# 20 grow/shrink cycles: even rows grow large, odd rows shrink to tiny +--disable_query_log +--let $cycle= 0 +while ($cycle < 20) +{ + --let $i= 0 + while ($i < 50) + { + eval update t1 set b= repeat(char(65 + (($i + $cycle) % 26)), 10000 + ($cycle % 5) * 2000) where id= $i; + --inc $i + --inc $i + } + --let $i= 1 + while ($i < 50) + { + eval update t1 set b= repeat(char(97 + (($i + $cycle) % 26)), 5 + ($cycle % 15)) where id= $i; + --inc $i + --inc $i + } + --inc $cycle +} +--enable_query_log + +--echo # Phase 3 verification +check table t1; + +select count(*) as row_count from t1; + +select count(*) as corrupted from t1 + where b is not null and length(b) > 0 + and b != repeat(left(b, 1), length(b)); + +drop table t1; +set max_heap_table_size= @save_max_heap; From 80c97f0a949510030af19a8a7185295f51dbc0a9 Mon Sep 17 00:00:00 2001 From: Arcadiy Ivanov Date: Fri, 1 May 2026 15:41:13 -0400 Subject: [PATCH 22/27] Add `DBUG_ASSERT` guards for MSAN regression fixes `Field_geom::store()`: assert `blob_storage` is not set, catching any future removal of the MDEV-16699 `group_concat` downgrade in `Field_blob::make_new_field()`. `heap_prepare_hp_create_info()`: after `HA_BLOB_PART` promotion, assert key type is `VARTEXT4`/`VARBINARY4` and `bit_start` is 1-4, catching blob key segments that were not promoted from `VARTEXT2`. --- sql/sql_type_geom.cc | 1 + storage/heap/ha_heap.cc | 3 +++ 2 files changed, 4 insertions(+) diff --git a/sql/sql_type_geom.cc b/sql/sql_type_geom.cc index cc2164fa1b644..6e571dd2b9a93 100644 --- a/sql/sql_type_geom.cc +++ b/sql/sql_type_geom.cc @@ -840,6 +840,7 @@ int Field_geom::store_decimal(const my_decimal *) int Field_geom::store(const char *from, size_t length, CHARSET_INFO *cs) { + DBUG_ASSERT(!table->blob_storage); if (!length) bzero(ptr, Field_blob::pack_length()); else diff --git a/storage/heap/ha_heap.cc b/storage/heap/ha_heap.cc index d5de6ce0933bc..e870d0b77bf84 100644 --- a/storage/heap/ha_heap.cc +++ b/storage/heap/ha_heap.cc @@ -715,6 +715,9 @@ int heap_prepare_hp_create_info(TABLE *table_arg, bool internal_table, HA_KEYTYPE_VARBINARY4 : HA_KEYTYPE_VARTEXT4; seg->bit_start= ((Field_blob*) field)->pack_length_no_ptr(); seg->length= 4 + portable_sizeof_char_ptr; + DBUG_ASSERT(seg->type == HA_KEYTYPE_VARBINARY4 || + seg->type == HA_KEYTYPE_VARTEXT4); + DBUG_ASSERT(seg->bit_start >= 1 && seg->bit_start <= 4); } if (field->flags & (ENUM_FLAG | SET_FLAG)) From 5d8482d50a876fd21d4a4576e78d23707ec067a6 Mon Sep 17 00:00:00 2001 From: Arcadiy Ivanov Date: Fri, 1 May 2026 15:58:08 -0400 Subject: [PATCH 23/27] Fix MSVC `C4267` warnings: `size_t` to narrower type conversions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - `Field_blob::get_key_image_itRAW` and `Field_blob::key_cmp`: cast `local_char_length` to `uint32` in `set_if_smaller()` — safe because `charpos()` is bounded by `blob_length` which is already `uint32`. 
- `hp_test_hash-t.c`: widen `blob_len` parameter from `uint16` to `size_t` in `build_record()` and `build_mixed_record()` to match `LEX_CUSTRING::length` type. --- sql/field.cc | 4 ++-- storage/heap/hp_test_hash-t.c | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/sql/field.cc b/sql/field.cc index 3e6c7a3a60d0d..affd5af4d8796 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -9090,7 +9090,7 @@ uint Field_blob::get_key_image_itRAW(const uchar *ptr_arg, uchar *buff, size_t local_char_length= length / mbmaxlen(); local_char_length= field_charset()->charpos(blob, blob + blob_length, local_char_length); - set_if_smaller(blob_length, local_char_length); + set_if_smaller(blob_length, (uint32) local_char_length); if (length > blob_length) { @@ -9125,7 +9125,7 @@ int Field_blob::key_cmp(const uchar *key_ptr, uint max_key_length) const size_t local_char_length= max_key_length / cs->mbmaxlen; local_char_length= cs->charpos(blob1, blob1+blob_length, local_char_length); - set_if_smaller(blob_length, local_char_length); + set_if_smaller(blob_length, (uint32) local_char_length); return Field_blob::cmp(blob1, (uint32)blob_length, key_ptr+HA_KEY_BLOB_LENGTH, uint2korr(key_ptr)); diff --git a/storage/heap/hp_test_hash-t.c b/storage/heap/hp_test_hash-t.c index 49e2380cadeae..67391b451d5bc 100644 --- a/storage/heap/hp_test_hash-t.c +++ b/storage/heap/hp_test_hash-t.c @@ -98,7 +98,7 @@ static void setup_keydef(HP_KEYDEF *keydef, HA_KEYSEG *seg, uint keysegs) Sets the blob field to point to blob_data with blob_len bytes. */ static void build_record(uchar *rec, int32 int_val, - const uchar *blob_data, uint16 blob_len, + const uchar *blob_data, size_t blob_len, my_bool blob_is_null) { memset(rec, 0, REC_LENGTH); @@ -138,7 +138,7 @@ static void test_hash_consistency(void) /* Case C: larger blob data (would need multiple runs in real storage) */ uchar data_c[200]; - uint16 len_c= sizeof(data_c); + size_t len_c= sizeof(data_c); memset(data_c, 'X', sizeof(data_c)); /* Make it non-uniform so hash is more interesting */ data_c[0]= 'A'; @@ -767,8 +767,8 @@ static void setup_mixed_keydef(HP_KEYDEF *keydef, HA_KEYSEG *segs) static void build_mixed_record(uchar *rec, const uchar *blob_data, - uint16 blob_len, const uchar *varchar_data, - uint8 varchar_len, + size_t blob_len, const uchar *varchar_data, + size_t varchar_len, my_bool blob_null, my_bool varchar_null) { memset(rec, 0, MIX_REC_LENGTH); @@ -780,7 +780,7 @@ static void build_mixed_record(uchar *rec, const uchar *blob_data, rec[MIX_NULL_OFFSET] |= 8; /* varchar: 1-byte length prefix + data */ - rec[MIX_VARCHAR_OFFSET]= varchar_len; + rec[MIX_VARCHAR_OFFSET]= (uchar) varchar_len; if (varchar_data && varchar_len > 0) memcpy(rec + MIX_VARCHAR_OFFSET + MIX_VARCHAR_LENBYTES, varchar_data, varchar_len); From 0685b6c8269eac2cb632f9048fdda9d11b57f5a5 Mon Sep 17 00:00:00 2001 From: Arcadiy Ivanov Date: Fri, 1 May 2026 18:42:34 -0400 Subject: [PATCH 24/27] Overflow-to-Aria on `ha_update_tmp_row()` for GROUP BY temp tables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When `ha_update_tmp_row()` fails with `HA_ERR_RECORD_FILE_FULL` on a HEAP temp table (e.g., `MAX(TEXT)` aggregate growing the blob during GROUP BY accumulation), convert the table to Aria and retry the update — matching the existing INSERT overflow handling. **Mechanism**: `create_internal_tmp_table_from_heap()` copies all rows; `record[0]` write is rejected as duplicate (same GROUP BY key). 
`get_dup_key()` populates `dup_ref`, then `ha_rnd_pos()` locates the old row in Aria for the update. **Two call sites fixed**: - `end_update()`: switches to `end_unique_update` after conversion - `end_unique_update()`: restores INDEX mode via `rnd_inited` flag `GROUP_CONCAT` does NOT trigger this path — its `update_field()` is `DBUG_ASSERT(0)` (it accumulates internally). `MAX(TEXT)` / `MIN(TEXT)` are the aggregates that write growing blobs via `result_field->store()`. --- .../suite/heap/blob_update_overflow.result | 111 ++++++++++++++++ .../suite/heap/blob_update_overflow.test | 122 ++++++++++++++++++ sql/sql_select.cc | 64 ++++++++- 3 files changed, 293 insertions(+), 4 deletions(-) create mode 100644 mysql-test/suite/heap/blob_update_overflow.result create mode 100644 mysql-test/suite/heap/blob_update_overflow.test diff --git a/mysql-test/suite/heap/blob_update_overflow.result b/mysql-test/suite/heap/blob_update_overflow.result new file mode 100644 index 0000000000000..5d3f2c8a07e8e --- /dev/null +++ b/mysql-test/suite/heap/blob_update_overflow.result @@ -0,0 +1,111 @@ +# +# MDEV-38975: Overflow-to-Aria on UPDATE path for GROUP BY temp tables +# +# When ha_update_tmp_row() fails with HA_ERR_RECORD_FILE_FULL on a +# HEAP temp table (blob aggregate grows during UPDATE), the server +# should transparently convert to Aria and retry the update, just +# as the INSERT path already does. +# +# MAX(TEXT) with monotonically growing values triggers this: each +# update stores a longer blob via result_field->store(), and +# heap_update()'s write-before-free strategy requires old + new +# continuation chains to coexist briefly, exhausting capacity. +# +SET @save_max_heap= @@max_heap_table_size; +SET @save_tmp= @@tmp_table_size; +# +# Test 1: MAX(TEXT) overflow during end_update() +# +# 3 groups x 50 interleaved rows. val = REPEAT('a', seq * 200). +# Since seq grows, each group's MAX increases on every update. +# The HEAP temp table (32KB, recbuffer=16, ~2048 records) overflows +# when per-group blob chains reach ~500 records (~8KB each). +# +SET max_heap_table_size = 32768; +SET tmp_table_size = 32768; +CREATE TABLE t1 (grp INT, val TEXT); +INSERT INTO t1 SELECT (seq % 3) + 1, REPEAT('a', seq * 200) +FROM seq_1_to_150; +FLUSH STATUS; +# Should overflow during UPDATE and convert HEAP->Aria transparently +SELECT grp, LENGTH(MAX(val)) AS max_len +FROM t1 GROUP BY grp ORDER BY grp; +grp max_len +1 30000 +2 29600 +3 29800 +# Verify HEAP->Aria conversion happened +SHOW STATUS LIKE 'Created_tmp_disk_tables'; +Variable_name Value +Created_tmp_disk_tables 1 +DROP TABLE t1; +# +# Test 2: Verify result correctness after overflow +# +# Same data but also check the actual MAX content, not just length. 
+# +CREATE TABLE t1 (grp INT, val TEXT); +INSERT INTO t1 SELECT (seq % 3) + 1, REPEAT('a', seq * 200) +FROM seq_1_to_150; +# Group 1 gets seq=3,6,...,150 -> MAX is REPEAT('a', 30000) +# Group 2 gets seq=1,4,...,148 -> MAX is REPEAT('a', 29600) +# Group 3 gets seq=2,5,...,149 -> MAX is REPEAT('a', 29800) +SELECT grp, +LENGTH(MAX(val)) AS max_len, +LEFT(MAX(val), 5) AS prefix, +RIGHT(MAX(val), 5) AS suffix +FROM t1 GROUP BY grp ORDER BY grp; +grp max_len prefix suffix +1 30000 aaaaa aaaaa +2 29600 aaaaa aaaaa +3 29800 aaaaa aaaaa +DROP TABLE t1; +# +# Test 3: Multiple blob aggregates (two MAX columns) +# +CREATE TABLE t1 (grp INT, v1 TEXT, v2 TEXT); +INSERT INTO t1 SELECT (seq % 3) + 1, +REPEAT('x', seq * 200), +REPEAT('y', seq * 150) +FROM seq_1_to_150; +SELECT grp, +LENGTH(MAX(v1)) AS max1_len, +LENGTH(MAX(v2)) AS max2_len +FROM t1 GROUP BY grp ORDER BY grp; +grp max1_len max2_len +1 30000 22500 +2 29600 22200 +3 29800 22350 +DROP TABLE t1; +# +# Test 4: MIN(TEXT) with monotonically shrinking minimum +# +# Rows with decreasing-length values for MIN. Since shorter +# all-'a' strings are "less than" longer ones under PAD SPACE, +# MIN keeps getting replaced with shorter values. The blob +# doesn't grow, but the churn of rewriting chains can cause +# overflow when combined with other groups' large blobs. +# (MIN shrinks blobs, so this primarily tests that the conversion +# path works even when some groups have small blobs.) +# +CREATE TABLE t1 (grp INT, val TEXT); +# Mix growing and fixed-size values +INSERT INTO t1 SELECT (seq % 3) + 1, +CASE WHEN seq % 2 = 0 THEN REPEAT('b', seq * 200) +ELSE REPEAT('a', 50) +END +FROM seq_1_to_150; +SELECT grp, +LENGTH(MAX(val)) AS max_len, +LENGTH(MIN(val)) AS min_len +FROM t1 GROUP BY grp ORDER BY grp; +grp max_len min_len +1 30000 50 +2 29600 50 +3 29200 50 +DROP TABLE t1; +# +# Cleanup +# +SET max_heap_table_size = @save_max_heap; +SET tmp_table_size = @save_tmp; diff --git a/mysql-test/suite/heap/blob_update_overflow.test b/mysql-test/suite/heap/blob_update_overflow.test new file mode 100644 index 0000000000000..b20db3222f36a --- /dev/null +++ b/mysql-test/suite/heap/blob_update_overflow.test @@ -0,0 +1,122 @@ +--source include/have_sequence.inc + +--echo # +--echo # MDEV-38975: Overflow-to-Aria on UPDATE path for GROUP BY temp tables +--echo # +--echo # When ha_update_tmp_row() fails with HA_ERR_RECORD_FILE_FULL on a +--echo # HEAP temp table (blob aggregate grows during UPDATE), the server +--echo # should transparently convert to Aria and retry the update, just +--echo # as the INSERT path already does. +--echo # +--echo # MAX(TEXT) with monotonically growing values triggers this: each +--echo # update stores a longer blob via result_field->store(), and +--echo # heap_update()'s write-before-free strategy requires old + new +--echo # continuation chains to coexist briefly, exhausting capacity. +--echo # + +SET @save_max_heap= @@max_heap_table_size; +SET @save_tmp= @@tmp_table_size; + +--echo # +--echo # Test 1: MAX(TEXT) overflow during end_update() +--echo # +--echo # 3 groups x 50 interleaved rows. val = REPEAT('a', seq * 200). +--echo # Since seq grows, each group's MAX increases on every update. +--echo # The HEAP temp table (32KB, recbuffer=16, ~2048 records) overflows +--echo # when per-group blob chains reach ~500 records (~8KB each). 
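+# (Non-echoed note; arithmetic assumes the recbuffer=16 layout cited
+# above: the final per-group MAX of 30000 bytes alone spans roughly
+# 30000/16 ~ 1875 records while the 32KB heap holds ~2048 in total,
+# so the old and new chains cannot coexist during the update.)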
+--echo # + +SET max_heap_table_size = 32768; +SET tmp_table_size = 32768; + +CREATE TABLE t1 (grp INT, val TEXT); +INSERT INTO t1 SELECT (seq % 3) + 1, REPEAT('a', seq * 200) +FROM seq_1_to_150; + +--disable_ps2_protocol +--disable_view_protocol +--disable_cursor_protocol +FLUSH STATUS; + +--echo # Should overflow during UPDATE and convert HEAP->Aria transparently +SELECT grp, LENGTH(MAX(val)) AS max_len +FROM t1 GROUP BY grp ORDER BY grp; + +--echo # Verify HEAP->Aria conversion happened +SHOW STATUS LIKE 'Created_tmp_disk_tables'; +--enable_cursor_protocol +--enable_view_protocol +--enable_ps2_protocol + +DROP TABLE t1; + +--echo # +--echo # Test 2: Verify result correctness after overflow +--echo # +--echo # Same data but also check the actual MAX content, not just length. +--echo # + +CREATE TABLE t1 (grp INT, val TEXT); +INSERT INTO t1 SELECT (seq % 3) + 1, REPEAT('a', seq * 200) +FROM seq_1_to_150; + +--echo # Group 1 gets seq=3,6,...,150 -> MAX is REPEAT('a', 30000) +--echo # Group 2 gets seq=1,4,...,148 -> MAX is REPEAT('a', 29600) +--echo # Group 3 gets seq=2,5,...,149 -> MAX is REPEAT('a', 29800) +SELECT grp, + LENGTH(MAX(val)) AS max_len, + LEFT(MAX(val), 5) AS prefix, + RIGHT(MAX(val), 5) AS suffix +FROM t1 GROUP BY grp ORDER BY grp; + +DROP TABLE t1; + +--echo # +--echo # Test 3: Multiple blob aggregates (two MAX columns) +--echo # + +CREATE TABLE t1 (grp INT, v1 TEXT, v2 TEXT); +INSERT INTO t1 SELECT (seq % 3) + 1, + REPEAT('x', seq * 200), + REPEAT('y', seq * 150) +FROM seq_1_to_150; + +SELECT grp, + LENGTH(MAX(v1)) AS max1_len, + LENGTH(MAX(v2)) AS max2_len +FROM t1 GROUP BY grp ORDER BY grp; + +DROP TABLE t1; + +--echo # +--echo # Test 4: MIN(TEXT) with monotonically shrinking minimum +--echo # +--echo # Rows with decreasing-length values for MIN. Since shorter +--echo # all-'a' strings are "less than" longer ones under PAD SPACE, +--echo # MIN keeps getting replaced with shorter values. The blob +--echo # doesn't grow, but the churn of rewriting chains can cause +--echo # overflow when combined with other groups' large blobs. +--echo # (MIN shrinks blobs, so this primarily tests that the conversion +--echo # path works even when some groups have small blobs.) +--echo # + +CREATE TABLE t1 (grp INT, val TEXT); +--echo # Mix growing and fixed-size values +INSERT INTO t1 SELECT (seq % 3) + 1, + CASE WHEN seq % 2 = 0 THEN REPEAT('b', seq * 200) + ELSE REPEAT('a', 50) + END +FROM seq_1_to_150; + +SELECT grp, + LENGTH(MAX(val)) AS max_len, + LENGTH(MIN(val)) AS min_len +FROM t1 GROUP BY grp ORDER BY grp; + +DROP TABLE t1; + +--echo # +--echo # Cleanup +--echo # +SET max_heap_table_size = @save_max_heap; +SET tmp_table_size = @save_tmp; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 396829d5a6dae..c09e414ebb781 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -24875,6 +24875,60 @@ end_write(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), @seealso end_unique_update() */ +/* + Convert a HEAP temp table to Aria after ha_update_tmp_row() fails with + HA_ERR_RECORD_FILE_FULL, then re-locate the duplicate row and retry the + update. + + After conversion the HEAP blocks are freed, so any blob pointers in + record[0] that referenced HEAP memory become dangling. We re-derive + record[0] by reading the pre-update row from Aria (ha_rnd_pos into + record[1], restore_record, update_tmptable_sum_func) before retrying. 
+ update_field() is stateless (reads result_field + args[0], writes + result_field) so re-running it on the pre-update values is safe and + produces the correct post-update result. + + @return 0 success (table converted, update applied, index re-opened) + @return -1 error (already printed) +*/ + +static int +convert_heap_to_aria_update(JOIN *join, JOIN_TAB *join_tab, + TABLE *table, int error) +{ + bool is_duplicate; + if (create_internal_tmp_table_from_heap(join->thd, table, + join_tab->tmp_table_param->start_recinfo, + &join_tab->tmp_table_param->recinfo, + error, 1, &is_duplicate)) + return -1; + DBUG_ASSERT(is_duplicate); + table->file->get_dup_key(HA_ERR_FOUND_DUPP_KEY); + if (unlikely((error= table->file->ha_rnd_init(0)))) + { + table->file->print_error(error, MYF(0)); + return -1; + } + error= table->file->ha_rnd_pos(table->record[1], + table->file->dup_ref); + if (likely(!error)) + { + restore_record(table, record[1]); + update_tmptable_sum_func(join->sum_funcs, table); + error= table->file->ha_update_tmp_row(table->record[1], + table->record[0]); + } + if (unlikely(error) || + unlikely((error= table->file->ha_rnd_end())) || + unlikely((error= table->file->ha_index_init(0, 0)))) + { + table->file->print_error(error, MYF(0)); + return -1; + } + return 0; +} + + static enum_nested_loop_state end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), bool end_of_records) @@ -24917,8 +24971,9 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), if (unlikely((error= table->file->ha_update_tmp_row(table->record[1], table->record[0])))) { - table->file->print_error(error,MYF(0)); /* purecov: inspected */ - DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ + if (convert_heap_to_aria_update(join, join_tab, table, error)) + DBUG_RETURN(NESTED_LOOP_ERROR); + join_tab->aggr->set_write_func(end_unique_update); } goto end; } @@ -25036,8 +25091,9 @@ end_unique_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), if (unlikely((error= table->file->ha_update_tmp_row(table->record[1], table->record[0])))) { - table->file->print_error(error,MYF(0)); /* purecov: inspected */ - DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ + if (convert_heap_to_aria_update(join, join_tab, table, error)) + DBUG_RETURN(NESTED_LOOP_ERROR); + rnd_inited= true; } if (!rnd_inited && ((error= table->file->ha_rnd_end()) || From ed0e99d2f30b210c136552eba6e6821ae2fa3151 Mon Sep 17 00:00:00 2001 From: Arcadiy Ivanov Date: Fri, 1 May 2026 19:48:41 -0400 Subject: [PATCH 25/27] Reclaim tail records on failed blob allocation When `hp_write_one_blob()` fails partway through tail allocation (e.g. `HA_ERR_RECORD_FILE_FULL`), `hp_free_run_chain()` puts the partial chain onto the delete list. These records were just tail-allocated but once on the delete list they could only be reused via free-list scavenging, not tail allocation, so `last_allocated` only grew forward. Add `hp_shrink_tail()` which pops tail-positioned records from the delete list head and decrements `last_allocated`. Crosses block boundaries by locating the previous leaf block via `hp_find_block()` and updating `last_blocks`. Empty blocks stay allocated in the tree (freed at table drop). Add `high_water_allocated` to `HP_BLOCK` to track the peak `last_allocated` before shrinking. 
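
A worked example of the reclaim invariant (numbers invented for
illustration): with `records_in_block=1024` and `last_allocated=1029`,
five records sit in the second leaf block. If the delete-list head
walks the tail in order (slot 1028, then 1027, ...), each pop
decrements both `deleted` and `last_allocated`, so
`total_records = last_allocated - deleted` never changes; after five
pops the second leaf is empty and `last_blocks` is rewound to the
first leaf.
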
In `hp_alloc_from_tail()`, when `block_pos == 0` and `last_allocated < high_water_allocated`, the next leaf block already exists in the tree: reuse it via `hp_find_block()` instead of calling `hp_get_new_block()`, avoiding memory waste from duplicate block allocations. Unit tests (41 new assertions in `hp_test_freelist-t.c`): - Single-block tail reclaim after failed blob insert - Cross-block reclaim (2 blocks, 1 boundary crossing) - 3-block reclaim (2 boundary crossings, `last_blocks` restoration) - Orphaned block reuse: non-blob and blob inserts fill reclaimed blocks without growing `data_length` --- include/heap.h | 1 + storage/heap/hp_blob.c | 77 +++++ storage/heap/hp_clear.c | 2 + storage/heap/hp_create.c | 1 + storage/heap/hp_test_freelist-t.c | 449 +++++++++++++++++++++++++++++- storage/heap/hp_write.c | 14 +- 6 files changed, 540 insertions(+), 4 deletions(-) diff --git a/include/heap.h b/include/heap.h index 79943b428dff6..e8648b30eda72 100644 --- a/include/heap.h +++ b/include/heap.h @@ -105,6 +105,7 @@ typedef struct st_heap_block uint recbuffer; /* Length of one saved record */ ulong records_in_block; /* Records in one heap-block */ ulong last_allocated; /* number of records there is allocated space for */ + ulong high_water_allocated; /* peak last_allocated before hp_shrink_tail() */ size_t alloc_size; /* Allocate blocks of this size */ } HP_BLOCK; diff --git a/storage/heap/hp_blob.c b/storage/heap/hp_blob.c index e75fd6fa9f87a..cd0153afdf3f6 100644 --- a/storage/heap/hp_blob.c +++ b/storage/heap/hp_blob.c @@ -35,6 +35,75 @@ +/* + Try to reclaim deleted records at the tail of the HP_BLOCK tree. + + After hp_free_run_chain() puts tail-allocated records onto the delete + list, this function checks if those records are at the current tail + (highest positions in the last leaf block). If so, it pops them from + the delete list and decrements last_allocated, effectively returning + them for future tail allocation. + + Crosses block boundaries by updating last_blocks to the previous + leaf via hp_find_block(). Empty blocks stay allocated in the tree + (freed at table drop) but their slots become available for reuse. + + Maintains the scan-boundary invariant: + total_records + deleted == block.last_allocated + Each reclaimed slot does deleted-- and last_allocated--, keeping the + difference (total_records) unchanged. + + @param share Table share +*/ + +static void hp_shrink_tail(HP_SHARE *share) +{ + HP_BLOCK *block= &share->block; + uint recbuffer= block->recbuffer; + ulong records_in_block= block->records_in_block; + + DBUG_ASSERT(share->total_records + share->deleted == + block->last_allocated); + + if (block->last_allocated > block->high_water_allocated) + block->high_water_allocated= block->last_allocated; + + while (share->del_link && block->last_allocated > 0) + { + ulong block_pos= block->last_allocated % records_in_block; + uchar *last_blocks= (uchar*) block->level_info[0].last_blocks; + uchar *tail_pos; + + if (block_pos == 0) + tail_pos= last_blocks + (records_in_block - 1) * recbuffer; + else + tail_pos= last_blocks + (block_pos - 1) * recbuffer; + + if (share->del_link != tail_pos) + break; + + share->del_link= *((uchar**) share->del_link); + share->deleted--; + block->last_allocated--; + + /* + When the current leaf block becomes empty (block_pos was 1), + find the previous leaf block so subsequent iterations can + continue reclaiming there. The empty block stays allocated + in the tree (freed at table drop). 
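+      (hp_find_block() resolves a record number through the HP_PTRS
+      level tree, so each emptied block costs one tree descent,
+      cheap next to the chain walk that freed the records.)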
+ */ + if (block_pos == 1 && block->last_allocated > 0) + { + uchar *prev_rec= hp_find_block(block, block->last_allocated - 1); + ulong prev_offset= + (block->last_allocated - 1) % records_in_block; + block->level_info[0].last_blocks= + (HP_PTRS*)(prev_rec - prev_offset * recbuffer); + } + } +} + + /* Free one continuation chain of variable-length runs. @@ -481,7 +550,12 @@ int hp_write_one_blob(HP_SHARE *share, const uchar *data_ptr, err: if (first_run) + { hp_free_run_chain(share, first_run); + hp_shrink_tail(share); + DBUG_ASSERT(share->total_records + share->deleted == + share->block.last_allocated); + } *first_run_out= NULL; return my_errno; } @@ -539,6 +613,9 @@ int hp_write_blobs(HP_INFO *info, const uchar *record, uchar *pos) hp_free_run_chain(share, chain); *((uchar**) (pos + rd->offset + rd->packlength))= NULL; } + hp_shrink_tail(share); + DBUG_ASSERT(share->total_records + share->deleted == + share->block.last_allocated); *((uchar**) (pos + desc->offset + desc->packlength))= NULL; DBUG_RETURN(my_errno); } diff --git a/storage/heap/hp_clear.c b/storage/heap/hp_clear.c index 9efb4170792a7..bde280f43487c 100644 --- a/storage/heap/hp_clear.c +++ b/storage/heap/hp_clear.c @@ -36,6 +36,7 @@ void hp_clear(HP_SHARE *info) (uchar*) 0); info->block.levels=0; info->block.last_allocated=0; + info->block.high_water_allocated=0; hp_clear_keys(info); info->records= info->deleted= info->total_records= 0; info->data_length= 0; @@ -101,6 +102,7 @@ void hp_clear_keys(HP_SHARE *info) (void) hp_free_level(block,block->levels,block->root,(uchar*) 0); block->levels=0; block->last_allocated=0; + block->high_water_allocated=0; keyinfo->hash_buckets= 0; } } diff --git a/storage/heap/hp_create.c b/storage/heap/hp_create.c index a4693f9679ba4..2da80f7502f43 100644 --- a/storage/heap/hp_create.c +++ b/storage/heap/hp_create.c @@ -416,6 +416,7 @@ static void init_block(HP_BLOCK *block, size_t reclength, ulong min_records, block->records_in_block= records_in_block; block->recbuffer= recbuffer; block->last_allocated= 0L; + block->high_water_allocated= 0L; /* All alloctions are done with this size, if possible */ block->alloc_size= alloc_size - MALLOC_OVERHEAD; diff --git a/storage/heap/hp_test_freelist-t.c b/storage/heap/hp_test_freelist-t.c index b296b16e4b19e..3b17a85ca6299 100644 --- a/storage/heap/hp_test_freelist-t.c +++ b/storage/heap/hp_test_freelist-t.c @@ -359,11 +359,446 @@ static void test_true_capacity_exhaustion(void) } +/* + Test: tail reclaim on failed blob allocation (single block). + + Fills a block to within 5 slots of capacity, locks out new block + allocation, then inserts a blob needing more records than the + remaining tail. The blob partially allocates from the tail, then + fails. hp_shrink_tail() must reclaim the tail-allocated records. 
+*/ + +static void test_tail_reclaim_single_block(void) +{ + HP_SHARE *share; + HP_INFO *info; + uchar rec[REC_LENGTH]; + int32 i; + int32 rib; + ulong last_alloc_before; + + uchar blob_data[200]; + memset(blob_data, 'T', sizeof(blob_data)); + + if (create_and_open("test_tail_reclaim_sb", &share, &info)) + { + ok(0, "setup failed: %d", my_errno); + skip(8, "setup failed"); + return; + } + + rib= (int32) share->block.records_in_block; + + /* Fill block to rib - 5 */ + for (i= 0; i < rib - 5; i++) + { + build_record(rec, i, (const uchar*) "", 0); + if (heap_write(info, rec) != 0) + break; + } + ok(i == rib - 5, "filled block to rib-5 (%d rows)", i); + + last_alloc_before= (ulong) share->block.last_allocated; + + /* Lock out new block allocation */ + share->max_table_size= share->data_length + share->index_length; + + /* Insert blob needing ~14 records — only 4 tail slots available for blob */ + build_record(rec, 99999, blob_data, sizeof(blob_data)); + ok(heap_write(info, rec) != 0, + "insert with 200-byte blob fails (not enough tail)"); + ok(my_errno == HA_ERR_RECORD_FILE_FULL, + "error is HA_ERR_RECORD_FILE_FULL (got %d)", my_errno); + + /* + After failed insert: 4 blob continuation records were tail-allocated + then reclaimed by hp_shrink_tail(). The primary record was allocated + (+1 to last_allocated) and freed to the delete list. + */ + ok(share->block.last_allocated == last_alloc_before + 1, + "last_allocated: 4 blob recs reclaimed, primary on free list " + "(expected %lu, got %lu)", + last_alloc_before + 1, (ulong) share->block.last_allocated); + ok(share->deleted == 1, + "deleted == 1 (primary on free list, got %lu)", + (ulong) share->deleted); + ok(share->total_records == (ulong)(rib - 5), + "total_records back to pre-insert (%lu)", + (ulong) share->total_records); + ok(share->total_records + share->deleted == share->block.last_allocated, + "invariant: total_records + deleted == last_allocated"); + + /* Verify existing data readable */ + { + uchar key[4]; + int4store(key, 0); + ok(heap_rkey(info, rec, 0, key, 4, HA_READ_KEY_EXACT) == 0, + "existing row 0 still readable"); + } + + /* Insert a small blob — should succeed using reclaimed tail */ + { + uchar small_blob[30]; + memset(small_blob, 's', sizeof(small_blob)); + build_record(rec, 88888, small_blob, sizeof(small_blob)); + ok(heap_write(info, rec) == 0, + "small blob insert succeeds using reclaimed tail"); + } + + heap_drop_table(info); + heap_close(info); +} + + +/* + Test: tail reclaim across block boundaries. + + Fills a block to within 5 slots of capacity, allows exactly 2 blocks, + then inserts a blob large enough to span both blocks and require a third. + The allocation fails at the third block. hp_shrink_tail() must reclaim + all records across both blocks and update last_blocks to the first block. 
+*/ + +static void test_tail_reclaim_cross_block(void) +{ + HP_SHARE *share; + HP_INFO *info; + uchar rec[REC_LENGTH]; + int32 i; + int32 rib; + ulong last_alloc_before; + HP_PTRS *first_block; + uchar *blob_data; + uint32 blob_len; + + if (create_and_open("test_tail_reclaim_xb", &share, &info)) + { + ok(0, "setup failed: %d", my_errno); + skip(8, "setup failed"); + return; + } + + rib= (int32) share->block.records_in_block; + + /* Fill block to rib - 5 */ + for (i= 0; i < rib - 5; i++) + { + build_record(rec, i, (const uchar*) "", 0); + if (heap_write(info, rec) != 0) + break; + } + ok(i == rib - 5, "filled block to rib-5 (%d rows)", i); + + last_alloc_before= (ulong) share->block.last_allocated; + first_block= share->block.level_info[0].last_blocks; + + /* Allow exactly 2 blocks, fail at 3rd */ + share->max_records= (ulong)(2 * rib - 1); + + /* + Blob large enough to need more than 4 + rib continuation records. + Each Case C record holds 16 bytes of payload (inner records) plus + 5 bytes in the first record of each run. Use (rib + 20) * 16 bytes + to ensure it exceeds 2 blocks. + */ + blob_len= (uint32)((rib + 20) * 16); + blob_data= (uchar*) my_malloc(PSI_NOT_INSTRUMENTED, blob_len, MYF(0)); + memset(blob_data, 'X', blob_len); + + build_record(rec, 99999, blob_data, (uint16) MY_MIN(blob_len, 65535)); + ok(heap_write(info, rec) != 0, + "insert with %u-byte blob fails (spans 2 blocks, needs 3rd)", + blob_len); + ok(my_errno == HA_ERR_RECORD_FILE_FULL, + "error is HA_ERR_RECORD_FILE_FULL (got %d)", my_errno); + + /* + All blob continuation records (4 from first block + rib from second) + were reclaimed by hp_shrink_tail(). Only the primary record remains + on the free list. + */ + ok(share->block.last_allocated == last_alloc_before + 1, + "last_allocated: all blob recs reclaimed across blocks " + "(expected %lu, got %lu)", + last_alloc_before + 1, (ulong) share->block.last_allocated); + ok(share->deleted == 1, + "deleted == 1 (primary on free list, got %lu)", + (ulong) share->deleted); + ok(share->total_records + share->deleted == share->block.last_allocated, + "invariant: total_records + deleted == last_allocated"); + + /* last_blocks must have been restored to the first block */ + ok(share->block.level_info[0].last_blocks == first_block, + "last_blocks restored to first block after cross-block reclaim"); + + /* Verify existing data readable */ + { + uchar key[4]; + int4store(key, 0); + ok(heap_rkey(info, rec, 0, key, 4, HA_READ_KEY_EXACT) == 0, + "existing row 0 still readable"); + } + + /* Insert a small blob — should succeed using reclaimed tail in first block */ + { + uchar small_blob[30]; + memset(small_blob, 's', sizeof(small_blob)); + build_record(rec, 88888, small_blob, sizeof(small_blob)); + ok(heap_write(info, rec) == 0, + "small blob insert succeeds using reclaimed tail in first block"); + } + + my_free(blob_data); + heap_drop_table(info); + heap_close(info); +} + + +/* + Test: tail reclaim across 3 block boundaries. + + Like test 6 but with a blob large enough to span 3 leaf blocks. + Verifies hp_find_block() correctly navigates the block tree when + hp_shrink_tail() crosses two block boundaries sequentially. 
+*/ + +static void test_tail_reclaim_three_blocks(void) +{ + HP_SHARE *share; + HP_INFO *info; + uchar rec[REC_LENGTH]; + int32 i; + int32 rib; + ulong last_alloc_before; + HP_PTRS *first_block; + uchar *blob_data; + uint32 blob_len; + + if (create_and_open("test_tail_reclaim_3b", &share, &info)) + { + ok(0, "setup failed: %d", my_errno); + skip(8, "setup failed"); + return; + } + + rib= (int32) share->block.records_in_block; + + /* Fill block to rib - 5 */ + for (i= 0; i < rib - 5; i++) + { + build_record(rec, i, (const uchar*) "", 0); + if (heap_write(info, rec) != 0) + break; + } + ok(i == rib - 5, "filled block to rib-5 (%d rows)", i); + + last_alloc_before= (ulong) share->block.last_allocated; + first_block= share->block.level_info[0].last_blocks; + + /* Allow exactly 3 blocks, fail at 4th */ + share->max_records= (ulong)(3 * rib - 1); + + /* + Blob needs more than 4 + 2*rib continuation records to span + 3 blocks and fail requesting the 4th. + */ + blob_len= (uint32)((2 * rib + 20) * 16); + blob_data= (uchar*) my_malloc(PSI_NOT_INSTRUMENTED, blob_len, MYF(0)); + memset(blob_data, 'Z', blob_len); + + build_record(rec, 99999, blob_data, (uint16) MY_MIN(blob_len, 65535)); + ok(heap_write(info, rec) != 0, + "insert with %u-byte blob fails (spans 3 blocks, needs 4th)", + blob_len); + ok(my_errno == HA_ERR_RECORD_FILE_FULL, + "error is HA_ERR_RECORD_FILE_FULL (got %d)", my_errno); + + /* + All blob records across 3 blocks reclaimed: 4 from block 1 + + rib from block 2 + rib from block 3. Two block boundary crossings. + */ + ok(share->block.last_allocated == last_alloc_before + 1, + "last_allocated: all recs reclaimed across 3 blocks " + "(expected %lu, got %lu)", + last_alloc_before + 1, (ulong) share->block.last_allocated); + ok(share->deleted == 1, + "deleted == 1 (got %lu)", (ulong) share->deleted); + ok(share->total_records + share->deleted == share->block.last_allocated, + "invariant: total_records + deleted == last_allocated"); + ok(share->block.level_info[0].last_blocks == first_block, + "last_blocks restored to first block after 2 boundary crossings"); + + /* Verify existing data and insert small blob */ + { + uchar key[4]; + int4store(key, 0); + ok(heap_rkey(info, rec, 0, key, 4, HA_READ_KEY_EXACT) == 0, + "existing row 0 still readable"); + } + { + uchar small_blob[30]; + memset(small_blob, 's', sizeof(small_blob)); + build_record(rec, 88888, small_blob, sizeof(small_blob)); + ok(heap_write(info, rec) == 0, + "small blob insert succeeds after 3-block reclaim"); + } + + my_free(blob_data); + heap_drop_table(info); + heap_close(info); +} + + +/* + Test: orphaned blocks are reused via high_water_allocated. + + After hp_shrink_tail() empties 2 leaf blocks, fills them back up + with non-blob rows. data_length must NOT grow (blocks are reused + via hp_find_block, not freshly allocated via hp_get_new_block). 
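+
+  Illustration (hypothetical numbers, rib records per leaf): after the
+  failed 3-block blob, high_water_allocated == 3*rib while
+  last_allocated == rib-4; each time the refill crosses a multiple of
+  rib below that high-water mark, hp_alloc_from_tail() steps into the
+  already-allocated leaf instead of growing data_length.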
+*/ + +static void test_block_reuse_after_reclaim(void) +{ + HP_SHARE *share; + HP_INFO *info; + uchar rec[REC_LENGTH]; + int32 i; + int32 rib; + ulong last_alloc_before; + ulonglong data_len_after_shrink; + uchar *blob_data; + uint32 blob_len; + + if (create_and_open("test_block_reuse", &share, &info)) + { + ok(0, "setup failed: %d", my_errno); + skip(13, "setup failed"); + return; + } + + rib= (int32) share->block.records_in_block; + + /* Fill block to rib - 5 */ + for (i= 0; i < rib - 5; i++) + { + build_record(rec, i, (const uchar*) "", 0); + if (heap_write(info, rec) != 0) + break; + } + ok(i == rib - 5, "filled block to rib-5 (%d rows)", i); + + last_alloc_before= (ulong) share->block.last_allocated; + + /* Allow exactly 3 blocks, fail at 4th */ + share->max_records= (ulong)(3 * rib - 1); + + /* Blob that spans 3 blocks then fails */ + blob_len= (uint32)((2 * rib + 20) * 16); + blob_data= (uchar*) my_malloc(PSI_NOT_INSTRUMENTED, blob_len, MYF(0)); + memset(blob_data, 'R', blob_len); + + build_record(rec, 99999, blob_data, (uint16) MY_MIN(blob_len, 65535)); + ok(heap_write(info, rec) != 0, + "blob insert fails as expected"); + + ok(share->block.last_allocated == last_alloc_before + 1, + "tail reclaimed after failure"); + ok(share->block.high_water_allocated == (ulong)(3 * rib), + "high_water_allocated set to 3*rib (got %lu, expected %lu)", + (ulong) share->block.high_water_allocated, (ulong)(3 * rib)); + + data_len_after_shrink= share->data_length; + + /* Remove max_records limit so we can fill freely */ + share->max_records= 0; + + /* + Insert 2*rib non-blob rows. The first reuses the free-list slot + (primary from failed insert). The remaining 2*rib-1 extend the + tail, crossing block boundaries at rib and 2*rib. At each boundary, + hp_alloc_from_tail() must REUSE the orphaned block (not allocate new). + Key check: data_length must NOT grow. + */ + { + int32 inserted= 0; + int32 to_insert= 2 * rib; + for (i= 0; i < to_insert; i++) + { + build_record(rec, rib + i, (const uchar*) "", 0); + if (heap_write(info, rec) != 0) + break; + inserted++; + } + ok(inserted == to_insert, + "inserted %d more rows across 2 reused blocks", inserted); + } + + ok(share->data_length == data_len_after_shrink, + "data_length unchanged: blocks reused, not newly allocated " + "(before=%llu, after=%llu)", + data_len_after_shrink, share->data_length); + + /* + last_allocated = (rib - 4) + (2*rib - 1) = 3*rib - 5 + The -4 is the post-shrink starting point (rib-5 data + 1 primary + on free list); the -1 accounts for the first insert reusing the + free-list slot instead of extending the tail. + */ + ok(share->block.last_allocated == (ulong)(3 * rib - 5), + "last_allocated grew through reused blocks (got %lu, expected %lu)", + (ulong) share->block.last_allocated, (ulong)(3 * rib - 5)); + + ok(share->total_records + share->deleted == share->block.last_allocated, + "invariant: total_records + deleted == last_allocated"); + + /* Verify data integrity: read first and last inserted rows */ + { + uchar key[4]; + int4store(key, 0); + ok(heap_rkey(info, rec, 0, key, 4, HA_READ_KEY_EXACT) == 0, + "first row still readable"); + int4store(key, 3 * rib - 6); + ok(heap_rkey(info, rec, 0, key, 4, HA_READ_KEY_EXACT) == 0, + "last inserted row readable"); + } + + /* + Now delete a row and insert a blob that crosses a block boundary. + hp_write_one_blob() -> hp_alloc_from_tail() uses the same reuse path. + high_water_allocated is still 3*rib; current last_allocated is 3*rib-5. 
+ A blob needing 6+ continuation records will cross into the tail of + block 3 (5 slots left). No new block should be allocated. + */ + { + uchar key[4]; + ulonglong data_len_before; + uchar reuse_blob[50]; + memset(reuse_blob, 'B', sizeof(reuse_blob)); + + int4store(key, rib); + ok(heap_rkey(info, rec, 0, key, 4, HA_READ_KEY_EXACT) == 0, + "found row for delete before blob reuse test"); + ok(heap_delete(info, rec) == 0, "deleted row"); + + data_len_before= share->data_length; + build_record(rec, 77777, reuse_blob, sizeof(reuse_blob)); + ok(heap_write(info, rec) == 0, + "blob insert succeeds using reused blocks"); + ok(share->data_length == data_len_before, + "data_length unchanged after blob insert (blocks reused)"); + } + + my_free(blob_data); + heap_drop_table(info); + heap_close(info); +} + + int main(int argc __attribute__((unused)), char **argv __attribute__((unused))) { MY_INIT("hp_test_freelist"); - plan(38); + plan(79); diag("Test 1: free-list contiguity detects groups > 2 records"); test_freelist_contiguity_multirecord(); @@ -377,6 +812,18 @@ int main(int argc __attribute__((unused)), diag("Test 4: true capacity exhaustion fails correctly"); test_true_capacity_exhaustion(); + diag("Test 5: tail reclaim on failed blob (single block)"); + test_tail_reclaim_single_block(); + + diag("Test 6: tail reclaim across block boundaries"); + test_tail_reclaim_cross_block(); + + diag("Test 7: tail reclaim across 3 block boundaries"); + test_tail_reclaim_three_blocks(); + + diag("Test 8: orphaned blocks reused after reclaim"); + test_block_reuse_after_reclaim(); + my_end(0); return exit_status(); } diff --git a/storage/heap/hp_write.c b/storage/heap/hp_write.c index 1765433082a12..1094020367a88 100644 --- a/storage/heap/hp_write.c +++ b/storage/heap/hp_write.c @@ -214,9 +214,17 @@ uchar *hp_alloc_from_tail(HP_SHARE *info, uint *blocks) DBUG_RETURN(NULL); } - if (hp_get_new_block(info, &info->block, &length)) - DBUG_RETURN(NULL); - info->data_length+=length; + if (info->block.last_allocated < info->block.high_water_allocated) + { + info->block.level_info[0].last_blocks= + (HP_PTRS*) hp_find_block(&info->block, info->block.last_allocated); + } + else + { + if (hp_get_new_block(info, &info->block, &length)) + DBUG_RETURN(NULL); + info->data_length+=length; + } } available= (uint)(info->block.records_in_block - block_pos); requested= *blocks; From d03d6b7ca44231e09afe6e5ad34b293627017afe Mon Sep 17 00:00:00 2001 From: Arcadiy Ivanov Date: Fri, 1 May 2026 23:45:04 -0400 Subject: [PATCH 26/27] Avoid double blob materialization in `find_unique_row()` For blob tables, `find_unique_row()` previously materialized blobs twice: once during `hp_rec_key_cmp()` (per-segment via `hp_materialize_one_blob()`) and again via `hp_read_blobs()` after the match was found. Reorder the blob path to materialize-then-compare: save the input record, copy the stored candidate, call `hp_read_blobs()` once to materialize all blobs, then compare via `hp_rec_key_cmp()` with `info=NULL` since both records now have direct data pointers. Non-blob tables keep the original fast path unchanged. 
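
In outline, the new blob path is (condensed from the `ha_heap.cc` hunk
below; error handling and current_ptr bookkeeping omitted):

    memcpy(input_copy, record, share->reclength);      /* save search row */
    do
    {
      if (pos->hash_of_key != rec_hash)
        continue;                                      /* cheap pre-filter */
      memcpy(record, pos->ptr_to_rec, share->reclength);
      hp_read_blobs(file, record, pos->ptr_to_rec);    /* materialize once */
      if (!hp_rec_key_cmp(keyinfo, input_copy, record, NULL))
        return 0;                                      /* match found */
      memcpy(record, input_copy, share->reclength);    /* restore on miss */
    } while ((pos= pos->next_key));

Passing `info=NULL` to `hp_rec_key_cmp()` is what keeps it from
re-materializing blobs per key segment on this call.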
--- mysql-test/suite/heap/blob_find_unique.result | 154 ++++++++++++++++ mysql-test/suite/heap/blob_find_unique.test | 171 ++++++++++++++++++ storage/heap/ha_heap.cc | 78 +++++--- 3 files changed, 378 insertions(+), 25 deletions(-) create mode 100644 mysql-test/suite/heap/blob_find_unique.result create mode 100644 mysql-test/suite/heap/blob_find_unique.test diff --git a/mysql-test/suite/heap/blob_find_unique.result b/mysql-test/suite/heap/blob_find_unique.result new file mode 100644 index 0000000000000..0cf5086cdf1ec --- /dev/null +++ b/mysql-test/suite/heap/blob_find_unique.result @@ -0,0 +1,154 @@ +SET @save_max_heap_table_size= @@max_heap_table_size; +SET @@max_heap_table_size= 32*1024*1024; +# +# EXCEPT with Case C blobs (multi-run, >16KB) +# +CREATE TABLE t1 (a INT, b TEXT) ENGINE=HEAP; +INSERT INTO t1 VALUES (1, REPEAT('a', 20000)), (2, REPEAT('b', 20000)), +(3, REPEAT('c', 20000)); +CREATE TABLE t2 (a INT, b TEXT) ENGINE=HEAP; +INSERT INTO t2 VALUES (2, REPEAT('b', 20000)), (3, REPEAT('c', 20000)); +SELECT a, LENGTH(b), LEFT(b, 1) FROM ( +SELECT a, b FROM t1 EXCEPT SELECT a, b FROM t2 +) dt; +a LENGTH(b) LEFT(b, 1) +1 20000 a +DROP TABLE t1, t2; +# +# INTERSECT with Case C blobs +# +CREATE TABLE t1 (a INT, b TEXT) ENGINE=HEAP; +INSERT INTO t1 VALUES (1, REPEAT('x', 25000)), (2, REPEAT('y', 25000)), +(3, REPEAT('z', 25000)); +CREATE TABLE t2 (a INT, b TEXT) ENGINE=HEAP; +INSERT INTO t2 VALUES (2, REPEAT('y', 25000)), (4, REPEAT('w', 25000)); +SELECT a, LENGTH(b), LEFT(b, 1) FROM ( +SELECT a, b FROM t1 INTERSECT SELECT a, b FROM t2 +) dt; +a LENGTH(b) LEFT(b, 1) +2 25000 y +DROP TABLE t1, t2; +# +# INTERSECT ALL with Case C blobs — duplicates preserved +# +CREATE TABLE t1 (a INT, b TEXT) ENGINE=HEAP; +INSERT INTO t1 VALUES (1, REPEAT('p', 20000)), (1, REPEAT('p', 20000)), +(2, REPEAT('q', 20000)); +CREATE TABLE t2 (a INT, b TEXT) ENGINE=HEAP; +INSERT INTO t2 VALUES (1, REPEAT('p', 20000)), (2, REPEAT('q', 20000)), +(2, REPEAT('q', 20000)); +SELECT a, LENGTH(b), LEFT(b, 1) FROM ( +SELECT a, b FROM t1 INTERSECT ALL SELECT a, b FROM t2 +) dt; +a LENGTH(b) LEFT(b, 1) +1 20000 p +2 20000 q +DROP TABLE t1, t2; +# +# EXCEPT ALL with Case C blobs — duplicates tracked +# +CREATE TABLE t1 (a INT, b TEXT) ENGINE=HEAP; +INSERT INTO t1 VALUES (1, REPEAT('m', 20000)), (1, REPEAT('m', 20000)), +(1, REPEAT('m', 20000)), (2, REPEAT('n', 20000)); +CREATE TABLE t2 (a INT, b TEXT) ENGINE=HEAP; +INSERT INTO t2 VALUES (1, REPEAT('m', 20000)), (2, REPEAT('n', 20000)); +SELECT a, LENGTH(b), LEFT(b, 1) FROM ( +SELECT a, b FROM t1 EXCEPT ALL SELECT a, b FROM t2 +) dt; +a LENGTH(b) LEFT(b, 1) +1 20000 m +1 20000 m +DROP TABLE t1, t2; +# +# Multiple blob columns — all materialized correctly +# +CREATE TABLE t1 (a TEXT, b TEXT) ENGINE=HEAP; +INSERT INTO t1 VALUES (REPEAT('a', 20000), REPEAT('1', 20000)), +(REPEAT('b', 20000), REPEAT('2', 20000)), +(REPEAT('c', 20000), REPEAT('3', 20000)); +CREATE TABLE t2 (a TEXT, b TEXT) ENGINE=HEAP; +INSERT INTO t2 VALUES (REPEAT('b', 20000), REPEAT('2', 20000)); +SELECT LENGTH(a), LEFT(a, 1), LENGTH(b), LEFT(b, 1) FROM ( +SELECT a, b FROM t1 EXCEPT SELECT a, b FROM t2 +) dt; +LENGTH(a) LEFT(a, 1) LENGTH(b) LEFT(b, 1) +20000 a 20000 1 +20000 c 20000 3 +DROP TABLE t1, t2; +# +# Mixed blob sizes: Case A (tiny), Case B (medium), Case C (large) +# +CREATE TABLE t1 (a INT, b TEXT) ENGINE=HEAP; +INSERT INTO t1 VALUES (1, 'hi'), (2, REPEAT('m', 5000)), +(3, REPEAT('L', 25000)); +CREATE TABLE t2 (a INT, b TEXT) ENGINE=HEAP; +INSERT INTO t2 VALUES (2, REPEAT('m', 5000)); +SELECT a, 
LENGTH(b) FROM ( +SELECT a, b FROM t1 EXCEPT SELECT a, b FROM t2 +) dt; +a LENGTH(b) +1 2 +3 25000 +DROP TABLE t1, t2; +# +# PAD SPACE: trailing spaces should compare equal +# +CREATE TABLE t1 (a TEXT) ENGINE=HEAP; +INSERT INTO t1 VALUES ('hello'); +CREATE TABLE t2 (a TEXT) ENGINE=HEAP; +INSERT INTO t2 VALUES ('hello '); +SELECT COUNT(*) FROM ( +SELECT a FROM t1 INTERSECT SELECT a FROM t2 +) dt; +COUNT(*) +1 +DROP TABLE t1, t2; +# +# Mismatch path: hash collision forcing multiple comparisons. +# Many rows with same integer but different blobs to exercise +# the restore-on-mismatch loop. +# +CREATE TABLE t1 (a TEXT) ENGINE=HEAP; +INSERT INTO t1 VALUES (REPEAT('a', 20000)), (REPEAT('b', 20000)), +(REPEAT('c', 20000)), (REPEAT('d', 20000)), +(REPEAT('e', 20000)); +CREATE TABLE t2 (a TEXT) ENGINE=HEAP; +INSERT INTO t2 VALUES (REPEAT('c', 20000)), (REPEAT('e', 20000)); +SELECT LEFT(a, 1), LENGTH(a) FROM ( +SELECT a FROM t1 EXCEPT SELECT a FROM t2 +) dt; +LEFT(a, 1) LENGTH(a) +a 20000 +b 20000 +d 20000 +DROP TABLE t1, t2; +# +# Content integrity: verify blob data survives intact +# +CREATE TABLE t1 (a TEXT) ENGINE=HEAP; +INSERT INTO t1 VALUES (REPEAT('X', 20000)); +CREATE TABLE t2 (a TEXT) ENGINE=HEAP; +INSERT INTO t2 VALUES (REPEAT('Y', 20000)); +SELECT LENGTH(a), a = REPEAT('X', 20000) AS intact FROM ( +SELECT a FROM t1 EXCEPT SELECT a FROM t2 +) dt; +LENGTH(a) intact +20000 1 +DROP TABLE t1, t2; +# +# EXCEPT with expressions producing temp table blobs +# +SELECT LENGTH(a), LEFT(a, 3) FROM ( +SELECT a FROM ( +SELECT REPEAT('abc', 7000) AS a +UNION ALL SELECT REPEAT('def', 7000) +UNION ALL SELECT REPEAT('abc', 7000) +) t1 +EXCEPT +SELECT a FROM ( +SELECT REPEAT('abc', 7000) AS a +) t2 +) dt; +LENGTH(a) LEFT(a, 3) +21000 def +SET @@max_heap_table_size= @save_max_heap_table_size; diff --git a/mysql-test/suite/heap/blob_find_unique.test b/mysql-test/suite/heap/blob_find_unique.test new file mode 100644 index 0000000000000..19e4995e62a0a --- /dev/null +++ b/mysql-test/suite/heap/blob_find_unique.test @@ -0,0 +1,171 @@ +# +# MDEV-38975: Test find_unique_row() optimization for blob columns. +# Exercises the materialize-then-compare path via EXCEPT, INTERSECT, +# INTERSECT ALL, and EXCEPT ALL, which call find_unique_row() on the +# HEAP temp table. 
+# + +SET @save_max_heap_table_size= @@max_heap_table_size; +SET @@max_heap_table_size= 32*1024*1024; + +--echo # +--echo # EXCEPT with Case C blobs (multi-run, >16KB) +--echo # +CREATE TABLE t1 (a INT, b TEXT) ENGINE=HEAP; +INSERT INTO t1 VALUES (1, REPEAT('a', 20000)), (2, REPEAT('b', 20000)), + (3, REPEAT('c', 20000)); +CREATE TABLE t2 (a INT, b TEXT) ENGINE=HEAP; +INSERT INTO t2 VALUES (2, REPEAT('b', 20000)), (3, REPEAT('c', 20000)); + +--sorted_result +SELECT a, LENGTH(b), LEFT(b, 1) FROM ( + SELECT a, b FROM t1 EXCEPT SELECT a, b FROM t2 +) dt; + +DROP TABLE t1, t2; + +--echo # +--echo # INTERSECT with Case C blobs +--echo # +CREATE TABLE t1 (a INT, b TEXT) ENGINE=HEAP; +INSERT INTO t1 VALUES (1, REPEAT('x', 25000)), (2, REPEAT('y', 25000)), + (3, REPEAT('z', 25000)); +CREATE TABLE t2 (a INT, b TEXT) ENGINE=HEAP; +INSERT INTO t2 VALUES (2, REPEAT('y', 25000)), (4, REPEAT('w', 25000)); + +--sorted_result +SELECT a, LENGTH(b), LEFT(b, 1) FROM ( + SELECT a, b FROM t1 INTERSECT SELECT a, b FROM t2 +) dt; + +DROP TABLE t1, t2; + +--echo # +--echo # INTERSECT ALL with Case C blobs — duplicates preserved +--echo # +CREATE TABLE t1 (a INT, b TEXT) ENGINE=HEAP; +INSERT INTO t1 VALUES (1, REPEAT('p', 20000)), (1, REPEAT('p', 20000)), + (2, REPEAT('q', 20000)); +CREATE TABLE t2 (a INT, b TEXT) ENGINE=HEAP; +INSERT INTO t2 VALUES (1, REPEAT('p', 20000)), (2, REPEAT('q', 20000)), + (2, REPEAT('q', 20000)); + +--sorted_result +SELECT a, LENGTH(b), LEFT(b, 1) FROM ( + SELECT a, b FROM t1 INTERSECT ALL SELECT a, b FROM t2 +) dt; + +DROP TABLE t1, t2; + +--echo # +--echo # EXCEPT ALL with Case C blobs — duplicates tracked +--echo # +CREATE TABLE t1 (a INT, b TEXT) ENGINE=HEAP; +INSERT INTO t1 VALUES (1, REPEAT('m', 20000)), (1, REPEAT('m', 20000)), + (1, REPEAT('m', 20000)), (2, REPEAT('n', 20000)); +CREATE TABLE t2 (a INT, b TEXT) ENGINE=HEAP; +INSERT INTO t2 VALUES (1, REPEAT('m', 20000)), (2, REPEAT('n', 20000)); + +--sorted_result +SELECT a, LENGTH(b), LEFT(b, 1) FROM ( + SELECT a, b FROM t1 EXCEPT ALL SELECT a, b FROM t2 +) dt; + +DROP TABLE t1, t2; + +--echo # +--echo # Multiple blob columns — all materialized correctly +--echo # +CREATE TABLE t1 (a TEXT, b TEXT) ENGINE=HEAP; +INSERT INTO t1 VALUES (REPEAT('a', 20000), REPEAT('1', 20000)), + (REPEAT('b', 20000), REPEAT('2', 20000)), + (REPEAT('c', 20000), REPEAT('3', 20000)); +CREATE TABLE t2 (a TEXT, b TEXT) ENGINE=HEAP; +INSERT INTO t2 VALUES (REPEAT('b', 20000), REPEAT('2', 20000)); + +--sorted_result +SELECT LENGTH(a), LEFT(a, 1), LENGTH(b), LEFT(b, 1) FROM ( + SELECT a, b FROM t1 EXCEPT SELECT a, b FROM t2 +) dt; + +DROP TABLE t1, t2; + +--echo # +--echo # Mixed blob sizes: Case A (tiny), Case B (medium), Case C (large) +--echo # +CREATE TABLE t1 (a INT, b TEXT) ENGINE=HEAP; +INSERT INTO t1 VALUES (1, 'hi'), (2, REPEAT('m', 5000)), + (3, REPEAT('L', 25000)); +CREATE TABLE t2 (a INT, b TEXT) ENGINE=HEAP; +INSERT INTO t2 VALUES (2, REPEAT('m', 5000)); + +--sorted_result +SELECT a, LENGTH(b) FROM ( + SELECT a, b FROM t1 EXCEPT SELECT a, b FROM t2 +) dt; + +DROP TABLE t1, t2; + +--echo # +--echo # PAD SPACE: trailing spaces should compare equal +--echo # +CREATE TABLE t1 (a TEXT) ENGINE=HEAP; +INSERT INTO t1 VALUES ('hello'); +CREATE TABLE t2 (a TEXT) ENGINE=HEAP; +INSERT INTO t2 VALUES ('hello '); + +SELECT COUNT(*) FROM ( + SELECT a FROM t1 INTERSECT SELECT a FROM t2 +) dt; + +DROP TABLE t1, t2; + +--echo # +--echo # Mismatch path: hash collision forcing multiple comparisons. 
+--echo # Many rows with same integer but different blobs to exercise +--echo # the restore-on-mismatch loop. +--echo # +CREATE TABLE t1 (a TEXT) ENGINE=HEAP; +INSERT INTO t1 VALUES (REPEAT('a', 20000)), (REPEAT('b', 20000)), + (REPEAT('c', 20000)), (REPEAT('d', 20000)), + (REPEAT('e', 20000)); +CREATE TABLE t2 (a TEXT) ENGINE=HEAP; +INSERT INTO t2 VALUES (REPEAT('c', 20000)), (REPEAT('e', 20000)); + +--sorted_result +SELECT LEFT(a, 1), LENGTH(a) FROM ( + SELECT a FROM t1 EXCEPT SELECT a FROM t2 +) dt; + +DROP TABLE t1, t2; + +--echo # +--echo # Content integrity: verify blob data survives intact +--echo # +CREATE TABLE t1 (a TEXT) ENGINE=HEAP; +INSERT INTO t1 VALUES (REPEAT('X', 20000)); +CREATE TABLE t2 (a TEXT) ENGINE=HEAP; +INSERT INTO t2 VALUES (REPEAT('Y', 20000)); + +SELECT LENGTH(a), a = REPEAT('X', 20000) AS intact FROM ( + SELECT a FROM t1 EXCEPT SELECT a FROM t2 +) dt; + +DROP TABLE t1, t2; + +--echo # +--echo # EXCEPT with expressions producing temp table blobs +--echo # +SELECT LENGTH(a), LEFT(a, 3) FROM ( + SELECT a FROM ( + SELECT REPEAT('abc', 7000) AS a + UNION ALL SELECT REPEAT('def', 7000) + UNION ALL SELECT REPEAT('abc', 7000) + ) t1 + EXCEPT + SELECT a FROM ( + SELECT REPEAT('abc', 7000) AS a + ) t2 +) dt; + +SET @@max_heap_table_size= @save_max_heap_table_size; diff --git a/storage/heap/ha_heap.cc b/storage/heap/ha_heap.cc index e870d0b77bf84..577d72c391cac 100644 --- a/storage/heap/ha_heap.cc +++ b/storage/heap/ha_heap.cc @@ -900,37 +900,65 @@ int ha_heap::find_unique_row(uchar *record, uint unique_idx) HASH_INFO *pos= hp_find_hash(&keyinfo->block, hp_mask(rec_hash, share->blength, share->records)); + + if (!share->blob_count) + { + /* Non-blob path: compare then copy */ + do + { + if (pos->hash_of_key == rec_hash && + !hp_rec_key_cmp(keyinfo, record, pos->ptr_to_rec, file)) + { + file->current_hash_ptr= pos; + file->current_ptr= pos->ptr_to_rec; + file->update= HA_STATE_AKTIV; + memcpy(record, file->current_ptr, (size_t) share->reclength); + DBUG_RETURN(0); + } + } while ((pos= pos->next_key)); + DBUG_RETURN(1); + } + + /* + Blob path: materialize-then-compare. + Pre-materialize all blobs via hp_read_blobs() before comparison, + then compare with info=NULL since both records have direct data + pointers. This avoids the double reassembly that would occur if + hp_rec_key_cmp() materialized each blob individually and then + hp_read_blobs() re-walked the same chains. + */ + uchar *input_copy= (uchar*) my_safe_alloca(share->reclength); + if (!input_copy) + DBUG_RETURN(-1); + memcpy(input_copy, record, (size_t) share->reclength); + + int result= 1; do { - /* - Hash pre-check avoids expensive blob materialization - for non-matching entries. - */ - if (pos->hash_of_key == rec_hash && - !hp_rec_key_cmp(keyinfo, record, pos->ptr_to_rec, file)) + if (pos->hash_of_key != rec_hash) + continue; + + memcpy(record, pos->ptr_to_rec, (size_t) share->reclength); + if (hp_read_blobs(file, record, pos->ptr_to_rec)) + { + result= -1; + break; + } + if (!hp_rec_key_cmp(keyinfo, input_copy, record, NULL)) { file->current_hash_ptr= pos; file->current_ptr= pos->ptr_to_rec; - file->update = HA_STATE_AKTIV; - /* - We compare it only by record in the index, so better to read all - records. - */ - memcpy(record, file->current_ptr, (size_t) share->reclength); - /* - TODO: hp_rec_key_cmp() above already materialized blobs - via hp_materialize_one_blob(). A future optimization could - concatenate all non-zero-copy blobs into blob_buff during - comparison, avoiding this second materialization pass. 
- */ - if (share->blob_count && hp_read_blobs(file, record, file->current_ptr)) - DBUG_RETURN(-1); - - DBUG_RETURN(0); // found and position set + file->update= HA_STATE_AKTIV; + result= 0; + break; } - } - while ((pos= pos->next_key)); - DBUG_RETURN(1); // not found + memcpy(record, input_copy, (size_t) share->reclength); + } while ((pos= pos->next_key)); + + if (result) + memcpy(record, input_copy, (size_t) share->reclength); + my_safe_afree(input_copy, share->reclength); + DBUG_RETURN(result); } struct st_mysql_storage_engine heap_storage_engine= From e529664cf3a095c934efb12b538d4aa6ac05ed6b Mon Sep 17 00:00:00 2001 From: Arcadiy Ivanov Date: Sat, 2 May 2026 00:15:32 -0400 Subject: [PATCH 27/27] Replace `hp_blob_run_format()` enum with direct bit testing Remove `enum hp_blob_format` and `hp_blob_run_format()` indirection. Add `HP_ROW_MULTIPLE_REC` (bit 5) so all three blob storage formats have a dedicated flag bit. Add named inline predicates `hp_is_single_rec()`, `hp_is_zerocopy()`, `hp_is_multi_run()` matching the existing `hp_is_active()`/`hp_has_cont()`/`hp_is_cont()` pattern. Change `hp_write_run_data()` format parameter from enum to `uchar` receiving bit constants directly; simplify flags byte assignment from ternary to bitwise OR. Addresses review feedback F127-F128, F130, F132-F134. --- storage/heap/_check.c | 5 ++--- storage/heap/heapdef.h | 48 +++++++++++++++++++++------------------- storage/heap/hp_blob.c | 50 +++++++++++++++--------------------------- storage/heap/hp_scan.c | 3 +-- 4 files changed, 47 insertions(+), 59 deletions(-) diff --git a/storage/heap/_check.c b/storage/heap/_check.c index f107346765f74..97ac73537561d 100644 --- a/storage/heap/_check.c +++ b/storage/heap/_check.c @@ -82,11 +82,10 @@ int heap_check_heap(const HP_INFO *info, my_bool print_status) else if (hp_is_cont(current_ptr, share->visible)) { /* - Case A (HP_BLOB_CASE_A_SINGLE_REC): single record, no header. + Case A (HP_ROW_SINGLE_REC): single record, no header. Case B/C: read run_rec_count from header and skip the entire run. */ - if (hp_blob_run_format(current_ptr, share->visible) == - HP_BLOB_CASE_A_SINGLE_REC) + if (hp_is_single_rec(current_ptr, share->visible)) { cont_count++; } diff --git a/storage/heap/heapdef.h b/storage/heap/heapdef.h index 018c6f6dc7f09..9f495dd0f55ed 100644 --- a/storage/heap/heapdef.h +++ b/storage/heap/heapdef.h @@ -39,6 +39,7 @@ C_MODE_START #define HP_ROW_IS_CONT 4 /* Bit 2: this record IS a continuation record */ #define HP_ROW_CONT_ZEROCOPY 8 /* Bit 3: zero-copy layout (data in rec 1..N-1) */ #define HP_ROW_SINGLE_REC 16 /* Bit 4: single-record run, no header — data at offset 0 */ +#define HP_ROW_MULTIPLE_REC 32 /* Bit 5: multi-run chain, data needs reassembly */ /* Continuation run header: next_cont pointer + run_rec_count. 
@@ -70,6 +71,24 @@ static inline my_bool hp_is_cont(const uchar *rec, uint visible) return (rec[visible] & HP_ROW_IS_CONT) != 0; } +/* Case A: single-record run, no header — data at offset 0 */ +static inline my_bool hp_is_single_rec(const uchar *rec, uint visible) +{ + return (rec[visible] & HP_ROW_SINGLE_REC) != 0; +} + +/* Case B: single run, data in rec 1..N-1 — zero-copy read */ +static inline my_bool hp_is_zerocopy(const uchar *rec, uint visible) +{ + return (rec[visible] & HP_ROW_CONT_ZEROCOPY) != 0; +} + +/* Case C: multi-run chain — data needs reassembly into blob_buff */ +static inline my_bool hp_is_multi_run(const uchar *rec, uint visible) +{ + return (rec[visible] & HP_ROW_MULTIPLE_REC) != 0; +} + /* Continuation run header accessors. Read next_cont pointer and run_rec_count from the first record of a run. @@ -89,36 +108,21 @@ static inline uint16 hp_cont_rec_count(const uchar *chain) /* Blob continuation run storage format. - Case A (HP_BLOB_CASE_A_SINGLE_REC): Single-record run, no header. + Case A (HP_ROW_SINGLE_REC): Single-record run, no header. Data starts at offset 0, full `visible` bytes available for - payload. Detected by HP_ROW_SINGLE_REC flag. + payload. Detected by hp_is_single_rec(). Zero-copy: blob pointer → chain. - Case B (HP_BLOB_CASE_B_ZEROCOPY): Single run, multiple records. + Case B (HP_ROW_CONT_ZEROCOPY): Single run, multiple records. Header in rec 0, data contiguous in rec 1..N-1. Detected by - HP_ROW_CONT_ZEROCOPY flag. + hp_is_zerocopy(). Zero-copy: blob pointer → chain + recbuffer. - Case C (HP_BLOB_CASE_C_MULTI_RUN): One or more runs linked via + Case C (HP_ROW_MULTIPLE_REC): One or more runs linked via next_cont. Header in each run's rec 0, data in rec 0 (after - header) + rec 1..N-1. Requires reassembly into blob_buff. + header) + rec 1..N-1. Detected by hp_is_multi_run(). + Requires reassembly into blob_buff. */ -enum hp_blob_format { - HP_BLOB_CASE_A_SINGLE_REC, - HP_BLOB_CASE_B_ZEROCOPY, - HP_BLOB_CASE_C_MULTI_RUN -}; - -static inline enum hp_blob_format hp_blob_run_format(const uchar *chain, - uint visible) -{ - uchar flags= chain[visible]; - if (flags & HP_ROW_SINGLE_REC) - return HP_BLOB_CASE_A_SINGLE_REC; - if (flags & HP_ROW_CONT_ZEROCOPY) - return HP_BLOB_CASE_B_ZEROCOPY; - return HP_BLOB_CASE_C_MULTI_RUN; -} /* Minimum contiguous run size parameters. diff --git a/storage/heap/hp_blob.c b/storage/heap/hp_blob.c index cd0153afdf3f6..b66b6a1171df6 100644 --- a/storage/heap/hp_blob.c +++ b/storage/heap/hp_blob.c @@ -131,7 +131,7 @@ void hp_free_run_chain(HP_SHARE *share, uchar *chain) uint16 run_rec_count; uint16 j; - if (hp_blob_run_format(chain, visible) == HP_BLOB_CASE_A_SINGLE_REC) + if (hp_is_single_rec(chain, visible)) { /* Case A: single record, no header */ next_run= NULL; @@ -171,7 +171,7 @@ void hp_free_run_chain(HP_SHARE *share, uchar *chain) @param data_len Total blob data length @param run_start Pointer to first record of the run @param run_rec_count Number of consecutive records in this run - @param format Storage format (Case A / Case B / Case C) + @param format Storage format bit (HP_ROW_SINGLE_REC / HP_ROW_CONT_ZEROCOPY / HP_ROW_MULTIPLE_REC) @param offset [in/out] Current offset into blob data @note Caller must link runs by overwriting next_cont in the previous run. 
@@ -180,7 +180,7 @@ void hp_free_run_chain(HP_SHARE *share, uchar *chain) static void hp_write_run_data(HP_SHARE *share, const uchar *data, uint32 data_len, uchar *run_start, uint16 run_rec_count, - enum hp_blob_format format, + uchar format, uint32 *offset) { uint visible= share->visible; @@ -190,7 +190,7 @@ static void hp_write_run_data(HP_SHARE *share, const uchar *data, uint32 chunk; uint16 rec; - if (format == HP_BLOB_CASE_A_SINGLE_REC) + if (format & HP_ROW_SINGLE_REC) { /* Case A: single-record run, no header. Data starts at offset 0, @@ -210,15 +210,14 @@ static void hp_write_run_data(HP_SHARE *share, const uchar *data, *((uchar**) run_start)= NULL; int2store(run_start + sizeof(uchar*), run_rec_count); run_start[visible]= HP_ROW_ACTIVE | HP_ROW_IS_CONT | - (format == HP_BLOB_CASE_B_ZEROCOPY - ? HP_ROW_CONT_ZEROCOPY : 0); + (format & (HP_ROW_CONT_ZEROCOPY | HP_ROW_MULTIPLE_REC)); } /* We come here when we need data in the initial run block. In other words, we are not writing a multi-row zerocopy block. */ - if (format == HP_BLOB_CASE_C_MULTI_RUN) + if (format & HP_ROW_MULTIPLE_REC) { chunk= visible - HP_CONT_HEADER_SIZE; if (chunk > remaining) @@ -256,7 +255,7 @@ static void hp_write_run_data(HP_SHARE *share, const uchar *data, /* Unlink a contiguous group from the delete list and write blob data into it. - Does not support zerocopy (always uses HP_BLOB_CASE_C_MULTI_RUN). + Does not support zerocopy (always uses HP_ROW_MULTIPLE_REC format). @param share Table share @param data_ptr Blob data @@ -284,7 +283,7 @@ static void hp_unlink_and_write_run(HP_SHARE *share, const uchar *data_ptr, share->total_records+= run_count; hp_write_run_data(share, data_ptr, data_len, run_start, - run_count, HP_BLOB_CASE_C_MULTI_RUN, data_offset); + run_count, HP_ROW_MULTIPLE_REC, data_offset); if (*prev_run_start) memcpy(*prev_run_start, &run_start, sizeof(run_start)); @@ -476,7 +475,7 @@ int hp_write_one_blob(HP_SHARE *share, const uchar *data_ptr, { /* Case A: single record, no header */ hp_write_run_data(share, data_ptr, data_len, run_start, - (uint16) run_rec_count, HP_BLOB_CASE_A_SINGLE_REC, + (uint16) run_rec_count, HP_ROW_SINGLE_REC, &data_offset); } else if (is_only_run && @@ -484,14 +483,14 @@ int hp_write_one_blob(HP_SHARE *share, const uchar *data_ptr, { /* Case B: data in rec 1..N-1, contiguous for zero-copy reads */ hp_write_run_data(share, data_ptr, data_len, run_start, - (uint16) run_rec_count, HP_BLOB_CASE_B_ZEROCOPY, + (uint16) run_rec_count, HP_ROW_CONT_ZEROCOPY, &data_offset); } else { /* Case C: multi-run or partial run */ hp_write_run_data(share, data_ptr, data_len, run_start, - (uint16) run_rec_count, HP_BLOB_CASE_C_MULTI_RUN, + (uint16) run_rec_count, HP_ROW_MULTIPLE_REC, &data_offset); } @@ -729,7 +728,7 @@ int hp_read_blobs(HP_INFO *info, uchar *record, const uchar *pos) memcpy(&chain, record + desc->offset + desc->packlength, sizeof(chain)); /* Case A and Case B are zero-copy - need no reassembly buffer space */ - if (hp_blob_run_format(chain, visible) != HP_BLOB_CASE_C_MULTI_RUN) + if (!hp_is_multi_run(chain, visible)) { info->has_zerocopy_blobs= TRUE; continue; @@ -766,9 +765,7 @@ int hp_read_blobs(HP_INFO *info, uchar *record, const uchar *pos) memcpy(&chain, record + desc->offset + desc->packlength, sizeof(chain)); - switch (hp_blob_run_format(chain, visible)) - { - case HP_BLOB_CASE_A_SINGLE_REC: + if (hp_is_single_rec(chain, visible)) { /* Case A: single-record single-run, no header - zero-copy */ const uchar *blob_data= chain; @@ -776,7 +773,7 @@ int hp_read_blobs(HP_INFO 
*info, uchar *record, const uchar *pos) sizeof(blob_data)); continue; } - case HP_BLOB_CASE_B_ZEROCOPY: + if (hp_is_zerocopy(chain, visible)) { /* Case B: data in rec 1..N-1, contiguous - zero-copy */ const uchar *blob_data= chain + recbuffer; @@ -784,21 +781,15 @@ int hp_read_blobs(HP_INFO *info, uchar *record, const uchar *pos) sizeof(blob_data)); continue; } - case HP_BLOB_CASE_C_MULTI_RUN: { /* Case C: reassemble into blob_buff */ uchar *blob_data= buff_ptr; hp_reassemble_chain(chain, data_len, buff_ptr, visible, recbuffer); buff_ptr+= data_len; - /* Update blob pointer to reassembly buffer */ - { - memcpy(record + desc->offset + desc->packlength, &blob_data, - sizeof(blob_data)); - } - break; + memcpy(record + desc->offset + desc->packlength, &blob_data, + sizeof(blob_data)); } - } /* switch */ } DBUG_RETURN(0); @@ -832,15 +823,10 @@ const uchar *hp_materialize_one_blob(HP_INFO *info, if (data_len == 0 || !chain) return chain; - switch (hp_blob_run_format(chain, visible)) - { - case HP_BLOB_CASE_A_SINGLE_REC: + if (hp_is_single_rec(chain, visible)) return chain; /* Case A: no header, data at offset 0 */ - case HP_BLOB_CASE_B_ZEROCOPY: + if (hp_is_zerocopy(chain, visible)) return chain + recbuffer; /* Case B: data in rec 1..N-1 */ - case HP_BLOB_CASE_C_MULTI_RUN: - break; /* Case C: fall through to reassembly */ - } /* Case C: multiple runs, reassemble into blob_buff */ if (data_len > info->blob_buff_len) diff --git a/storage/heap/hp_scan.c b/storage/heap/hp_scan.c index 98dd6ea4bc771..6ab9b6439f8ca 100644 --- a/storage/heap/hp_scan.c +++ b/storage/heap/hp_scan.c @@ -107,8 +107,7 @@ int heap_scan(register HP_INFO *info, uchar *record) */ if (hp_is_cont(info->current_ptr, share->visible)) { - if (hp_blob_run_format(info->current_ptr, share->visible) - != HP_BLOB_CASE_A_SINGLE_REC) + if (!hp_is_single_rec(info->current_ptr, share->visible)) { uint16 run_rec_count= hp_cont_rec_count(info->current_ptr); if (run_rec_count > 1)