diff --git a/extra/mariabackup/backup_copy.cc b/extra/mariabackup/backup_copy.cc index 19cb768cd010c..dfe482963a6c9 100644 --- a/extra/mariabackup/backup_copy.cc +++ b/extra/mariabackup/backup_copy.cc @@ -1707,7 +1707,8 @@ copy_back() if it exists. */ ds_data = ds_create(dst_dir, DS_TYPE_LOCAL); - if (!file_exists("ib_logfile0")) { + MY_STAT stat_arg; + if (!my_stat("ib_logfile0", &stat_arg, MYF(0)) || !stat_arg.st_size) { /* After completed --prepare, redo log files are redundant. We must delete any redo logs at the destination, so that the database will not jump to a different log sequence number diff --git a/extra/mariabackup/write_filt.cc b/extra/mariabackup/write_filt.cc index a06338184056e..76b66fa995399 100644 --- a/extra/mariabackup/write_filt.cc +++ b/extra/mariabackup/write_filt.cc @@ -79,7 +79,7 @@ wf_incremental_init(xb_write_filt_ctxt_t *ctxt, char *dst_name, cp->delta_buf_base = static_cast(malloc(buf_size)); memset(cp->delta_buf_base, 0, buf_size); cp->delta_buf = static_cast - (ut_align(cp->delta_buf_base, UNIV_PAGE_SIZE_MAX)); + (ut_align(cp->delta_buf_base, cursor->page_size.physical())); /* write delta meta info */ snprintf(meta_name, sizeof(meta_name), "%s%s", dst_name, diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index f4c0af45cd709..6bc506f14d066 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -2460,7 +2460,7 @@ static os_thread_ret_t log_copying_thread(void*) log_copying_running = false; my_thread_end(); - os_thread_exit(NULL); + os_thread_exit(); return(0); } @@ -2483,7 +2483,7 @@ static os_thread_ret_t io_watching_thread(void*) io_watching_thread_running = false; - os_thread_exit(NULL); + os_thread_exit(); return(0); } @@ -2523,7 +2523,7 @@ data_copy_thread_func( pthread_mutex_unlock(&ctxt->count_mutex); my_thread_end(); - os_thread_exit(NULL); + os_thread_exit(); OS_THREAD_DUMMY_RETURN; } @@ -4022,8 +4022,7 @@ xb_space_create_file( } ret = os_file_set_size(path, *file, - FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE, - false); + FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE); if (!ret) { msg("xtrabackup: cannot set size for file %s\n", path); os_file_close(*file); @@ -4414,13 +4413,20 @@ xtrabackup_apply_delta( if (offset_on_page == 0xFFFFFFFFUL) break; + uchar *buf = incremental_buffer + page_in_buffer * page_size; + const os_offset_t off = os_offset_t(offset_on_page)*page_size; + + if (off == 0) { + /* Read tablespace size from page 0, + and extend the file to specified size.*/ + os_offset_t n_pages = mach_read_from_4(buf + FSP_HEADER_OFFSET + FSP_SIZE); + success = os_file_set_size(dst_path, dst_file, n_pages*page_size); + if (!success) + goto error; + } + success = os_file_write(IORequestWrite, - dst_path, dst_file, - incremental_buffer + - page_in_buffer * page_size, - (offset_on_page << - page_size_shift), - page_size); + dst_path, dst_file, buf, off, page_size); if (!success) { goto error; } @@ -4430,8 +4436,10 @@ xtrabackup_apply_delta( } free(incremental_buffer_base); - if (src_file != OS_FILE_CLOSED) + if (src_file != OS_FILE_CLOSED) { os_file_close(src_file); + os_file_delete(0,src_path); + } if (dst_file != OS_FILE_CLOSED) os_file_close(dst_file); return TRUE; @@ -4785,7 +4793,8 @@ xtrabackup_prepare_func(char** argv) if (!ok) goto error_cleanup; } - srv_operation = SRV_OPERATION_RESTORE; + srv_operation = xtrabackup_export + ? 
SRV_OPERATION_RESTORE_EXPORT : SRV_OPERATION_RESTORE; if (innodb_init_param()) { goto error_cleanup; diff --git a/mysql-test/lib/My/SafeProcess.pm b/mysql-test/lib/My/SafeProcess.pm index f3ee772cca3f6..3260a6ed5933f 100644 --- a/mysql-test/lib/My/SafeProcess.pm +++ b/mysql-test/lib/My/SafeProcess.pm @@ -336,9 +336,14 @@ sub start_kill { sub dump_core { my ($self)= @_; - return if IS_WINDOWS; my $pid= $self->{SAFE_PID}; die "Can't get core from not started process" unless defined $pid; + + if (IS_WINDOWS) { + system("$safe_kill $pid dump"); + return 1; + } + _verbose("Sending ABRT to $self"); kill ("ABRT", $pid); return 1; diff --git a/mysql-test/lib/My/SafeProcess/CMakeLists.txt b/mysql-test/lib/My/SafeProcess/CMakeLists.txt index ec93f94a3e8bb..ff842f3468ff1 100644 --- a/mysql-test/lib/My/SafeProcess/CMakeLists.txt +++ b/mysql-test/lib/My/SafeProcess/CMakeLists.txt @@ -25,6 +25,7 @@ SET(INSTALL_ARGS IF (WIN32) MYSQL_ADD_EXECUTABLE(my_safe_process safe_process_win.cc ${INSTALL_ARGS}) MYSQL_ADD_EXECUTABLE(my_safe_kill safe_kill_win.cc ${INSTALL_ARGS}) + TARGET_LINK_LIBRARIES(my_safe_kill dbghelp psapi) ELSE() MYSQL_ADD_EXECUTABLE(my_safe_process safe_process.cc ${INSTALL_ARGS}) ENDIF() diff --git a/mysql-test/lib/My/SafeProcess/safe_kill_win.cc b/mysql-test/lib/My/SafeProcess/safe_kill_win.cc index 2ac29c61bc7c3..e5ec33af571dc 100644 --- a/mysql-test/lib/My/SafeProcess/safe_kill_win.cc +++ b/mysql-test/lib/My/SafeProcess/safe_kill_win.cc @@ -25,6 +25,80 @@ #include #include #include +#include +#include + +static int create_dump(DWORD pid) +{ + char path[MAX_PATH]; + char working_dir[MAX_PATH]; + int ret= -1; + HANDLE process= INVALID_HANDLE_VALUE; + HANDLE file= INVALID_HANDLE_VALUE; + char *p; + + process = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, (DWORD)pid); + if (!process) + { + fprintf(stderr,"safe_kill : cannot open process pid=%u to create dump, last error %u\n", + pid, GetLastError()); + goto exit; + } + + DWORD size = MAX_PATH; + if (QueryFullProcessImageName(process, 0, path, &size) == 0) + { + fprintf(stderr,"safe_kill : cannot read process path for pid %u, last error %u\n", + pid, GetLastError()); + goto exit; + } + + if ((p = strrchr(path, '.')) == 0) + p= path + strlen(path); + + strncpy(p, ".dmp", path + MAX_PATH - p); + + /* Create dump in current directory.*/ + const char *filename= strrchr(path, '\\'); + if (filename == 0) + filename = path; + else + filename++; + + if (!GetCurrentDirectory(MAX_PATH, working_dir)) + { + fprintf(stderr, "GetCurrentDirectory failed, last error %u",GetLastError()); + goto exit; + } + + file = CreateFile(filename, GENERIC_READ | GENERIC_WRITE, + 0, 0, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0); + + if (file == INVALID_HANDLE_VALUE) + { + fprintf(stderr,"safe_kill : CreateFile() failed for file %s, working dir %s, last error = %u\n", + filename, working_dir, GetLastError()); + goto exit; + } + + if (!MiniDumpWriteDump(process, pid, file, MiniDumpNormal, 0,0,0)) + { + fprintf(stderr, "Failed to write minidump to %s, working dir %s, last error %u\n", + filename, working_dir, GetLastError()); + goto exit; + } + + ret = 0; + fprintf(stderr, "Minidump written to %s, directory %s\n", filename, working_dir); + +exit: + if(process!= 0 && process != INVALID_HANDLE_VALUE) + CloseHandle(process); + + if (file != 0 && file != INVALID_HANDLE_VALUE) + CloseHandle(file); + return ret; +} int main(int argc, const char** argv ) { @@ -37,12 +111,16 @@ int main(int argc, const char** argv ) signal(SIGBREAK, SIG_IGN); signal(SIGTERM, 
SIG_IGN); - if (argc != 2) { - fprintf(stderr, "safe_kill \n"); + if ((argc != 2 && argc != 3) || (argc == 3 && strcmp(argv[2],"dump"))) { + fprintf(stderr, "safe_kill [dump]\n"); exit(2); } pid= atoi(argv[1]); + if (argc == 3) + { + return create_dump(pid); + } _snprintf(safe_process_name, sizeof(safe_process_name), "safe_process[%d]", pid); diff --git a/mysql-test/r/cte_nonrecursive.result b/mysql-test/r/cte_nonrecursive.result index 2fceebd197191..3ad6fb8fabe57 100644 --- a/mysql-test/r/cte_nonrecursive.result +++ b/mysql-test/r/cte_nonrecursive.result @@ -86,7 +86,7 @@ select * from t2,t where t2.c=t.a; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 4 Using where 1 PRIMARY ref key0 key0 5 test.t2.c 2 -2 SUBQUERY t1 ALL NULL NULL NULL NULL 8 Using where; Using temporary; Using filesort +2 DERIVED t1 ALL NULL NULL NULL NULL 8 Using where; Using temporary; Using filesort explain select * from t2, (select a, count(*) from t1 where b >= 'c' group by a) as t where t2.c=t.a; @@ -176,7 +176,7 @@ select * from t2 where c in (select c from t); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 4 Using where 1 PRIMARY ref key0 key0 8 test.t2.c 2 Using where; FirstMatch(t2) -2 SUBQUERY t1 ALL NULL NULL NULL NULL 8 Using where; Using temporary; Using filesort +2 DERIVED t1 ALL NULL NULL NULL NULL 8 Using where; Using temporary; Using filesort explain select * from t2 where c in (select c from (select count(*) as c from t1 @@ -245,8 +245,8 @@ select * from t as r1, t as r2 where r1.a=r2.a; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY ALL NULL NULL NULL NULL 8 Using where 1 PRIMARY ref key0 key0 5 r1.a 2 -3 SUBQUERY t1 ALL NULL NULL NULL NULL 8 Using where; Using temporary -2 SUBQUERY t1 ALL NULL NULL NULL NULL 8 Using where; Using temporary +3 DERIVED t1 ALL NULL NULL NULL NULL 8 Using where; Using temporary +2 DERIVED t1 ALL NULL NULL NULL NULL 8 Using where; Using temporary explain select * from (select distinct a from t1 where b >= 'c') as r1, (select distinct a from t1 where b >= 'c') as r2 @@ -370,7 +370,7 @@ select * from t2,t where t2.c=t.a; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 4 Using where 1 PRIMARY ref key0 key0 5 test.t2.c 2 -2 SUBQUERY t1 ALL NULL NULL NULL NULL 8 Using where +2 DERIVED t1 ALL NULL NULL NULL NULL 8 Using where 3 UNION t2 ALL NULL NULL NULL NULL 4 Using where NULL UNION RESULT ALL NULL NULL NULL NULL NULL explain @@ -598,7 +598,7 @@ select * from v2; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 4 Using where 1 PRIMARY ref key0 key0 5 test.t2.c 2 -3 SUBQUERY t1 ALL NULL NULL NULL NULL 8 Using where; Using temporary; Using filesort +3 DERIVED t1 ALL NULL NULL NULL NULL 8 Using where; Using temporary; Using filesort # with clause in the specification of a view that whose definition # table alias for a with table create view v3 as @@ -1055,3 +1055,27 @@ deallocate prepare stmt1; deallocate prepare stmt2; drop view v1,v2; drop table t1,t2; +# +# MDEV-13796: UNION of two materialized CTEs +# +CREATE TABLE t1 (id int, k int); +CREATE TABLE t2 (id int); +INSERT INTO t1 VALUES (3,5), (1,7), (4,3); +INSERT INTO t2 VALUES (4), (3), (2); +WITH d1 AS (SELECT SUM(k) FROM t1, t2 as t2 WHERE t1.id = t2.id), +d2 AS (SELECT SUM(k) FROM t1, t2 as t2 WHERE t1.id = t2.id) +SELECT * FROM d1 UNION SELECT * FROM d2; +SUM(k) +8 
+explain WITH d1 AS (SELECT SUM(k) FROM t1, t2 as t2 WHERE t1.id = t2.id), +d2 AS (SELECT SUM(k) FROM t1, t2 as t2 WHERE t1.id = t2.id) +SELECT * FROM d1 UNION SELECT * FROM d2; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY ALL NULL NULL NULL NULL 9 +2 DERIVED t1 ALL NULL NULL NULL NULL 3 +2 DERIVED t2 ALL NULL NULL NULL NULL 3 Using where; Using join buffer (flat, BNL join) +4 UNION ALL NULL NULL NULL NULL 9 +3 DERIVED t1 ALL NULL NULL NULL NULL 3 +3 DERIVED t2 ALL NULL NULL NULL NULL 3 Using where; Using join buffer (flat, BNL join) +NULL UNION RESULT ALL NULL NULL NULL NULL NULL +DROP TABLE t1,t2; diff --git a/mysql-test/r/cte_recursive.result b/mysql-test/r/cte_recursive.result index 946ba16ac5c2b..a4f32927cf17b 100644 --- a/mysql-test/r/cte_recursive.result +++ b/mysql-test/r/cte_recursive.result @@ -86,7 +86,7 @@ select t2.a from t1,t2 where t1.a+1=t2.a select * from t1; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY ALL NULL NULL NULL NULL 30 -2 SUBQUERY t2 ALL NULL NULL NULL NULL 5 Using where +2 DERIVED t2 ALL NULL NULL NULL NULL 5 Using where 3 UNION t1 ALL NULL NULL NULL NULL 5 3 UNION t2 ALL NULL NULL NULL NULL 5 Using where; Using join buffer (flat, BNL join) NULL UNION RESULT ALL NULL NULL NULL NULL NULL @@ -114,7 +114,7 @@ select t2.a from t1,t2 where t1.a+1=t2.a select * from t1; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY ALL NULL NULL NULL NULL 5 -2 SUBQUERY t2 ALL NULL NULL NULL NULL 5 Using where +2 DERIVED t2 ALL NULL NULL NULL NULL 5 Using where 3 RECURSIVE UNION ALL NULL NULL NULL NULL 5 3 RECURSIVE UNION t2 ALL NULL NULL NULL NULL 5 Using where; Using join buffer (flat, BNL join) NULL UNION RESULT ALL NULL NULL NULL NULL NULL @@ -691,13 +691,13 @@ id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY ALL NULL NULL NULL NULL 2 100.00 Using where 1 PRIMARY ref key0 key0 5 c.h_id 2 100.00 1 PRIMARY ref key0 key0 5 c.w_id 2 100.00 -3 SUBQUERY folks ALL NULL NULL NULL NULL 12 100.00 Using where +3 DERIVED folks ALL NULL NULL NULL NULL 12 100.00 Using where 4 RECURSIVE UNION ALL NULL NULL NULL NULL 2 100.00 4 RECURSIVE UNION p ALL NULL NULL NULL NULL 12 100.00 Using where; Using join buffer (flat, BNL join) 5 RECURSIVE UNION ALL NULL NULL NULL NULL 2 100.00 5 RECURSIVE UNION p ALL NULL NULL NULL NULL 12 100.00 Using where; Using join buffer (flat, BNL join) NULL UNION RESULT ALL NULL NULL NULL NULL NULL NULL -2 UNCACHEABLE SUBQUERY ALL NULL NULL NULL NULL 12 100.00 Using where +2 DERIVED ALL NULL NULL NULL NULL 12 100.00 Using where Warnings: Note 1003 with recursive ancestor_couple_ids as (/* select#2 */ select `a`.`father` AS `h_id`,`a`.`mother` AS `w_id` from `coupled_ancestors` `a` where `a`.`father` is not null and `a`.`mother` is not null), coupled_ancestors as (/* select#3 */ select `test`.`folks`.`id` AS `id`,`test`.`folks`.`name` AS `name`,`test`.`folks`.`dob` AS `dob`,`test`.`folks`.`father` AS `father`,`test`.`folks`.`mother` AS `mother` from `test`.`folks` where `test`.`folks`.`name` = 'Me' union all /* select#4 */ select `test`.`p`.`id` AS `id`,`test`.`p`.`name` AS `name`,`test`.`p`.`dob` AS `dob`,`test`.`p`.`father` AS `father`,`test`.`p`.`mother` AS `mother` from `test`.`folks` `p` join `ancestor_couple_ids` `fa` where `test`.`p`.`id` = `fa`.`h_id` union all /* select#5 */ select `test`.`p`.`id` AS `id`,`test`.`p`.`name` AS `name`,`test`.`p`.`dob` AS `dob`,`test`.`p`.`father` AS `father`,`test`.`p`.`mother` AS `mother` from 
`test`.`folks` `p` join `ancestor_couple_ids` `ma` where `test`.`p`.`id` = `ma`.`w_id`)/* select#1 */ select `h`.`name` AS `name`,`h`.`dob` AS `dob`,`w`.`name` AS `name`,`w`.`dob` AS `dob` from `ancestor_couple_ids` `c` join `coupled_ancestors` `h` join `coupled_ancestors` `w` where `h`.`id` = `c`.`h_id` and `w`.`id` = `c`.`w_id` # simple mutual recursion @@ -877,7 +877,7 @@ where p.id = a.father or p.id = a.mother select * from ancestors; id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY ALL NULL NULL NULL NULL 12 100.00 -2 SUBQUERY folks ALL NULL NULL NULL NULL 12 100.00 Using where +2 DERIVED folks ALL NULL NULL NULL NULL 12 100.00 Using where 3 RECURSIVE UNION p ALL NULL NULL NULL NULL 12 100.00 3 RECURSIVE UNION ALL NULL NULL NULL NULL 12 100.00 Using where; Using join buffer (flat, BNL join) NULL UNION RESULT ALL NULL NULL NULL NULL NULL NULL @@ -1236,7 +1236,7 @@ where p.id = ma.mother select * from ancestors; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY ALL NULL NULL NULL NULL 12 -2 SUBQUERY folks ALL NULL NULL NULL NULL 12 Using where +2 DERIVED folks ALL NULL NULL NULL NULL 12 Using where 3 RECURSIVE UNION p ALL PRIMARY NULL NULL NULL 12 3 RECURSIVE UNION ref key0 key0 5 test.p.id 2 4 RECURSIVE UNION p ALL PRIMARY NULL NULL NULL 12 @@ -1300,14 +1300,14 @@ from prev_gen select ancestors.name, ancestors.dob from ancestors; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY ALL NULL NULL NULL NULL 24 -4 SUBQUERY folks ALL NULL NULL NULL NULL 12 Using where +4 DERIVED folks ALL NULL NULL NULL NULL 12 Using where 6 RECURSIVE UNION ALL NULL NULL NULL NULL 12 -5 RECURSIVE UNION ALL NULL NULL NULL NULL 24 -NULL UNION RESULT ALL NULL NULL NULL NULL NULL -3 SUBQUERY folks ALL NULL NULL NULL NULL 12 Using where +3 DERIVED folks ALL NULL NULL NULL NULL 12 Using where 2 RECURSIVE UNION folks ALL PRIMARY NULL NULL NULL 12 2 RECURSIVE UNION ALL NULL NULL NULL NULL 12 Using where; Using join buffer (flat, BNL join) NULL UNION RESULT ALL NULL NULL NULL NULL NULL +5 RECURSIVE UNION ALL NULL NULL NULL NULL 24 +NULL UNION RESULT ALL NULL NULL NULL NULL NULL explain FORMAT=JSON with recursive prev_gen @@ -1353,7 +1353,6 @@ EXPLAIN { "query_block": { "select_id": 4, - "operation": "UNION", "table": { "table_name": "folks", "access_type": "ALL", @@ -1382,7 +1381,6 @@ EXPLAIN { "query_block": { "select_id": 3, - "operation": "UNION", "table": { "table_name": "folks", "access_type": "ALL", @@ -1489,7 +1487,6 @@ EXPLAIN { "query_block": { "select_id": 3, - "operation": "UNION", "table": { "table_name": "v", "access_type": "ALL", @@ -1757,7 +1754,6 @@ EXPLAIN { "query_block": { "select_id": 2, - "operation": "UNION", "table": { "table_name": "t1", "access_type": "ALL", @@ -1840,7 +1836,7 @@ select t2.a from t1,t2 where t1.a+1=t2.a select * from t1; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY ALL NULL NULL NULL NULL 5 -2 SUBQUERY t2 ALL NULL NULL NULL NULL 5 Using where +2 DERIVED t2 ALL NULL NULL NULL NULL 5 Using where 4 RECURSIVE UNION ALL NULL NULL NULL NULL 5 4 RECURSIVE UNION t2 ALL NULL NULL NULL NULL 5 Using where; Using join buffer (flat, BNL join) NULL UNION RESULT ALL NULL NULL NULL NULL NULL @@ -2387,7 +2383,6 @@ ANALYZE { "query_block": { "select_id": 2, - "operation": "UNION", "table": { "message": "No tables used" } @@ -2794,7 +2789,7 @@ SELECT c1 FROM t, cte ) SELECT COUNT(*) FROM cte; id select_type table type possible_keys key key_len ref rows r_rows filtered 
r_filtered Extra 1 PRIMARY ALL NULL NULL NULL NULL 4 4.00 100.00 100.00 -2 SUBQUERY t ALL NULL NULL NULL NULL 4 4.00 100.00 100.00 +2 DERIVED t ALL NULL NULL NULL NULL 4 4.00 100.00 100.00 3 RECURSIVE UNION t ALL NULL NULL NULL NULL 4 4.00 100.00 100.00 3 RECURSIVE UNION ALL NULL NULL NULL NULL 4 4.00 100.00 100.00 Using join buffer (flat, BNL join) NULL UNION RESULT ALL NULL NULL NULL NULL NULL 0.00 NULL NULL @@ -2812,7 +2807,7 @@ SELECT c2 FROM t, cte ) SELECT COUNT(*) FROM cte; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra 1 PRIMARY ALL NULL NULL NULL NULL 4 4.00 100.00 100.00 -2 SUBQUERY t ALL NULL NULL NULL NULL 4 4.00 100.00 100.00 +2 DERIVED t ALL NULL NULL NULL NULL 4 4.00 100.00 100.00 3 RECURSIVE UNION t ALL NULL NULL NULL NULL 4 4.00 100.00 100.00 3 RECURSIVE UNION ALL NULL NULL NULL NULL 4 4.00 100.00 100.00 Using join buffer (flat, BNL join) NULL UNION RESULT ALL NULL NULL NULL NULL NULL 0.00 NULL NULL diff --git a/mysql-test/r/func_json.result b/mysql-test/r/func_json.result index b1d3d96aa7343..15e4fbec6053a 100644 --- a/mysql-test/r/func_json.result +++ b/mysql-test/r/func_json.result @@ -152,6 +152,9 @@ json_contains('[{"abc":"def", "def":"abc"}]', '["foo","bar"]') select json_contains('[{"abc":"def", "def":"abc"}, "bar"]', '["bar", {}]'); json_contains('[{"abc":"def", "def":"abc"}, "bar"]', '["bar", {}]') 1 +select json_contains('[{"a":"b"},{"c":"d"}]','{"c":"d"}'); +json_contains('[{"a":"b"},{"c":"d"}]','{"c":"d"}') +1 select json_contains_path('{"key1":1, "key2":[2,3]}', "oNE", "$.key2[1]"); json_contains_path('{"key1":1, "key2":[2,3]}', "oNE", "$.key2[1]") 1 @@ -402,6 +405,13 @@ abc select json_unquote('abc'); json_unquote('abc') abc +create table t1 (c VARCHAR(8)) DEFAULT CHARSET=latin1; +insert into t1 values ('abc'),('def'); +select json_object('foo', json_unquote(json_object('bar', c)),'qux', c) as fld from t1; +fld +{"foo": "{\"bar\": \"abc\"}", "qux": "abc"} +{"foo": "{\"bar\": \"def\"}", "qux": "def"} +drop table t1; select json_object("a", json_object("b", "abcd")); json_object("a", json_object("b", "abcd")) {"a": {"b": "abcd"}} @@ -443,6 +453,11 @@ json_length('{"a": 1, "b": {"c": 30}}', '$.b') select json_length('{"a": 1, "b": {"c": 30}}'); json_length('{"a": 1, "b": {"c": 30}}') 2 +select json_length('{}{'); +json_length('{}{') +NULL +Warnings: +Warning 4038 Syntax error in JSON text in argument 1 to function 'json_length' at position 3 create table json (j INT); show create table json; Table Create Table @@ -705,6 +720,11 @@ json_data SELECT JSON_OBJECT("user","Jožko Mrkvičká") as json_data; json_data {"user": "Jožko Mrkvičká"} +select json_contains_path('{"foo":"bar"}', 'one', '$[]'); +json_contains_path('{"foo":"bar"}', 'one', '$[]') +NULL +Warnings: +Warning 4042 Syntax error in JSON path in argument 3 to function 'json_contains_path' at position 3 # # Start of 10.3 tests # diff --git a/mysql-test/r/gis-precise.result b/mysql-test/r/gis-precise.result index f816278a0ba94..292dfe0462c46 100644 --- a/mysql-test/r/gis-precise.result +++ b/mysql-test/r/gis-precise.result @@ -485,6 +485,25 @@ ST_Touches(ST_PolygonFromText('POLYGON((0 0,0 5,5 5,5 0,0 0))'),ST_PointFromText select ST_Touches(ST_PointFromText('POINT(0 0)'),ST_PointFromText('POINT(0 0)')); ST_Touches(ST_PointFromText('POINT(0 0)'),ST_PointFromText('POINT(0 0)')) 0 +SELECT ST_RELATE( +ST_DIFFERENCE( +GEOMETRYFROMTEXT(' + MULTILINESTRING( + ( 12841 36140, 8005 31007, 26555 31075, 52765 41191, + 28978 6548, 45720 32057, 53345 3221 ), + ( 8304 59107, 
25233 31592, 40502 25303, 8205 42940 ), + ( 7829 7305, 58841 56759, 64115 8512, 37562 54145, 2210 14701 ), + ( 20379 2805, 40807 27770, 28147 14883, 26439 29383, 55663 5086 ), + ( 35944 64702, 14433 23728, 49317 26241, 790 16941 ) + ) + '), +GEOMETRYFROMTEXT('POINT(46061 13545)') +), +GEOMETRYFROMTEXT('POINT(4599 60359)'), +'F*FFFF**F' + ) as relate_res; +relate_res +0 DROP TABLE IF EXISTS p1; CREATE PROCEDURE p1(dist DOUBLE, geom TEXT) BEGIN diff --git a/mysql-test/r/gis2.result b/mysql-test/r/gis2.result index 214431e1d2d12..c0b476e080bf4 100644 --- a/mysql-test/r/gis2.result +++ b/mysql-test/r/gis2.result @@ -12,3 +12,27 @@ WHERE ST_Contains(point_data, GeomFromText('Point(38.0248492 23.8512726)')); id 2 DROP TABLE t1; +create table t1 (p point default "qwer"); +ERROR 22003: Cannot get geometry object from data you send to the GEOMETRY field +create table t1 (p point default 0); +ERROR 22003: Cannot get geometry object from data you send to the GEOMETRY field +create table t1 (p point not null default st_geometryfromtext('point 0)')); +ERROR 42000: Invalid default value for 'p' +create table t1 (p point not null default st_geometryfromtext('point(0 0)')); +insert into t1 values(default); +select st_astext(p) from t1; +st_astext(p) +POINT(0 0) +drop table t1; +create table t1 (p point not null default if(unix_timestamp()>10,POINT(1,1),LineString(Point(0,0),Point(1,1)))); +set timestamp=10; +insert into t1 values(default); +ERROR 22007: Incorrect POINT value: 'GEOMETRYCOLLECTION' for column 'p' at row 1 +drop table t1; +SET timestamp=default; +create table t1 (p point not null default if(unix_timestamp()>10,POINT(1,1),LineString(Point(0,0),Point(1,1)))); +set timestamp=10; +alter table t1 add column i int; +ERROR 22007: Incorrect POINT value: 'GEOMETRYCOLLECTION' for column 'p' at row 1 +drop table t1; +SET timestamp=default; diff --git a/mysql-test/r/information_schema.result b/mysql-test/r/information_schema.result index 01a7099e3077d..1c073881a9df8 100644 --- a/mysql-test/r/information_schema.result +++ b/mysql-test/r/information_schema.result @@ -2140,3 +2140,35 @@ drop database db1; connection default; disconnect con1; set global sql_mode=default; +USE test; +# +# End of 10.0 tests +# +# +# Start of 10.1 tests +# +# +# MDEV-13242 Wrong results for queries with row constructors and information_schema +# +CREATE TABLE tt1(c1 INT); +CREATE TABLE tt2(c2 INT); +SELECT count(*) FROM information_schema.columns WHERE table_schema='test' AND (table_name, column_name) IN (('tt1', 'c1')); +count(*) +1 +SELECT count(*) FROM information_schema.columns WHERE table_schema='test' AND (table_name, column_name) IN (('tt2', 'c2')); +count(*) +1 +SELECT count(*) FROM information_schema.columns WHERE table_schema='test' AND (table_name, column_name) IN (('tt1','c1'),('tt2', 'c2')); +count(*) +2 +SELECT count(*) FROM information_schema.columns WHERE table_schema='test' AND (table_name, column_name) IN (SELECT 'tt1','c1' FROM dual UNION SELECT 'tt2', 'c2' FROM dual); +count(*) +2 +SELECT count(*) FROM information_schema.columns WHERE table_schema='test' AND (table_name='tt1' AND column_name='c1') OR (table_name='tt2' AND column_name='c2'); +count(*) +2 +SELECT column_name FROM information_schema.columns WHERE (table_name, column_name) IN (('tt1','c1'),('tt2', 'c2')) ORDER BY column_name; +column_name +c1 +c2 +DROP TABLE tt1, tt2; diff --git a/mysql-test/r/type_float.result b/mysql-test/r/type_float.result index 9a92ff21e9faf..57cdd1561dfb6 100644 --- a/mysql-test/r/type_float.result +++ 
b/mysql-test/r/type_float.result @@ -772,5 +772,31 @@ t1 CREATE TABLE `t1` ( ) ENGINE=MyISAM DEFAULT CHARSET=latin1 drop table if exists t1; # +# MDEV-11586 UNION of FLOAT type results in erroneous precision +# +CREATE TABLE t1 (f FLOAT); +INSERT INTO t1 VALUES (1.1); +SELECT f FROM t1 UNION SELECT 1; +f +1.100000023841858 +1 +SELECT 1 UNION SELECT f FROM t1; +1 +1 +1.100000023841858 +SELECT f FROM t1 UNION SELECT 2147483647; +f +1.100000023841858 +2147483647 +SELECT 2147483647 UNION SELECT f FROM t1; +2147483647 +2147483647 +1.100000023841858 +SELECT CASE WHEN 0 THEN (SELECT f FROM t1) ELSE 2147483647 END AS c1, +CASE WHEN 1 THEN 2147483647 ELSE (SELECT f FROM t1) END AS c2; +c1 c2 +2147483647 2147483647 +DROP TABLE t1; +# # End of 10.2 tests # diff --git a/mysql-test/suite/innodb/disabled.def b/mysql-test/suite/innodb/disabled.def index 9a92e99df2e54..c435de278b9c5 100644 --- a/mysql-test/suite/innodb/disabled.def +++ b/mysql-test/suite/innodb/disabled.def @@ -10,14 +10,5 @@ # ############################################################################## -innodb_defragment_fill_factor : MDEV-11336 Fix and enable innodb_defragment -innodb.defrag_mdl-9155 : MDEV-11336 Fix and enable innodb_defragment -innodb.innodb_defrag_concurrent : MDEV-11336 Fix and enable innodb_defragment -innodb.innodb_defrag_stats : MDEV-11336 Fix and enable innodb_defragment -innodb.innodb_defrag_stats_many_tables : MDEV-11336 Fix and enable innodb_defragment -innodb.innodb_defragment : MDEV-11336 Fix and enable innodb_defragment -innodb.innodb_defragment_fill_factor : MDEV-11336 Fix and enable innodb_defragment -innodb.innodb_defragment_small : MDEV-11336 Fix and enable innodb_defragment -innodb.innodb_defrag_binlog : MDEV-11336 Fix and enable innodb_defragment innodb-wl5980-alter : MDEV-9469 / MDEV-13668 extra crash in 10.2 create-index-debug : MDEV-13680 InnoDB may crash when btr_page_alloc() fails diff --git a/mysql-test/suite/innodb/r/innodb-alter.result b/mysql-test/suite/innodb/r/innodb-alter.result index aa78f55c78f21..b06c6060375c6 100644 --- a/mysql-test/suite/innodb/r/innodb-alter.result +++ b/mysql-test/suite/innodb/r/innodb-alter.result @@ -857,3 +857,33 @@ DROP TABLE dest_db.t1; DROP TABLE source_db.t1; DROP DATABASE source_db; DROP DATABASE dest_db; +USE test; +# +# MDEV-14038 ALTER TABLE does not exit on error with InnoDB + bad default function +# +CREATE TABLE t1 (a INT NOT NULL DEFAULT 0) ENGINE=InnoDB; +iNSERT INTO t1 VALUES (10); +ALTER TABLE t1 ADD b TINYINT NOT NULL DEFAULT if(unix_timestamp()>1,1000,0); +ERROR 22003: Out of range value for column 'b' at row 1 +SELECT * FROM t1; +a +10 +DROP TABLE t1; +CREATE TABLE t1 (a INT NOT NULL DEFAULT 0) ENGINE=InnoDB; +iNSERT INTO t1 VALUES (10); +ALTER TABLE t1 ADD b DATE NOT NULL DEFAULT if(unix_timestamp()>1,TIMESTAMP'2001-01-01 10:20:30',0); +affected rows: 0 +info: Records: 0 Duplicates: 0 Warnings: 0 +SELECT * FROM t1; +a b +10 2001-01-01 +DROP TABLE t1; +CREATE TABLE t1 (a INT NOT NULL DEFAULT 0) ENGINE=InnoDB; +iNSERT INTO t1 VALUES (10); +ALTER TABLE t1 ADD b TIME NOT NULL DEFAULT if(unix_timestamp()>1,TIMESTAMP'2001-01-01 10:20:30',0); +affected rows: 0 +info: Records: 0 Duplicates: 0 Warnings: 0 +SELECT * FROM t1; +a b +10 10:20:30 +DROP TABLE t1; diff --git a/mysql-test/suite/innodb/r/innodb-online-alter-gis.result b/mysql-test/suite/innodb/r/innodb-online-alter-gis.result index c7daac48e4809..79c0f2386aabc 100644 --- a/mysql-test/suite/innodb/r/innodb-online-alter-gis.result +++ b/mysql-test/suite/innodb/r/innodb-online-alter-gis.result 
@@ -37,3 +37,13 @@ Level Code Message show errors; Level Code Message drop table t1; +# +# MDEV-14038 ALTER TABLE does not exit on error with InnoDB + bad default function +# +CREATE TABLE t1 (a INT) ENGINE=InnoDB; +ALTER TABLE t1 ADD COLUMN b LINESTRING DEFAULT POINT(1,1); +ERROR 22007: Incorrect LINESTRING value: 'POINT' for column 'b' at row 1 +DESCRIBE t1; +Field Type Null Key Default Extra +a int(11) YES NULL +DROP TABLE t1; diff --git a/mysql-test/suite/innodb/r/innodb_defrag_concurrent.result b/mysql-test/suite/innodb/r/innodb_defrag_concurrent.result index ff32bf694cb6a..d10727b95b4df 100644 --- a/mysql-test/suite/innodb/r/innodb_defrag_concurrent.result +++ b/mysql-test/suite/innodb/r/innodb_defrag_concurrent.result @@ -3,7 +3,15 @@ select @@global.innodb_stats_persistent; @@global.innodb_stats_persistent 0 set global innodb_defragment_stats_accuracy = 80; -CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, b VARCHAR(256), c INT, KEY second(a, b),KEY third(c)) ENGINE=INNODB; +CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, +b VARCHAR(256), +c INT, +g GEOMETRY NOT NULL, +t VARCHAR(256), +KEY second(a, b), +KEY third(c), +SPATIAL gk(g), +FULLTEXT INDEX fti(t)) ENGINE=INNODB; connect con1,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK; connect con2,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK; connect con3,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK; @@ -40,9 +48,9 @@ count(stat_value) > 0 connection con1; optimize table t1;; connection default; -INSERT INTO t1 VALUES (400000, REPEAT('A', 256),300000);; +INSERT INTO t1 VALUES (400000, REPEAT('A', 256),300000, Point(1,1),'More like a test but different.');; connection con2; -INSERT INTO t1 VALUES (500000, REPEAT('A', 256),400000);; +INSERT INTO t1 VALUES (500000, REPEAT('A', 256),400000, Point(1,1),'Totally different text book.');; connection con3; DELETE FROM t1 where a between 1 and 100;; connection con4; @@ -59,6 +67,9 @@ disconnect con4; optimize table t1; Table Op Msg_type Msg_text test.t1 optimize status OK +check table t1 extended; +Table Op Msg_type Msg_text +test.t1 check status OK select count(*) from t1; count(*) 15723 diff --git a/mysql-test/suite/innodb/r/log_file_size.result b/mysql-test/suite/innodb/r/log_file_size.result index b576061e74b3e..e049b34ad8115 100644 --- a/mysql-test/suite/innodb/r/log_file_size.result +++ b/mysql-test/suite/innodb/r/log_file_size.result @@ -1,4 +1,12 @@ CREATE TABLE t1(a INT PRIMARY KEY) ENGINE=InnoDB; +SELECT * FROM INFORMATION_SCHEMA.ENGINES +WHERE engine = 'innodb' +AND support IN ('YES', 'DEFAULT', 'ENABLED'); +ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS +FOUND 1 /InnoDB: Log file .*ib_logfile0 size 0 is too small/ in mysqld.1.err +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK BEGIN; INSERT INTO t1 VALUES (42); SELECT * FROM t1; diff --git a/mysql-test/suite/innodb/t/innodb-alter.test b/mysql-test/suite/innodb/t/innodb-alter.test index 5e681f96b4a95..d936dcad15cf0 100644 --- a/mysql-test/suite/innodb/t/innodb-alter.test +++ b/mysql-test/suite/innodb/t/innodb-alter.test @@ -494,6 +494,34 @@ eval ALTER TABLE $source_db.t1 DROP INDEX index2, algorithm=inplace; eval DROP TABLE $source_db.t1; eval DROP DATABASE $source_db; eval DROP DATABASE $dest_db; +USE test; +--echo # +--echo # MDEV-14038 ALTER TABLE does not exit on error with InnoDB + bad default function +--echo # +CREATE TABLE t1 (a INT NOT NULL DEFAULT 0) ENGINE=InnoDB; +iNSERT INTO t1 VALUES (10); +--error ER_WARN_DATA_OUT_OF_RANGE +ALTER TABLE t1 ADD b TINYINT NOT 
NULL DEFAULT if(unix_timestamp()>1,1000,0); +SELECT * FROM t1; +DROP TABLE t1; + +# DATETIME-to-DATE truncation is OK +CREATE TABLE t1 (a INT NOT NULL DEFAULT 0) ENGINE=InnoDB; +iNSERT INTO t1 VALUES (10); +--enable_info +ALTER TABLE t1 ADD b DATE NOT NULL DEFAULT if(unix_timestamp()>1,TIMESTAMP'2001-01-01 10:20:30',0); +--disable_info +SELECT * FROM t1; +DROP TABLE t1; + +# DATETIME-to-TIME truncation is OK +CREATE TABLE t1 (a INT NOT NULL DEFAULT 0) ENGINE=InnoDB; +iNSERT INTO t1 VALUES (10); +--enable_info +ALTER TABLE t1 ADD b TIME NOT NULL DEFAULT if(unix_timestamp()>1,TIMESTAMP'2001-01-01 10:20:30',0); +--disable_info +SELECT * FROM t1; +DROP TABLE t1; diff --git a/mysql-test/suite/innodb/t/innodb-online-alter-gis.test b/mysql-test/suite/innodb/t/innodb-online-alter-gis.test index 64d07ba23aaff..2cb88d398bb94 100644 --- a/mysql-test/suite/innodb/t/innodb-online-alter-gis.test +++ b/mysql-test/suite/innodb/t/innodb-online-alter-gis.test @@ -19,3 +19,13 @@ ALTER ONLINE TABLE t1 ADD PRIMARY KEY(a),DROP INDEX d, LOCK=SHARED; show warnings; show errors; drop table t1; + +--echo # +--echo # MDEV-14038 ALTER TABLE does not exit on error with InnoDB + bad default function +--echo # + +CREATE TABLE t1 (a INT) ENGINE=InnoDB; +--error ER_TRUNCATED_WRONG_VALUE_FOR_FIELD +ALTER TABLE t1 ADD COLUMN b LINESTRING DEFAULT POINT(1,1); +DESCRIBE t1; +DROP TABLE t1; diff --git a/mysql-test/suite/innodb/t/innodb_defrag_concurrent.test b/mysql-test/suite/innodb/t/innodb_defrag_concurrent.test index f596fab2a1567..bbcd72f1a3acf 100644 --- a/mysql-test/suite/innodb/t/innodb_defrag_concurrent.test +++ b/mysql-test/suite/innodb/t/innodb_defrag_concurrent.test @@ -16,7 +16,26 @@ select @@global.innodb_stats_persistent; set global innodb_defragment_stats_accuracy = 80; # Create table. -CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, b VARCHAR(256), c INT, KEY second(a, b),KEY third(c)) ENGINE=INNODB; +# +# TODO: Currently we do not defragment spatial indexes, +# because doing it properly would require +# appropriate logic around the SSN (split +# sequence number). +# +# Also do not defragment auxiliary tables related to FULLTEXT INDEX. +# +# Both types added to this test to make sure they do not cause +# problems. 
+# +CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, +b VARCHAR(256), +c INT, +g GEOMETRY NOT NULL, +t VARCHAR(256), +KEY second(a, b), +KEY third(c), +SPATIAL gk(g), +FULLTEXT INDEX fti(t)) ENGINE=INNODB; connect (con1,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK); connect (con2,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK); @@ -36,7 +55,7 @@ let $i = $data_size; while ($i) { eval - INSERT INTO t1 VALUES ($data_size + 1 - $i, REPEAT('A', 256), $i); + INSERT INTO t1 VALUES ($data_size + 1 - $i, REPEAT('A', 256), $i, Point($i,$i), 'This is a test message.'); dec $i; } --enable_query_log @@ -69,10 +88,10 @@ connection con1; --send optimize table t1; connection default; ---send INSERT INTO t1 VALUES (400000, REPEAT('A', 256),300000); +--send INSERT INTO t1 VALUES (400000, REPEAT('A', 256),300000, Point(1,1),'More like a test but different.'); connection con2; ---send INSERT INTO t1 VALUES (500000, REPEAT('A', 256),400000); +--send INSERT INTO t1 VALUES (500000, REPEAT('A', 256),400000, Point(1,1),'Totally different text book.'); connection con3; --send DELETE FROM t1 where a between 1 and 100; @@ -103,6 +122,7 @@ disconnect con3; disconnect con4; optimize table t1; +check table t1 extended; select count(*) from t1; select count(*) from t1 force index (second); diff --git a/mysql-test/suite/innodb/t/log_file_size.test b/mysql-test/suite/innodb/t/log_file_size.test index 206444115fc05..140198de4abfc 100644 --- a/mysql-test/suite/innodb/t/log_file_size.test +++ b/mysql-test/suite/innodb/t/log_file_size.test @@ -23,14 +23,33 @@ call mtr.add_suppression("InnoDB: Log file .*ib_logfile[01].* size"); call mtr.add_suppression("InnoDB: Unable to open .*ib_logfile0. to check native AIO read support"); FLUSH TABLES; --enable_query_log +let MYSQLD_DATADIR= `select @@datadir`; +CREATE TABLE t1(a INT PRIMARY KEY) ENGINE=InnoDB; + +--source include/shutdown_mysqld.inc +--move_file $MYSQLD_DATADIR/ib_logfile0 $MYSQLD_DATADIR/ib_logfile.old +write_file $MYSQLD_DATADIR/ib_logfile0; +EOF +let $check_no_innodb=SELECT * FROM INFORMATION_SCHEMA.ENGINES +WHERE engine = 'innodb' +AND support IN ('YES', 'DEFAULT', 'ENABLED'); --let $restart_parameters= --innodb-thread-concurrency=1 --innodb-log-file-size=1m --innodb-log-files-in-group=2 ---source include/restart_mysqld.inc +--source include/start_mysqld.inc + +eval $check_no_innodb; +--remove_file $MYSQLD_DATADIR/ib_logfile0 +--move_file $MYSQLD_DATADIR/ib_logfile.old $MYSQLD_DATADIR/ib_logfile.0 +--source include/shutdown_mysqld.inc +let SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err; +let SEARCH_PATTERN= InnoDB: Log file .*ib_logfile0 size 0 is too small; +--source include/search_pattern_in_file.inc +--source include/start_mysqld.inc +CHECK TABLE t1; --let $restart_parameters= --innodb-thread-concurrency=100 --innodb-log-file-size=10M --innodb-log-files-in-group=2 --source include/restart_mysqld.inc -CREATE TABLE t1(a INT PRIMARY KEY) ENGINE=InnoDB; BEGIN; INSERT INTO t1 VALUES (42); @@ -52,9 +71,7 @@ SELECT * FROM t1; INSERT INTO t1 VALUES (0),(123); -let MYSQLD_DATADIR= `select @@datadir`; let SEARCH_ABORT = NOT FOUND; -let SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err; BEGIN; DELETE FROM t1 WHERE a>0; diff --git a/mysql-test/suite/innodb_gis/r/alter_spatial_index.result b/mysql-test/suite/innodb_gis/r/alter_spatial_index.result index 17f1f7e1b0633..a945e68aeb159 100644 --- a/mysql-test/suite/innodb_gis/r/alter_spatial_index.result +++ b/mysql-test/suite/innodb_gis/r/alter_spatial_index.result @@ -743,3 +743,19 @@ ALTER TABLE t1 ADD 
SPATIAL INDEX(p); ALTER TABLE t1 FORCE, LOCK=NONE; ERROR 0A000: LOCK=NONE is not supported. Reason: Do not support online operation on table with GIS index. Try LOCK=SHARED DROP TABLE t1; +create table t1 (p point not null default if(unix_timestamp()>10,POINT(1,1),LineString(Point(0,0),Point(1,1)))) ENGINE=innodb; +set timestamp=10; +insert into t1 values(default); +ERROR 22007: Incorrect POINT value: 'GEOMETRYCOLLECTION' for column 'p' at row 1 +drop table t1; +SET timestamp=default; +create table t1 (p point not null default if(unix_timestamp()>10,POINT(1,1),LineString(Point(0,0),Point(1,1)))) ENGINE=innodb; +set timestamp=10; +alter table t1 add column i int; +ERROR 22007: Incorrect POINT value: 'GEOMETRYCOLLECTION' for column 'p' at row 1 +drop table t1; +SET timestamp=default; +CREATE OR REPLACE TABLE t1 (a INT) ENGINE=InnoDB; +ALTER TABLE t1 ADD COLUMN b POINT DEFAULT '0'; +ERROR 22003: Cannot get geometry object from data you send to the GEOMETRY field +DROP TABLE t1; diff --git a/mysql-test/suite/innodb_gis/t/alter_spatial_index.test b/mysql-test/suite/innodb_gis/t/alter_spatial_index.test index 2b834ac69a6ff..703a89b406542 100644 --- a/mysql-test/suite/innodb_gis/t/alter_spatial_index.test +++ b/mysql-test/suite/innodb_gis/t/alter_spatial_index.test @@ -743,3 +743,23 @@ ALTER TABLE t1 ADD SPATIAL INDEX(p); --error ER_ALTER_OPERATION_NOT_SUPPORTED_REASON ALTER TABLE t1 FORCE, LOCK=NONE; DROP TABLE t1; + +create table t1 (p point not null default if(unix_timestamp()>10,POINT(1,1),LineString(Point(0,0),Point(1,1)))) ENGINE=innodb; +set timestamp=10; +--error ER_TRUNCATED_WRONG_VALUE_FOR_FIELD +insert into t1 values(default); +drop table t1; +SET timestamp=default; + +create table t1 (p point not null default if(unix_timestamp()>10,POINT(1,1),LineString(Point(0,0),Point(1,1)))) ENGINE=innodb; +set timestamp=10; +--error ER_TRUNCATED_WRONG_VALUE_FOR_FIELD +alter table t1 add column i int; +drop table t1; +SET timestamp=default; + +CREATE OR REPLACE TABLE t1 (a INT) ENGINE=InnoDB; +--error ER_CANT_CREATE_GEOMETRY_OBJECT +ALTER TABLE t1 ADD COLUMN b POINT DEFAULT '0'; +DROP TABLE t1; + diff --git a/mysql-test/suite/mariabackup/xb_file_key_management.result b/mysql-test/suite/mariabackup/xb_file_key_management.result index 8972da32f8bba..721d10a9d9194 100644 --- a/mysql-test/suite/mariabackup/xb_file_key_management.result +++ b/mysql-test/suite/mariabackup/xb_file_key_management.result @@ -9,6 +9,7 @@ INSERT INTO t VALUES('foobar2'); # remove datadir # xtrabackup move back # restart server +ib_logfile0 SELECT * FROM t; c foobar1 diff --git a/mysql-test/suite/mariabackup/xb_file_key_management.test b/mysql-test/suite/mariabackup/xb_file_key_management.test index 3887a889aaaff..2a17695205341 100644 --- a/mysql-test/suite/mariabackup/xb_file_key_management.test +++ b/mysql-test/suite/mariabackup/xb_file_key_management.test @@ -24,6 +24,7 @@ exec $XTRABACKUP --prepare --target-dir=$targetdir; --enable_result_log --list_files $targetdir ib_logfile* +--cat_file $targetdir/ib_logfile0 SELECT * FROM t; DROP TABLE t; diff --git a/mysql-test/t/cte_nonrecursive.test b/mysql-test/t/cte_nonrecursive.test index 980bff0169419..57b7ae1658fd2 100644 --- a/mysql-test/t/cte_nonrecursive.test +++ b/mysql-test/t/cte_nonrecursive.test @@ -724,3 +724,22 @@ deallocate prepare stmt2; drop view v1,v2; drop table t1,t2; + +--echo # +--echo # MDEV-13796: UNION of two materialized CTEs +--echo # + +CREATE TABLE t1 (id int, k int); +CREATE TABLE t2 (id int); +INSERT INTO t1 VALUES (3,5), (1,7), (4,3); +INSERT INTO t2 
VALUES (4), (3), (2); + +let $q= +WITH d1 AS (SELECT SUM(k) FROM t1, t2 as t2 WHERE t1.id = t2.id), + d2 AS (SELECT SUM(k) FROM t1, t2 as t2 WHERE t1.id = t2.id) +SELECT * FROM d1 UNION SELECT * FROM d2; + +eval $q; +eval explain $q; + +DROP TABLE t1,t2; diff --git a/mysql-test/t/func_json.test b/mysql-test/t/func_json.test index a34cede537c42..47ed0c3ca7537 100644 --- a/mysql-test/t/func_json.test +++ b/mysql-test/t/func_json.test @@ -56,6 +56,7 @@ select json_contains('[1, {"a":1}]', '{}'); select json_contains('[1, {"a":1}]', '{"a":1}'); select json_contains('[{"abc":"def", "def":"abc"}]', '["foo","bar"]'); select json_contains('[{"abc":"def", "def":"abc"}, "bar"]', '["bar", {}]'); +select json_contains('[{"a":"b"},{"c":"d"}]','{"c":"d"}'); select json_contains_path('{"key1":1, "key2":[2,3]}', "oNE", "$.key2[1]"); select json_contains_path('{"key1":1, "key2":[2,3]}', "oNE", "$.key2[10]"); @@ -163,6 +164,14 @@ drop table t1; select json_unquote('"abc"'); select json_unquote('abc'); +# +# MDEV-13703 Illegal mix of collations for operation 'json_object' on using JSON_UNQUOTE as an argument. +# +create table t1 (c VARCHAR(8)) DEFAULT CHARSET=latin1; +insert into t1 values ('abc'),('def'); + +select json_object('foo', json_unquote(json_object('bar', c)),'qux', c) as fld from t1; +drop table t1; select json_object("a", json_object("b", "abcd")); select json_object("a", '{"b": "abcd"}'); @@ -179,6 +188,7 @@ select json_length('{}'); select json_length('[1, 2, {"a": 3}]'); select json_length('{"a": 1, "b": {"c": 30}}', '$.b'); select json_length('{"a": 1, "b": {"c": 30}}'); +select json_length('{}{'); create table json (j INT); show create table json; @@ -361,6 +371,12 @@ select json_array(5,json_query('[1,2]','$')); SELECT JSON_ARRAY('1. ě 2. š 3. č 4. ř 5. ž 6. ý 7. á 8. í 9. é 10. ů 11. ú') AS json_data; SELECT JSON_OBJECT("user","Jožko Mrkvičká") as json_data; +# +# MDEV-12312 JSON_CONTAINS_PATH does not detect invalid path and returns TRUE. +# + +select json_contains_path('{"foo":"bar"}', 'one', '$[]'); + --echo # --echo # Start of 10.3 tests --echo # diff --git a/mysql-test/t/gis-precise.test b/mysql-test/t/gis-precise.test index 1f8259bb8285f..07fabae602570 100644 --- a/mysql-test/t/gis-precise.test +++ b/mysql-test/t/gis-precise.test @@ -363,5 +363,24 @@ select ST_Touches(ST_LineFromText('LINESTRING(0 0,5 5)'),ST_PointFromText('POINT select ST_Touches(ST_PolygonFromText('POLYGON((0 0,0 5,5 5,5 0,0 0))'),ST_PointFromText('POINT(0 0)')); select ST_Touches(ST_PointFromText('POINT(0 0)'),ST_PointFromText('POINT(0 0)')); +# MDEV-12705 10.1.18-MariaDB-1~jessie - mysqld got signal 11. 
+SELECT ST_RELATE( + ST_DIFFERENCE( + GEOMETRYFROMTEXT(' + MULTILINESTRING( + ( 12841 36140, 8005 31007, 26555 31075, 52765 41191, + 28978 6548, 45720 32057, 53345 3221 ), + ( 8304 59107, 25233 31592, 40502 25303, 8205 42940 ), + ( 7829 7305, 58841 56759, 64115 8512, 37562 54145, 2210 14701 ), + ( 20379 2805, 40807 27770, 28147 14883, 26439 29383, 55663 5086 ), + ( 35944 64702, 14433 23728, 49317 26241, 790 16941 ) + ) + '), + GEOMETRYFROMTEXT('POINT(46061 13545)') + ), + GEOMETRYFROMTEXT('POINT(4599 60359)'), + 'F*FFFF**F' + ) as relate_res; + --source include/gis_debug.inc diff --git a/mysql-test/t/gis2.test b/mysql-test/t/gis2.test index b734ab19ecd73..9731e2a91d052 100644 --- a/mysql-test/t/gis2.test +++ b/mysql-test/t/gis2.test @@ -15,3 +15,31 @@ SELECT id FROM t1 WHERE ST_Contains(point_data, GeomFromText('Point(38.0248492 23.8512726)')); DROP TABLE t1; +# +# MDEV-13923 Assertion `!is_set() || (m_status == DA_OK_BULK && is_bulk_op())' failed upon altering table with geometry field +# +--error ER_CANT_CREATE_GEOMETRY_OBJECT +create table t1 (p point default "qwer"); +--error ER_CANT_CREATE_GEOMETRY_OBJECT +create table t1 (p point default 0); +--error ER_INVALID_DEFAULT +create table t1 (p point not null default st_geometryfromtext('point 0)')); +create table t1 (p point not null default st_geometryfromtext('point(0 0)')); +insert into t1 values(default); +select st_astext(p) from t1; +drop table t1; + +create table t1 (p point not null default if(unix_timestamp()>10,POINT(1,1),LineString(Point(0,0),Point(1,1)))); +set timestamp=10; +--error ER_TRUNCATED_WRONG_VALUE_FOR_FIELD +insert into t1 values(default); +drop table t1; +SET timestamp=default; + +create table t1 (p point not null default if(unix_timestamp()>10,POINT(1,1),LineString(Point(0,0),Point(1,1)))); +set timestamp=10; +--error ER_TRUNCATED_WRONG_VALUE_FOR_FIELD +alter table t1 add column i int; +drop table t1; +SET timestamp=default; + diff --git a/mysql-test/t/information_schema.test b/mysql-test/t/information_schema.test index 157b4c6950805..417390dcafe1a 100644 --- a/mysql-test/t/information_schema.test +++ b/mysql-test/t/information_schema.test @@ -1861,3 +1861,29 @@ disconnect con1; --source include/wait_until_count_sessions.inc set global sql_mode=default; + +USE test; + +--echo # +--echo # End of 10.0 tests +--echo # + + +--echo # +--echo # Start of 10.1 tests +--echo # + + +--echo # +--echo # MDEV-13242 Wrong results for queries with row constructors and information_schema +--echo # + +CREATE TABLE tt1(c1 INT); +CREATE TABLE tt2(c2 INT); +SELECT count(*) FROM information_schema.columns WHERE table_schema='test' AND (table_name, column_name) IN (('tt1', 'c1')); +SELECT count(*) FROM information_schema.columns WHERE table_schema='test' AND (table_name, column_name) IN (('tt2', 'c2')); +SELECT count(*) FROM information_schema.columns WHERE table_schema='test' AND (table_name, column_name) IN (('tt1','c1'),('tt2', 'c2')); +SELECT count(*) FROM information_schema.columns WHERE table_schema='test' AND (table_name, column_name) IN (SELECT 'tt1','c1' FROM dual UNION SELECT 'tt2', 'c2' FROM dual); +SELECT count(*) FROM information_schema.columns WHERE table_schema='test' AND (table_name='tt1' AND column_name='c1') OR (table_name='tt2' AND column_name='c2'); +SELECT column_name FROM information_schema.columns WHERE (table_name, column_name) IN (('tt1','c1'),('tt2', 'c2')) ORDER BY column_name; +DROP TABLE tt1, tt2; diff --git a/mysql-test/t/type_float.test b/mysql-test/t/type_float.test index 4e8cee75d6330..2d7c442850766 
100644 --- a/mysql-test/t/type_float.test +++ b/mysql-test/t/type_float.test @@ -533,6 +533,21 @@ show create table t1; drop table if exists t1; +--echo # +--echo # MDEV-11586 UNION of FLOAT type results in erroneous precision +--echo # + +CREATE TABLE t1 (f FLOAT); +INSERT INTO t1 VALUES (1.1); +SELECT f FROM t1 UNION SELECT 1; +SELECT 1 UNION SELECT f FROM t1; +SELECT f FROM t1 UNION SELECT 2147483647; +SELECT 2147483647 UNION SELECT f FROM t1; +SELECT CASE WHEN 0 THEN (SELECT f FROM t1) ELSE 2147483647 END AS c1, + CASE WHEN 1 THEN 2147483647 ELSE (SELECT f FROM t1) END AS c2; +DROP TABLE t1; + + --echo # --echo # End of 10.2 tests --echo # diff --git a/plugin/aws_key_management/CMakeLists.txt b/plugin/aws_key_management/CMakeLists.txt index 913bd8b16ed8c..aa93fc3aa0388 100644 --- a/plugin/aws_key_management/CMakeLists.txt +++ b/plugin/aws_key_management/CMakeLists.txt @@ -117,7 +117,7 @@ ELSE() IF(CMAKE_VERSION LESS "3.0") SET(GIT_TAG "1.0.8") ELSE() - SET(GIT_TAG "1.1.27") + SET(GIT_TAG "1.2.11") ENDIF() SET(AWS_SDK_PATCH_COMMAND ) diff --git a/sql/event_data_objects.cc b/sql/event_data_objects.cc index 7b08d1754ad26..ef33423308517 100644 --- a/sql/event_data_objects.cc +++ b/sql/event_data_objects.cc @@ -1478,19 +1478,33 @@ Event_job_data::execute(THD *thd, bool drop) bool save_tx_read_only= thd->tx_read_only; thd->tx_read_only= false; - if (WSREP(thd)) - { + /* + This code is processing event execution and does not have client + connection. Here, event execution will now execute a prepared + DROP EVENT statement, but thd->lex->sql_command is set to + SQLCOM_CREATE_PROCEDURE + DROP EVENT will be logged in binlog, and we have to + replicate it to make all nodes have consistent event definitions + Wsrep DDL replication is triggered inside Events::drop_event(), + and here we need to prepare the THD so that DDL replication is + possible, essentially it requires setting sql_command to + SQLCOMM_DROP_EVENT, we will switch sql_command for the duration + of DDL replication only. 
+ */ + const enum_sql_command sql_command_save= thd->lex->sql_command; + const bool sql_command_set= WSREP(thd); + + if (sql_command_set) thd->lex->sql_command = SQLCOM_DROP_EVENT; - WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL); - } ret= Events::drop_event(thd, &dbname, &name, FALSE); - WSREP_TO_ISOLATION_END; + if (sql_command_set) + { + WSREP_TO_ISOLATION_END; + thd->lex->sql_command = sql_command_save; + } -#ifdef WITH_WSREP - error: -#endif thd->tx_read_only= save_tx_read_only; thd->security_ctx->master_access= saved_master_access; } diff --git a/sql/events.cc b/sql/events.cc index 6a38d4d3a1f9b..6d71fcdcb39fe 100644 --- a/sql/events.cc +++ b/sql/events.cc @@ -335,6 +335,7 @@ Events::create_event(THD *thd, Event_parse_data *parse_data) if (check_access(thd, EVENT_ACL, parse_data->dbname.str, NULL, NULL, 0, 0)) DBUG_RETURN(TRUE); + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) if (lock_object_name(thd, MDL_key::EVENT, parse_data->dbname.str, parse_data->name.str)) @@ -417,6 +418,10 @@ Events::create_event(THD *thd, Event_parse_data *parse_data) thd->restore_stmt_binlog_format(save_binlog_format); DBUG_RETURN(ret); +#ifdef WITH_WSREP + error: + DBUG_RETURN(TRUE); +#endif /* WITH_WSREP */ } @@ -457,6 +462,9 @@ Events::update_event(THD *thd, Event_parse_data *parse_data, if (check_access(thd, EVENT_ACL, parse_data->dbname.str, NULL, NULL, 0, 0)) DBUG_RETURN(TRUE); + + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) + if (lock_object_name(thd, MDL_key::EVENT, parse_data->dbname.str, parse_data->name.str)) DBUG_RETURN(TRUE); @@ -541,6 +549,10 @@ Events::update_event(THD *thd, Event_parse_data *parse_data, thd->restore_stmt_binlog_format(save_binlog_format); DBUG_RETURN(ret); +#ifdef WITH_WSREP +error: + DBUG_RETURN(TRUE); +#endif /* WITH_WSREP */ } @@ -582,6 +594,8 @@ Events::drop_event(THD *thd, const LEX_CSTRING *dbname, if (check_access(thd, EVENT_ACL, dbname->str, NULL, NULL, 0, 0)) DBUG_RETURN(TRUE); + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) + /* Turn off row binlogging of this statement and use statement-based so that all supporting tables are updated for DROP EVENT command. 
@@ -603,6 +617,10 @@ Events::drop_event(THD *thd, const LEX_CSTRING *dbname, thd->restore_stmt_binlog_format(save_binlog_format); DBUG_RETURN(ret); +#ifdef WITH_WSREP +error: + DBUG_RETURN(TRUE); +#endif } diff --git a/sql/field.cc b/sql/field.cc index c7aa0dce16ec2..e7329feecb15a 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -240,7 +240,7 @@ static enum_field_types field_types_merge_rules [FIELDTYPE_NUM][FIELDTYPE_NUM]= //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP MYSQL_TYPE_FLOAT, MYSQL_TYPE_VARCHAR, //MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 - MYSQL_TYPE_FLOAT, MYSQL_TYPE_FLOAT, + MYSQL_TYPE_DOUBLE, MYSQL_TYPE_FLOAT, //MYSQL_TYPE_DATE MYSQL_TYPE_TIME MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR @@ -2240,15 +2240,15 @@ Field *Field::clone(MEM_ROOT *root, my_ptrdiff_t diff) return tmp; } -void Field::set_default() +int Field::set_default() { if (default_value) { Query_arena backup_arena; table->in_use->set_n_backup_active_arena(table->expr_arena, &backup_arena); - (void) default_value->expr->save_in_field(this, 0); + int rc= default_value->expr->save_in_field(this, 0); table->in_use->restore_active_arena(table->expr_arena, &backup_arena); - return; + return rc; } /* Copy constant value stored in s->default_values */ my_ptrdiff_t l_offset= (my_ptrdiff_t) (table->s->default_values - @@ -2257,6 +2257,7 @@ void Field::set_default() if (maybe_null_in_table()) *null_ptr= ((*null_ptr & (uchar) ~null_bit) | (null_ptr[l_offset] & null_bit)); + return 0; } @@ -9558,7 +9559,7 @@ Field_bit::unpack(uchar *to, const uchar *from, const uchar *from_end, } -void Field_bit::set_default() +int Field_bit::set_default() { if (bit_len > 0) { @@ -9566,7 +9567,7 @@ void Field_bit::set_default() uchar bits= get_rec_bits(bit_ptr + col_offset, bit_ofs, bit_len); set_rec_bits(bits, bit_ptr, bit_ofs, bit_len); } - Field::set_default(); + return Field::set_default(); } /* diff --git a/sql/field.h b/sql/field.h index 17b84e058a85d..951d6940a88c6 100644 --- a/sql/field.h +++ b/sql/field.h @@ -958,7 +958,7 @@ class Field: public Value_source my_ptrdiff_t l_offset= (my_ptrdiff_t) (record - table->record[0]); return ptr + l_offset; } - virtual void set_default(); + virtual int set_default(); bool has_update_default_function() const { @@ -3762,7 +3762,7 @@ class Field_bit :public Field { virtual uchar *pack(uchar *to, const uchar *from, uint max_length); virtual const uchar *unpack(uchar *to, const uchar *from, const uchar *from_end, uint param_data); - virtual void set_default(); + virtual int set_default(); Field *new_key_field(MEM_ROOT *root, TABLE *new_table, uchar *new_ptr, uint32 length, diff --git a/sql/item.h b/sql/item.h index a926ee9aa8527..82106fea92b77 100644 --- a/sql/item.h +++ b/sql/item.h @@ -4680,6 +4680,8 @@ class Item_cache_wrapper :public Item_result_field bool fix_fields(THD *thd, Item **it); void cleanup(); + Item *get_orig_item() const { return orig_item; } + /* Methods of getting value which should be cached in the cache */ void save_val(Field *to); double val_real(); diff --git a/sql/item_geofunc.cc b/sql/item_geofunc.cc index 04952739e8514..16d4cf7cefa22 100644 --- a/sql/item_geofunc.cc +++ b/sql/item_geofunc.cc @@ -66,9 +66,9 @@ String *Item_func_geometry_from_text::val_str(String *str) srid= (uint32)args[1]->val_int(); str->set_charset(&my_charset_bin); + str->length(0); if (str->reserve(SRID_SIZE, 512)) return 0; - str->length(0); str->q_append(srid); if ((null_value= !Geometry::create_from_wkt(&buffer, &trs, str, 0))) return 0; @@ -1323,6 +1323,8 @@ static int 
setup_relate_func(Geometry *g1, Geometry *g2, } else func->repeat_expression(shape_a); + if (func->reserve_op_buffer(1)) + return 1; func->add_operation(op_matrix(nc%3), 1); if (do_store_shapes) { @@ -1493,11 +1495,13 @@ longlong Item_func_spatial_precise_rel::val_int() Gcalc_function::op_intersection, 2); func.add_operation(Gcalc_function::op_internals, 1); shape_a= func.get_next_expression_pos(); - if ((null_value= g1.store_shapes(&trn))) + if ((null_value= g1.store_shapes(&trn)) || + func.reserve_op_buffer(1)) break; func.add_operation(Gcalc_function::op_internals, 1); shape_b= func.get_next_expression_pos(); - if ((null_value= g2.store_shapes(&trn))) + if ((null_value= g2.store_shapes(&trn)) || + func.reserve_op_buffer(1)) break; func.add_operation(Gcalc_function::v_find_t | Gcalc_function::op_intersection, 2); @@ -1732,6 +1736,8 @@ int Item_func_buffer::Transporter::single_point(double x, double y) { if (buffer_op == Gcalc_function::op_difference) { + if (m_fn->reserve_op_buffer(1)) + return 1; m_fn->add_operation(Gcalc_function::op_false, 0); return 0; } diff --git a/sql/item_jsonfunc.cc b/sql/item_jsonfunc.cc index 2f4c1ef8e4685..b30d32c36accb 100644 --- a/sql/item_jsonfunc.cc +++ b/sql/item_jsonfunc.cc @@ -581,7 +581,8 @@ String *Item_func_json_quote::val_str(String *str) void Item_func_json_unquote::fix_length_and_dec() { - collation.set(&my_charset_utf8_general_ci); + collation.set(&my_charset_utf8_general_ci, + DERIVATION_COERCIBLE, MY_REPERTOIRE_ASCII); max_length= args[0]->max_length; maybe_null= 1; } @@ -1011,6 +1012,8 @@ static int check_contains(json_engine_t *js, json_engine_t *value) case JSON_VALUE_ARRAY: if (value->value_type != JSON_VALUE_ARRAY) { + loc_js= *value; + set_js= FALSE; while (json_scan_next(js) == 0 && js->state != JST_ARRAY_END) { int c_level, v_scalar; @@ -1021,6 +1024,11 @@ static int check_contains(json_engine_t *js, json_engine_t *value) if (!(v_scalar= json_value_scalar(js))) c_level= json_get_level(js); + if (set_js) + *value= loc_js; + else + set_js= TRUE; + if (check_contains(js, value)) { if (json_skip_level(js)) @@ -1452,7 +1460,8 @@ void Item_func_json_array::fix_length_and_dec() if (arg_count == 0) { - collation.set(&my_charset_utf8_general_ci); + collation.set(&my_charset_utf8_general_ci, + DERIVATION_COERCIBLE, MY_REPERTOIRE_ASCII); tmp_val.set_charset(&my_charset_utf8_general_ci); max_length= 2; return; @@ -2126,6 +2135,7 @@ longlong Item_func_json_length::val_int() json_engine_t je; uint length= 0; uint array_counters[JSON_DEPTH_LIMIT]; + int err; if ((null_value= args[0]->null_value)) return 0; @@ -2167,7 +2177,7 @@ longlong Item_func_json_length::val_int() if (json_value_scalar(&je)) return 1; - while (json_scan_next(&je) == 0 && + while (!(err= json_scan_next(&je)) && je.state != JST_OBJ_END && je.state != JST_ARRAY_END) { switch (je.state) @@ -2186,6 +2196,12 @@ longlong Item_func_json_length::val_int() }; } + if (!err) + { + /* Parse to the end of the JSON just to check it's valid. */ + while (json_scan_next(&je) == 0) {} + } + if (!je.s.error) return length; diff --git a/sql/item_sum.cc b/sql/item_sum.cc index 0c0b5a64953c1..4a3f107796d65 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -3593,7 +3593,7 @@ bool Item_func_group_concat::setup(THD *thd) syntax of this function). If there is no ORDER BY clause, we don't create this tree. 
*/ - init_tree(tree, (uint) MY_MIN(thd->variables.max_heap_table_size, + init_tree(tree, (size_t)MY_MIN(thd->variables.max_heap_table_size, thd->variables.sortbuff_size/16), 0, tree_key_length, group_concat_key_cmp_with_order, NULL, (void*) this, diff --git a/sql/item_xmlfunc.cc b/sql/item_xmlfunc.cc index ba33d103d0c1f..2b3d23744058d 100644 --- a/sql/item_xmlfunc.cc +++ b/sql/item_xmlfunc.cc @@ -176,7 +176,7 @@ class Item_nodeset_func :public Item_str_func { nodebeg= (MY_XML_NODE*) pxml->ptr(); nodeend= (MY_XML_NODE*) (pxml->ptr() + pxml->length()); - numnodes= nodeend - nodebeg; + numnodes= (uint)(nodeend - nodebeg); } void prepare(String *nodeset) { @@ -615,7 +615,7 @@ class Item_nodeset_to_const_comparator :public Item_bool_func if ((node->parent == flt->num) && (node->type == MY_XML_NODE_TEXT)) { - fake->set_value(node->beg, node->end - node->beg, + fake->set_value(node->beg, (uint)(node->end - node->beg), collation.collation); if (args[1]->val_int()) return 1; @@ -817,7 +817,7 @@ String *Item_nodeset_func_predicate::val_nodeset(String *str) Item_func *comp_func= (Item_func*)args[1]; uint pos= 0, size; prepare(str); - size= fltend - fltbeg; + size= (uint)(fltend - fltbeg); for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) { nodeset_func->context_cache.length(0); @@ -836,7 +836,7 @@ String *Item_nodeset_func_elementbyindex::val_nodeset(String *nodeset) Item_nodeset_func *nodeset_func= (Item_nodeset_func*) args[0]; prepare(nodeset); MY_XPATH_FLT *flt; - uint pos, size= fltend - fltbeg; + uint pos, size= (uint)(fltend - fltbeg); for (pos= 0, flt= fltbeg; flt < fltend; flt++) { nodeset_func->context_cache.length(0); @@ -995,7 +995,7 @@ static Item *create_comparator(MY_XPATH *xpath, else if (a->type() == Item::XPATH_NODESET && b->type() == Item::XPATH_NODESET) { - uint len= xpath->query.end - context->beg; + uint len= (uint)(xpath->query.end - context->beg); set_if_smaller(len, 32); my_printf_error(ER_UNKNOWN_ERROR, "XPATH error: " @@ -1399,7 +1399,7 @@ MY_XPATH_FUNC * my_xpath_function(const char *beg, const char *end) { MY_XPATH_FUNC *k, *function_names; - uint length= end-beg; + uint length= (uint)(end-beg); switch (length) { case 1: return 0; @@ -1961,7 +1961,7 @@ static int my_xpath_parse_PrimaryExpr_literal(MY_XPATH *xpath) return 0; xpath->item= new (xpath->thd->mem_root) Item_string(xpath->thd, xpath->prevtok.beg + 1, - xpath->prevtok.end - xpath->prevtok.beg - 2, + (uint)(xpath->prevtok.end - xpath->prevtok.beg - 2), xpath->cs); return 1; } @@ -2499,13 +2499,13 @@ static int my_xpath_parse_Number(MY_XPATH *xpath) if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_DOT)) { xpath->item= new (thd->mem_root) Item_int(thd, xpath->prevtok.beg, - xpath->prevtok.end - xpath->prevtok.beg); + (uint)(xpath->prevtok.end - xpath->prevtok.beg)); return 1; } my_xpath_parse_term(xpath, MY_XPATH_LEX_DIGITS); xpath->item= new (thd->mem_root) Item_float(thd, beg, - xpath->prevtok.end - beg); + (uint)(xpath->prevtok.end - beg)); return 1; } @@ -2632,7 +2632,7 @@ my_xpath_parse_VariableReference(MY_XPATH *xpath) { xpath->item= NULL; DBUG_ASSERT(xpath->query.end > dollar_pos); - uint len= xpath->query.end - dollar_pos; + uint len= (uint)(xpath->query.end - dollar_pos); set_if_smaller(len, 32); my_printf_error(ER_UNKNOWN_ERROR, "Unknown XPATH variable at: '%.*s'", MYF(0), len, dollar_pos); @@ -2660,7 +2660,7 @@ my_xpath_parse_NodeTest_QName(MY_XPATH *xpath) if (!my_xpath_parse_QName(xpath)) return 0; DBUG_ASSERT(xpath->context); - uint len= xpath->prevtok.end - xpath->prevtok.beg; + uint len= 
(uint)(xpath->prevtok.end - xpath->prevtok.beg); xpath->context= nametestfunc(xpath, xpath->axis, xpath->context, xpath->prevtok.beg, len); return 1; @@ -2759,7 +2759,7 @@ bool Item_xml_str_func::fix_fields(THD *thd, Item **ref) if (!rc) { - uint clen= xpath.query.end - xpath.lasttok.beg; + uint clen= (uint)(xpath.query.end - xpath.lasttok.beg); set_if_smaller(clen, 32); my_printf_error(ER_UNKNOWN_ERROR, "XPATH syntax error: '%.*s'", MYF(0), clen, xpath.lasttok.beg); diff --git a/sql/log_event.cc b/sql/log_event.cc index f57fa7d271857..98b1f858fee55 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -1131,7 +1131,7 @@ int append_query_string(CHARSET_INFO *csinfo, String *to, *ptr++= '\''; } - to->length(orig_len + ptr - beg); + to->length((uint32)(orig_len + ptr - beg)); return 0; } #endif @@ -10036,7 +10036,7 @@ Execute_load_query_log_event::do_apply_event(rpl_group_info *rgi) p= strmake(p, STRING_WITH_LEN(" INTO ")); p= strmake(p, query+fn_pos_end, q_len-fn_pos_end); - error= Query_log_event::do_apply_event(rgi, buf, p-buf); + error= Query_log_event::do_apply_event(rgi, buf, (uint32)(p-buf)); /* Forging file name for deletion in same buffer */ *fname_end= 0; @@ -10488,7 +10488,7 @@ int Rows_log_event::do_add_row_data(uchar *row_data, size_t length) if (static_cast(m_rows_end - m_rows_cur) <= length) { size_t const block_size= 1024; - ulong cur_size= m_rows_cur - m_rows_buf; + size_t cur_size= m_rows_cur - m_rows_buf; DBUG_EXECUTE_IF("simulate_too_big_row_case1", cur_size= UINT_MAX32 - (block_size * 10); length= UINT_MAX32 - (block_size * 10);); @@ -10501,21 +10501,21 @@ int Rows_log_event::do_add_row_data(uchar *row_data, size_t length) DBUG_EXECUTE_IF("simulate_too_big_row_case4", cur_size= UINT_MAX32 - (block_size * 10); length= (block_size * 10) - block_size + 1;); - ulong remaining_space= UINT_MAX32 - cur_size; + size_t remaining_space= UINT_MAX32 - cur_size; /* Check that the new data fits within remaining space and we can add block_size without wrapping. 
*/ - if (length > remaining_space || + if (cur_size > UINT_MAX32 || length > remaining_space || ((length + block_size) > remaining_space)) { sql_print_error("The row data is greater than 4GB, which is too big to " "write to the binary log."); DBUG_RETURN(ER_BINLOG_ROW_LOGGING_FAILED); } - ulong const new_alloc= + size_t const new_alloc= block_size * ((cur_size + length + block_size - 1) / block_size); - uchar* const new_buf= (uchar*)my_realloc((uchar*)m_rows_buf, (uint) new_alloc, + uchar* const new_buf= (uchar*)my_realloc((uchar*)m_rows_buf, new_alloc, MYF(MY_ALLOW_ZERO_PTR|MY_WME)); if (unlikely(!new_buf)) DBUG_RETURN(HA_ERR_OUT_OF_MEM); @@ -11248,11 +11248,11 @@ bool Rows_log_event::write_compressed() uchar *m_rows_cur_tmp = m_rows_cur; bool ret = true; uint32 comlen, alloc_size; - comlen= alloc_size= binlog_get_compress_len(m_rows_cur_tmp - m_rows_buf_tmp); + comlen= alloc_size= binlog_get_compress_len((uint32)(m_rows_cur_tmp - m_rows_buf_tmp)); m_rows_buf = (uchar *)my_safe_alloca(alloc_size); if(m_rows_buf && !binlog_buf_compress((const char *)m_rows_buf_tmp, (char *)m_rows_buf, - m_rows_cur_tmp - m_rows_buf_tmp, &comlen)) + (uint32)(m_rows_cur_tmp - m_rows_buf_tmp), &comlen)) { m_rows_cur= comlen + m_rows_buf; ret= Log_event::write(); @@ -12488,7 +12488,7 @@ Rows_log_event::write_row(rpl_group_info *rgi, the size of the first row and use that value to initialize storage engine for bulk insertion */ DBUG_ASSERT(!(m_curr_row > m_curr_row_end)); - ulong estimated_rows= 0; + ha_rows estimated_rows= 0; if (m_curr_row < m_curr_row_end) estimated_rows= (m_rows_end - m_curr_row) / (m_curr_row_end - m_curr_row); else if (m_curr_row == m_curr_row_end) diff --git a/sql/mysqld.cc b/sql/mysqld.cc index d0993151f76c4..218594b82b444 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -8800,8 +8800,8 @@ static int mysql_init_variables(void) /* Set directory paths */ mysql_real_data_home_len= - strmake_buf(mysql_real_data_home, - get_relative_path(MYSQL_DATADIR)) - mysql_real_data_home; + (uint)(strmake_buf(mysql_real_data_home, + get_relative_path(MYSQL_DATADIR)) - mysql_real_data_home); /* Replication parameters */ master_info_file= (char*) "master.info", relay_log_info_file= (char*) "relay-log.info"; diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 5d31dd1662a86..024bb9dd93386 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -499,9 +499,9 @@ int SEL_IMERGE::or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree) if (trees_next == trees_end) { const int realloc_ratio= 2; /* Double size for next round */ - uint old_elements= (trees_end - trees); - uint old_size= sizeof(SEL_TREE**) * old_elements; - uint new_size= old_size * realloc_ratio; + size_t old_elements= (trees_end - trees); + size_t old_size= sizeof(SEL_TREE**) * old_elements; + size_t new_size= old_size * realloc_ratio; SEL_TREE **new_trees; if (!(new_trees= (SEL_TREE**)alloc_root(param->mem_root, new_size))) return -1; @@ -846,10 +846,10 @@ SEL_TREE::SEL_TREE(SEL_TREE *arg, bool without_merges, SEL_IMERGE::SEL_IMERGE(SEL_IMERGE *arg, uint cnt, RANGE_OPT_PARAM *param) : Sql_alloc() { - uint elements= (arg->trees_end - arg->trees); + size_t elements= (arg->trees_end - arg->trees); if (elements > PREALLOCED_TREES) { - uint size= elements * sizeof (SEL_TREE **); + size_t size= elements * sizeof (SEL_TREE **); if (!(trees= (SEL_TREE **)alloc_root(param->mem_root, size))) goto mem_err; } @@ -951,7 +951,7 @@ int imerge_list_or_list(RANGE_OPT_PARAM *param, uint rc; bool is_last_check_pass= FALSE; SEL_IMERGE *imerge= im1->head(); - uint 
elems= imerge->trees_next-imerge->trees; + uint elems= (uint)(imerge->trees_next-imerge->trees); MEM_ROOT *mem_root= current_thd->mem_root; im1->empty(); @@ -1051,7 +1051,7 @@ int imerge_list_or_tree(RANGE_OPT_PARAM *param, SEL_TREE *or_tree= new (mem_root) SEL_TREE (tree, FALSE, param); if (or_tree) { - uint elems= imerge->trees_next-imerge->trees; + uint elems= (uint)(imerge->trees_next-imerge->trees); rc= imerge->or_sel_tree_with_checks(param, elems, or_tree, TRUE, &is_last_check_pass); if (!is_last_check_pass) @@ -2897,7 +2897,7 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond) uint keynr; uint max_quick_key_parts= 0; MY_BITMAP *used_fields= &table->cond_set; - double table_records= table->stat_records(); + double table_records= (double)table->stat_records(); DBUG_ENTER("calculate_cond_selectivity_for_table"); table->cond_selectivity= 1.0; @@ -3994,8 +3994,8 @@ int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree) store_length_array, range_par->min_key, range_par->max_key, - tmp_min_key - range_par->min_key, - tmp_max_key - range_par->max_key, + (uint)(tmp_min_key - range_par->min_key), + (uint)(tmp_max_key - range_par->max_key), flag, &ppar->part_iter); if (!res) @@ -4659,7 +4659,7 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge, } } - uint n_child_scans= imerge->trees_next - imerge->trees; + size_t n_child_scans= imerge->trees_next - imerge->trees; if (!n_child_scans) DBUG_RETURN(NULL); @@ -5203,7 +5203,7 @@ bool prepare_search_best_index_intersect(PARAM *param, INDEX_SCAN_INFO **scan_ptr; INDEX_SCAN_INFO *cpk_scan= NULL; TABLE *table= param->table; - uint n_index_scans= tree->index_scans_end - tree->index_scans; + uint n_index_scans= (uint)(tree->index_scans_end - tree->index_scans); if (!n_index_scans) return 1; @@ -5846,7 +5846,7 @@ TRP_INDEX_INTERSECT *get_best_index_intersect(PARAM *param, SEL_TREE *tree, } } - count= tree->index_scans_end - tree->index_scans; + count= (uint)(tree->index_scans_end - tree->index_scans); for (i= 0; i < count; i++) { index_scan= tree->index_scans[i]; @@ -6506,7 +6506,7 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree, intersect_scans_best);); *are_all_covering= intersect->is_covering; - uint best_num= intersect_scans_best - intersect_scans; + uint best_num= (uint)(intersect_scans_best - intersect_scans); ror_intersect_cpy(intersect, intersect_best); /* @@ -6688,7 +6688,7 @@ TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param, TRP_ROR_INTERSECT *trp; if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT)) DBUG_RETURN(trp); - uint best_num= (ror_scan_mark - tree->ror_scans); + uint best_num= (uint)(ror_scan_mark - tree->ror_scans); if (!(trp->first_scan= (ROR_SCAN_INFO**)alloc_root(param->mem_root, sizeof(ROR_SCAN_INFO*)* best_num))) @@ -11476,7 +11476,7 @@ int QUICK_RANGE_SELECT::get_next_prefix(uint prefix_length, DBUG_RETURN(0); } - uint count= ranges.elements - (cur_range - (QUICK_RANGE**) ranges.buffer); + uint count= ranges.elements - (uint)(cur_range - (QUICK_RANGE**) ranges.buffer); if (count == 0) { /* Ranges have already been used up before. None is left for read. */ @@ -11521,7 +11521,7 @@ int QUICK_RANGE_SELECT_GEOM::get_next() DBUG_RETURN(result); } - uint count= ranges.elements - (cur_range - (QUICK_RANGE**) ranges.buffer); + uint count= ranges.elements - (uint)(cur_range - (QUICK_RANGE**) ranges.buffer); if (count == 0) { /* Ranges have already been used up before. None is left for read. 
*/ @@ -11975,7 +11975,7 @@ void QUICK_SELECT_I::add_key_and_length(String *key_names, bool *first) { char buf[64]; - uint length; + size_t length; KEY *key_info= head->key_info + index; if (*first) @@ -12529,7 +12529,7 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time) { cur_group_prefix_len+= cur_part->store_length; ++cur_group_key_parts; - max_key_part= cur_part - cur_index_info->key_part + 1; + max_key_part= (uint)(cur_part - cur_index_info->key_part) + 1; used_key_parts_map.set_bit(max_key_part); } else @@ -13252,7 +13252,7 @@ get_field_keypart(KEY *index, Field *field) part < end; part++) { if (field->eq(part->field)) - return part - index->key_part + 1; + return (uint)(part - index->key_part + 1); } return 0; } diff --git a/sql/opt_range_mrr.cc b/sql/opt_range_mrr.cc index b3350191d1351..ace6208fd7776 100644 --- a/sql/opt_range_mrr.cc +++ b/sql/opt_range_mrr.cc @@ -199,9 +199,9 @@ bool sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range) { { RANGE_SEQ_ENTRY *cur= &seq->stack[seq->i]; - uint min_key_length= cur->min_key - seq->param->min_key; - uint max_key_length= cur->max_key - seq->param->max_key; - uint len= cur->min_key - cur[-1].min_key; + size_t min_key_length= cur->min_key - seq->param->min_key; + size_t max_key_length= cur->max_key - seq->param->max_key; + size_t len= cur->min_key - cur[-1].min_key; if (!(min_key_length == max_key_length && !memcmp(cur[-1].min_key, cur[-1].max_key, len) && !key_tree->min_flag && !key_tree->max_flag)) @@ -238,7 +238,7 @@ bool sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range) /* Ok got a tuple */ RANGE_SEQ_ENTRY *cur= &seq->stack[seq->i]; - uint min_key_length= cur->min_key - seq->param->min_key; + uint min_key_length= (uint)(cur->min_key - seq->param->min_key); range->ptr= (char*)(intptr)(key_tree->part); if (cur->min_key_flag & GEOM_FLAG) @@ -256,13 +256,13 @@ bool sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range) range->range_flag= cur->min_key_flag | cur->max_key_flag; range->start_key.key= seq->param->min_key; - range->start_key.length= cur->min_key - seq->param->min_key; + range->start_key.length= (uint)(cur->min_key - seq->param->min_key); range->start_key.keypart_map= make_prev_keypart_map(cur->min_key_parts); range->start_key.flag= (cur->min_key_flag & NEAR_MIN ? HA_READ_AFTER_KEY : HA_READ_KEY_EXACT); range->end_key.key= seq->param->max_key; - range->end_key.length= cur->max_key - seq->param->max_key; + range->end_key.length= (uint)(cur->max_key - seq->param->max_key); range->end_key.flag= (cur->max_key_flag & NEAR_MAX ? 
HA_READ_BEFORE_KEY : HA_READ_AFTER_KEY); range->end_key.keypart_map= make_prev_keypart_map(cur->max_key_parts); diff --git a/sql/opt_sum.cc b/sql/opt_sum.cc index ab587b8b279da..8a75aaed8d647 100644 --- a/sql/opt_sum.cc +++ b/sql/opt_sum.cc @@ -768,12 +768,12 @@ static bool matching_cond(bool max_fl, TABLE_REF *ref, KEY *keyinfo, key_part_map org_key_part_used= *key_part_used; if (eq_type || between || max_fl == less_fl) { - uint length= (key_ptr-ref->key_buff)+part->store_length; + uint length= (uint)(key_ptr-ref->key_buff)+part->store_length; if (ref->key_length < length) { /* Ultimately ref->key_length will contain the length of the search key */ ref->key_length= length; - ref->key_parts= (part - keyinfo->key_part) + 1; + ref->key_parts= (uint)(part - keyinfo->key_part) + 1; } if (!*prefix_len && part+1 == field_part) *prefix_len= length; diff --git a/sql/opt_table_elimination.cc b/sql/opt_table_elimination.cc index d3d1bc97a70e7..191360a6969e8 100644 --- a/sql/opt_table_elimination.cc +++ b/sql/opt_table_elimination.cc @@ -848,7 +848,7 @@ bool check_func_dependency(JOIN *join, */ uint and_level=0; build_eq_mods_for_cond(join->thd, &dac, &last_eq_mod, &and_level, cond); - if (!(dac.n_equality_mods= last_eq_mod - dac.equality_mods)) + if (!(dac.n_equality_mods= (uint)(last_eq_mod - dac.equality_mods))) return FALSE; /* No useful conditions */ List bound_modules; @@ -1061,7 +1061,7 @@ bool Dep_analysis_context::setup_equality_modules_deps(List eq_mod < equality_mods + n_equality_mods; eq_mod++) { - deps_recorder.expr_offset= eq_mod - equality_mods; + deps_recorder.expr_offset= (uint)(eq_mod - equality_mods); deps_recorder.visited_other_tables= FALSE; eq_mod->unbound_args= 0; @@ -1079,7 +1079,7 @@ bool Dep_analysis_context::setup_equality_modules_deps(List Dep_value_field* field_val; while ((field_val= it++)) { - uint offs= field_val->bitmap_offset + eq_mod - equality_mods; + uint offs= (uint)(field_val->bitmap_offset + eq_mod - equality_mods); bitmap_set_bit(&expr_deps, offs); } } @@ -1158,7 +1158,7 @@ void build_eq_mods_for_cond(THD *thd, Dep_analysis_context *ctx, if (cond->type() == Item_func::COND_ITEM) { List_iterator_fast li(*((Item_cond*) cond)->argument_list()); - uint orig_offset= *eq_mod - ctx->equality_mods; + size_t orig_offset= *eq_mod - ctx->equality_mods; /* AND/OR */ if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) diff --git a/sql/parse_file.cc b/sql/parse_file.cc index 196feabb23536..94f72b7b492cc 100644 --- a/sql/parse_file.cc +++ b/sql/parse_file.cc @@ -256,7 +256,7 @@ sql_create_definition_file(const LEX_CSTRING *dir, File handler; IO_CACHE file; char path[FN_REFLEN+1]; // +1 to put temporary file name for sure - int path_end; + size_t path_end; File_option *param; DBUG_ENTER("sql_create_definition_file"); DBUG_PRINT("enter", ("Dir: %s, file: %s, base %p", diff --git a/sql/partition_info.cc b/sql/partition_info.cc index cc6553fc1585d..47fb60ea12a71 100644 --- a/sql/partition_info.cc +++ b/sql/partition_info.cc @@ -1674,7 +1674,7 @@ void partition_info::print_no_partition_found(TABLE *table_arg, myf errflag) bool partition_info::set_part_expr(THD *thd, char *start_token, Item *item_ptr, char *end_token, bool is_subpart) { - uint expr_len= end_token - start_token; + size_t expr_len= end_token - start_token; char *func_string= (char*) thd->memdup(start_token, expr_len); if (!func_string) diff --git a/sql/rpl_record.cc b/sql/rpl_record.cc index 51e93003c565c..1b1059cc52925 100644 --- a/sql/rpl_record.cc +++ b/sql/rpl_record.cc @@ -80,7 +80,7 @@ 
pack_row(TABLE *table, MY_BITMAP const* cols, unsigned int null_mask= 1U; for ( ; (field= *p_field) ; p_field++) { - if (bitmap_is_set(cols, p_field - table->field)) + if (bitmap_is_set(cols, (uint)(p_field - table->field))) { my_ptrdiff_t offset; if (field->is_null(rec_offset)) @@ -262,7 +262,7 @@ unpack_row(rpl_group_info *rgi, No need to bother about columns that does not exist: they have gotten default values when being emptied above. */ - if (bitmap_is_set(cols, field_ptr - begin_ptr)) + if (bitmap_is_set(cols, (uint)(field_ptr - begin_ptr))) { if ((null_mask & 0xFF) == 0) { @@ -434,7 +434,7 @@ unpack_row(rpl_group_info *rgi, if (master_reclength) { if (*field_ptr) - *master_reclength = (*field_ptr)->ptr - table->record[0]; + *master_reclength = (ulong)((*field_ptr)->ptr - table->record[0]); else *master_reclength = table->s->reclength; } diff --git a/sql/rpl_record_old.cc b/sql/rpl_record_old.cc index a252bbff0f59d..fd37c6f91426a 100644 --- a/sql/rpl_record_old.cc +++ b/sql/rpl_record_old.cc @@ -134,7 +134,7 @@ unpack_row_old(rpl_group_info *rgi, { Field *const f= *field_ptr; - if (bitmap_is_set(cols, field_ptr - begin_ptr)) + if (bitmap_is_set(cols, (uint)(field_ptr - begin_ptr))) { f->move_field_offset(offset); ptr= f->unpack(f->ptr, ptr, row_buffer_end, 0); @@ -149,14 +149,14 @@ unpack_row_old(rpl_group_info *rgi, } } else - bitmap_clear_bit(rw_set, field_ptr - begin_ptr); + bitmap_clear_bit(rw_set, (uint)(field_ptr - begin_ptr)); } *row_end = ptr; if (master_reclength) { if (*field_ptr) - *master_reclength = (*field_ptr)->ptr - table->record[0]; + *master_reclength = (ulong)((*field_ptr)->ptr - table->record[0]); else *master_reclength = table->s->reclength; } diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt index a6462cbca9b6f..054378ed268b7 100644 --- a/sql/share/errmsg-utf8.txt +++ b/sql/share/errmsg-utf8.txt @@ -7675,8 +7675,11 @@ ER_SLAVE_SAME_ID ER_FLASHBACK_NOT_SUPPORTED eng "Flashback does not support %s %s" -# MARIAROCKS-TODO: Should we add RocksDB error messages here or use some other -# solution? + + +# +# MyRocks error messages +# ER_KEYS_OUT_OF_ORDER eng "Keys are out order during bulk load" @@ -7710,39 +7713,40 @@ ER_UNSUPPORTED_COLLATION ER_METADATA_INCONSISTENCY eng "Table '%s' does not exist, but metadata information exists inside MyRocks. This is a sign of data inconsistency. Please check if '%s.frm' exists, and try to restore it if it does not exist." -ER_KEY_CREATE_DURING_ALTER - eng "MyRocks failed creating new key definitions during alter." - -ER_SK_POPULATE_DURING_ALTER - eng "MyRocks failed populating secondary key during alter." - ER_CF_DIFFERENT eng "Column family ('%s') flag (%d) is different from an existing flag (%d). Assign a new CF flag, or do not change existing CF flag." +ER_RDB_TTL_DURATION_FORMAT + eng "TTL duration (%s) in MyRocks must be an unsigned non-null 64-bit integer." + ER_RDB_STATUS_GENERAL - eng "Status error %d received from RocksDB: %s" + eng "Status error %d received from RocksDB: %s" ER_RDB_STATUS_MSG - eng "%s, Status error %d received from RocksDB: %s" - -ER_NET_OK_PACKET_TOO_LARGE - eng "OK packet too large" + eng "%s, Status error %d received from RocksDB: %s" ER_RDB_TTL_UNSUPPORTED - eng "TTL support is currently disabled when table has secondary indexes or hidden PK." + eng "TTL support is currently disabled when table has a hidden PK." ER_RDB_TTL_COL_FORMAT eng "TTL column (%s) in MyRocks must be an unsigned non-null 64-bit integer, exist inside the table, and have an accompanying ttl duration." 
-ER_RDB_TTL_DURATION_FORMAT - eng "TTL duration (%s) in MyRocks must be an unsigned non-null 64-bit integer." - ER_PER_INDEX_CF_DEPRECATED eng "The per-index column family option has been deprecated" +ER_KEY_CREATE_DURING_ALTER + eng "MyRocks failed creating new key definitions during alter." + +ER_SK_POPULATE_DURING_ALTER + eng "MyRocks failed populating secondary key during alter." + +# MyRocks messages end ER_SUM_FUNC_WITH_WINDOW_FUNC_AS_ARG eng "Window functions can not be used as arguments to group functions." +ER_NET_OK_PACKET_TOO_LARGE + eng "OK packet too large" + ER_ILLEGAL_PARAMETER_DATA_TYPES2_FOR_OPERATION eng "Illegal parameter data types %s and %s for operation '%s'" ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION diff --git a/sql/spatial.cc b/sql/spatial.cc index 8817e82d6c45a..357e311543f9d 100644 --- a/sql/spatial.cc +++ b/sql/spatial.cc @@ -2565,7 +2565,7 @@ uint Gis_multi_polygon::init_from_opresult(String *bin, n_poly++; } bin->write_at_position(np_pos, n_poly); - return opres - opres_orig; + return (uint)(opres - opres_orig); } diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc index b0550fb518932..2c192d661f48e 100644 --- a/sql/sql_acl.cc +++ b/sql/sql_acl.cc @@ -12398,7 +12398,7 @@ static bool parse_com_change_user_packet(MPVIO_EXT *mpvio, uint packet_length) char *end= user + packet_length; /* Safe because there is always a trailing \0 at the end of the packet */ char *passwd= strend(user) + 1; - uint user_len= passwd - user - 1; + uint user_len= (uint)(passwd - user - 1); char *db= passwd; char db_buff[SAFE_NAME_LEN + 1]; // buffer to store db in utf8 char user_buff[USERNAME_LENGTH + 1]; // buffer to store user in utf8 diff --git a/sql/sql_base.cc b/sql/sql_base.cc index c0736130cfe67..699633a11c10b 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -5479,7 +5479,7 @@ find_field_in_table(THD *thd, TABLE *table, const char *name, uint length, if (field_ptr && *field_ptr) { - *cached_field_index_ptr= field_ptr - table->field; + *cached_field_index_ptr= (uint)(field_ptr - table->field); field= *field_ptr; } else diff --git a/sql/sql_cache.cc b/sql/sql_cache.cc index 7849e16a839dd..f28ce514b2942 100644 --- a/sql/sql_cache.cc +++ b/sql/sql_cache.cc @@ -4286,7 +4286,7 @@ my_bool Query_cache::move_by_type(uchar **border, *pprev = block->pprev, *pnext = block->pnext, *new_block =(Query_cache_block *) *border; - uint tablename_offset = block->table()->table() - block->table()->db(); + size_t tablename_offset = block->table()->table() - block->table()->db(); char *data = (char*) block->data(); uchar *key; size_t key_length; @@ -4595,7 +4595,7 @@ uint Query_cache::filename_2_table_key (char *key, const char *path, filename= tablename + dirname_length(tablename + 2) + 2; /* Find start of databasename */ for (dbname= filename - 2 ; dbname[-1] != FN_LIBCHAR ; dbname--) ; - *db_length= (filename - dbname) - 1; + *db_length= (uint32)(filename - dbname) - 1; DBUG_PRINT("qcache", ("table '%-.*s.%s'", *db_length, dbname, filename)); DBUG_RETURN((uint) (strmake(strmake(key, dbname, diff --git a/sql/sql_cte.cc b/sql/sql_cte.cc index c163044547f8e..54a56103d21c7 100644 --- a/sql/sql_cte.cc +++ b/sql/sql_cte.cc @@ -990,7 +990,7 @@ With_element *st_select_lex::find_table_def_in_with_clauses(TABLE_LIST *table) been done yet. 
*/ if (with_elem && sl->master_unit() == with_elem->spec) - break; + break; With_clause *with_clause=sl->get_with_clause(); if (with_clause) { @@ -1038,13 +1038,21 @@ bool TABLE_LIST::set_as_with_table(THD *thd, With_element *with_elem) } with= with_elem; if (!with_elem->is_referenced() || with_elem->is_recursive) + { derived= with_elem->spec; + if (derived->get_master() != select_lex && + !is_with_table_recursive_reference()) + { + derived->move_as_slave(select_lex); + } + } else { if(!(derived= with_elem->clone_parsed_spec(thd, this))) return true; derived->with_element= with_elem; } + derived->first_select()->linkage= DERIVED_TABLE_TYPE; with_elem->inc_references(); return false; } diff --git a/sql/sql_db.cc b/sql/sql_db.cc index f91b92b1d4f68..7860fa6d550eb 100644 --- a/sql/sql_db.cc +++ b/sql/sql_db.cc @@ -1006,7 +1006,7 @@ mysql_rm_db_internal(THD *thd, const char *db, bool if_exists, bool silent) These DDL methods and logging are protected with the exclusive metadata lock on the schema. */ - if (write_to_binlog(thd, query, query_pos -1 - query, db, db_len)) + if (write_to_binlog(thd, query, (uint)(query_pos -1 - query), db, db_len)) { error= true; goto exit; @@ -1024,7 +1024,7 @@ mysql_rm_db_internal(THD *thd, const char *db, bool if_exists, bool silent) These DDL methods and logging are protected with the exclusive metadata lock on the schema. */ - if (write_to_binlog(thd, query, query_pos -1 - query, db, db_len)) + if (write_to_binlog(thd, query, (uint)(query_pos -1 - query), db, db_len)) { error= true; goto exit; diff --git a/sql/sql_join_cache.cc b/sql/sql_join_cache.cc index 41741f3dcc709..e6ef8a4be9fb6 100644 --- a/sql/sql_join_cache.cc +++ b/sql/sql_join_cache.cc @@ -406,7 +406,7 @@ void JOIN_CACHE::create_flag_fields() } /* Theoretically the new value of flag_fields can be less than the old one */ - flag_fields= copy-field_descr; + flag_fields= (uint)(copy-field_descr); } @@ -1374,7 +1374,7 @@ uint JOIN_CACHE::write_record_data(uchar * link, bool *is_full) } /* Save the offset of the field to put it later at the end of the record */ if (copy->referenced_field_no) - copy->offset= cp-curr_rec_pos; + copy->offset= (uint)(cp-curr_rec_pos); switch (copy->type) { case CACHE_BLOB: @@ -1778,7 +1778,7 @@ uint JOIN_CACHE::read_flag_fields() memcpy(copy->str, pos, copy->length); pos+= copy->length; } - return (pos-init_pos); + return (uint)(pos-init_pos); } diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index a479fc9fe3071..5f198e4025454 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -2392,6 +2392,30 @@ st_select_lex_node *st_select_lex_node:: insert_chain_before( return this; } + +/* + Detach the node from its master and attach it to a new master +*/ + +void st_select_lex_node::move_as_slave(st_select_lex_node *new_master) +{ + exclude_from_tree(); + if (new_master->slave) + { + st_select_lex_node *curr= new_master->slave; + for ( ; curr->next ; curr= curr->next) ; + prev= &curr->next; + } + else + { + prev= &new_master->slave; + new_master->slave= this; + } + next= 0; + master= new_master; +} + + /* Exclude a node from the tree lex structure, but leave it in the global list of nodes. @@ -4521,7 +4545,8 @@ void st_select_lex::set_explain_type(bool on_the_fly) pos_in_table_list=NULL for e.g. post-join aggregation JOIN_TABs. 
*/ if (tab->table && tab->table->pos_in_table_list && - tab->table->pos_in_table_list->with) + tab->table->pos_in_table_list->with && + tab->table->pos_in_table_list->with->is_recursive) { uses_cte= true; break; diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 9eeb9652022e9..3f02158b9b1ac 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -631,6 +631,7 @@ class st_select_lex_node { } st_select_lex_node *insert_chain_before(st_select_lex_node **ptr_pos_to_insert, st_select_lex_node *end_chain_node); + void move_as_slave(st_select_lex_node *new_master); friend class st_select_lex_unit; friend bool mysql_new_select(LEX *lex, bool move_down, SELECT_LEX *sel); friend bool mysql_make_view(THD *thd, TABLE_SHARE *share, TABLE_LIST *table, diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index fc9a1eab7582f..de4bfd0436b59 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -5245,7 +5245,6 @@ mysql_execute_command(THD *thd) if (res) break; - WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) switch (lex->sql_command) { case SQLCOM_CREATE_EVENT: { @@ -5279,7 +5278,6 @@ mysql_execute_command(THD *thd) &lex->spname->m_name); break; case SQLCOM_DROP_EVENT: - WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) if (!(res= Events::drop_event(thd, &lex->spname->m_db, &lex->spname->m_name, lex->if_exists()))) @@ -6019,7 +6017,6 @@ mysql_execute_command(THD *thd) Note: SQLCOM_CREATE_VIEW also handles 'ALTER VIEW' commands as specified through the thd->lex->create_view->mode flag. */ - WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) res= mysql_create_view(thd, first_table, thd->lex->create_view->mode); break; } @@ -6035,7 +6032,6 @@ mysql_execute_command(THD *thd) case SQLCOM_CREATE_TRIGGER: { /* Conditionally writes to binlog. */ - WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) res= mysql_create_or_drop_trigger(thd, all_tables, 1); break; @@ -6043,7 +6039,6 @@ mysql_execute_command(THD *thd) case SQLCOM_DROP_TRIGGER: { /* Conditionally writes to binlog. */ - WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) res= mysql_create_or_drop_trigger(thd, all_tables, 0); break; } @@ -6108,13 +6103,11 @@ mysql_execute_command(THD *thd) my_ok(thd); break; case SQLCOM_INSTALL_PLUGIN: - WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) if (! (res= mysql_install_plugin(thd, &thd->lex->comment, &thd->lex->ident))) my_ok(thd); break; case SQLCOM_UNINSTALL_PLUGIN: - WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) if (! (res= mysql_uninstall_plugin(thd, &thd->lex->comment, &thd->lex->ident))) my_ok(thd); diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc index 4d25de0f29936..89ac8559a62e0 100644 --- a/sql/sql_plugin.cc +++ b/sql/sql_plugin.cc @@ -2128,12 +2128,16 @@ bool mysql_install_plugin(THD *thd, const LEX_CSTRING *name, bool error; int argc=orig_argc; char **argv=orig_argv; + unsigned long event_class_mask[MYSQL_AUDIT_CLASS_MASK_SIZE] = + { MYSQL_AUDIT_GENERAL_CLASSMASK }; DBUG_ENTER("mysql_install_plugin"); tables.init_one_table("mysql", 5, "plugin", 6, "plugin", TL_WRITE); if (!opt_noacl && check_table_access(thd, INSERT_ACL, &tables, FALSE, 1, FALSE)) DBUG_RETURN(TRUE); + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) + /* need to open before acquiring LOCK_plugin or it will deadlock */ if (! 
(table = open_ltable(thd, &tables, TL_WRITE, MYSQL_LOCK_IGNORE_TIMEOUT))) @@ -2166,8 +2170,6 @@ bool mysql_install_plugin(THD *thd, const LEX_CSTRING *name, See also mysql_uninstall_plugin() and initialize_audit_plugin() */ - unsigned long event_class_mask[MYSQL_AUDIT_CLASS_MASK_SIZE] = - { MYSQL_AUDIT_GENERAL_CLASSMASK }; if (mysql_audit_general_enabled()) mysql_audit_acquire_plugins(thd, event_class_mask); @@ -2199,6 +2201,10 @@ bool mysql_install_plugin(THD *thd, const LEX_CSTRING *name, if (argv) free_defaults(argv); DBUG_RETURN(error); +#ifdef WITH_WSREP +error: + DBUG_RETURN(TRUE); +#endif /* WITH_WSREP */ } @@ -2265,6 +2271,8 @@ bool mysql_uninstall_plugin(THD *thd, const LEX_CSTRING *name, TABLE_LIST tables; LEX_CSTRING dl= *dl_arg; bool error= false; + unsigned long event_class_mask[MYSQL_AUDIT_CLASS_MASK_SIZE] = + { MYSQL_AUDIT_GENERAL_CLASSMASK }; DBUG_ENTER("mysql_uninstall_plugin"); tables.init_one_table("mysql", 5, "plugin", 6, "plugin", TL_WRITE); @@ -2272,6 +2280,8 @@ bool mysql_uninstall_plugin(THD *thd, const LEX_CSTRING *name, if (!opt_noacl && check_table_access(thd, DELETE_ACL, &tables, FALSE, 1, FALSE)) DBUG_RETURN(TRUE); + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) + /* need to open before acquiring LOCK_plugin or it will deadlock */ if (! (table= open_ltable(thd, &tables, TL_WRITE, MYSQL_LOCK_IGNORE_TIMEOUT))) DBUG_RETURN(TRUE); @@ -2297,8 +2307,6 @@ bool mysql_uninstall_plugin(THD *thd, const LEX_CSTRING *name, See also mysql_install_plugin() and initialize_audit_plugin() */ - unsigned long event_class_mask[MYSQL_AUDIT_CLASS_MASK_SIZE] = - { MYSQL_AUDIT_GENERAL_CLASSMASK }; if (mysql_audit_general_enabled()) mysql_audit_acquire_plugins(thd, event_class_mask); @@ -2329,6 +2337,10 @@ bool mysql_uninstall_plugin(THD *thd, const LEX_CSTRING *name, mysql_mutex_unlock(&LOCK_plugin); DBUG_RETURN(error); +#ifdef WITH_WSREP +error: + DBUG_RETURN(TRUE); +#endif /* WITH_WSREP */ } diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index 1f802b7de831d..c7663e2675095 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -2680,7 +2680,7 @@ static int send_events(binlog_send_info *info, IO_CACHE* log, LOG_INFO* linfo, Gtid_list_log_event glev(&info->until_binlog_state, 0); if (reset_transmit_packet(info, info->flags, &ev_offset, &info->errmsg) || - fake_gtid_list_event(info, &glev, &info->errmsg, my_b_tell(log))) + fake_gtid_list_event(info, &glev, &info->errmsg, (uint32)my_b_tell(log))) { info->error= ER_UNKNOWN_ERROR; return 1; @@ -2690,7 +2690,7 @@ static int send_events(binlog_send_info *info, IO_CACHE* log, LOG_INFO* linfo, if (info->until_gtid_state && is_until_reached(info, &ev_offset, event_type, &info->errmsg, - my_b_tell(log))) + (uint32)my_b_tell(log))) { if (info->errmsg) { @@ -2745,7 +2745,7 @@ static int send_one_binlog_file(binlog_send_info *info, if (end_pos <= 1) { /** end of file or error */ - return end_pos; + return (int)end_pos; } /** diff --git a/sql/sql_select.cc b/sql/sql_select.cc index e4ab9c0b4056b..90bd191a79b90 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -6142,7 +6142,7 @@ double matching_candidates_in_table(JOIN_TAB *s, bool with_found_constraint, { TABLE *table= s->table; double sel= table->cond_selectivity; - double table_records= table->stat_records(); + double table_records= (double)table->stat_records(); dbl_records= table_records * sel; return dbl_records; } @@ -6168,7 +6168,7 @@ double matching_candidates_in_table(JOIN_TAB *s, bool with_found_constraint, if (s->table->quick_condition_rows != s->found_records) records= 
s->table->quick_condition_rows; - dbl_records= records; + dbl_records= (double)records; return dbl_records; } @@ -6859,7 +6859,7 @@ static void choose_initial_table_order(JOIN *join) if ((emb_subq= get_emb_subq(*tab))) break; } - uint n_subquery_tabs= tabs_end - tab; + uint n_subquery_tabs= (uint)(tabs_end - tab); if (!n_subquery_tabs) DBUG_VOID_RETURN; @@ -6887,7 +6887,7 @@ static void choose_initial_table_order(JOIN *join) last_tab_for_subq < subq_tabs_end && get_emb_subq(*last_tab_for_subq) == cur_subq_nest; last_tab_for_subq++) {} - uint n_subquery_tables= last_tab_for_subq - subq_tab; + uint n_subquery_tables= (uint)(last_tab_for_subq - subq_tab); /* Walk the original array and find where this subquery would have been @@ -6905,7 +6905,7 @@ static void choose_initial_table_order(JOIN *join) if (!need_tables) { /* Move away the top-level tables that are after top_level_tab */ - uint top_tail_len= last_top_level_tab - top_level_tab - 1; + size_t top_tail_len= last_top_level_tab - top_level_tab - 1; memmove(top_level_tab + 1 + n_subquery_tables, top_level_tab + 1, sizeof(JOIN_TAB*)*top_tail_len); last_top_level_tab += n_subquery_tables; @@ -7651,7 +7651,7 @@ double JOIN::get_examined_rows() JOIN_TAB *tab= first_breadth_first_tab(); JOIN_TAB *prev_tab= tab; - examined_rows= tab->get_examined_rows(); + examined_rows= (double)tab->get_examined_rows(); while ((tab= next_breadth_first_tab(first_breadth_first_tab(), top_join_tab_count, tab))) @@ -7949,7 +7949,7 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, } if (keyparts > 1) { - ref_keyuse_steps[keyparts-2]= keyuse - prev_ref_keyuse; + ref_keyuse_steps[keyparts-2]= (uint16)(keyuse - prev_ref_keyuse); prev_ref_keyuse= keyuse; } } @@ -9311,8 +9311,8 @@ bool JOIN::get_best_combination() j= j->bush_root_tab; } - top_join_tab_count= join_tab_ranges.head()->end - - join_tab_ranges.head()->start; + top_join_tab_count= (uint)(join_tab_ranges.head()->end - + join_tab_ranges.head()->start); update_depend_map(this); DBUG_RETURN(0); @@ -10888,7 +10888,7 @@ static uint make_join_orderinfo(JOIN *join) if (join->need_tmp) return join->table_count; tab= join->get_sort_by_join_tab(); - return tab ? tab-join->join_tab : join->table_count; + return tab ? (uint)(tab-join->join_tab) : join->table_count; } /* @@ -11905,8 +11905,8 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) str.append(" final_pushdown_cond"); print_where(tab->select_cond, str.c_ptr_safe(), QT_ORDINARY);); } - uint n_top_tables= join->join_tab_ranges.head()->end - - join->join_tab_ranges.head()->start; + uint n_top_tables= (uint)(join->join_tab_ranges.head()->end - + join->join_tab_ranges.head()->start); join->join_tab[n_top_tables - 1].next_select=0; /* Set by do_select */ @@ -12130,7 +12130,7 @@ ha_rows JOIN_TAB::get_examined_rows() SQL_SELECT *sel= filesort? filesort->select : this->select; if (sel && sel->quick && use_quick != 2) - examined_rows= sel->quick->records; + examined_rows= (double)sel->quick->records; else if (type == JT_NEXT || type == JT_ALL || type == JT_HASH || type ==JT_HASH_NEXT) { @@ -12140,19 +12140,19 @@ ha_rows JOIN_TAB::get_examined_rows() @todo This estimate is wrong, a LIMIT query may examine much more rows than the LIMIT itself. 
*/ - examined_rows= limit; + examined_rows= (double)limit; } else { if (table->is_filled_at_execution()) - examined_rows= records; + examined_rows= (double)records; else { /* handler->info(HA_STATUS_VARIABLE) has been called in make_join_statistics() */ - examined_rows= table->stat_records(); + examined_rows= (double)table->stat_records(); } } } @@ -13981,7 +13981,7 @@ static int compare_fields_by_table_order(Item *field1, tab2= tab2->bush_root_tab; } - cmp= tab1 - tab2; + cmp= (int)(tab1 - tab2); if (!cmp) { @@ -17145,7 +17145,7 @@ create_tmp_table(THD *thd, TMP_TABLE_PARAM *param, List &fields, share->default_values= table->record[1]+alloc_length; } copy_func[0]=0; // End marker - param->func_count= copy_func - param->items_to_copy; + param->func_count= (uint)(copy_func - param->items_to_copy); setup_tmp_table_column_bitmaps(table, bitmaps); @@ -17829,7 +17829,7 @@ bool create_internal_tmp_table(TABLE *table, KEY *keyinfo, Emulate behaviour by making column not-nullable when creating the table. */ - uint cols= (*recinfo-start_recinfo); + uint cols= (uint)(*recinfo-start_recinfo); start_recinfo[cols-1].null_bit= 0; } } @@ -21047,7 +21047,7 @@ static int test_if_order_by_key(JOIN *join, (1) this is an extended key (2) we've reached its end */ - key_parts= (key_part - table->key_info[idx].key_part); + key_parts= (uint)(key_part - table->key_info[idx].key_part); if (have_pk_suffix && reverse == 0 && // all were =const so far key_parts == table->key_info[idx].ext_key_parts && @@ -24688,7 +24688,7 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta, } else { - double examined_rows= get_examined_rows(); + double examined_rows= (double)get_examined_rows(); eta->rows_set= true; eta->rows= (ha_rows) examined_rows; @@ -26073,8 +26073,8 @@ static bool get_range_limit_read_cost(const JOIN_TAB *tab, Start from quick select's rows and cost. These are always cheaper than full index scan/cost. */ - double best_rows= table->quick_rows[keynr]; - double best_cost= table->quick_costs[keynr]; + double best_rows= (double)table->quick_rows[keynr]; + double best_cost= (double)table->quick_costs[keynr]; /* Check if ref(const) access was possible on this index. 
@@ -26108,7 +26108,7 @@ static bool get_range_limit_read_cost(const JOIN_TAB *tab, if (ref_rows > 0) { - double tmp= ref_rows; + double tmp= (double)ref_rows; /* Reuse the cost formula from best_access_path: */ set_if_smaller(tmp, (double) tab->join->thd->variables.max_seeks_for_key); if (table->covering_keys.is_set(keynr)) @@ -26119,7 +26119,7 @@ static bool get_range_limit_read_cost(const JOIN_TAB *tab, if (tmp < best_cost) { best_cost= tmp; - best_rows= ref_rows; + best_rows= (double)ref_rows; } } } @@ -26232,7 +26232,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, if (join) { - uint tablenr= tab - join->join_tab; + uint tablenr= (uint)(tab - join->join_tab); read_time= join->best_positions[tablenr].read_time; for (uint i= tablenr+1; i < join->table_count; i++) fanout*= join->best_positions[i].records_read; // fanout is always >= 1 diff --git a/sql/sql_show.cc b/sql/sql_show.cc index 4dcc519029c51..116af627adc3d 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -3460,7 +3460,7 @@ static bool show_status_array(THD *thd, const char *wild, prefix_end=strnmov(name_buffer, prefix, sizeof(name_buffer)-1); if (*prefix) *prefix_end++= '_'; - len=name_buffer + sizeof(name_buffer) - prefix_end; + len=(int)(name_buffer + sizeof(name_buffer) - prefix_end); #ifdef WITH_WSREP bool is_wsrep_var= FALSE; @@ -3803,6 +3803,15 @@ bool uses_only_table_name_fields(Item *item, TABLE_LIST *table) return 0; } } + else if (item->type() == Item::ROW_ITEM) + { + Item_row *item_row= static_cast(item); + for (uint i= 0; i < item_row->cols(); i++) + { + if (!uses_only_table_name_fields(item_row->element_index(i), table)) + return 0; + } + } else if (item->type() == Item::FIELD_ITEM) { Item_field *item_field= (Item_field*)item; @@ -3822,6 +3831,11 @@ bool uses_only_table_name_fields(Item *item, TABLE_LIST *table) item_field->field_name.length))) return 0; } + else if (item->type() == Item::EXPR_CACHE_ITEM) + { + Item_cache_wrapper *tmp= static_cast(item); + return uses_only_table_name_fields(tmp->get_orig_item(), table); + } else if (item->type() == Item::REF_ITEM) return uses_only_table_name_fields(item->real_item(), table); @@ -5435,7 +5449,7 @@ static void store_column_type(TABLE *table, Field *field, CHARSET_INFO *cs, */ tmp_buff= strchr(column_type.c_ptr_safe(), ' '); table->field[offset]->store(column_type.ptr(), - (tmp_buff ? tmp_buff - column_type.ptr() : + (tmp_buff ? 
(uint)(tmp_buff - column_type.ptr()) : column_type.length()), cs); is_blob= (field->type() == MYSQL_TYPE_BLOB); @@ -6405,7 +6419,7 @@ static int get_schema_views_record(THD *thd, TABLE_LIST *tables, table->field[5]->store(STRING_WITH_LEN("NO"), cs); } - definer_len= (strxmov(definer, tables->definer.user.str, "@", + definer_len= (uint)(strxmov(definer, tables->definer.user.str, "@", tables->definer.host.str, NullS) - definer); table->field[6]->store(definer, definer_len, cs); if (tables->view_suid) diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index 224b7541dee39..e1465d47f729c 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -843,7 +843,7 @@ class Table_stat: public Stat_table else { stat_field->set_notnull(); - stat_field->store(table->collected_stats->cardinality); + stat_field->store(table->collected_stats->cardinality,true); } } @@ -1054,7 +1054,7 @@ class Column_stat: public Stat_table switch (i) { case COLUMN_STAT_MIN_VALUE: if (table_field->type() == MYSQL_TYPE_BIT) - stat_field->store(table_field->collected_stats->min_value->val_int()); + stat_field->store(table_field->collected_stats->min_value->val_int(),true); else { table_field->collected_stats->min_value->val_str(&val); @@ -1063,7 +1063,7 @@ class Column_stat: public Stat_table break; case COLUMN_STAT_MAX_VALUE: if (table_field->type() == MYSQL_TYPE_BIT) - stat_field->store(table_field->collected_stats->max_value->val_int()); + stat_field->store(table_field->collected_stats->max_value->val_int(),true); else { table_field->collected_stats->max_value->val_str(&val); @@ -1630,7 +1630,7 @@ class Count_distinct_field: public Sql_alloc of the parameters to be passed to the constructor of the Unique object. */ - Count_distinct_field(Field *field, uint max_heap_table_size) + Count_distinct_field(Field *field, size_t max_heap_table_size) { table_field= field; tree_key_length= field->pack_length(); @@ -1728,7 +1728,7 @@ class Count_distinct_field_bit: public Count_distinct_field { public: - Count_distinct_field_bit(Field *field, uint max_heap_table_size) + Count_distinct_field_bit(Field *field, size_t max_heap_table_size) { table_field= field; tree_key_length= sizeof(ulonglong); @@ -1824,7 +1824,7 @@ class Index_prefix_calc: public Sql_alloc if ((calc_state= (Prefix_calc_state *) thd->alloc(sizeof(Prefix_calc_state)*key_parts))) { - uint keyno= key_info-table->key_info; + uint keyno= (uint)(key_info-table->key_info); for (i= 0, state= calc_state; i < key_parts; i++, state++) { /* @@ -2438,7 +2438,7 @@ int alloc_histograms_for_table_share(THD* thd, TABLE_SHARE *table_share, inline void Column_statistics_collected::init(THD *thd, Field *table_field) { - uint max_heap_table_size= thd->variables.max_heap_table_size; + size_t max_heap_table_size= (size_t)thd->variables.max_heap_table_size; TABLE *table= table_field->table; uint pk= table->s->primary_key; @@ -3719,14 +3719,14 @@ double get_column_avg_frequency(Field * field) */ if (!table->s->field) { - res= table->stat_records(); + res= (double)table->stat_records(); return res; } Column_statistics *col_stats= field->read_stats; if (!col_stats) - res= table->stat_records(); + res= (double)table->stat_records(); else res= col_stats->get_avg_frequency(); return res; @@ -3765,7 +3765,7 @@ double get_column_range_cardinality(Field *field, double res; TABLE *table= field->table; Column_statistics *col_stats= field->read_stats; - double tab_records= table->stat_records(); + double tab_records= (double)table->stat_records(); if (!col_stats) return tab_records; diff 
--git a/sql/sql_table.cc b/sql/sql_table.cc index cfc571b22efe3..98453b15586ae 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -230,7 +230,7 @@ uint explain_filename(THD* thd, { db_name= table_name; /* calculate the length */ - db_name_len= tmp_p - db_name; + db_name_len= (int)(tmp_p - db_name); tmp_p++; table_name= tmp_p; } @@ -252,7 +252,7 @@ uint explain_filename(THD* thd, case 's': if ((tmp_p[1] == 'P' || tmp_p[1] == 'p') && tmp_p[2] == '#') { - part_name_len= tmp_p - part_name - 1; + part_name_len= (int)(tmp_p - part_name - 1); subpart_name= tmp_p + 3; tmp_p+= 3; } @@ -284,7 +284,7 @@ uint explain_filename(THD* thd, } if (part_name) { - table_name_len= part_name - table_name - 3; + table_name_len= (int)(part_name - table_name - 3); if (subpart_name) subpart_name_len= strlen(subpart_name); else @@ -357,7 +357,7 @@ uint explain_filename(THD* thd, to_p= strnmov(to_p, " */", end_p - to_p); } DBUG_PRINT("exit", ("to '%s'", to)); - DBUG_RETURN(to_p - to); + DBUG_RETURN((uint)(to_p - to)); } @@ -553,7 +553,7 @@ uint build_table_filename(char *buff, size_t bufflen, const char *db, pos= strxnmov(pos, end - pos, tbbuff, ext, NullS); DBUG_PRINT("exit", ("buff: '%s'", buff)); - DBUG_RETURN(pos - buff); + DBUG_RETURN((uint)(pos - buff)); } @@ -2134,7 +2134,7 @@ static uint32 comment_length(THD *thd, uint32 comment_pos, for (query+= 3; query < query_end; query++) { if (query[-1] == '*' && query[0] == '/') - return (char*) query - *comment_start + 1; + return (uint32)((char*) query - *comment_start + 1); } return 0; } @@ -2724,7 +2724,7 @@ bool quick_rm_table(THD *thd, handlerton *base, const char *db, bool error= 0; DBUG_ENTER("quick_rm_table"); - uint path_length= table_path ? + size_t path_length= table_path ? (strxnmov(path, sizeof(path) - 1, table_path, reg_ext, NullS) - path) : build_table_filename(path, sizeof(path)-1, db, table_name, reg_ext, flags); if (mysql_file_delete(key_file_frm, path, MYF(0))) @@ -6550,7 +6550,7 @@ static bool fill_alter_inplace_info(THD *thd, table_key; ha_alter_info->index_add_buffer [ha_alter_info->index_add_count++]= - new_key - ha_alter_info->key_info_buffer; + (uint)(new_key - ha_alter_info->key_info_buffer); /* Mark all old fields which are used in newly created index. */ DBUG_PRINT("info", ("index changed: '%s'", table_key->name)); } @@ -6574,7 +6574,7 @@ static bool fill_alter_inplace_info(THD *thd, /* Key not found. Add the offset of the key to the add buffer. */ ha_alter_info->index_add_buffer [ha_alter_info->index_add_count++]= - new_key - ha_alter_info->key_info_buffer; + (uint)(new_key - ha_alter_info->key_info_buffer); DBUG_PRINT("info", ("index added: '%s'", new_key->name)); } else diff --git a/sql/sql_test.cc b/sql/sql_test.cc index 1baa5c3d98369..39693de80aec4 100644 --- a/sql/sql_test.cc +++ b/sql/sql_test.cc @@ -172,7 +172,7 @@ TEST_join(JOIN *join) in order not to garble the tabular output below. 
*/ String ref_key_parts[MAX_TABLES]; - int tables_in_range= jt_range->end - jt_range->start; + int tables_in_range= (int)(jt_range->end - jt_range->start); for (i= 0; i < tables_in_range; i++) { JOIN_TAB *tab= jt_range->start + i; diff --git a/sql/sql_time.cc b/sql/sql_time.cc index c8ec1fc7f6a76..24aa7b1b8a62e 100644 --- a/sql/sql_time.cc +++ b/sql/sql_time.cc @@ -274,7 +274,7 @@ to_ascii(CHARSET_INFO *cs, *dst++= static_cast(wc); } *dst= '\0'; - return dst - dst0; + return (uint)(dst - dst0); } diff --git a/sql/sql_trigger.cc b/sql/sql_trigger.cc index 291d55d61a208..bc452d59d248b 100644 --- a/sql/sql_trigger.cc +++ b/sql/sql_trigger.cc @@ -453,6 +453,7 @@ bool mysql_create_or_drop_trigger(THD *thd, TABLE_LIST *tables, bool create) my_error(ER_BINLOG_CREATE_ROUTINE_NEED_SUPER, MYF(0)); DBUG_RETURN(TRUE); } + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) if (!create) { @@ -616,6 +617,10 @@ bool mysql_create_or_drop_trigger(THD *thd, TABLE_LIST *tables, bool create) my_ok(thd); DBUG_RETURN(result); +#ifdef WITH_WSREP + error: + DBUG_RETURN(true); +#endif /* WITH_WSREP */ } /** diff --git a/sql/sql_update.cc b/sql/sql_update.cc index c6959509a082f..83c2e105f0762 100644 --- a/sql/sql_update.cc +++ b/sql/sql_update.cc @@ -80,7 +80,7 @@ bool compare_record(const TABLE *table) { if (field->real_maybe_null()) { - uchar null_byte_index= field->null_ptr - table->record[0]; + uchar null_byte_index= (uchar)(field->null_ptr - table->record[0]); if (((table->record[0][null_byte_index]) & field->null_bit) != ((table->record[1][null_byte_index]) & field->null_bit)) diff --git a/sql/sql_view.cc b/sql/sql_view.cc index 75d8841d25c7b..32fa481395ea8 100644 --- a/sql/sql_view.cc +++ b/sql/sql_view.cc @@ -430,6 +430,8 @@ bool mysql_create_view(THD *thd, TABLE_LIST *views, lex->link_first_table_back(view, link_to_local); view->open_type= OT_BASE_ONLY; + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) + if (check_dependencies_in_with_clauses(lex->with_clauses_list)) { res= TRUE; @@ -708,6 +710,10 @@ bool mysql_create_view(THD *thd, TABLE_LIST *views, lex->link_first_table_back(view, link_to_local); unit->cleanup(); DBUG_RETURN(res || thd->is_error()); +#ifdef WITH_WSREP + error: + DBUG_RETURN(true); +#endif /* WITH_WSREP */ } diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 3505d1037104c..005cbd5fd02b3 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -392,8 +392,8 @@ LEX::create_item_for_sp_var(LEX_CSTRING *name, sp_variable *spvar, DBUG_ASSERT(spcont && spvar); /* Position and length of the SP variable name in the query. 
*/ - pos_in_q= start_in_q - sphead->m_tmp_query; - len_in_q= end_in_q - start_in_q; + pos_in_q= (uint)(start_in_q - sphead->m_tmp_query); + len_in_q= (uint)(end_in_q - start_in_q); item= new (thd->mem_root) Item_splocal(thd, name, spvar->offset, spvar->sql_type(), diff --git a/sql/strfunc.cc b/sql/strfunc.cc index b09eadb098e4c..1c0b672fbcc30 100644 --- a/sql/strfunc.cc +++ b/sql/strfunc.cc @@ -339,7 +339,7 @@ int find_string_in_array(LEX_CSTRING * const haystack, LEX_CSTRING * const needl if (!cs->coll->strnncollsp(cs, (uchar *) pos->str, pos->length, (uchar *) needle->str, needle->length)) { - return (pos - haystack); + return (int)(pos - haystack); } return -1; } diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index a399ccdd5e0b9..a2df923b2ad1f 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -1469,7 +1469,7 @@ static Sys_var_ulonglong Sys_max_heap_table_size( "max_heap_table_size", "Don't allow creation of heap tables bigger than this", SESSION_VAR(max_heap_table_size), CMD_LINE(REQUIRED_ARG), - VALID_RANGE(16384, (ulonglong)~(intptr)0), DEFAULT(16*1024*1024), + VALID_RANGE(16384, SIZE_T_MAX), DEFAULT(16*1024*1024), BLOCK_SIZE(1024)); static ulong mdl_locks_cache_size; diff --git a/sql/table.cc b/sql/table.cc index 7131e9d4a7de7..593c0eda3a8e0 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -2523,7 +2523,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, the correct null_bytes can now be set, since bitfields have been taken into account */ - share->null_bytes= (null_pos - (uchar*) null_flags + + share->null_bytes= (uint)(null_pos - (uchar*) null_flags + (null_bit_pos + 7) / 8); share->last_null_bit_pos= null_bit_pos; share->null_bytes_for_compare= null_bits_are_used ? share->null_bytes : 0; @@ -6013,8 +6013,8 @@ Field_iterator_table_ref::get_or_create_column_ref(THD *thd, TABLE_LIST *parent_ /* The field belongs to a merge view or information schema table. 
*/ Field_translator *translated_field= view_field_it.field_translator(); nj_col= new Natural_join_column(translated_field, table_ref); - field_count= table_ref->field_translation_end - - table_ref->field_translation; + field_count= (uint)(table_ref->field_translation_end - + table_ref->field_translation); } else { diff --git a/sql/unireg.cc b/sql/unireg.cc index 5d5b82ba01598..b49c3cfbb0954 100644 --- a/sql/unireg.cc +++ b/sql/unireg.cc @@ -981,13 +981,18 @@ static bool make_empty_rec(THD *thd, uchar *buff, uint table_options, null_count+= field->length & 7; if (field->default_value && !field->default_value->flags && - !(field->flags & BLOB_FLAG)) + (!(field->flags & BLOB_FLAG) || + field->real_field_type() == MYSQL_TYPE_GEOMETRY)) { Item *expr= field->default_value->expr; + int res= !expr->fixed && // may be already fixed if ALTER TABLE expr->fix_fields(thd, &expr); if (!res) res= expr->save_in_field(regfield, 1); + if (!res && (field->flags & BLOB_FLAG)) + regfield->reset(); + /* If not ok or warning of level 'note' */ if (res != 0 && res != 3) { @@ -996,6 +1001,7 @@ static bool make_empty_rec(THD *thd, uchar *buff, uint table_options, delete regfield; //To avoid memory leak goto err; } + delete regfield; //To avoid memory leak } else if (regfield->real_type() == MYSQL_TYPE_ENUM && (field->flags & NOT_NULL_FLAG)) diff --git a/storage/connect/csort.cpp b/storage/connect/csort.cpp index 13f325d8f3f7a..670131b8fd2f6 100644 --- a/storage/connect/csort.cpp +++ b/storage/connect/csort.cpp @@ -351,7 +351,7 @@ void CSORT::Qstx(int *base, int *max) zlo = zhi = cnm = 0; // Avoid warning message - lo = max - base; // Number of elements as longs + lo = (int)(max - base); // Number of elements as longs if (Dup) cnm = Cmpnum(lo); @@ -472,7 +472,7 @@ void CSORT::Qstx(int *base, int *max) i = him + 1; if (Pof) - Pof[him - Pex] = Pof[mid - Pex] = i - j; + Pof[him - Pex] = Pof[mid - Pex] = (int)(i - j); /*******************************************************************/ /* Look at sizes of the two partitions, do the smaller one first */ @@ -481,8 +481,8 @@ void CSORT::Qstx(int *base, int *max) /* But only repeat (recursively or by branching) if the partition */ /* is of at least size THRESH. */ /*******************************************************************/ - lo = j - base; - hi = max - i; + lo = (int)(j - base); + hi = (int)(max - i); if (Dup) { // Update progress information zlo = Cmpnum(lo); @@ -726,7 +726,7 @@ void CSORT::Qstc(int *base, int *max) zlo = zhi = cnm = 0; // Avoid warning message - lo = max - base; // Number of elements as longs + lo = (int)(max - base); // Number of elements as longs if (Dup) cnm = Cmpnum(lo); @@ -853,7 +853,7 @@ void CSORT::Qstc(int *base, int *max) /* the offset array values indicating break point and block size. */ /*******************************************************************/ if (Pof) - Pof[lt - Pex] = Pof[(jj - 1) - Pex] = jj - lt; + Pof[lt - Pex] = Pof[(jj - 1) - Pex] = (int)(jj - lt); /*******************************************************************/ /* Look at sizes of the two partitions, do the smaller one first */ @@ -862,8 +862,8 @@ void CSORT::Qstc(int *base, int *max) /* But only repeat (recursively or by branching) if the partition */ /* is of at least size THRESH. 
*/ /*******************************************************************/ - lo = lt - base; - hi = gt - Swix; + lo = (int)(lt - base); + hi = (int)(gt - Swix); if (Dup) { // Update progress information zlo = Cmpnum(lo); diff --git a/storage/connect/domdoc.cpp b/storage/connect/domdoc.cpp index e24e10835c187..ba8eb829abde0 100644 --- a/storage/connect/domdoc.cpp +++ b/storage/connect/domdoc.cpp @@ -13,6 +13,7 @@ #elif defined(MSX4) #import "msxml4.dll" //Causes error C2872: DOMNodeType: ambiguous symbol ?? #elif defined(MSX6) +#pragma warning(suppress : 4192) #import "msxml6.dll" //Causes error C2872: DOMNodeType: ambiguous symbol ?? #else // MSX4 #error MSX? is not defined @@ -540,7 +541,7 @@ PXNODE DOMNODE::AddChildNode(PGLOBAL g, PCSZ name, PXNODE np) // If name has the format m[n] only m is taken as node name if ((p = strchr(name, '['))) - pn = BufAlloc(g, name, p - name); + pn = BufAlloc(g, name, (int)(p - name)); else pn = name; diff --git a/storage/connect/filamap.cpp b/storage/connect/filamap.cpp index 84dff422db71b..67481136d8130 100644 --- a/storage/connect/filamap.cpp +++ b/storage/connect/filamap.cpp @@ -247,7 +247,7 @@ int MAPFAM::GetRowID(void) /***********************************************************************/ int MAPFAM::GetPos(void) { - return Fpos - Memory; + return (int)(Fpos - Memory); } // end of GetPos /***********************************************************************/ @@ -255,7 +255,7 @@ int MAPFAM::GetPos(void) /***********************************************************************/ int MAPFAM::GetNextPos(void) { - return Mempos - Memory; + return (int)(Mempos - Memory); } // end of GetNextPos /***********************************************************************/ @@ -368,7 +368,7 @@ int MAPFAM::ReadBuffer(PGLOBAL g) } // endif Mempos // Set caller line buffer - len = (Mempos - Fpos) - n; + len = (int)(Mempos - Fpos) - n; // Don't rely on ENDING setting if (len > 0 && *(Mempos - 2) == '\r') @@ -428,7 +428,7 @@ int MAPFAM::DeleteRecords(PGLOBAL g, int irc) /* not required here, just setting of future Spos and Tpos. */ /*******************************************************************/ Tpos = Spos = Fpos; - } else if ((n = Fpos - Spos) > 0) { + } else if ((n = (int)(Fpos - Spos)) > 0) { /*******************************************************************/ /* Non consecutive line to delete. Move intermediate lines. */ /*******************************************************************/ @@ -461,7 +461,7 @@ int MAPFAM::DeleteRecords(PGLOBAL g, int irc) /*****************************************************************/ /* Remove extra records. 
*/ /*****************************************************************/ - n = Tpos - Memory; + n = (int)(Tpos - Memory); #if defined(__WIN__) DWORD drc = SetFilePointer(fp->Handle, n, NULL, FILE_BEGIN); @@ -627,7 +627,7 @@ int MBKFAM::ReadBuffer(PGLOBAL g) break; // Set caller line buffer - len = (Mempos - Fpos) - Ending; + len = (int)(Mempos - Fpos) - Ending; memcpy(Tdbp->GetLine(), Fpos, len); Tdbp->GetLine()[len] = '\0'; return RC_OK; diff --git a/storage/connect/filamgz.cpp b/storage/connect/filamgz.cpp index df366ef15f9b9..3078935e8a4c1 100644 --- a/storage/connect/filamgz.cpp +++ b/storage/connect/filamgz.cpp @@ -537,7 +537,7 @@ int ZBKFAM::ReadBuffer(PGLOBAL g) while (*NxtLine++ != '\n') ; // Set caller line buffer - n = NxtLine - CurLine - Ending; + n = (int)(NxtLine - CurLine - Ending); memcpy(Tdbp->GetLine(), CurLine, n); Tdbp->GetLine()[n] = '\0'; return RC_OK; @@ -588,7 +588,7 @@ int ZBKFAM::ReadBuffer(PGLOBAL g) for (NxtLine = CurLine; *NxtLine++ != '\n';) ; // Set caller line buffer - n = NxtLine - CurLine - Ending; + n = (int)(NxtLine - CurLine - Ending); memcpy(Tdbp->GetLine(), CurLine, n); Tdbp->GetLine()[n] = '\0'; Rbuf = (CurBlk == Block - 1) ? Last : Nrec; @@ -1087,7 +1087,7 @@ bool ZLBFAM::SetPos(PGLOBAL g, int pos __attribute__((unused))) /***********************************************************************/ int ZLBFAM::ReadBuffer(PGLOBAL g) { - int n; + size_t n; void *rdbuf; /*********************************************************************/ @@ -1299,7 +1299,7 @@ int ZLBFAM::WriteBuffer(PGLOBAL g) else NxtLine = CurLine + Lrecl; - BlkLen = NxtLine - To_Buf; + BlkLen = (int)(NxtLine - To_Buf); if (WriteCompressedBuffer(g)) { Closing = TRUE; // To tell CloseDB about a Write error diff --git a/storage/connect/filamtxt.cpp b/storage/connect/filamtxt.cpp index c456ee9e9b727..12727b66335fa 100644 --- a/storage/connect/filamtxt.cpp +++ b/storage/connect/filamtxt.cpp @@ -1351,7 +1351,7 @@ int BLKFAM::GetPos(void) /***********************************************************************/ int BLKFAM::GetNextPos(void) { - return Fpos + NxtLine - CurLine; + return (int)(Fpos + NxtLine - CurLine); } // end of GetNextPos /***********************************************************************/ @@ -1396,7 +1396,8 @@ int BLKFAM::SkipRecord(PGLOBAL, bool header) /***********************************************************************/ int BLKFAM::ReadBuffer(PGLOBAL g) { - int i, n, rc = RC_OK; + int i, rc = RC_OK; + size_t n; /*********************************************************************/ /* Sequential reading when Placed is not true. */ @@ -1497,7 +1498,7 @@ int BLKFAM::ReadBuffer(PGLOBAL g) fin: // Store the current record file position for Delete and Update - Fpos = BlkPos[CurBlk] + CurLine - To_Buf; + Fpos = (int)(BlkPos[CurBlk] + CurLine - To_Buf); return rc; } // end of ReadBuffer @@ -1524,7 +1525,7 @@ int BLKFAM::WriteBuffer(PGLOBAL g) // Now start the writing process. 
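The ReadBuffer() hunks above all share one pattern: the line length is computed as a pointer difference minus the line ending, then the line is copied into the caller's buffer. A minimal standalone sketch of that pattern (hypothetical data, not the CONNECT API):

#include <cstdio>
#include <cstring>

int main()
{
	const char* memory = "first\r\nsecond\r\n";  // stand-in for the mapped file
	const char* fpos = memory;                   // start of the current record
	const char* mempos = std::strchr(fpos, '\n') + 1;  // one past the '\n'
	const int ending = 2;                        // "\r\n"

	char line[32];
	int len = (int)(mempos - fpos) - ending;     // explicit ptrdiff_t -> int
	std::memcpy(line, fpos, len);
	line[len] = '\0';
	std::printf("%s\n", line);                   // prints "first"
	return 0;
}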
NxtLine = CurLine + strlen(CurLine); - BlkLen = NxtLine - To_Buf; + BlkLen = (int)(NxtLine - To_Buf); if (fwrite(To_Buf, 1, BlkLen, Stream) != (size_t)BlkLen) { sprintf(g->Message, MSG(FWRITE_ERROR), strerror(errno)); diff --git a/storage/connect/filamzip.cpp b/storage/connect/filamzip.cpp index dfd9343af76ca..f94362a3d87f3 100644 --- a/storage/connect/filamzip.cpp +++ b/storage/connect/filamzip.cpp @@ -748,7 +748,7 @@ UNZFAM::UNZFAM(PUNZFAM txfp) : MAPFAM(txfp) /***********************************************************************/ int UNZFAM::GetFileLength(PGLOBAL g) { - int len = (zutp && zutp->entryopen) ? Top - Memory + int len = (zutp && zutp->entryopen) ? (int)(Top - Memory) : TXTFAM::GetFileLength(g) * 3; if (trace) @@ -1088,7 +1088,7 @@ int ZIPFAM::WriteBuffer(PGLOBAL g) // Prepare to write the new line strcat(strcpy(To_Buf, Tdbp->GetLine()), (Bin) ? CrLf : "\n"); - len = strchr(To_Buf, '\n') - To_Buf + 1; + len = (int)(strchr(To_Buf, '\n') - To_Buf + 1); return zutp->writeEntry(g, To_Buf, len); } // end of WriteBuffer diff --git a/storage/connect/fmdlex.c b/storage/connect/fmdlex.c index ef4f7bfc65a62..4bf075acf4279 100644 --- a/storage/connect/fmdlex.c +++ b/storage/connect/fmdlex.c @@ -283,7 +283,7 @@ static void yy_fatal_error YY_PROTO(( const char msg[] )); */ #define YY_DO_BEFORE_ACTION \ yytext_ptr = yy_bp; \ - yyleng = yy_cp - yy_bp; \ + yyleng = (int)(yy_cp - yy_bp); \ yy_hold_char = *yy_cp; \ *yy_cp = '\0'; \ yy_c_buf_p = yy_cp; @@ -695,7 +695,7 @@ case YY_STATE_EOF(dqt): case YY_END_OF_BUFFER: { /* Amount of text matched not including the EOB char. */ - int yy_amount_of_matched_text = yy_cp - yytext_ptr - 1; + int yy_amount_of_matched_text = (int)(yy_cp - yytext_ptr - 1); /* Undo the effects of YY_DO_BEFORE_ACTION. */ *yy_cp = yy_hold_char; @@ -862,7 +862,7 @@ static int yy_get_next_buffer() /* Try to read more data. */ /* First move last chars to start of buffer. 
*/ - number_to_move = yy_c_buf_p - yytext_ptr; + number_to_move = (int)(yy_c_buf_p - yytext_ptr); for ( i = 0; i < number_to_move; ++i ) *(dest++) = *(source++); @@ -888,7 +888,7 @@ static int yy_get_next_buffer() /* just a shorter name for the current buffer */ YY_BUFFER_STATE b = yy_current_buffer; - int yy_c_buf_p_offset = yy_c_buf_p - b->yy_ch_buf; + int yy_c_buf_p_offset = (int)(yy_c_buf_p - b->yy_ch_buf); b->yy_buf_size *= 2; b->yy_ch_buf = (char *) diff --git a/storage/connect/macutil.cpp b/storage/connect/macutil.cpp index b9600bdac2e5d..f95f3adcc6edc 100644 --- a/storage/connect/macutil.cpp +++ b/storage/connect/macutil.cpp @@ -230,13 +230,13 @@ bool MACINFO::GetOneInfo(PGLOBAL g, int flag, void *v, int lv) case 11: // Description if ((p = strstr(Curp->Description, " - Packet Scheduler Miniport"))) { strncpy(buf, Curp->Description, p - Curp->Description); - i = p - Curp->Description; + i = (int)(p - Curp->Description); strncpy(buf, Curp->Description, i); buf[i] = 0; p = buf; } else if ((p = strstr(Curp->Description, " - Miniport d'ordonnancement de paquets"))) { - i = p - Curp->Description; + i = (int)(p - Curp->Description); strncpy(buf, Curp->Description, i); buf[i] = 0; p = buf; diff --git a/storage/connect/myconn.cpp b/storage/connect/myconn.cpp index 08bb24e14dfb6..28e6f076e779a 100644 --- a/storage/connect/myconn.cpp +++ b/storage/connect/myconn.cpp @@ -248,7 +248,7 @@ PQRYRES MyColumns(PGLOBAL g, THD *thd, const char *host, const char *db, while (true) { p2 = strchr(p1, '\''); - len = MY_MAX(len, p2 - p1); + len = MY_MAX(len, (int)(p2 - p1)); if (*++p2 != ',') break; p1 = p2 + 2; } // endwhile diff --git a/storage/connect/odbconn.cpp b/storage/connect/odbconn.cpp index 70a0a6a145037..3b0cb5626726b 100644 --- a/storage/connect/odbconn.cpp +++ b/storage/connect/odbconn.cpp @@ -2427,7 +2427,7 @@ int ODBConn::GetCatInfo(CATPARM *cap) else if (vlen[n] == SQL_NULL_DATA) pval[n]->SetNull(true); else if (crp->Type == TYPE_STRING/* && vlen[n] != SQL_NULL_DATA*/) - pval[n]->SetValue_char(pbuf[n], vlen[n]); + pval[n]->SetValue_char(pbuf[n], (int)vlen[n]); else pval[n]->SetNull(false); diff --git a/storage/connect/plgdbutl.cpp b/storage/connect/plgdbutl.cpp index 25da31625163f..e46d260203e55 100644 --- a/storage/connect/plgdbutl.cpp +++ b/storage/connect/plgdbutl.cpp @@ -540,7 +540,7 @@ bool EvalLikePattern(LPCSTR sp, LPCSTR tp) { LPSTR p; char c; - int n; + ssize_t n; bool b, t = false; if (trace) diff --git a/storage/connect/tabfmt.cpp b/storage/connect/tabfmt.cpp index 516601a5eb48f..f616f24d16b47 100644 --- a/storage/connect/tabfmt.cpp +++ b/storage/connect/tabfmt.cpp @@ -934,7 +934,7 @@ int TDBCSV::ReadBuffer(PGLOBAL g) if (p) { //len = p++ - p2; - len = p - p2 - 1;; + len = (int)(p - p2 - 1); // if (Sep != ' ') // for (; *p == ' '; p++) ; // Skip blanks @@ -978,7 +978,7 @@ int TDBCSV::ReadBuffer(PGLOBAL g) return RC_NF; } else if ((p = strchr(p2, Sep))) - len = p - p2; + len = (int)(p - p2); else if (i == Fields - 1) len = strlen(p2); else if (Accept && Maxerr == 0) { @@ -996,7 +996,7 @@ int TDBCSV::ReadBuffer(PGLOBAL g) } else len = 0; - Offset[i] = p2 - To_Line; + Offset[i] = (int)(p2 - To_Line); if (Mode != MODE_UPDATE) Fldlen[i] = len; diff --git a/storage/connect/tabmac.cpp b/storage/connect/tabmac.cpp index a28b5d7108cb3..8260ab6539109 100644 --- a/storage/connect/tabmac.cpp +++ b/storage/connect/tabmac.cpp @@ -367,13 +367,13 @@ void MACCOL::ReadColumn(PGLOBAL g) case 11: // Description if ((p = strstr(adp->Description, " - Packet Scheduler Miniport"))) { strncpy(buf, 
adp->Description, p - adp->Description); - i = p - adp->Description; + i = (int)(p - adp->Description); strncpy(buf, adp->Description, i); buf[i] = 0; p = buf; } else if ((p = strstr(adp->Description, " - Miniport d'ordonnancement de paquets"))) { - i = p - adp->Description; + i = (int)(p - adp->Description); strncpy(buf, adp->Description, i); buf[i] = 0; p = buf; diff --git a/storage/connect/value.cpp b/storage/connect/value.cpp index a80da8085483b..eae72984ca693 100644 --- a/storage/connect/value.cpp +++ b/storage/connect/value.cpp @@ -1738,7 +1738,7 @@ DECVAL::DECVAL(PSZ s) : TYPVAL(s) if (s) { char *p = strchr(Strp, '.'); - Prec = (p) ? Len - (p - Strp) : 0; + Prec = (p) ? (int)(Len - (p - Strp)) : 0; } // endif s Type = TYPE_DECIM; @@ -2647,7 +2647,7 @@ bool DTVAL::SetValue_char(const char *p, int n) // Trim trailing blanks for (p2 = p + n -1; p < p2 && *p2 == ' '; p2--); - if ((rc = (n = p2 - p + 1) > Len)) + if ((rc = (n = (int)(p2 - p + 1)) > Len)) n = Len; memcpy(Sdate, p, n); diff --git a/storage/connect/xobject.cpp b/storage/connect/xobject.cpp index 85af377970196..c595ce5d6c43b 100644 --- a/storage/connect/xobject.cpp +++ b/storage/connect/xobject.cpp @@ -204,7 +204,7 @@ STRING::STRING(PGLOBAL g, uint n, PCSZ str) *Strp = 0; Next = GetNext(); - Size = Next - Strp; + Size = (int)(Next - Strp); Trc = false; } else { // This should normally never happen @@ -239,7 +239,7 @@ char *STRING::Realloc(uint len) p = Strp; Next = GetNext(); - Size = Next - p; + Size = (int)(Next - p); return p; } // end of Realloc diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index 3e9f26ad12510..e638af8a21778 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -77,22 +77,85 @@ btr_corruption_report( /* Latching strategy of the InnoDB B-tree -------------------------------------- -A tree latch protects all non-leaf nodes of the tree. Each node of a tree -also has a latch of its own. - -A B-tree operation normally first acquires an S-latch on the tree. It -searches down the tree and releases the tree latch when it has the -leaf node latch. To save CPU time we do not acquire any latch on -non-leaf nodes of the tree during a search, those pages are only bufferfixed. - -If an operation needs to restructure the tree, it acquires an X-latch on -the tree before searching to a leaf node. If it needs, for example, to -split a leaf, -(1) InnoDB decides the split point in the leaf, -(2) allocates a new page, -(3) inserts the appropriate node pointer to the first non-leaf level, -(4) releases the tree X-latch, -(5) and then moves records from the leaf to the new allocated page. + +Acquisition of node pointer page latches is protected by the index->lock latch. + +Before MariaDB 10.2.2, all node pointer pages were protected by index->lock +either in S (shared) or X (exclusive) mode and block->lock was not acquired on +node pointer pages. + +After MariaDB 10.2.2, block->lock S-latch or X-latch is used to protect +node pointer pages and acquisition of node pointer page latches is protected by +index->lock. + +(0) Definition: B-tree level. + +(0.1) The leaf pages of the B-tree are at level 0. + +(0.2) The parent of a page at level L has level L+1. (The level of the +root page is equal to the tree height.) + +(0.3) The B-tree lock (index->lock) is the parent of the root page and +has a level = tree height + 1. + +Index->lock has 3 possible locking modes: + +(1) S-latch: + +(1.1) All latches for pages must be obtained in descending order of tree level.
+ +(1.2) Before obtaining the first node pointer page latch at a given B-tree +level, the parent latch must be held (at level L+1). + +(1.3) If a node pointer page is already latched at the same level, +we can only obtain a latch on its right sibling page at the same level. + +(1.4) Release of the node pointer page latches must be done in +child-to-parent order. (This prevents deadlocks when index->lock is +held in SX mode.) + +(1.4.1) A level L node pointer page latch can be released only when +no latches at children levels, i.e. levels < L, are held. + +(1.4.2) All node pointer page latches must be released in one +sequence, with no new latches obtained in between. + +(1.5) [implied by (1.1), (1.2)] The root page latch must be the first node +pointer latch obtained. + +(2) SX-latch: + +In this case rules (1.2) and (1.3) from the S-latch case are relaxed and +merged into (2.2), and rule (1.4) is removed. Thus, latch acquisition +can be skipped at some tree levels and latches can be obtained in +a less restricted order. + +(2.1) [identical to (1.1)]: All latches for pages must be obtained in descending +order of tree level. + +(2.2) When a node pointer latch at level L is obtained, +the left sibling page latch at the same level or some ancestor +page latch (at level > L) must be held. + +(2.3) [implied by (2.1), (2.2)] The first node pointer page latch obtained can +be any node pointer page. + +(3) X-latch: + +Node pointer latches can be obtained in any order. + +NOTE: The new rules in MariaDB 10.2.2 do not affect the latching rules of leaf pages: + +index->lock S-latch is needed for reads during node pointer traversal. When the leaf +level is reached, index->lock can be released (and with the MariaDB 10.2.2 changes, all +node pointer latches). Left-to-right index traversal at the leaf page level can be safely done +by obtaining the right sibling leaf page latch and then releasing the old page latch. + +Single leaf page modifications (BTR_MODIFY_LEAF) are protected by index->lock +S-latch. + +B-tree operations involving page splits or merges (BTR_MODIFY_TREE) and page +allocations are protected by index->lock X-latch. Node pointers ------------- @@ -1041,7 +1104,8 @@ btr_free_root( { fseg_header_t* header; - ut_ad(mtr_memo_contains_flagged(mtr, block, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_flagged(mtr, block, MTR_MEMO_PAGE_X_FIX + | MTR_MEMO_PAGE_SX_FIX)); ut_ad(mtr->is_named_space(block->page.id.space())); btr_search_drop_page_hash_index(block); diff --git a/storage/innobase/btr/btr0bulk.cc b/storage/innobase/btr/btr0bulk.cc index 139e3116d0677..5fae57fe2d439 100644 --- a/storage/innobase/btr/btr0bulk.cc +++ b/storage/innobase/btr/btr0bulk.cc @@ -564,8 +564,7 @@ PageBulk::storeExt( page_cur->block = m_block; dberr_t err = btr_store_big_rec_extern_fields( - &btr_pcur, NULL, offsets, big_rec, m_mtr, - BTR_STORE_INSERT_BULK); + &btr_pcur, offsets, big_rec, m_mtr, BTR_STORE_INSERT_BULK); ut_ad(page_offset(m_cur_rec) == page_offset(page_cur->rec)); diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index bb13b6a71d3d6..4bb87cfaafb89 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -6647,7 +6647,6 @@ btr_store_big_rec_extern_fields( btr_pcur_t* pcur, /*!< in/out: a persistent cursor. if btr_mtr is restarted, then this can be repositioned. */ - const upd_t* upd, /*!< in: update vector */ ulint* offsets, /*!< in/out: rec_get_offsets() on pcur.
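The S-latch rules introduced in the btr0btr.cc comment above can be illustrated with a small standalone sketch. This is hypothetical illustration code, not the InnoDB API: std::mutex stands in for block->lock, and the walk follows rules (1.1)-(1.2) going down and rule (1.4) coming back up.

#include <mutex>
#include <vector>

struct Page {
	std::mutex latch;             // stand-in for block->lock
	std::vector<Page*> children;  // stand-in for node pointer records
};

/* Take latches root-to-leaf: rule (1.1) descending levels, rule (1.2)
   the parent (or index->lock, for the root) is held before the child. */
void s_latch_descent(Page* root)
{
	std::vector<Page*> held;
	for (Page* p = root; p != nullptr;
	     p = p->children.empty() ? nullptr : p->children.front()) {
		p->latch.lock();
		held.push_back(p);
	}
	/* Release child-to-parent: rule (1.4); the deepest latch goes
	   first, satisfying (1.4.1). */
	while (!held.empty()) {
		held.back()->latch.unlock();
		held.pop_back();
	}
}

int main()
{
	Page leaf;
	Page root;
	root.children.push_back(&leaf);
	s_latch_descent(&root);
	return 0;
}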
the "external storage" flags in offsets will correctly correspond diff --git a/storage/innobase/btr/btr0defragment.cc b/storage/innobase/btr/btr0defragment.cc index 335b4fc220d52..70444ca1830df 100644 --- a/storage/innobase/btr/btr0defragment.cc +++ b/storage/innobase/btr/btr0defragment.cc @@ -564,7 +564,7 @@ btr_defragment_merge_pages( page_get_infimum_rec(from_page)); node_ptr = dict_index_build_node_ptr( index, rec, page_get_page_no(from_page), - heap, level + 1); + heap, level); btr_insert_on_non_leaf_level(0, index, level+1, node_ptr, mtr); } @@ -797,11 +797,16 @@ DECLARE_THREAD(btr_defragment_thread)(void*) now = ut_timer_now(); mtr_start(&mtr); - btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, &mtr); cursor = btr_pcur_get_btr_cur(pcur); index = btr_cur_get_index(cursor); - first_block = btr_cur_get_block(cursor); mtr.set_named_space(index->space); + /* To follow the latching order defined in WL#6326, acquire index->lock X-latch. + This entitles us to acquire page latches in any order for the index. */ + mtr_x_lock(&index->lock, &mtr); + /* This will acquire index->lock SX-latch, which per WL#6363 is allowed + when we are already holding the X-latch. */ + btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, &mtr); + first_block = btr_cur_get_block(cursor); last_block = btr_defragment_n_pages(first_block, index, srv_defragment_n_pages, diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 8575de8bfa375..e7a2a8443304a 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -824,7 +824,6 @@ buf_flush_update_zip_checksum( static_cast(srv_checksum_algorithm)); mach_write_to_8(page + FIL_PAGE_LSN, lsn); - memset(page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8); mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum); } @@ -1077,7 +1076,6 @@ buf_flush_write_block_low( bpage->newest_modification); ut_a(page_zip_verify_checksum(frame, bpage->size.physical())); - memset(frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8); break; case BUF_BLOCK_FILE_PAGE: frame = bpage->zip.data; diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index d533cfad47a23..b2ba3d2fae3e4 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -912,8 +912,7 @@ dict_index_contains_col_or_prefix( ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); if (dict_index_is_clust(index)) { - - return(TRUE); + return(!is_virtual); } if (is_virtual) { diff --git a/storage/innobase/dict/dict0mem.cc b/storage/innobase/dict/dict0mem.cc index 230706976dc99..bcaa5ae4a1571 100644 --- a/storage/innobase/dict/dict0mem.cc +++ b/storage/innobase/dict/dict0mem.cc @@ -439,6 +439,9 @@ dict_mem_table_col_rename_low( ut_ad(from_len <= NAME_LEN); ut_ad(to_len <= NAME_LEN); + char from[NAME_LEN]; + strncpy(from, s, NAME_LEN); + if (from_len == to_len) { /* The easy case: simply replace the column name in table->col_names. */ @@ -523,14 +526,54 @@ dict_mem_table_col_rename_low( foreign = *it; - for (unsigned f = 0; f < foreign->n_fields; f++) { - /* These can point straight to - table->col_names, because the foreign key - constraints will be freed at the same time - when the table object is freed. */ - foreign->foreign_col_names[f] - = dict_index_get_nth_field( - foreign->foreign_index, f)->name; + if (foreign->foreign_index == NULL) { + /* We may go here when we set foreign_key_checks to 0, + and then try to rename a column and modify the + corresponding foreign key constraint. 
The index + would have been dropped, so we have to find an equivalent + one. */ + for (unsigned f = 0; f < foreign->n_fields; f++) { + if (strcmp(foreign->foreign_col_names[f], from) + == 0) { + + char** rc = const_cast( + foreign->foreign_col_names + + f); + + if (to_len <= strlen(*rc)) { + memcpy(*rc, to, to_len + 1); + } else { + *rc = static_cast( + mem_heap_dup( + foreign->heap, + to, + to_len + 1)); + } + } + } + + dict_index_t* new_index = dict_foreign_find_index( + foreign->foreign_table, NULL, + foreign->foreign_col_names, + foreign->n_fields, NULL, true, false, + NULL, NULL, NULL); + /* There must be an equivalent index in this case. */ + ut_ad(new_index != NULL); + + foreign->foreign_index = new_index; + + } else { + + for (unsigned f = 0; f < foreign->n_fields; f++) { + /* These can point straight to + table->col_names, because the foreign key + constraints will be freed at the same time + when the table object is freed. */ + foreign->foreign_col_names[f] + = dict_index_get_nth_field( + foreign->foreign_index, + f)->name; + } } } @@ -540,6 +583,8 @@ dict_mem_table_col_rename_low( foreign = *it; + ut_ad(foreign->referenced_index != NULL); + for (unsigned f = 0; f < foreign->n_fields; f++) { /* foreign->referenced_col_names[] need to be copies, because the constraint may become diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 2897d5f9be875..73132754fdf47 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -1061,139 +1061,28 @@ fil_space_extend_must_retry( const page_size_t pageSize(space->flags); const ulint page_size = pageSize.physical(); -#ifdef _WIN32 - os_offset_t new_file_size = - std::max( - os_offset_t(size - file_start_page_no) * page_size, - os_offset_t(FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE)); - - /* os_file_change_size_win32() handles both compressed(sparse) - and normal files correctly. - It allocates physical storage for normal files and "virtual" - storage for sparse ones.*/ - *success = os_file_change_size_win32(node->name, - node->handle, new_file_size); + /* fil_read_first_page() expects UNIV_PAGE_SIZE bytes. + fil_node_open_file() expects at least 4 * UNIV_PAGE_SIZE bytes.*/ + os_offset_t new_size = std::max( + os_offset_t(size - file_start_page_no) * page_size, + os_offset_t(FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE)); + + *success = os_file_set_size(node->name, node->handle, new_size, + FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags)); + + os_has_said_disk_full = *success; if (*success) { last_page_no = size; } else { - ib::error() << "extending file '" << node->name - << " to size " << new_file_size << " failed"; - } -#else - /* We will logically extend the file with ftruncate() if - page_compression is enabled, because the file is expected to - be sparse in that case. Make sure that ftruncate() can deal - with large files. */ - const bool is_sparse = sizeof(off_t) >= 8 - && FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags); - - if (is_sparse) { - /* fil_read_first_page() expects UNIV_PAGE_SIZE bytes. - fil_node_open_file() expects at least 4 * UNIV_PAGE_SIZE bytes. - Do not shrink short ROW_FORMAT=COMPRESSED files.
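A standalone sketch of the rename logic in the dict0mem.cc hunk above, under the assumption that malloc() may stand in for mem_heap_dup(); this is illustration code, not the InnoDB API:

#include <cstdio>
#include <cstdlib>
#include <cstring>

/* Reuse the old buffer when the new name (including its NUL) fits,
   otherwise allocate a fresh copy, exactly as the hunk does. */
static void rename_col(char** col_name, const char* to)
{
	size_t to_len = strlen(to);
	if (to_len <= strlen(*col_name)) {
		memcpy(*col_name, to, to_len + 1);  // overwrite in place
	} else {
		char* copy = (char*)malloc(to_len + 1);
		memcpy(copy, to, to_len + 1);
		*col_name = copy;                   // freed at process exit here
	}
}

int main()
{
	char buf[16];
	strcpy(buf, "old_name");
	char* name = buf;
	rename_col(&name, "nn");                  // shorter: rewritten in place
	printf("%s\n", name);
	rename_col(&name, "a_much_longer_name");  // longer: heap copy
	printf("%s\n", name);
	return 0;
}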
*/ - off_t s = std::max(off_t(size - file_start_page_no) - * off_t(page_size), - off_t(FIL_IBD_FILE_INITIAL_SIZE - * UNIV_PAGE_SIZE)); - *success = !ftruncate(node->handle, s); - if (!*success) { - ib::error() << "ftruncate of file '" << node->name - << "' from " - << os_offset_t(last_page_no - - file_start_page_no) - * page_size << " to " << os_offset_t(s) - << " bytes failed with " << errno; - } else { - last_page_no = size; - } - } else { - const os_offset_t start_offset - = os_offset_t(last_page_no - file_start_page_no) - * page_size; - const ulint n_pages = size - last_page_no; - const os_offset_t len = os_offset_t(n_pages) * page_size; -# ifdef HAVE_POSIX_FALLOCATE - int err; - do { - err = posix_fallocate(node->handle, start_offset, len); - } while (err == EINTR - && srv_shutdown_state == SRV_SHUTDOWN_NONE); - - if (err != EINVAL) { - - *success = !err; - if (!*success) { - ib::error() << "extending file '" << node->name - << "' from " - << start_offset - << " to " << len + start_offset - << " bytes failed with: " << err; - } - } else -# endif /* HAVE_POSIX_FALLOCATE */ - { - /* Extend at most 1 megabyte pages at a time */ - ulint n_bytes = std::min(ulint(1) << 20, n_pages) - * page_size; - byte* buf2 = static_cast( - calloc(1, n_bytes + page_size)); - *success = buf2 != NULL; - if (!buf2) { - ib::error() << "Cannot allocate " - << n_bytes + page_size - << " bytes to extend file"; - } - byte* const buf = static_cast( - ut_align(buf2, page_size)); - IORequest request(IORequest::WRITE); - - - os_offset_t offset = start_offset; - const os_offset_t end = start_offset + len; - const bool read_only_mode = space->purpose - == FIL_TYPE_TEMPORARY && srv_read_only_mode; - - while (*success && offset < end) { - dberr_t err = os_aio( - request, OS_AIO_SYNC, node->name, - node->handle, buf, offset, n_bytes, - read_only_mode, NULL, NULL); - - if (err != DB_SUCCESS) { - *success = false; - ib::error() << "writing zeroes to file '" - << node->name << "' from " - << offset << " to " << offset + n_bytes - << " bytes failed with: " - << ut_strerr(err); - break; - } - - offset += n_bytes; - - n_bytes = std::min(n_bytes, - static_cast(end - offset)); - } - - free(buf2); - } + /* Let us measure the size of the file + to determine how much we were able to + extend it */ + os_offset_t fsize = os_file_get_size(node->handle); + ut_a(fsize != os_offset_t(-1)); - os_has_said_disk_full = *success; - if (*success) { - last_page_no = size; - } else { - /* Let us measure the size of the file - to determine how much we were able to - extend it */ - os_offset_t fsize = os_file_get_size(node->handle); - ut_a(fsize != os_offset_t(-1)); - - last_page_no = ulint(fsize / page_size) - + file_start_page_no; - } + last_page_no = ulint(fsize / page_size) + + file_start_page_no; } -#endif mutex_enter(&fil_system->mutex); ut_a(node->being_extended); @@ -1206,11 +1095,7 @@ fil_space_extend_must_retry( const ulint pages_in_MiB = node->size & ~((1 << (20 - UNIV_PAGE_SIZE_SHIFT)) - 1); - fil_node_complete_io(node, -#ifndef _WIN32 - !is_sparse ? 
IORequestWrite : -#endif /* _WIN32 */ - IORequestRead); + fil_node_complete_io(node,IORequestRead); /* Keep the last data file size info up to date, rounded to full megabytes */ @@ -1333,6 +1218,7 @@ fil_mutex_enter_and_prepare_for_io( fil_flush_file_spaces(FIL_TYPE_TABLESPACE); count++; + mutex_enter(&fil_system->mutex); continue; } } @@ -3237,10 +3123,11 @@ fil_truncate_tablespace( bool success = os_file_truncate(node->name, node->handle, 0); if (success) { - os_offset_t size = size_in_pages * UNIV_PAGE_SIZE; + os_offset_t size = os_offset_t(size_in_pages) * UNIV_PAGE_SIZE; success = os_file_set_size( - node->name, node->handle, size, srv_read_only_mode); + node->name, node->handle, size, + FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags)); if (success) { space->stop_new_ops = false; @@ -3835,72 +3722,17 @@ fil_ibd_create( return(DB_ERROR); } - bool punch_hole = false; + const bool is_compressed = FSP_FLAGS_HAS_PAGE_COMPRESSION(flags); #ifdef _WIN32 - - if (FSP_FLAGS_HAS_PAGE_COMPRESSION(flags)) { - punch_hole = os_file_set_sparse_win32(file); - } - - success = os_file_change_size_win32(path, file, size * UNIV_PAGE_SIZE); - -#else - - success= false; -#ifdef HAVE_POSIX_FALLOCATE - /* - Extend the file using posix_fallocate(). This is required by - FusionIO HW/Firmware but should also be the prefered way to extend - a file. - */ - int ret; - do { - ret = posix_fallocate(file, 0, size * UNIV_PAGE_SIZE); - } while (ret == EINTR - && srv_shutdown_state == SRV_SHUTDOWN_NONE); - - if (ret == 0) { - success = true; - } else if (ret != EINVAL) { - ib::error() << - "posix_fallocate(): Failed to preallocate" - " data for file " << path - << ", desired size " - << size * UNIV_PAGE_SIZE - << " Operating system error number " << ret - << ". Check" - " that the disk is not full or a disk quota" - " exceeded. Some operating system error" - " numbers are described at " REFMAN - "operating-system-error-codes.html"; - } -#endif /* HAVE_POSIX_FALLOCATE */ - - if (!success) { - success = os_file_set_size( - path, file, size * UNIV_PAGE_SIZE, srv_read_only_mode); - } - - /* Note: We are actually punching a hole, previous contents will - be lost after this call, if it succeeds. In this case the file - should be full of NULs. */ - - punch_hole = os_is_sparse_file_supported(file); - - if (punch_hole) { - - dberr_t punch_err; - - punch_err = os_file_punch_hole(file, 0, size * UNIV_PAGE_SIZE); - - if (punch_err != DB_SUCCESS) { - punch_hole = false; - } + if (is_compressed) { + os_file_set_sparse_win32(file); } #endif - ulint block_size = os_file_get_block_size(file, path); + success = os_file_set_size( + path, file, + os_offset_t(size) << UNIV_PAGE_SIZE_SHIFT, is_compressed); if (!success) { os_file_close(file); @@ -3908,6 +3740,10 @@ fil_ibd_create( return(DB_OUT_OF_FILE_SPACE); } + bool punch_hole = os_is_sparse_file_supported(file); + + ulint block_size = os_file_get_block_size(file, path); + /* We have to write the space id to the file immediately and flush the file to disk. This is because in crash recovery we must be aware what tablespaces exist and what are their space id's, so that we can apply diff --git a/storage/innobase/fsp/fsp0sysspace.cc b/storage/innobase/fsp/fsp0sysspace.cc index cc156a5353a77..c459c8296e08f 100644 --- a/storage/innobase/fsp/fsp0sysspace.cc +++ b/storage/innobase/fsp/fsp0sysspace.cc @@ -410,8 +410,7 @@ SysTablespace::set_size( bool success = os_file_set_size( file.m_filepath, file.m_handle, - static_cast(file.m_size << UNIV_PAGE_SIZE_SHIFT), - m_ignore_read_only ? 
false : srv_read_only_mode); + static_cast(file.m_size) << UNIV_PAGE_SIZE_SHIFT); if (success) { ib::info() << "File '" << file.filepath() << "' size is now " diff --git a/storage/innobase/fts/fts0que.cc b/storage/innobase/fts/fts0que.cc index 594f337c978b8..858d84f1a5e06 100644 --- a/storage/innobase/fts/fts0que.cc +++ b/storage/innobase/fts/fts0que.cc @@ -149,13 +149,6 @@ struct fts_query_t { bool multi_exist; /*!< multiple FTS_EXIST oper */ st_mysql_ftparser* parser; /*!< fts plugin parser */ - - /** limit value for the fts query */ - ulonglong limit; - - /** number of docs fetched by query. This is to restrict the - result with limit value */ - ulonglong n_docs; }; /** For phrase matching, first we collect the documents and the positions @@ -3228,11 +3221,6 @@ fts_query_filter_doc_ids( ulint decoded = 0; ib_rbt_t* doc_freqs = word_freq->doc_freqs; - if (query->limit != ULONG_UNDEFINED - && query->n_docs >= query->limit) { - return(DB_SUCCESS); - } - /* Decode the ilist and add the doc ids to the query doc_id set. */ while (decoded < len) { ulint freq = 0; @@ -3320,17 +3308,11 @@ fts_query_filter_doc_ids( /* Add the word to the document's matched RB tree. */ fts_query_add_word_to_document(query, doc_id, word); } - - if (query->limit != ULONG_UNDEFINED - && query->limit <= ++query->n_docs) { - goto func_exit; - } } /* Some sanity checks. */ ut_a(doc_id == node->last_doc_id); -func_exit: if (query->total_size > fts_result_cache_limit) { return(DB_FTS_EXCEED_RESULT_CACHE_LIMIT); } else { @@ -3941,7 +3923,6 @@ fts_query_can_optimize( @param[in] query_str FTS query @param[in] query_len FTS query string len in bytes @param[in,out] result result doc ids -@param[in] limit limit value @return DB_SUCCESS if successful otherwise error code */ dberr_t fts_query( @@ -3950,8 +3931,7 @@ fts_query( uint flags, const byte* query_str, ulint query_len, - fts_result_t** result, - ulonglong limit) + fts_result_t** result) { fts_query_t query; dberr_t error = DB_SUCCESS; @@ -4013,10 +3993,6 @@ fts_query( query.total_docs = dict_table_get_n_rows(index->table); - query.limit = limit; - - query.n_docs = 0; - query.fts_common_table.suffix = "DELETED"; /* Read the deleted doc_ids, we need these for filtering. */ @@ -4078,19 +4054,6 @@ fts_query( fts_result_cache_limit = 2048; ); - /* Optimisation is allowed for limit value - when - i) No ranking involved - ii) Only FTS Union operations involved. */ - if (query.limit != ULONG_UNDEFINED - && !fts_ast_node_check_union(ast)) { - query.limit = ULONG_UNDEFINED; - } - - DBUG_EXECUTE_IF("fts_union_limit_off", - query.limit = ULONG_UNDEFINED; - ); - /* Traverse the Abstract Syntax Tree (AST) and execute the query. 
*/ query.error = fts_ast_visit( diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 383f84d1dd18e..1a370d7979dce 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -6425,16 +6425,27 @@ ha_innobase::open(const char* name, int, uint) ib_table = open_dict_table(name, norm_name, is_part, ignore_err); - uint n_fields = mysql_fields(table); + if (NULL == ib_table) { - if (ib_table != NULL - && ((!DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID) - && n_fields != dict_table_get_n_tot_u_cols(ib_table)) - || (DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID) - && (n_fields != dict_table_get_n_tot_u_cols(ib_table) - 1)))) { + if (is_part) { + sql_print_error("Failed to open table %s.\n", + norm_name); + } +no_such_table: + free_share(m_share); + set_my_errno(ENOENT); + DBUG_RETURN(HA_ERR_NO_SUCH_TABLE); + } + + uint n_fields = mysql_fields(table); + uint n_cols = dict_table_get_n_user_cols(ib_table) + + dict_table_get_n_v_cols(ib_table) + - !!DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID); + + if (n_cols != n_fields) { ib::warn() << "Table " << norm_name << " contains " - << dict_table_get_n_tot_u_cols(ib_table) << " user" + << n_cols << " user" " defined columns in InnoDB, but " << n_fields << " columns in MariaDB. Please check" " INFORMATION_SCHEMA.INNODB_SYS_COLUMNS and " REFMAN @@ -6446,21 +6457,7 @@ ha_innobase::open(const char* name, int, uint) ib_table->file_unreadable = true; ib_table->corrupted = true; dict_table_close(ib_table, FALSE, FALSE); - ib_table = NULL; - is_part = NULL; - } - - if (NULL == ib_table) { - - if (is_part) { - sql_print_error("Failed to open table %s.\n", - norm_name); - } - - free_share(m_share); - set_my_errno(ENOENT); - - DBUG_RETURN(HA_ERR_NO_SUCH_TABLE); + goto no_such_table; } innobase_copy_frm_flags_from_table_share(ib_table, table->s); @@ -8210,13 +8207,16 @@ ha_innobase::build_template( } else { ibool contain; - if (innobase_is_v_fld(table->field[i])) { - contain = dict_index_contains_col_or_prefix( - index, num_v, true); - } else { + if (!innobase_is_v_fld(table->field[i])) { contain = dict_index_contains_col_or_prefix( index, i - num_v, false); + } else if (dict_index_is_clust(index)) { + num_v++; + continue; + } else { + contain = dict_index_contains_col_or_prefix( + index, num_v, true); } field = build_template_needs_field( @@ -10540,10 +10540,8 @@ ha_innobase::ft_init_ext( const byte* q = reinterpret_cast( const_cast(query)); - // JAN: TODO: support for ft_init_ext_with_hints(), remove the line below - m_prebuilt->m_fts_limit= ULONG_UNDEFINED; - dberr_t error = fts_query(trx, index, flags, q, query_len, &result, - m_prebuilt->m_fts_limit); + // FIXME: support ft_init_ext_with_hints(), pass LIMIT + dberr_t error = fts_query(trx, index, flags, q, query_len, &result); if (error != DB_SUCCESS) { my_error(convert_error_code_to_mysql(error, 0, NULL), MYF(0)); @@ -15077,7 +15075,7 @@ ha_innobase::optimize( calls to OPTIMIZE, which is undesirable. 
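A hypothetical restatement of the reworked column-count check in ha_innobase::open() above; the helper name and types are invented for illustration. The hidden FTS_DOC_ID column is counted by the InnoDB dictionary but has no MariaDB field, so it is subtracted before comparing.

#include <cstdio>

static bool column_counts_match(unsigned n_user_cols, unsigned n_v_cols,
                                bool has_hidden_fts_doc_id,
                                unsigned n_mysql_fields)
{
	unsigned n_cols = n_user_cols + n_v_cols
		- (has_hidden_fts_doc_id ? 1 : 0);
	return n_cols == n_mysql_fields;
}

int main()
{
	// 4 dictionary columns, one a hidden FTS_DOC_ID -> 3 visible fields.
	std::printf("%d\n", column_counts_match(4, 0, true, 3));   // 1: match
	std::printf("%d\n", column_counts_match(4, 0, false, 3));  // 0: rejected
	return 0;
}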
*/ /* TODO: Defragment is disabled for now */ - if (0) { + if (srv_defragment) { int err; err = defragment_table(m_prebuilt->table->name.m_name, NULL, false); diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index da8fc3ae67236..69a6c1f31d0b9 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -570,6 +570,13 @@ ha_innobase::check_if_supported_inplace_alter( { DBUG_ENTER("check_if_supported_inplace_alter"); + /* Before 10.2.2 information about virtual columns was not stored in + system tables. We need to do a full alter to rebuild proper 10.2.2+ + metadata with the information about virtual columns */ + if (table->s->mysql_version < 100202 && table->s->virtual_fields) { + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); + } + if (high_level_read_only || srv_sys_space.created_new_raw() || srv_force_recovery) { @@ -1069,8 +1076,15 @@ ha_innobase::check_if_supported_inplace_alter( /* Compute the DEFAULT values of non-constant columns (VCOL_SESSION_FUNC | VCOL_TIME_FUNC). */ - (*af)->set_default(); - goto next_column; + switch ((*af)->set_default()) { + case 0: /* OK */ + case 3: /* DATETIME to TIME or DATE conversion */ + goto next_column; + case -1: /* OOM, or GEOMETRY type mismatch */ + case 1: /* A number adjusted to the min/max value */ + case 2: /* String truncation, or conversion problem */ + break; + } } DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h index e62a5e90ce248..f0948fdaebfce 100644 --- a/storage/innobase/include/btr0cur.h +++ b/storage/innobase/include/btr0cur.h @@ -661,7 +661,6 @@ btr_store_big_rec_extern_fields( btr_pcur_t* pcur, /*!< in/out: a persistent cursor. if btr_mtr is restarted, then this can be repositioned. */ - const upd_t* upd, /*!< in: update vector */ ulint* offsets, /*!< in/out: rec_get_offsets() on pcur. the "external storage" flags in offsets will correctly correspond diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h index d6fe41670d474..a0fd78e4e0d33 100644 --- a/storage/innobase/include/dict0dict.h +++ b/storage/innobase/include/dict0dict.h @@ -808,14 +808,6 @@ dict_table_get_n_user_cols( /*=======================*/ const dict_table_t* table) /*!< in: table */ MY_ATTRIBUTE((warn_unused_result)); -/** Gets the number of user-defined virtual and non-virtual columns in a table -in the dictionary cache. -@param[in] table table -@return number of user-defined (e.g., not ROW_ID) columns of a table */ -UNIV_INLINE -ulint -dict_table_get_n_tot_u_cols( - const dict_table_t* table); /********************************************************************//** Gets the number of all non-virtual columns (also system) in a table in the dictionary cache. diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic index b4e7d3e34c732..76e0c287444ac 100644 --- a/storage/innobase/include/dict0dict.ic +++ b/storage/innobase/include/dict0dict.ic @@ -400,22 +400,6 @@ dict_table_get_n_user_cols( return(table->n_cols - dict_table_get_n_sys_cols(table)); } -/** Gets the number of user-defined virtual and non-virtual columns in a table -in the dictionary cache. 
-@param[in] table table -@return number of user-defined (e.g., not ROW_ID) columns of a table */ -UNIV_INLINE -ulint -dict_table_get_n_tot_u_cols( - const dict_table_t* table) -{ - ut_ad(table); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - - return(dict_table_get_n_user_cols(table) - + dict_table_get_n_v_cols(table)); -} - /********************************************************************//** Gets the number of all non-virtual columns (also system) in a table in the dictionary cache. diff --git a/storage/innobase/include/fts0fts.h b/storage/innobase/include/fts0fts.h index f1d53165cdddf..30b8b66b83b38 100644 --- a/storage/innobase/include/fts0fts.h +++ b/storage/innobase/include/fts0fts.h @@ -579,7 +579,6 @@ fts_commit( @param[in] query_str FTS query @param[in] query_len FTS query string len in bytes @param[in,out] result result doc ids -@param[in] limit limit value @return DB_SUCCESS if successful otherwise error code */ dberr_t fts_query( @@ -588,8 +587,7 @@ fts_query( uint flags, const byte* query_str, ulint query_len, - fts_result_t** result, - ulonglong limit) + fts_result_t** result) MY_ATTRIBUTE((warn_unused_result)); /******************************************************************//** diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h index 1b90ea8d7e7f0..c0806ad297758 100644 --- a/storage/innobase/include/os0file.h +++ b/storage/innobase/include/os0file.h @@ -1232,19 +1232,27 @@ os_file_get_size( os_file_t file) MY_ATTRIBUTE((warn_unused_result)); -/** Write the specified number of zeros to a newly created file. -@param[in] name name of the file or path as a null-terminated - string -@param[in] file handle to a file -@param[in] size file size -@param[in] read_only Enable read-only checks if true -@return true if success */ +/** Extend a file. + +On Windows, extending a file allocates blocks for the file, +unless the file is sparse. + +On Unix, we extend the file with ftruncate() if the +file needs to be sparse. Otherwise posix_fallocate() is used +when available, and if not, binary zeroes are added to the end +of the file. + +@param[in] name file name +@param[in] file file handle +@param[in] size desired file size +@param[in] is_sparse whether to create a sparse file (no preallocation) +@return whether the operation succeeded */ bool os_file_set_size( const char* name, os_file_t file, os_offset_t size, - bool read_only) + bool is_sparse = false) MY_ATTRIBUTE((warn_unused_result)); /** Truncates a file at its current position. @@ -1575,8 +1583,10 @@ os_file_set_umask(ulint umask); Make file sparse, on Windows.
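A sketch of how call sites change with the new os_file_set_size() signature above; my_set_size() is a hypothetical stand-in that only mirrors the parameter list, including the defaulted is_sparse flag replacing read_only:

#include <cstdio>

static bool my_set_size(const char* name, int /*file*/,
                        unsigned long long size, bool is_sparse = false)
{
	std::printf("%s: %llu bytes, sparse=%d\n", name, size, (int)is_sparse);
	return true;
}

int main()
{
	my_set_size("t1.ibd", 3, 4ULL << 20);        // default: preallocate
	my_set_size("t2.ibd", 4, 4ULL << 20, true);  // page-compressed: sparse
	return 0;
}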
@param[in] file file handle +@param[in] is_sparse if true, make file sparse, + otherwise "unsparse" the file @return true on success, false on error */ -bool os_file_set_sparse_win32(os_file_t file); +bool os_file_set_sparse_win32(os_file_t file, bool is_sparse = true); /** Changes file size on Windows diff --git a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h index 8d3752974a628..a7a55d202e8cc 100644 --- a/storage/innobase/include/row0mysql.h +++ b/storage/innobase/include/row0mysql.h @@ -844,9 +844,6 @@ struct row_prebuilt_t { /** The MySQL table object */ TABLE* m_mysql_table; - - /** limit value to avoid fts result overflow */ - ulonglong m_fts_limit; }; /** Callback for row_mysql_sys_index_iterate() */ diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index a51520e881cb7..e24aa89f046e0 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -508,10 +508,12 @@ enum srv_operation_mode { SRV_OPERATION_NORMAL, /** Mariabackup taking a backup */ SRV_OPERATION_BACKUP, - /** Mariabackup restoring a backup */ + /** Mariabackup restoring a backup for subsequent --copy-back */ SRV_OPERATION_RESTORE, /** Mariabackup restoring the incremental part of a backup */ - SRV_OPERATION_RESTORE_DELTA + SRV_OPERATION_RESTORE_DELTA, + /** Mariabackup restoring a backup for subsequent --export */ + SRV_OPERATION_RESTORE_EXPORT }; /** Current mode of operation */ diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 0141d8105ed24..4853304e79109 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -6614,15 +6614,15 @@ lock_validate() Release both mutexes during the validation check. */ for (ulint i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) { - const lock_t* lock; ib_uint64_t limit = 0; - while ((lock = lock_rec_validate(i, &limit)) != 0) { - - ulint space = lock->un_member.rec_lock.space; - ulint page_no = lock->un_member.rec_lock.page_no; - - pages.insert(std::make_pair(space, page_no)); + while (const lock_t* lock = lock_rec_validate(i, &limit)) { + if (lock_rec_find_set_bit(lock) == ULINT_UNDEFINED) { + /* The lock bitmap is empty; ignore it. 
*/ + continue; + } + const lock_rec_t& l = lock->un_member.rec_lock; + pages.insert(std::make_pair(l.space, l.page_no)); } } diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index fd0940b08df40..a91b62d11d99c 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -1922,7 +1922,8 @@ void recv_apply_hashed_log_recs(bool last_batch) { ut_ad(srv_operation == SRV_OPERATION_NORMAL - || srv_operation == SRV_OPERATION_RESTORE); + || srv_operation == SRV_OPERATION_RESTORE + || srv_operation == SRV_OPERATION_RESTORE_EXPORT); mutex_enter(&recv_sys->mutex); @@ -1941,7 +1942,8 @@ recv_apply_hashed_log_recs(bool last_batch) ut_ad(!last_batch == log_mutex_own()); recv_no_ibuf_operations = !last_batch - || srv_operation == SRV_OPERATION_RESTORE; + || srv_operation == SRV_OPERATION_RESTORE + || srv_operation == SRV_OPERATION_RESTORE_EXPORT; ut_d(recv_no_log_write = recv_no_ibuf_operations); @@ -2960,7 +2962,8 @@ static dberr_t recv_init_missing_space(dberr_t err, const recv_spaces_t::const_iterator& i) { - if (srv_operation == SRV_OPERATION_RESTORE) { + if (srv_operation == SRV_OPERATION_RESTORE + || srv_operation == SRV_OPERATION_RESTORE_EXPORT) { ib::warn() << "Tablespace " << i->first << " was not" " found at " << i->second.name << " when" " restoring a (partial?) backup. All redo log" @@ -3118,7 +3121,8 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn) dberr_t err = DB_SUCCESS; ut_ad(srv_operation == SRV_OPERATION_NORMAL - || srv_operation == SRV_OPERATION_RESTORE); + || srv_operation == SRV_OPERATION_RESTORE + || srv_operation == SRV_OPERATION_RESTORE_EXPORT); /* Initialize red-black tree for fast insertions into the flush_list during recovery process. */ diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index c894a3c15abee..b5c6381537e85 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -4743,11 +4743,20 @@ Sets a sparse flag on Windows file. @param[in] file file handle @return true on success, false on error */ -bool os_file_set_sparse_win32(os_file_t file) +#include <versionhelpers.h> +bool os_file_set_sparse_win32(os_file_t file, bool is_sparse) { - + if (!is_sparse && !IsWindows8OrGreater()) { + /* Cannot unset the sparse flag on older Windows. + Before Windows 8 this is documented to produce unpredictable + results if the file has unallocated ranges.*/ + return false; + } DWORD temp; - return os_win32_device_io_control(file, FSCTL_SET_SPARSE, 0, 0, 0, 0,&temp); + FILE_SET_SPARSE_BUFFER sparse_buffer; + sparse_buffer.SetSparse = is_sparse; + return os_win32_device_io_control(file, + FSCTL_SET_SPARSE, &sparse_buffer, sizeof(sparse_buffer), 0, 0,&temp); } @@ -5319,23 +5328,73 @@ os_file_set_nocache( #endif /* _WIN32 */ -/** Write the specified number of zeros to a newly created file. -@param[in] name name of the file or path as a null-terminated - string -@param[in] file handle to a file -@param[in] size file size -@param[in] read_only Enable read-only checks if true -@return true if success */ +/** Extend a file. + +On Windows, extending a file allocates blocks for the file, +unless the file is sparse. + +On Unix, we extend the file with ftruncate() if the +file needs to be sparse. Otherwise posix_fallocate() is used +when available, and if not, binary zeroes are added to the end +of the file.
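A standalone Win32 sketch of the sparse-flag toggle shown above, using the documented DeviceIoControl()/FSCTL_SET_SPARSE interface directly instead of the InnoDB wrapper; the file name is hypothetical:

#include <windows.h>
#include <winioctl.h>
#include <stdio.h>

static BOOL set_sparse(HANDLE file, BOOLEAN on)
{
	FILE_SET_SPARSE_BUFFER buf;
	buf.SetSparse = on;
	DWORD bytes_returned = 0;
	// Passing the buffer lets the same call set or clear the flag;
	// the old call passed no buffer and could only set it.
	return DeviceIoControl(file, FSCTL_SET_SPARSE, &buf, sizeof buf,
	                       NULL, 0, &bytes_returned, NULL);
}

int main()
{
	HANDLE h = CreateFileA("sparse_demo.tmp", GENERIC_READ | GENERIC_WRITE,
	                       0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL,
	                       NULL);
	if (h == INVALID_HANDLE_VALUE) {
		return 1;
	}
	BOOL ok = set_sparse(h, TRUE);
	printf("sparse flag %s\n", ok ? "set" : "not set");
	CloseHandle(h);
	return ok ? 0 : 1;
}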
+ +@param[in] name file name +@param[in] file file handle +@param[in] size desired file size +@param[in] is_sparse whether to create a sparse file (no preallocation) +@return whether the operation succeeded */ bool os_file_set_size( const char* name, os_file_t file, os_offset_t size, - bool read_only) + bool is_sparse) { #ifdef _WIN32 + /* On Windows, changing file size works well and as expected for both + sparse and normal files. + + However, 10.2 up until 10.2.9 made every file sparse in innodb, + causing NTFS fragmentation issues (MDEV-13941). We try to undo + the damage, and unsparse the file.*/ + + if (!is_sparse && os_is_sparse_file_supported(file)) { + if (!os_file_set_sparse_win32(file, false)) + /* Unsparsing the file failed. Fall back to writing binary + zeros, to avoid even higher fragmentation.*/ + goto fallback; + } + return os_file_change_size_win32(name, file, size); -#endif + +fallback: +#else + if (is_sparse) { + bool success = !ftruncate(file, size); + if (!success) { + ib::error() << "ftruncate of file " << name << + " to " << size << " bytes failed with error " << errno; + } + return(success); + } + +# ifdef HAVE_POSIX_FALLOCATE + int err; + do { + err = posix_fallocate(file, 0, size); + } while (err == EINTR + && srv_shutdown_state == SRV_SHUTDOWN_NONE); + + if (err) { + ib::error() << + "preallocating " << size << " bytes for file " + << name << " failed with error " << err; + } + errno = err; + return(!err); + # endif /* HAVE_POSIX_FALLOCATE */ +#endif /* _WIN32 */ + /* Write up to 1 megabyte at a time. */ ulint buf_size = ut_min( static_cast(64), @@ -5353,13 +5412,14 @@ os_file_set_size( /* Write buffer full of zeros */ memset(buf, 0, buf_size); - if (size >= (os_offset_t) 100 << 20) { + os_offset_t current_size = os_file_get_size(file); + bool write_progress_info = + (size - current_size >= (os_offset_t) 100 << 20); + if (write_progress_info) { ib::info() << "Progress in MB:"; } - os_offset_t current_size = 0; - while (current_size < size) { ulint n_bytes; @@ -5382,8 +5442,9 @@ os_file_set_size( } /* Print about progress for each 100 MB written */ - if ((current_size + n_bytes) / (100 << 20) - != current_size / (100 << 20)) { + if (write_progress_info && + ((current_size + n_bytes) / (100 << 20) + != current_size / (100 << 20))) { fprintf(stderr, " %lu00", (ulong) ((current_size + n_bytes) @@ -5393,7 +5454,7 @@ os_file_set_size( current_size += n_bytes; } - if (size >= (os_offset_t) 100 << 20) { + if (write_progress_info) { fprintf(stderr, "\n"); } @@ -5578,10 +5639,11 @@ os_is_sparse_file_supported(os_file_t fh) ); #ifdef _WIN32 - BY_HANDLE_FILE_INFORMATION info; - if (GetFileInformationByHandle(fh,&info)) { - if (info.dwFileAttributes != INVALID_FILE_ATTRIBUTES) { - return (info.dwFileAttributes & FILE_ATTRIBUTE_SPARSE_FILE) != 0; + FILE_ATTRIBUTE_TAG_INFO info; + if (GetFileInformationByHandleEx(fh, FileAttributeTagInfo, + &info, (DWORD)sizeof(info))) { + if (info.FileAttributes != INVALID_FILE_ATTRIBUTES) { + return (info.FileAttributes & FILE_ATTRIBUTE_SPARSE_FILE) != 0; } } return false; diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index 9b9d19ae96009..0818585b00c9c 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -398,7 +398,7 @@ row_ins_clust_index_entry_by_modify( DEBUG_SYNC_C("before_row_ins_upd_extern"); err = btr_store_big_rec_extern_fields( - pcur, update, *offsets, big_rec, mtr, + pcur, *offsets, big_rec, mtr, BTR_STORE_INSERT_UPDATE); DEBUG_SYNC_C("after_row_ins_upd_extern");
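A POSIX-only sketch of the extension strategy described in the os_file_set_size() doc comment above, assuming posix_fallocate() is available; the real loop's srv_shutdown_state check is left out, and the file name is hypothetical:

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Sparse files are only logically extended; regular files are
   physically preallocated, retrying on EINTR as the patch does. */
static int extend_file(int fd, off_t size, bool sparse)
{
	if (sparse) {
		return ftruncate(fd, size);
	}
	int err;
	do {
		err = posix_fallocate(fd, 0, size);
	} while (err == EINTR);
	errno = err;  /* posix_fallocate() returns the error, not errno */
	return err ? -1 : 0;
}

int main()
{
	int fd = open("extend_demo.tmp", O_RDWR | O_CREAT, 0600);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	int rc = extend_file(fd, off_t(1) << 20, false);  /* 1 MiB */
	if (rc) perror("extend_file");
	close(fd);
	return rc ? 1 : 0;
}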
dtuple_big_rec_free(big_rec); @@ -2502,7 +2502,7 @@ row_ins_index_entry_big_rec( DEBUG_SYNC_C_IF_THD(thd, "before_row_ins_extern"); error = btr_store_big_rec_extern_fields( - &pcur, 0, offsets, big_rec, &mtr, BTR_STORE_INSERT); + &pcur, offsets, big_rec, &mtr, BTR_STORE_INSERT); DEBUG_SYNC_C_IF_THD(thd, "after_row_ins_extern"); if (error == DB_SUCCESS diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc index 747959fcde53b..3fc7deab4aef5 100644 --- a/storage/innobase/row/row0log.cc +++ b/storage/innobase/row/row0log.cc @@ -2307,7 +2307,7 @@ row_log_table_apply_update( if (big_rec) { if (error == DB_SUCCESS) { error = btr_store_big_rec_extern_fields( - &pcur, update, cur_offsets, big_rec, &mtr, + &pcur, cur_offsets, big_rec, &mtr, BTR_STORE_UPDATE); } diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc index d78c363ff733d..8a67290b070eb 100644 --- a/storage/innobase/row/row0upd.cc +++ b/storage/innobase/row/row0upd.cc @@ -2895,8 +2895,7 @@ row_upd_clust_rec( DEBUG_SYNC_C("before_row_upd_extern"); err = btr_store_big_rec_extern_fields( - pcur, node->update, offsets, big_rec, mtr, - BTR_STORE_UPDATE); + pcur, offsets, big_rec, mtr, BTR_STORE_UPDATE); DEBUG_SYNC_C("after_row_upd_extern"); } diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index d7e1e062d7ade..d6dd580518614 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -378,8 +378,7 @@ create_log_file( ib::info() << "Setting log file " << name << " size to " << srv_log_file_size << " bytes"; - ret = os_file_set_size(name, *file, srv_log_file_size, - srv_read_only_mode); + ret = os_file_set_size(name, *file, srv_log_file_size); if (!ret) { ib::error() << "Cannot set log file " << name << " size to " << srv_log_file_size << " bytes"; @@ -398,13 +397,14 @@ create_log_file( /** Delete all log files. @param[in,out] logfilename buffer for log file name @param[in] dirnamelen length of the directory path -@param[in] n_files number of files to delete */ +@param[in] n_files number of files to delete +@param[in] i first file to delete */ static void -delete_log_files(char* logfilename, size_t dirnamelen, unsigned n_files) +delete_log_files(char* logfilename, size_t dirnamelen, uint n_files, uint i=0) { /* Remove any old log files. 
*/ - for (unsigned i = 0; i < n_files; i++) { + for (; i < n_files; i++) { sprintf(logfilename + dirnamelen, "ib_logfile%u", i); /* Ignore errors about non-existent files or files @@ -658,8 +658,7 @@ srv_undo_tablespace_create( << "wait..."; ret = os_file_set_size( - name, fh, size << UNIV_PAGE_SIZE_SHIFT, - srv_read_only_mode); + name, fh, os_offset_t(size) << UNIV_PAGE_SIZE_SHIFT); if (!ret) { ib::info() << "Error in creating " << name @@ -913,6 +912,7 @@ srv_undo_tablespaces_init(bool create_new_db) } /* fall through */ case SRV_OPERATION_RESTORE: + case SRV_OPERATION_RESTORE_EXPORT: ut_ad(!create_new_db); /* Check if any of the UNDO tablespace needs fix-up because @@ -1323,6 +1323,7 @@ srv_shutdown_all_bg_threads() break; case SRV_OPERATION_NORMAL: case SRV_OPERATION_RESTORE: + case SRV_OPERATION_RESTORE_EXPORT: if (!buf_page_cleaner_is_active && os_aio_all_slots_free()) { os_aio_wake_all_threads_at_shutdown(); @@ -1494,7 +1495,8 @@ innobase_start_or_create_for_mysql() unsigned i = 0; ut_ad(srv_operation == SRV_OPERATION_NORMAL - || srv_operation == SRV_OPERATION_RESTORE); + || srv_operation == SRV_OPERATION_RESTORE + || srv_operation == SRV_OPERATION_RESTORE_EXPORT); if (srv_force_recovery == SRV_FORCE_NO_LOG_REDO) { srv_read_only_mode = true; @@ -1984,7 +1986,9 @@ innobase_start_or_create_for_mysql() if (err == DB_NOT_FOUND) { if (i == 0) { if (srv_operation - == SRV_OPERATION_RESTORE) { + == SRV_OPERATION_RESTORE + || srv_operation + == SRV_OPERATION_RESTORE_EXPORT) { return(DB_SUCCESS); } if (flushed_lsn @@ -2048,6 +2052,26 @@ innobase_start_or_create_for_mysql() } if (i == 0) { + if (size == 0 + && (srv_operation + == SRV_OPERATION_RESTORE + || srv_operation + == SRV_OPERATION_RESTORE_EXPORT)) { + /* Tolerate an empty ib_logfile0 + from a previous run of + mariabackup --prepare. */ + return(DB_SUCCESS); + } + /* The first log file must consist of + at least the following 512-byte pages: + header, checkpoint page 1, empty, + checkpoint page 2, redo log page(s) */ + if (size <= OS_FILE_LOG_BLOCK_SIZE * 4) { + ib::error() << "Log file " + << logfilename << " size " + << size << " is too small"; + return(srv_init_abort(DB_ERROR)); + } srv_log_file_size = size; } else if (size != srv_log_file_size) { @@ -2314,11 +2338,13 @@ innobase_start_or_create_for_mysql() recv_recovery_from_checkpoint_finish(); - if (srv_operation == SRV_OPERATION_RESTORE) { + if (srv_operation == SRV_OPERATION_RESTORE + || srv_operation == SRV_OPERATION_RESTORE_EXPORT) { /* After applying the redo log from SRV_OPERATION_BACKUP, flush the changes - to the data files and delete the log file. - No further change to InnoDB files is needed. */ + to the data files and truncate or delete the log. + Unless --export is specified, no further change to + InnoDB files is needed. */ ut_ad(!srv_force_recovery); ut_ad(srv_n_log_files_found <= 1); ut_ad(recv_no_log_write); @@ -2328,8 +2354,18 @@ innobase_start_or_create_for_mysql() fil_close_log_files(true); log_group_close_all(); if (err == DB_SUCCESS) { + bool trunc = srv_operation + == SRV_OPERATION_RESTORE; + /* Delete subsequent log files. */ delete_log_files(logfilename, dirnamelen, - srv_n_log_files_found); + srv_n_log_files_found, trunc); + if (trunc) { + /* Truncate the first log file. 
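A user-space sketch of the restore cleanup performed above: ib_logfile0 is truncated to zero bytes (so a later run can detect a prepared backup), and the remaining log files are deleted. The fixed upper bound is an arbitrary stand-in for srv_n_log_files_found, and the files are assumed to be in the current directory.

#include <cstdio>

int main()
{
	// Truncate ib_logfile0 to zero length, as the hunk above does
	// with fopen(..., "w").
	if (FILE* f = std::fopen("ib_logfile0", "w")) {
		std::fclose(f);
	}
	// Delete any further log files; missing files are ignored.
	for (unsigned i = 1; i < 100; i++) {
		char name[32];
		std::snprintf(name, sizeof name, "ib_logfile%u", i);
		std::remove(name);
	}
	return 0;
}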
*/ + strcpy(logfilename + dirnamelen, + "ib_logfile0"); + FILE* f = fopen(logfilename, "w"); + fclose(f); + } } return(err); } @@ -2794,6 +2830,7 @@ innodb_shutdown() case SRV_OPERATION_BACKUP: case SRV_OPERATION_RESTORE: case SRV_OPERATION_RESTORE_DELTA: + case SRV_OPERATION_RESTORE_EXPORT: fil_close_all_files(); break; case SRV_OPERATION_NORMAL: diff --git a/storage/innobase/sync/sync0rw.cc b/storage/innobase/sync/sync0rw.cc index 6322b14335fd4..37b649107138c 100644 --- a/storage/innobase/sync/sync0rw.cc +++ b/storage/innobase/sync/sync0rw.cc @@ -84,10 +84,15 @@ lock_word < -(X_LOCK_DECR + X_LOCK_HALF_DECR): 2 - (lock_word + X_LOCK_DECR + X_LOCK_HALF_DECR) LOCK COMPATIBILITY MATRIX - S SX X - S + + - - SX + - - - X - - - + + | S|SX| X| + --+--+--+--+ + S| +| +| -| + --+--+--+--+ + SX| +| -| -| + --+--+--+--+ + X| -| -| -| + --+--+--+--+ The lock_word is always read and updated atomically and consistently, so that it always represents the state of the lock, and the state of the lock changes diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index 2408e4bdaf454..ffba8f314fbdc 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -620,6 +620,7 @@ trx_free_prepared( && trx->is_recovered && (!srv_was_started || srv_operation == SRV_OPERATION_RESTORE + || srv_operation == SRV_OPERATION_RESTORE_EXPORT || srv_read_only_mode || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO))); ut_a(trx->magic_n == TRX_MAGIC_N); diff --git a/storage/rocksdb/CMakeLists.txt b/storage/rocksdb/CMakeLists.txt index 6db82119b45d1..395535660628f 100644 --- a/storage/rocksdb/CMakeLists.txt +++ b/storage/rocksdb/CMakeLists.txt @@ -21,6 +21,7 @@ IF(HAVE_SCHED_GETCPU) ADD_DEFINITIONS(-DHAVE_SCHED_GETCPU=1 -DROCKSDB_SCHED_GETCPU_PRESENT) ENDIF() + # We've had our builders hang during the build process. This prevents MariaRocks # to be built on 32 bit intel OS kernels. IF(CMAKE_SYSTEM_PROCESSOR MATCHES "i[36]86") @@ -79,6 +80,8 @@ SET(ROCKSDB_SE_SOURCES ha_rocksdb.h rdb_i_s.cc rdb_i_s.h + rdb_io_watchdog.h + rdb_io_watchdog.cc rdb_mutex_wrapper.cc rdb_mutex_wrapper.h rdb_index_merge.cc @@ -96,6 +99,11 @@ SET(ROCKSDB_SE_SOURCES rdb_psi.cc ) +# MariaDB: the following is added in build_rocksdb.cmake, when appropriate: +# This is a strong requirement coming from RocksDB. No conditional checks here. +#ADD_DEFINITIONS(-DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX +#) + MYSQL_ADD_PLUGIN(rocksdb ${ROCKSDB_SE_SOURCES} STORAGE_ENGINE MODULE_OUTPUT_NAME ha_rocksdb COMPONENT rocksdb-engine) @@ -105,11 +113,6 @@ IF(NOT TARGET rocksdb) RETURN() ENDIF() -# MARIAROCKS-TODO: ??? -CHECK_FUNCTION_EXISTS(fallocate HAVE_FALLOCATE) -IF(HAVE_FALLOCATE) - ADD_DEFINITIONS(-DROCKSDB_FALLOCATE_PRESENT) -ENDIF() CHECK_CXX_SOURCE_COMPILES(" @@ -138,7 +141,6 @@ ADD_CONVENIENCE_LIBRARY(rocksdb_aux_lib rdb_perf_context.h rdb_sst_info.cc rdb_sst_info.h - rdb_io_watchdog.cc rdb_io_watchdog.h rdb_buff.h rdb_mariadb_port.h ) @@ -169,7 +171,7 @@ IF(CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") # (also had to add -frtti above, because something that event_listener.cc # includes requires it. 
So, now everything in MariaRocks is compiled with # -frtti) - set_source_files_properties(event_listener.cc rdb_cf_options.cc + set_source_files_properties(event_listener.cc rdb_cf_options.cc rdb_sst_info.cc PROPERTIES COMPILE_FLAGS -frtti) ENDIF() @@ -178,6 +180,12 @@ IF(HAVE_SCHED_GETCPU) ADD_DEFINITIONS(-DHAVE_SCHED_GETCPU=1) ENDIF() +IF (NOT "$ENV{WITH_TBB}" STREQUAL "") + SET(rocksdb_static_libs ${rocksdb_static_libs} + $ENV{WITH_TBB}/libtbb${PIC_EXT}.a) + ADD_DEFINITIONS(-DTBB) +ENDIF() + # # MariaDB: Dynamic plugin build is not suitable with unittest ATM # @@ -189,6 +197,7 @@ if (UNIX AND NOT APPLE) SET(rocksdb_static_libs ${rocksdb_static_libs} "-lrt") endif() + ADD_LIBRARY(rocksdb_tools STATIC rocksdb/tools/ldb_tool.cc rocksdb/tools/ldb_cmd.cc diff --git a/storage/rocksdb/build_rocksdb.cmake b/storage/rocksdb/build_rocksdb.cmake index f0d19dd019f7e..f7a2cdda46f2b 100644 --- a/storage/rocksdb/build_rocksdb.cmake +++ b/storage/rocksdb/build_rocksdb.cmake @@ -158,13 +158,13 @@ set(ROCKSDB_SOURCES db/convenience.cc db/db_filesnapshot.cc db/db_impl.cc - db/db_impl_write.cc db/db_impl_compaction_flush.cc - db/db_impl_files.cc - db/db_impl_open.cc db/db_impl_debug.cc db/db_impl_experimental.cc + db/db_impl_files.cc + db/db_impl_open.cc db/db_impl_readonly.cc + db/db_impl_write.cc db/db_info_dumper.cc db/db_iter.cc db/dbformat.cc @@ -204,9 +204,14 @@ set(ROCKSDB_SOURCES env/mock_env.cc memtable/alloc_tracker.cc memtable/hash_cuckoo_rep.cc + memtable/hash_cuckoo_rep.cc memtable/hash_linklist_rep.cc + memtable/hash_linklist_rep.cc + memtable/hash_skiplist_rep.cc memtable/hash_skiplist_rep.cc memtable/skiplistrep.cc + memtable/skiplistrep.cc + memtable/vectorrep.cc memtable/vectorrep.cc memtable/write_buffer_manager.cc monitoring/histogram.cc @@ -218,7 +223,6 @@ set(ROCKSDB_SOURCES monitoring/statistics.cc monitoring/thread_status_impl.cc monitoring/thread_status_updater.cc - monitoring/thread_status_updater_debug.cc monitoring/thread_status_util.cc monitoring/thread_status_util_debug.cc options/cf_options.cc @@ -248,7 +252,6 @@ set(ROCKSDB_SOURCES table/iterator.cc table/merging_iterator.cc table/meta_blocks.cc - table/mock_table.cc table/partitioned_filter_block.cc table/persistent_cache_helper.cc table/plain_table_builder.cc @@ -297,13 +300,6 @@ set(ROCKSDB_SOURCES util/xxhash.cc utilities/backupable/backupable_db.cc utilities/blob_db/blob_db.cc - utilities/blob_db/blob_db_impl.cc - utilities/blob_db/blob_db_options_impl.cc - utilities/blob_db/blob_dump_tool.cc - utilities/blob_db/blob_file.cc - utilities/blob_db/blob_log_format.cc - utilities/blob_db/blob_log_reader.cc - utilities/blob_db/blob_log_writer.cc utilities/checkpoint/checkpoint_impl.cc utilities/col_buf_decoder.cc utilities/col_buf_encoder.cc @@ -315,7 +311,6 @@ set(ROCKSDB_SOURCES utilities/document/json_document.cc utilities/document/json_document_builder.cc utilities/env_mirror.cc - utilities/env_timed.cc utilities/geodb/geodb_impl.cc utilities/leveldb_options/leveldb_options.cc utilities/lua/rocks_lua_compaction_filter.cc @@ -337,16 +332,17 @@ set(ROCKSDB_SOURCES utilities/spatialdb/spatial_db.cc utilities/table_properties_collectors/compact_on_deletion_collector.cc utilities/transactions/optimistic_transaction_db_impl.cc - utilities/transactions/optimistic_transaction_impl.cc + utilities/transactions/pessimistic_transaction.cc + utilities/transactions/pessimistic_transaction_db.cc utilities/transactions/transaction_base.cc - utilities/transactions/transaction_db_impl.cc utilities/transactions/transaction_db_mutex_impl.cc 
- utilities/transactions/transaction_impl.cc utilities/transactions/transaction_lock_mgr.cc utilities/transactions/transaction_util.cc + utilities/transactions/write_prepared_txn.cc utilities/ttl/db_ttl_impl.cc utilities/write_batch_with_index/write_batch_with_index.cc utilities/write_batch_with_index/write_batch_with_index_internal.cc + ) if(WIN32) @@ -382,3 +378,4 @@ target_link_libraries(rocksdblib ${THIRDPARTY_LIBS} ${SYSTEM_LIBS}) IF(CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") set_target_properties(rocksdblib PROPERTIES COMPILE_FLAGS "-fPIC -fno-builtin-memcmp -frtti") endif() + diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index 4915617e3aaf1..b28961197fa96 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -43,6 +43,7 @@ #include "./debug_sync.h" #include "./my_bit.h" #include "./my_stacktrace.h" +#include "./my_sys.h" #include "./sql_audit.h" #include "./sql_table.h" #include "./sql_hset.h" @@ -67,6 +68,7 @@ #include "rocksdb/utilities/checkpoint.h" #include "rocksdb/utilities/convenience.h" #include "rocksdb/utilities/memory_util.h" +#include "rocksdb/utilities/sim_cache.h" #include "util/stop_watch.h" /* MyRocks includes */ @@ -114,6 +116,7 @@ namespace myrocks { static st_global_stats global_stats; static st_export_stats export_stats; static st_memory_stats memory_stats; +static st_io_stall_stats io_stall_stats; const std::string DEFAULT_CF_NAME("default"); const std::string DEFAULT_SYSTEM_CF_NAME("__system__"); @@ -170,7 +173,6 @@ static std::shared_ptr properties_collector_factory; Rdb_dict_manager dict_manager; Rdb_cf_manager cf_manager; Rdb_ddl_manager ddl_manager; -const char *m_mysql_gtid; Rdb_binlog_manager binlog_manager; #if !defined(_WIN32) && !defined(__APPLE__) @@ -406,6 +408,10 @@ static void rocksdb_set_delayed_write_rate(THD *thd, struct st_mysql_sys_var *var, void *var_ptr, const void *save); +static void rocksdb_set_max_latest_deadlocks(THD *thd, + struct st_mysql_sys_var *var, + void *var_ptr, const void *save); + static void rdb_set_collation_exception_list(const char *exception_list); static void rocksdb_set_collation_exception_list(THD *thd, struct st_mysql_sys_var *var, @@ -422,6 +428,10 @@ rocksdb_set_bulk_load(THD *thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)), void *var_ptr, const void *save); +static void rocksdb_set_bulk_load_allow_unsorted( + THD *thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)), + void *var_ptr, const void *save); + static void rocksdb_set_max_background_jobs(THD *thd, struct st_mysql_sys_var *const var, void *const var_ptr, @@ -430,12 +440,15 @@ static void rocksdb_set_max_background_jobs(THD *thd, // Options definitions ////////////////////////////////////////////////////////////////////////////// static long long rocksdb_block_cache_size; +static long long rocksdb_sim_cache_size; +static my_bool rocksdb_use_clock_cache; /* Use unsigned long long instead of uint64_t because of MySQL compatibility */ static unsigned long long // NOLINT(runtime/int) rocksdb_rate_limiter_bytes_per_sec; static unsigned long long // NOLINT(runtime/int) rocksdb_sst_mgr_rate_bytes_per_sec; static unsigned long long rocksdb_delayed_write_rate; +static uint32_t rocksdb_max_latest_deadlocks; static unsigned long // NOLINT(runtime/int) rocksdb_persistent_cache_size_mb; static ulong rocksdb_info_log_level; @@ -445,6 +458,7 @@ static ulong rocksdb_index_type; static uint32_t rocksdb_flush_log_at_trx_commit; static uint32_t 
rocksdb_debug_optimizer_n_rows; static my_bool rocksdb_force_compute_memtable_stats; +static uint32_t rocksdb_force_compute_memtable_stats_cachetime; static my_bool rocksdb_debug_optimizer_no_zero_cardinality; static uint32_t rocksdb_wal_recovery_mode; static uint32_t rocksdb_access_hint_on_compaction_start; @@ -462,6 +476,7 @@ static my_bool rocksdb_enable_ttl_read_filtering = 1; static int rocksdb_debug_ttl_rec_ts = 0; static int rocksdb_debug_ttl_snapshot_ts = 0; static int rocksdb_debug_ttl_read_filter_ts = 0; +static my_bool rocksdb_debug_ttl_ignore_pk = 0; static my_bool rocksdb_reset_stats = 0; static uint32_t rocksdb_io_write_timeout_secs = 0; static uint64_t rocksdb_number_stat_computes = 0; @@ -474,6 +489,7 @@ static char *rocksdb_datadir; static uint32_t rocksdb_table_stats_sampling_pct; static my_bool rocksdb_enable_bulk_load_api = 1; static my_bool rocksdb_print_snapshot_conflict_queries = 0; +static my_bool rocksdb_large_prefix = 0; char *compression_types_val= const_cast(get_rocksdb_supported_compression_types()); @@ -489,6 +505,8 @@ static std::unique_ptr rdb_init_rocksdb_db_options(void) { o->info_log_level = rocksdb::InfoLogLevel::INFO_LEVEL; o->max_subcompactions = DEFAULT_SUBCOMPACTIONS; + o->concurrent_prepare = true; + o->manual_wal_flush = true; return o; } @@ -583,9 +601,12 @@ const size_t RDB_DEFAULT_MERGE_BUF_SIZE = 64 * 1024 * 1024; const size_t RDB_MIN_MERGE_BUF_SIZE = 100; const size_t RDB_DEFAULT_MERGE_COMBINE_READ_SIZE = 1024 * 1024 * 1024; const size_t RDB_MIN_MERGE_COMBINE_READ_SIZE = 100; +const size_t RDB_DEFAULT_MERGE_TMP_FILE_REMOVAL_DELAY = 0; +const size_t RDB_MIN_MERGE_TMP_FILE_REMOVAL_DELAY = 0; const int64 RDB_DEFAULT_BLOCK_CACHE_SIZE = 512 * 1024 * 1024; const int64 RDB_MIN_BLOCK_CACHE_SIZE = 1024; const int RDB_MAX_CHECKSUMS_PCT = 100; +const ulong RDB_DEADLOCK_DETECT_DEPTH = 50; // TODO: 0 means don't wait at all, and we don't support it yet? static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG, @@ -596,6 +617,14 @@ static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG, static MYSQL_THDVAR_BOOL(deadlock_detect, PLUGIN_VAR_RQCMDARG, "Enables deadlock detection", nullptr, nullptr, FALSE); +static MYSQL_THDVAR_ULONG(deadlock_detect_depth, PLUGIN_VAR_RQCMDARG, + "Number of transactions deadlock detection will " + "traverse through before assuming deadlock", + nullptr, nullptr, + /*default*/ RDB_DEADLOCK_DETECT_DEPTH, + /*min*/ 2, + /*max*/ ULONG_MAX, 0); + static MYSQL_THDVAR_BOOL( trace_sst_api, PLUGIN_VAR_RQCMDARG, "Generate trace output in the log for each call to the SstFileWriter", @@ -607,6 +636,11 @@ static MYSQL_THDVAR_BOOL( "unique_checks and enables rocksdb_commit_in_the_middle.", nullptr, rocksdb_set_bulk_load, FALSE); +static MYSQL_THDVAR_BOOL(bulk_load_allow_unsorted, PLUGIN_VAR_RQCMDARG, + "Allow unsorted input during bulk-load. " + "Can be changed only when bulk load is disabled.", + nullptr, rocksdb_set_bulk_load_allow_unsorted, FALSE); + static MYSQL_SYSVAR_BOOL(enable_bulk_load_api, rocksdb_enable_bulk_load_api, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "Enables using SstFileWriter for bulk loading", @@ -686,6 +720,18 @@ static MYSQL_THDVAR_ULONGLONG( /* min (100B) */ RDB_MIN_MERGE_COMBINE_READ_SIZE, /* max */ SIZE_T_MAX, 1); +static MYSQL_THDVAR_ULONGLONG( + merge_tmp_file_removal_delay_ms, PLUGIN_VAR_RQCMDARG, + "Fast index creation creates a large tmp file on disk during index " + "creation. Removing this large file all at once when index creation is " + "complete can cause trim stalls on Flash. 
This variable specifies a " + "duration to sleep (in milliseconds) between calling chsize() to truncate " + "the file in chunks. The chunk size is the same as merge_buf_size.", + nullptr, nullptr, + /* default (0ms) */ RDB_DEFAULT_MERGE_TMP_FILE_REMOVAL_DELAY, + /* min (0ms) */ RDB_MIN_MERGE_TMP_FILE_REMOVAL_DELAY, + /* max */ SIZE_T_MAX, 1); + static MYSQL_SYSVAR_BOOL( create_if_missing, *reinterpret_cast(&rocksdb_db_options->create_if_missing), @@ -693,6 +739,20 @@ static MYSQL_SYSVAR_BOOL( "DBOptions::create_if_missing for RocksDB", nullptr, nullptr, rocksdb_db_options->create_if_missing); +static MYSQL_SYSVAR_BOOL( + concurrent_prepare, + *reinterpret_cast(&rocksdb_db_options->concurrent_prepare), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::concurrent_prepare for RocksDB", nullptr, nullptr, + rocksdb_db_options->concurrent_prepare); + +static MYSQL_SYSVAR_BOOL( + manual_wal_flush, + *reinterpret_cast(&rocksdb_db_options->manual_wal_flush), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::manual_wal_flush for RocksDB", nullptr, nullptr, + rocksdb_db_options->manual_wal_flush); + static MYSQL_SYSVAR_BOOL( create_missing_column_families, *reinterpret_cast( @@ -736,6 +796,13 @@ static MYSQL_SYSVAR_ULONGLONG(delayed_write_rate, rocksdb_delayed_write_rate, rocksdb_db_options->delayed_write_rate, 0, UINT64_MAX, 0); +static MYSQL_SYSVAR_UINT(max_latest_deadlocks, rocksdb_max_latest_deadlocks, + PLUGIN_VAR_RQCMDARG, + "Maximum number of recent " + "deadlocks to store", + nullptr, rocksdb_set_max_latest_deadlocks, + rocksdb::kInitialMaxDeadlocks, 0, UINT32_MAX, 0); + static MYSQL_SYSVAR_ENUM( info_log_level, rocksdb_info_log_level, PLUGIN_VAR_RQCMDARG, "Filter level for info logs to be written mysqld error log. " @@ -1011,6 +1078,22 @@ static MYSQL_SYSVAR_LONGLONG(block_cache_size, rocksdb_block_cache_size, /* max */ LONGLONG_MAX, /* Block size */ RDB_MIN_BLOCK_CACHE_SIZE); +static MYSQL_SYSVAR_LONGLONG(sim_cache_size, rocksdb_sim_cache_size, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Simulated cache size for RocksDB", nullptr, + nullptr, + /* default */ 0, + /* min */ 0, + /* max */ LONGLONG_MAX, + /* Block size */ 0); + +static MYSQL_SYSVAR_BOOL( + use_clock_cache, + rocksdb_use_clock_cache, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Use ClockCache instead of default LRUCache for RocksDB", + nullptr, nullptr, false); + static MYSQL_SYSVAR_BOOL( cache_index_and_filter_blocks, *reinterpret_cast( @@ -1094,17 +1177,26 @@ static MYSQL_SYSVAR_STR(override_cf_options, rocksdb_override_cf_options, ""); static MYSQL_SYSVAR_STR(update_cf_options, rocksdb_update_cf_options, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC, - //psergey-july-merge:TODO: need this: | PLUGIN_VAR_ALLOCATED, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC + /* psergey-merge: need this? : PLUGIN_VAR_ALLOCATED*/, "Option updates per column family for RocksDB", nullptr, rocksdb_set_update_cf_options, nullptr); +enum rocksdb_flush_log_at_trx_commit_type : unsigned int { + FLUSH_LOG_NEVER = 0, + FLUSH_LOG_SYNC, + FLUSH_LOG_BACKGROUND, + FLUSH_LOG_MAX /* must be last */ +}; + static MYSQL_SYSVAR_UINT(flush_log_at_trx_commit, rocksdb_flush_log_at_trx_commit, PLUGIN_VAR_RQCMDARG, "Sync on transaction commit. Similar to " "innodb_flush_log_at_trx_commit. 
1: sync on commit, " "0,2: not sync on commit", - nullptr, nullptr, 1, 0, 2, 0); + nullptr, nullptr, /* default */ FLUSH_LOG_SYNC, + /* min */ FLUSH_LOG_NEVER, + /* max */ FLUSH_LOG_BACKGROUND, 0); static MYSQL_THDVAR_BOOL(write_disable_wal, PLUGIN_VAR_RQCMDARG, "WriteOptions::disableWAL for RocksDB", nullptr, @@ -1148,6 +1240,13 @@ static MYSQL_SYSVAR_BOOL(force_compute_memtable_stats, "Force to always compute memtable stats", nullptr, nullptr, TRUE); +static MYSQL_SYSVAR_UINT(force_compute_memtable_stats_cachetime, + rocksdb_force_compute_memtable_stats_cachetime, + PLUGIN_VAR_RQCMDARG, + "Time in usecs to cache memtable estimates", nullptr, + nullptr, /* default */ 60 * 1000 * 1000, + /* min */ 0, /* max */ INT_MAX, 0); + static MYSQL_SYSVAR_BOOL( debug_optimizer_no_zero_cardinality, rocksdb_debug_optimizer_no_zero_cardinality, PLUGIN_VAR_RQCMDARG, @@ -1214,6 +1313,12 @@ static MYSQL_SYSVAR_INT( "is not set. This variable is a no-op in non-debug builds.", nullptr, nullptr, 0, /* min */ -3600, /* max */ 3600, 0); +static MYSQL_SYSVAR_BOOL( + debug_ttl_ignore_pk, rocksdb_debug_ttl_ignore_pk, PLUGIN_VAR_RQCMDARG, + "For debugging purposes only. If true, compaction filtering will not occur " + "on PK TTL data. This variable is a no-op in non-debug builds.", + nullptr, nullptr, FALSE); + static MYSQL_SYSVAR_BOOL( reset_stats, rocksdb_reset_stats, PLUGIN_VAR_RQCMDARG, "Reset the RocksDB internal statistics without restarting the DB.", nullptr, @@ -1376,15 +1481,23 @@ static MYSQL_SYSVAR_UINT( RDB_DEFAULT_TBL_STATS_SAMPLE_PCT, /* everything */ 0, /* max */ RDB_TBL_STATS_SAMPLE_PCT_MAX, 0); +static MYSQL_SYSVAR_BOOL( + large_prefix, rocksdb_large_prefix, PLUGIN_VAR_RQCMDARG, + "Support large index prefix length of 3072 bytes. If off, the maximum " + "index prefix length is 767.", + nullptr, nullptr, FALSE); + static const int ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE = 100; static struct st_mysql_sys_var *rocksdb_system_variables[] = { MYSQL_SYSVAR(lock_wait_timeout), MYSQL_SYSVAR(deadlock_detect), + MYSQL_SYSVAR(deadlock_detect_depth), MYSQL_SYSVAR(max_row_locks), MYSQL_SYSVAR(write_batch_max_bytes), MYSQL_SYSVAR(lock_scanned_rows), MYSQL_SYSVAR(bulk_load), + MYSQL_SYSVAR(bulk_load_allow_unsorted), MYSQL_SYSVAR(skip_unique_check_tables), MYSQL_SYSVAR(trace_sst_api), MYSQL_SYSVAR(commit_in_the_middle), @@ -1395,15 +1508,19 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = { MYSQL_SYSVAR(enable_bulk_load_api), MYSQL_SYSVAR(tmpdir), MYSQL_SYSVAR(merge_combine_read_size), + MYSQL_SYSVAR(merge_tmp_file_removal_delay_ms), MYSQL_SYSVAR(skip_bloom_filter_on_read), MYSQL_SYSVAR(create_if_missing), + MYSQL_SYSVAR(concurrent_prepare), + MYSQL_SYSVAR(manual_wal_flush), MYSQL_SYSVAR(create_missing_column_families), MYSQL_SYSVAR(error_if_exists), MYSQL_SYSVAR(paranoid_checks), MYSQL_SYSVAR(rate_limiter_bytes_per_sec), MYSQL_SYSVAR(sst_mgr_rate_bytes_per_sec), MYSQL_SYSVAR(delayed_write_rate), + MYSQL_SYSVAR(max_latest_deadlocks), MYSQL_SYSVAR(info_log_level), MYSQL_SYSVAR(max_open_files), MYSQL_SYSVAR(max_total_wal_size), @@ -1443,6 +1560,8 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = { MYSQL_SYSVAR(enable_write_thread_adaptive_yield), MYSQL_SYSVAR(block_cache_size), + MYSQL_SYSVAR(sim_cache_size), + MYSQL_SYSVAR(use_clock_cache), MYSQL_SYSVAR(cache_index_and_filter_blocks), MYSQL_SYSVAR(pin_l0_filter_and_index_blocks_in_cache), MYSQL_SYSVAR(index_type), @@ -1468,6 +1587,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = { MYSQL_SYSVAR(force_index_records_in_range), 
MYSQL_SYSVAR(debug_optimizer_n_rows), MYSQL_SYSVAR(force_compute_memtable_stats), + MYSQL_SYSVAR(force_compute_memtable_stats_cachetime), MYSQL_SYSVAR(debug_optimizer_no_zero_cardinality), MYSQL_SYSVAR(compact_cf), @@ -1484,6 +1604,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = { MYSQL_SYSVAR(debug_ttl_rec_ts), MYSQL_SYSVAR(debug_ttl_snapshot_ts), MYSQL_SYSVAR(debug_ttl_read_filter_ts), + MYSQL_SYSVAR(debug_ttl_ignore_pk), MYSQL_SYSVAR(reset_stats), MYSQL_SYSVAR(io_write_timeout), MYSQL_SYSVAR(flush_memtable_on_analyze), @@ -1506,13 +1627,15 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = { MYSQL_SYSVAR(validate_tables), MYSQL_SYSVAR(table_stats_sampling_pct), + + MYSQL_SYSVAR(large_prefix), nullptr}; static rocksdb::WriteOptions rdb_get_rocksdb_write_options(my_core::THD *const thd) { rocksdb::WriteOptions opt; - opt.sync = (rocksdb_flush_log_at_trx_commit == 1); + opt.sync = (rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC); opt.disableWAL = THDVAR(thd, write_disable_wal); opt.ignore_missing_column_families = THDVAR(thd, write_ignore_missing_column_families); @@ -1795,6 +1918,13 @@ class Rdb_transaction { } } + void update_bytes_written(ulonglong bytes_written) { + if (m_tbl_io_perf != nullptr) { + m_tbl_io_perf->update_bytes_written(rocksdb_perf_context_level(m_thd), + bytes_written); + } + } + void set_params(int timeout_sec_arg, int max_row_locks_arg) { m_timeout_sec = timeout_sec_arg; m_max_row_locks = max_row_locks_arg; @@ -2292,9 +2422,10 @@ class Rdb_transaction_impl : public Rdb_transaction { tx_opts.set_snapshot = false; tx_opts.lock_timeout = rdb_convert_sec_to_ms(m_timeout_sec); tx_opts.deadlock_detect = THDVAR(m_thd, deadlock_detect); + tx_opts.deadlock_detect_depth = THDVAR(m_thd, deadlock_detect_depth); tx_opts.max_write_batch_size = THDVAR(m_thd, write_batch_max_bytes); - write_opts.sync = (rocksdb_flush_log_at_trx_commit == 1); + write_opts.sync = (rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC); write_opts.disableWAL = THDVAR(m_thd, write_disable_wal); write_opts.ignore_missing_column_families = THDVAR(m_thd, write_ignore_missing_column_families); @@ -2513,7 +2644,7 @@ class Rdb_writebatch_impl : public Rdb_transaction { void start_tx() override { reset(); - write_opts.sync = (rocksdb_flush_log_at_trx_commit == 1); + write_opts.sync = (rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC); write_opts.disableWAL = THDVAR(m_thd, write_disable_wal); write_opts.ignore_missing_column_families = THDVAR(m_thd, write_ignore_missing_column_families); @@ -2557,14 +2688,21 @@ namespace { class Rdb_perf_context_guard { Rdb_io_perf m_io_perf; - THD *m_thd; + Rdb_io_perf *m_io_perf_ptr; + Rdb_transaction *m_tx; + uint m_level; -public: + public: Rdb_perf_context_guard(const Rdb_perf_context_guard &) = delete; Rdb_perf_context_guard &operator=(const Rdb_perf_context_guard &) = delete; - explicit Rdb_perf_context_guard(THD *const thd) : m_thd(thd) { - Rdb_transaction *&tx = get_tx_from_thd(m_thd); + explicit Rdb_perf_context_guard(Rdb_io_perf *io_perf, uint level) + : m_io_perf_ptr(io_perf), m_tx(nullptr), m_level(level) { + m_io_perf_ptr->start(m_level); + } + + explicit Rdb_perf_context_guard(Rdb_transaction *tx, uint level) + : m_io_perf_ptr(nullptr), m_tx(tx), m_level(level) { /* if perf_context information is already being recorded, this becomes a no-op @@ -2575,9 +2713,10 @@ class Rdb_perf_context_guard { } ~Rdb_perf_context_guard() { - Rdb_transaction *&tx = get_tx_from_thd(m_thd); - if (tx != nullptr) { - tx->io_perf_end_and_record(); + if 
(m_tx != nullptr) { + m_tx->io_perf_end_and_record(); + } else if (m_io_perf_ptr != nullptr) { + m_io_perf_ptr->end_and_record(m_level); } } }; @@ -2667,8 +2806,17 @@ static std::string rdb_xid_to_string(const XID &src) { */ static bool rocksdb_flush_wal(handlerton* hton __attribute__((__unused__))) DBUG_ASSERT(rdb != nullptr); - rocksdb_wal_group_syncs++; - const rocksdb::Status s = rdb->SyncWAL(); + + rocksdb::Status s; + /* + target_lsn is set to 0 when MySQL wants to sync the wal files + */ + if (target_lsn == 0 || rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER) { + rocksdb_wal_group_syncs++; + s = rdb->FlushWAL(target_lsn == 0 || + rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC); + } + if (!s.ok()) { rdb_log_status_error(s); return HA_EXIT_FAILURE; @@ -2713,8 +2861,9 @@ static int rocksdb_prepare(handlerton* hton, THD* thd, bool prepare_tx) if (!tx->prepare(rdb_xid_to_string(xid))) { return HA_EXIT_FAILURE; } - if (thd->durability_property == HA_IGNORE_DURABILITY -#ifdef MARIAROCKS_NOT_YET + if (thd->durability_property == HA_IGNORE_DURABILITY ) +#ifdef MARIAROCKS_NOT_YET + (rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER)) { && THDVAR(thd, flush_log_at_trx_commit)) { #endif @@ -2889,12 +3038,12 @@ static int rocksdb_commit(handlerton* hton, THD* thd, bool commit_tx) rocksdb::StopWatchNano timer(rocksdb::Env::Default(), true); - /* this will trigger saving of perf_context information */ - Rdb_perf_context_guard guard(thd); - /* note: h->external_lock(F_UNLCK) is called after this function is called) */ Rdb_transaction *&tx = get_tx_from_thd(thd); + /* this will trigger saving of perf_context information */ + Rdb_perf_context_guard guard(tx, rocksdb_perf_context_level(thd)); + if (tx != nullptr) { if (commit_tx || (!my_core::thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) { @@ -2931,8 +3080,8 @@ static int rocksdb_commit(handlerton* hton, THD* thd, bool commit_tx) static int rocksdb_rollback(handlerton *const hton, THD *const thd, bool rollback_tx) { - Rdb_perf_context_guard guard(thd); Rdb_transaction *&tx = get_tx_from_thd(thd); + Rdb_perf_context_guard guard(tx, rocksdb_perf_context_level(thd)); if (tx != nullptr) { if (rollback_tx) { @@ -3055,7 +3204,82 @@ class Rdb_snapshot_status : public Rdb_tx_list_walker { "=========================================\n"; } -public: + static std::string get_dlock_txn_info(const rocksdb::DeadlockInfo &txn, + const GL_INDEX_ID &gl_index_id, + bool is_last_path = false) { + std::string txn_data; + + /* extract table name and index names using the index id */ + std::string table_name = ddl_manager.safe_get_table_name(gl_index_id); + if (table_name.empty()) { + table_name = + "NOT FOUND; INDEX_ID: " + std::to_string(gl_index_id.index_id); + } + auto kd = ddl_manager.safe_find(gl_index_id); + std::string idx_name = + (kd) ? kd->get_name() + : "NOT FOUND; INDEX_ID: " + std::to_string(gl_index_id.index_id); + + /* get the name of the column family */ + rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(txn.m_cf_id); + std::string cf_name = cfh->GetName(); + + txn_data += format_string( + "TRANSACTIONID: %u\n" + "COLUMN FAMILY NAME: %s\n" + "WAITING KEY: %s\n" + "LOCK TYPE: %s\n" + "INDEX NAME: %s\n" + "TABLE NAME: %s\n", + txn.m_txn_id, cf_name.c_str(), + rdb_hexdump(txn.m_waiting_key.c_str(), txn.m_waiting_key.length()) + .c_str(), + txn.m_exclusive ? 
"EXCLUSIVE" : "SHARED", idx_name.c_str(), + table_name.c_str()); + if (!is_last_path) { + txn_data += "---------------WAITING FOR---------------\n"; + } + return txn_data; + } + + static std::string + get_dlock_path_info(const rocksdb::DeadlockPath &path_entry) { + std::string path_data; + if (path_entry.limit_exceeded) { + path_data += "\n-------DEADLOCK EXCEEDED MAX DEPTH-------\n"; + } else { + path_data += "\n*** DEADLOCK PATH\n" + "=========================================\n"; + for (auto it = path_entry.path.begin(); it != path_entry.path.end(); + it++) { + auto txn = *it; + const GL_INDEX_ID gl_index_id = { + txn.m_cf_id, rdb_netbuf_to_uint32(reinterpret_cast( + txn.m_waiting_key.c_str()))}; + path_data += get_dlock_txn_info(txn, gl_index_id); + } + + DBUG_ASSERT_IFF(path_entry.limit_exceeded, path_entry.path.empty()); + /* print the first txn in the path to display the full deadlock cycle */ + if (!path_entry.path.empty() && !path_entry.limit_exceeded) { + auto txn = path_entry.path[0]; + const GL_INDEX_ID gl_index_id = { + txn.m_cf_id, rdb_netbuf_to_uint32(reinterpret_cast( + txn.m_waiting_key.c_str()))}; + path_data += get_dlock_txn_info(txn, gl_index_id, true); + + /* prints the txn id of the transaction that caused the deadlock */ + auto deadlocking_txn = *(path_entry.path.end() - 1); + path_data += + format_string("\n--------TRANSACTIONID: %u GOT DEADLOCK---------\n", + deadlocking_txn.m_txn_id); + } + } + + return path_data; + } + + public: Rdb_snapshot_status() : m_data(get_header()) {} std::string getResult() { return m_data + get_footer(); } @@ -3080,11 +3304,20 @@ class Rdb_snapshot_status : public Rdb_tx_list_walker { "%s\n" "lock count %llu, write count %llu\n" "insert count %llu, update count %llu, delete count %llu\n", - curr_time - snapshot_timestamp, buffer, tx->get_lock_count(), + (longlong)(curr_time - snapshot_timestamp), buffer, tx->get_lock_count(), tx->get_write_count(), tx->get_insert_count(), tx->get_update_count(), tx->get_delete_count()); } } + + void populate_deadlock_buffer() { + auto dlock_buffer = rdb->GetDeadlockInfoBuffer(); + m_data += "----------LATEST DETECTED DEADLOCKS----------\n"; + + for (auto path_entry : dlock_buffer) { + m_data += get_dlock_path_info(path_entry); + } + } }; /** @@ -3184,10 +3417,10 @@ static bool rocksdb_show_snapshot_status(handlerton *const hton, THD *const thd, Rdb_snapshot_status showStatus; Rdb_transaction::walk_tx_list(&showStatus); + showStatus.populate_deadlock_buffer(); /* Send the result data back to MySQL */ - return print_stats(thd, "SNAPSHOTS", "rocksdb", showStatus.getResult(), - stat_print); + return print_stats(thd, "rocksdb", "", showStatus.getResult(), stat_print); } #endif @@ -3310,7 +3543,6 @@ static bool rocksdb_show_status(handlerton *const hton, THD *const thd, str.clear(); rocksdb::MemoryUtil::GetApproximateMemoryUsageByType(dbs, cache_set, &temp_usage_by_type); - snprintf(buf, sizeof(buf), "\nMemTable Total: %llu", (ulonglong)temp_usage_by_type[rocksdb::MemoryUtil::kMemTableTotal]); str.append(buf); @@ -3327,7 +3559,7 @@ static bool rocksdb_show_status(handlerton *const hton, THD *const thd, (ulonglong)internal_cache_count * kDefaultInternalCacheSize); str.append(buf); res |= print_stats(thd, "MEMORY_STATS", "rocksdb", str, stat_print); - +#ifdef MARIAROCKS_NOT_YET /* Show the background thread status */ std::vector thread_list; rocksdb::Status s = rdb->GetEnv()->GetThreadList(&thread_list); @@ -3364,6 +3596,8 @@ static bool rocksdb_show_status(handlerton *const hton, THD *const thd, str, 
stat_print); } } +#endif + #ifdef MARIAROCKS_NOT_YET } else if (stat_type == HA_ENGINE_TRX) { /* Handle the SHOW ENGINE ROCKSDB TRANSACTION STATUS command */ @@ -3413,8 +3647,6 @@ static int rocksdb_start_tx_and_assign_read_view( user for whom the transaction should be committed */ { - Rdb_perf_context_guard guard(thd); - ulong const tx_isolation = my_core::thd_tx_isolation(thd); if (tx_isolation != ISO_REPEATABLE_READ) { @@ -3433,6 +3665,8 @@ static int rocksdb_start_tx_and_assign_read_view( mysql_mutex_assert_owner(&LOCK_commit_ordered); Rdb_transaction *const tx = get_or_create_tx(thd); + Rdb_perf_context_guard guard(tx, rocksdb_perf_context_level(thd)); + DBUG_ASSERT(!tx->has_snapshot()); tx->set_tx_read_only(true); rocksdb_register_tx(hton, thd, tx); @@ -3476,6 +3710,7 @@ static void rocksdb_update_table_stats( int n_lock_wait, int n_lock_wait_timeout, int n_lock_deadlock, const char *engine)) { my_io_perf_t io_perf_read; + my_io_perf_t io_perf_write; my_io_perf_t io_perf; page_stats_t page_stats; comp_stats_t comp_stats; @@ -3490,6 +3725,7 @@ static void rocksdb_update_table_stats( memset(&io_perf, 0, sizeof(io_perf)); memset(&page_stats, 0, sizeof(page_stats)); memset(&comp_stats, 0, sizeof(comp_stats)); + memset(&io_perf_write, 0, sizeof(io_perf_write)); tablenames = rdb_open_tables.get_table_names(); @@ -3522,6 +3758,8 @@ static void rocksdb_update_table_stats( io_perf_read.bytes = table_handler->m_io_perf_read.bytes.load(); io_perf_read.requests = table_handler->m_io_perf_read.requests.load(); + io_perf_write.bytes = table_handler->m_io_perf_write.bytes.load(); + io_perf_write.requests = table_handler->m_io_perf_write.requests.load(); lock_wait_timeout_stats = table_handler->m_lock_wait_timeout_counter.load(); deadlock_stats = table_handler->m_deadlock_counter.load(); @@ -3549,9 +3787,10 @@ static void rocksdb_update_table_stats( sizeof(dbname_sys)); my_core::filename_to_tablename(tablename.c_str(), tablename_sys, sizeof(tablename_sys)); - (*cb)(dbname_sys, tablename_sys, is_partition, &io_perf_read, &io_perf, - &io_perf, &io_perf, &io_perf, &page_stats, &comp_stats, 0, - lock_wait_timeout_stats, deadlock_stats, rocksdb_hton_name); + (*cb)(dbname_sys, tablename_sys, is_partition, &io_perf_read, + &io_perf_write, &io_perf, &io_perf, &io_perf, &page_stats, + &comp_stats, 0, lock_wait_timeout_stats, deadlock_stats, + rocksdb_hton_name); } } #endif @@ -3767,8 +4006,18 @@ static int rocksdb_init_func(void *const p) { (rocksdb::BlockBasedTableOptions::IndexType)rocksdb_index_type; if (!rocksdb_tbl_options->no_block_cache) { - rocksdb_tbl_options->block_cache = - rocksdb::NewLRUCache(rocksdb_block_cache_size); + std::shared_ptr block_cache = rocksdb_use_clock_cache + ? rocksdb::NewClockCache(rocksdb_block_cache_size) + : rocksdb::NewLRUCache(rocksdb_block_cache_size); + if (rocksdb_sim_cache_size > 0) { + // Simulated cache enabled + // Wrap block cache inside a simulated cache and pass it to RocksDB + rocksdb_tbl_options->block_cache = + rocksdb::NewSimCache(block_cache, rocksdb_sim_cache_size, 6); + } else { + // Pass block cache to RocksDB + rocksdb_tbl_options->block_cache = block_cache; + } } // Using newer BlockBasedTable format version for better compression // and better memory allocation. 
@@ -4163,6 +4412,7 @@ Rdb_open_tables_map::get_table_handler(const char *const table_name) { thr_lock_init(&table_handler->m_thr_lock); #ifdef MARIAROCKS_NOT_YET table_handler->m_io_perf_read.init(); + table_handler->m_io_perf_write.init(); #endif } DBUG_ASSERT(table_handler->m_ref_count >= 0); @@ -4420,11 +4670,11 @@ bool ha_rocksdb::init_with_fields() { rows within a transaction, etc, because the compaction filter ignores snapshots when filtering keys. */ -bool ha_rocksdb::should_hide_ttl_rec(const rocksdb::Slice &ttl_rec_val, +bool ha_rocksdb::should_hide_ttl_rec(const Rdb_key_def &kd, + const rocksdb::Slice &ttl_rec_val, const int64_t curr_ts) { - DBUG_ASSERT(m_pk_descr != nullptr); - DBUG_ASSERT(m_pk_descr->has_ttl()); - DBUG_ASSERT(m_pk_descr->m_ttl_rec_offset != UINT_MAX); + DBUG_ASSERT(kd.has_ttl()); + DBUG_ASSERT(kd.m_ttl_rec_offset != UINT_MAX); /* Curr_ts can only be 0 if there are no snapshots open. @@ -4450,7 +4700,7 @@ bool ha_rocksdb::should_hide_ttl_rec(const rocksdb::Slice &ttl_rec_val, Find where the 8-byte ttl is for each record in this index. */ uint64 ts; - if (!reader.read(m_pk_descr->m_ttl_rec_offset) || reader.read_uint64(&ts)) { + if (!reader.read(kd.m_ttl_rec_offset) || reader.read_uint64(&ts)) { /* This condition should never be reached since all TTL records have an 8 byte ttl field in front. Don't filter the record out, and log an error. @@ -4458,7 +4708,7 @@ bool ha_rocksdb::should_hide_ttl_rec(const rocksdb::Slice &ttl_rec_val, std::string buf; buf = rdb_hexdump(ttl_rec_val.data(), ttl_rec_val.size(), RDB_MAX_HEXDUMP_LEN); - const GL_INDEX_ID gl_index_id = m_pk_descr->get_gl_index_id(); + const GL_INDEX_ID gl_index_id = kd.get_gl_index_id(); // NO_LINT_DEBUG sql_print_error("Decoding ttl from PK value failed, " "for index (%u,%u), val: %s", @@ -4472,10 +4722,23 @@ bool ha_rocksdb::should_hide_ttl_rec(const rocksdb::Slice &ttl_rec_val, #ifndef NDEBUG read_filter_ts += rdb_dbug_set_ttl_read_filter_ts(); #endif - return ts + m_pk_descr->m_ttl_duration + read_filter_ts <= + return ts + kd.m_ttl_duration + read_filter_ts <= static_cast(curr_ts); } +void ha_rocksdb::rocksdb_skip_expired_records(const Rdb_key_def &kd, + rocksdb::Iterator *const iter, + bool seek_backward) { + if (kd.has_ttl()) { + while (iter->Valid() && + should_hide_ttl_rec( + kd, iter->value(), + get_or_create_tx(table->in_use)->m_snapshot_timestamp)) { + rocksdb_smart_next(seek_backward, iter); + } + } +} + /** Convert record from table->record[0] form into a form that can be written into rocksdb. @@ -4501,6 +4764,7 @@ int ha_rocksdb::convert_record_to_storage_format( if (has_ttl) { /* If it's a TTL record, reserve space for 8 byte TTL value in front. 
*/ m_storage_record.fill(ROCKSDB_SIZEOF_TTL_RECORD + m_null_bytes_in_rec, 0); + m_ttl_bytes_updated = false; /* If the TTL is contained within the key, we use the offset to find the @@ -4531,6 +4795,8 @@ int ha_rocksdb::convert_record_to_storage_format( rdb_netbuf_to_uint64(reinterpret_cast(data)) + rdb_dbug_set_ttl_rec_ts()); #endif + // Also store in m_ttl_bytes to propagate to update_sk + memcpy(m_ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD); } else if (!has_ttl_column) { /* For implicitly generated TTL records we need to copy over the old @@ -4549,6 +4815,8 @@ int ha_rocksdb::convert_record_to_storage_format( #endif char *const data = const_cast(m_storage_record.ptr()); rdb_netbuf_store_uint64(reinterpret_cast(data), ts); + // Also store in m_ttl_bytes to propagate to update_sk + memcpy(m_ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD); } } } else { @@ -4630,6 +4898,15 @@ int ha_rocksdb::convert_record_to_storage_format( ts += rdb_dbug_set_ttl_rec_ts(); #endif rdb_netbuf_store_uint64(reinterpret_cast(data), ts); + + // If this is an update and the timestamp has been updated, take note + // so we can avoid updating SKs unnecessarily. + if (!row_info.old_pk_slice.empty()) { + m_ttl_bytes_updated = + memcmp(m_ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD); + } + // Store timestamp in m_ttl_bytes to propagate to update_sk + memcpy(m_ttl_bytes, data, ROCKSDB_SIZEOF_TTL_RECORD); } } } @@ -4907,8 +5184,9 @@ int ha_rocksdb::convert_record_from_storage_format( } if (m_maybe_unpack_info) { - unpack_info = reader.read(RDB_UNPACK_HEADER_SIZE); - if (!unpack_info || unpack_info[0] != RDB_UNPACK_DATA_TAG) { + unpack_info = reader.get_current_ptr(); + if (!unpack_info || !Rdb_key_def::is_unpack_data_tag(unpack_info[0]) || + !reader.read(Rdb_key_def::get_unpack_header_size(unpack_info[0]))) { return HA_ERR_ROCKSDB_CORRUPT_DATA; } @@ -4916,7 +5194,8 @@ int ha_rocksdb::convert_record_from_storage_format( rdb_netbuf_to_uint16(reinterpret_cast(unpack_info + 1)); unpack_slice = rocksdb::Slice(unpack_info, unpack_info_len); - reader.read(unpack_info_len - RDB_UNPACK_HEADER_SIZE); + reader.read(unpack_info_len - + Rdb_key_def::get_unpack_header_size(unpack_info[0])); } int err = m_pk_descr->unpack_record(table, buf, &rowkey_slice, @@ -5072,8 +5351,7 @@ void ha_rocksdb::setup_field_converters() { If hidden pk exists, we skip this check since the field will never be part of the hidden pk. 
*/ - if (!has_hidden_pk(table) && - field->part_of_key.is_set(table->s->primary_key)) { + if (!has_hidden_pk(table)) { KEY *const pk_info = &table->key_info[table->s->primary_key]; for (uint kp = 0; kp < pk_info->user_defined_key_parts; kp++) { /* key_part->fieldnr is counted from 1 */ @@ -5276,7 +5554,10 @@ int ha_rocksdb::open(const char *const name, int mode, uint test_if_locked) { my_core::thr_lock_data_init(&m_table_handler->m_thr_lock, &m_db_lock, nullptr); m_io_perf.init(&m_table_handler->m_table_perf_context, - &m_table_handler->m_io_perf_read, &stats); + &m_table_handler->m_io_perf_read, + &m_table_handler->m_io_perf_write, &stats); + Rdb_perf_context_guard guard(&m_io_perf, + rocksdb_perf_context_level(ha_thd())); std::string fullname; err = rdb_normalize_tablename(name, &fullname); @@ -5375,6 +5656,13 @@ int ha_rocksdb::close(void) { m_table_handler = nullptr; } + // These are needed to suppress valgrind errors in rocksdb.partition + m_storage_record.free(); + m_last_rowkey.free(); + m_sk_tails.free(); + m_sk_tails_old.free(); + m_pk_unpack_info.free(); + DBUG_RETURN(HA_EXIT_SUCCESS); } @@ -5410,6 +5698,14 @@ static_assert((sizeof(rdb_error_messages) / sizeof(rdb_error_messages[0])) == ((HA_ERR_ROCKSDB_LAST - HA_ERR_ROCKSDB_FIRST) + 1), "Number of error messages doesn't match number of error codes"); +//psergey-merge: do we need this in MariaDB: we have get_error_messages +//below... +#if 0 +static const char *rdb_get_error_message(int nr) { + return rdb_error_messages[nr - HA_ERR_ROCKSDB_FIRST]; +} +#endif + static const char **rdb_get_error_messages(int nr) { return rdb_error_messages; } bool ha_rocksdb::get_error_message(const int error, String *const buf) { @@ -5964,9 +6260,8 @@ int ha_rocksdb::create_key_def(const TABLE *const table_arg, const uint &i, DBUG_RETURN(err); } - /* We don't currently support TTL on tables with secondary keys. */ - if (ttl_duration > 0 && - (table_arg->s->keys > 1 || is_hidden_pk(i, table_arg, tbl_def_arg))) { + /* We don't currently support TTL on tables with hidden primary keys. */ + if (ttl_duration > 0 && is_hidden_pk(i, table_arg, tbl_def_arg)) { my_error(ER_RDB_TTL_UNSUPPORTED, MYF(0)); DBUG_RETURN(HA_EXIT_FAILURE); } @@ -6005,6 +6300,12 @@ int ha_rocksdb::create_key_def(const TABLE *const table_arg, const uint &i, kv_version = Rdb_key_def::PRIMARY_FORMAT_VERSION_UPDATE1; }); + DBUG_EXECUTE_IF("MYROCKS_NO_COVERED_BITMAP_FORMAT", { + if (index_type == Rdb_key_def::INDEX_TYPE_SECONDARY) { + kv_version = Rdb_key_def::SECONDARY_FORMAT_VERSION_UPDATE2; + } + }); + uint32 index_flags = (ttl_duration > 0 ? Rdb_key_def::TTL_FLAG : 0); uint32 ttl_rec_offset = @@ -6366,7 +6667,7 @@ int ha_rocksdb::read_key_exact(const Rdb_key_def &kd, from the POV of the current transaction. If it has, try going to the next key. */ - if (kd.has_ttl() && should_hide_ttl_rec(iter->value(), ttl_filter_ts)) { + if (kd.has_ttl() && should_hide_ttl_rec(kd, iter->value(), ttl_filter_ts)) { rocksdb_smart_next(kd.m_is_reverse_cf, iter); continue; } @@ -6402,7 +6703,7 @@ int ha_rocksdb::read_before_key(const Rdb_key_def &kd, if ((full_key_match && kd.value_matches_prefix(m_scan_it->key(), key_slice)) || (kd.has_ttl() && - should_hide_ttl_rec(m_scan_it->value(), ttl_filter_ts))) { + should_hide_ttl_rec(kd, m_scan_it->value(), ttl_filter_ts))) { rocksdb_smart_next(!kd.m_is_reverse_cf, m_scan_it); continue; } @@ -6432,7 +6733,7 @@ int ha_rocksdb::read_after_key(const Rdb_key_def &kd, key. 
*/ while (m_scan_it->Valid() && kd.has_ttl() && - should_hide_ttl_rec(m_scan_it->value(), ttl_filter_ts)) { + should_hide_ttl_rec(kd, m_scan_it->value(), ttl_filter_ts)) { rocksdb_smart_next(kd.m_is_reverse_cf, m_scan_it); } @@ -6590,17 +6891,21 @@ int ha_rocksdb::read_row_from_secondary_key(uchar *const buf, int rc = 0; uint pk_size; - if (m_keyread_only && m_lock_rows == RDB_LOCK_NONE && !has_hidden_pk(table)) { - /* Get the key columns and primary key value */ - const rocksdb::Slice &rkey = m_scan_it->key(); + /* Get the key columns and primary key value */ + const rocksdb::Slice &rkey = m_scan_it->key(); + const rocksdb::Slice &value = m_scan_it->value(); + + bool covered_lookup = + m_keyread_only || kd.covers_lookup(table, &value, &m_lookup_bitmap); + if (covered_lookup && m_lock_rows == RDB_LOCK_NONE && !has_hidden_pk(table)) { pk_size = kd.get_primary_key_tuple(table, *m_pk_descr, &rkey, m_pk_packed_tuple); - const rocksdb::Slice &value = m_scan_it->value(); if (pk_size == RDB_INVALID_KEY_LEN) { rc = HA_ERR_ROCKSDB_CORRUPT_DATA; } else { rc = kd.unpack_record(table, buf, &rkey, &value, m_verify_row_debug_checksums); + global_stats.covered_secondary_key_lookups.inc(); } } else { if (kd.m_is_reverse_cf) @@ -6710,11 +7015,15 @@ int ha_rocksdb::secondary_index_read(const int keyno, uchar *const buf) { m_last_rowkey.copy((const char *)m_pk_packed_tuple, size, &my_charset_bin); - if (m_keyread_only && m_lock_rows == RDB_LOCK_NONE && + rocksdb::Slice value = m_scan_it->value(); + bool covered_lookup = + m_keyread_only || m_key_descr_arr[keyno]->covers_lookup( + table, &value, &m_lookup_bitmap); + if (covered_lookup && m_lock_rows == RDB_LOCK_NONE && !has_hidden_pk(table)) { - rocksdb::Slice value = m_scan_it->value(); rc = m_key_descr_arr[keyno]->unpack_record( table, buf, &key, &value, m_verify_row_debug_checksums); + global_stats.covered_secondary_key_lookups.inc(); } else { rc = get_row_by_rowid(buf, m_pk_packed_tuple, size); } @@ -6823,6 +7132,7 @@ int ha_rocksdb::prepare_range_scan(const key_range *start_key, HA_EXIT_SUCCESS OK other HA_ERR error code (can be SE-specific) */ + int ha_rocksdb::index_read_map(uchar *const buf, const uchar *const key, key_part_map keypart_map, enum ha_rkey_function find_flag) { @@ -7039,6 +7349,8 @@ int ha_rocksdb::find_icp_matching_index_rec(const bool &move_forward, const Rdb_key_def &kd = *m_key_descr_arr[active_index]; while (1) { + rocksdb_skip_expired_records(kd, m_scan_it, !move_forward); + if (!m_scan_it->Valid()) { table->status = STATUS_NOT_FOUND; return HA_ERR_END_OF_FILE; @@ -7068,10 +7380,7 @@ int ha_rocksdb::find_icp_matching_index_rec(const bool &move_forward, const enum icp_result icp_status= handler_index_cond_check(this); if (icp_status == ICP_NO_MATCH) { - if (move_forward) - m_scan_it->Next(); - else - m_scan_it->Prev(); + rocksdb_smart_next(!move_forward, m_scan_it); continue; /* Get the next (or prev) index tuple */ } else if (icp_status == ICP_OUT_OF_RANGE || icp_status == ICP_ABORTED_BY_USER) { @@ -7402,7 +7711,8 @@ int ha_rocksdb::get_row_by_rowid(uchar *const buf, const char *const rowid, if (found) { /* If we found the record, but it's expired, pretend we didn't find it. 
*/ if (!skip_ttl_check && m_pk_descr->has_ttl() && - should_hide_ttl_rec(rocksdb::Slice(&m_retrieved_record.front(), + should_hide_ttl_rec(*m_pk_descr, + rocksdb::Slice(&m_retrieved_record.front(), m_retrieved_record.size()), tx->m_snapshot_timestamp)) { DBUG_RETURN(HA_ERR_KEY_NOT_FOUND); @@ -7481,6 +7791,8 @@ int ha_rocksdb::index_next_with_direction(uchar *const buf, bool move_forward) { else m_scan_it->Prev(); } + rocksdb_skip_expired_records(*m_key_descr_arr[active_index], m_scan_it, + !move_forward); rc = find_icp_matching_index_rec(move_forward, buf); if (!rc) rc = secondary_index_read(active_index, buf); @@ -7837,6 +8149,12 @@ bool ha_rocksdb::is_pk(const uint index, const TABLE *const table_arg, is_hidden_pk(index, table_arg, tbl_def_arg); } +uint ha_rocksdb::max_supported_key_part_length() const { + DBUG_ENTER_FUNC(); + DBUG_RETURN(rocksdb_large_prefix ? MAX_INDEX_COL_LEN_LARGE + : MAX_INDEX_COL_LEN_SMALL); +} + const char *ha_rocksdb::get_key_name(const uint index, const TABLE *const table_arg, const Rdb_tbl_def *const tbl_def_arg) { @@ -8219,7 +8537,8 @@ int ha_rocksdb::check_uniqueness_and_lock( m_retrieved_record by check_and_lock_unique_pk(). */ if (is_pk(key_id, table, m_tbl_def) && found && m_pk_descr->has_ttl() && - should_hide_ttl_rec(rocksdb::Slice(&m_retrieved_record.front(), + should_hide_ttl_rec(*m_pk_descr, + rocksdb::Slice(&m_retrieved_record.front(), m_retrieved_record.size()), (row_info.tx->m_snapshot_timestamp ? row_info.tx->m_snapshot_timestamp @@ -8267,48 +8586,112 @@ int ha_rocksdb::check_duplicate_sk(const TABLE *table_arg, int ha_rocksdb::bulk_load_key(Rdb_transaction *const tx, const Rdb_key_def &kd, const rocksdb::Slice &key, - const rocksdb::Slice &value) { - rocksdb::ColumnFamilyHandle *const cf = kd.get_cf(); + const rocksdb::Slice &value, bool sort) { + DBUG_ENTER_FUNC(); + + rocksdb::ColumnFamilyHandle *cf = kd.get_cf(); DBUG_ASSERT(cf != nullptr); - if (m_sst_info == nullptr) { - m_sst_info = std::make_shared( - rdb, m_table_handler->m_table_name, kd.get_name(), cf, - *rocksdb_db_options, THDVAR(ha_thd(), trace_sst_api)); - tx->start_bulk_load(this); - m_bulk_load_tx = tx; - } + int res = HA_EXIT_SUCCESS; - DBUG_ASSERT(m_sst_info != nullptr); + if (sort) { + GL_INDEX_ID kd_gl_id = kd.get_gl_index_id(); + auto it = m_key_merge.find(kd_gl_id); + if (it == m_key_merge.end()) { + m_key_merge.emplace( + std::piecewise_construct, std::make_tuple(kd_gl_id), + std::make_tuple( + thd_rocksdb_tmpdir(), THDVAR(ha_thd(), merge_buf_size), + THDVAR(ha_thd(), merge_combine_read_size), + THDVAR(ha_thd(), merge_tmp_file_removal_delay_ms), cf)); + it = m_key_merge.find(kd_gl_id); + if ((res = it->second.init()) != 0) { + DBUG_RETURN(res); + } - return m_sst_info->put(key, value); + if (m_bulk_load_tx == nullptr) { + tx->start_bulk_load(this); + m_bulk_load_tx = tx; + } + } + res = it->second.add(key, value); + } else { + if (!m_sst_info) { + m_sst_info.reset(new Rdb_sst_info(rdb, m_table_handler->m_table_name, + kd.get_name(), cf, *rocksdb_db_options, + THDVAR(ha_thd(), trace_sst_api))); + tx->start_bulk_load(this); + m_bulk_load_tx = tx; + } + + DBUG_ASSERT(m_sst_info); + + res = m_sst_info->put(key, value); + } + + DBUG_RETURN(res); } int ha_rocksdb::finalize_bulk_load() { - int rc = 0; + DBUG_ENTER_FUNC(); + + DBUG_ASSERT_IMP(!m_key_merge.empty() || m_sst_info, + m_bulk_load_tx != nullptr); /* Skip if there are no possible ongoing bulk loads */ - if (m_sst_info == nullptr && m_bulk_load_tx == nullptr) { - return rc; + if (m_key_merge.empty() && !m_sst_info && 
m_bulk_load_tx == nullptr) { + DBUG_RETURN(HA_EXIT_SUCCESS); } + int res = HA_EXIT_SUCCESS; + RDB_MUTEX_LOCK_CHECK(m_bulk_load_mutex); - /* - We need this check because it's possible that m_sst_info has been - flushed and cleared by another thread by the time the mutex has been - acquired. - */ - if (m_sst_info != nullptr) { - rc = m_sst_info->commit(); - m_sst_info = nullptr; + if (m_sst_info) { + res = m_sst_info->commit(); + m_sst_info.reset(); + } + + if (!m_key_merge.empty()) { + rocksdb::Slice merge_key; + rocksdb::Slice merge_val; + for (auto it = m_key_merge.begin(); it != m_key_merge.end(); it++) { + const std::string &index_name = + ddl_manager.safe_find(it->first)->get_name(); + Rdb_index_merge &rdb_merge = it->second; + Rdb_sst_info sst_info(rdb, m_table_handler->m_table_name, index_name, + rdb_merge.get_cf(), *rocksdb_db_options, + THDVAR(ha_thd(), trace_sst_api)); + + while ((res = rdb_merge.next(&merge_key, &merge_val)) == 0) { + if ((res = sst_info.put(merge_key, merge_val)) != 0) { + break; + } + } + // res == -1 => finished ok; res > 0 => error + if (res <= 0) { + if ((res = sst_info.commit()) != 0) { + break; + } + } + } + m_key_merge.clear(); + + /* + Explicitly tell jemalloc to clean up any unused dirty pages at this point. + See https://reviews.facebook.net/D63723 for more details. + */ + purge_all_jemalloc_arenas(); + } + + if (m_bulk_load_tx != nullptr) { m_bulk_load_tx->end_bulk_load(this); m_bulk_load_tx = nullptr; } RDB_MUTEX_UNLOCK_CHECK(m_bulk_load_mutex); - return rc; + DBUG_RETURN(res); } int ha_rocksdb::update_pk(const Rdb_key_def &kd, @@ -8316,6 +8699,7 @@ int ha_rocksdb::update_pk(const Rdb_key_def &kd, const bool &pk_changed) { const uint key_id = kd.get_keyno(); const bool hidden_pk = is_hidden_pk(key_id, table, m_tbl_def); + ulonglong bytes_written = 0; /* If the PK has changed, or if this PK uses single deletes and this is an @@ -8330,6 +8714,8 @@ int ha_rocksdb::update_pk(const Rdb_key_def &kd, if (!s.ok()) { return row_info.tx->set_status_error(table->in_use, s, kd, m_tbl_def, m_table_handler); + } else { + bytes_written = row_info.old_pk_slice.size(); } } @@ -8350,7 +8736,8 @@ int ha_rocksdb::update_pk(const Rdb_key_def &kd, /* Write the primary key directly to an SST file using an SstFileWriter */ - rc = bulk_load_key(row_info.tx, kd, row_info.new_pk_slice, value_slice); + rc = bulk_load_key(row_info.tx, kd, row_info.new_pk_slice, value_slice, + THDVAR(table->in_use, bulk_load_allow_unsorted)); } else if (row_info.skip_unique_check || row_info.tx->m_ddl_transaction) { /* It is responsibility of the user to make sure that the data being @@ -8372,6 +8759,10 @@ int ha_rocksdb::update_pk(const Rdb_key_def &kd, } } + if (rc == HA_EXIT_SUCCESS) { + row_info.tx->update_bytes_written( + bytes_written + row_info.new_pk_slice.size() + value_slice.size()); + } return rc; } @@ -8385,24 +8776,31 @@ int ha_rocksdb::update_sk(const TABLE *const table_arg, const Rdb_key_def &kd, rocksdb::Slice old_key_slice; const uint key_id = kd.get_keyno(); + + ulonglong bytes_written = 0; + /* - Can skip updating this key if none of the key fields have changed. + Can skip updating this key if none of the key fields have changed and, if + this table has TTL, the TTL timestamp has not changed. 
*/ - if (row_info.old_data != nullptr && !m_update_scope.is_set(key_id)) { + if (row_info.old_data != nullptr && !m_update_scope.is_set(key_id) && + (!kd.has_ttl() || !m_ttl_bytes_updated)) { return HA_EXIT_SUCCESS; } const bool store_row_debug_checksums = should_store_row_debug_checksums(); - new_packed_size = kd.pack_record( - table_arg, m_pack_buffer, row_info.new_data, m_sk_packed_tuple, - &m_sk_tails, store_row_debug_checksums, row_info.hidden_pk_id); + new_packed_size = + kd.pack_record(table_arg, m_pack_buffer, row_info.new_data, + m_sk_packed_tuple, &m_sk_tails, store_row_debug_checksums, + row_info.hidden_pk_id, 0, nullptr, nullptr, m_ttl_bytes); if (row_info.old_data != nullptr) { // The old value old_packed_size = kd.pack_record( table_arg, m_pack_buffer, row_info.old_data, m_sk_packed_tuple_old, - &m_sk_tails_old, store_row_debug_checksums, row_info.hidden_pk_id); + &m_sk_tails_old, store_row_debug_checksums, row_info.hidden_pk_id, 0, + nullptr, nullptr, m_ttl_bytes); /* Check if we are going to write the same value. This can happen when @@ -8420,6 +8818,7 @@ int ha_rocksdb::update_sk(const TABLE *const table_arg, const Rdb_key_def &kd, */ if (old_packed_size == new_packed_size && m_sk_tails_old.get_current_pos() == m_sk_tails.get_current_pos() && + !(kd.has_ttl() && m_ttl_bytes_updated) && memcmp(m_sk_packed_tuple_old, m_sk_packed_tuple, old_packed_size) == 0 && memcmp(m_sk_tails_old.ptr(), m_sk_tails.ptr(), @@ -8438,6 +8837,8 @@ int ha_rocksdb::update_sk(const TABLE *const table_arg, const Rdb_key_def &kd, row_info.tx->get_indexed_write_batch()->SingleDelete(kd.get_cf(), old_key_slice); + + bytes_written = old_key_slice.size(); } new_key_slice = rocksdb::Slice( @@ -8449,6 +8850,9 @@ int ha_rocksdb::update_sk(const TABLE *const table_arg, const Rdb_key_def &kd, row_info.tx->get_indexed_write_batch()->Put(kd.get_cf(), new_key_slice, new_value_slice); + row_info.tx->update_bytes_written(bytes_written + new_key_slice.size() + + new_value_slice.size()); + return HA_EXIT_SUCCESS; } @@ -8456,14 +8860,19 @@ int ha_rocksdb::update_indexes(const struct update_row_info &row_info, const bool &pk_changed) { int rc; + // The PK must be updated first to pull out the TTL value. + rc = update_pk(*m_pk_descr, row_info, pk_changed); + if (rc != HA_EXIT_SUCCESS) { + return rc; + } + + // Update the remaining indexes. 
for (uint key_id = 0; key_id < m_tbl_def->m_key_count; key_id++) { - const Rdb_key_def &kd = *m_key_descr_arr[key_id]; if (is_pk(key_id, table, m_tbl_def)) { - rc = update_pk(kd, row_info, pk_changed); - } else { - rc = update_sk(table, kd, row_info); + continue; } + rc = update_sk(table, *m_key_descr_arr[key_id], row_info); if (rc != HA_EXIT_SUCCESS) { return rc; } @@ -8741,7 +9150,8 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) { DEBUG_SYNC(ha_thd(), "rocksdb_concurrent_delete"); if (m_pk_descr->has_ttl() && - should_hide_ttl_rec(m_scan_it->value(), tx->m_snapshot_timestamp)) { + should_hide_ttl_rec(*m_pk_descr, m_scan_it->value(), + tx->m_snapshot_timestamp)) { continue; } @@ -8772,7 +9182,8 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) { if (m_pk_descr->has_ttl() && should_hide_ttl_rec( - value, get_or_create_tx(table->in_use)->m_snapshot_timestamp)) { + *m_pk_descr, value, + get_or_create_tx(table->in_use)->m_snapshot_timestamp)) { continue; } @@ -8816,6 +9227,10 @@ int ha_rocksdb::index_init(uint idx, bool sorted) { setup_read_decoders(); + if (!m_keyread_only) { + m_key_descr_arr[idx]->get_lookup_bitmap(table, &m_lookup_bitmap); + } + // If m_lock_rows is not RDB_LOCK_NONE then we will be doing a get_for_update // when accessing the index, so don't acquire the snapshot right away. // Otherwise acquire the snapshot immediately. @@ -8835,6 +9250,8 @@ int ha_rocksdb::index_end() { release_scan_iterator(); + bitmap_free(&m_lookup_bitmap); + active_index = MAX_KEY; in_range_check_pushed_down = FALSE; @@ -8878,6 +9295,7 @@ int ha_rocksdb::delete_row(const uchar *const buf) { rocksdb::Slice key_slice(m_last_rowkey.ptr(), m_last_rowkey.length()); Rdb_transaction *const tx = get_or_create_tx(table->in_use); + ulonglong bytes_written = 0; const uint index = pk_index(table, m_tbl_def); rocksdb::Status s = @@ -8885,6 +9303,8 @@ int ha_rocksdb::delete_row(const uchar *const buf) { if (!s.ok()) { DBUG_RETURN(tx->set_status_error(table->in_use, s, *m_pk_descr, m_tbl_def, m_table_handler)); + } else { + bytes_written = key_slice.size(); } longlong hidden_pk_id = 0; @@ -8907,6 +9327,7 @@ int ha_rocksdb::delete_row(const uchar *const buf) { /* Deleting on secondary key doesn't need any locks: */ tx->get_indexed_write_batch()->SingleDelete(kd.get_cf(), secondary_key_slice); + bytes_written += secondary_key_slice.size(); } } @@ -8919,6 +9340,7 @@ int ha_rocksdb::delete_row(const uchar *const buf) { stats.rows_deleted++; #endif update_row_stats(ROWS_DELETED); + tx->update_bytes_written(bytes_written); DBUG_RETURN(HA_EXIT_SUCCESS); } @@ -9005,13 +9427,37 @@ int ha_rocksdb::info(uint flag) { stats.records+= sz/ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE; stats.data_file_length+= sz; } - // Second, compute memtable stats - uint64_t memtableCount; - uint64_t memtableSize; - rdb->GetApproximateMemTableStats(m_pk_descr->get_cf(), r, - &memtableCount, &memtableSize); - stats.records += memtableCount; - stats.data_file_length += memtableSize; + // Second, compute memtable stats. This call is expensive, so cache + // values computed for some time. + uint64_t cachetime = rocksdb_force_compute_memtable_stats_cachetime; + uint64_t time = (cachetime == 0) ? 
@@ -9691,6 +10137,7 @@ int ha_rocksdb::remove_rows(Rdb_tbl_def *const tbl) {
 
   char key_buf[MAX_KEY_LENGTH];
   uint key_len;
+  ulonglong bytes_written = 0;
   /*
     Remove all records in each index.
     (This is not crash-safe, but it doesn't matter, because bulk row
@@ -9722,11 +10169,13 @@ int ha_rocksdb::remove_rows(Rdb_tbl_def *const tbl) {
         return tx->set_status_error(table->in_use, s, *m_pk_descr, m_tbl_def,
                                     m_table_handler);
       }
-
+      bytes_written += key.size();
       it->Next();
     }
   }
 
+  tx->update_bytes_written(bytes_written);
+
   return HA_EXIT_SUCCESS;
 }
 
@@ -9775,6 +10224,8 @@ int ha_rocksdb::rename_table(const char *const from, const char *const to) {
     DBUG_RETURN(-1);
   }
 
+  DBUG_EXECUTE_IF("gen_sql_table_name", to_str = to_str + "#sql-test";);
+
   const std::unique_ptr<rocksdb::WriteBatch> wb = dict_manager.begin();
   rocksdb::WriteBatch *const batch = wb.get();
   dict_manager.lock();
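Both `remove_rows()` above and `delete_row()` earlier follow the same accounting shape: key sizes are summed into a statement-local variable and handed to the transaction once, rather than calling `update_bytes_written()` per key. A small sketch of the pattern with simplified types (invented names; the delete call is elided):

```cpp
#include <cstdint>
#include <string>
#include <vector>

struct Txn {
  uint64_t bytes_written = 0;
  void update_bytes_written(uint64_t n) { bytes_written += n; }
};

static void remove_keys(Txn *tx, const std::vector<std::string> &keys) {
  uint64_t bytes_written = 0;       // statement-local accumulator
  for (const std::string &key : keys) {
    // ... issue the SingleDelete for `key` here ...
    bytes_written += key.size();    // count the deleted key's size
  }
  tx->update_bytes_written(bytes_written);  // single publish per statement
}
```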
@@ -10486,7 +10937,6 @@ int ha_rocksdb::inplace_populate_sk(
     const std::unordered_set<std::shared_ptr<Rdb_key_def>> &indexes) {
   DBUG_ENTER_FUNC();
   int res = HA_EXIT_SUCCESS;
-
   const std::unique_ptr<rocksdb::WriteBatch> wb = dict_manager.begin();
   rocksdb::WriteBatch *const batch = wb.get();
 
@@ -10542,7 +10992,7 @@ int ha_rocksdb::inplace_populate_sk(
     open handlers have been closed at this point, and the one we're on is the
     only one left.
   */
-  if (m_sst_info != nullptr) {
+  if (m_sst_info) {
    if ((res = finalize_bulk_load())) {
      DBUG_RETURN(res);
    }
@@ -10552,14 +11002,16 @@ int ha_rocksdb::inplace_populate_sk(
   const ulonglong rdb_merge_buf_size = THDVAR(ha_thd(), merge_buf_size);
   const ulonglong rdb_merge_combine_read_size =
       THDVAR(ha_thd(), merge_combine_read_size);
+  const ulonglong rdb_merge_tmp_file_removal_delay =
+      THDVAR(ha_thd(), merge_tmp_file_removal_delay_ms);
 
   for (const auto &index : indexes) {
-    const rocksdb::Comparator *index_comp = index->get_cf()->GetComparator();
     bool is_unique_index =
         new_table_arg->key_info[index->get_keyno()].flags & HA_NOSAME;
 
-    Rdb_index_merge rdb_merge(thd_rocksdb_tmpdir(), rdb_merge_buf_size,
-                              rdb_merge_combine_read_size, index_comp);
+    Rdb_index_merge rdb_merge(
+        thd_rocksdb_tmpdir(), rdb_merge_buf_size, rdb_merge_combine_read_size,
+        rdb_merge_tmp_file_removal_delay, index->get_cf());
 
     if ((res = rdb_merge.init())) {
       DBUG_RETURN(res);
@@ -10588,7 +11040,8 @@ int ha_rocksdb::inplace_populate_sk(
       /* Create new secondary index entry */
       const int new_packed_size = index->pack_record(
           new_table_arg, m_pack_buffer, table->record[0], m_sk_packed_tuple,
-          &m_sk_tails, should_store_row_debug_checksums(), hidden_pk_id);
+          &m_sk_tails, should_store_row_debug_checksums(), hidden_pk_id, 0,
+          nullptr, nullptr, m_ttl_bytes);
 
       const rocksdb::Slice key = rocksdb::Slice(
           reinterpret_cast<const char *>(m_sk_packed_tuple), new_packed_size);
@@ -10653,7 +11106,7 @@ int ha_rocksdb::inplace_populate_sk(
       /*
        Insert the key and value into the SST via the SstFileWriter API.
      */
-      if ((res = bulk_load_key(tx, *index, merge_key, merge_val))) {
+      if ((res = bulk_load_key(tx, *index, merge_key, merge_val, false))) {
         break;
       }
     }
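The rewritten loop above feeds each secondary key through an `Rdb_index_merge` instance (now constructed with a tmp-file removal delay and the column family itself instead of a bare comparator) before handing sorted output to `bulk_load_key()` with `sort=false`. A toy in-memory analogue of that add/sort/drain contract, with invented types (the real class performs an external merge sort that spills to temporary files):

```cpp
#include <algorithm>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Stand-in for Rdb_index_merge: collect unsorted entries, emit sorted ones.
class IndexMerge {
 public:
  void add(std::string key, std::string val) {
    m_buf.emplace_back(std::move(key), std::move(val));
  }
  // Sorting in memory is enough to show the contract: entries come back in
  // key order, which is what the SST writer requires for ingestion.
  void finish() { std::sort(m_buf.begin(), m_buf.end()); }
  const std::vector<std::pair<std::string, std::string>> &sorted() const {
    return m_buf;
  }

 private:
  std::vector<std::pair<std::string, std::string>> m_buf;
};

int main() {
  IndexMerge merge;
  merge.add("b", "2");
  merge.add("a", "1");
  merge.add("c", "3");
  merge.finish();
  for (const auto &kv : merge.sorted())  // feed to the SST writer in order
    std::cout << kv.first << " => " << kv.second << '\n';
}
```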
@@ -11006,6 +11459,9 @@ static void myrocks_update_status() {
   export_stats.queries_point = global_stats.queries[QUERIES_POINT];
   export_stats.queries_range = global_stats.queries[QUERIES_RANGE];
+
+  export_stats.covered_secondary_key_lookups =
+      global_stats.covered_secondary_key_lookups;
 }
 
 static void myrocks_update_memory_status() {
@@ -11049,6 +11505,9 @@ static SHOW_VAR myrocks_status_variables[] = {
                         SHOW_LONGLONG),
     DEF_STATUS_VAR_FUNC("queries_range", &export_stats.queries_range,
                         SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("covered_secondary_key_lookups",
+                        &export_stats.covered_secondary_key_lookups,
+                        SHOW_LONGLONG),
 
     {NullS, NullS, SHOW_LONG}};
 
@@ -11059,6 +11518,91 @@ static void show_myrocks_vars(THD *thd, SHOW_VAR *var, char *buff) {
   var->value = reinterpret_cast<char *>(&myrocks_status_variables);
 }
 
+static ulonglong
+io_stall_prop_value(const std::map<std::string, std::string> &props,
+                    const std::string &key) {
+  std::map<std::string, std::string>::const_iterator iter =
+      props.find("io_stalls." + key);
+  if (iter != props.end()) {
+    return std::stoull(iter->second);
+  } else {
+    DBUG_PRINT("warning",
+               ("RocksDB GetMapProperty hasn't returned key=%s", key.c_str()));
+    DBUG_ASSERT(0);
+    return 0;
+  }
+}
+
+static void update_rocksdb_stall_status() {
+  st_io_stall_stats local_io_stall_stats;
+  for (const auto &cf_name : cf_manager.get_cf_names()) {
+    rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(cf_name);
+    if (cfh == nullptr) {
+      continue;
+    }
+
+    std::map<std::string, std::string> props;
+    if (!rdb->GetMapProperty(cfh, "rocksdb.cfstats", &props)) {
+      continue;
+    }
+
+    local_io_stall_stats.level0_slowdown +=
+        io_stall_prop_value(props, "level0_slowdown");
+    local_io_stall_stats.level0_slowdown_with_compaction +=
+        io_stall_prop_value(props, "level0_slowdown_with_compaction");
+    local_io_stall_stats.level0_numfiles +=
+        io_stall_prop_value(props, "level0_numfiles");
+    local_io_stall_stats.level0_numfiles_with_compaction +=
+        io_stall_prop_value(props, "level0_numfiles_with_compaction");
+    local_io_stall_stats.stop_for_pending_compaction_bytes +=
+        io_stall_prop_value(props, "stop_for_pending_compaction_bytes");
+    local_io_stall_stats.slowdown_for_pending_compaction_bytes +=
+        io_stall_prop_value(props, "slowdown_for_pending_compaction_bytes");
+    local_io_stall_stats.memtable_compaction +=
+        io_stall_prop_value(props, "memtable_compaction");
+    local_io_stall_stats.memtable_slowdown +=
+        io_stall_prop_value(props, "memtable_slowdown");
+    local_io_stall_stats.total_stop += io_stall_prop_value(props, "total_stop");
+    local_io_stall_stats.total_slowdown +=
+        io_stall_prop_value(props, "total_slowdown");
+  }
+  io_stall_stats = local_io_stall_stats;
+}
+
+static SHOW_VAR rocksdb_stall_status_variables[] = {
+    DEF_STATUS_VAR_FUNC("l0_file_count_limit_slowdowns",
+                        &io_stall_stats.level0_slowdown, SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("locked_l0_file_count_limit_slowdowns",
+                        &io_stall_stats.level0_slowdown_with_compaction,
+                        SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("l0_file_count_limit_stops",
+                        &io_stall_stats.level0_numfiles, SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("locked_l0_file_count_limit_stops",
+                        &io_stall_stats.level0_numfiles_with_compaction,
+                        SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("pending_compaction_limit_stops",
+                        &io_stall_stats.stop_for_pending_compaction_bytes,
+                        SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("pending_compaction_limit_slowdowns",
+                        &io_stall_stats.slowdown_for_pending_compaction_bytes,
+                        SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("memtable_limit_stops",
+                        &io_stall_stats.memtable_compaction, SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("memtable_limit_slowdowns",
+                        &io_stall_stats.memtable_slowdown, SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("total_stops", &io_stall_stats.total_stop,
+                        SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("total_slowdowns", &io_stall_stats.total_slowdown,
+                        SHOW_LONGLONG),
+    // end of the array marker
+    {NullS, NullS, SHOW_LONG}};
+
+static void show_rocksdb_stall_vars(THD *thd, SHOW_VAR *var, char *buff) {
+  update_rocksdb_stall_status();
+  var->type = SHOW_ARRAY;
+  var->value = reinterpret_cast<char *>(&rocksdb_stall_status_variables);
+}
+
 static SHOW_VAR rocksdb_status_vars[] = {
     DEF_STATUS_VAR(block_cache_miss),
     DEF_STATUS_VAR(block_cache_hit),
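`update_rocksdb_stall_status()` above assumes every `io_stalls.*` counter is present in the `rocksdb.cfstats` map property. A trimmed, RocksDB-free sketch of the extraction and per-column-family aggregation using only standard containers (the patch additionally `DBUG_ASSERT`s on a missing key, since that means the hard-coded property list has drifted from the RocksDB version in use):

```cpp
#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <vector>

static uint64_t io_stall_prop_value(
    const std::map<std::string, std::string> &props, const std::string &key) {
  const auto iter = props.find("io_stalls." + key);
  return iter != props.end() ? std::stoull(iter->second) : 0;  // 0 on miss
}

int main() {
  // One map per column family, as GetMapProperty() would return them.
  const std::vector<std::map<std::string, std::string>> per_cf = {
      {{"io_stalls.total_stop", "3"}, {"io_stalls.total_slowdown", "7"}},
      {{"io_stalls.total_stop", "2"}, {"io_stalls.total_slowdown", "1"}},
  };

  uint64_t total_stop = 0, total_slowdown = 0;
  for (const auto &props : per_cf) {  // sum across column families
    total_stop += io_stall_prop_value(props, "total_stop");
    total_slowdown += io_stall_prop_value(props, "total_slowdown");
  }
  std::cout << total_stop << ' ' << total_slowdown << '\n';  // 5 8
}
```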
@@ -11126,7 +11670,12 @@ static SHOW_VAR rocksdb_status_vars[] = {
                         SHOW_LONGLONG),
     DEF_STATUS_VAR_PTR("number_sst_entry_other", &rocksdb_num_sst_entry_other,
                        SHOW_LONGLONG),
+    // The variables generated by SHOW_FUNC are sorted only by prefix (the
+    // first arg in the tuple below), so make sure it is unique to make the
+    // sorting deterministic, as quicksort is not stable.
     {"rocksdb", reinterpret_cast<char *>(&show_myrocks_vars), SHOW_FUNC},
+    {"rocksdb_stall", reinterpret_cast<char *>(&show_rocksdb_stall_vars),
+     SHOW_FUNC},
     {NullS, NullS, SHOW_LONG}};
 
 /*
@@ -11173,10 +11722,13 @@ void Rdb_background_thread::run() {
     // pthread_cond_timedwait()) to wait on.
     set_timespec(ts_next_sync, WAKE_UP_INTERVAL);
 
-    // Flush the WAL.
-    if (rdb && (rocksdb_flush_log_at_trx_commit == 2)) {
+    // Flush the WAL. Sync it for both background and never modes to match
+    // InnoDB's behavior. For mode never, the WAL file isn't even written,
+    // whereas background writes to the WAL file but issues the syncs in a
+    // background thread.
+    if (rdb && (rocksdb_flush_log_at_trx_commit != FLUSH_LOG_SYNC)) {
       DBUG_ASSERT(!rocksdb_db_options->allow_mmap_writes);
-      const rocksdb::Status s = rdb->SyncWAL();
+      const rocksdb::Status s = rdb->FlushWAL(true);
       if (!s.ok()) {
         rdb_handle_io_error(s, RDB_IO_ERROR_BG_THREAD);
       }
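The background-thread hunk above makes the flush decision mode-based instead of hard-coding mode 2, and switches from `SyncWAL()` (which only syncs already-written data) to `FlushWAL(true)` (which writes out buffered WAL data and syncs it). A sketch of the resulting mapping; the enum values are an assumption for illustration, following the usual never=0 / sync=1 / background=2 convention, of which only `FLUSH_LOG_SYNC` is named in the hunk:

```cpp
#include <iostream>

// Assumed values of rocksdb_flush_log_at_trx_commit (illustrative only).
enum FlushMode {
  FLUSH_LOG_NEVER = 0,
  FLUSH_LOG_SYNC = 1,
  FLUSH_LOG_BACKGROUND = 2
};

// In SYNC mode every commit already syncs the WAL, so the background thread
// has nothing to add; in the other two modes it must both write out and
// fsync buffered WAL data, which is exactly what FlushWAL(true) does.
static bool background_thread_flushes_wal(FlushMode mode) {
  return mode != FLUSH_LOG_SYNC;
}

int main() {
  for (FlushMode m : {FLUSH_LOG_NEVER, FLUSH_LOG_SYNC, FLUSH_LOG_BACKGROUND})
    std::cout << m << ": " << background_thread_flushes_wal(m) << '\n';
}
```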
@@ -11272,6 +11824,7 @@ int rdb_dbug_set_ttl_snapshot_ts() { return rocksdb_debug_ttl_snapshot_ts; }
 int rdb_dbug_set_ttl_read_filter_ts() {
   return rocksdb_debug_ttl_read_filter_ts;
 }
+bool rdb_dbug_set_ttl_ignore_pk() { return rocksdb_debug_ttl_ignore_pk; }
 #endif
 
 void rdb_update_global_stats(const operation_type &type, uint count,
@@ -11496,6 +12049,17 @@ void rocksdb_set_delayed_write_rate(THD *thd, struct st_mysql_sys_var *var,
   RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
 }
 
+void rocksdb_set_max_latest_deadlocks(THD *thd, struct st_mysql_sys_var *var,
+                                      void *var_ptr, const void *save) {
+  RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
+  const uint32_t new_val = *static_cast<const uint32_t *>(save);
+  if (rocksdb_max_latest_deadlocks != new_val) {
+    rocksdb_max_latest_deadlocks = new_val;
+    rdb->SetDeadlockInfoBufferSize(rocksdb_max_latest_deadlocks);
+  }
+  RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
+}
+
 void rdb_set_collation_exception_list(const char *const exception_list) {
   DBUG_ASSERT(rdb_collation_exceptions != nullptr);
 
@@ -11545,6 +12109,18 @@ void rocksdb_set_bulk_load(THD *const thd, struct st_mysql_sys_var *const var
   *static_cast<bool *>(var_ptr) = *static_cast<const bool *>(save);
 }
 
+void rocksdb_set_bulk_load_allow_unsorted(
+    THD *const thd,
+    struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)),
+    void *const var_ptr, const void *const save) {
+  if (THDVAR(thd, bulk_load)) {
+    my_error(ER_ERROR_WHEN_EXECUTING_COMMAND, MYF(0), "SET",
+             "Cannot change this setting while bulk load is enabled");
+  } else {
+    *static_cast<bool *>(var_ptr) = *static_cast<const bool *>(save);
+  }
+}
+
 static void rocksdb_set_max_background_jobs(THD *thd,
                                             struct st_mysql_sys_var *const var,
                                             void *const var_ptr,
@@ -11739,6 +12315,19 @@ double ha_rocksdb::read_time(uint index, uint ranges, ha_rows rows) {
 }
 
 } // namespace myrocks
 
+/**
+  Construct and emit a duplicate key error message using information
+  from the table's record buffer.
+
+  @sa print_keydup_error(table, key, msg, errflag, thd, org_table_name).
+*/
+
+void print_keydup_error(TABLE *table, KEY *key, myf errflag,
+                        const THD *thd, const char *org_table_name)
+{
+  print_keydup_error(table, key, ER(ER_DUP_ENTRY_WITH_KEY_NAME), errflag);
+}
+
 /*
   Register the storage engine plugin outside of myrocks namespace
   so that mysql_declare_plugin does not get confused when it does
diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h
index 4cbee5cb64da9..2949f6d6588fd 100644
--- a/storage/rocksdb/ha_rocksdb.h
+++ b/storage/rocksdb/ha_rocksdb.h
@@ -22,6 +22,7 @@
 /* C++ standard header files */
 #include <set>
 #include <string>
+#include <unordered_map>
 #include <unordered_set>
 #include <vector>
@@ -249,6 +250,12 @@ enum collations_used {
 */
 #define ROCKSDB_SIZEOF_TTL_RECORD sizeof(longlong)
 
+/*
+  Maximum index prefix length in bytes.
+*/
+#define MAX_INDEX_COL_LEN_LARGE 3072
+#define MAX_INDEX_COL_LEN_SMALL 767
+
 /*
   MyRocks specific error codes. NB! Please make sure that you will update
   HA_ERR_ROCKSDB_LAST when adding new ones.  Also update the strings in
@@ -307,7 +314,14 @@ struct Rdb_table_handler {
 
   /* Stores cumulative table statistics */
   my_io_perf_atomic_t m_io_perf_read;
+  my_io_perf_atomic_t m_io_perf_write;
   Rdb_atomic_perf_counters m_table_perf_context;
+
+  /* Stores cached memtable estimate statistics */
+  std::atomic_uint m_mtcache_lock;
+  uint64_t m_mtcache_count;
+  uint64_t m_mtcache_size;
+  uint64_t m_mtcache_last_update;
 };
 
 class Rdb_key_def;
@@ -374,6 +388,8 @@ struct st_global_stats {
   ib_counter_t<ulonglong, 64> system_rows[ROWS_MAX];
 
   ib_counter_t<ulonglong, 64> queries[QUERIES_MAX];
+
+  ib_counter_t<ulonglong, 64> covered_secondary_key_lookups;
 };
 
 /* Struct used for exporting status to MySQL */
@@ -393,6 +409,8 @@ struct st_export_stats {
 
   ulonglong queries_point;
   ulonglong queries_range;
+
+  ulonglong covered_secondary_key_lookups;
 };
 
 /* Struct used for exporting RocksDB memory status */
@@ -401,6 +419,27 @@ struct st_memory_stats {
   ulonglong memtable_unflushed;
 };
 
+/* Struct used for exporting RocksDB IO stalls stats */
+struct st_io_stall_stats {
+  ulonglong level0_slowdown;
+  ulonglong level0_slowdown_with_compaction;
+  ulonglong level0_numfiles;
+  ulonglong level0_numfiles_with_compaction;
+  ulonglong stop_for_pending_compaction_bytes;
+  ulonglong slowdown_for_pending_compaction_bytes;
+  ulonglong memtable_compaction;
+  ulonglong memtable_slowdown;
+  ulonglong total_stop;
+  ulonglong total_slowdown;
+
+  st_io_stall_stats()
+      : level0_slowdown(0), level0_slowdown_with_compaction(0),
+        level0_numfiles(0), level0_numfiles_with_compaction(0),
+        stop_for_pending_compaction_bytes(0),
+        slowdown_for_pending_compaction_bytes(0), memtable_compaction(0),
+        memtable_slowdown(0), total_stop(0), total_slowdown(0) {}
+};
+
 } // namespace myrocks
 
 #include "./rdb_buff.h"
@@ -509,6 +548,12 @@ class ha_rocksdb : public my_core::handler {
     Pointer to the original TTL timestamp value (8 bytes) during UPDATE.
   */
   char m_ttl_bytes[ROCKSDB_SIZEOF_TTL_RECORD];
+  /*
+    The TTL timestamp value can change if the explicit TTL column is
+    updated. If we detect this when updating the PK, we indicate it here so
+    we know we must always update any SKs.
+  */
+  bool m_ttl_bytes_updated;
 
   /*
     rowkey of the last record we've read, in StorageFormat.
   */
   String m_last_rowkey;
@@ -555,7 +600,9 @@ class ha_rocksdb : public my_core::handler {
   bool m_update_scope_is_valid;
 
   /* SST information used for bulk loading the primary key */
-  std::shared_ptr<Rdb_sst_info> m_sst_info;
+  std::unique_ptr<Rdb_sst_info> m_sst_info;
+  /* External merge sorts for bulk load: key ID -> merge sort instance */
+  std::unordered_map<GL_INDEX_ID, Rdb_index_merge> m_key_merge;
   Rdb_transaction *m_bulk_load_tx;
   /* Mutex to protect finalizing bulk load */
   mysql_mutex_t m_bulk_load_mutex;
@@ -652,6 +699,13 @@ class ha_rocksdb : public my_core::handler {
   /* Setup field_decoders based on type of scan and table->read_set */
   void setup_read_decoders();
 
+  /*
+    For the active index, indicates which columns must be covered for the
+    current lookup to be covered. If the bitmap field is null, that means this
+    index does not cover the current lookup for any record.
+  */
+  MY_BITMAP m_lookup_bitmap = {nullptr, nullptr, nullptr, 0, 0};
+
   /*
     Number of bytes in on-disk (storage) record format that are used for
     storing SQL NULL flags.
@@ -889,11 +943,7 @@ class ha_rocksdb : public my_core::handler {
     DBUG_RETURN(MAX_REF_PARTS);
   }
 
-  uint max_supported_key_part_length() const override {
-    DBUG_ENTER_FUNC();
-
-    DBUG_RETURN(2048);
-  }
+  uint max_supported_key_part_length() const override;
 
   /**
     @brief
     unireg.cc will call this to make sure that the storage engine can handle
@@ -1072,9 +1122,13 @@ class ha_rocksdb : public my_core::handler {
                             rocksdb::Slice *const packed_rec)
       MY_ATTRIBUTE((__nonnull__));
 
-  bool should_hide_ttl_rec(const rocksdb::Slice &ttl_rec_val,
+  bool should_hide_ttl_rec(const Rdb_key_def &kd,
+                           const rocksdb::Slice &ttl_rec_val,
                            const int64_t curr_ts)
       MY_ATTRIBUTE((__warn_unused_result__));
+  void rocksdb_skip_expired_records(const Rdb_key_def &kd,
+                                    rocksdb::Iterator *const iter,
+                                    bool seek_backward);
 
   int index_first_intern(uchar *buf)
       MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
@@ -1107,8 +1161,10 @@ class ha_rocksdb : public my_core::handler {
                           struct unique_sk_buf_info *sk_info)
       MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
   int bulk_load_key(Rdb_transaction *const tx, const Rdb_key_def &kd,
-                    const rocksdb::Slice &key, const rocksdb::Slice &value)
+                    const rocksdb::Slice &key, const rocksdb::Slice &value,
+                    bool sort)
       MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
+  void update_bytes_written(ulonglong bytes_written);
   int update_pk(const Rdb_key_def &kd, const struct update_row_info &row_info,
                 const bool &pk_changed) MY_ATTRIBUTE((__warn_unused_result__));
   int update_sk(const TABLE *const table_arg, const Rdb_key_def &kd,
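The header now only declares `max_supported_key_part_length()`; its out-of-line definition is not part of this excerpt. Judging from the two new `MAX_INDEX_COL_LEN_*` constants it presumably selects between them based on a large-prefix setting. A self-contained sketch of that logic (the boolean toggle is an assumption borrowed from upstream MyRocks, where it is a global system variable, not shown here):

```cpp
#include <cstdio>

// Constants mirrored from the header above.
constexpr unsigned MAX_INDEX_COL_LEN_LARGE = 3072;
constexpr unsigned MAX_INDEX_COL_LEN_SMALL = 767;

// Hypothetical shape of the new out-of-line definition: the large-prefix
// toggle decides which maximum index prefix length is advertised.
static unsigned max_supported_key_part_length(bool large_prefix) {
  return large_prefix ? MAX_INDEX_COL_LEN_LARGE : MAX_INDEX_COL_LEN_SMALL;
}

int main() {
  std::printf("%u %u\n", max_supported_key_part_length(true),
              max_supported_key_part_length(false));  // 3072 767
}
```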
diff --git a/storage/rocksdb/ha_rocksdb_proto.h b/storage/rocksdb/ha_rocksdb_proto.h
index e465ed7cf25a1..85c3968cc9987 100644
--- a/storage/rocksdb/ha_rocksdb_proto.h
+++ b/storage/rocksdb/ha_rocksdb_proto.h
@@ -77,6 +77,7 @@ bool rdb_is_ttl_read_filtering_enabled();
 int rdb_dbug_set_ttl_rec_ts();
 int rdb_dbug_set_ttl_snapshot_ts();
 int rdb_dbug_set_ttl_read_filter_ts();
+bool rdb_dbug_set_ttl_ignore_pk();
 #endif
 
 enum operation_type : int;
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/ddl_high_priority.inc b/storage/rocksdb/mysql-test/rocksdb/include/ddl_high_priority.inc
new file mode 100644
index 0000000000000..7adca5d7cf251
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/ddl_high_priority.inc
@@ -0,0 +1,174 @@
+###############################################################################
+# Common test file for high priority DDL
+###############################################################################
+
+
+create user test_user1@localhost;
+grant all on test to test_user1@localhost;
+create user test_user2@localhost;
+grant all on test to test_user2@localhost;
+
+# Default values
+--let $con_block = con1
+--let $con_kill = default
+--let $should_kill = 1
+--let $recreate_table = 1
+--let $throw_error = 1
+
+##
+## killing conflicting shared locks by alter table
+##
+
+--let $blocking_sql = lock tables t1 read;
+--let $cmd = alter table t1 modify i bigint;
+--let $high_priority_cmd = alter high_priority table t1 modify i bigint;
+
+--source include/ddl_high_priority_module.inc
+
+##
+## killing conflicting shared lock in a transaction
+## the transaction will roll back
+##
+
+--let $blocking_sql = begin; insert into t1 values (4); select i from t1;
+--let $cmd = alter table t1 rename t1_new;
+--let $high_priority_cmd = alter high_priority table t1 rename t1_new;
+
+--source include/ddl_high_priority_module.inc
+
+select * from t1_new;
+drop table t1_new;
+
+##
+## simulate conflicting DDL which will not be killed
+##
+
+# Simulate conflicting DDL
+# This will hold MDL_SHARED_NO_READ_WRITE, which may be upgraded to exclusive
+# locks to run DDLs like ALTER TABLE
+# the upgradable/exclusive lock should not be killed
+
+--let $should_kill = 0
+
+--let $blocking_sql = lock tables t1 write;
+--let $cmd = drop table t1;
+--let $high_priority_cmd = drop high_priority table t1;
+
+--source include/ddl_high_priority_module.inc
+
+# restore $should_kill
+--let $should_kill = 1
+
+##
+## killing conflicting transaction by drop table DDL
+##
+
+--let $blocking_sql = lock tables t1 read; begin; insert into t1 values (4);
+--let $cmd = drop table t1;
+--let $high_priority_cmd = drop high_priority table t1;
+
+--source include/ddl_high_priority_module.inc
+
+##
+## no effect for regular users
+##
+
+connect (con2,localhost,test_user2,,test,,);
+# $con_kill is a regular user
+--let $con_kill = con2
+--let $should_kill = 0
+
+--let $blocking_sql = lock tables t1 read;
+--let $cmd = alter table t1 modify i bigint;
+--let $high_priority_cmd = alter high_priority table t1 modify i bigint;
+
+--source include/ddl_high_priority_module.inc
+
+disconnect con2;
+
+# restore $con_kill
+--let $con_kill = default
+# restore $should_kill
+--let $should_kill = 1
+
+##
+## create/drop index
+##
+
+# create index
+
+--let $blocking_sql = lock tables t1 read;
+--let $cmd = create index idx1 on t1 (i);
+--let $high_priority_cmd = create high_priority index idx1 on t1 (i);
+
+--source include/ddl_high_priority_module.inc
+
+# drop index (use the previously created table)
+--let $recreate_table = 0
+
+--let $cmd = drop index idx1 on t1;
+--let $high_priority_cmd = drop high_priority index idx1 on t1;
+
+--source include/ddl_high_priority_module.inc
+
+# restore $recreate_table
+--let $recreate_table = 1
+
+##
+## high_priority truncate table
+##
+
+--let $blocking_sql = lock tables t1 read;
+--let $cmd = truncate t1;
+--let $high_priority_cmd = truncate high_priority t1;
+
+--source include/ddl_high_priority_module.inc
+
+##
+## high_priority create/drop trigger
+##
+
+--let $blocking_sql = lock tables t1 read;
+--let $cmd = create trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;
+--let $high_priority_cmd = create high_priority trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i;
+
+--source include/ddl_high_priority_module.inc
+
+# drop trigger (use the previously created table)
+--let $recreate_table = 0
+
+--let $cmd = drop trigger ins_sum;
+--let $high_priority_cmd = drop high_priority trigger ins_sum;
+
+--source include/ddl_high_priority_module.inc
+
+# restore $recreate_table
+--let $recreate_table = 1
+
+##
+## high_priority optimize table
+##
+## "optimize table" doesn't throw errors. It catches all errors, and
+## returns a result set in a table
+##
+
+--let $throw_error = 0
+
+--let $blocking_sql = lock tables t1 read;
+--let $cmd = optimize table t1;
+--let $high_priority_cmd = optimize high_priority table t1;
+
+--source include/ddl_high_priority_module.inc
+
+# restore throw_error
+--let $throw_error = 1
+
+##
+## clean up
+##
+
+drop user test_user1@localhost;
+drop user test_user2@localhost;
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
diff --git a/storage/rocksdb/mysql-test/rocksdb/include/ddl_high_priority_module.inc b/storage/rocksdb/mysql-test/rocksdb/include/ddl_high_priority_module.inc
new file mode 100644
index 0000000000000..ffbdc3064554a
--- /dev/null
+++ b/storage/rocksdb/mysql-test/rocksdb/include/ddl_high_priority_module.inc
@@ -0,0 +1,141 @@
+###############################################################################
+# This file serves as a function/module for the ddl_high_priority test
+#
+# Usage: set the following variables before including
+#
+# $use_sys_var: whether to use the sys var or the syntax to trigger
+#               high_priority
+#               value: 0/1
+#
+# $con_block: a blocking connection
+#             value: con1/con2/default
+#
+# $con_kill: a connection that will attempt to kill $con_block
+#            value: con1/con2/default
+#
+# $cmd: a regular command to evaluate (to use with the sys var)
+#       value: sql command
+#
+# $high_priority_cmd: a high_priority command to evaluate
+#                     value: sql command
+#
+# $should_kill: whether $con_block is expected to be killed
+#               value: 0/1
+#
+# $recreate_table: whether to recreate the test table
+#                  value: 0/1
+#
+# $throw_error: whether a command will throw a lock_wait_timeout error.
+#               Note, optimize table catches all errors.
+#               value: 0/1
+###############################################################################
+
+##
+## Print out the parameters of the test set
+## (useful for debugging)
+##
+--echo
+--echo ## Test parameters:
+--echo ## use_sys_var = $use_sys_var
+--echo ## con_block = $con_block
+--echo ## con_kill = $con_kill
+--echo ## cmd = $cmd
+--echo ## high_priority_cmd = $high_priority_cmd
+--echo ## should_kill = $should_kill
+--echo ## recreate_table = $recreate_table
+--echo ## throw_error = $throw_error
+--echo
+
+
+##
+## Setup
+##
+
+connection default;
+
+# create con1
+connect (con1,localhost,test_user1,,test,,);
+
+if ($recreate_table) {
+  # create t1
+  --disable_warnings
+  drop table if exists t1;
+  --enable_warnings
+  create table t1 (i int);
+  show create table t1;
+  insert into t1 values (1), (2), (3);
+}
+
+##
+## Testing
+##
+
+--echo connection: $con_block
+--connection $con_block
+--eval $blocking_sql
+
+--echo connection: $con_kill
+--connection $con_kill
+set lock_wait_timeout = 0.02;
+set high_priority_lock_wait_timeout = 0.02;
+
+describe t1;
+
+--echo connection: default (for show processlist)
+connection default;
+--echo # both $con_block and $con_kill exist
+--replace_column 1 3 5 6