diff --git a/mysql-test/suite/innodb/r/log_data_file_size.result b/mysql-test/suite/innodb/r/log_data_file_size.result new file mode 100644 index 0000000000000..d33752b089cfd --- /dev/null +++ b/mysql-test/suite/innodb/r/log_data_file_size.result @@ -0,0 +1,8 @@ +SET GLOBAL innodb_file_per_table=0; +CREATE TABLE t(a INT)ENGINE=InnoDB; +SET GLOBAL innodb_file_per_table=1; +CREATE TABLE ibd4(a INT UNIQUE)ENGINE=InnoDB; +CREATE TABLE ibd4f(a INT UNIQUE)ENGINE=InnoDB; +CREATE TABLE ibd5(a INT UNIQUE, b INT UNIQUE)ENGINE=InnoDB; +# Kill the server +DROP TABLE t,ibd4,ibd4f,ibd5; diff --git a/mysql-test/suite/innodb/t/log_data_file_size.opt b/mysql-test/suite/innodb/t/log_data_file_size.opt new file mode 100644 index 0000000000000..d9a364a32872d --- /dev/null +++ b/mysql-test/suite/innodb/t/log_data_file_size.opt @@ -0,0 +1,2 @@ +--loose-innodb-sys-indexes +--innodb-data-file-path=ibdata1:1M:autoextend diff --git a/mysql-test/suite/innodb/t/log_data_file_size.test b/mysql-test/suite/innodb/t/log_data_file_size.test new file mode 100644 index 0000000000000..0f40474e09b66 --- /dev/null +++ b/mysql-test/suite/innodb/t/log_data_file_size.test @@ -0,0 +1,65 @@ +--source include/have_innodb.inc +--source include/not_embedded.inc + +let INNODB_PAGE_SIZE=`select @@innodb_page_size`; +let MYSQLD_DATADIR=`select @@datadir`; +let MYSQLD_IS_DEBUG=`select version() like '%debug%'`; +--source include/no_checkpoint_start.inc +SET GLOBAL innodb_file_per_table=0; +CREATE TABLE t(a INT)ENGINE=InnoDB; +let INNODB_ROOT_PAGE= `SELECT page_no FROM INFORMATION_SCHEMA.INNODB_SYS_INDEXES WHERE name='GEN_CLUST_INDEX'`; +SET GLOBAL innodb_file_per_table=1; + +CREATE TABLE ibd4(a INT UNIQUE)ENGINE=InnoDB; +CREATE TABLE ibd4f(a INT UNIQUE)ENGINE=InnoDB; +CREATE TABLE ibd5(a INT UNIQUE, b INT UNIQUE)ENGINE=InnoDB; + +let $drop_tables= DROP TABLE t,ibd4,ibd4f,ibd5; +--let CLEANUP_IF_CHECKPOINT= $drop_tables; +--source ../include/no_checkpoint_end.inc + +perl; +use Fcntl 'SEEK_CUR', 'SEEK_END'; + +my 
$page_size = $ENV{'INNODB_PAGE_SIZE'}; +my $restart = 'restart'; +if ($ENV{'MYSQLD_IS_DEBUG'}) +{ + # It is impractical to ensure that CREATE TABLE t will extend ibdata1. + # We rely on --innodb-data-file-size-debug (passed via $restart below) + # to recover from this fault injection if no size change was redo-logged. + my $root = $ENV{'INNODB_ROOT_PAGE'}; + open(FILE, "+<", "$ENV{'MYSQLD_DATADIR'}ibdata1") or die; + my $size = sysseek(FILE, 0, SEEK_END) / $page_size; + seek(FILE, $page_size * ($root + 1), SEEK_SET) or die; + my $empty_tail= 1; + while(<FILE>) { unless (/^\0*\z/) { $empty_tail= 0; last } } + if ($empty_tail) + { + $restart = 'restart: --innodb-data-file-size-debug=' . $size; + truncate(FILE, $page_size * $root); + } + close FILE; +} +open(FILE, ">$ENV{MYSQLTEST_VARDIR}/log/start_mysqld.txt") || die; +print FILE '--exec echo "', $restart, '" > $_expect_file_name +--enable_reconnect +--source include/wait_until_connected_again.inc +--disable_reconnect +'; +close FILE; +open(FILE, "+<", "$ENV{'MYSQLD_DATADIR'}test/ibd4.ibd") or die; +truncate(FILE, $page_size * 4); +close FILE; +open(FILE, "+<", "$ENV{'MYSQLD_DATADIR'}test/ibd4f.ibd") or die; +truncate(FILE, $page_size * 4 + 1234); +close FILE; +open(FILE, "+<", "$ENV{'MYSQLD_DATADIR'}test/ibd5.ibd") or die; +truncate(FILE, $page_size * 5); +close FILE; +EOF + +--source $MYSQLTEST_VARDIR/log/start_mysqld.txt +--remove_file $MYSQLTEST_VARDIR/log/start_mysqld.txt + +eval $drop_tables; diff --git a/mysql-test/suite/sys_vars/r/sysvars_innodb.result b/mysql-test/suite/sys_vars/r/sysvars_innodb.result index 602bbc50f776c..320d34fc63b23 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_innodb.result +++ b/mysql-test/suite/sys_vars/r/sysvars_innodb.result @@ -635,6 +635,20 @@ NUMERIC_BLOCK_SIZE NULL ENUM_VALUE_LIST NULL READ_ONLY YES COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME INNODB_DATA_FILE_SIZE_DEBUG +SESSION_VALUE NULL +GLOBAL_VALUE 0 +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE 0 +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE 
BIGINT UNSIGNED +VARIABLE_COMMENT InnoDB system tablespace size to be set in recovery. +NUMERIC_MIN_VALUE 0 +NUMERIC_MAX_VALUE 4294967295 +NUMERIC_BLOCK_SIZE 0 +ENUM_VALUE_LIST NULL +READ_ONLY YES +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME INNODB_DATA_HOME_DIR SESSION_VALUE NULL GLOBAL_VALUE diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index a78a438d92ab3..fae9ee3dbc1af 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -969,6 +969,348 @@ fil_try_to_close_file_in_LRU( return(false); } +/** Flush any writes cached by the file system. +@param[in,out] space tablespace */ +static +void +fil_flush_low(fil_space_t* space) +{ + ut_ad(mutex_own(&fil_system->mutex)); + ut_ad(space); + ut_ad(!space->stop_new_ops); + + if (fil_buffering_disabled(space)) { + + /* No need to flush. User has explicitly disabled + buffering. */ + ut_ad(!space->is_in_unflushed_spaces); + ut_ad(fil_space_is_flushed(space)); + ut_ad(space->n_pending_flushes == 0); + +#ifdef UNIV_DEBUG + for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain); + node != NULL; + node = UT_LIST_GET_NEXT(chain, node)) { + ut_ad(node->modification_counter + == node->flush_counter); + ut_ad(node->n_pending_flushes == 0); + } +#endif /* UNIV_DEBUG */ + + return; + } + + /* Prevent dropping of the space while we are flushing */ + space->n_pending_flushes++; + + for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain); + node != NULL; + node = UT_LIST_GET_NEXT(chain, node)) { + + int64_t old_mod_counter = node->modification_counter; + + if (old_mod_counter <= node->flush_counter) { + continue; + } + + ut_a(node->is_open); + + switch (space->purpose) { + case FIL_TYPE_TEMPORARY: + ut_ad(0); // we already checked for this + case FIL_TYPE_TABLESPACE: + case FIL_TYPE_IMPORT: + fil_n_pending_tablespace_flushes++; + break; + case FIL_TYPE_LOG: + fil_n_pending_log_flushes++; + fil_n_log_flushes++; + break; + } +#ifdef _WIN32 + if (node->is_raw_disk) { + + goto 
skip_flush; + } +#endif /* _WIN32 */ +retry: + if (node->n_pending_flushes > 0) { + /* We want to avoid calling os_file_flush() on + the file twice at the same time, because we do + not know what bugs OS's may contain in file + i/o */ + + int64_t sig_count = os_event_reset(node->sync_event); + + mutex_exit(&fil_system->mutex); + + os_event_wait_low(node->sync_event, sig_count); + + mutex_enter(&fil_system->mutex); + + if (node->flush_counter >= old_mod_counter) { + + goto skip_flush; + } + + goto retry; + } + + ut_a(node->is_open); + node->n_pending_flushes++; + + mutex_exit(&fil_system->mutex); + + os_file_flush(node->handle); + + mutex_enter(&fil_system->mutex); + + os_event_set(node->sync_event); + + node->n_pending_flushes--; +skip_flush: + if (node->flush_counter < old_mod_counter) { + node->flush_counter = old_mod_counter; + + if (space->is_in_unflushed_spaces + && fil_space_is_flushed(space)) { + + space->is_in_unflushed_spaces = false; + + UT_LIST_REMOVE( + fil_system->unflushed_spaces, + space); + } + } + + switch (space->purpose) { + case FIL_TYPE_TEMPORARY: + break; + case FIL_TYPE_TABLESPACE: + case FIL_TYPE_IMPORT: + fil_n_pending_tablespace_flushes--; + continue; + case FIL_TYPE_LOG: + fil_n_pending_log_flushes--; + continue; + } + + ut_ad(0); + } + + space->n_pending_flushes--; +} + +/** +Fill the pages with NULs +@param[in] node File node +@param[in] page_size physical page size +@param[in] start Offset from the start of the file in bytes +@param[in] len Length in bytes +@param[in] read_only_mode + if true, then read only mode checks are enforced. 
+@return DB_SUCCESS or error code */ +static +dberr_t +fil_write_zeros( + const fil_node_t* node, + ulint page_size, + os_offset_t start, + ulint len, + bool read_only_mode) +{ + ut_a(len > 0); + + /* Extend at most 1M at a time */ + ulint n_bytes = ut_min(static_cast<ulint>(1024 * 1024), len); + byte* ptr = reinterpret_cast<byte*>(ut_zalloc_nokey(n_bytes + + page_size)); + byte* buf = reinterpret_cast<byte*>(ut_align(ptr, page_size)); + + os_offset_t offset = start; + dberr_t err = DB_SUCCESS; + const os_offset_t end = start + len; + IORequest request(IORequest::WRITE); + + while (offset < end) { + +#ifdef UNIV_HOTBACKUP + err = os_file_write( + request, node->name, node->handle, buf, offset, + n_bytes); +#else + err = os_aio( + request, OS_AIO_SYNC, node->name, + node->handle, buf, offset, n_bytes, read_only_mode, + NULL, NULL, NULL); +#endif /* UNIV_HOTBACKUP */ + + if (err != DB_SUCCESS) { + break; + } + + offset += n_bytes; + + n_bytes = ut_min(n_bytes, static_cast<ulint>(end - offset)); + + DBUG_EXECUTE_IF("ib_crash_during_tablespace_extension", + DBUG_SUICIDE();); + } + + ut_free(ptr); + + return(err); +} + +/** Try to extend a tablespace. +@param[in,out] space tablespace to be extended +@param[in,out] node last file of the tablespace +@param[in] size desired size in number of pages +@param[out] success whether the operation succeeded +@return whether the operation should be retried; if so, fil_system->mutex has been released, otherwise it is still held */ +static UNIV_COLD __attribute__((warn_unused_result, nonnull)) +bool +fil_space_extend_must_retry( + fil_space_t* space, + fil_node_t* node, + ulint size, + bool* success) +{ + ut_ad(mutex_own(&fil_system->mutex)); + ut_ad(UT_LIST_GET_LAST(space->chain) == node); + ut_ad(size >= FIL_IBD_FILE_INITIAL_SIZE); + + *success = space->size >= size; + + if (*success) { + /* Space already big enough */ + return(false); + } + + if (node->being_extended) { + /* Another thread is currently extending the file. Wait + for it to finish. 
+ It'd have been better to use event driven mechanism but + the entire module is peppered with polling stuff. */ + mutex_exit(&fil_system->mutex); + os_thread_sleep(100000); + return(true); + } + + node->being_extended = true; + + if (!fil_node_prepare_for_io(node, fil_system, space)) { + /* The tablespace data file, such as .ibd file, is missing */ + node->being_extended = false; + return(false); + } + + /* At this point it is safe to release fil_system mutex. No + other thread can rename, delete, close or extend the file because + we have set the node->being_extended flag. */ + mutex_exit(&fil_system->mutex); + + ut_ad(size > space->size); + + ulint pages_added = size - space->size; + const page_size_t pageSize(space->flags); + const ulint page_size = pageSize.physical(); + + os_offset_t start = os_file_get_size(node->handle); + ut_a(start != (os_offset_t) -1); + start &= ~os_offset_t(page_size - 1); + const os_offset_t end + = os_offset_t(node->size + pages_added) * page_size; + + *success = end <= start; + + if (!*success) { + DBUG_EXECUTE_IF("ib_crash_during_tablespace_extension", + DBUG_SUICIDE();); + +#ifdef HAVE_POSIX_FALLOCATE + /* On Linux, FusionIO atomic writes cannot extend + files, so we must use posix_fallocate(). */ + int ret = posix_fallocate(node->handle, start, + end - start); + + /* EINVAL means that fallocate() is not supported. + One known case is Linux ext3 file system with O_DIRECT. */ + if (ret == 0) { + } else if (ret != EINVAL) { + ib::error() + << "posix_fallocate(): Failed to preallocate" + " data for file " + << node->name << ", desired size " + << end << " bytes." + " Operating system error number " + << ret << ". Check" + " that the disk is not full or a disk quota" + " exceeded. Make sure the file system supports" + " this function. 
Some operating system error" + " numbers are described at " REFMAN + " operating-system-error-codes.html"; + } else +#endif + if (DB_SUCCESS != fil_write_zeros( + node, page_size, start, + static_cast<ulint>(end - start), + space->purpose == FIL_TYPE_TEMPORARY + && srv_read_only_mode)) { + ib::warn() + << "Error while writing " << end - start + << " zeroes to " << node->name + << " starting at offset " << start; + } + + /* Check how many pages were actually added; the extension succeeded only if the file now reaches the requested end */ + os_offset_t actual_end = os_file_get_size(node->handle); + ut_a(actual_end != static_cast<os_offset_t>(-1)); + ut_a(actual_end >= start); + + *success = actual_end >= end; + pages_added = static_cast<ulint>( + (std::min(actual_end, end) - start) / page_size); + } + + os_has_said_disk_full = !*success; + + mutex_enter(&fil_system->mutex); + + space->size += pages_added; + + ut_a(node->being_extended); + node->being_extended = false; + node->size += pages_added; + const ulint pages_in_MiB = node->size + & ~((1 << (20 - UNIV_PAGE_SIZE_SHIFT)) - 1); + + fil_node_complete_io(node, fil_system, IORequestWrite); + + /* Keep the last data file size info up to date, rounded to + full megabytes */ + + switch (space->id) { + case TRX_SYS_SPACE: + srv_sys_space.set_last_file_size(pages_in_MiB); + fil_flush_low(space); + return(false); + default: + // TODO: reject CREATE TEMPORARY TABLE...ROW_FORMAT=COMPRESSED + ut_ad(space->purpose == FIL_TYPE_TABLESPACE + || space->purpose == FIL_TYPE_TEMPORARY); + if (space->purpose == FIL_TYPE_TABLESPACE) { + fil_flush_low(space); + } + return(false); + case SRV_TMP_SPACE_ID: + ut_ad(space->purpose == FIL_TYPE_TEMPORARY); + srv_tmp_space.set_last_file_size(pages_in_MiB); + return(false); + } + +} + /*******************************************************************//** Reserves the fil_system mutex and tries to make sure we can open at least one file while holding it. 
This should be called before calling @@ -979,28 +1321,22 @@ fil_mutex_enter_and_prepare_for_io( /*===============================*/ ulint space_id) /*!< in: space id */ { - fil_space_t* space; - bool success; - bool print_info = false; - ulint count = 0; - ulint count2 = 0; - - for (;;) { + for (ulint count = 0, count2 = 0;;) { mutex_enter(&fil_system->mutex); - if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) { - /* We keep log files and system tablespace files always - open; this is important in preventing deadlocks in this - module, as a page read completion often performs - another read from the insert buffer. The insert buffer - is in tablespace 0, and we cannot end up waiting in - this function. */ - return; + if (space_id >= SRV_LOG_SPACE_FIRST_ID) { + /* We keep log files always open. */ + break; } - space = fil_space_get_by_id(space_id); + fil_space_t* space = fil_space_get_by_id(space_id); + + if (space == NULL) { + break; + } - if (space != NULL && space->stop_ios) { + if (space->stop_ios) { + ut_ad(space->id != 0); /* We are going to do a rename file and want to stop new i/o's for a while. */ @@ -1010,9 +1346,7 @@ fil_mutex_enter_and_prepare_for_io( " time " << count2; } - mutex_exit(&fil_system->mutex); - -#ifndef UNIV_HOTBACKUP + mutex_exit(&fil_system->mutex); /* Wake the i/o-handler threads to make sure pending i/o's are performed */ @@ -1025,8 +1359,6 @@ fil_mutex_enter_and_prepare_for_io( fil_rename_tablespace() as well. */ os_thread_sleep(20000); -#endif /* UNIV_HOTBACKUP */ - /* Flush tablespaces so that we can close modified files in the LRU list */ fil_flush_file_spaces(FIL_TYPE_TABLESPACE); @@ -1038,68 +1370,107 @@ fil_mutex_enter_and_prepare_for_io( continue; } - if (fil_system->n_open < fil_system->max_n_open) { - - return; - } - - /* If the file is already open, no need to do anything; if the - space does not exist, we handle the situation in the function - which called this function. 
*/ - - if (!space) { - return; - } - - fil_node_t* node = UT_LIST_GET_FIRST(space->chain); + fil_node_t* node = UT_LIST_GET_LAST(space->chain); + ut_ad(space->id == 0 + || node == UT_LIST_GET_FIRST(space->chain)); - if (!node || node->is_open) { - return; + if (space->id == 0) { + /* We keep the system tablespace files always + open; this is important in preventing + deadlocks in this module, as a page read + completion often performs another read from + the insert buffer. The insert buffer is in + tablespace 0, and we cannot end up waiting in + this function. */ + } else if (!node || node->is_open) { + /* If the file is already open, no need to do + anything; if the space does not exist, we handle the + situation in the function which called this + function */ + } else { + while (fil_system->n_open >= fil_system->max_n_open) { + /* Too many files are open */ + if (fil_try_to_close_file_in_LRU(count > 1)) { + /* No problem */ + } else if (count >= 2) { + ib::warn() << "innodb_open_files=" + << fil_system->max_n_open + << " is exceeded (" + << fil_system->n_open + << ") files stay open)"; + break; + } else { + mutex_exit(&fil_system->mutex); + os_aio_simulated_wake_handler_threads(); + os_thread_sleep(20000); + /* Flush tablespaces so that we can + close modified files in the LRU list; the mutex must be re-acquired before the inner loop re-checks n_open */ + fil_flush_file_spaces(FIL_TYPE_TABLESPACE); + mutex_enter(&fil_system->mutex); + count++; + continue; + } + } } - if (count > 1) { - print_info = true; - } + if (ulint size = UNIV_UNLIKELY(space->recv_size)) { + ut_ad(node); + bool success; + if (fil_space_extend_must_retry(space, node, size, + &success)) { + continue; + } - /* Too many files are open, try to close some */ - do { - success = fil_try_to_close_file_in_LRU(print_info); + ut_ad(mutex_own(&fil_system->mutex)); + /* Crash recovery requires the file extension + to succeed. */ + ut_a(success); + /* InnoDB data files cannot shrink. 
*/ + ut_a(space->size >= size); - } while (success - && fil_system->n_open >= fil_system->max_n_open); + /* There could be multiple concurrent I/O requests for + this tablespace (multiple threads trying to extend + this tablespace). - if (fil_system->n_open < fil_system->max_n_open) { - /* Ok */ - return; - } + Also, fil_space_set_recv_size() may have been invoked + again during the file extension while fil_system->mutex + was not being held by us. - if (count >= 2) { - ib::warn() << "Too many (" << fil_system->n_open - << ") files stay open while the maximum" - " allowed value would be " - << fil_system->max_n_open << ". You may need" - " to raise the value of innodb_open_files in" - " my.cnf."; + Only if space->recv_size matches what we read + originally, reset the field. In this way, a + subsequent I/O request will handle any pending + fil_space_set_recv_size(). */ - return; + if (size == space->recv_size) { + space->recv_size = 0; + } } - mutex_exit(&fil_system->mutex); + break; + } +} -#ifndef UNIV_HOTBACKUP - /* Wake the i/o-handler threads to make sure pending i/o's are - performed */ - os_aio_simulated_wake_handler_threads(); +/** Try to extend a tablespace if it is smaller than the specified size. +@param[in,out] space tablespace +@param[in] size desired size in pages +@return whether the tablespace is at least as big as requested */ +bool +fil_space_extend( + fil_space_t* space, + ulint size) +{ + ut_ad(!srv_read_only_mode || space->purpose == FIL_TYPE_TEMPORARY); - os_thread_sleep(20000); -#endif /* !UNIV_HOTBACKUP */ - /* Flush tablespaces so that we can close modified files in - the LRU list. */ + bool success; - fil_flush_file_spaces(FIL_TYPE_TABLESPACE); + do { + fil_mutex_enter_and_prepare_for_io(space->id); + } while (fil_space_extend_must_retry( + space, UT_LIST_GET_LAST(space->chain), size, + &success)); - count++; - } + mutex_exit(&fil_system->mutex); + return(success); } /** Prepare to free a file node object from a tablespace memory cache. 
@@ -1546,6 +1917,24 @@ fil_space_get_first_path( return(path); } +/** Set the recovered size of a tablespace in pages. +@param id tablespace ID +@param size recovered size in pages */ +UNIV_INTERN +void +fil_space_set_recv_size(ulint id, ulint size) +{ + mutex_enter(&fil_system->mutex); + ut_ad(size); + ut_ad(id < SRV_LOG_SPACE_FIRST_ID); + + if (fil_space_t* space = fil_space_get_space(id)) { + space->recv_size = size; + } + + mutex_exit(&fil_system->mutex); +} + /*******************************************************************//** Returns the size of the space in pages. The tablespace must be cached in the memory cache. @@ -3906,8 +4295,7 @@ fil_ibd_open( } #ifdef UNIV_LINUX - const bool atomic_write = !srv_use_doublewrite_buf - && df_default.is_open() + const bool atomic_write = !srv_use_doublewrite_buf && df_default.is_open() && fil_fusionio_enable_atomic_write(df_default.handle()); #else const bool atomic_write = false; @@ -4785,415 +5173,96 @@ fil_space_for_table_exists_in_mem( fil_rename_tablespace( fnamespace->id, UT_LIST_GET_FIRST(fnamespace->chain)->name, - tmp_name, NULL); - } - - DBUG_EXECUTE_IF("ib_crash_after_adjust_one_fil_space", - DBUG_SUICIDE();); - - fil_rename_tablespace( - id, UT_LIST_GET_FIRST(space->chain)->name, - name, NULL); - - DBUG_EXECUTE_IF("ib_crash_after_adjust_fil_space", - DBUG_SUICIDE();); - - mutex_enter(&fil_system->mutex); - fnamespace = fil_space_get_by_name(name); - ut_ad(space == fnamespace); - mutex_exit(&fil_system->mutex); - - return(true); - } - - if (!print_error_if_does_not_exist) { - - mutex_exit(&fil_system->mutex); - - return(false); - } - - if (space == NULL) { - if (fnamespace == NULL) { - if (print_error_if_does_not_exist) { - fil_report_missing_tablespace(name, id); - } - } else { - ib::error() << "Table " << name << " in InnoDB data" - " dictionary has tablespace id " << id - << ", but a tablespace with that id does not" - " exist. 
There is a tablespace of name " - << fnamespace->name << " and id " - << fnamespace->id << ", though. Have you" - " deleted or moved .ibd files?"; - } -error_exit: - ib::warn() << TROUBLESHOOT_DATADICT_MSG; - - mutex_exit(&fil_system->mutex); - - return(false); - } - - if (0 != strcmp(space->name, name)) { - - ib::error() << "Table " << name << " in InnoDB data dictionary" - " has tablespace id " << id << ", but the tablespace" - " with that id has name " << space->name << "." - " Have you deleted or moved .ibd files?"; - - if (fnamespace != NULL) { - ib::error() << "There is a tablespace with the right" - " name: " << fnamespace->name << ", but its id" - " is " << fnamespace->id << "."; - } - - goto error_exit; - } - - mutex_exit(&fil_system->mutex); - - return(false); -} -#endif /* !UNIV_HOTBACKUP */ -/** Return the space ID based on the tablespace name. -The tablespace must be found in the tablespace memory cache. -This call is made from external to this module, so the mutex is not owned. -@param[in] tablespace Tablespace name -@return space ID if tablespace found, ULINT_UNDEFINED if space not. */ -ulint -fil_space_get_id_by_name( - const char* tablespace) -{ - mutex_enter(&fil_system->mutex); - - /* Search for a space with the same name. */ - fil_space_t* space = fil_space_get_by_name(tablespace); - ulint id = (space == NULL) ? ULINT_UNDEFINED : space->id; - - mutex_exit(&fil_system->mutex); - - return(id); -} - -/** -Fill the pages with NULs -@param[in] node File node -@param[in] page_size physical page size -@param[in] start Offset from the start of the file in bytes -@param[in] len Length in bytes -@param[in] read_only_mode - if true, then read only mode checks are enforced. 
-@return DB_SUCCESS or error code */ -static -dberr_t -fil_write_zeros( - const fil_node_t* node, - ulint page_size, - os_offset_t start, - ulint len, - bool read_only_mode) -{ - ut_a(len > 0); - - /* Extend at most 1M at a time */ - ulint n_bytes = ut_min(static_cast(1024 * 1024), len); - byte* ptr = reinterpret_cast(ut_zalloc_nokey(n_bytes - + page_size)); - byte* buf = reinterpret_cast(ut_align(ptr, page_size)); - - os_offset_t offset = start; - dberr_t err = DB_SUCCESS; - const os_offset_t end = start + len; - IORequest request(IORequest::WRITE); - - while (offset < end) { - -#ifdef UNIV_HOTBACKUP - err = os_file_write( - request, node->name, node->handle, buf, offset, - n_bytes); -#else - err = os_aio( - request, OS_AIO_SYNC, node->name, - node->handle, buf, offset, n_bytes, read_only_mode, - NULL, NULL, NULL); -#endif /* UNIV_HOTBACKUP */ - - if (err != DB_SUCCESS) { - break; - } - - offset += n_bytes; - - n_bytes = ut_min(n_bytes, static_cast(end - offset)); - - DBUG_EXECUTE_IF("ib_crash_during_tablespace_extension", - DBUG_SUICIDE();); - } - - ut_free(ptr); - - return(err); -} - -/** Try to extend a tablespace if it is smaller than the specified size. -@param[in,out] space tablespace -@param[in] size desired size in pages -@return whether the tablespace is at least as big as requested */ -bool -fil_space_extend( - fil_space_t* space, - ulint size) -{ - /* In read-only mode we allow writes to temporary tables. 
*/ - ut_ad(!srv_read_only_mode || fsp_is_system_temporary(space->id)); - -retry: - -#ifdef UNIV_HOTBACKUP - page_size_t page_length(space->flags); - ulint actual_size = space->size; - ib::trace() << "space id : " << space->id << ", space name : " - << space->name << ", space size : " << actual_size << " pages," - << " desired space size : " << size << " pages," - << " page size : " << page_length.physical(); -#endif /* UNIV_HOTBACKUP */ - - bool success = true; - - fil_mutex_enter_and_prepare_for_io(space->id); - - if (space->size >= size) { - /* Space already big enough */ - mutex_exit(&fil_system->mutex); - return(true); - } - - page_size_t pageSize(space->flags); - const ulint page_size = pageSize.physical(); - fil_node_t* node = UT_LIST_GET_LAST(space->chain); - - if (!node->being_extended) { - /* Mark this node as undergoing extension. This flag - is used by other threads to wait for the extension - opereation to finish. */ - node->being_extended = true; - } else { - /* Another thread is currently extending the file. Wait - for it to finish. It'd have been better to use an event - driven mechanism but the entire module is peppered with - polling code. */ - - mutex_exit(&fil_system->mutex); - os_thread_sleep(100000); - goto retry; - } - - if (!fil_node_prepare_for_io(node, fil_system, space)) { - /* The tablespace data file, such as .ibd file, is missing */ - node->being_extended = false; - mutex_exit(&fil_system->mutex); - - return(false); - } - - /* At this point it is safe to release fil_system mutex. No - other thread can rename, delete or close the file because - we have set the node->being_extended flag. 
*/ - mutex_exit(&fil_system->mutex); - - ulint pages_added; - - /* Note: This code is going to be executed independent of FusionIO HW - if the OS supports posix_fallocate() */ - - ut_ad(size > space->size); - - os_offset_t node_start = os_file_get_size(node->handle); - ut_a(node_start != (os_offset_t) -1); - - /* Node first page number */ - ulint node_first_page = space->size - node->size; - - /* Number of physical pages in the node/file */ - ulint n_node_physical_pages - = static_cast(node_start) / page_size; - - /* Number of pages to extend in the node/file */ - lint n_node_extend; - - n_node_extend = size - (node_first_page + node->size); - - /* If we already have enough physical pages to satisfy the - extend request on the node then ignore it */ - if (node->size + n_node_extend > n_node_physical_pages) { + tmp_name, NULL); + } - DBUG_EXECUTE_IF("ib_crash_during_tablespace_extension", + DBUG_EXECUTE_IF("ib_crash_after_adjust_one_fil_space", DBUG_SUICIDE();); - os_offset_t len; - dberr_t err = DB_SUCCESS; - - len = ((node->size + n_node_extend) * page_size) - node_start; - ut_ad(len > 0); - const char* name = node->name == NULL ? space->name : node->name; - -#ifdef UNIV_LINUX - /* This is required by FusionIO HW/Firmware */ - int ret = posix_fallocate(node->handle, node_start, len); - - /* We already pass the valid offset and len in, if EINVAL - is returned, it could only mean that the file system doesn't - support fallocate(), currently one known case is - ext3 FS with O_DIRECT. We ignore EINVAL here so that the - error message won't flood. */ - if (ret != 0 && ret != EINVAL) { - ib::error() - << "posix_fallocate(): Failed to preallocate" - " data for file " - << name << ", desired size " - << len << " bytes." - " Operating system error number " - << ret << ". Check" - " that the disk is not full or a disk quota" - " exceeded. Make sure the file system supports" - " this function. 
Some operating system error" - " numbers are described at " REFMAN - " operating-system-error-codes.html"; + fil_rename_tablespace( + id, UT_LIST_GET_FIRST(space->chain)->name, + name, NULL); - err = DB_IO_ERROR; - } -#endif + DBUG_EXECUTE_IF("ib_crash_after_adjust_fil_space", + DBUG_SUICIDE();); - if (!node->atomic_write || err == DB_IO_ERROR) { + mutex_enter(&fil_system->mutex); + fnamespace = fil_space_get_by_name(name); + ut_ad(space == fnamespace); + mutex_exit(&fil_system->mutex); - bool read_only_mode; + return(true); + } - read_only_mode = (space->purpose != FIL_TYPE_TEMPORARY - ? false : srv_read_only_mode); + if (!print_error_if_does_not_exist) { - err = fil_write_zeros( - node, page_size, node_start, - static_cast(len), read_only_mode); + mutex_exit(&fil_system->mutex); - if (err != DB_SUCCESS) { + return(false); + } - ib::warn() - << "Error while writing " << len - << " zeroes to " << name - << " starting at offset " << node_start; + if (space == NULL) { + if (fnamespace == NULL) { + if (print_error_if_does_not_exist) { + fil_report_missing_tablespace(name, id); } + } else { + ib::error() << "Table " << name << " in InnoDB data" + " dictionary has tablespace id " << id + << ", but a tablespace with that id does not" + " exist. There is a tablespace of name " + << fnamespace->name << " and id " + << fnamespace->id << ", though. 
Have you" + " deleted or moved .ibd files?"; } +error_exit: + ib::warn() << TROUBLESHOOT_DATADICT_MSG; - /* Check how many pages actually added */ - os_offset_t end = os_file_get_size(node->handle); - ut_a(end != static_cast(-1) && end >= node_start); - - os_has_said_disk_full = !(success = (end == node_start + len)); - - pages_added = static_cast(end - node_start) / page_size; + mutex_exit(&fil_system->mutex); - } else { - success = true; - pages_added = n_node_extend; - os_has_said_disk_full = FALSE; + return(false); } - mutex_enter(&fil_system->mutex); - - ut_a(node->being_extended); - - node->size += pages_added; - space->size += pages_added; - node->being_extended = false; + if (0 != strcmp(space->name, name)) { - fil_node_complete_io(node, fil_system, IORequestWrite); + ib::error() << "Table " << name << " in InnoDB data dictionary" + " has tablespace id " << id << ", but the tablespace" + " with that id has name " << space->name << "." + " Have you deleted or moved .ibd files?"; -#ifndef UNIV_HOTBACKUP - /* Keep the last data file size info up to date, rounded to - full megabytes */ - ulint pages_per_mb = (1024 * 1024) / page_size; - ulint size_in_pages = ((node->size / pages_per_mb) * pages_per_mb); + if (fnamespace != NULL) { + ib::error() << "There is a tablespace with the right" + " name: " << fnamespace->name << ", but its id" + " is " << fnamespace->id << "."; + } - switch (space->id) { - case TRX_SYS_SPACE: - srv_sys_space.set_last_file_size(size_in_pages); - break; - case SRV_TMP_SPACE_ID: - srv_tmp_space.set_last_file_size(size_in_pages); - break; + goto error_exit; } -#else - ib::trace() << "extended space : " << space->name << " from " - << actual_size << " pages to " << space->size << " pages " - << ", desired space size : " << size << " pages."; -#endif /* !UNIV_HOTBACKUP */ mutex_exit(&fil_system->mutex); - fil_flush(space->id); - - return(success); + return(false); } - -#ifdef UNIV_HOTBACKUP 
-/********************************************************************//** -Extends all tablespaces to the size stored in the space header. During the -mysqlbackup --apply-log phase we extended the spaces on-demand so that log -records could be applied, but that may have left spaces still too small -compared to the size stored in the space header. */ -void -fil_extend_tablespaces_to_stored_len(void) -/*======================================*/ +#endif /* !UNIV_HOTBACKUP */ +/** Return the space ID based on the tablespace name. +The tablespace must be found in the tablespace memory cache. +This call is made from external to this module, so the mutex is not owned. +@param[in] tablespace Tablespace name +@return space ID if tablespace found, ULINT_UNDEFINED if space not. */ +ulint +fil_space_get_id_by_name( + const char* tablespace) { - byte* buf; - ulint actual_size; - ulint size_in_header; - dberr_t error; - bool success; - - buf = (byte*)ut_malloc_nokey(UNIV_PAGE_SIZE); - mutex_enter(&fil_system->mutex); - for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system->space_list); - space != NULL; - space = UT_LIST_GET_NEXT(space_list, space)) { - - ut_a(space->purpose == FIL_TYPE_TABLESPACE); - - mutex_exit(&fil_system->mutex); /* no need to protect with a - mutex, because this is a - single-threaded operation */ - error = fil_read( - page_id_t(space->id, 0), - page_size_t(space->flags), - 0, univ_page_size.physical(), buf); - - ut_a(error == DB_SUCCESS); - - size_in_header = fsp_header_get_field(buf, FSP_SIZE); - - success = fil_space_extend(space, size_in_header); - if (!success) { - ib::error() << "Could not extend the tablespace of " - << space->name << " to the size stored in" - " header, " << size_in_header << " pages;" - " size after extension " << actual_size - << " pages. Check that you have free disk" - " space and retry!"; - ut_a(success); - } - - mutex_enter(&fil_system->mutex); - } + /* Search for a space with the same name. 
*/ + fil_space_t* space = fil_space_get_by_name(tablespace); + ulint id = (space == NULL) ? ULINT_UNDEFINED : space->id; mutex_exit(&fil_system->mutex); - ut_free(buf); + return(id); } -#endif /*========== RESERVE FREE EXTENTS (for a B-tree split, for example) ===*/ @@ -5874,146 +5943,16 @@ fil_flush( ulint space_id) /*!< in: file space id (this can be a group of log files or a tablespace of the database) */ { - fil_node_t* node; - os_file_t file; - mutex_enter(&fil_system->mutex); - fil_space_t* space = fil_space_get_by_id(space_id); - - if (space == NULL - || space->purpose == FIL_TYPE_TEMPORARY - || space->stop_new_ops - || space->is_being_truncated) { - mutex_exit(&fil_system->mutex); - - return; - } - - if (fil_buffering_disabled(space)) { - - /* No need to flush. User has explicitly disabled - buffering. */ - ut_ad(!space->is_in_unflushed_spaces); - ut_ad(fil_space_is_flushed(space)); - ut_ad(space->n_pending_flushes == 0); - -#ifdef UNIV_DEBUG - for (node = UT_LIST_GET_FIRST(space->chain); - node != NULL; - node = UT_LIST_GET_NEXT(chain, node)) { - ut_ad(node->modification_counter - == node->flush_counter); - ut_ad(node->n_pending_flushes == 0); - } -#endif /* UNIV_DEBUG */ - - mutex_exit(&fil_system->mutex); - return; - } - - space->n_pending_flushes++; /*!< prevent dropping of the space while - we are flushing */ - for (node = UT_LIST_GET_FIRST(space->chain); - node != NULL; - node = UT_LIST_GET_NEXT(chain, node)) { - - int64_t old_mod_counter = node->modification_counter; - - if (old_mod_counter <= node->flush_counter) { - continue; - } - - ut_a(node->is_open); - - switch (space->purpose) { - case FIL_TYPE_TEMPORARY: - ut_ad(0); // we already checked for this - case FIL_TYPE_TABLESPACE: - case FIL_TYPE_IMPORT: - fil_n_pending_tablespace_flushes++; - break; - case FIL_TYPE_LOG: - fil_n_pending_log_flushes++; - fil_n_log_flushes++; - break; - } -#ifdef _WIN32 - if (node->is_raw_disk) { - - goto skip_flush; - } -#endif /* _WIN32 */ -retry: - if 
(node->n_pending_flushes > 0) { - /* We want to avoid calling os_file_flush() on - the file twice at the same time, because we do - not know what bugs OS's may contain in file - i/o */ - -#ifndef UNIV_HOTBACKUP - int64_t sig_count = os_event_reset(node->sync_event); -#endif /* !UNIV_HOTBACKUP */ - - mutex_exit(&fil_system->mutex); - - os_event_wait_low(node->sync_event, sig_count); - - mutex_enter(&fil_system->mutex); - - if (node->flush_counter >= old_mod_counter) { - - goto skip_flush; - } - - goto retry; - } - - ut_a(node->is_open); - file = node->handle; - node->n_pending_flushes++; - - mutex_exit(&fil_system->mutex); - - os_file_flush(file); - - mutex_enter(&fil_system->mutex); - - os_event_set(node->sync_event); - - node->n_pending_flushes--; -skip_flush: - if (node->flush_counter < old_mod_counter) { - node->flush_counter = old_mod_counter; - - if (space->is_in_unflushed_spaces - && fil_space_is_flushed(space)) { - - space->is_in_unflushed_spaces = false; - - UT_LIST_REMOVE( - fil_system->unflushed_spaces, - space); - } - } - - switch (space->purpose) { - case FIL_TYPE_TEMPORARY: - ut_ad(0); // we already checked for this - case FIL_TYPE_TABLESPACE: - case FIL_TYPE_IMPORT: - fil_n_pending_tablespace_flushes--; - continue; - case FIL_TYPE_LOG: - fil_n_pending_log_flushes--; - continue; + if (fil_space_t* space = fil_space_get_by_id(space_id)) { + if (space->purpose != FIL_TYPE_TEMPORARY + && !space->stop_new_ops + && !space->is_being_truncated) { + fil_flush_low(space); } - - ut_ad(0); } - space->n_pending_flushes--; - mutex_exit(&fil_system->mutex); } diff --git a/storage/innobase/fsp/fsp0sysspace.cc b/storage/innobase/fsp/fsp0sysspace.cc index 37c641fffac42..ac34be6f6a848 100644 --- a/storage/innobase/fsp/fsp0sysspace.cc +++ b/storage/innobase/fsp/fsp0sysspace.cc @@ -364,7 +364,8 @@ SysTablespace::check_size( also the data file could contain an incomplete extent. 
So we need to round the size downward to a megabyte.*/ - ulint rounded_size_pages = get_pages_from_size(size); + const ulint rounded_size_pages = static_cast( + size >> UNIV_PAGE_SIZE_SHIFT); /* If last file */ if (&file == &m_files.back() && m_auto_extend_last_file) { @@ -375,7 +376,7 @@ SysTablespace::check_size( ib::error() << "The Auto-extending " << name() << " data file '" << file.filepath() << "' is" " of a different size " << rounded_size_pages - << " pages (rounded down to MB) than specified" + << " pages than specified" " in the .cnf file: initial " << file.m_size << " pages, max " << m_last_file_size_max << " (relevant if non-zero) pages!"; @@ -388,7 +389,7 @@ SysTablespace::check_size( if (rounded_size_pages != file.m_size) { ib::error() << "The " << name() << " data file '" << file.filepath() << "' is of a different size " - << rounded_size_pages << " pages (rounded down to MB)" + << rounded_size_pages << " pages" " than the " << file.m_size << " pages specified in" " the .cnf file!"; return(DB_ERROR); @@ -779,7 +780,8 @@ SysTablespace::check_file_spec( return(DB_ERROR); } - if (get_sum_of_sizes() < min_expected_size / UNIV_PAGE_SIZE) { + if (!m_auto_extend_last_file + && get_sum_of_sizes() < min_expected_size / UNIV_PAGE_SIZE) { ib::error() << "Tablespace size must be at least " << min_expected_size / (1024 * 1024) << " MB"; diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index db0aba33da73d..9ed7855b8dab6 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -4210,8 +4210,9 @@ innobase_init( /* There is hang on buffer pool when trying to get a new page if buffer pool size is too small for large page sizes */ - if (innobase_buffer_pool_size < (24 * 1024 * 1024)) { - ib::info() << "innobase_page_size " + if (UNIV_PAGE_SIZE > UNIV_PAGE_SIZE_DEF + && innobase_buffer_pool_size < (24 * 1024 * 1024)) { + ib::info() << "innodb_page_size=" << UNIV_PAGE_SIZE << " requires " 
<< "innodb_buffer_pool_size > 24M current " << innobase_buffer_pool_size; @@ -22929,6 +22930,12 @@ static MYSQL_SYSVAR_BOOL(trx_purge_view_update_only_debug, " but the each purges were not done yet.", NULL, NULL, FALSE); +static MYSQL_SYSVAR_ULONG(data_file_size_debug, + srv_sys_space_size_debug, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "InnoDB system tablespace size to be set in recovery.", + NULL, NULL, 0, 0, UINT_MAX32, 0); + static MYSQL_SYSVAR_ULONG(fil_make_page_dirty_debug, srv_fil_make_page_dirty_debug, PLUGIN_VAR_OPCMDARG, "Make the first page of the given tablespace dirty.", @@ -23312,6 +23319,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(trx_rseg_n_slots_debug), MYSQL_SYSVAR(limit_optimistic_insert_debug), MYSQL_SYSVAR(trx_purge_view_update_only_debug), + MYSQL_SYSVAR(data_file_size_debug), MYSQL_SYSVAR(fil_make_page_dirty_debug), MYSQL_SYSVAR(saved_page_number_debug), MYSQL_SYSVAR(compress_debug), diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 6a4cc3f9d55f2..918a849be3d2c 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -182,6 +182,10 @@ struct fil_space_t { /*!< length of the FSP_FREE list */ ulint free_limit; /*!< contents of FSP_FREE_LIMIT */ + ulint recv_size; + /*!< recovered tablespace size in pages; + 0 if no size change was read from the redo log, + or if the size change was implemented */ ulint flags; /*!< tablespace flags; see fsp_flags_is_valid(), page_size_t(ulint) (constructor) */ @@ -238,9 +242,6 @@ struct fil_space_t { /** tablespace crypt data has been read */ bool page_0_crypt_read; - /** Space file block size */ - ulint file_block_size; - /** True if we have already printed compression failure */ bool printed_compression_failure; @@ -789,6 +790,12 @@ char* fil_space_get_first_path( ulint id); +/** Set the recovered size of a tablespace in pages. 
+@param id tablespace ID +@param size recovered size in pages */ +UNIV_INTERN +void +fil_space_set_recv_size(ulint id, ulint size); /*******************************************************************//** Returns the size of the space in pages. The tablespace must be cached in the memory cache. @@ -1258,15 +1265,6 @@ fil_space_for_table_exists_in_mem( mem_heap_t* heap, /*!< in: heap memory */ table_id_t table_id, /*!< in: table id */ dict_table_t* table); /*!< in: table or NULL */ -#else /* !UNIV_HOTBACKUP */ -/********************************************************************//** -Extends all tablespaces to the size stored in the space header. During the -mysqlbackup --apply-log phase we extended the spaces on-demand so that log -records could be appllied, but that may have left spaces still too small -compared to the size stored in the space header. */ -void -fil_extend_tablespaces_to_stored_len(void); -/*======================================*/ #endif /* !UNIV_HOTBACKUP */ /** Try to extend a tablespace if it is smaller than the specified size. @param[in,out] space tablespace diff --git a/storage/innobase/include/fsp0sysspace.h b/storage/innobase/include/fsp0sysspace.h index c25093491a2d4..4c88b268f3449 100644 --- a/storage/innobase/include/fsp0sysspace.h +++ b/storage/innobase/include/fsp0sysspace.h @@ -152,15 +152,6 @@ class SysTablespace : public Tablespace * ((1024 * 1024) / UNIV_PAGE_SIZE)); } - /** Roundoff to MegaBytes is similar as done in - SysTablespace::parse_units() function. - @return the pages when given size of file (bytes). 
*/ - ulint get_pages_from_size(os_offset_t size) - { - return (ulint)((size / (1024 * 1024)) - * ((1024 * 1024) / UNIV_PAGE_SIZE)); - } - /** @return next increment size */ ulint get_increment() const; diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 9d7b363cdd05e..748a21fdcd25e 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -538,6 +538,7 @@ extern my_bool srv_purge_view_update_only_debug; /** Value of MySQL global used to disable master thread. */ extern my_bool srv_master_thread_disabled_debug; +extern ulong srv_sys_space_size_debug; #endif /* UNIV_DEBUG */ #define SRV_SEMAPHORE_WAIT_EXTENSION 7200 diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index a7c3f3372877c..6901ba070aff3 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -3109,6 +3109,7 @@ recv_parse_log_rec( return(0); } + const byte* old_ptr = new_ptr; new_ptr = recv_parse_or_apply_log_rec_body( *type, new_ptr, end_ptr, *space, *page_no, apply, NULL, NULL); @@ -3117,6 +3118,14 @@ recv_parse_log_rec( return(0); } + if (*page_no == 0 && *type == MLOG_4BYTES + && mach_read_from_2(old_ptr) == FSP_HEADER_OFFSET + FSP_SIZE) { + old_ptr += 2; + fil_space_set_recv_size(*space, + mach_parse_compressed(&old_ptr, + end_ptr)); + } + return(new_ptr - ptr); } diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 86d821fd93511..73c8122753707 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -140,6 +140,10 @@ bool srv_sys_tablespaces_open = false; ibool srv_was_started = FALSE; /** TRUE if innobase_start_or_create_for_mysql() has been called */ static ibool srv_start_has_been_called = FALSE; +#ifdef UNIV_DEBUG +/** InnoDB system tablespace to set during recovery */ +UNIV_INTERN ulong srv_sys_space_size_debug; +#endif /* UNIV_DEBUG */ /** Bit flags for tracking background thread creation. 
They are used to determine which threads need to be stopped if we need to abort during @@ -194,9 +198,6 @@ static const ulint MIN_EXPECTED_TABLESPACE_SIZE = 5 * 1024 * 1024; /** */ #define SRV_MAX_N_PENDING_SYNC_IOS 100 -/** The round off to MB is similar as done in srv_parse_megabytes() */ -#define CALC_NUMBER_OF_PAGES(size) ((size) / (1024 * 1024)) * \ - ((1024 * 1024) / (UNIV_PAGE_SIZE)) #ifdef UNIV_PFS_THREAD /* Keys to register InnoDB threads with performance schema */ mysql_pfs_key_t buf_dump_thread_key; @@ -2092,6 +2093,7 @@ innobase_start_or_create_for_mysql(void) shutdown */ fil_open_log_and_system_tablespace_files(); + ut_d(fil_space_get(0)->recv_size = srv_sys_space_size_debug); err = srv_undo_tablespaces_init( create_new_db, diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc index e7da4569f0d1f..133960ae8b4a6 100644 --- a/storage/xtradb/fil/fil0fil.cc +++ b/storage/xtradb/fil/fil0fil.cc @@ -925,6 +925,314 @@ fil_try_to_close_file_in_LRU( return(FALSE); } +/** Flush any writes cached by the file system. +@param[in,out] space tablespace */ +static +void +fil_flush_low(fil_space_t* space) +{ + ut_ad(mutex_own(&fil_system->mutex)); + ut_ad(space); + ut_ad(!space->stop_new_ops); + + if (fil_buffering_disabled(space)) { + + /* No need to flush. User has explicitly disabled + buffering. 
*/ + ut_ad(!space->is_in_unflushed_spaces); + ut_ad(fil_space_is_flushed(space)); + ut_ad(space->n_pending_flushes == 0); + +#ifdef UNIV_DEBUG + for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain); + node != NULL; + node = UT_LIST_GET_NEXT(chain, node)) { + ut_ad(node->modification_counter + == node->flush_counter); + ut_ad(node->n_pending_flushes == 0); + } +#endif /* UNIV_DEBUG */ + + return; + } + + /* Prevent dropping of the space while we are flushing */ + space->n_pending_flushes++; + + for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain); + node != NULL; + node = UT_LIST_GET_NEXT(chain, node)) { + + ib_int64_t old_mod_counter = node->modification_counter; + + if (old_mod_counter <= node->flush_counter) { + continue; + } + + ut_a(node->open); + + if (space->purpose == FIL_TABLESPACE) { + fil_n_pending_tablespace_flushes++; + } else { + fil_n_pending_log_flushes++; + fil_n_log_flushes++; + } +#ifdef __WIN__ + if (node->is_raw_disk) { + + goto skip_flush; + } +#endif /* __WIN__ */ +retry: + if (node->n_pending_flushes > 0) { + /* We want to avoid calling os_file_flush() on + the file twice at the same time, because we do + not know what bugs OS's may contain in file + i/o */ + + ib_int64_t sig_count = + os_event_reset(node->sync_event); + + mutex_exit(&fil_system->mutex); + + os_event_wait_low(node->sync_event, sig_count); + + mutex_enter(&fil_system->mutex); + + if (node->flush_counter >= old_mod_counter) { + + goto skip_flush; + } + + goto retry; + } + + ut_a(node->open); + node->n_pending_flushes++; + + mutex_exit(&fil_system->mutex); + + os_file_flush(node->handle); + + mutex_enter(&fil_system->mutex); + + os_event_set(node->sync_event); + + node->n_pending_flushes--; +skip_flush: + if (node->flush_counter < old_mod_counter) { + node->flush_counter = old_mod_counter; + + if (space->is_in_unflushed_spaces + && fil_space_is_flushed(space)) { + + space->is_in_unflushed_spaces = false; + + UT_LIST_REMOVE( + unflushed_spaces, + 
fil_system->unflushed_spaces, + space); + } + } + + if (space->purpose == FIL_TABLESPACE) { + fil_n_pending_tablespace_flushes--; + } else { + fil_n_pending_log_flushes--; + } + } + + space->n_pending_flushes--; +} + +/** Try to extend a tablespace. +@param[in,out] space tablespace to be extended +@param[in,out] node last file of the tablespace +@param[in] size desired size in number of pages +@param[out] success whether the operation succeeded +@return whether the operation should be retried */ +static UNIV_COLD __attribute__((warn_unused_result, nonnull)) +bool +fil_space_extend_must_retry( + fil_space_t* space, + fil_node_t* node, + ulint size, + ibool* success) +{ + ut_ad(mutex_own(&fil_system->mutex)); + ut_ad(UT_LIST_GET_LAST(space->chain) == node); + ut_ad(size >= FIL_IBD_FILE_INITIAL_SIZE); + + *success = space->size >= size; + + if (*success) { + /* Space already big enough */ + return(false); + } + + if (node->being_extended) { + /* Another thread is currently extending the file. Wait + for it to finish. + It'd have been better to use event driven mechanism but + the entire module is peppered with polling stuff. */ + mutex_exit(&fil_system->mutex); + os_thread_sleep(100000); + return(true); + } + + node->being_extended = true; + + if (!fil_node_prepare_for_io(node, fil_system, space)) { + /* The tablespace data file, such as .ibd file, is missing */ + node->being_extended = false; + return(false); + } + + /* At this point it is safe to release fil_system mutex. No + other thread can rename, delete or close the file because + we have set the node->being_extended flag. 
*/ + mutex_exit(&fil_system->mutex); + + ulint start_page_no = space->size; + ulint file_start_page_no = start_page_no - node->size; + + /* Determine correct file block size */ + if (node->file_block_size == 0) { + node->file_block_size = os_file_get_block_size( + node->handle, node->name); + space->file_block_size = node->file_block_size; + } + + ulint page_size = fsp_flags_get_zip_size(space->flags); + ulint pages_added = 0; + + if (!page_size) { + page_size = UNIV_PAGE_SIZE; + } + +#ifdef HAVE_POSIX_FALLOCATE + /* We must complete the I/O request after invoking + posix_fallocate() to avoid an assertion failure at shutdown. + Because no actual writes were dispatched, a read operation + will suffice. */ + const ulint io_completion_type = srv_use_posix_fallocate + ? OS_FILE_READ : OS_FILE_WRITE; + + if (srv_use_posix_fallocate) { + const os_offset_t start_offset = static_cast( + start_page_no) * page_size; + const os_offset_t len = static_cast( + pages_added) * page_size; + + *success = !posix_fallocate(node->handle, start_offset, len); + if (!*success) { + ib_logf(IB_LOG_LEVEL_ERROR, "preallocating file " + "space for file \'%s\' failed. 
Current size " + INT64PF ", desired size " INT64PF, + node->name, start_offset, len+start_offset); + os_file_handle_error_no_exit( + node->name, "posix_fallocate", + FALSE, __FILE__, __LINE__); + } + + DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28", + *success = FALSE; errno = 28; + os_has_said_disk_full = TRUE;); + + if (*success) { + os_has_said_disk_full = FALSE; + } else { + pages_added = 0; + } + } else +#else + const ulint io_completion_type = OS_FILE_WRITE; +#endif + { + byte* buf2; + byte* buf; + ulint buf_size; + + /* Extend at most 64 pages at a time */ + buf_size = ut_min(64, size - start_page_no) + * page_size; + buf2 = static_cast(mem_alloc(buf_size + page_size)); + buf = static_cast(ut_align(buf2, page_size)); + + memset(buf, 0, buf_size); + + while (start_page_no < size) { + ulint n_pages + = ut_min(buf_size / page_size, + size - start_page_no); + + os_offset_t offset = static_cast( + start_page_no - file_start_page_no) + * page_size; + + const char* name = node->name == NULL + ? 
space->name : node->name; + + *success = os_aio(OS_FILE_WRITE, 0, OS_AIO_SYNC, + name, node->handle, buf, + offset, page_size * n_pages, + page_size, node, NULL, + space->id, NULL, 0); + + DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28", + *success = FALSE; errno = 28; + os_has_said_disk_full = TRUE;); + + if (*success) { + os_has_said_disk_full = FALSE; + } else { + /* Let us measure the size of the file + to determine how much we were able to + extend it */ + os_offset_t size; + + size = os_file_get_size(node->handle); + ut_a(size != (os_offset_t) -1); + + n_pages = ((ulint) (size / page_size)) + - node->size - pages_added; + + pages_added += n_pages; + break; + } + + start_page_no += n_pages; + pages_added += n_pages; + } + + mem_free(buf2); + } + + mutex_enter(&fil_system->mutex); + + ut_a(node->being_extended); + + space->size += pages_added; + node->size += pages_added; + + fil_node_complete_io(node, fil_system, io_completion_type); + + node->being_extended = FALSE; + + if (space->id == 0) { + ulint pages_per_mb = (1024 * 1024) / page_size; + + /* Keep the last data file size info up to date, rounded to + full megabytes */ + + srv_data_file_sizes[srv_n_data_files - 1] + = (node->size / pages_per_mb) * pages_per_mb; + } + + fil_flush_low(space); + return(false); +} + /*******************************************************************//** Reserves the fil_system mutex and tries to make sure we can open at least one file while holding it. 
This should be called before calling @@ -936,27 +1244,25 @@ fil_mutex_enter_and_prepare_for_io( ulint space_id) /*!< in: space id */ { fil_space_t* space; - ibool success; - ibool print_info = FALSE; ulint count = 0; ulint count2 = 0; retry: mutex_enter(&fil_system->mutex); - if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) { - /* We keep log files and system tablespace files always open; - this is important in preventing deadlocks in this module, as - a page read completion often performs another read from the - insert buffer. The insert buffer is in tablespace 0, and we - cannot end up waiting in this function. */ - + if (space_id >= SRV_LOG_SPACE_FIRST_ID) { + /* We keep log files always open. */ return; } space = fil_space_get_by_id(space_id); - if (space != NULL && space->stop_ios) { + if (space == NULL) { + return; + } + + if (space->stop_ios) { + ut_ad(space->id != 0); /* We are going to do a rename file and want to stop new i/o's for a while */ @@ -996,76 +1302,81 @@ fil_mutex_enter_and_prepare_for_io( goto retry; } - if (fil_system->n_open < fil_system->max_n_open) { + fil_node_t* node = UT_LIST_GET_LAST(space->chain); - return; - } + ut_ad(space->id == 0 || node == UT_LIST_GET_FIRST(space->chain)); - /* If the file is already open, no need to do anything; if the space - does not exist, we handle the situation in the function which called - this function */ - if (!space) { - return; - } - - fil_node_t* node = UT_LIST_GET_FIRST(space->chain); + if (space->id == 0) { + /* We keep the system tablespace files always open; + this is important in preventing deadlocks in this module, as + a page read completion often performs another read from the + insert buffer. The insert buffer is in tablespace 0, and we + cannot end up waiting in this function. 
*/ + } else if (!node || node->open) { + /* If the file is already open, no need to do + anything; if the space does not exist, we handle the + situation in the function which called this + function */ + } else { + /* Too many files are open, try to close some */ + while (fil_system->n_open >= fil_system->max_n_open) { + if (fil_try_to_close_file_in_LRU(count > 1)) { + /* No problem */ + } else if (count >= 2) { + ib_logf(IB_LOG_LEVEL_WARN, + "innodb_open_files=%lu is exceeded" + " (%lu files stay open)", + fil_system->max_n_open, + fil_system->n_open); + break; + } else { + mutex_exit(&fil_system->mutex); - if (!node || node->open) { + /* Wake the i/o-handler threads to + make sure pending i/o's are + performed */ + os_aio_simulated_wake_handler_threads(); + os_thread_sleep(20000); - return; - } + /* Flush tablespaces so that we can + close modified files in the LRU list */ + fil_flush_file_spaces(FIL_TABLESPACE); - if (count > 1) { - print_info = TRUE; + count++; + goto retry; + } + } } - /* Too many files are open, try to close some */ -close_more: - success = fil_try_to_close_file_in_LRU(print_info); + if (ulint size = UNIV_UNLIKELY(space->recv_size)) { + ut_ad(node); + ibool success; + if (fil_space_extend_must_retry(space, node, size, &success)) { + goto retry; + } - if (success && fil_system->n_open >= fil_system->max_n_open) { + ut_ad(mutex_own(&fil_system->mutex)); + /* Crash recovery requires the file extension to succeed. */ + ut_a(success); + /* InnoDB data files cannot shrink. */ + ut_a(space->size >= size); - goto close_more; - } + /* There could be multiple concurrent I/O requests for + this tablespace (multiple threads trying to extend + this tablespace). - if (fil_system->n_open < fil_system->max_n_open) { - /* Ok */ + Also, fil_space_set_recv_size() may have been invoked + again during the file extension while fil_system->mutex + was not being held by us. - return; - } + Only if space->recv_size matches what we read originally, + reset the field. 
In this way, a subsequent I/O request + will handle any pending fil_space_set_recv_size(). */ - if (count >= 2) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: too many (%lu) files stay open" - " while the maximum\n" - "InnoDB: allowed value would be %lu.\n" - "InnoDB: You may need to raise the value of" - " innodb_open_files in\n" - "InnoDB: my.cnf.\n", - (ulong) fil_system->n_open, - (ulong) fil_system->max_n_open); - - return; + if (size == space->recv_size) { + space->recv_size = 0; + } } - - mutex_exit(&fil_system->mutex); - -#ifndef UNIV_HOTBACKUP - /* Wake the i/o-handler threads to make sure pending i/o's are - performed */ - os_aio_simulated_wake_handler_threads(); - - os_thread_sleep(20000); -#endif - /* Flush tablespaces so that we can close modified files in the LRU - list */ - - fil_flush_file_spaces(FIL_TABLESPACE); - - count++; - - goto retry; } /*******************************************************************//** @@ -1582,6 +1893,24 @@ fil_space_get_first_path( return(path); } +/** Set the recovered size of a tablespace in pages. +@param id tablespace ID +@param size recovered size in pages */ +UNIV_INTERN +void +fil_space_set_recv_size(ulint id, ulint size) +{ + mutex_enter(&fil_system->mutex); + ut_ad(size); + ut_ad(id < SRV_LOG_SPACE_FIRST_ID); + + if (fil_space_t* space = fil_space_get_space(id)) { + space->recv_size = size; + } + + mutex_exit(&fil_system->mutex); +} + /*******************************************************************//** Returns the size of the space in pages. The tablespace must be cached in the memory cache. 
@@ -5263,209 +5592,23 @@ fil_extend_space_to_desired_size( extension; if the current space size is bigger than this already, the function does nothing */ { - fil_node_t* node; - fil_space_t* space; - byte* buf2; - byte* buf; - ulint buf_size; - ulint start_page_no; - ulint file_start_page_no; - ulint page_size; - ulint pages_added; - ibool success; - ut_ad(!srv_read_only_mode); -retry: - pages_added = 0; - success = TRUE; - - fil_mutex_enter_and_prepare_for_io(space_id); - - space = fil_space_get_by_id(space_id); - ut_a(space); - - if (space->size >= size_after_extend) { - /* Space already big enough */ - - *actual_size = space->size; - - mutex_exit(&fil_system->mutex); - - return(TRUE); - } - - page_size = fsp_flags_get_zip_size(space->flags); - if (!page_size) { - page_size = UNIV_PAGE_SIZE; - } - - node = UT_LIST_GET_LAST(space->chain); - - if (!node->being_extended) { - /* Mark this node as undergoing extension. This flag - is used by other threads to wait for the extension - opereation to finish. */ - node->being_extended = TRUE; - } else { - /* Another thread is currently extending the file. Wait - for it to finish. - It'd have been better to use event driven mechanism but - the entire module is peppered with polling stuff. */ - mutex_exit(&fil_system->mutex); - os_thread_sleep(100000); - goto retry; - } - - if (!fil_node_prepare_for_io(node, fil_system, space)) { - /* The tablespace data file, such as .ibd file, is missing */ - node->being_extended = false; - mutex_exit(&fil_system->mutex); - - return(false); - } - - /* At this point it is safe to release fil_system mutex. No - other thread can rename, delete or close the file because - we have set the node->being_extended flag. 
*/ - mutex_exit(&fil_system->mutex); - - start_page_no = space->size; - file_start_page_no = space->size - node->size; - - /* Determine correct file block size */ - if (node->file_block_size == 0) { - node->file_block_size = os_file_get_block_size(node->handle, node->name); - space->file_block_size = node->file_block_size; - } - -#ifdef HAVE_POSIX_FALLOCATE - if (srv_use_posix_fallocate) { - os_offset_t start_offset = start_page_no * page_size; - os_offset_t n_pages = (size_after_extend - start_page_no); - os_offset_t len = n_pages * page_size; - - if (posix_fallocate(node->handle, start_offset, len) == -1) { - ib_logf(IB_LOG_LEVEL_ERROR, "preallocating file " - "space for file \'%s\' failed. Current size " - INT64PF ", desired size " INT64PF, - node->name, start_offset, len+start_offset); - os_file_handle_error_no_exit(node->name, "posix_fallocate", FALSE, __FILE__, __LINE__); - success = FALSE; - } else { - success = TRUE; - } - - DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28", - success = FALSE; errno = 28;os_has_said_disk_full = TRUE;); - - mutex_enter(&fil_system->mutex); - - if (success) { - node->size += n_pages; - space->size += n_pages; - os_has_said_disk_full = FALSE; - } - - /* If posix_fallocate was used to extent the file space - we need to complete the io. Because no actual writes were - dispatched read operation is enough here. Without this - there will be assertion at shutdown indicating that - all IO is not completed. 
*/ - fil_node_complete_io(node, fil_system, OS_FILE_READ); - goto file_extended; - } -#endif - - /* Extend at most 64 pages at a time */ - buf_size = ut_min(64, size_after_extend - start_page_no) * page_size; - buf2 = static_cast(mem_alloc(buf_size + page_size)); - buf = static_cast(ut_align(buf2, page_size)); - - memset(buf, 0, buf_size); - - while (start_page_no < size_after_extend) { - ulint n_pages - = ut_min(buf_size / page_size, - size_after_extend - start_page_no); - - os_offset_t offset - = ((os_offset_t) (start_page_no - file_start_page_no)) - * page_size; - - const char* name = node->name == NULL ? space->name : node->name; - -#ifdef UNIV_HOTBACKUP - success = os_file_write(name, node->handle, buf, - offset, page_size * n_pages); -#else - success = os_aio(OS_FILE_WRITE, 0, OS_AIO_SYNC, - name, node->handle, buf, - offset, page_size * n_pages, page_size, - node, NULL, space_id, NULL, 0); -#endif /* UNIV_HOTBACKUP */ - - DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28", - success = FALSE; errno = 28; os_has_said_disk_full = TRUE;); - - if (success) { - os_has_said_disk_full = FALSE; - } else { - /* Let us measure the size of the file to determine - how much we were able to extend it */ - os_offset_t size; - - size = os_file_get_size(node->handle); - ut_a(size != (os_offset_t) -1); + for (;;) { + fil_mutex_enter_and_prepare_for_io(space_id); - n_pages = ((ulint) (size / page_size)) - - node->size - pages_added; + fil_space_t* space = fil_space_get_by_id(space_id); + ut_a(space); + ibool success; - pages_added += n_pages; - break; + if (!fil_space_extend_must_retry( + space, UT_LIST_GET_LAST(space->chain), + size_after_extend, &success)) { + *actual_size = space->size; + mutex_exit(&fil_system->mutex); + return(success); } - - start_page_no += n_pages; - pages_added += n_pages; - } - - mem_free(buf2); - - mutex_enter(&fil_system->mutex); - - ut_a(node->being_extended); - - space->size += pages_added; - node->size += pages_added; - - fil_node_complete_io(node, 
fil_system, OS_FILE_WRITE); - - /* At this point file has been extended */ -file_extended: - - node->being_extended = FALSE; - *actual_size = space->size; - -#ifndef UNIV_HOTBACKUP - if (space_id == 0) { - ulint pages_per_mb = (1024 * 1024) / page_size; - - /* Keep the last data file size info up to date, rounded to - full megabytes */ - - srv_data_file_sizes[srv_n_data_files - 1] - = (node->size / pages_per_mb) * pages_per_mb; } -#endif /* !UNIV_HOTBACKUP */ - - /* - printf("Extended %s to %lu, actual size %lu pages\n", space->name, - size_after_extend, *actual_size); */ - mutex_exit(&fil_system->mutex); - - fil_flush(space_id); - - return(success); } #ifdef UNIV_HOTBACKUP @@ -6184,14 +6327,9 @@ fil_flush( ulint space_id) /*!< in: file space id (this can be a group of log files or a tablespace of the database) */ { - fil_space_t* space; - fil_node_t* node; - os_file_t file; - - mutex_enter(&fil_system->mutex); - space = fil_space_get_by_id(space_id); + fil_space_t* space = fil_space_get_by_id(space_id); if (!space || space->stop_new_ops) { mutex_exit(&fil_system->mutex); @@ -6199,115 +6337,7 @@ fil_flush( return; } - if (fil_buffering_disabled(space)) { - - /* No need to flush. User has explicitly disabled - buffering. 
*/ - ut_ad(!space->is_in_unflushed_spaces); - ut_ad(fil_space_is_flushed(space)); - ut_ad(space->n_pending_flushes == 0); - -#ifdef UNIV_DEBUG - for (node = UT_LIST_GET_FIRST(space->chain); - node != NULL; - node = UT_LIST_GET_NEXT(chain, node)) { - ut_ad(node->modification_counter - == node->flush_counter); - ut_ad(node->n_pending_flushes == 0); - } -#endif /* UNIV_DEBUG */ - - mutex_exit(&fil_system->mutex); - return; - } - - space->n_pending_flushes++; /*!< prevent dropping of the space while - we are flushing */ - for (node = UT_LIST_GET_FIRST(space->chain); - node != NULL; - node = UT_LIST_GET_NEXT(chain, node)) { - - ib_int64_t old_mod_counter = node->modification_counter; - - if (old_mod_counter <= node->flush_counter) { - continue; - } - - ut_a(node->open); - - if (space->purpose == FIL_TABLESPACE) { - fil_n_pending_tablespace_flushes++; - } else { - fil_n_pending_log_flushes++; - fil_n_log_flushes++; - } -#ifdef __WIN__ - if (node->is_raw_disk) { - - goto skip_flush; - } -#endif /* __WIN__ */ -retry: - if (node->n_pending_flushes > 0) { - /* We want to avoid calling os_file_flush() on - the file twice at the same time, because we do - not know what bugs OS's may contain in file - i/o */ - - ib_int64_t sig_count = - os_event_reset(node->sync_event); - - mutex_exit(&fil_system->mutex); - - os_event_wait_low(node->sync_event, sig_count); - - mutex_enter(&fil_system->mutex); - - if (node->flush_counter >= old_mod_counter) { - - goto skip_flush; - } - - goto retry; - } - - ut_a(node->open); - file = node->handle; - node->n_pending_flushes++; - - mutex_exit(&fil_system->mutex); - - os_file_flush(file); - - mutex_enter(&fil_system->mutex); - - os_event_set(node->sync_event); - - node->n_pending_flushes--; -skip_flush: - if (node->flush_counter < old_mod_counter) { - node->flush_counter = old_mod_counter; - - if (space->is_in_unflushed_spaces - && fil_space_is_flushed(space)) { - - space->is_in_unflushed_spaces = false; - - UT_LIST_REMOVE( - unflushed_spaces, - 
fil_system->unflushed_spaces, - space); - } - } - - if (space->purpose == FIL_TABLESPACE) { - fil_n_pending_tablespace_flushes--; - } else { - fil_n_pending_log_flushes--; - } - } - - space->n_pending_flushes--; + fil_flush_low(space); mutex_exit(&fil_system->mutex); } diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 984d508bd0424..8d564df2bb3eb 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -3906,14 +3906,15 @@ innobase_init( if (UNIV_PAGE_SIZE != UNIV_PAGE_SIZE_DEF) { ib_logf(IB_LOG_LEVEL_INFO, "innodb_page_size has been " - "changed from default value %d to %ldd.", + "changed from default value %d to %ld.", UNIV_PAGE_SIZE_DEF, UNIV_PAGE_SIZE); /* There is hang on buffer pool when trying to get a new page if buffer pool size is too small for large page sizes */ - if (innobase_buffer_pool_size < (24 * 1024 * 1024)) { - ib_logf(IB_LOG_LEVEL_INFO, - "innobase_page_size %lu requires " + if (UNIV_PAGE_SIZE > UNIV_PAGE_SIZE_DEF + && innobase_buffer_pool_size < (24 * 1024 * 1024)) { + ib_logf(IB_LOG_LEVEL_ERROR, + "innodb_page_size=%lu requires " "innodb_buffer_pool_size > 24M current %lld", UNIV_PAGE_SIZE, innobase_buffer_pool_size); @@ -21560,6 +21561,12 @@ static MYSQL_SYSVAR_BOOL(trx_purge_view_update_only_debug, "but the each purges were not done yet.", NULL, NULL, FALSE); +static MYSQL_SYSVAR_ULONG(data_file_size_debug, + srv_sys_space_size_debug, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "InnoDB system tablespace size to be set in recovery.", + NULL, NULL, 0, 0, UINT_MAX32, 0); + static MYSQL_SYSVAR_ULONG(fil_make_page_dirty_debug, srv_fil_make_page_dirty_debug, PLUGIN_VAR_OPCMDARG, "Make the first page of the given tablespace dirty.", @@ -21998,6 +22005,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(trx_rseg_n_slots_debug), MYSQL_SYSVAR(limit_optimistic_insert_debug), MYSQL_SYSVAR(trx_purge_view_update_only_debug), + 
MYSQL_SYSVAR(data_file_size_debug), MYSQL_SYSVAR(fil_make_page_dirty_debug), MYSQL_SYSVAR(saved_page_number_debug), #endif /* UNIV_DEBUG */ diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h index 95011ae61250d..38cc09bced3d6 100644 --- a/storage/xtradb/include/fil0fil.h +++ b/storage/xtradb/include/fil0fil.h @@ -292,6 +292,10 @@ struct fil_space_t { tablespace whose size we do not know yet; last incomplete megabytes in data files may be ignored if space == 0 */ + ulint recv_size; + /*!< recovered tablespace size in pages; + 0 if no size change was read from the redo log, + or if the size change was implemented */ ulint flags; /*!< tablespace flags; see fsp_flags_is_valid(), fsp_flags_get_zip_size() */ @@ -502,6 +506,12 @@ char* fil_space_get_first_path( /*=====================*/ ulint id); /*!< in: space id */ +/** Set the recovered size of a tablespace in pages. +@param id tablespace ID +@param size recovered size in pages */ +UNIV_INTERN +void +fil_space_set_recv_size(ulint id, ulint size); /*******************************************************************//** Returns the size of the space in pages. The tablespace must be cached in the memory cache. 
diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index 7e727d0917f9a..a8b0608ccd4b9 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -623,6 +623,7 @@ extern my_bool srv_ibuf_disable_background_merge; #ifdef UNIV_DEBUG extern my_bool srv_purge_view_update_only_debug; +extern ulong srv_sys_space_size_debug; #endif /* UNIV_DEBUG */ #define SRV_SEMAPHORE_WAIT_EXTENSION 7200 diff --git a/storage/xtradb/log/log0recv.cc b/storage/xtradb/log/log0recv.cc index 092c2ed88dc18..1777084e74610 100644 --- a/storage/xtradb/log/log0recv.cc +++ b/storage/xtradb/log/log0recv.cc @@ -2254,6 +2254,7 @@ recv_parse_log_rec( } #endif /* UNIV_LOG_LSN_DEBUG */ + byte* old_ptr = new_ptr; new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr, NULL, NULL, *space); if (UNIV_UNLIKELY(new_ptr == NULL)) { @@ -2261,6 +2262,13 @@ recv_parse_log_rec( return(0); } + if (*page_no == 0 && *type == MLOG_4BYTES + && mach_read_from_2(old_ptr) == FSP_HEADER_OFFSET + FSP_SIZE) { + ulint size; + mach_parse_compressed(old_ptr + 2, end_ptr, &size); + fil_space_set_recv_size(*space, size); + } + if (*page_no > recv_max_parsed_page_no) { recv_max_parsed_page_no = *page_no; } diff --git a/storage/xtradb/srv/srv0start.cc b/storage/xtradb/srv/srv0start.cc index a7434e3d067db..ab7c30991542a 100644 --- a/storage/xtradb/srv/srv0start.cc +++ b/storage/xtradb/srv/srv0start.cc @@ -130,6 +130,10 @@ UNIV_INTERN ibool srv_is_being_started = FALSE; UNIV_INTERN ibool srv_was_started = FALSE; /** TRUE if innobase_start_or_create_for_mysql() has been called */ static ibool srv_start_has_been_called = FALSE; +#ifdef UNIV_DEBUG +/** InnoDB system tablespace size in pages to set during recovery */ +UNIV_INTERN ulong srv_sys_space_size_debug; +#endif /* UNIV_DEBUG */ /** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */ @@ -188,9 +192,6 @@ static const ulint 
SRV_UNDO_TABLESPACE_SIZE_IN_PAGES = #define SRV_N_PENDING_IOS_PER_THREAD OS_AIO_N_PENDING_IOS_PER_THREAD #define SRV_MAX_N_PENDING_SYNC_IOS 100 -/** The round off to MB is similar as done in srv_parse_megabytes() */ -#define CALC_NUMBER_OF_PAGES(size) ((size) / (1024 * 1024)) * \ - ((1024 * 1024) / (UNIV_PAGE_SIZE)) #ifdef UNIV_PFS_THREAD /* Keys to register InnoDB threads with performance schema */ UNIV_INTERN mysql_pfs_key_t io_handler_thread_key; @@ -1025,15 +1026,12 @@ open_or_create_data_files( size = os_file_get_size(files[i]); ut_a(size != (os_offset_t) -1); - /* Under some error conditions like disk full - narios or file size reaching filesystem - limit the data file could contain an incomplete - extent at the end. When we extend a data file - and if some failure happens, then also the data - file could contain an incomplete extent. So we - need to round the size downward to a megabyte.*/ + /* If InnoDB encountered an error or was killed + while extending the data file, the last page + could be incomplete. */ - rounded_size_pages = (ulint) CALC_NUMBER_OF_PAGES(size); + rounded_size_pages = static_cast<ulint>( + size >> UNIV_PAGE_SIZE_SHIFT); if (i == srv_n_data_files - 1 && srv_auto_extend_last_data_file) { @@ -2160,9 +2158,11 @@ innobase_start_or_create_for_mysql(void) sum_of_new_sizes += srv_data_file_sizes[i]; } - if (sum_of_new_sizes < 10485760 / UNIV_PAGE_SIZE) { + if (!srv_auto_extend_last_data_file && sum_of_new_sizes < 640) { ib_logf(IB_LOG_LEVEL_ERROR, - "Tablespace size must be at least 10 MB"); + "Combined size in innodb_data_file_path" + " must be at least %u MiB", + 640 >> (20 - UNIV_PAGE_SIZE_SHIFT)); return(DB_ERROR); } @@ -2229,6 +2229,8 @@ innobase_start_or_create_for_mysql(void) return(err); } } else { + ut_d(fil_space_get(0)->recv_size = srv_sys_space_size_debug); + for (i = 0; i < SRV_N_LOG_FILES_MAX; i++) { os_offset_t size; os_file_stat_t stat_info;