Skip to content

Commit

Permalink
MDEV-11520 Extending an InnoDB data file unnecessarily allocates a large memory buffer on Windows
Browse files Browse the repository at this point in the history

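fil_extend_space_to_desired_size(), os_file_set_size(): Use calloc()
for memory allocation, and handle allocation failures. Properly check the
return status of posix_fallocate(): on failure it returns the error number
directly (it does not return -1 and set errno), so the result must be
compared against 0.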

On Windows, instead of extending the file by at most 1 megabyte at a time,
write a single zero-filled page at the desired end of the file.
According to the Microsoft blog post
https://blogs.msdn.microsoft.com/oldnewthing/20110922-00/?p=9573
this will physically extend the file: because the file is not sparse,
the file system fills the gap before the written page with zero bytes.
(InnoDB never uses DeviceIoControl() to set the file sparse.)

For innodb_plugin, port the XtraDB fix for MySQL Bug#56433
(introducing fil_system->file_extend_mutex). The bug was
fixed differently in MySQL 5.6 (and MariaDB Server 10.0).
  • Loading branch information
dr-m committed Feb 21, 2017
1 parent 2bfe83a commit 978179a
Show file tree
Hide file tree
Showing 5 changed files with 162 additions and 166 deletions.
85 changes: 56 additions & 29 deletions storage/innobase/fil/fil0fil.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2014, 2017, MariaDB Corporation. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Expand Down Expand Up @@ -248,6 +249,7 @@ the ib_logfiles form a 'space' and it is handled here */
struct fil_system_struct {
#ifndef UNIV_HOTBACKUP
mutex_t mutex; /*!< The mutex protecting the cache */
mutex_t file_extend_mutex;
#endif /* !UNIV_HOTBACKUP */
hash_table_t* spaces; /*!< The hash table of spaces in the
system; they are hashed on the space
Expand Down Expand Up @@ -1658,6 +1660,8 @@ fil_init(

mutex_create(fil_system_mutex_key,
&fil_system->mutex, SYNC_ANY_LATCH);
mutex_create(fil_system_mutex_key,
&fil_system->file_extend_mutex, SYNC_OUTER_ANY_LATCH);

fil_system->spaces = hash_create(hash_size);
fil_system->name_hash = hash_create(hash_size);
Expand Down Expand Up @@ -4096,6 +4100,10 @@ fil_extend_space_to_desired_size(
ulint page_size;
ibool success = TRUE;

/* fil_system->file_extend_mutex is for http://bugs.mysql.com/56433
to prevent concurrent fil_extend_space_to_desired_size()
while fil_system->mutex is temporarily released */
mutex_enter(&fil_system->file_extend_mutex);
fil_mutex_enter_and_prepare_for_io(space_id);

space = fil_space_get_by_id(space_id);
Expand All @@ -4107,6 +4115,7 @@ fil_extend_space_to_desired_size(
*actual_size = space->size;

mutex_exit(&fil_system->mutex);
mutex_exit(&fil_system->file_extend_mutex);

return(TRUE);
}
Expand All @@ -4123,22 +4132,24 @@ fil_extend_space_to_desired_size(
start_page_no = space->size;
file_start_page_no = space->size - node->size;

mutex_exit(&fil_system->mutex);

#ifdef HAVE_POSIX_FALLOCATE
if (srv_use_posix_fallocate) {
ib_int64_t start_offset = start_page_no * page_size;
ib_int64_t end_offset = (size_after_extend - start_page_no) * page_size;
ib_int64_t desired_size = size_after_extend*page_size;
int err = posix_fallocate(
node->handle, start_offset, end_offset);

mutex_exit(&fil_system->mutex);
success = !err;

if (posix_fallocate(node->handle, start_offset, end_offset) == -1) {
fprintf(stderr, "InnoDB: Error: preallocating file "
"space for file \'%s\' failed. Current size "
" %lld, len %lld, desired size %lld\n",
node->name, start_offset, end_offset, desired_size);
success = FALSE;
} else {
success = TRUE;
if (!success) {
fprintf(stderr,
"InnoDB: Error: extending file %s"
" from %lld to %lld bytes"
" failed with error %d\n",
node->name, start_offset, end_offset, err);
}

mutex_enter(&fil_system->mutex);
Expand All @@ -4154,14 +4165,25 @@ fil_extend_space_to_desired_size(
}
#endif

#ifdef _WIN32
/* Write 1 page of zeroes at the desired end. */
start_page_no = size_after_extend - 1;
buf_size = page_size;
#else
/* Extend at most 64 pages at a time */
buf_size = ut_min(64, size_after_extend - start_page_no) * page_size;
buf2 = mem_alloc(buf_size + page_size);
#endif
buf2 = calloc(1, buf_size + page_size);
if (!buf2) {
fprintf(stderr, "InnoDB: Cannot allocate " ULINTPF
" bytes to extend file\n",
buf_size + page_size);
mutex_exit(&fil_system->file_extend_mutex);
return(FALSE);
}
buf = ut_align(buf2, page_size);

memset(buf, 0, buf_size);

while (start_page_no < size_after_extend) {
for (;;) {
ulint n_pages = ut_min(buf_size / page_size,
size_after_extend - start_page_no);

Expand All @@ -4170,6 +4192,7 @@ fil_extend_space_to_desired_size(
offset_low = ((start_page_no - file_start_page_no)
% (4096 * ((1024 * 1024) / page_size)))
* page_size;

#ifdef UNIV_HOTBACKUP
success = os_file_write(node->name, node->handle, buf,
offset_low, offset_high,
Expand All @@ -4181,34 +4204,37 @@ fil_extend_space_to_desired_size(
page_size * n_pages,
NULL, NULL);
#endif
if (success) {
node->size += n_pages;
space->size += n_pages;

os_has_said_disk_full = FALSE;
} else {
/* Let us measure the size of the file to determine
how much we were able to extend it */
/* Let us measure the size of the file to determine
how much we were able to extend it */

n_pages = ((ulint)
(os_file_get_size_as_iblonglong(
node->handle)
/ page_size)) - node->size;
n_pages = (ulint) (os_file_get_size_as_iblonglong(node->handle)
/ page_size);

node->size += n_pages;
space->size += n_pages;
mutex_enter(&fil_system->mutex);
ut_a(n_pages >= node->size);

start_page_no += n_pages - node->size;
space->size += n_pages - node->size;
node->size = n_pages;

if (success) {
os_has_said_disk_full = FALSE;
}

if (!success || start_page_no >= size_after_extend) {
break;
}

start_page_no += n_pages;
mutex_exit(&fil_system->mutex);
}

mem_free(buf2);

free(buf2);
fil_node_complete_io(node, fil_system, OS_FILE_WRITE);

#ifdef HAVE_POSIX_FALLOCATE
complete_io:
#endif /* HAVE_POSIX_FALLOCATE */

*actual_size = space->size;

Expand All @@ -4228,6 +4254,7 @@ fil_extend_space_to_desired_size(
printf("Extended %s to %lu, actual size %lu pages\n", space->name,
size_after_extend, *actual_size); */
mutex_exit(&fil_system->mutex);
mutex_exit(&fil_system->file_extend_mutex);

fil_flush(space_id);

Expand Down
1 change: 1 addition & 0 deletions storage/innobase/include/sync0sync.h
Original file line number Diff line number Diff line change
Expand Up @@ -675,6 +675,7 @@ or row lock! */
#define SYNC_BUF_BLOCK 146 /* Block mutex */
#define SYNC_BUF_FLUSH_LIST 145 /* Buffer flush list mutex */
#define SYNC_DOUBLEWRITE 140
#define SYNC_OUTER_ANY_LATCH 136
#define SYNC_ANY_LATCH 135
#define SYNC_MEM_HASH 131
#define SYNC_MEM_POOL 130
Expand Down
82 changes: 28 additions & 54 deletions storage/innobase/os/os0file.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
Copyright (c) 2012, 2017, MariaDB Corporation. All Rights Reserved.
Portions of this file contain modifications contributed and copyrighted
by Percona Inc.. Those modifications are
Expand Down Expand Up @@ -2027,48 +2028,44 @@ os_file_set_size(

ut_a(size == (size & 0xFFFFFFFF));

current_size = 0;
desired_size = (ib_int64_t)size + (((ib_int64_t)size_high) << 32);

#ifdef HAVE_POSIX_FALLOCATE
if (srv_use_posix_fallocate) {
if (posix_fallocate(file, current_size, desired_size) == -1) {
if (srv_use_posix_fallocate) {
int err = posix_fallocate(file, 0, desired_size);
if (err) {
fprintf(stderr,
"InnoDB: Error: preallocating data for"
" file %s failed at\n"
"InnoDB: offset 0 size %lld %lld. Operating system"
" error number %d.\n"
"InnoDB: Check that the disk is not full"
" or a disk quota exceeded.\n"
"InnoDB: Some operating system error numbers"
" are described at\n"
"InnoDB: "
REFMAN "operating-system-error-codes.html\n",
name, (long long)size_high, (long long)size, errno);

return (FALSE);
"InnoDB: Error: preallocating %lld bytes for"
" file %s failed with error %d.\n",
desired_size, name, err);
}
return (TRUE);
return(!err);
}
#endif

#ifdef _WIN32
/* Write 1 page of zeroes at the desired end. */
buf_size = UNIV_PAGE_SIZE;
current_size = desired_size - buf_size;
#else
/* Write up to 1 megabyte at a time. */
buf_size = ut_min(64, (ulint) (desired_size / UNIV_PAGE_SIZE))
* UNIV_PAGE_SIZE;
buf2 = ut_malloc(buf_size + UNIV_PAGE_SIZE);
current_size = 0;
#endif
buf2 = calloc(1, buf_size + UNIV_PAGE_SIZE);

if (!buf2) {
fprintf(stderr, "InnoDB: Cannot allocate " ULINTPF
" bytes to extend file\n",
buf_size + UNIV_PAGE_SIZE);
return(FALSE);
}

/* Align the buffer for possible raw i/o */
buf = ut_align(buf2, UNIV_PAGE_SIZE);

/* Write buffer full of zeros */
memset(buf, 0, buf_size);

if (desired_size >= (ib_int64_t)(100 * 1024 * 1024)) {

fprintf(stderr, "InnoDB: Progress in MB:");
}

while (current_size < desired_size) {
do {
ulint n_bytes;

if (desired_size - current_size < (ib_int64_t) buf_size) {
Expand All @@ -2082,37 +2079,14 @@ os_file_set_size(
(ulint)(current_size >> 32),
n_bytes);
if (!ret) {
ut_free(buf2);
goto error_handling;
}

/* Print about progress for each 100 MB written */
if ((ib_int64_t) (current_size + n_bytes) / (ib_int64_t)(100 * 1024 * 1024)
!= current_size / (ib_int64_t)(100 * 1024 * 1024)) {

fprintf(stderr, " %lu00",
(ulong) ((current_size + n_bytes)
/ (ib_int64_t)(100 * 1024 * 1024)));
break;
}

current_size += n_bytes;
}

if (desired_size >= (ib_int64_t)(100 * 1024 * 1024)) {

fprintf(stderr, "\n");
}
} while (current_size < desired_size);

ut_free(buf2);

ret = os_file_flush(file);

if (ret) {
return(TRUE);
}

error_handling:
return(FALSE);
free(buf2);
return(ret && os_file_flush(file));
}

/***********************************************************************//**
Expand Down
Loading

0 comments on commit 978179a

Please sign in to comment.