Skip to content

Commit ec4cf11

Browse files
committed
MDEV-11520 after-merge fix for 10.1: Use sparse files.
If page_compression (introduced in MariaDB Server 10.1) is enabled, the logical action is to not preallocate space to the data files, but to only logically extend the files with zeroes. fil_create_new_single_table_tablespace(): Create smaller files for ROW_FORMAT=COMPRESSED tables, but adhere to the minimum file size of 4*innodb_page_size. fil_space_extend_must_retry(), os_file_set_size(): On Windows, use SetFileInformationByHandle() and FILE_END_OF_FILE_INFO, which depends on bumping _WIN32_WINNT to 0x0600. FIXME: The files are not yet set up as sparse, so this will currently end up physically extending (preallocating) the files, wasting storage for unused pages. os_file_set_size(): Add the parameter "bool is_sparse=false" to declare that the file is to be extended logically, instead of being preallocated. The only caller with is_sparse=true is fil_create_new_single_table_tablespace(). (The system tablespace cannot be created with page_compression.) fil_space_extend_must_retry(), os_file_set_size(): Outside Windows, use ftruncate() to extend files that are supposed to be sparse. On systems where ftruncate() is limited to files less than 4GiB (if there are any), fil_space_extend_must_retry() retains the old logic of physically extending the file.
1 parent e1e920b commit ec4cf11

File tree

9 files changed

+309
-147
lines changed

9 files changed

+309
-147
lines changed

cmake/os/Windows.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ IF(CMAKE_C_COMPILER MATCHES "icl")
5050
ENDIF()
5151

5252
ADD_DEFINITIONS(-D_WINDOWS -D__WIN__ -D_CRT_SECURE_NO_DEPRECATE)
53-
ADD_DEFINITIONS(-D_WIN32_WINNT=0x0501)
53+
ADD_DEFINITIONS(-D_WIN32_WINNT=0x0600)
5454
# We do not want the windows.h macros min/max
5555
ADD_DEFINITIONS(-DNOMINMAX)
5656
# Speed up build process excluding unused header files

storage/innobase/fil/fil0fil.cc

Lines changed: 90 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -689,12 +689,10 @@ fil_node_open_file(
689689
return(false);
690690
}
691691

692-
if (!fsp_flags_is_compressed(flags)) {
693-
node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
692+
if (ulint zip_size = fsp_flags_get_zip_size(flags)) {
693+
node->size = ulint(size_bytes / zip_size);
694694
} else {
695-
node->size = (ulint)
696-
(size_bytes
697-
/ fsp_flags_get_zip_size(flags));
695+
node->size = ulint(size_bytes / UNIV_PAGE_SIZE);
698696
}
699697

700698
#ifdef UNIV_HOTBACKUP
@@ -1039,20 +1037,57 @@ fil_space_extend_must_retry(
10391037
}
10401038

10411039
ulint page_size = fsp_flags_get_zip_size(space->flags);
1042-
10431040
if (!page_size) {
10441041
page_size = UNIV_PAGE_SIZE;
10451042
}
10461043

1047-
#ifdef HAVE_POSIX_FALLOCATE
1044+
#ifdef _WIN32
1045+
const ulint io_completion_type = OS_FILE_READ;
1046+
/* Logically or physically extend the file with zero bytes,
1047+
depending on whether it is sparse. */
1048+
1049+
/* FIXME: Call DeviceIoControl(node->handle, FSCTL_SET_SPARSE, ...)
1050+
when opening a file when FSP_FLAGS_HAS_PAGE_COMPRESSION(). */
1051+
{
1052+
FILE_END_OF_FILE_INFO feof;
1053+
/* fil_read_first_page() expects UNIV_PAGE_SIZE bytes.
1054+
fil_node_open_file() expects at least 4 * UNIV_PAGE_SIZE bytes.
1055+
Do not shrink short ROW_FORMAT=COMPRESSED files. */
1056+
feof.EndOfFile.QuadPart = std::max(
1057+
os_offset_t(size - file_start_page_no) * page_size,
1058+
os_offset_t(FIL_IBD_FILE_INITIAL_SIZE
1059+
* UNIV_PAGE_SIZE));
1060+
*success = SetFileInformationByHandle(node->handle,
1061+
FileEndOfFileInfo,
1062+
&feof, sizeof feof);
1063+
if (!*success) {
1064+
ib_logf(IB_LOG_LEVEL_ERROR, "extending file %s"
1065+
" from " INT64PF
1066+
" to " INT64PF " bytes failed with %u",
1067+
node->name,
1068+
os_offset_t(node->size) * page_size,
1069+
feof.EndOfFile.QuadPart, GetLastError());
1070+
} else {
1071+
start_page_no = size;
1072+
}
1073+
}
1074+
#else
1075+
/* We will logically extend the file with ftruncate() if
1076+
page_compression is enabled, because the file is expected to
1077+
be sparse in that case. Make sure that ftruncate() can deal
1078+
with large files. */
1079+
const bool is_sparse = sizeof(off_t) >= 8
1080+
&& FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags);
1081+
1082+
# ifdef HAVE_POSIX_FALLOCATE
10481083
/* We must complete the I/O request after invoking
10491084
posix_fallocate() to avoid an assertion failure at shutdown.
10501085
Because no actual writes were dispatched, a read operation
10511086
will suffice. */
10521087
const ulint io_completion_type = srv_use_posix_fallocate
1053-
? OS_FILE_READ : OS_FILE_WRITE;
1088+
|| is_sparse ? OS_FILE_READ : OS_FILE_WRITE;
10541089

1055-
if (srv_use_posix_fallocate) {
1090+
if (srv_use_posix_fallocate && !is_sparse) {
10561091
const os_offset_t start_offset
10571092
= os_offset_t(start_page_no - file_start_page_no)
10581093
* page_size;
@@ -1078,19 +1113,33 @@ fil_space_extend_must_retry(
10781113
start_page_no = size;
10791114
}
10801115
} else
1081-
#else
1082-
const ulint io_completion_type = OS_FILE_WRITE;
1083-
#endif
1084-
{
1085-
#ifdef _WIN32
1086-
/* Write 1 page of zeroes at the desired end. */
1087-
ulint buf_size = page_size;
1088-
start_page_no = size - 1;
1089-
#else
1116+
# else
1117+
const ulint io_completion_type = is_sparse
1118+
? OS_FILE_READ : OS_FILE_WRITE;
1119+
# endif
1120+
if (is_sparse) {
1121+
/* fil_read_first_page() expects UNIV_PAGE_SIZE bytes.
1122+
fil_node_open_file() expects at least 4 * UNIV_PAGE_SIZE bytes.
1123+
Do not shrink short ROW_FORMAT=COMPRESSED files. */
1124+
off_t s = std::max(off_t(size - file_start_page_no)
1125+
* off_t(page_size),
1126+
off_t(FIL_IBD_FILE_INITIAL_SIZE
1127+
* UNIV_PAGE_SIZE));
1128+
*success = !ftruncate(node->handle, s);
1129+
if (!*success) {
1130+
ib_logf(IB_LOG_LEVEL_ERROR, "ftruncate of file %s"
1131+
" from " INT64PF " to " INT64PF " bytes"
1132+
" failed with error %d",
1133+
node->name,
1134+
os_offset_t(start_page_no - file_start_page_no)
1135+
* page_size, os_offset_t(s), errno);
1136+
} else {
1137+
start_page_no = size;
1138+
}
1139+
} else {
10901140
/* Extend at most 64 pages at a time */
10911141
ulint buf_size = ut_min(64, size - start_page_no)
10921142
* page_size;
1093-
#endif
10941143
byte* buf2 = static_cast<byte*>(
10951144
calloc(1, buf_size + page_size));
10961145
*success = buf2 != NULL;
@@ -1135,7 +1184,7 @@ fil_space_extend_must_retry(
11351184

11361185
free(buf2);
11371186
}
1138-
1187+
#endif
11391188
mutex_enter(&fil_system->mutex);
11401189

11411190
ut_a(node->being_extended);
@@ -3799,7 +3848,23 @@ fil_create_new_single_table_tablespace(
37993848
goto error_exit_3;
38003849
}
38013850

3802-
ret = os_file_set_size(path, file, size * UNIV_PAGE_SIZE);
3851+
{
3852+
/* fil_read_first_page() expects UNIV_PAGE_SIZE bytes.
3853+
fil_node_open_file() expects at least 4 * UNIV_PAGE_SIZE bytes.
3854+
Do not create too short ROW_FORMAT=COMPRESSED files. */
3855+
const ulint zip_size = fsp_flags_get_zip_size(flags);
3856+
const ulint page_size = zip_size ? zip_size : UNIV_PAGE_SIZE;
3857+
const os_offset_t fsize = std::max(
3858+
os_offset_t(size) * page_size,
3859+
os_offset_t(FIL_IBD_FILE_INITIAL_SIZE
3860+
* UNIV_PAGE_SIZE));
3861+
/* ROW_FORMAT=COMPRESSED files never use page_compression
3862+
(are never sparse). */
3863+
ut_ad(!zip_size || !FSP_FLAGS_HAS_PAGE_COMPRESSION(flags));
3864+
3865+
ret = os_file_set_size(path, file, fsize,
3866+
FSP_FLAGS_HAS_PAGE_COMPRESSION(flags));
3867+
}
38033868

38043869
if (!ret) {
38053870
err = DB_OUT_OF_FILE_SPACE;
@@ -3827,14 +3892,8 @@ fil_create_new_single_table_tablespace(
38273892
fsp_header_init_fields(page, space_id, flags);
38283893
mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id);
38293894

3830-
if (!(fsp_flags_is_compressed(flags))) {
3831-
buf_flush_init_for_writing(page, NULL, 0);
3832-
ret = os_file_write(path, file, page, 0, UNIV_PAGE_SIZE);
3833-
} else {
3895+
if (const ulint zip_size = fsp_flags_get_zip_size(flags)) {
38343896
page_zip_des_t page_zip;
3835-
ulint zip_size;
3836-
3837-
zip_size = fsp_flags_get_zip_size(flags);
38383897

38393898
page_zip_set_size(&page_zip, zip_size);
38403899
page_zip.data = page + UNIV_PAGE_SIZE;
@@ -3845,6 +3904,9 @@ fil_create_new_single_table_tablespace(
38453904
page_zip.n_blobs = 0;
38463905
buf_flush_init_for_writing(page, &page_zip, 0);
38473906
ret = os_file_write(path, file, page_zip.data, 0, zip_size);
3907+
} else {
3908+
buf_flush_init_for_writing(page, NULL, 0);
3909+
ret = os_file_write(path, file, page, 0, UNIV_PAGE_SIZE);
38483910
}
38493911

38503912
ut_free(buf2);

storage/innobase/include/os0file.h

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -881,17 +881,19 @@ os_file_get_size(
881881
/*=============*/
882882
os_file_t file) /*!< in: handle to a file */
883883
MY_ATTRIBUTE((warn_unused_result));
884-
/***********************************************************************//**
885-
Write the specified number of zeros to a newly created file.
886-
@return TRUE if success */
884+
/** Set the size of a newly created file.
885+
@param[in] name file name
886+
@param[in] file file handle
887+
@param[in] size desired file size
888+
@param[in] is_sparse whether to create a sparse file (no preallocating)
889+
@return whether the operation succeeded */
887890
UNIV_INTERN
888-
ibool
891+
bool
889892
os_file_set_size(
890-
/*=============*/
891-
const char* name, /*!< in: name of the file or path as a
892-
null-terminated string */
893-
os_file_t file, /*!< in: handle to a file */
894-
os_offset_t size) /*!< in: file size */
893+
const char* name,
894+
os_file_t file,
895+
os_offset_t size,
896+
bool is_sparse = false)
895897
MY_ATTRIBUTE((nonnull, warn_unused_result));
896898
/***********************************************************************//**
897899
Truncates a file at its current position.

storage/innobase/os/os0file.cc

Lines changed: 44 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,7 @@ UNIV_INTERN ulint os_n_pending_writes = 0;
355355
UNIV_INTERN ulint os_n_pending_reads = 0;
356356

357357
/** After first fallocate failure we will disable os_file_trim */
358-
UNIV_INTERN ibool os_fallocate_failed = FALSE;
358+
static ibool os_fallocate_failed;
359359

360360
/**********************************************************************//**
361361
Directly manipulate the allocated disk space by deallocating for the file referred to
@@ -364,7 +364,7 @@ Within the specified range, partial file system blocks are zeroed, and whole
364364
file system blocks are removed from the file. After a successful call,
365365
subsequent reads from this range will return zeroes.
366366
@return true if success, false if error */
367-
UNIV_INTERN
367+
static
368368
ibool
369369
os_file_trim(
370370
/*=========*/
@@ -2355,24 +2355,44 @@ os_file_get_size(
23552355
#endif /* __WIN__ */
23562356
}
23572357

2358-
/***********************************************************************//**
2359-
Write the specified number of zeros to a newly created file.
2360-
@return TRUE if success */
2358+
/** Set the size of a newly created file.
2359+
@param[in] name file name
2360+
@param[in] file file handle
2361+
@param[in] size desired file size
2362+
@param[in] is_sparse whether to create a sparse file (no preallocating)
2363+
@return whether the operation succeeded */
23612364
UNIV_INTERN
2362-
ibool
2365+
bool
23632366
os_file_set_size(
2364-
/*=============*/
2365-
const char* name, /*!< in: name of the file or path as a
2366-
null-terminated string */
2367-
os_file_t file, /*!< in: handle to a file */
2368-
os_offset_t size) /*!< in: file size */
2367+
const char* name,
2368+
os_file_t file,
2369+
os_offset_t size,
2370+
bool is_sparse)
23692371
{
2370-
ibool ret;
2371-
byte* buf;
2372-
byte* buf2;
2373-
ulint buf_size;
2372+
#ifdef _WIN32
2373+
FILE_END_OF_FILE_INFO feof;
2374+
feof.EndOfFile.QuadPart = size;
2375+
bool success = SetFileInformationByHandle(file,
2376+
FileEndOfFileInfo,
2377+
&feof, sizeof feof);
2378+
if (!success) {
2379+
ib_logf(IB_LOG_LEVEL_ERROR, "os_file_set_size() of file %s"
2380+
" to " INT64PF " bytes failed with %u",
2381+
name, size, GetLastError());
2382+
}
2383+
return(success);
2384+
#else
2385+
if (is_sparse) {
2386+
bool success = !ftruncate(file, size);
2387+
if (!success) {
2388+
ib_logf(IB_LOG_LEVEL_ERROR, "ftruncate of file %s"
2389+
" to " INT64PF " bytes failed with error %d",
2390+
name, size, errno);
2391+
}
2392+
return(success);
2393+
}
23742394

2375-
#ifdef HAVE_POSIX_FALLOCATE
2395+
# ifdef HAVE_POSIX_FALLOCATE
23762396
if (srv_use_posix_fallocate) {
23772397
int err = posix_fallocate(file, 0, size);
23782398
if (err) {
@@ -2383,29 +2403,25 @@ os_file_set_size(
23832403
}
23842404
return(!err);
23852405
}
2386-
#endif
2406+
# endif
23872407

2388-
#ifdef _WIN32
2389-
/* Write 1 page of zeroes at the desired end. */
2390-
buf_size = UNIV_PAGE_SIZE;
2391-
os_offset_t current_size = size - buf_size;
2392-
#else
23932408
/* Write up to 1 megabyte at a time. */
2394-
buf_size = ut_min(64, (ulint) (size / UNIV_PAGE_SIZE))
2409+
ulint buf_size = ut_min(64, (ulint) (size / UNIV_PAGE_SIZE))
23952410
* UNIV_PAGE_SIZE;
23962411
os_offset_t current_size = 0;
2397-
#endif
2398-
buf2 = static_cast<byte*>(calloc(1, buf_size + UNIV_PAGE_SIZE));
2412+
2413+
byte* buf2 = static_cast<byte*>(calloc(1, buf_size + UNIV_PAGE_SIZE));
23992414

24002415
if (!buf2) {
24012416
ib_logf(IB_LOG_LEVEL_ERROR,
24022417
"Cannot allocate " ULINTPF " bytes to extend file\n",
24032418
buf_size + UNIV_PAGE_SIZE);
2404-
return(FALSE);
2419+
return(false);
24052420
}
24062421

24072422
/* Align the buffer for possible raw i/o */
2408-
buf = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
2423+
byte* buf = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
2424+
bool ret;
24092425

24102426
do {
24112427
ulint n_bytes;
@@ -2428,6 +2444,7 @@ os_file_set_size(
24282444
free(buf2);
24292445

24302446
return(ret && os_file_flush(file));
2447+
#endif
24312448
}
24322449

24332450
/***********************************************************************//**

storage/innobase/srv/srv0start.cc

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1128,14 +1128,13 @@ open_or_create_data_files(
11281128
(ulong) (srv_data_file_sizes[i]
11291129
>> (20 - UNIV_PAGE_SIZE_SHIFT)));
11301130

1131-
ib_logf(IB_LOG_LEVEL_INFO,
1132-
"Database physically writes the"
1133-
" file full: wait...");
1134-
11351131
ret = os_file_set_size(
11361132
name, files[i],
11371133
(os_offset_t) srv_data_file_sizes[i]
1138-
<< UNIV_PAGE_SIZE_SHIFT);
1134+
<< UNIV_PAGE_SIZE_SHIFT
1135+
/* TODO: enable page_compression on the
1136+
system tablespace and add
1137+
, FSP_FLAGS_HAS_PAGE_COMPRESSION(flags)*/);
11391138

11401139
if (!ret) {
11411140
ib_logf(IB_LOG_LEVEL_ERROR,
@@ -1232,10 +1231,11 @@ srv_undo_tablespace_create(
12321231
"Setting file %s size to %lu MB",
12331232
name, size >> (20 - UNIV_PAGE_SIZE_SHIFT));
12341233

1235-
ib_logf(IB_LOG_LEVEL_INFO,
1236-
"Database physically writes the file full: wait...");
1237-
1238-
ret = os_file_set_size(name, fh, size << UNIV_PAGE_SIZE_SHIFT);
1234+
ret = os_file_set_size(name, fh, size << UNIV_PAGE_SIZE_SHIFT
1235+
/* TODO: enable page_compression on the
1236+
system tablespace and add
1237+
FSP_FLAGS_HAS_PAGE_COMPRESSION(flags)
1238+
*/);
12391239

12401240
if (!ret) {
12411241
ib_logf(IB_LOG_LEVEL_INFO,

0 commit comments

Comments
 (0)