Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fs cache improvement for big reads #55158

Merged
merged 29 commits on Nov 21, 2023
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
a72541d
Improvement for big reads
kssenii Sep 29, 2023
74a34a7
Merge remote-tracking branch 'upstream/master' into fs-cache-improvement
kssenii Oct 7, 2023
c7a3c74
Better
kssenii Oct 7, 2023
d7c5cae
Better
kssenii Oct 7, 2023
43c5e17
Merge remote-tracking branch 'upstream/master' into fs-cache-improvement
kssenii Oct 16, 2023
0907209
Better
kssenii Oct 16, 2023
6889e04
Merge branch 'master' into fs-cache-improvement
kssenii Oct 16, 2023
7515853
Fix build
kssenii Oct 17, 2023
8cff5f5
Merge remote-tracking branch 'origin/fs-cache-improvement' into fs-cache-improvement
kssenii Oct 17, 2023
d837aa6
Fix
kssenii Oct 17, 2023
5d8b1ce
Fix
kssenii Oct 17, 2023
22bab4b
Fix configs
kssenii Oct 18, 2023
7aa5751
Update tests config
kssenii Oct 18, 2023
c792d95
Update config
kssenii Oct 18, 2023
89272e0
Fix upgrade check, randomize more settings
kssenii Oct 19, 2023
b13adbb
Fix style check
kssenii Oct 20, 2023
8a1ab02
Update s3_cache.xml
kssenii Oct 31, 2023
e5efc42
Merge branch 'master' into fs-cache-improvement
kssenii Nov 1, 2023
77507b8
Fix build
kssenii Nov 1, 2023
dbea507
Merge remote-tracking branch 'origin/master' into fs-cache-improvement
kssenii Nov 8, 2023
93e22e8
Better
kssenii Nov 8, 2023
ae09b16
Debug logging
kssenii Nov 9, 2023
214ac11
Looks fixed
kssenii Nov 10, 2023
a7fb6a3
Better
kssenii Nov 10, 2023
472cfdc
Review fix
kssenii Nov 16, 2023
a2ed756
Merge branch 'master' into fs-cache-improvement
kssenii Nov 16, 2023
fdf5cfd
Update FileCacheSettings.cpp
kssenii Nov 16, 2023
9523bd0
Fix config
kssenii Nov 17, 2023
d384762
Merge branch 'master' into fs-cache-improvement
kssenii Nov 21, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions docker/test/upgrade/run.sh
Expand Up @@ -78,6 +78,7 @@ remove_keeper_config "create_if_not_exists" "[01]"
rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
rm /etc/clickhouse-server/users.d/s3_cache_new.xml

start
stop
Expand Down Expand Up @@ -114,6 +115,7 @@ sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_defau
rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
rm /etc/clickhouse-server/users.d/s3_cache_new.xml

start

Expand Down
1 change: 1 addition & 0 deletions src/Core/Settings.h
Expand Up @@ -723,6 +723,7 @@ class IColumn;
M(Bool, skip_download_if_exceeds_query_cache, true, "Skip download from remote filesystem if exceeds query cache size", 0) \
M(UInt64, filesystem_cache_max_download_size, (128UL * 1024 * 1024 * 1024), "Max remote filesystem cache size that can be downloaded by a single query", 0) \
M(Bool, throw_on_error_from_cache_on_write_operations, false, "Ignore error from cache when caching on write operations (INSERT, merges)", 0) \
M(UInt64, filesystem_cache_getorset_batch_size, 20, "A batch size for holding file segments for a single read range", 0) \
kssenii marked this conversation as resolved.
Show resolved Hide resolved
\
M(Bool, load_marks_asynchronously, false, "Load MergeTree marks asynchronously", 0) \
M(Bool, enable_filesystem_read_prefetches_log, false, "Log to system.filesystem prefetch_log during query. Should be used only for testing or debugging, not recommended to be turned on by default", 0) \
Expand Down
34 changes: 22 additions & 12 deletions src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp
Expand Up @@ -114,30 +114,40 @@ void CachedOnDiskReadBufferFromFile::appendFilesystemCacheLog(
cache_log->add(std::move(elem));
}

void CachedOnDiskReadBufferFromFile::initialize(size_t offset, size_t size)
bool CachedOnDiskReadBufferFromFile::nextFileSegmentsBatch()
{
if (initialized)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Caching buffer already initialized");

implementation_buffer.reset();
size_t size = getRemainingSizeToRead();
kssenii marked this conversation as resolved.
Show resolved Hide resolved
if (!size)
return false;

if (settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache)
{
file_segments = cache->get(cache_key, offset, size);
file_segments = cache->get(cache_key, file_offset_of_buffer_end, size, settings.filesystem_cache_getorset_batch_size);
}
else
{
CreateFileSegmentSettings create_settings(FileSegmentKind::Regular);
file_segments = cache->getOrSet(cache_key, offset, size, file_size.value(), create_settings);
file_segments = cache->getOrSet(cache_key, file_offset_of_buffer_end, size, file_size.value(), create_settings, settings.filesystem_cache_getorset_batch_size);
}
return !file_segments->empty();
}

void CachedOnDiskReadBufferFromFile::initialize()
{
if (initialized)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Caching buffer already initialized");

implementation_buffer.reset();

/**
* Segments in returned list are ordered in ascending order and represent a full contiguous
* interval (no holes). Each segment in returned list has state: DOWNLOADED, DOWNLOADING or EMPTY.
*/
if (file_segments->empty())
if (!nextFileSegmentsBatch())
throw Exception(ErrorCodes::LOGICAL_ERROR, "List of file segments cannot be empty");

chassert(!file_segments->empty());

LOG_TEST(
log,
"Having {} file segments to read: {}, current offset: {}",
Expand Down Expand Up @@ -512,7 +522,7 @@ bool CachedOnDiskReadBufferFromFile::completeFileSegmentAndGetNext()
cache_file_reader.reset();

file_segments->popFront();
if (file_segments->empty())
if (file_segments->empty() && !nextFileSegmentsBatch())
return false;

current_file_segment = &file_segments->front();
Expand Down Expand Up @@ -788,9 +798,9 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
return false;

if (!initialized)
initialize(file_offset_of_buffer_end, getTotalSizeToRead());
initialize();

if (file_segments->empty())
if (file_segments->empty() && !nextFileSegmentsBatch())
return false;

const size_t original_buffer_size = internal_buffer.size();
Expand Down Expand Up @@ -1159,7 +1169,7 @@ off_t CachedOnDiskReadBufferFromFile::seek(off_t offset, int whence)
return new_pos;
}

size_t CachedOnDiskReadBufferFromFile::getTotalSizeToRead()
size_t CachedOnDiskReadBufferFromFile::getRemainingSizeToRead()
{
/// Last position should be guaranteed to be set, as at least we always know file size.
if (!read_until_position)
Expand Down
6 changes: 4 additions & 2 deletions src/Disks/IO/CachedOnDiskReadBufferFromFile.h
Expand Up @@ -63,7 +63,7 @@ class CachedOnDiskReadBufferFromFile : public ReadBufferFromFileBase
private:
using ImplementationBufferPtr = std::shared_ptr<ReadBufferFromFileBase>;

void initialize(size_t offset, size_t size);
void initialize();

/**
* Return a list of file segments ordered in ascending order. This list represents
Expand All @@ -85,7 +85,7 @@ class CachedOnDiskReadBufferFromFile : public ReadBufferFromFileBase

bool nextImplStep();

size_t getTotalSizeToRead();
size_t getRemainingSizeToRead();

bool completeFileSegmentAndGetNext();

Expand All @@ -95,6 +95,8 @@ class CachedOnDiskReadBufferFromFile : public ReadBufferFromFileBase

static bool canStartFromCache(size_t current_offset, const FileSegment & file_segment);

bool nextFileSegmentsBatch();

Poco::Logger * log;
FileCache::Key cache_key;
String source_file_path;
Expand Down
1 change: 1 addition & 0 deletions src/IO/ReadSettings.h
Expand Up @@ -100,6 +100,7 @@ struct ReadSettings
bool enable_filesystem_cache_log = false;
/// Don't populate cache when the read is not part of query execution (e.g. background thread).
bool avoid_readthrough_cache_outside_query_context = true;
/// Batch size for holding file segments for a single read range.
/// Kept equal to the default of the `filesystem_cache_getorset_batch_size` setting in Core/Settings.h (20).
size_t filesystem_cache_getorset_batch_size = 20;

size_t filesystem_cache_max_download_size = (128UL * 1024 * 1024 * 1024);
bool skip_download_if_exceeds_query_cache = true;
Expand Down