Skip to content

Commit

Permalink
Merge pull request #52402 from vitlibar/disable-fs-cache-for-backups
Browse files Browse the repository at this point in the history
Disable updating fs cache during backup/restore.
  • Loading branch information
vitlibar committed Aug 3, 2023
2 parents 0af0434 + 587877d commit 75b553b
Show file tree
Hide file tree
Showing 43 changed files with 306 additions and 116 deletions.
2 changes: 2 additions & 0 deletions src/Backups/BackupEntriesCollector.cpp
Expand Up @@ -77,10 +77,12 @@ BackupEntriesCollector::BackupEntriesCollector(
const ASTBackupQuery::Elements & backup_query_elements_,
const BackupSettings & backup_settings_,
std::shared_ptr<IBackupCoordination> backup_coordination_,
const ReadSettings & read_settings_,
const ContextPtr & context_)
: backup_query_elements(backup_query_elements_)
, backup_settings(backup_settings_)
, backup_coordination(backup_coordination_)
, read_settings(read_settings_)
, context(context_)
, on_cluster_first_sync_timeout(context->getConfigRef().getUInt64("backups.on_cluster_first_sync_timeout", 180000))
, consistent_metadata_snapshot_timeout(context->getConfigRef().getUInt64("backups.consistent_metadata_snapshot_timeout", 600000))
Expand Down
3 changes: 3 additions & 0 deletions src/Backups/BackupEntriesCollector.h
Expand Up @@ -30,6 +30,7 @@ class BackupEntriesCollector : private boost::noncopyable
BackupEntriesCollector(const ASTBackupQuery::Elements & backup_query_elements_,
const BackupSettings & backup_settings_,
std::shared_ptr<IBackupCoordination> backup_coordination_,
const ReadSettings & read_settings_,
const ContextPtr & context_);
~BackupEntriesCollector();

Expand All @@ -40,6 +41,7 @@ class BackupEntriesCollector : private boost::noncopyable

const BackupSettings & getBackupSettings() const { return backup_settings; }
std::shared_ptr<IBackupCoordination> getBackupCoordination() const { return backup_coordination; }
const ReadSettings & getReadSettings() const { return read_settings; }
ContextPtr getContext() const { return context; }

/// Adds a backup entry which will be later returned by run().
Expand Down Expand Up @@ -93,6 +95,7 @@ class BackupEntriesCollector : private boost::noncopyable
const ASTBackupQuery::Elements backup_query_elements;
const BackupSettings backup_settings;
std::shared_ptr<IBackupCoordination> backup_coordination;
const ReadSettings read_settings;
ContextPtr context;
std::chrono::milliseconds on_cluster_first_sync_timeout;
std::chrono::milliseconds consistent_metadata_snapshot_timeout;
Expand Down
8 changes: 4 additions & 4 deletions src/Backups/BackupEntryFromImmutableFile.cpp
Expand Up @@ -57,7 +57,7 @@ UInt64 BackupEntryFromImmutableFile::getSize() const
return *file_size;
}

UInt128 BackupEntryFromImmutableFile::getChecksum() const
UInt128 BackupEntryFromImmutableFile::getChecksum(const ReadSettings & read_settings) const
{
{
std::lock_guard lock{size_and_checksum_mutex};
Expand All @@ -73,7 +73,7 @@ UInt128 BackupEntryFromImmutableFile::getChecksum() const
}
}

auto calculated_checksum = BackupEntryWithChecksumCalculation<IBackupEntry>::getChecksum();
auto calculated_checksum = BackupEntryWithChecksumCalculation<IBackupEntry>::getChecksum(read_settings);

{
std::lock_guard lock{size_and_checksum_mutex};
Expand All @@ -86,13 +86,13 @@ UInt128 BackupEntryFromImmutableFile::getChecksum() const
}
}

std::optional<UInt128> BackupEntryFromImmutableFile::getPartialChecksum(size_t prefix_length) const
std::optional<UInt128> BackupEntryFromImmutableFile::getPartialChecksum(size_t prefix_length, const ReadSettings & read_settings) const
{
if (prefix_length == 0)
return 0;

if (prefix_length >= getSize())
return getChecksum();
return getChecksum(read_settings);

/// For immutable files we don't use partial checksums.
return std::nullopt;
Expand Down
4 changes: 2 additions & 2 deletions src/Backups/BackupEntryFromImmutableFile.h
Expand Up @@ -27,8 +27,8 @@ class BackupEntryFromImmutableFile : public BackupEntryWithChecksumCalculation<I
std::unique_ptr<SeekableReadBuffer> getReadBuffer(const ReadSettings & read_settings) const override;

UInt64 getSize() const override;
UInt128 getChecksum() const override;
std::optional<UInt128> getPartialChecksum(size_t prefix_length) const override;
UInt128 getChecksum(const ReadSettings & read_settings) const override;
std::optional<UInt128> getPartialChecksum(size_t prefix_length, const ReadSettings & read_settings) const override;

DataSourceDescription getDataSourceDescription() const override { return data_source_description; }
bool isEncryptedByDisk() const override { return copy_encrypted; }
Expand Down
16 changes: 8 additions & 8 deletions src/Backups/BackupEntryFromSmallFile.cpp
Expand Up @@ -11,37 +11,37 @@ namespace DB
{
namespace
{
String readFile(const String & file_path)
String readFile(const String & file_path, const ReadSettings & read_settings)
{
auto buf = createReadBufferFromFileBase(file_path, /* settings= */ {});
auto buf = createReadBufferFromFileBase(file_path, read_settings);
String s;
readStringUntilEOF(s, *buf);
return s;
}

String readFile(const DiskPtr & disk, const String & file_path, bool copy_encrypted)
String readFile(const DiskPtr & disk, const String & file_path, const ReadSettings & read_settings, bool copy_encrypted)
{
auto buf = copy_encrypted ? disk->readEncryptedFile(file_path, {}) : disk->readFile(file_path);
auto buf = copy_encrypted ? disk->readEncryptedFile(file_path, read_settings) : disk->readFile(file_path, read_settings);
String s;
readStringUntilEOF(s, *buf);
return s;
}
}


BackupEntryFromSmallFile::BackupEntryFromSmallFile(const String & file_path_)
BackupEntryFromSmallFile::BackupEntryFromSmallFile(const String & file_path_, const ReadSettings & read_settings_)
: file_path(file_path_)
, data_source_description(DiskLocal::getLocalDataSourceDescription(file_path_))
, data(readFile(file_path_))
, data(readFile(file_path_, read_settings_))
{
}

BackupEntryFromSmallFile::BackupEntryFromSmallFile(const DiskPtr & disk_, const String & file_path_, bool copy_encrypted_)
BackupEntryFromSmallFile::BackupEntryFromSmallFile(const DiskPtr & disk_, const String & file_path_, const ReadSettings & read_settings_, bool copy_encrypted_)
: disk(disk_)
, file_path(file_path_)
, data_source_description(disk_->getDataSourceDescription())
, copy_encrypted(copy_encrypted_ && data_source_description.is_encrypted)
, data(readFile(disk_, file_path, copy_encrypted))
, data(readFile(disk_, file_path, read_settings_, copy_encrypted))
{
}

Expand Down
4 changes: 2 additions & 2 deletions src/Backups/BackupEntryFromSmallFile.h
Expand Up @@ -13,8 +13,8 @@ using DiskPtr = std::shared_ptr<IDisk>;
class BackupEntryFromSmallFile : public BackupEntryWithChecksumCalculation<IBackupEntry>
{
public:
explicit BackupEntryFromSmallFile(const String & file_path_);
BackupEntryFromSmallFile(const DiskPtr & disk_, const String & file_path_, bool copy_encrypted_ = false);
explicit BackupEntryFromSmallFile(const String & file_path_, const ReadSettings & read_settings_);
BackupEntryFromSmallFile(const DiskPtr & disk_, const String & file_path_, const ReadSettings & read_settings_, bool copy_encrypted_ = false);

std::unique_ptr<SeekableReadBuffer> getReadBuffer(const ReadSettings &) const override;
UInt64 getSize() const override { return data.size(); }
Expand Down
15 changes: 6 additions & 9 deletions src/Backups/BackupEntryWithChecksumCalculation.cpp
Expand Up @@ -6,7 +6,7 @@ namespace DB
{

template <typename Base>
UInt128 BackupEntryWithChecksumCalculation<Base>::getChecksum() const
UInt128 BackupEntryWithChecksumCalculation<Base>::getChecksum(const ReadSettings & read_settings) const
{
{
std::lock_guard lock{checksum_calculation_mutex};
Expand All @@ -26,7 +26,7 @@ UInt128 BackupEntryWithChecksumCalculation<Base>::getChecksum() const
}
else
{
auto read_buffer = this->getReadBuffer(ReadSettings{}.adjustBufferSize(size));
auto read_buffer = this->getReadBuffer(read_settings.adjustBufferSize(size));
HashingReadBuffer hashing_read_buffer(*read_buffer);
hashing_read_buffer.ignoreAll();
calculated_checksum = hashing_read_buffer.getHash();
Expand All @@ -37,23 +37,20 @@ UInt128 BackupEntryWithChecksumCalculation<Base>::getChecksum() const
}

template <typename Base>
std::optional<UInt128> BackupEntryWithChecksumCalculation<Base>::getPartialChecksum(size_t prefix_length) const
std::optional<UInt128> BackupEntryWithChecksumCalculation<Base>::getPartialChecksum(size_t prefix_length, const ReadSettings & read_settings) const
{
if (prefix_length == 0)
return 0;

size_t size = this->getSize();
if (prefix_length >= size)
return this->getChecksum();
return this->getChecksum(read_settings);

std::lock_guard lock{checksum_calculation_mutex};

ReadSettings read_settings;
if (calculated_checksum)
read_settings.adjustBufferSize(calculated_checksum ? prefix_length : size);

auto read_buffer = this->getReadBuffer(read_settings);
auto read_buffer = this->getReadBuffer(read_settings.adjustBufferSize(calculated_checksum ? prefix_length : size));
HashingReadBuffer hashing_read_buffer(*read_buffer);

hashing_read_buffer.ignore(prefix_length);
auto partial_checksum = hashing_read_buffer.getHash();

Expand Down
4 changes: 2 additions & 2 deletions src/Backups/BackupEntryWithChecksumCalculation.h
Expand Up @@ -11,8 +11,8 @@ template <typename Base>
class BackupEntryWithChecksumCalculation : public Base
{
public:
UInt128 getChecksum() const override;
std::optional<UInt128> getPartialChecksum(size_t prefix_length) const override;
UInt128 getChecksum(const ReadSettings & read_settings) const override;
std::optional<UInt128> getPartialChecksum(size_t prefix_length, const ReadSettings & read_settings) const override;

private:
mutable std::optional<UInt128> calculated_checksum;
Expand Down
4 changes: 2 additions & 2 deletions src/Backups/BackupEntryWrappedWith.h
Expand Up @@ -17,8 +17,8 @@ class BackupEntryWrappedWith : public IBackupEntry

std::unique_ptr<SeekableReadBuffer> getReadBuffer(const ReadSettings & read_settings) const override { return entry->getReadBuffer(read_settings); }
UInt64 getSize() const override { return entry->getSize(); }
UInt128 getChecksum() const override { return entry->getChecksum(); }
std::optional<UInt128> getPartialChecksum(size_t prefix_length) const override { return entry->getPartialChecksum(prefix_length); }
UInt128 getChecksum(const ReadSettings & read_settings) const override { return entry->getChecksum(read_settings); }
std::optional<UInt128> getPartialChecksum(size_t prefix_length, const ReadSettings & read_settings) const override { return entry->getPartialChecksum(prefix_length, read_settings); }
DataSourceDescription getDataSourceDescription() const override { return entry->getDataSourceDescription(); }
bool isEncryptedByDisk() const override { return entry->isEncryptedByDisk(); }
bool isFromFile() const override { return entry->isFromFile(); }
Expand Down
4 changes: 4 additions & 0 deletions src/Backups/BackupFactory.h
Expand Up @@ -3,6 +3,8 @@
#include <Backups/IBackup.h>
#include <Backups/BackupInfo.h>
#include <Core/Types.h>
#include <IO/ReadSettings.h>
#include <IO/WriteSettings.h>
#include <Parsers/IAST_fwd.h>
#include <boost/noncopyable.hpp>
#include <memory>
Expand Down Expand Up @@ -37,6 +39,8 @@ class BackupFactory : boost::noncopyable
std::optional<UUID> backup_uuid;
bool deduplicate_files = true;
bool allow_s3_native_copy = true;
ReadSettings read_settings;
WriteSettings write_settings;
};

static BackupFactory & instance();
Expand Down
25 changes: 15 additions & 10 deletions src/Backups/BackupFileInfo.cpp
Expand Up @@ -57,12 +57,12 @@ namespace

/// Calculate checksum for backup entry if it's empty.
/// Also able to calculate additional checksum of some prefix.
ChecksumsForNewEntry calculateNewEntryChecksumsIfNeeded(const BackupEntryPtr & entry, size_t prefix_size)
ChecksumsForNewEntry calculateNewEntryChecksumsIfNeeded(const BackupEntryPtr & entry, size_t prefix_size, const ReadSettings & read_settings)
{
ChecksumsForNewEntry res;
/// The partial checksum should be calculated before the full checksum to enable optimization in BackupEntryWithChecksumCalculation.
res.prefix_checksum = entry->getPartialChecksum(prefix_size);
res.full_checksum = entry->getChecksum();
res.prefix_checksum = entry->getPartialChecksum(prefix_size, read_settings);
res.full_checksum = entry->getChecksum(read_settings);
return res;
}

Expand Down Expand Up @@ -93,7 +93,12 @@ String BackupFileInfo::describe() const
}


BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const BackupEntryPtr & backup_entry, const BackupPtr & base_backup, Poco::Logger * log)
BackupFileInfo buildFileInfoForBackupEntry(
const String & file_name,
const BackupEntryPtr & backup_entry,
const BackupPtr & base_backup,
const ReadSettings & read_settings,
Poco::Logger * log)
{
auto adjusted_path = removeLeadingSlash(file_name);

Expand Down Expand Up @@ -126,7 +131,7 @@ BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const Backu
/// A file with the same name but a smaller size exists in the previous backup
if (check_base == CheckBackupResult::HasPrefix)
{
auto checksums = calculateNewEntryChecksumsIfNeeded(backup_entry, base_backup_file_info->first);
auto checksums = calculateNewEntryChecksumsIfNeeded(backup_entry, base_backup_file_info->first, read_settings);
info.checksum = checksums.full_checksum;

/// We have prefix of this file in backup with the same checksum.
Expand All @@ -146,7 +151,7 @@ BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const Backu
{
/// We have the full file or nothing; first of all, let's get the checksum
/// of the current file
auto checksums = calculateNewEntryChecksumsIfNeeded(backup_entry, 0);
auto checksums = calculateNewEntryChecksumsIfNeeded(backup_entry, 0, read_settings);
info.checksum = checksums.full_checksum;

if (info.checksum == base_backup_file_info->second)
Expand All @@ -169,7 +174,7 @@ BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const Backu
}
else
{
auto checksums = calculateNewEntryChecksumsIfNeeded(backup_entry, 0);
auto checksums = calculateNewEntryChecksumsIfNeeded(backup_entry, 0, read_settings);
info.checksum = checksums.full_checksum;
}

Expand All @@ -188,7 +193,7 @@ BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const Backu
return info;
}

BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entries, const BackupPtr & base_backup, ThreadPool & thread_pool)
BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entries, const BackupPtr & base_backup, const ReadSettings & read_settings, ThreadPool & thread_pool)
{
BackupFileInfos infos;
infos.resize(backup_entries.size());
Expand All @@ -210,7 +215,7 @@ BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entr
++num_active_jobs;
}

auto job = [&mutex, &num_active_jobs, &event, &exception, &infos, &backup_entries, &base_backup, &thread_group, i, log](bool async)
auto job = [&mutex, &num_active_jobs, &event, &exception, &infos, &backup_entries, &read_settings, &base_backup, &thread_group, i, log](bool async)
{
SCOPE_EXIT_SAFE({
std::lock_guard lock{mutex};
Expand All @@ -237,7 +242,7 @@ BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entr
return;
}

infos[i] = buildFileInfoForBackupEntry(name, entry, base_backup, log);
infos[i] = buildFileInfoForBackupEntry(name, entry, base_backup, read_settings, log);
}
catch (...)
{
Expand Down
5 changes: 3 additions & 2 deletions src/Backups/BackupFileInfo.h
Expand Up @@ -13,6 +13,7 @@ class IBackupEntry;
using BackupPtr = std::shared_ptr<const IBackup>;
using BackupEntryPtr = std::shared_ptr<const IBackupEntry>;
using BackupEntries = std::vector<std::pair<String, BackupEntryPtr>>;
struct ReadSettings;


/// Information about a file stored in a backup.
Expand Down Expand Up @@ -66,9 +67,9 @@ struct BackupFileInfo
using BackupFileInfos = std::vector<BackupFileInfo>;

/// Builds a BackupFileInfo for a specified backup entry.
BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const BackupEntryPtr & backup_entry, const BackupPtr & base_backup, Poco::Logger * log);
BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const BackupEntryPtr & backup_entry, const BackupPtr & base_backup, const ReadSettings & read_settings, Poco::Logger * log);

/// Builds a vector of BackupFileInfos for specified backup entries.
BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entries, const BackupPtr & base_backup, ThreadPool & thread_pool);
BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entries, const BackupPtr & base_backup, const ReadSettings & read_settings, ThreadPool & thread_pool);

}
13 changes: 6 additions & 7 deletions src/Backups/BackupIO_Default.cpp
Expand Up @@ -4,17 +4,16 @@
#include <IO/copyData.h>
#include <IO/WriteBufferFromFileBase.h>
#include <IO/ReadBufferFromFileBase.h>
#include <Interpreters/Context.h>
#include <Common/logger_useful.h>


namespace DB
{

BackupReaderDefault::BackupReaderDefault(Poco::Logger * log_, const ContextPtr & context_)
BackupReaderDefault::BackupReaderDefault(const ReadSettings & read_settings_, const WriteSettings & write_settings_, Poco::Logger * log_)
: log(log_)
, read_settings(context_->getBackupReadSettings())
, write_settings(context_->getWriteSettings())
, read_settings(read_settings_)
, write_settings(write_settings_)
, write_buffer_size(DBMS_DEFAULT_BUFFER_SIZE)
{
}
Expand All @@ -37,10 +36,10 @@ void BackupReaderDefault::copyFileToDisk(const String & path_in_backup, size_t f
write_buffer->finalize();
}

BackupWriterDefault::BackupWriterDefault(Poco::Logger * log_, const ContextPtr & context_)
BackupWriterDefault::BackupWriterDefault(const ReadSettings & read_settings_, const WriteSettings & write_settings_, Poco::Logger * log_)
: log(log_)
, read_settings(context_->getBackupReadSettings())
, write_settings(context_->getWriteSettings())
, read_settings(read_settings_)
, write_settings(write_settings_)
, write_buffer_size(DBMS_DEFAULT_BUFFER_SIZE)
{
}
Expand Down
5 changes: 2 additions & 3 deletions src/Backups/BackupIO_Default.h
Expand Up @@ -3,7 +3,6 @@
#include <Backups/BackupIO.h>
#include <IO/ReadSettings.h>
#include <IO/WriteSettings.h>
#include <Interpreters/Context_fwd.h>


namespace DB
Expand All @@ -19,7 +18,7 @@ enum class WriteMode;
class BackupReaderDefault : public IBackupReader
{
public:
BackupReaderDefault(Poco::Logger * log_, const ContextPtr & context_);
BackupReaderDefault(const ReadSettings & read_settings_, const WriteSettings & write_settings_, Poco::Logger * log_);
~BackupReaderDefault() override = default;

/// The function copyFileToDisk() can be much faster than reading the file with readFile() and then writing it to some disk.
Expand All @@ -46,7 +45,7 @@ class BackupReaderDefault : public IBackupReader
class BackupWriterDefault : public IBackupWriter
{
public:
BackupWriterDefault(Poco::Logger * log_, const ContextPtr & context_);
BackupWriterDefault(const ReadSettings & read_settings_, const WriteSettings & write_settings_, Poco::Logger * log_);
~BackupWriterDefault() override = default;

bool fileContentsEqual(const String & file_name, const String & expected_file_contents) override;
Expand Down

0 comments on commit 75b553b

Please sign in to comment.