Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change ZooKeeper path for zero-copy locks for shared data #32061

Merged
merged 24 commits into from Jan 8, 2022
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
827fa51
Add test for freeze/unfreeze with S3 zero-copy
ianton-ru Nov 18, 2021
0f9038e
Zero-copy: move shared mark outside table node in ZooKeeper
ianton-ru Nov 23, 2021
d409ab0
Fix wait for freeze in tests
ianton-ru Nov 29, 2021
80ab73c
Fix Zero-Copy replication lost locks, fix remove used remote data in …
ianton-ru Dec 1, 2021
c8fe1dc
Add tests for new zero-copy schema
ianton-ru Dec 1, 2021
e0a16a4
Merge master
ianton-ru Dec 1, 2021
0e685c1
Fix types
ianton-ru Dec 1, 2021
98bae1b
Fix tests
ianton-ru Dec 2, 2021
f0b9a43
Use table UUID in zero-copy shared label in ZooKeeper
ianton-ru Dec 17, 2021
c724b07
Remove zero-copy version converter
ianton-ru Dec 20, 2021
f390111
Add zero-copy version converted script
ianton-ru Dec 20, 2021
0c0bf66
Merge master
ianton-ru Dec 21, 2021
33cbfc8
Move logic for replicated part to StorageReplicatedMergeTree class
ianton-ru Dec 21, 2021
e6fd4bf
Merge branch 'master' into MDB-15474
ianton-ru Dec 21, 2021
e88b97d
Fix typos
ianton-ru Dec 21, 2021
0465aef
Fixes by code review responces
ianton-ru Dec 27, 2021
2d87f0a
Fix debug build
ianton-ru Dec 28, 2021
7a3c874
Merge branch 'master' into ianton-ru-MDB-15474
alesapin Dec 29, 2021
8b331cd
Remove method from IStorage
alesapin Dec 29, 2021
cbdba89
Merge branch 'master' into ianton-ru-MDB-15474
alesapin Dec 30, 2021
92cb451
Merge branch 'master' into MDB-15474
ianton-ru Dec 30, 2021
91e1ac4
Tiny improvements
alesapin Dec 30, 2021
b426cc4
Merge remote-tracking branch 'ianton-ru/MDB-15474' into ianton-ru-MDB…
alesapin Dec 30, 2021
6fcd5a7
Merge branch 'master' into MDB-15474
mergify[bot] Jan 7, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
12 changes: 12 additions & 0 deletions src/Disks/DiskDecorator.h
Expand Up @@ -70,6 +70,18 @@ class DiskDecorator : public IDisk
void startup() override;
void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap & map) override;

std::unique_ptr<ReadBufferFromFileBase> readMetaFile(
const String & path,
const ReadSettings & settings,
std::optional<size_t> size) const override { return delegate->readMetaFile(path, settings, size); }

std::unique_ptr<WriteBufferFromFileBase> writeMetaFile(
const String & path,
size_t buf_size,
WriteMode mode) override { return delegate->writeMetaFile(path, buf_size, mode); }

UInt32 getRefCount(const String & path) const override { return delegate->getRefCount(path); }

protected:
Executor & getExecutor() override;

Expand Down
18 changes: 18 additions & 0 deletions src/Disks/IDisk.cpp
Expand Up @@ -86,4 +86,22 @@ SyncGuardPtr IDisk::getDirectorySyncGuard(const String & /* path */) const
return nullptr;
}

std::unique_ptr<ReadBufferFromFileBase> IDisk::readMetaFile(
const String & path,
const ReadSettings & settings,
std::optional<size_t> size) const
{
LOG_TRACE(&Poco::Logger::get("IDisk"), "Read local metafile: {}", path);
return readFile(path, settings, size);
}

std::unique_ptr<WriteBufferFromFileBase> IDisk::writeMetaFile(
const String & path,
size_t buf_size,
WriteMode mode)
{
LOG_TRACE(&Poco::Logger::get("IDisk"), "Write local metafile: {}", path);
return writeFile(path, buf_size, mode);
}

}
20 changes: 20 additions & 0 deletions src/Disks/IDisk.h
Expand Up @@ -247,6 +247,26 @@ class IDisk : public Space
/// Applies new settings for disk in runtime.
virtual void applyNewSettings(const Poco::Util::AbstractConfiguration &, ContextPtr, const String &, const DisksMap &) {}

/// Open the local file for read and return ReadBufferFromFileBase object.
/// Overridden in IDiskRemote.
/// Used for work with custom metadata.
virtual std::unique_ptr<ReadBufferFromFileBase> readMetaFile(
const String & path,
const ReadSettings & settings = ReadSettings{},
std::optional<size_t> size = {}) const;

/// Open the local file for write and return WriteBufferFromFileBase object.
/// Overridden in IDiskRemote.
/// Used for work with custom metadata.
virtual std::unique_ptr<WriteBufferFromFileBase> writeMetaFile(
const String & path,
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
WriteMode mode = WriteMode::Rewrite);

/// Return reference count for remote FS.
/// Overridden in IDiskRemote.
virtual UInt32 getRefCount(const String &) const { return 0; }

protected:
friend class DiskDecorator;

Expand Down
25 changes: 25 additions & 0 deletions src/Disks/IDiskRemote.cpp
Expand Up @@ -484,6 +484,7 @@ bool IDiskRemote::tryReserve(UInt64 bytes)

String IDiskRemote::getUniqueId(const String & path) const
{
LOG_TRACE(log, "Remote path: {}, Path: {}", remote_fs_root_path, path);
Metadata metadata(remote_fs_root_path, metadata_disk, path);
String id;
if (!metadata.remote_fs_objects.empty())
Expand All @@ -500,4 +501,28 @@ AsynchronousReaderPtr IDiskRemote::getThreadPoolReader()
return reader;
}

std::unique_ptr<ReadBufferFromFileBase> IDiskRemote::readMetaFile(
const String & path,
const ReadSettings & settings,
std::optional<size_t> size) const
{
LOG_TRACE(log, "Read metafile: {}", path);
return metadata_disk->readFile(path, settings, size);
}

std::unique_ptr<WriteBufferFromFileBase> IDiskRemote::writeMetaFile(
const String & path,
size_t buf_size,
WriteMode mode)
{
LOG_TRACE(log, "Write metafile: {}", path);
return metadata_disk->writeFile(path, buf_size, mode);
}

UInt32 IDiskRemote::getRefCount(const String & path) const
{
auto meta = readMeta(path);
return meta.ref_count;
}

}
12 changes: 12 additions & 0 deletions src/Disks/IDiskRemote.h
Expand Up @@ -136,6 +136,18 @@ friend class DiskRemoteReservation;

static AsynchronousReaderPtr getThreadPoolReader();

virtual std::unique_ptr<ReadBufferFromFileBase> readMetaFile(
const String & path,
const ReadSettings & settings = ReadSettings{},
std::optional<size_t> size = {}) const override;

virtual std::unique_ptr<WriteBufferFromFileBase> writeMetaFile(
const String & path,
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
WriteMode mode = WriteMode::Rewrite) override;

UInt32 getRefCount(const String & path) const override;

protected:
Poco::Logger * log;
const String name;
Expand Down
3 changes: 3 additions & 0 deletions src/Storages/IStorage.h
Expand Up @@ -137,6 +137,9 @@ class IStorage : public std::enable_shared_from_this<IStorage>, public TypePromo
/// Returns true if the storage replicates SELECT, INSERT and ALTER commands among replicas.
virtual bool supportsReplication() const { return false; }

/// Returns replica name for replicated storage
virtual String getReplicaName() const { return ""; }
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Used only in replicated storage. We don't need it here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.


/// Returns true if the storage supports parallel insert.
virtual bool supportsParallelInsert() const { return false; }

Expand Down
14 changes: 14 additions & 0 deletions src/Storages/MergeTree/IMergeTreeDataPart.cpp
Expand Up @@ -1100,6 +1100,14 @@ void IMergeTreeDataPart::renameTo(const String & new_relative_path, bool remove_
storage.lockSharedData(*this);
}

void IMergeTreeDataPart::cleanupOldName(const String & old_name) const
{
if (name == old_name)
return;

storage.unlockSharedData(*this, old_name);
}

std::optional<bool> IMergeTreeDataPart::keepSharedDataInDecoupledStorage() const
{
/// NOTE: It's needed for zero-copy replication
Expand Down Expand Up @@ -1561,6 +1569,12 @@ String IMergeTreeDataPart::getUniqueId() const
}


UInt32 IMergeTreeDataPart::getRefCount() const
{
return volume->getDisk()->getRefCount(fs::path(getFullRelativePath()) / "checksums.txt");
alesapin marked this conversation as resolved.
Show resolved Hide resolved
}


String IMergeTreeDataPart::getZeroLevelPartBlockID() const
{
if (info.level != 0)
Expand Down
11 changes: 9 additions & 2 deletions src/Storages/MergeTree/IMergeTreeDataPart.h
Expand Up @@ -334,6 +334,9 @@ class IMergeTreeDataPart : public std::enable_shared_from_this<IMergeTreeDataPar
/// Changes only relative_dir_name, you need to update other metadata (name, is_temp) explicitly
virtual void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists) const;

/// Cleanup after change part
virtual void cleanupOldName(const String & old_part_name) const;

/// Makes clone of a part in detached/ directory via hard links
virtual void makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const;

Expand Down Expand Up @@ -398,10 +401,14 @@ class IMergeTreeDataPart : public std::enable_shared_from_this<IMergeTreeDataPar
/// Returns serialization for column according to files in which column is written in part.
SerializationPtr getSerializationForColumn(const NameAndTypePair & column) const;

/// Return some uniq string for file
/// Required for distinguish different copies of the same part on S3
/// Return some uniq string for file.
/// Required for distinguish different copies of the same part on remote FS.
String getUniqueId() const;

/// Return hardlink count for part.
/// Required for keep data on remote FS when part has shadow copies.
UInt32 getRefCount() const;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need to clarify that it's not some general-purpose refcount. The name should be something like getNumberOfReferenecesForZeroCopyReplication.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Renamed to getNumberOfRefereneces


protected:

/// Total size of all columns, calculated once in calcuateColumnSizesOnDisk
Expand Down