Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix usage of plain metadata type with new disks configuration option #60396

Merged
merged 33 commits into from
Mar 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
277e8d9
Fix usage plain metadata type with new configuration option
kssenii Feb 26, 2024
69b5bd0
Automatic style fix
robot-clickhouse Feb 26, 2024
f53f43b
Fixes for LocalObjectStorage and plain metadata
kssenii Feb 27, 2024
a284e71
Merge remote-tracking branch 'origin/fix-usage-of-plain-metadata' int…
kssenii Feb 27, 2024
fb38bd1
Remove debug logging
kssenii Feb 27, 2024
978fe9f
Add comments
kssenii Feb 27, 2024
3378825
Update test.py
kssenii Feb 27, 2024
98b27fd
Fix style check
kssenii Feb 27, 2024
4243ac1
Merge remote-tracking branch 'origin/master' into fix-usage-of-plain-…
kssenii Feb 27, 2024
cb8390e
Fix build
kssenii Feb 28, 2024
0d4648b
Fix clang-tidy
kssenii Feb 29, 2024
34a8b46
Merge branch 'master' into fix-usage-of-plain-metadata
kssenii Feb 29, 2024
f4b46f6
Merge branch 'master' into fix-usage-of-plain-metadata
kssenii Mar 1, 2024
925fd00
Merge branch 'master' into fix-usage-of-plain-metadata
kssenii Mar 4, 2024
a7db668
Update ObjectStorageFactory.cpp
kssenii Mar 4, 2024
3f90c93
Merge branch 'master' into fix-usage-of-plain-metadata
kssenii Mar 5, 2024
6d4514c
Fix test
kssenii Mar 6, 2024
be98c95
Automatic style fix
robot-clickhouse Mar 6, 2024
8fc3581
Merge branch 'master' into fix-usage-of-plain-metadata
kssenii Mar 7, 2024
a24e321
Merge branch 'master' into fix-usage-of-plain-metadata
kssenii Mar 7, 2024
50b8495
Update .reference
kssenii Mar 7, 2024
31ed196
Fix build
kssenii Mar 7, 2024
bcd7133
Merge remote-tracking branch 'origin/master' into fix-usage-of-plain-…
kssenii Mar 7, 2024
e5e6625
Merge remote-tracking branch 'origin/master' into fix-usage-of-plain-…
kssenii Mar 10, 2024
0b63cb2
Fix
kssenii Mar 10, 2024
aacfefe
Merge branch 'master' into fix-usage-of-plain-metadata
kssenii Mar 11, 2024
90b2743
Update test.py
kssenii Mar 11, 2024
19d8256
Update test.py
kssenii Mar 11, 2024
38f41ee
Fix integration test
kssenii Mar 11, 2024
5a71636
Fxi
kssenii Mar 11, 2024
61543ed
Merge branch 'master' into fix-usage-of-plain-metadata
kssenii Mar 12, 2024
4ce5245
Automatic style fix
robot-clickhouse Mar 12, 2024
c947484
Fxi again
kssenii Mar 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
48 changes: 48 additions & 0 deletions src/Disks/DiskType.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,27 @@
#include "DiskType.h"
#include <Poco/String.h>
#include <Common/Exception.h>

namespace DB
{
namespace ErrorCodes
{
extern const int UNKNOWN_ELEMENT_IN_CONFIG;
}

MetadataStorageType metadataTypeFromString(const String & type)
{
auto check_type = Poco::toLower(type);
if (check_type == "local")
return MetadataStorageType::Local;
if (check_type == "plain")
return MetadataStorageType::Plain;
if (check_type == "web")
return MetadataStorageType::StaticWeb;

throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG,
"MetadataStorageFactory: unknown metadata storage type: {}", type);
}

bool DataSourceDescription::operator==(const DataSourceDescription & other) const
{
Expand All @@ -14,4 +34,32 @@ bool DataSourceDescription::sameKind(const DataSourceDescription & other) const
== std::tie(other.type, other.object_storage_type, other.description);
}

std::string DataSourceDescription::toString() const
{
switch (type)
{
case DataSourceType::Local:
return "local";
case DataSourceType::RAM:
return "memory";
case DataSourceType::ObjectStorage:
{
switch (object_storage_type)
{
case ObjectStorageType::S3:
return "s3";
case ObjectStorageType::HDFS:
return "hdfs";
case ObjectStorageType::Azure:
return "azure_blob_storage";
case ObjectStorageType::Local:
return "local_blob_storage";
case ObjectStorageType::Web:
return "web";
case ObjectStorageType::None:
return "none";
}
}
}
}
}
36 changes: 4 additions & 32 deletions src/Disks/DiskType.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ enum class ObjectStorageType
{
None,
S3,
S3_Plain,
kssenii marked this conversation as resolved.
Show resolved Hide resolved
Azure,
HDFS,
Web,
Expand All @@ -30,9 +29,11 @@ enum class MetadataStorageType
Local,
Plain,
StaticWeb,
Memory,
};

MetadataStorageType metadataTypeFromString(const String & type);
String toString(DataSourceType data_source_type);

struct DataSourceDescription
{
DataSourceType type;
Expand All @@ -47,36 +48,7 @@ struct DataSourceDescription
bool operator==(const DataSourceDescription & other) const;
bool sameKind(const DataSourceDescription & other) const;

std::string toString() const
{
switch (type)
{
case DataSourceType::Local:
return "local";
case DataSourceType::RAM:
return "memory";
case DataSourceType::ObjectStorage:
{
switch (object_storage_type)
{
case ObjectStorageType::S3:
return "s3";
case ObjectStorageType::S3_Plain:
return "s3_plain";
case ObjectStorageType::HDFS:
return "hdfs";
case ObjectStorageType::Azure:
return "azure_blob_storage";
case ObjectStorageType::Local:
return "local_blob_storage";
case ObjectStorageType::Web:
return "web";
case ObjectStorageType::None:
return "none";
}
}
}
}
std::string toString() const;
};

}
1 change: 1 addition & 0 deletions src/Disks/ObjectStorages/IObjectStorage.h
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ class IObjectStorage

virtual bool isReadOnly() const { return false; }
virtual bool isWriteOnce() const { return false; }
virtual bool isPlain() const { return false; }

virtual bool supportParallelWrite() const { return false; }

Expand Down
46 changes: 39 additions & 7 deletions src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ LocalObjectStorage::LocalObjectStorage(String key_prefix_)
description = *block_device_id;
else
description = "/";

fs::create_directories(key_prefix);
}

bool LocalObjectStorage::exists(const StoredObject & object) const
Expand Down Expand Up @@ -106,13 +108,11 @@ std::unique_ptr<ReadBufferFromFileBase> LocalObjectStorage::readObject( /// NOLI
std::optional<size_t> read_hint,
std::optional<size_t> file_size) const
{
const auto & path = object.remote_path;

if (!file_size)
file_size = tryGetSizeFromFilePath(path);
file_size = tryGetSizeFromFilePath(object.remote_path);

LOG_TEST(log, "Read object: {}", path);
return createReadBufferFromFileBase(path, patchSettings(read_settings), read_hint, file_size);
LOG_TEST(log, "Read object: {}", object.remote_path);
return createReadBufferFromFileBase(object.remote_path, patchSettings(read_settings), read_hint, file_size);
}

std::unique_ptr<WriteBufferFromFileBase> LocalObjectStorage::writeObject( /// NOLINT
Expand All @@ -126,6 +126,11 @@ std::unique_ptr<WriteBufferFromFileBase> LocalObjectStorage::writeObject( /// NO
throw Exception(ErrorCodes::BAD_ARGUMENTS, "LocalObjectStorage doesn't support append to files");

LOG_TEST(log, "Write object: {}", object.remote_path);

/// Unlike real blob storage, in local fs we cannot create a file with non-existing prefix.
/// So let's create it.
fs::create_directories(fs::path(object.remote_path).parent_path());

return std::make_unique<WriteBufferFromFile>(object.remote_path, buf_size);
}

Expand Down Expand Up @@ -157,9 +162,36 @@ void LocalObjectStorage::removeObjectsIfExist(const StoredObjects & objects)
removeObjectIfExists(object);
}

ObjectMetadata LocalObjectStorage::getObjectMetadata(const std::string & /* path */) const
ObjectMetadata LocalObjectStorage::getObjectMetadata(const std::string & path) const
{
ObjectMetadata object_metadata;
LOG_TEST(log, "Getting metadata for path: {}", path);
object_metadata.size_bytes = fs::file_size(path);
object_metadata.last_modified = Poco::Timestamp::fromEpochTime(
std::chrono::duration_cast<std::chrono::seconds>(fs::last_write_time(path).time_since_epoch()).count());
return object_metadata;
}

void LocalObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, int /* max_keys */) const
{
for (const auto & entry : fs::directory_iterator(path))
{
if (entry.is_directory())
{
listObjects(entry.path(), children, 0);
continue;
}

auto metadata = getObjectMetadata(entry.path());
children.emplace_back(entry.path(), std::move(metadata));
}
}

bool LocalObjectStorage::existsOrHasAnyChild(const std::string & path) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Metadata is not supported for LocalObjectStorage");
/// Unlike real object storage, existence of a prefix path can be checked by
/// just checking existence of this prefix directly, so simple exists is enough here.
return exists(StoredObject(path));
}

void LocalObjectStorage::copyObject( // NOLINT
Expand Down
4 changes: 4 additions & 0 deletions src/Disks/ObjectStorages/Local/LocalObjectStorage.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ class LocalObjectStorage : public IObjectStorage

ObjectMetadata getObjectMetadata(const std::string & path) const override;

void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const override;

bool existsOrHasAnyChild(const std::string & path) const override;

void copyObject( /// NOLINT
const StoredObject & object_from,
const StoredObject & object_to,
Expand Down
34 changes: 29 additions & 5 deletions src/Disks/ObjectStorages/MetadataStorageFactory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,19 +32,43 @@ void MetadataStorageFactory::registerMetadataStorageType(const std::string & met
}
}

MetadataStoragePtr MetadataStorageFactory::create(
const std::string & name,
std::string MetadataStorageFactory::getCompatibilityMetadataTypeHint(const ObjectStorageType & type)
{
switch (type)
{
case ObjectStorageType::S3:
case ObjectStorageType::HDFS:
case ObjectStorageType::Local:
case ObjectStorageType::Azure:
return "local";
case ObjectStorageType::Web:
return "web";
default:
return "";
}
}

std::string MetadataStorageFactory::getMetadataType(
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
ObjectStoragePtr object_storage,
const std::string & compatibility_type_hint) const
const std::string & compatibility_type_hint)
{
if (compatibility_type_hint.empty() && !config.has(config_prefix + ".metadata_type"))
{
throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Expected `metadata_type` in config");
}

const auto type = config.getString(config_prefix + ".metadata_type", compatibility_type_hint);
return config.getString(config_prefix + ".metadata_type", compatibility_type_hint);
}

MetadataStoragePtr MetadataStorageFactory::create(
const std::string & name,
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
ObjectStoragePtr object_storage,
const std::string & compatibility_type_hint) const
{
const auto type = getMetadataType(config, config_prefix, compatibility_type_hint);
const auto it = registry.find(type);

if (it == registry.end())
Expand Down
7 changes: 7 additions & 0 deletions src/Disks/ObjectStorages/MetadataStorageFactory.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,13 @@ class MetadataStorageFactory final : private boost::noncopyable
ObjectStoragePtr object_storage,
const std::string & compatibility_type_hint) const;

static std::string getMetadataType(
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
const std::string & compatibility_type_hint = "");

static std::string getCompatibilityMetadataTypeHint(const ObjectStorageType & type);

private:
using Registry = std::unordered_map<String, Creator>;
Registry registry;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,7 @@ bool MetadataStorageFromPlainObjectStorage::isDirectory(const std::string & path
std::string directory = object_key.serialize();
if (!directory.ends_with('/'))
directory += '/';

RelativePathsWithMetadata files;
object_storage->listObjects(directory, files, 1);
return !files.empty();
return object_storage->existsOrHasAnyChild(directory);
}

uint64_t MetadataStorageFromPlainObjectStorage::getFileSize(const String & path) const
Expand Down