Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More efficient constructor for SerializationEnum #57887

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/DataTypes/DataTypeEnum.cpp
Expand Up @@ -170,7 +170,7 @@ bool DataTypeEnum<Type>::contains(const IDataType & rhs) const
/// Creates the default serialization object for this Enum data type.
/// NOTE: this is a diff hunk rendered without +/- markers ("1 addition & 1 deletion"), so the two
/// `return` lines are the before/after versions of the same statement (presumably old first, new second).
template <typename Type>
SerializationPtr DataTypeEnum<Type>::doGetDefaultSerialization() const
{
/// Before: passes the result of getValues() to SerializationEnum's `const Values &` constructor.
return std::make_shared<SerializationEnum<Type>>(this->getValues());
/// After: passes a shared pointer to this data type itself (downcast to const DataTypeEnum<Type>),
/// selecting SerializationEnum's shared_ptr constructor. Requires that this object is owned by a shared_ptr.
return std::make_shared<SerializationEnum<Type>>(std::static_pointer_cast<const DataTypeEnum<Type>>(shared_from_this()));
}


Expand Down
24 changes: 12 additions & 12 deletions src/DataTypes/Serializations/SerializationEnum.cpp
Expand Up @@ -11,13 +11,13 @@ namespace DB
/// Writes the enum name that corresponds to the value stored at `row_num` of `column` into `ostr`
/// using writeString. The FormatSettings argument is unused.
/// NOTE: diff hunk without +/- markers — the two statements are the before/after versions of one line.
template <typename Type>
void SerializationEnum<Type>::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
/// Before: name lookup through a member function reached via `this->`.
writeString(this->getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]), ostr);
/// After: name lookup through the `ref_enum_values` member.
writeString(ref_enum_values.getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]), ostr);
}

/// Writes the enum name for the value at `row_num` of `column` into `ostr` via writeEscapedString
/// (the name is passed as a view). The FormatSettings argument is unused.
/// NOTE: diff hunk without +/- markers — the two statements are the before/after versions of one line.
template <typename Type>
void SerializationEnum<Type>::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
/// Before: name lookup through a member function reached via `this->`.
writeEscapedString(this->getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]).toView(), ostr);
/// After: name lookup through the `ref_enum_values` member.
writeEscapedString(ref_enum_values.getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]).toView(), ostr);
}

template <typename Type>
Expand All @@ -30,22 +30,22 @@ void SerializationEnum<Type>::deserializeTextEscaped(IColumn & column, ReadBuffe
/// NOTE It would be nice to do without creating a temporary object - at least extract std::string out.
std::string field_name;
readEscapedString(field_name, istr);
assert_cast<ColumnType &>(column).getData().push_back(this->getValue(StringRef(field_name), true));
assert_cast<ColumnType &>(column).getData().push_back(ref_enum_values.getValue(StringRef(field_name), true));
}
}

/// Writes the enum name for the value at `row_num` of `column` into `ostr` via writeQuotedString.
/// The FormatSettings argument is unused.
/// NOTE: diff hunk without +/- markers — the two statements are the before/after versions of one line.
template <typename Type>
void SerializationEnum<Type>::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
/// Before: name lookup through a member function reached via `this->`.
writeQuotedString(this->getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]), ostr);
/// After: name lookup through the `ref_enum_values` member.
writeQuotedString(ref_enum_values.getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]), ostr);
}

/// Reads a quoted string from `istr`, converts it to the corresponding enum value and appends that
/// value to the data of `column`. The FormatSettings argument is unused.
/// NOTE: diff hunk without +/- markers — the two push_back statements are the before/after versions of one line.
template <typename Type>
void SerializationEnum<Type>::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
/// Temporary buffer for the enum name read from the input.
std::string field_name;
readQuotedStringWithSQLStyle(field_name, istr);
/// Before: value lookup through a member function reached via `this->`.
assert_cast<ColumnType &>(column).getData().push_back(this->getValue(StringRef(field_name)));
/// After: value lookup through the `ref_enum_values` member.
assert_cast<ColumnType &>(column).getData().push_back(ref_enum_values.getValue(StringRef(field_name)));
}

template <typename Type>
Expand All @@ -61,20 +61,20 @@ void SerializationEnum<Type>::deserializeWholeText(IColumn & column, ReadBuffer
{
std::string field_name;
readStringUntilEOF(field_name, istr);
assert_cast<ColumnType &>(column).getData().push_back(this->getValue(StringRef(field_name), true));
assert_cast<ColumnType &>(column).getData().push_back(ref_enum_values.getValue(StringRef(field_name), true));
}
}

/// Writes the enum name for the value at `row_num` of `column` into `ostr` via writeJSONString,
/// forwarding `settings` to it (the name is passed as a view).
/// NOTE: diff hunk without +/- markers — the two statements are the before/after versions of one line.
template <typename Type>
void SerializationEnum<Type>::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
/// Before: name lookup through a member function reached via `this->`.
writeJSONString(this->getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]).toView(), ostr, settings);
/// After: name lookup through the `ref_enum_values` member.
writeJSONString(ref_enum_values.getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]).toView(), ostr, settings);
}

/// Writes the enum name for the value at `row_num` of `column` into `ostr` via
/// writeXMLStringForTextElement (the name is passed as a view). The FormatSettings argument is unused.
/// NOTE: diff hunk without +/- markers — the two statements are the before/after versions of one line.
template <typename Type>
void SerializationEnum<Type>::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
/// Before: name lookup through a member function reached via `this->`.
writeXMLStringForTextElement(this->getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]).toView(), ostr);
/// After: name lookup through the `ref_enum_values` member.
writeXMLStringForTextElement(ref_enum_values.getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]).toView(), ostr);
}

template <typename Type>
Expand All @@ -86,14 +86,14 @@ void SerializationEnum<Type>::deserializeTextJSON(IColumn & column, ReadBuffer &
{
std::string field_name;
readJSONString(field_name, istr);
assert_cast<ColumnType &>(column).getData().push_back(this->getValue(StringRef(field_name)));
assert_cast<ColumnType &>(column).getData().push_back(ref_enum_values.getValue(StringRef(field_name)));
}
}

/// Writes the enum name for the value at `row_num` of `column` into `ostr` via writeCSVString.
/// The FormatSettings argument is unused.
/// NOTE: diff hunk without +/- markers — the two statements are the before/after versions of one line.
template <typename Type>
void SerializationEnum<Type>::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
/// Before: name lookup through a member function reached via `this->`.
writeCSVString(this->getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]), ostr);
/// After: name lookup through the `ref_enum_values` member.
writeCSVString(ref_enum_values.getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]), ostr);
}

template <typename Type>
Expand All @@ -105,7 +105,7 @@ void SerializationEnum<Type>::deserializeTextCSV(IColumn & column, ReadBuffer &
{
std::string field_name;
readCSVString(field_name, istr, settings.csv);
assert_cast<ColumnType &>(column).getData().push_back(this->getValue(StringRef(field_name), true));
assert_cast<ColumnType &>(column).getData().push_back(ref_enum_values.getValue(StringRef(field_name), true));
}
}

Expand All @@ -114,7 +114,7 @@ void SerializationEnum<Type>::serializeTextMarkdown(
const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
if (settings.markdown.escape_special_characters)
writeMarkdownEscapedString(this->getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]).toView(), ostr);
writeMarkdownEscapedString(ref_enum_values.getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]).toView(), ostr);
else
serializeTextEscaped(column, row_num, ostr, settings);
}
Expand Down
27 changes: 23 additions & 4 deletions src/DataTypes/Serializations/SerializationEnum.h
@@ -1,20 +1,35 @@
#pragma once

#include <memory>
#include <DataTypes/Serializations/SerializationNumber.h>
#include <DataTypes/EnumValues.h>
#include <DataTypes/DataTypeEnum.h>

namespace DB
{

template <typename Type>
class SerializationEnum : public SerializationNumber<Type>, public EnumValues<Type>
class SerializationEnum : public SerializationNumber<Type>
{
public:
using typename SerializationNumber<Type>::FieldType;
using typename SerializationNumber<Type>::ColumnType;
using typename EnumValues<Type>::Values;
using Values = EnumValues<Type>::Values;

explicit SerializationEnum(const Values & values_) : EnumValues<Type>(values_) {}
/// SerializationEnum can be constructed in two ways:
/// - Make a copy of the Enum name-to-value mapping.
/// - Only store a reference to an existing mapping. This is faster if the Enum has a lot of different values or if SerializationEnum is
/// constructed very frequently. Make sure that the pointed-to mapping has a longer lifespan than SerializationEnum!

/// Copying constructor: builds an EnumValues<Type> from `values_` inside `own_enum_values` and binds
/// `ref_enum_values` to that owned object. `own_enum_type` is not initialized here and stays empty.
/// The initializer for `ref_enum_values` reads `own_enum_values`, so it relies on `own_enum_values`
/// being declared (and therefore initialized) before `ref_enum_values` in the class.
/// NOTE(review): `ref_enum_values` refers to a member of this very object; if the class is ever
/// copied or moved, the new object's reference would still point into the source object — confirm
/// that copying is not possible or not used.
explicit SerializationEnum(const Values & values_)
: own_enum_values(values_), ref_enum_values(own_enum_values.value())
{
}

/// Referencing constructor: stores a copy of the shared pointer in `own_enum_type` (so this object
/// shares ownership of the data type) and binds `ref_enum_values` to the pointed-to object instead of
/// copying the mapping. `own_enum_values` is not initialized here and stays empty.
/// `enum_type` is dereferenced unconditionally, so it must not be null.
/// Binding `*enum_type` to `const EnumValues<Type> &` assumes DataTypeEnum<Type> is usable as an
/// EnumValues<Type> (presumably via inheritance) — TODO confirm against DataTypeEnum.h.
explicit SerializationEnum(const std::shared_ptr<const DataTypeEnum<Type>> & enum_type)
: own_enum_type(enum_type), ref_enum_values(*enum_type)
{
}

void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
Expand All @@ -35,8 +50,12 @@ class SerializationEnum : public SerializationNumber<Type>, public EnumValues<Ty
{
FieldType x;
readText(x, istr);
return this->findByValue(x)->first;
return ref_enum_values.findByValue(x)->first;
}

std::optional<EnumValues<Type>> own_enum_values;
std::shared_ptr<const DataTypeEnum<Type>> own_enum_type;
const EnumValues<Type> & ref_enum_values;
};

}