Skip to content

Commit

Permalink
Merge pull request #55129 from CurtizJ/describe-virtuals
Browse files Browse the repository at this point in the history
Allow including virtual columns in the result of the `DESCRIBE` query
  • Loading branch information
robot-ch-test-poll committed Sep 29, 2023
2 parents 4ef46d1 + 9b0e222 commit f5542e1
Show file tree
Hide file tree
Showing 5 changed files with 459 additions and 112 deletions.
2 changes: 2 additions & 0 deletions src/Core/Settings.h
Expand Up @@ -605,6 +605,8 @@ class IColumn;
M(Bool, insert_null_as_default, true, "Insert DEFAULT values instead of NULL in INSERT SELECT (UNION ALL)", 0) \
M(Bool, describe_extend_object_types, false, "Deduce concrete type of columns of type Object in DESCRIBE query", 0) \
M(Bool, describe_include_subcolumns, false, "If true, subcolumns of all table columns will be included into result of DESCRIBE query", 0) \
M(Bool, describe_include_virtual_columns, false, "If true, virtual columns of table will be included into result of DESCRIBE query", 0) \
M(Bool, describe_compact_output, false, "If true, include only column names and types into result of DESCRIBE query", 0) \
M(Bool, mutations_execute_nondeterministic_on_initiator, false, "If true nondeterministic function are executed on initiator and replaced to literals in UPDATE and DELETE queries", 0) \
M(Bool, mutations_execute_subqueries_on_initiator, false, "If true scalar subqueries are executed on initiator and replaced to literals in UPDATE and DELETE queries", 0) \
M(UInt64, mutations_max_literal_size_to_replace, 16384, "The maximum size of serialized literal in bytes to replace in UPDATE and DELETE queries", 0) \
Expand Down
289 changes: 179 additions & 110 deletions src/Interpreters/InterpreterDescribeQuery.cpp
Expand Up @@ -21,7 +21,14 @@
namespace DB
{

Block InterpreterDescribeQuery::getSampleBlock(bool include_subcolumns)
/// Creates an interpreter for a DESCRIBE query.
/// Stores the query AST in `query_ptr` and initializes `settings` from the settings of the
/// supplied context, so that the other methods can read them through the `settings` member.
InterpreterDescribeQuery::InterpreterDescribeQuery(const ASTPtr & query_ptr_, ContextPtr context_)
: WithContext(context_)
, query_ptr(query_ptr_)
, settings(getContext()->getSettingsRef())
{
}

/// Builds the header block of the DESCRIBE result.
/// The visible code adds the "type" column and, unless `compact` is set, the "default_type",
/// "default_expression", "comment", "codec_expression" and "ttl_expression" columns.
/// If `include_virtuals` is set, a UInt8 column "is_virtual" is appended last.
/// NOTE(review): parts of this function are elided in this view (the code before "type" and the
/// body of the `include_subcolumns` branch) - confirm the full column list against the real file.
Block InterpreterDescribeQuery::getSampleBlock(bool include_subcolumns, bool include_virtuals, bool compact)
{
Block block;

Expand All @@ -34,20 +41,23 @@ Block InterpreterDescribeQuery::getSampleBlock(bool include_subcolumns)
col.name = "type";
block.insert(col);

col.name = "default_type";
block.insert(col);
if (!compact)
{
col.name = "default_type";
block.insert(col);

col.name = "default_expression";
block.insert(col);
col.name = "default_expression";
block.insert(col);

col.name = "comment";
block.insert(col);
col.name = "comment";
block.insert(col);

col.name = "codec_expression";
block.insert(col);
col.name = "codec_expression";
block.insert(col);

col.name = "ttl_expression";
block.insert(col);
col.name = "ttl_expression";
block.insert(col);
}

if (include_subcolumns)
{
Expand All @@ -57,149 +67,208 @@ Block InterpreterDescribeQuery::getSampleBlock(bool include_subcolumns)
block.insert(col);
}

if (include_virtuals)
{
col.name = "is_virtual";
col.type = std::make_shared<DataTypeUInt8>();
col.column = col.type->createColumn();
block.insert(col);
}

return block;
}

/// Executes the DESCRIBE query.
/// Column descriptions are collected from the subquery, the table function or the table named in
/// the query (fillColumnsFromSubquery / fillColumnsFromTableFunction / fillColumnsFromTable).
/// One result row is then added per ordinary column and per virtual column and, if the
/// describe_include_subcolumns setting is enabled, per subcolumn of each of them.
/// The rows are returned as a pipeline reading from a single chunk.
BlockIO InterpreterDescribeQuery::execute()
{
std::vector<ColumnDescription> columns;
StorageSnapshotPtr storage_snapshot;

const auto & ast = query_ptr->as<ASTDescribeQuery &>();
const auto & table_expression = ast.table_expression->as<ASTTableExpression &>();
const auto & settings = getContext()->getSettingsRef();

if (table_expression.subquery)
{
NamesAndTypesList names_and_types;
auto select_query = table_expression.subquery->children.at(0);
auto current_context = getContext();
fillColumnsFromSubquery(table_expression);
else if (table_expression.table_function)
fillColumnsFromTableFunction(table_expression);
else
fillColumnsFromTable(table_expression);

if (settings.allow_experimental_analyzer)
{
SelectQueryOptions select_query_options;
names_and_types = InterpreterSelectQueryAnalyzer(select_query, current_context, select_query_options).getSampleBlock().getNamesAndTypesList();
}
else
{
names_and_types = InterpreterSelectWithUnionQuery::getSampleBlock(select_query, current_context).getNamesAndTypesList();
}
Block sample_block = getSampleBlock(
settings.describe_include_subcolumns,
settings.describe_include_virtual_columns,
settings.describe_compact_output);

for (auto && [name, type] : names_and_types)
{
ColumnDescription description;
description.name = std::move(name);
description.type = std::move(type);
columns.emplace_back(std::move(description));
}
MutableColumns res_columns = sample_block.cloneEmptyColumns();

/// Rows for ordinary columns go first, rows for virtual columns follow.
for (const auto & column : columns)
addColumn(column, false, res_columns);

for (const auto & column : virtual_columns)
addColumn(column, true, res_columns);

if (settings.describe_include_subcolumns)
{
for (const auto & column : columns)
addSubcolumns(column, false, res_columns);

for (const auto & column : virtual_columns)
addSubcolumns(column, true, res_columns);
}
else if (table_expression.table_function)

BlockIO res;
size_t num_rows = res_columns[0]->size();
auto source = std::make_shared<SourceFromSingleChunk>(sample_block, Chunk(std::move(res_columns), num_rows));
res.pipeline = QueryPipeline(std::move(source));

return res;
}

/// Fills `columns` with the names and types of the result of the subquery in `table_expression`.
/// The header of the SELECT is taken from InterpreterSelectQueryAnalyzer when the
/// allow_experimental_analyzer setting is enabled, and from InterpreterSelectWithUnionQuery otherwise.
void InterpreterDescribeQuery::fillColumnsFromSubquery(const ASTTableExpression & table_expression)
{
NamesAndTypesList names_and_types;
auto select_query = table_expression.subquery->children.at(0);
auto current_context = getContext();

if (settings.allow_experimental_analyzer)
{
TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression.table_function, getContext());
auto table_function_column_descriptions = table_function_ptr->getActualTableStructure(getContext(), /*is_insert_query*/ true);
for (const auto & table_function_column_description : table_function_column_descriptions)
columns.emplace_back(table_function_column_description);
SelectQueryOptions select_query_options;
names_and_types = InterpreterSelectQueryAnalyzer(select_query, current_context, select_query_options).getSampleBlock().getNamesAndTypesList();
}
else
{
auto table_id = getContext()->resolveStorageID(table_expression.database_and_table_name);
getContext()->checkAccess(AccessType::SHOW_COLUMNS, table_id);
auto table = DatabaseCatalog::instance().getTable(table_id, getContext());
auto table_lock = table->lockForShare(getContext()->getInitialQueryId(), settings.lock_acquire_timeout);

auto metadata_snapshot = table->getInMemoryMetadataPtr();
storage_snapshot = table->getStorageSnapshot(metadata_snapshot, getContext());
auto metadata_column_descriptions = metadata_snapshot->getColumns();
for (const auto & metadata_column_description : metadata_column_descriptions)
columns.emplace_back(metadata_column_description);
names_and_types = InterpreterSelectWithUnionQuery::getSampleBlock(select_query, current_context).getNamesAndTypesList();
}

bool extend_object_types = settings.describe_extend_object_types && storage_snapshot;
bool include_subcolumns = settings.describe_include_subcolumns;
for (auto && [name, type] : names_and_types)
columns.emplace_back(std::move(name), std::move(type));
}

Block sample_block = getSampleBlock(include_subcolumns);
MutableColumns res_columns = sample_block.cloneEmptyColumns();
/// Fills `columns` with the structure reported by the table function in `table_expression`.
/// If the describe_include_virtual_columns setting is enabled, the table function is also executed
/// and the virtual columns of the resulting storage are added to `virtual_columns`.
void InterpreterDescribeQuery::fillColumnsFromTableFunction(const ASTTableExpression & table_expression)
{
auto current_context = getContext();
TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression.table_function, current_context);
auto column_descriptions = table_function_ptr->getActualTableStructure(getContext(), /*is_insert_query*/ true);
for (const auto & column : column_descriptions)
columns.emplace_back(column);

for (const auto & column : columns)
if (settings.describe_include_virtual_columns)
{
res_columns[0]->insert(column.name);
auto table = table_function_ptr->execute(table_expression.table_function, getContext(), table_function_ptr->getName());
if (table)
{
for (const auto & column : table->getVirtuals())
{
/// Skip a virtual column whose name is already taken by an ordinary column.
if (!column_descriptions.has(column.name))
virtual_columns.emplace_back(column.name, column.type);
}
}
}
}
/// Fills `columns` from the in-memory metadata of the table named in `table_expression`.
/// The SHOW_COLUMNS access right is checked and the table is locked for share before its metadata is read.
/// If the describe_include_virtual_columns setting is enabled, virtual columns of the table are
/// added to `virtual_columns`.
/// If the describe_extend_object_types setting is enabled, a storage snapshot is saved in
/// `storage_snapshot`; addColumn and addSubcolumns use it to get concrete column types.
void InterpreterDescribeQuery::fillColumnsFromTable(const ASTTableExpression & table_expression)
{
auto table_id = getContext()->resolveStorageID(table_expression.database_and_table_name);
getContext()->checkAccess(AccessType::SHOW_COLUMNS, table_id);
auto table = DatabaseCatalog::instance().getTable(table_id, getContext());
auto table_lock = table->lockForShare(getContext()->getInitialQueryId(), settings.lock_acquire_timeout);

DataTypePtr type;
if (extend_object_types)
type = storage_snapshot->getConcreteType(column.name);
else
type = column.type;
auto metadata_snapshot = table->getInMemoryMetadataPtr();
const auto & column_descriptions = metadata_snapshot->getColumns();
for (const auto & column : column_descriptions)
columns.emplace_back(column);

if (getContext()->getSettingsRef().print_pretty_type_names)
res_columns[1]->insert(type->getPrettyName());
else
res_columns[1]->insert(type->getName());
if (settings.describe_include_virtual_columns)
{
for (const auto & column : table->getVirtuals())
{
/// Skip a virtual column whose name is already taken by an ordinary column.
if (!column_descriptions.has(column.name))
virtual_columns.emplace_back(column.name, column.type);
}
}

if (settings.describe_extend_object_types)
storage_snapshot = table->getStorageSnapshot(metadata_snapshot, getContext());
}

/// Appends one result row describing `column` to `res_columns`.
/// The result columns are filled positionally through the running index `i`: name, type, then
/// (unless describe_compact_output is set) default kind, default expression, comment, codec and TTL,
/// then the optional columns enabled by describe_include_subcolumns and describe_include_virtual_columns.
/// `is_virtual` is written as the value of the last of these optional columns.
/// If `storage_snapshot` is set, the type is taken from storage_snapshot->getConcreteType()
/// instead of the column description.
void InterpreterDescribeQuery::addColumn(const ColumnDescription & column, bool is_virtual, MutableColumns & res_columns)
{
size_t i = 0;
res_columns[i++]->insert(column.name);

auto type = storage_snapshot ? storage_snapshot->getConcreteType(column.name) : column.type;
if (settings.print_pretty_type_names)
res_columns[i++]->insert(type->getPrettyName());
else
res_columns[i++]->insert(type->getName());

if (!settings.describe_compact_output)
{
if (column.default_desc.expression)
{
res_columns[2]->insert(toString(column.default_desc.kind));
res_columns[3]->insert(queryToString(column.default_desc.expression));
res_columns[i++]->insert(toString(column.default_desc.kind));
res_columns[i++]->insert(queryToString(column.default_desc.expression));
}
else
{
res_columns[2]->insertDefault();
res_columns[3]->insertDefault();
res_columns[i++]->insertDefault();
res_columns[i++]->insertDefault();
}

res_columns[4]->insert(column.comment);
res_columns[i++]->insert(column.comment);

if (column.codec)
res_columns[5]->insert(queryToString(column.codec->as<ASTFunction>()->arguments));
res_columns[i++]->insert(queryToString(column.codec->as<ASTFunction>()->arguments));
else
res_columns[5]->insertDefault();
res_columns[i++]->insertDefault();

if (column.ttl)
res_columns[6]->insert(queryToString(column.ttl));
res_columns[i++]->insert(queryToString(column.ttl));
else
res_columns[6]->insertDefault();

if (include_subcolumns)
res_columns[7]->insertDefault();
res_columns[i++]->insertDefault();
}

if (include_subcolumns)
if (settings.describe_include_subcolumns)
res_columns[i++]->insertDefault();

if (settings.describe_include_virtual_columns)
res_columns[i++]->insert(is_virtual);
}

/// Appends one result row to `res_columns` for every subcolumn of `column`.
/// The subcolumns are enumerated with IDataType::forEachSubcolumn over the column type
/// (the concrete type from `storage_snapshot`, if it is set).
/// The row name is built with Nested::concatenateName(column.name, name).
/// Unless describe_compact_output is set, the default kind and expression are left empty, the comment
/// and TTL are taken from the parent column, and the codec is shown only if
/// ISerialization::isSpecialCompressionAllowed(path) holds for the subcolumn.
/// `is_virtual` is written to the last result column if describe_include_virtual_columns is set.
void InterpreterDescribeQuery::addSubcolumns(const ColumnDescription & column, bool is_virtual, MutableColumns & res_columns)
{
auto type = storage_snapshot ? storage_snapshot->getConcreteType(column.name) : column.type;

IDataType::forEachSubcolumn([&](const auto & path, const auto & name, const auto & data)
{
for (const auto & column : columns)
{
auto type = extend_object_types ? storage_snapshot->getConcreteType(column.name) : column.type;
size_t i = 0;
res_columns[i++]->insert(Nested::concatenateName(column.name, name));

IDataType::forEachSubcolumn([&](const auto & path, const auto & name, const auto & data)
{
res_columns[0]->insert(Nested::concatenateName(column.name, name));
res_columns[1]->insert(data.type->getName());

/// It's not trivial to calculate default expression for subcolumn.
/// So, leave it empty.
res_columns[2]->insertDefault();
res_columns[3]->insertDefault();
res_columns[4]->insert(column.comment);

if (column.codec && ISerialization::isSpecialCompressionAllowed(path))
res_columns[5]->insert(queryToString(column.codec->as<ASTFunction>()->arguments));
else
res_columns[5]->insertDefault();

if (column.ttl)
res_columns[6]->insert(queryToString(column.ttl));
else
res_columns[6]->insertDefault();

res_columns[7]->insert(1u);
}, ISerialization::SubstreamData(type->getDefaultSerialization()).withType(type));
if (settings.print_pretty_type_names)
res_columns[i++]->insert(data.type->getPrettyName());
else
res_columns[i++]->insert(data.type->getName());

if (!settings.describe_compact_output)
{
/// It's not trivial to calculate default expression for subcolumn.
/// So, leave it empty.
res_columns[i++]->insertDefault();
res_columns[i++]->insertDefault();
res_columns[i++]->insert(column.comment);

if (column.codec && ISerialization::isSpecialCompressionAllowed(path))
res_columns[i++]->insert(queryToString(column.codec->as<ASTFunction>()->arguments));
else
res_columns[i++]->insertDefault();

if (column.ttl)
res_columns[i++]->insert(queryToString(column.ttl));
else
res_columns[i++]->insertDefault();
}
}

BlockIO res;
size_t num_rows = res_columns[0]->size();
auto source = std::make_shared<SourceFromSingleChunk>(sample_block, Chunk(std::move(res_columns), num_rows));
res.pipeline = QueryPipeline(std::move(source));
res_columns[i++]->insert(1U);

return res;
if (settings.describe_include_virtual_columns)
res_columns[i++]->insert(is_virtual);

}, ISerialization::SubstreamData(type->getDefaultSerialization()).withType(type));
}

}

0 comments on commit f5542e1

Please sign in to comment.