Skip to content

Commit

Permalink
Merge branch 'ClickHouse:master' into existing-count
Browse files Browse the repository at this point in the history
  • Loading branch information
jewelzqiu committed Mar 1, 2024
2 parents f2fa9e2 + 891689a commit 94c5846
Show file tree
Hide file tree
Showing 48 changed files with 1,519 additions and 267 deletions.
4 changes: 0 additions & 4 deletions .github/workflows/backport_branches.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,6 @@ jobs:
test_name: Compatibility check (amd64)
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
CompatibilityCheckAarch64:
needs: [RunConfig, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
Expand All @@ -77,8 +75,6 @@ jobs:
test_name: Compatibility check (aarch64)
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
#########################################################################################
#################################### ORDINARY BUILDS ####################################
#########################################################################################
Expand Down
4 changes: 0 additions & 4 deletions .github/workflows/master.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,6 @@ jobs:
test_name: Compatibility check (amd64)
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
CompatibilityCheckAarch64:
needs: [RunConfig, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
Expand All @@ -83,8 +81,6 @@ jobs:
test_name: Compatibility check (aarch64)
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
#########################################################################################
#################################### ORDINARY BUILDS ####################################
#########################################################################################
Expand Down
4 changes: 0 additions & 4 deletions .github/workflows/pull_request.yml
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,6 @@ jobs:
test_name: Compatibility check (amd64)
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
CompatibilityCheckAarch64:
needs: [RunConfig, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
Expand All @@ -127,8 +125,6 @@ jobs:
test_name: Compatibility check (aarch64)
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
#########################################################################################
#################################### ORDINARY BUILDS ####################################
#########################################################################################
Expand Down
4 changes: 0 additions & 4 deletions .github/workflows/release_branches.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,6 @@ jobs:
test_name: Compatibility check (amd64)
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
CompatibilityCheckAarch64:
needs: [RunConfig, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
Expand All @@ -78,8 +76,6 @@ jobs:
test_name: Compatibility check (aarch64)
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
#########################################################################################
#################################### ORDINARY BUILDS ####################################
#########################################################################################
Expand Down
1 change: 1 addition & 0 deletions .gitmessage
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#ci_set_reduced
#ci_set_arm
#ci_set_integration
#ci_set_analyzer

## To run specified job in CI:
#job_<JOB NAME>
Expand Down
155 changes: 155 additions & 0 deletions CHANGELOG.md

Large diffs are not rendered by default.

5 changes: 2 additions & 3 deletions docs/en/operations/system-tables/query_thread_log.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ Columns:
- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query.
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the thread has finished execution of the query.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query.
- `event_time_microsecinds` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query with microseconds precision.
- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — The date and time when the thread has finished execution of the query with microseconds precision.
- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time of query execution.
- `query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Start time of query execution with microsecond precision.
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution.
Expand All @@ -32,8 +32,7 @@ Columns:
- `memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The difference between the amount of allocated and freed memory in context of this thread.
- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in context of this thread.
- `thread_name` ([String](../../sql-reference/data-types/string.md)) — Name of the thread.
- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Internal thread ID.
- `thread_id` ([Int32](../../sql-reference/data-types/int-uint.md)) — thread ID.
- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — OS thread ID.
- `master_thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — OS ID of the initial thread.
- `query` ([String](../../sql-reference/data-types/string.md)) — Query string.
- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Query type. Possible values:
Expand Down
186 changes: 186 additions & 0 deletions src/Analyzer/Passes/AggregateFunctionOfGroupByKeysPass.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
#include <Analyzer/Passes/AggregateFunctionOfGroupByKeysPass.h>

#include <AggregateFunctions/AggregateFunctionFactory.h>

#include <Analyzer/ArrayJoinNode.h>
#include <Analyzer/ColumnNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/QueryNode.h>
#include <Analyzer/TableNode.h>
#include <Analyzer/UnionNode.h>

namespace DB
{

namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}

namespace
{

/// Try to eliminate min/max/any/anyLast.
/** Tries to eliminate min/max/any/anyLast aggregate functions whose single argument
  * is built only from GROUP BY keys, replacing the aggregate function node with its argument.
  *
  * The visitor keeps a stack with one set of GROUP BY keys per QueryNode that is currently
  * being visited (the innermost query is on top). An empty set means that nothing
  * is eliminated inside that query.
  */
class EliminateFunctionVisitor : public InDepthQueryTreeVisitorWithContext<EliminateFunctionVisitor>
{
public:
    using Base = InDepthQueryTreeVisitorWithContext<EliminateFunctionVisitor>;
    using Base::Base;

    using GroupByKeysStack = std::vector<QueryTreeNodePtrWithHashSet>;

    /// On entering a QueryNode, push the set of its GROUP BY keys onto the stack.
    /// Nodes of any other type are ignored here.
    void enterImpl(QueryTreeNodePtr & node)
    {
        if (!getSettings().optimize_aggregators_of_group_by_keys)
            return;

        auto * query_node = node->as<QueryNode>();
        if (!query_node)
            return;

        /// An empty key set disables elimination for this query. It is used when there is
        /// no GROUP BY at all and when GROUP BY is WITH TOTALS / CUBE / ROLLUP
        /// (the aggregator is kept in these cases).
        if (!query_node->hasGroupBy()
            || query_node->isGroupByWithTotals()
            || query_node->isGroupByWithCube()
            || query_node->isGroupByWithRollup())
        {
            group_by_keys_stack.emplace_back();
            return;
        }

        QueryTreeNodePtrWithHashSet group_by_keys;
        for (auto & group_key : query_node->getGroupBy().getNodes())
        {
            auto * grouping_set = group_key->as<ListNode>();
            if (!grouping_set)
            {
                group_by_keys.insert(group_key);
                continue;
            }

            /// For grouping sets case collect only keys that are presented in every set.
            ///
            /// NOTE(review): the intersection is taken with the keys accumulated so far, which is
            /// an empty set when the first GROUP BY node is a ListNode. In that case the result
            /// stays empty, so the optimization is effectively disabled for GROUPING SETS.
            /// This is conservative; confirm it is safe (e.g. with group_by_use_nulls)
            /// before seeding the intersection with the first set.
            QueryTreeNodePtrWithHashSet common_keys;
            for (auto & set_element : grouping_set->getNodes())
            {
                if (group_by_keys.contains(set_element))
                    common_keys.insert(set_element);
            }
            group_by_keys = std::move(common_keys);
        }

        group_by_keys_stack.push_back(std::move(group_by_keys));
    }

    /// On leaving a function node, replace it with its argument if it can be eliminated.
    /// On leaving a QueryNode, all of its children have been visited, so drop its key set.
    void leaveImpl(QueryTreeNodePtr & node)
    {
        if (!getSettings().optimize_aggregators_of_group_by_keys)
            return;

        const auto node_type = node->getNodeType();
        if (node_type == QueryTreeNodeType::FUNCTION)
        {
            /// No key set has been pushed (the function is not inside a query processed by enterImpl),
            /// so there is nothing to compare with. Calling back() on an empty vector is undefined behaviour.
            if (group_by_keys_stack.empty())
                return;

            if (aggregationCanBeEliminated(node, group_by_keys_stack.back()))
                node = node->as<FunctionNode>()->getArguments().getNodes()[0];
        }
        else if (node_type == QueryTreeNodeType::QUERY)
        {
            /// Mirrors the push done by enterImpl for the same node.
            group_by_keys_stack.pop_back();
        }
    }

    static bool needChildVisit(VisitQueryTreeNodeType & parent [[maybe_unused]], VisitQueryTreeNodeType & child)
    {
        /// Skip ArrayJoin.
        return !child->as<ArrayJoinNode>();
    }

private:

    /// Element of the DFS stack used by aggregationCanBeEliminated.
    struct NodeWithInfo
    {
        QueryTreeNodePtr node;
        /// True if every function on the path from the aggregate function argument down to this node
        /// is deterministic in scope of query.
        bool parents_are_only_deterministic = false;
    };

    /** Returns true if `node` is a min/max/any/anyLast aggregate function whose result type equals
      * the type of its argument and whose argument expression consists only of GROUP BY keys,
      * functions over them and constants (constants are accepted only under deterministic functions).
      *
      * Throws LOGICAL_ERROR if such a function does not have exactly one argument.
      */
    static bool aggregationCanBeEliminated(const QueryTreeNodePtr & node, const QueryTreeNodePtrWithHashSet & group_by_keys)
    {
        if (group_by_keys.empty())
            return false;

        auto * function = node->as<FunctionNode>();
        if (!function || !function->isAggregateFunction())
            return false;

        const auto & function_name = function->getFunctionName();
        const bool is_supported_aggregate = function_name == "min"
            || function_name == "max"
            || function_name == "any"
            || function_name == "anyLast";
        if (!is_supported_aggregate)
            return false;

        auto & function_arguments = function->getArguments().getNodes();
        if (function_arguments.size() != 1)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected a single argument of function '{}' but received {}", function->getFunctionName(), function_arguments.size());

        /// The function node is replaced with its argument, so the types must match exactly.
        if (!function->getResultType()->equals(*function_arguments[0]->getResultType()))
            return false;

        std::vector<NodeWithInfo> candidates;
        candidates.push_back({ function_arguments[0], true });

        /// Using DFS we traverse function tree and try to find if it uses other keys as function arguments.
        while (!candidates.empty())
        {
            auto [candidate, parents_are_only_deterministic] = candidates.back();
            candidates.pop_back();

            const bool found = group_by_keys.contains(candidate);

            switch (candidate->getNodeType())
            {
                case QueryTreeNodeType::FUNCTION:
                {
                    auto * func = candidate->as<FunctionNode>();
                    auto & arguments = func->getArguments().getNodes();
                    if (arguments.empty())
                        return false;

                    /// A function that is itself a GROUP BY key does not need to be inspected further.
                    /// Otherwise all of its arguments have to be checked.
                    if (!found)
                    {
                        const bool is_deterministic_function = parents_are_only_deterministic &&
                            func->getFunctionOrThrow()->isDeterministicInScopeOfQuery();
                        for (auto it = arguments.rbegin(); it != arguments.rend(); ++it)
                            candidates.push_back({ *it, is_deterministic_function });
                    }
                    break;
                }
                case QueryTreeNodeType::COLUMN:
                    /// A column that is not a GROUP BY key prevents the elimination.
                    if (!found)
                        return false;
                    break;
                case QueryTreeNodeType::CONSTANT:
                    if (!parents_are_only_deterministic)
                        return false;
                    break;
                default:
                    return false;
            }
        }

        return true;
    }

    /// GROUP BY keys of the queries that are currently being visited, innermost query last.
    GroupByKeysStack group_by_keys_stack;
};

}

/// Runs EliminateFunctionVisitor, constructed from `context`, over the whole `query_tree_node` tree.
/// Function nodes that the visitor decides to eliminate are replaced in place.
void AggregateFunctionOfGroupByKeysPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
    EliminateFunctionVisitor visitor(context);
    visitor.visit(query_tree_node);
}

};
28 changes: 28 additions & 0 deletions src/Analyzer/Passes/AggregateFunctionOfGroupByKeysPass.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#pragma once

#include <Analyzer/IQueryTreePass.h>

namespace DB
{

/** Query tree pass that removes min/max/any/anyLast aggregate functions
  * applied to GROUP BY keys in SELECT section.
  *
  * Example: SELECT max(column) FROM table GROUP BY column;
  * Result: SELECT column FROM table GROUP BY column;
  */
class AggregateFunctionOfGroupByKeysPass final : public IQueryTreePass
{
public:
    /// Description of what the pass does.
    String getDescription() override
    {
        return "Eliminates min/max/any/anyLast aggregators of GROUP BY keys in SELECT section.";
    }

    /// Name of the pass.
    String getName() override
    {
        return "AggregateFunctionOfGroupByKeys";
    }

    /// Applies the pass to `query_tree_node`; the tree is modified in place.
    void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};

}

2 changes: 1 addition & 1 deletion src/Analyzer/Passes/OptimizeGroupByFunctionKeysPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ class OptimizeGroupByFunctionKeysVisitor : public InDepthQueryTreeVisitorWithCon
if (!found)
{
bool is_deterministic_function = parents_are_only_deterministic &&
function->getFunctionOrThrow()->isDeterministicInScopeOfQuery();
func->getFunctionOrThrow()->isDeterministicInScopeOfQuery();
for (auto it = arguments.rbegin(); it != arguments.rend(); ++it)
candidates.push_back({ *it, is_deterministic_function });
}
Expand Down
5 changes: 4 additions & 1 deletion src/Analyzer/QueryTreePassManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
#include <Analyzer/Passes/CrossToInnerJoinPass.h>
#include <Analyzer/Passes/ShardNumColumnToFunctionPass.h>
#include <Analyzer/Passes/ConvertQueryToCNFPass.h>
#include <Analyzer/Passes/AggregateFunctionOfGroupByKeysPass.h>
#include <Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.h>


Expand Down Expand Up @@ -164,7 +165,6 @@ class ValidationChecker : public InDepthQueryTreeVisitor<ValidationChecker>

/** ClickHouse query tree pass manager.
*
* TODO: Support setting optimize_aggregators_of_group_by_keys.
* TODO: Support setting optimize_monotonous_functions_in_order_by.
* TODO: Add optimizations based on function semantics. Example: SELECT * FROM test_table WHERE id != id. (id is not nullable column).
*/
Expand Down Expand Up @@ -264,6 +264,9 @@ void addQueryTreePasses(QueryTreePassManager & manager)
manager.addPass(std::make_unique<RewriteArrayExistsToHasPass>());
manager.addPass(std::make_unique<NormalizeCountVariantsPass>());

/// Should run before AggregateFunctionsArithmericOperationsPass.
manager.addPass(std::make_unique<AggregateFunctionOfGroupByKeysPass>());

manager.addPass(std::make_unique<AggregateFunctionsArithmericOperationsPass>());
manager.addPass(std::make_unique<UniqInjectiveFunctionsEliminationPass>());
manager.addPass(std::make_unique<OptimizeGroupByFunctionKeysPass>());
Expand Down
14 changes: 11 additions & 3 deletions src/Common/CgroupsMemoryUsageObserver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ uint64_t CgroupsMemoryUsageObserver::File::readMemoryUsage() const
ReadBufferFromFileDescriptor buf(fd);
buf.rewind();

uint64_t mem_usage;
uint64_t mem_usage = 0;

switch (version)
{
Expand All @@ -214,21 +214,29 @@ uint64_t CgroupsMemoryUsageObserver::File::readMemoryUsage() const
/// rss 15
/// [...]
std::string key;
bool found_rss = false;

while (!buf.eof())
{
readStringUntilWhitespace(key, buf);
if (key != "rss")
{
std::string dummy;
readStringUntilNewlineInto(dummy, buf);
buf.ignore();
continue;
}

assertChar(' ', buf);
readIntText(mem_usage, buf);
assertChar('\n', buf);
found_rss = true;
break;
}
throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot find 'rss' in '{}'", file_name);

if (!found_rss)
throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot find 'rss' in '{}'", file_name);

break;
}
case CgroupsVersion::V2:
{
Expand Down

0 comments on commit 94c5846

Please sign in to comment.