Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve isolation of query cache entries under re-created users or role switches #58611

Merged
merged 7 commits into from Jan 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 0 additions & 4 deletions docs/en/operations/query-cache.md
Expand Up @@ -29,10 +29,6 @@ Transactionally inconsistent caching is traditionally provided by client tools o
the same caching logic and configuration is often duplicated. With ClickHouse's query cache, the caching logic moves to the server side.
This reduces maintenance effort and avoids redundancy.

:::note
Security consideration: The cached query result is tied to the user executing it. Authorization checks are performed when the query is executed. This means that if there are any alterations to the user's role or permissions between the time the query is cached and when the cache is accessed, the result will not reflect these changes. We recommend using different users to distinguish between different levels of access, instead of actively toggling roles for a single user between queries, as this practice may lead to unexpected query results.
:::

## Configuration Settings and Usage

Setting [use_query_cache](settings/settings.md#use-query-cache) can be used to control whether a specific query or all queries of the
Expand Down
12 changes: 6 additions & 6 deletions src/Common/CacheBase.h
Expand Up @@ -5,15 +5,15 @@
#include <Common/LRUCachePolicy.h>
#include <Common/SLRUCachePolicy.h>

#include <base/UUID.h>
#include <base/defines.h>

#include <atomic>
#include <cassert>
#include <chrono>
#include <memory>
#include <mutex>
#include <optional>
#include <unordered_map>

#include <base/defines.h>


namespace DB
{
Expand Down Expand Up @@ -227,10 +227,10 @@ class CacheBase
cache_policy->setMaxSizeInBytes(max_size_in_bytes);
}

void setQuotaForUser(const String & user_name, size_t max_size_in_bytes, size_t max_entries)
void setQuotaForUser(const UUID & user_id, size_t max_size_in_bytes, size_t max_entries)
{
std::lock_guard lock(mutex);
cache_policy->setQuotaForUser(user_name, max_size_in_bytes, max_entries);
cache_policy->setQuotaForUser(user_id, max_size_in_bytes, max_entries);
}

virtual ~CacheBase() = default;
Expand Down
5 changes: 3 additions & 2 deletions src/Common/ICachePolicy.h
Expand Up @@ -2,10 +2,11 @@

#include <Common/Exception.h>
#include <Common/ICachePolicyUserQuota.h>
#include <base/UUID.h>

#include <functional>
#include <memory>
#include <mutex>
#include <optional>

namespace DB
{
Expand Down Expand Up @@ -43,7 +44,7 @@ class ICachePolicy

virtual void setMaxCount(size_t /*max_count*/) = 0;
virtual void setMaxSizeInBytes(size_t /*max_size_in_bytes*/) = 0;
virtual void setQuotaForUser(const String & user_name, size_t max_size_in_bytes, size_t max_entries) { user_quotas->setQuotaForUser(user_name, max_size_in_bytes, max_entries); }
virtual void setQuotaForUser(const UUID & user_id, size_t max_size_in_bytes, size_t max_entries) { user_quotas->setQuotaForUser(user_id, max_size_in_bytes, max_entries); }

/// HashFunction usually hashes the entire key and the found key will be equal the provided key. In such cases, use get(). It is also
/// possible to store other, non-hashed data in the key. In that case, the found key is potentially different from the provided key.
Expand Down
17 changes: 9 additions & 8 deletions src/Common/ICachePolicyUserQuota.h
@@ -1,5 +1,6 @@
#pragma once

#include <base/UUID.h>
#include <base/types.h>

namespace DB
Expand All @@ -15,14 +16,14 @@ class ICachePolicyUserQuota
{
public:
/// Register or update the user's quota for the given resource.
virtual void setQuotaForUser(const String & user_name, size_t max_size_in_bytes, size_t max_entries) = 0;
virtual void setQuotaForUser(const UUID & user_id, size_t max_size_in_bytes, size_t max_entries) = 0;

/// Update the actual resource usage for the given user.
virtual void increaseActual(const String & user_name, size_t entry_size_in_bytes) = 0;
virtual void decreaseActual(const String & user_name, size_t entry_size_in_bytes) = 0;
virtual void increaseActual(const UUID & user_id, size_t entry_size_in_bytes) = 0;
virtual void decreaseActual(const UUID & user_id, size_t entry_size_in_bytes) = 0;

/// Is the user allowed to write a new entry into the cache?
virtual bool approveWrite(const String & user_name, size_t entry_size_in_bytes) const = 0;
virtual bool approveWrite(const UUID & user_id, size_t entry_size_in_bytes) const = 0;

virtual ~ICachePolicyUserQuota() = default;
};
Expand All @@ -33,10 +34,10 @@ using CachePolicyUserQuotaPtr = std::unique_ptr<ICachePolicyUserQuota>;
class NoCachePolicyUserQuota : public ICachePolicyUserQuota
{
public:
void setQuotaForUser(const String & /*user_name*/, size_t /*max_size_in_bytes*/, size_t /*max_entries*/) override {}
void increaseActual(const String & /*user_name*/, size_t /*entry_size_in_bytes*/) override {}
void decreaseActual(const String & /*user_name*/, size_t /*entry_size_in_bytes*/) override {}
bool approveWrite(const String & /*user_name*/, size_t /*entry_size_in_bytes*/) const override { return true; }
void setQuotaForUser(const UUID & /*user_id*/, size_t /*max_size_in_bytes*/, size_t /*max_entries*/) override {}
void increaseActual(const UUID & /*user_id*/, size_t /*entry_size_in_bytes*/) override {}
void decreaseActual(const UUID & /*user_id*/, size_t /*entry_size_in_bytes*/) override {}
bool approveWrite(const UUID & /*user_id*/, size_t /*entry_size_in_bytes*/) const override { return true; }
};


Expand Down
51 changes: 29 additions & 22 deletions src/Common/TTLCachePolicy.h
@@ -1,6 +1,7 @@
#pragma once

#include <Common/ICachePolicy.h>
#include <base/UUID.h>

#include <limits>
#include <unordered_map>
Expand All @@ -11,37 +12,37 @@ namespace DB
class PerUserTTLCachePolicyUserQuota : public ICachePolicyUserQuota
{
public:
void setQuotaForUser(const String & user_name, size_t max_size_in_bytes, size_t max_entries) override
void setQuotaForUser(const UUID & user_id, size_t max_size_in_bytes, size_t max_entries) override
{
quotas[user_name] = {max_size_in_bytes, max_entries};
quotas[user_id] = {max_size_in_bytes, max_entries};
}

void increaseActual(const String & user_name, size_t entry_size_in_bytes) override
void increaseActual(const UUID & user_id, size_t entry_size_in_bytes) override
{
auto & actual_for_user = actual[user_name];
auto & actual_for_user = actual[user_id];
actual_for_user.size_in_bytes += entry_size_in_bytes;
actual_for_user.num_items += 1;
}

void decreaseActual(const String & user_name, size_t entry_size_in_bytes) override
void decreaseActual(const UUID & user_id, size_t entry_size_in_bytes) override
{
chassert(actual.contains(user_name));
chassert(actual.contains(user_id));

chassert(actual[user_name].size_in_bytes >= entry_size_in_bytes);
actual[user_name].size_in_bytes -= entry_size_in_bytes;
chassert(actual[user_id].size_in_bytes >= entry_size_in_bytes);
actual[user_id].size_in_bytes -= entry_size_in_bytes;

chassert(actual[user_name].num_items >= 1);
actual[user_name].num_items -= 1;
chassert(actual[user_id].num_items >= 1);
actual[user_id].num_items -= 1;
}

bool approveWrite(const String & user_name, size_t entry_size_in_bytes) const override
bool approveWrite(const UUID & user_id, size_t entry_size_in_bytes) const override
{
auto it_actual = actual.find(user_name);
auto it_actual = actual.find(user_id);
Resources actual_for_user{.size_in_bytes = 0, .num_items = 0}; /// assume zero actual resource consumption is user isn't found
if (it_actual != actual.end())
actual_for_user = it_actual->second;

auto it_quota = quotas.find(user_name);
auto it_quota = quotas.find(user_id);
Resources quota_for_user{.size_in_bytes = std::numeric_limits<size_t>::max(), .num_items = std::numeric_limits<size_t>::max()}; /// assume no threshold if no quota is found
if (it_quota != quotas.end())
quota_for_user = it_quota->second;
Expand Down Expand Up @@ -69,10 +70,10 @@ class PerUserTTLCachePolicyUserQuota : public ICachePolicyUserQuota
size_t num_items = 0;
};

/// user name --> cache size quota (in bytes) / number of items quota
std::map<String, Resources> quotas;
/// user name --> actual cache usage (in bytes) / number of items
std::map<String, Resources> actual;
/// user id --> cache size quota (in bytes) / number of items quota
std::map<UUID, Resources> quotas;
/// user id --> actual cache usage (in bytes) / number of items
std::map<UUID, Resources> actual;
};


Expand Down Expand Up @@ -132,7 +133,8 @@ class TTLCachePolicy : public ICachePolicy<Key, Mapped, HashFunction, WeightFunc
if (it == cache.end())
return;
size_t sz = weight_function(*it->second);
Base::user_quotas->decreaseActual(it->first.user_name, sz);
if (it->first.user_id.has_value())
Base::user_quotas->decreaseActual(*it->first.user_id, sz);
cache.erase(it);
size_in_bytes -= sz;
}
Expand Down Expand Up @@ -169,7 +171,9 @@ class TTLCachePolicy : public ICachePolicy<Key, Mapped, HashFunction, WeightFunc
/// Checks against per-user limits
auto sufficient_space_in_cache_for_user = [&]()
{
return Base::user_quotas->approveWrite(key.user_name, entry_size_in_bytes);
if (key.user_id.has_value())
return Base::user_quotas->approveWrite(*key.user_id, entry_size_in_bytes);
return true;
};

if (!sufficient_space_in_cache() || !sufficient_space_in_cache_for_user())
Expand All @@ -179,7 +183,8 @@ class TTLCachePolicy : public ICachePolicy<Key, Mapped, HashFunction, WeightFunc
if (is_stale_function(it->first))
{
size_t sz = weight_function(*it->second);
Base::user_quotas->decreaseActual(it->first.user_name, sz);
if (it->first.user_id.has_value())
Base::user_quotas->decreaseActual(*it->first.user_id, sz);
it = cache.erase(it);
size_in_bytes -= sz;
}
Expand All @@ -193,14 +198,16 @@ class TTLCachePolicy : public ICachePolicy<Key, Mapped, HashFunction, WeightFunc
if (auto it = cache.find(key); it != cache.end())
{
size_t sz = weight_function(*it->second);
Base::user_quotas->decreaseActual(it->first.user_name, sz);
if (it->first.user_id.has_value())
Base::user_quotas->decreaseActual(*it->first.user_id, sz);
cache.erase(it); // stupid bug: (*) doesn't replace existing entries (likely due to custom hash function), need to erase explicitly
size_in_bytes -= sz;
}

cache[key] = std::move(mapped); // (*)
size_in_bytes += entry_size_in_bytes;
Base::user_quotas->increaseActual(key.user_name, entry_size_in_bytes);
if (key.user_id.has_value())
Base::user_quotas->increaseActual(*key.user_id, entry_size_in_bytes);
}
}

Expand Down
18 changes: 12 additions & 6 deletions src/Interpreters/Cache/QueryCache.cpp
Expand Up @@ -129,21 +129,23 @@ String queryStringFromAST(ASTPtr ast)
QueryCache::Key::Key(
ASTPtr ast_,
Block header_,
const String & user_name_, bool is_shared_,
std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_,
bool is_shared_,
std::chrono::time_point<std::chrono::system_clock> expires_at_,
bool is_compressed_)
: ast(removeQueryCacheSettings(ast_))
, header(header_)
, user_name(user_name_)
, user_id(user_id_)
, current_user_roles(current_user_roles_)
, is_shared(is_shared_)
, expires_at(expires_at_)
, is_compressed(is_compressed_)
, query_string(queryStringFromAST(ast_))
{
}

QueryCache::Key::Key(ASTPtr ast_, const String & user_name_)
: QueryCache::Key(ast_, {}, user_name_, false, std::chrono::system_clock::from_time_t(1), false) /// dummy values for everything != AST or user name
QueryCache::Key::Key(ASTPtr ast_, std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_)
: QueryCache::Key(ast_, {}, user_id_, current_user_roles_, false, std::chrono::system_clock::from_time_t(1), false) /// dummy values for everything != AST or user name
{
}

Expand Down Expand Up @@ -401,7 +403,9 @@ QueryCache::Reader::Reader(Cache & cache_, const Key & key, const std::lock_guar
const auto & entry_key = entry->key;
const auto & entry_mapped = entry->mapped;

if (!entry_key.is_shared && entry_key.user_name != key.user_name)
const bool is_same_user_id = ((!entry_key.user_id.has_value() && !key.user_id.has_value()) || (entry_key.user_id.has_value() && key.user_id.has_value() && *entry_key.user_id == *key.user_id));
const bool is_same_current_user_roles = (entry_key.current_user_roles == key.current_user_roles);
if (!entry_key.is_shared && (!is_same_user_id || !is_same_current_user_roles))
{
LOG_TRACE(logger, "Inaccessible query result found for query {}", doubleQuoteString(key.query_string));
return;
Expand Down Expand Up @@ -503,7 +507,9 @@ QueryCache::Writer QueryCache::createWriter(const Key & key, std::chrono::millis
/// Update the per-user cache quotas with the values stored in the query context. This happens per query which writes into the query
/// cache. Obviously, this is overkill but I could find the good place to hook into which is called when the settings profiles in
/// users.xml change.
cache.setQuotaForUser(key.user_name, max_query_cache_size_in_bytes_quota, max_query_cache_entries_quota);
/// user_id == std::nullopt is the internal user for which no quota can be configured
if (key.user_id.has_value())
cache.setQuotaForUser(*key.user_id, max_query_cache_size_in_bytes_quota, max_query_cache_entries_quota);

std::lock_guard lock(mutex);
return Writer(cache, key, max_entry_size_in_bytes, max_entry_size_in_rows, min_query_runtime, squash_partial_results, max_block_size);
Expand Down
21 changes: 16 additions & 5 deletions src/Interpreters/Cache/QueryCache.h
Expand Up @@ -4,9 +4,12 @@
#include <Common/logger_useful.h>
#include <Core/Block.h>
#include <Parsers/IAST_fwd.h>
#include <Processors/Sources/SourceFromChunks.h>
#include <Processors/Chunk.h>
#include <Processors/Sources/SourceFromChunks.h>
#include <QueryPipeline/Pipe.h>
#include <base/UUID.h>

#include <optional>

namespace DB
{
Expand Down Expand Up @@ -51,8 +54,15 @@ class QueryCache
/// Result metadata for constructing the pipe.
const Block header;

/// The user who executed the query.
const String user_name;
/// The id and current roles of the user who executed the query.
/// These members are necessary to ensure that a (non-shared, see below) entry can only be written and read by the same user with
/// the same roles. Example attack scenarios:
/// - after DROP USER, it must not be possible to create a new user with with the dropped user name and access the dropped user's
/// query cache entries
/// - different roles of the same user may be tied to different row-level policies. It must not be possible to switch role and
/// access another role's cache entries
std::optional<UUID> user_id;
std::vector<UUID> current_user_roles;

/// If the associated entry can be read by other users. In general, sharing is a bad idea: First, it is unlikely that different
/// users pose the same queries. Second, sharing potentially breaches security. E.g. User A should not be able to bypass row
Expand All @@ -74,12 +84,13 @@ class QueryCache
/// Ctor to construct a Key for writing into query cache.
Key(ASTPtr ast_,
Block header_,
const String & user_name_, bool is_shared_,
std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_,
bool is_shared_,
std::chrono::time_point<std::chrono::system_clock> expires_at_,
bool is_compressed);

/// Ctor to construct a Key for reading from query cache (this operation only needs the AST + user name).
Key(ASTPtr ast_, const String & user_name_);
Key(ASTPtr ast_, std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_);

bool operator==(const Key & other) const;
};
Expand Down
5 changes: 3 additions & 2 deletions src/Interpreters/executeQuery.cpp
Expand Up @@ -1010,7 +1010,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
{
if (can_use_query_cache && settings.enable_reads_from_query_cache)
{
QueryCache::Key key(ast, context->getUserName());
QueryCache::Key key(ast, context->getUserID(), context->getCurrentRoles());
QueryCache::Reader reader = query_cache->createReader(key);
if (reader.hasCacheEntryForKey())
{
Expand Down Expand Up @@ -1123,7 +1123,8 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
{
QueryCache::Key key(
ast, res.pipeline.getHeader(),
context->getUserName(), settings.query_cache_share_between_users,
context->getUserID(), context->getCurrentRoles(),
settings.query_cache_share_between_users,
std::chrono::system_clock::now() + std::chrono::seconds(settings.query_cache_ttl),
settings.query_cache_compress_entries);

Expand Down
6 changes: 5 additions & 1 deletion src/Storages/System/StorageSystemQueryCache.cpp
Expand Up @@ -37,11 +37,15 @@ void StorageSystemQueryCache::fillData(MutableColumns & res_columns, ContextPtr
std::vector<QueryCache::Cache::KeyMapped> content = query_cache->dump();

const String & user_name = context->getUserName();
std::optional<UUID> user_id = context->getUserID();
std::vector<UUID> current_user_roles = context->getCurrentRoles();

for (const auto & [key, query_result] : content)
{
/// Showing other user's queries is considered a security risk
if (!key.is_shared && key.user_name != user_name)
const bool is_same_user_id = ((!key.user_id.has_value() && !user_id.has_value()) || (key.user_id.has_value() && user_id.has_value() && *key.user_id == *user_id));
const bool is_same_current_user_roles = (key.current_user_roles == current_user_roles);
if (!key.is_shared && (!is_same_user_id || !is_same_current_user_roles))
continue;

res_columns[0]->insert(key.query_string); /// approximates the original query string
Expand Down
@@ -0,0 +1,28 @@
Attack 1
0
system.query_cache with old user 1
0
0 1
1 0
system.query_cache with new user 0
0
0 1
1 0
0 1
Attack 2
-- policy_1 test
1 1
3 1
6 1
-- policy_2 test
2 2
5 2
8 2
-- policy_1 with query cache test
1 1
3 1
6 1
-- policy_2 with query cache test
2 2
5 2
8 2