ClickHouse · rschu1ze · Jan 11, 2024 · Jan 8, 2024 · Jan 9, 2024 · Jan 9, 2024
diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md
@@ -29,10 +29,6 @@ Transactionally inconsistent caching is traditionally provided by client tools o
 the same caching logic and configuration is often duplicated. With ClickHouse's query cache, the caching logic moves to the server side.
 This reduces maintenance effort and avoids redundancy.
 
-:::note
-Security consideration: The cached query result is tied to the user executing it. Authorization checks are performed when the query is executed. This means that if there are any alterations to the user's role or permissions between the time the query is cached and when the cache is accessed, the result will not reflect these changes. We recommend using different users to distinguish between different levels of access, instead of actively toggling roles for a single user between queries, as this practice may lead to unexpected query results.
-:::
-
 ## Configuration Settings and Usage
 
 Setting [use_query_cache](settings/settings.md#use-query-cache) can be used to control whether a specific query or all queries of the

diff --git a/src/Common/CacheBase.h b/src/Common/CacheBase.h
@@ -5,15 +5,15 @@
 #include <Common/LRUCachePolicy.h>
 #include <Common/SLRUCachePolicy.h>
 
+#include <base/UUID.h>
+#include <base/defines.h>
+
 #include <atomic>
-#include <cassert>
-#include <chrono>
 #include <memory>
 #include <mutex>
+#include <optional>
 #include <unordered_map>
 
-#include <base/defines.h>
-
 
 namespace DB
 {
@@ -227,10 +227,10 @@ class CacheBase
         cache_policy->setMaxSizeInBytes(max_size_in_bytes);
     }
 
-    void setQuotaForUser(const String & user_name, size_t max_size_in_bytes, size_t max_entries)
+    void setQuotaForUser(const UUID & user_id, size_t max_size_in_bytes, size_t max_entries)
     {
         std::lock_guard lock(mutex);
-        cache_policy->setQuotaForUser(user_name, max_size_in_bytes, max_entries);
+        cache_policy->setQuotaForUser(user_id, max_size_in_bytes, max_entries);
     }
 
     virtual ~CacheBase() = default;

diff --git a/src/Common/ICachePolicy.h b/src/Common/ICachePolicy.h
@@ -2,10 +2,11 @@
 
 #include <Common/Exception.h>
 #include <Common/ICachePolicyUserQuota.h>
+#include <base/UUID.h>
 
 #include <functional>
 #include <memory>
-#include <mutex>
+#include <optional>
 
 namespace DB
 {
@@ -43,7 +44,7 @@ class ICachePolicy
 
     virtual void setMaxCount(size_t /*max_count*/) = 0;
     virtual void setMaxSizeInBytes(size_t /*max_size_in_bytes*/) = 0;
-    virtual void setQuotaForUser(const String & user_name, size_t max_size_in_bytes, size_t max_entries) { user_quotas->setQuotaForUser(user_name, max_size_in_bytes, max_entries); }
+    virtual void setQuotaForUser(const UUID & user_id, size_t max_size_in_bytes, size_t max_entries) { user_quotas->setQuotaForUser(user_id, max_size_in_bytes, max_entries); }
 
     /// HashFunction usually hashes the entire key and the found key will be equal the provided key. In such cases, use get(). It is also
     /// possible to store other, non-hashed data in the key. In that case, the found key is potentially different from the provided key.

diff --git a/src/Common/ICachePolicyUserQuota.h b/src/Common/ICachePolicyUserQuota.h
@@ -1,5 +1,6 @@
 #pragma once
 
+#include <base/UUID.h>
 #include <base/types.h>
 
 namespace DB
@@ -15,14 +16,14 @@ class ICachePolicyUserQuota
 {
 public:
     /// Register or update the user's quota for the given resource.
-    virtual void setQuotaForUser(const String & user_name, size_t max_size_in_bytes, size_t max_entries) = 0;
+    virtual void setQuotaForUser(const UUID & user_id, size_t max_size_in_bytes, size_t max_entries) = 0;
 
     /// Update the actual resource usage for the given user.
-    virtual void increaseActual(const String & user_name, size_t entry_size_in_bytes) = 0;
-    virtual void decreaseActual(const String & user_name, size_t entry_size_in_bytes) = 0;
+    virtual void increaseActual(const UUID & user_id, size_t entry_size_in_bytes) = 0;
+    virtual void decreaseActual(const UUID & user_id, size_t entry_size_in_bytes) = 0;
 
     /// Is the user allowed to write a new entry into the cache?
-    virtual bool approveWrite(const String & user_name, size_t entry_size_in_bytes) const = 0;
+    virtual bool approveWrite(const UUID & user_id, size_t entry_size_in_bytes) const = 0;
 
     virtual ~ICachePolicyUserQuota() = default;
 };
@@ -33,10 +34,10 @@ using CachePolicyUserQuotaPtr = std::unique_ptr<ICachePolicyUserQuota>;
 class NoCachePolicyUserQuota : public ICachePolicyUserQuota
 {
 public:
-    void setQuotaForUser(const String & /*user_name*/, size_t /*max_size_in_bytes*/, size_t /*max_entries*/) override {}
-    void increaseActual(const String & /*user_name*/, size_t /*entry_size_in_bytes*/) override {}
-    void decreaseActual(const String & /*user_name*/, size_t /*entry_size_in_bytes*/) override {}
-    bool approveWrite(const String & /*user_name*/, size_t /*entry_size_in_bytes*/) const override { return true; }
+    void setQuotaForUser(const UUID & /*user_id*/, size_t /*max_size_in_bytes*/, size_t /*max_entries*/) override {}
+    void increaseActual(const UUID & /*user_id*/, size_t /*entry_size_in_bytes*/) override {}
+    void decreaseActual(const UUID & /*user_id*/, size_t /*entry_size_in_bytes*/) override {}
+    bool approveWrite(const UUID & /*user_id*/, size_t /*entry_size_in_bytes*/) const override { return true; }
 };
 
 

diff --git a/src/Common/TTLCachePolicy.h b/src/Common/TTLCachePolicy.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <Common/ICachePolicy.h>
+#include <base/UUID.h>
 
 #include <limits>
 #include <unordered_map>
@@ -11,37 +12,37 @@ namespace DB
 class PerUserTTLCachePolicyUserQuota : public ICachePolicyUserQuota
 {
 public:
-    void setQuotaForUser(const String & user_name, size_t max_size_in_bytes, size_t max_entries) override
+    void setQuotaForUser(const UUID & user_id, size_t max_size_in_bytes, size_t max_entries) override
     {
-        quotas[user_name] = {max_size_in_bytes, max_entries};
+        quotas[user_id] = {max_size_in_bytes, max_entries};
     }
 
-    void increaseActual(const String & user_name, size_t entry_size_in_bytes) override
+    void increaseActual(const UUID & user_id, size_t entry_size_in_bytes) override
     {
-        auto & actual_for_user = actual[user_name];
+        auto & actual_for_user = actual[user_id];
         actual_for_user.size_in_bytes += entry_size_in_bytes;
         actual_for_user.num_items += 1;
     }
 
-    void decreaseActual(const String & user_name, size_t entry_size_in_bytes) override
+    void decreaseActual(const UUID & user_id, size_t entry_size_in_bytes) override
     {
-        chassert(actual.contains(user_name));
+        chassert(actual.contains(user_id));
 
-        chassert(actual[user_name].size_in_bytes >= entry_size_in_bytes);
-        actual[user_name].size_in_bytes -= entry_size_in_bytes;
+        chassert(actual[user_id].size_in_bytes >= entry_size_in_bytes);
+        actual[user_id].size_in_bytes -= entry_size_in_bytes;
 
-        chassert(actual[user_name].num_items >= 1);
-        actual[user_name].num_items -= 1;
+        chassert(actual[user_id].num_items >= 1);
+        actual[user_id].num_items -= 1;
     }
 
-    bool approveWrite(const String & user_name, size_t entry_size_in_bytes) const override
+    bool approveWrite(const UUID & user_id, size_t entry_size_in_bytes) const override
     {
-        auto it_actual = actual.find(user_name);
+        auto it_actual = actual.find(user_id);
         Resources actual_for_user{.size_in_bytes = 0, .num_items = 0}; /// assume zero actual resource consumption is user isn't found
         if (it_actual != actual.end())
             actual_for_user = it_actual->second;
 
-        auto it_quota = quotas.find(user_name);
+        auto it_quota = quotas.find(user_id);
         Resources quota_for_user{.size_in_bytes = std::numeric_limits<size_t>::max(), .num_items = std::numeric_limits<size_t>::max()}; /// assume no threshold if no quota is found
         if (it_quota != quotas.end())
             quota_for_user = it_quota->second;
@@ -69,10 +70,10 @@ class PerUserTTLCachePolicyUserQuota : public ICachePolicyUserQuota
         size_t num_items = 0;
     };
 
-    /// user name --> cache size quota (in bytes) / number of items quota
-    std::map<String, Resources> quotas;
-    /// user name --> actual cache usage (in bytes) / number of items
-    std::map<String, Resources> actual;
+    /// user id --> cache size quota (in bytes) / number of items quota
+    std::map<UUID, Resources> quotas;
+    /// user id --> actual cache usage (in bytes) / number of items
+    std::map<UUID, Resources> actual;
 };
 
 
@@ -132,7 +133,8 @@ class TTLCachePolicy : public ICachePolicy<Key, Mapped, HashFunction, WeightFunc
         if (it == cache.end())
             return;
         size_t sz = weight_function(*it->second);
-        Base::user_quotas->decreaseActual(it->first.user_name, sz);
+        if (it->first.user_id.has_value())
+            Base::user_quotas->decreaseActual(*it->first.user_id, sz);
         cache.erase(it);
         size_in_bytes -= sz;
     }
@@ -169,7 +171,9 @@ class TTLCachePolicy : public ICachePolicy<Key, Mapped, HashFunction, WeightFunc
         /// Checks against per-user limits
         auto sufficient_space_in_cache_for_user = [&]()
         {
-            return Base::user_quotas->approveWrite(key.user_name, entry_size_in_bytes);
+            if (key.user_id.has_value())
+                return Base::user_quotas->approveWrite(*key.user_id, entry_size_in_bytes);
+            return true;
         };
 
         if (!sufficient_space_in_cache() || !sufficient_space_in_cache_for_user())
@@ -179,7 +183,8 @@ class TTLCachePolicy : public ICachePolicy<Key, Mapped, HashFunction, WeightFunc
                 if (is_stale_function(it->first))
                 {
                     size_t sz = weight_function(*it->second);
-                    Base::user_quotas->decreaseActual(it->first.user_name, sz);
+                    if (it->first.user_id.has_value())
+                        Base::user_quotas->decreaseActual(*it->first.user_id, sz);
                     it = cache.erase(it);
                     size_in_bytes -= sz;
                 }
@@ -193,14 +198,16 @@ class TTLCachePolicy : public ICachePolicy<Key, Mapped, HashFunction, WeightFunc
             if (auto it = cache.find(key); it != cache.end())
             {
                 size_t sz = weight_function(*it->second);
-                Base::user_quotas->decreaseActual(it->first.user_name, sz);
+                if (it->first.user_id.has_value())
+                    Base::user_quotas->decreaseActual(*it->first.user_id, sz);
                 cache.erase(it); // stupid bug: (*) doesn't replace existing entries (likely due to custom hash function), need to erase explicitly
                 size_in_bytes -= sz;
             }
 
             cache[key] = std::move(mapped); // (*)
             size_in_bytes += entry_size_in_bytes;
-            Base::user_quotas->increaseActual(key.user_name, entry_size_in_bytes);
+            if (key.user_id.has_value())
+                Base::user_quotas->increaseActual(*key.user_id, entry_size_in_bytes);
         }
     }
 

diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp
@@ -129,21 +129,23 @@ String queryStringFromAST(ASTPtr ast)
 QueryCache::Key::Key(
     ASTPtr ast_,
     Block header_,
-    const String & user_name_, bool is_shared_,
+    std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_,
+    bool is_shared_,
     std::chrono::time_point<std::chrono::system_clock> expires_at_,
     bool is_compressed_)
     : ast(removeQueryCacheSettings(ast_))
     , header(header_)
-    , user_name(user_name_)
+    , user_id(user_id_)
+    , current_user_roles(current_user_roles_)
     , is_shared(is_shared_)
     , expires_at(expires_at_)
     , is_compressed(is_compressed_)
     , query_string(queryStringFromAST(ast_))
 {
 }
 
-QueryCache::Key::Key(ASTPtr ast_, const String & user_name_)
-    : QueryCache::Key(ast_, {}, user_name_, false, std::chrono::system_clock::from_time_t(1), false) /// dummy values for everything != AST or user name
+QueryCache::Key::Key(ASTPtr ast_, std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_)
+    : QueryCache::Key(ast_, {}, user_id_, current_user_roles_, false, std::chrono::system_clock::from_time_t(1), false) /// dummy values for everything != AST or user name
 {
 }
 
@@ -401,7 +403,9 @@ QueryCache::Reader::Reader(Cache & cache_, const Key & key, const std::lock_guar
     const auto & entry_key = entry->key;
     const auto & entry_mapped = entry->mapped;
 
-    if (!entry_key.is_shared && entry_key.user_name != key.user_name)
+    const bool is_same_user_id = ((!entry_key.user_id.has_value() && !key.user_id.has_value()) || (entry_key.user_id.has_value() && key.user_id.has_value() && *entry_key.user_id == *key.user_id));
+    const bool is_same_current_user_roles = (entry_key.current_user_roles == key.current_user_roles);
+    if (!entry_key.is_shared && (!is_same_user_id || !is_same_current_user_roles))
     {
         LOG_TRACE(logger, "Inaccessible query result found for query {}", doubleQuoteString(key.query_string));
         return;
@@ -503,7 +507,9 @@ QueryCache::Writer QueryCache::createWriter(const Key & key, std::chrono::millis
     /// Update the per-user cache quotas with the values stored in the query context. This happens per query which writes into the query
     /// cache. Obviously, this is overkill but I could find the good place to hook into which is called when the settings profiles in
     /// users.xml change.
-    cache.setQuotaForUser(key.user_name, max_query_cache_size_in_bytes_quota, max_query_cache_entries_quota);
+    /// user_id == std::nullopt is the internal user for which no quota can be configured
+    if (key.user_id.has_value())
+        cache.setQuotaForUser(*key.user_id, max_query_cache_size_in_bytes_quota, max_query_cache_entries_quota);
 
     std::lock_guard lock(mutex);
     return Writer(cache, key, max_entry_size_in_bytes, max_entry_size_in_rows, min_query_runtime, squash_partial_results, max_block_size);

diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h
@@ -4,9 +4,12 @@
 #include <Common/logger_useful.h>
 #include <Core/Block.h>
 #include <Parsers/IAST_fwd.h>
-#include <Processors/Sources/SourceFromChunks.h>
 #include <Processors/Chunk.h>
+#include <Processors/Sources/SourceFromChunks.h>
 #include <QueryPipeline/Pipe.h>
+#include <base/UUID.h>
+
+#include <optional>
 
 namespace DB
 {
@@ -51,8 +54,15 @@ class QueryCache
         /// Result metadata for constructing the pipe.
         const Block header;
 
-        /// The user who executed the query.
-        const String user_name;
+        /// The id and current roles of the user who executed the query.
+        /// These members are necessary to ensure that a (non-shared, see below) entry can only be written and read by the same user with
+        /// the same roles. Example attack scenarios:
+        /// - after DROP USER, it must not be possible to create a new user with with the dropped user name and access the dropped user's
+        ///   query cache entries
+        /// - different roles of the same user may be tied to different row-level policies. It must not be possible to switch role and
+        ///   access another role's cache entries
+        std::optional<UUID> user_id;
+        std::vector<UUID> current_user_roles;
 
         /// If the associated entry can be read by other users. In general, sharing is a bad idea: First, it is unlikely that different
         /// users pose the same queries. Second, sharing potentially breaches security. E.g. User A should not be able to bypass row
@@ -74,12 +84,13 @@ class QueryCache
         /// Ctor to construct a Key for writing into query cache.
         Key(ASTPtr ast_,
             Block header_,
-            const String & user_name_, bool is_shared_,
+            std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_,
+            bool is_shared_,
             std::chrono::time_point<std::chrono::system_clock> expires_at_,
             bool is_compressed);
 
         /// Ctor to construct a Key for reading from query cache (this operation only needs the AST + user name).
-        Key(ASTPtr ast_, const String & user_name_);
+        Key(ASTPtr ast_, std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_);
 
         bool operator==(const Key & other) const;
     };

diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp
@@ -1010,7 +1010,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
             {
                 if (can_use_query_cache && settings.enable_reads_from_query_cache)
                 {
-                    QueryCache::Key key(ast, context->getUserName());
+                    QueryCache::Key key(ast, context->getUserID(), context->getCurrentRoles());
                     QueryCache::Reader reader = query_cache->createReader(key);
                     if (reader.hasCacheEntryForKey())
                     {
@@ -1123,7 +1123,8 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
                         {
                             QueryCache::Key key(
                                 ast, res.pipeline.getHeader(),
-                                context->getUserName(), settings.query_cache_share_between_users,
+                                context->getUserID(), context->getCurrentRoles(),
+                                settings.query_cache_share_between_users,
                                 std::chrono::system_clock::now() + std::chrono::seconds(settings.query_cache_ttl),
                                 settings.query_cache_compress_entries);
 

diff --git a/src/Storages/System/StorageSystemQueryCache.cpp b/src/Storages/System/StorageSystemQueryCache.cpp
@@ -37,11 +37,15 @@ void StorageSystemQueryCache::fillData(MutableColumns & res_columns, ContextPtr
     std::vector<QueryCache::Cache::KeyMapped> content = query_cache->dump();
 
     const String & user_name = context->getUserName();
+    std::optional<UUID> user_id = context->getUserID();
+    std::vector<UUID> current_user_roles = context->getCurrentRoles();
 
     for (const auto & [key, query_result] : content)
     {
         /// Showing other user's queries is considered a security risk
-        if (!key.is_shared && key.user_name != user_name)
+        const bool is_same_user_id = ((!key.user_id.has_value() && !user_id.has_value()) || (key.user_id.has_value() && user_id.has_value() && *key.user_id == *user_id));
+        const bool is_same_current_user_roles = (key.current_user_roles == current_user_roles);
+        if (!key.is_shared && (!is_same_user_id || !is_same_current_user_roles))
             continue;
 
         res_columns[0]->insert(key.query_string); /// approximates the original query string

diff --git a/tests/queries/0_stateless/02494_query_cache_user_isolation.reference b/tests/queries/0_stateless/02494_query_cache_user_isolation.reference
@@ -0,0 +1,28 @@
+Attack 1
+0
+system.query_cache with old user	1
+0
+0	1
+1	0
+system.query_cache with new user	0
+0
+0	1
+1	0
+0	1
+Attack 2
+-- policy_1 test
+1	1
+3	1
+6	1
+-- policy_2 test
+2	2
+5	2
+8	2
+-- policy_1 with query cache test
+1	1
+3	1
+6	1
+-- policy_2 with query cache test
+2	2
+5	2
+8	2