Skip to content

[CI crash] Hash table emplace operation in distinct transform #86296

@robot-clickhouse

Description

@robot-clickhouse
Stack trace details

The sipHash64(st.trace_full) is 14145905254348956910
The trace is from the master or release branch: True

Query to run in CIDB to compare this trace with a known one (replace {ANOTHER_TRACE_HASH} with the hash of the known trace):

WITH
    (
        SELECT groupArrayDistinct(cleanStackTrace(trace_full) AS trace) FROM default.stack_traces
        WHERE sipHash64(trace) IN (14145905254348956910, {ANOTHER_TRACE_HASH}) -- FIXME: replace with the known hash
    ) AS traces,
    1.97 AS alpha,
    stack_frame_weights AS (
        WITH
            (
                SELECT count()
                FROM default.stack_traces
                FINAL
            ) AS total,
            2.0 AS beta,
            3.7 AS gamma
        SELECT
            arrayJoin(cleanStackTrace(trace_full)) AS frame,
            countDistinct(trace_full) AS count,
            log(total / count) AS IDF,
            sigmoid(beta * (IDF - gamma)) AS weight
        FROM default.stack_traces
        FINAL
        GROUP BY frame
    ),
    (SELECT groupArray(weight) AS w, groupArray(frame) AS f FROM stack_frame_weights) AS weights,
    (trace -> arrayMap((_frame, pos) -> (pow(pos, -alpha) * arrayFirst(w, f -> (f = _frame), weights.w, weights.f)), trace, arrayEnumerate(trace))) AS get_trace_weights,
    (arr -> arrayStringConcat(arr, '\n')) AS joinArr

SELECT arraySimilarity(traces[1], traces[2], get_trace_weights(traces[1]) AS weights1, get_trace_weights(traces[2]) AS weights2) AS similarity,
    arrayLevenshteinDistanceWeighted(traces[1], traces[2], weights1, weights2),
    joinArr(traces[1]), joinArr(traces[2]), joinArr(weights1), joinArr(weights2)

The following new stack trace was found in system.crash_log from the CI logs:

   CRC32Hash::operator()(StringRef) const
   HashTable<StringRef, HashSetCellWithSavedHash<StringRef, DefaultHash<StringRef>, HashTableNoState>, DefaultHash<StringRef>, HashTableGrowerWithPrecalculation<8ul>, Allocator<true, true>>::hash(StringRef const&) const
   void HashTable<StringRef, HashSetCellWithSavedHash<StringRef, DefaultHash<StringRef>, HashTableNoState>, DefaultHash<StringRef>, HashTableGrowerWithPrecalculation<8ul>, Allocator<true, true>>::emplace<DB::ArenaKeyHolder&>(DB::ArenaKeyHolder&, HashSetCellWithSavedHash<StringRef, DefaultHash<StringRef>, HashTableNoState>*&, bool&)
   DB::ColumnsHashing::columns_hashing_impl::EmplaceResultImpl<void> DB::ColumnsHashing::columns_hashing_impl::HashMethodBase<DB::ColumnsHashing::HashMethodString<StringRef, void, true, false, false, false>, StringRef, void, false, false, false>::emplaceImpl<HashSetTable<StringRef, HashSetCellWithSavedHash<StringRef, DefaultHash<StringRef>, HashTableNoState>, DefaultHash<StringRef>, HashTableGrowerWithPrecalculation<8ul>, Allocator<true, true>>, DB::ArenaKeyHolder>(DB::ArenaKeyHolder&, HashSetTable<StringRef, HashSetCellWithSavedHash<StringRef, DefaultHash<StringRef>, HashTableNoState>, DefaultHash<StringRef>, HashTableGrowerWithPrecalculation<8ul>, Allocator<true, true>>&)
   DB::ColumnsHashing::columns_hashing_impl::EmplaceResultImpl<void> DB::ColumnsHashing::columns_hashing_impl::HashMethodBase<DB::ColumnsHashing::HashMethodString<StringRef, void, true, false, false, false>, StringRef, void, false, false, false>::emplaceKey<HashSetTable<StringRef, HashSetCellWithSavedHash<StringRef, DefaultHash<StringRef>, HashTableNoState>, DefaultHash<StringRef>, HashTableGrowerWithPrecalculation<8ul>, Allocator<true, true>>>(HashSetTable<StringRef, HashSetCellWithSavedHash<StringRef, DefaultHash<StringRef>, HashTableNoState>, DefaultHash<StringRef>, HashTableGrowerWithPrecalculation<8ul>, Allocator<true, true>>&, unsigned long, DB::Arena&)
   void DB::DistinctTransform::buildFilter<DB::SetMethodString<HashSetTable<StringRef, HashSetCellWithSavedHash<StringRef, DefaultHash<StringRef>, HashTableNoState>, DefaultHash<StringRef>, HashTableGrowerWithPrecalculation<8ul>, Allocator<true, true>>>>(DB::SetMethodString<HashSetTable<StringRef, HashSetCellWithSavedHash<StringRef, DefaultHash<StringRef>, HashTableNoState>, DefaultHash<StringRef>, HashTableGrowerWithPrecalculation<8ul>, Allocator<true, true>>>&, std::vector<DB::IColumn const*, std::allocator<DB::IColumn const*>> const&, DB::PODArray<char8_t, 4096ul, Allocator<false, false>, 63ul, 64ul>&, unsigned long, DB::SetVariantsTemplate<DB::NonClearableSet>&) const
   DB::DistinctTransform::transform(DB::Chunk&)
   DB::ISimpleTransform::transform(DB::Chunk&, DB::Chunk&)
   DB::ISimpleTransform::work()
   DB::executeJob(DB::ExecutingGraph::Node*, DB::ReadProgressCallback*)
   DB::ExecutionThreadContext::executeTask()
   DB::PipelineExecutor::executeStepImpl(unsigned long, DB::IAcquiredSlot*, std::atomic<bool>*)
   DB::PipelineExecutor::executeSingleThread(unsigned long, DB::IAcquiredSlot*)
   DB::PipelineExecutor::executeImpl(unsigned long, bool)
   DB::PipelineExecutor::execute(unsigned long, bool)
   DB::threadFunction(DB::PullingAsyncPipelineExecutor::Data&, std::shared_ptr<DB::ThreadGroup>, unsigned long, bool)
   operator()
   decltype(std::declval<DB::PullingAsyncPipelineExecutor::pull(DB::Chunk&, unsigned long)::$_0&>()()) std::__invoke[$ABI]<DB::PullingAsyncPipelineExecutor::pull(DB::Chunk&, unsigned long)::$_0&>(DB::PullingAsyncPipelineExecutor::pull(DB::Chunk&, unsigned long)::$_0&)
   decltype(auto) std::__apply_tuple_impl[$ABI]<DB::PullingAsyncPipelineExecutor::pull(DB::Chunk&, unsigned long)::$_0&, std::tuple<>&>(DB::PullingAsyncPipelineExecutor::pull(DB::Chunk&, unsigned long)::$_0&, std::tuple<>&, std::__tuple_indices<...>)
   decltype(auto) std::apply[$ABI]<DB::PullingAsyncPipelineExecutor::pull(DB::Chunk&, unsigned long)::$_0&, std::tuple<>&>(DB::PullingAsyncPipelineExecutor::pull(DB::Chunk&, unsigned long)::$_0&, std::tuple<>&)
   operator()
   decltype(std::declval<DB::PullingAsyncPipelineExecutor::pull(DB::Chunk&, unsigned long)::$_0>()()) std::__invoke[$ABI]<ThreadFromGlobalPoolImpl<true, true>::ThreadFromGlobalPoolImpl<DB::PullingAsyncPipelineExecutor::pull(DB::Chunk&, unsigned long)::$_0>(DB::PullingAsyncPipelineExecutor::pull(DB::Chunk&, unsigned long)::$_0&&)::'lambda'()&>(DB::PullingAsyncPipelineExecutor::pull(DB::Chunk&, unsigned long)::$_0&&)
   void std::__invoke_void_return_wrapper<void, true>::__call[$ABI]<ThreadFromGlobalPoolImpl<true, true>::ThreadFromGlobalPoolImpl<DB::PullingAsyncPipelineExecutor::pull(DB::Chunk&, unsigned long)::$_0>(DB::PullingAsyncPipelineExecutor::pull(DB::Chunk&, unsigned long)::$_0&&)::'lambda'()&>(ThreadFromGlobalPoolImpl<true, true>::ThreadFromGlobalPoolImpl<DB::PullingAsyncPipelineExecutor::pull(DB::Chunk&, unsigned long)::$_0>(DB::PullingAsyncPipelineExecutor::pull(DB::Chunk&, unsigned long)::$_0&&)::'lambda'()&)
   ThreadPoolImpl<std::thread>::ThreadFromThreadPool::worker()
   decltype(*std::declval<ThreadPoolImpl<std::thread>::ThreadFromThreadPool*>().*std::declval<void (ThreadPoolImpl<std::thread>::ThreadFromThreadPool::*)()>()()) std::__invoke[$ABI]<void (ThreadPoolImpl<std::thread>::ThreadFromThreadPool::*)(), ThreadPoolImpl<std::thread>::ThreadFromThreadPool*, void>(void (ThreadPoolImpl<std::thread>::ThreadFromThreadPool::*&&)(), ThreadPoolImpl<std::thread>::ThreadFromThreadPool*&&)
   void std::__thread_execute[$ABI]<std::unique_ptr<std::__thread_struct, std::default_delete<std::__thread_struct>>, void (ThreadPoolImpl<std::thread>::ThreadFromThreadPool::*)(), ThreadPoolImpl<std::thread>::ThreadFromThreadPool*, 2ul>(std::tuple<std::unique_ptr<std::__thread_struct, std::default_delete<std::__thread_struct>>, void (ThreadPoolImpl<std::thread>::ThreadFromThreadPool::*)(), ThreadPoolImpl<std::thread>::ThreadFromThreadPool*>&, std::__tuple_indices<2ul>)
   void* std::__thread_proxy[$ABI]<std::tuple<std::unique_ptr<std::__thread_struct, std::default_delete<std::__thread_struct>>, void (ThreadPoolImpl<std::thread>::ThreadFromThreadPool::*)(), ThreadPoolImpl<std::thread>::ThreadFromThreadPool*>>(void*)

Possible causes:

  • Incorrect hash calculation leading to incorrect key placement
  • Mismatch between key type and hash function
  • Improper handling of string references in hash table
  • Incorrect memory management in arena key holder
  • Hash table overflow or incorrect resizing parameters

The stack trace appeared in the following checks:

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions