Skip to content

Commit

Permalink
Backport #40241 to 21.8: Fix possible segfault in CapnProto input format
Browse files Browse the repository at this point in the history
  • Loading branch information
robot-clickhouse committed Aug 17, 2022
1 parent 6af2d80 commit ca79d45
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 1 deletion.
8 changes: 7 additions & 1 deletion src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ namespace ErrorCodes
extern const int BAD_TYPE_OF_FIELD;
extern const int THERE_IS_NO_COLUMN;
extern const int LOGICAL_ERROR;
extern const int INCORRECT_DATA;
}

static CapnProtoRowInputFormat::NestedField split(const Block & header, size_t i)
Expand Down Expand Up @@ -206,7 +207,12 @@ CapnProtoRowInputFormat::CapnProtoRowInputFormat(ReadBuffer & in_, Block header,
kj::Array<capnp::word> CapnProtoRowInputFormat::readMessage()
{
uint32_t segment_count;
in.readStrict(reinterpret_cast<char*>(&segment_count), sizeof(uint32_t));
in->readStrict(reinterpret_cast<char*>(&segment_count), sizeof(uint32_t));
/// Don't allow a large number of segments, mirroring the check done in the capnproto library:
/// https://github.com/capnproto/capnproto/blob/931074914eda9ca574b5c24d1169c0f7a5156594/c%2B%2B/src/capnp/serialize.c%2B%2B#L181
/// A large number of segments most likely indicates that the data was corrupted.
if (segment_count >= 512)
throw Exception(ErrorCodes::INCORRECT_DATA, "Message has too many segments. Most likely, data was corrupted");

// one for segmentCount and one because segmentCount starts from 0
const auto prefix_size = (2 + segment_count) * sizeof(uint32_t);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
OK
23 changes: 23 additions & 0 deletions tests/queries/0_stateless/02402_capnp_format_segments_overflow.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/usr/bin/env bash
# Tags: no-fasttest, no-parallel, no-replicated-database

# Regression test for the CapnProto input format: reading data_capnp/overflow.capnp
# must be rejected with INCORRECT_DATA. The script prints OK when that error code
# appears in the client output and FAIL otherwise.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

# Discover the server's user_files directory: query a non-existent file and take the
# 9th whitespace-separated field of the exception line, with "/nonexist.txt" stripped.
# NOTE(review): this relies on the exact wording of the exception message, and it uses a
# bare `clickhouse-client` rather than $CLICKHOUSE_CLIENT -- confirm that is intentional.
USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
mkdir -p "$USER_FILES_PATH"/test_02402
cp "$CURDIR"/data_capnp/overflow.capnp "$USER_FILES_PATH"/test_02402/

# Discover the server's format schema directory the same way: request a non-existent
# schema and extract the text between "file " and "/nonexist.capnp" from the exception.
SCHEMADIR=$(clickhouse-client --query "select * from file('test_02402/overflow.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)")

CLIENT_SCHEMADIR=$CURDIR/format_schemas
SERVER_SCHEMADIR=test_02402

# Copy this test's schemas into a per-test subdirectory of the schema directory.
# The glob stays outside the quotes so that it is still expanded.
mkdir -p "$SCHEMADIR/$SERVER_SCHEMADIR"
cp -r "$CLIENT_SCHEMADIR"/02402_* "$SCHEMADIR/$SERVER_SCHEMADIR"/

# $CLICKHOUSE_CLIENT is intentionally left unquoted: it comes from shell_config.sh and
# presumably expands to a command plus arguments -- TODO confirm.
$CLICKHOUSE_CLIENT --query="SELECT * FROM file('test_02402/overflow.capnp', 'CapnProto') SETTINGS format_schema='$SERVER_SCHEMADIR/02402_overflow:CapnProto'" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL';

# Clean up. The ":?" guards abort instead of running `rm -rf` on a path rooted at "/"
# if either directory lookup above produced an empty string.
rm -rf "${USER_FILES_PATH:?}"/test_02402
rm -rf "${SCHEMADIR:?}/${SERVER_SCHEMADIR:?}"
Binary file not shown.
30 changes: 30 additions & 0 deletions tests/queries/0_stateless/format_schemas/02402_overflow.capnp
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Unique 64-bit ID of this Cap'n Proto schema file.
@0x803231eaa402b968;

struct NestedNestedOne
{
# Innermost struct carrying a single unsigned 64-bit integer.
# Embedded by both NestedOne and NestedTwo.
nestednestednumber @0 : UInt64;
}
struct NestedNestedTwo
{
# Innermost struct carrying a single Text field.
# Embedded by NestedOne and NestedTwo, and used directly by CapnProto.nestedthree.
nestednestedtext @0 : Text;
}
struct NestedOne
{
# Mid-level struct: both innermost structs plus a UInt64 value of its own.
nestednestedone @0 : NestedNestedOne;
nestednestedtwo @1 : NestedNestedTwo;
nestednumber @2: UInt64;
}
struct NestedTwo
{
# Mid-level struct: both innermost structs plus a Text value of its own.
# Same layout as NestedOne except that the third field is Text instead of UInt64.
nestednestedone @0 : NestedNestedOne;
nestednestedtwo @1 : NestedNestedTwo;
nestedtext @2 : Text;
}
struct CapnProto
{
# Top-level message struct of this schema: two scalar fields followed by three
# nested struct fields.
# The 02402 test selects it by name through format_schema='.../02402_overflow:CapnProto'.
number @0 : UInt64;
string @1 : Text;
nestedone @2 : NestedOne;
nestedtwo @3 : NestedTwo;
nestedthree @4 : NestedNestedTwo;
}

0 comments on commit ca79d45

Please sign in to comment.