Skip to content

Commit

Permalink
Backport #55891 to 23.9: Try to fix possible segfault in Native ORC input format
Browse files Browse the repository at this point in the history
  • Loading branch information
robot-clickhouse committed Oct 24, 2023
1 parent e416cc3 commit 9eee41f
Showing 1 changed file with 18 additions and 11 deletions.
29 changes: 18 additions & 11 deletions src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp
Expand Up @@ -496,16 +496,21 @@ readColumnWithStringData(const orc::ColumnVectorBatch * orc_column, const orc::T
const auto * orc_str_column = dynamic_cast<const orc::StringVectorBatch *>(orc_column);
size_t reserver_size = 0;
for (size_t i = 0; i < orc_str_column->numElements; ++i)
reserver_size += orc_str_column->length[i] + 1;
{
if (!orc_str_column->hasNulls || orc_str_column->notNull[i])
reserver_size += orc_str_column->length[i];
reserver_size += 1;
}

column_chars_t.reserve(reserver_size);
column_offsets.reserve(orc_str_column->numElements);

size_t curr_offset = 0;
for (size_t i = 0; i < orc_str_column->numElements; ++i)
{
const auto * buf = orc_str_column->data[i];
if (buf)
if (!orc_str_column->hasNulls || orc_str_column->notNull[i])
{
const auto * buf = orc_str_column->data[i];
size_t buf_size = orc_str_column->length[i];
column_chars_t.insert_assume_reserved(buf, buf + buf_size);
curr_offset += buf_size;
Expand All @@ -531,7 +536,7 @@ readColumnWithFixedStringData(const orc::ColumnVectorBatch * orc_column, const o
const auto * orc_str_column = dynamic_cast<const orc::StringVectorBatch *>(orc_column);
for (size_t i = 0; i < orc_str_column->numElements; ++i)
{
if (orc_str_column->data[i])
if (!orc_str_column->hasNulls || orc_str_column->notNull[i])
column_chars_t.insert_assume_reserved(orc_str_column->data[i], orc_str_column->data[i] + orc_str_column->length[i]);
else
column_chars_t.resize_fill(column_chars_t.size() + fixed_len);
Expand Down Expand Up @@ -580,7 +585,7 @@ readIPv6ColumnFromBinaryData(const orc::ColumnVectorBatch * orc_column, const or
for (size_t i = 0; i < orc_str_column->numElements; ++i)
{
/// If at least one value size is not 16 bytes, fallback to reading String column and further cast to IPv6.
if (orc_str_column->data[i] && orc_str_column->length[i] != sizeof(IPv6))
if ((!orc_str_column->hasNulls || orc_str_column->notNull[i]) && orc_str_column->length[i] != sizeof(IPv6))
return readColumnWithStringData(orc_column, orc_type, column_name);
}

Expand All @@ -591,10 +596,10 @@ readIPv6ColumnFromBinaryData(const orc::ColumnVectorBatch * orc_column, const or

for (size_t i = 0; i < orc_str_column->numElements; ++i)
{
if (!orc_str_column->data[i]) [[unlikely]]
ipv6_column.insertDefault();
else
if (!orc_str_column->hasNulls || orc_str_column->notNull[i])
ipv6_column.insertData(orc_str_column->data[i], orc_str_column->length[i]);
else
ipv6_column.insertDefault();
}

return {std::move(internal_column), std::move(internal_type), column_name};
Expand Down Expand Up @@ -628,9 +633,7 @@ static ColumnWithTypeAndName readColumnWithBigNumberFromBinaryData(

for (size_t i = 0; i < orc_str_column->numElements; ++i)
{
if (!orc_str_column->data[i]) [[unlikely]]
integer_column.insertDefault();
else
if (!orc_str_column->hasNulls || orc_str_column->notNull[i])
{
if (sizeof(typename ColumnType::ValueType) != orc_str_column->length[i])
throw Exception(
Expand All @@ -642,6 +645,10 @@ static ColumnWithTypeAndName readColumnWithBigNumberFromBinaryData(

integer_column.insertData(orc_str_column->data[i], orc_str_column->length[i]);
}
else
{
integer_column.insertDefault();
}
}
return {std::move(internal_column), column_type, column_name};
}
Expand Down

0 comments on commit 9eee41f

Please sign in to comment.