Skip to content

Commit

Permalink
Merge pull request #58186 from ClickHouse/backport/23.3/58181
Browse files Browse the repository at this point in the history
Backport #58181 to 23.3: Remove parallel parsing for JSONCompactEachRow
  • Loading branch information
alexey-milovidov committed Dec 23, 2023
2 parents f217bdc + 1b5c007 commit 4cc06d1
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 22 deletions.
3 changes: 0 additions & 3 deletions src/Formats/registerFormats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory);
void registerFileSegmentationEngineRegexp(FormatFactory & factory);
void registerFileSegmentationEngineJSONAsString(FormatFactory & factory);
void registerFileSegmentationEngineJSONAsObject(FormatFactory & factory);
void registerFileSegmentationEngineJSONCompactEachRow(FormatFactory & factory);
#if USE_HIVE
void registerFileSegmentationEngineHiveText(FormatFactory & factory);
#endif
Expand Down Expand Up @@ -153,7 +152,6 @@ void registerFormats()
registerFileSegmentationEngineJSONEachRow(factory);
registerFileSegmentationEngineJSONAsString(factory);
registerFileSegmentationEngineJSONAsObject(factory);
registerFileSegmentationEngineJSONCompactEachRow(factory);
#if USE_HIVE
registerFileSegmentationEngineHiveText(factory);
#endif
Expand Down Expand Up @@ -277,4 +275,3 @@ void registerFormats()
}

}

19 changes: 0 additions & 19 deletions src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -262,23 +262,4 @@ void registerJSONCompactEachRowSchemaReader(FormatFactory & factory)
}
}

void registerFileSegmentationEngineJSONCompactEachRow(FormatFactory & factory)
{
    /// Registers a file segmentation engine for each WithNames/WithTypes
    /// variant of the two compact JSON row formats, so the input can be
    /// split into chunks for parallel parsing.
    auto register_func = [&](const String & format_name, bool with_names, bool with_types)
    {
        /// Header rows (column names and/or types) are not data rows:
        /// a segment must contain at least one actual data row beyond
        /// whatever headers the variant carries.
        const size_t min_rows = 1 + (with_names ? 1 : 0) + (with_types ? 1 : 0);
        factory.registerFileSegmentationEngine(
            format_name,
            [min_rows](ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows)
            {
                return JSONUtils::fileSegmentationEngineJSONCompactEachRow(in, memory, min_bytes, min_rows, max_rows);
            });
    };

    registerWithNamesAndTypes("JSONCompactEachRow", register_func);
    registerWithNamesAndTypes("JSONCompactStringsEachRow", register_func);
}

}
Binary file added tests/queries/0_stateless/02951_data.jsonl.zst
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
15021837090950060251
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/usr/bin/env bash
# Tags: no-parallel

# Regression test: read a zstd-compressed JSONCompactEachRow file with
# --input-format-parallel-parsing explicitly set to 1 and emit a single
# checksum over all rows. Related to the removal of the parallel-parsing
# (file segmentation) engine for JSONCompactEachRow in this commit —
# presumably segmentation could split this data incorrectly; the expected
# checksum lives in the companion .reference file (TODO confirm against
# issue #58181).

# Resolve this script's directory so the data file and shared shell config
# can be located relative to it.
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh

# sum(cityHash64(*)) folds every row into one aggregate value; sum is
# commutative, so the result does not depend on the order in which parsed
# chunks arrive. The explicit schema pins column types for the hash.
${CLICKHOUSE_LOCAL} --input-format-parallel-parsing 1 --query "
SELECT sum(cityHash64(*)) FROM file('$CUR_DIR/02951_data.jsonl.zst', JSONCompactEachRow, '
time_offset Decimal64(3),
lat Float64,
lon Float64,
altitude String,
ground_speed Float32,
track_degrees Float32,
flags UInt32,
vertical_rate Int32,
aircraft Tuple(
alert Int64,
alt_geom Int64,
gva Int64,
nac_p Int64,
nac_v Int64,
nic Int64,
nic_baro Int64,
rc Int64,
sda Int64,
sil Int64,
sil_type String,
spi Int64,
track Float64,
type String,
version Int64,
category String,
emergency String,
flight String,
squawk String,
baro_rate Int64,
nav_altitude_fms Int64,
nav_altitude_mcp Int64,
nav_modes Array(String),
nav_qnh Float64,
geom_rate Int64,
ias Int64,
mach Float64,
mag_heading Float64,
oat Int64,
roll Float64,
tas Int64,
tat Int64,
true_heading Float64,
wd Int64,
ws Int64,
track_rate Float64,
nav_heading Float64
),
source LowCardinality(String),
geometric_altitude Int32,
geometric_vertical_rate Int32,
indicated_airspeed Int32,
roll_angle Float32,
hex String
')"

0 comments on commit 4cc06d1

Please sign in to comment.