Skip to content

Commit

Permalink
apacheGH-35279: [C++][Parquet] Tools: enhancement Parquet print stats (
Browse files Browse the repository at this point in the history
…apache#35262)

### Rationale for this change

Improve how values are printed for Parquet statistics.

### What changes are included in this PR?

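1. Improve DebugPrintStats for RecordReader: the "def levels" and "rep levels" lines are printed only when `leaf_info_.def_level` / `leaf_info_.rep_level` is greater than zero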
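2. Improve the stats printer: `DistinctValues` is emitted only when `HasDistinctCount()` is true, and `Max`/`Min` only when `HasMinMax()` is true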

### Are these changes tested?

No

### Are there any user-facing changes?

No

* Closes: apache#35279

Authored-by: mwish <maplewish117@gmail.com>
Signed-off-by: Will Jones <willjones127@gmail.com>
  • Loading branch information
mapleFU authored and liujiacheng777 committed May 11, 2023
1 parent 20c6665 commit 555c1b0
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 13 deletions.
20 changes: 12 additions & 8 deletions cpp/src/parquet/column_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1989,17 +1989,21 @@ class TypedRecordReader : public TypedColumnReaderImpl<DType>,

const T* vals = reinterpret_cast<const T*>(this->values());

std::cout << "def levels: ";
for (int64_t i = 0; i < total_levels_read; ++i) {
std::cout << def_levels[i] << " ";
if (leaf_info_.def_level > 0) {
std::cout << "def levels: ";
for (int64_t i = 0; i < total_levels_read; ++i) {
std::cout << def_levels[i] << " ";
}
std::cout << std::endl;
}
std::cout << std::endl;

std::cout << "rep levels: ";
for (int64_t i = 0; i < total_levels_read; ++i) {
std::cout << rep_levels[i] << " ";
if (leaf_info_.rep_level > 0) {
std::cout << "rep levels: ";
for (int64_t i = 0; i < total_levels_read; ++i) {
std::cout << rep_levels[i] << " ";
}
std::cout << std::endl;
}
std::cout << std::endl;

std::cout << "values: ";
for (int64_t i = 0; i < this->values_written(); ++i) {
Expand Down
17 changes: 12 additions & 5 deletions cpp/src/parquet/printer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -282,11 +282,18 @@ void ParquetFilePrinter::JSONPrint(std::ostream& stream, std::list<int> selected
if (column_chunk->is_stats_set()) {
stream << "\"True\", \"Stats\": {";
std::string min = stats->EncodeMin(), max = stats->EncodeMax();
stream << "\"NumNulls\": \"" << stats->null_count() << "\", "
<< "\"DistinctValues\": \"" << stats->distinct_count() << "\", "
<< "\"Max\": \"" << FormatStatValue(descr->physical_type(), max) << "\", "
<< "\"Min\": \"" << FormatStatValue(descr->physical_type(), min)
<< "\" },";
stream << "\"NumNulls\": \"" << stats->null_count();
if (stats->HasDistinctCount()) {
stream << "\", "
<< "\"DistinctValues\": \"" << stats->distinct_count();
}
if (stats->HasMinMax()) {
stream << "\", "
<< "\"Max\": \"" << FormatStatValue(descr->physical_type(), max)
<< "\", "
<< "\"Min\": \"" << FormatStatValue(descr->physical_type(), min);
}
stream << "\" },";
} else {
stream << "\"False\",";
}
Expand Down

0 comments on commit 555c1b0

Please sign in to comment.