Skip to content

Commit

Permalink
apacheGH-35228: [C++][Parquet] Minor: Comment typo fixing in Parquet Reader (apache#35229)
Browse files Browse the repository at this point in the history

### Rationale for this change

Change some comments

### What changes are included in this PR?

Change some comments in parquet reader

### Are these changes tested?

No tests are needed; this change only updates comments.

### Are there any user-facing changes?

no

* Closes: apache#35228

Authored-by: mwish <maplewish117@gmail.com>
Signed-off-by: Will Jones <willjones127@gmail.com>
  • Loading branch information
mapleFU authored and liujiacheng777 committed May 11, 2023
1 parent 7641e33 commit 1ad27e0
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 10 deletions.
6 changes: 3 additions & 3 deletions cpp/src/parquet/arrow/reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -754,7 +754,7 @@ Status StructReader::GetRepLevels(const int16_t** data, int64_t* length) {
*data = nullptr;
if (children_.size() == 0) {
*length = 0;
return Status::Invalid("StructReader had no childre");
return Status::Invalid("StructReader had no children");
}

// This method should only be called when this struct or one of its parents
Expand Down Expand Up @@ -870,7 +870,7 @@ Status GetReader(const SchemaField& field, const std::shared_ptr<Field>& arrow_f
return Status::OK();
}

// These two types might not be equal if there column pruning occurred.
// These two types might not be equal if column pruning occurred
// further down the stack.
const std::shared_ptr<DataType> reader_child_type = child_reader->field()->type();
// This should really never happen but was raised as a question on the code
Expand All @@ -892,7 +892,7 @@ Status GetReader(const SchemaField& field, const std::shared_ptr<Field>& arrow_f
*schema_child_type.field(1)->type())) {
list_field = list_field->WithType(std::make_shared<::arrow::MapType>(
reader_child_type->field(
0), // field 0 is unchanged baed on previous if statement
0), // field 0 is unchanged based on previous if statement
reader_child_type->field(1)));
}
// Map types are list<struct<key, value>> so use ListReader
Expand Down
10 changes: 5 additions & 5 deletions cpp/src/parquet/column_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1349,7 +1349,7 @@ class TypedRecordReader : public TypedColumnReaderImpl<DType>,
valid_bits_ = AllocateBuffer(pool);
def_levels_ = AllocateBuffer(pool);
rep_levels_ = AllocateBuffer(pool);
Reset();
TypedRecordReader::Reset();
}

// Compute the values capacity in bytes for the given number of elements
Expand All @@ -1367,7 +1367,7 @@ class TypedRecordReader : public TypedColumnReaderImpl<DType>,
// Delimit records, then read values at the end
int64_t records_read = 0;

if (levels_position_ < levels_written_) {
if (has_values_to_process()) {
records_read += ReadRecordData(num_records);
}

Expand Down Expand Up @@ -1525,7 +1525,7 @@ class TypedRecordReader : public TypedColumnReaderImpl<DType>,
int64_t values_seen = 0;
int64_t skipped_records = DelimitRecords(num_records, &values_seen);
ReadAndThrowAwayValues(values_seen);
// Mark those levels and values as consumed in the the underlying page.
// Mark those levels and values as consumed in the underlying page.
// This must be done before we throw away levels since it updates
// levels_position_ and levels_written_.
this->ConsumeBufferedValues(levels_position_ - start_levels_position);
Expand Down Expand Up @@ -1554,7 +1554,7 @@ class TypedRecordReader : public TypedColumnReaderImpl<DType>,

// If 'at_record_start_' is false, but (skipped_records == num_records), it
// means that for the last record that was counted, we have not seen all
// of it's values yet.
// of its values yet.
while (!at_record_start_ || skipped_records < num_records) {
// Is there more data to read in this row group?
// HasNextInternal() will advance to the next page if necessary.
Expand All @@ -1579,7 +1579,7 @@ class TypedRecordReader : public TypedColumnReaderImpl<DType>,
break;
}

// For skip we will read the levels and append them to the end
// For skipping we will read the levels and append them to the end
// of the def_levels and rep_levels just like for read.
ReserveLevels(batch_size);

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/parquet/column_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ class PARQUET_EXPORT RecordReader {
/// If this Reader was constructed with read_dense_for_nullable(), there is no space for
/// nulls and null_count() will be 0. There is no read-ahead/buffering for values. For
/// FLBA and ByteArray types this value reflects the values written with the last
/// ReadRecords call since thoser readers will reset the values after each call.
/// ReadRecords call since those readers will reset the values after each call.
int64_t values_written() const { return values_written_; }

/// \brief Number of definition / repetition levels (from those that have
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/parquet/level_conversion.cc
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ void DefRepLevelsToList(const int16_t* def_levels, const int16_t* rep_levels,
void DefRepLevelsToBitmap(const int16_t* def_levels, const int16_t* rep_levels,
int64_t num_def_levels, LevelInfo level_info,
ValidityBitmapInputOutput* output) {
// DefReplevelsToListInfo assumes it for the actual list method and this
// DefRepLevelsToListInfo assumes it for the actual list method and this
// method is for parent structs, so we need to bump def and ref level.
level_info.rep_level += 1;
level_info.def_level += 1;
Expand Down

0 comments on commit 1ad27e0

Please sign in to comment.