Skip to content

Commit 6dd9308

Browse files
committed
Reinstate the table property
1 parent 0384b4e commit 6dd9308

File tree

1 file changed

+7
-1
lines changed

1 file changed

+7
-1
lines changed

pyiceberg/io/pyarrow.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -99,6 +99,7 @@
9999
HDFS_KERB_TICKET,
100100
HDFS_PORT,
101101
HDFS_USER,
102+
PYARROW_USE_LARGE_TYPES_ON_READ,
102103
S3_ACCESS_KEY_ID,
103104
S3_CONNECT_TIMEOUT,
104105
S3_ENDPOINT,
@@ -1560,11 +1561,16 @@ def _table_from_scan_task(task: FileScanTask) -> pa.Table:
15601561

15611562
tables = [f.result() for f in completed_futures if f.result()]
15621563

1564+
arrow_schema = schema_to_pyarrow(self._projected_schema, include_field_ids=False)
1565+
15631566
if len(tables) < 1:
1564-
return pa.Table.from_batches([], schema=schema_to_pyarrow(self._projected_schema, include_field_ids=False))
1567+
return pa.Table.from_batches([], schema=arrow_schema)
15651568

15661569
result = pa.concat_tables(tables, promote_options="permissive")
15671570

1571+
if property_as_bool(self._io.properties, PYARROW_USE_LARGE_TYPES_ON_READ, False):
1572+
result = result.cast(arrow_schema)
1573+
15681574
if self._limit is not None:
15691575
return result.slice(0, self._limit)
15701576

0 commit comments

Comments
 (0)