Skip to content

Commit 0d19987

Browse files
committed
Add warning
1 parent fff7414 commit 0d19987

File tree

1 file changed

+13
-0
lines changed

1 file changed

+13
-0
lines changed

pyiceberg/io/pyarrow.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -171,6 +171,7 @@
171171
from pyiceberg.utils.concurrent import ExecutorFactory
172172
from pyiceberg.utils.config import Config
173173
from pyiceberg.utils.datetime import millis_to_datetime
174+
from pyiceberg.utils.deprecated import deprecation_message
174175
from pyiceberg.utils.properties import get_first_property_value, property_as_bool, property_as_int
175176
from pyiceberg.utils.singleton import Singleton
176177
from pyiceberg.utils.truncate import truncate_upper_bound_binary_string, truncate_upper_bound_text_string
@@ -1657,16 +1658,26 @@ class ArrowProjectionVisitor(SchemaWithPartnerVisitor[pa.Array, Optional[pa.Arra
16571658
_file_schema: Schema
16581659
_include_field_ids: bool
16591660
_downcast_ns_timestamp_to_us: bool
1661+
_use_large_types: Optional[bool]
16601662

16611663
def __init__(
16621664
self,
16631665
file_schema: Schema,
16641666
downcast_ns_timestamp_to_us: bool = False,
16651667
include_field_ids: bool = False,
1668+
use_large_types: Optional[bool] = None,
16661669
) -> None:
16671670
self._file_schema = file_schema
16681671
self._include_field_ids = include_field_ids
16691672
self._downcast_ns_timestamp_to_us = downcast_ns_timestamp_to_us
1673+
self._use_large_types = use_large_types
1674+
1675+
if use_large_types is not None:
1676+
deprecation_message(
1677+
deprecated_in="0.10.0",
1678+
removed_in="0.11.0",
1679+
help_message="Argument `use_large_types` will be removed from ArrowProjectionVisitor",
1680+
)
16701681

16711682
def _cast_if_needed(self, field: NestedField, values: pa.Array) -> pa.Array:
16721683
file_field = self._file_schema.find_field(field.field_id)
@@ -1676,6 +1687,8 @@ def _cast_if_needed(self, field: NestedField, values: pa.Array) -> pa.Array:
16761687
target_schema = schema_to_pyarrow(
16771688
promote(file_field.field_type, field.field_type), include_field_ids=self._include_field_ids
16781689
)
1690+
if self._use_large_types is False:
1691+
target_schema = _pyarrow_schema_ensure_small_types(target_schema)
16791692
return values.cast(target_schema)
16801693
elif (target_type := schema_to_pyarrow(field.field_type, include_field_ids=self._include_field_ids)) != values.type:
16811694
if field.field_type == TimestampType():

0 commit comments

Comments
 (0)