171
171
from pyiceberg .utils .concurrent import ExecutorFactory
172
172
from pyiceberg .utils .config import Config
173
173
from pyiceberg .utils .datetime import millis_to_datetime
174
+ from pyiceberg .utils .deprecated import deprecation_message
174
175
from pyiceberg .utils .properties import get_first_property_value , property_as_bool , property_as_int
175
176
from pyiceberg .utils .singleton import Singleton
176
177
from pyiceberg .utils .truncate import truncate_upper_bound_binary_string , truncate_upper_bound_text_string
@@ -1657,16 +1658,26 @@ class ArrowProjectionVisitor(SchemaWithPartnerVisitor[pa.Array, Optional[pa.Arra
1657
1658
_file_schema : Schema
1658
1659
_include_field_ids : bool
1659
1660
_downcast_ns_timestamp_to_us : bool
1661
+ _use_large_types : Optional [bool ]
1660
1662
1661
1663
def __init__(
    self,
    file_schema: Schema,
    downcast_ns_timestamp_to_us: bool = False,
    include_field_ids: bool = False,
    use_large_types: Optional[bool] = None,
) -> None:
    """Record the projection settings on the visitor.

    Args:
        file_schema: Schema of the file being read; `_cast_if_needed` looks
            fields up in it by field id.
        downcast_ns_timestamp_to_us: Stored as `_downcast_ns_timestamp_to_us`.
        include_field_ids: Stored as `_include_field_ids` and forwarded to
            `schema_to_pyarrow` when target types are built.
        use_large_types: Deprecated. Any value other than `None` emits a
            deprecation message. The value is still stored, and an explicit
            `False` makes `_cast_if_needed` convert the promoted target
            schema to small types.
    """
    # Schema the incoming arrays were written with.
    self._file_schema = file_schema

    # Behaviour switches, kept verbatim from the arguments.
    self._use_large_types = use_large_types
    self._downcast_ns_timestamp_to_us = downcast_ns_timestamp_to_us
    self._include_field_ids = include_field_ids

    # Only callers that explicitly pass the legacy flag get the warning.
    if use_large_types is None:
        return

    deprecation_message(
        deprecated_in="0.10.0",
        removed_in="0.11.0",
        help_message="Argument `use_large_types` will be removed from ArrowProjectionVisitor",
    )
1670
1681
1671
1682
def _cast_if_needed (self , field : NestedField , values : pa .Array ) -> pa .Array :
1672
1683
file_field = self ._file_schema .find_field (field .field_id )
@@ -1676,6 +1687,8 @@ def _cast_if_needed(self, field: NestedField, values: pa.Array) -> pa.Array:
1676
1687
target_schema = schema_to_pyarrow (
1677
1688
promote (file_field .field_type , field .field_type ), include_field_ids = self ._include_field_ids
1678
1689
)
1690
+ if self ._use_large_types is False :
1691
+ target_schema = _pyarrow_schema_ensure_small_types (target_schema )
1679
1692
return values .cast (target_schema )
1680
1693
elif (target_type := schema_to_pyarrow (field .field_type , include_field_ids = self ._include_field_ids )) != values .type :
1681
1694
if field .field_type == TimestampType ():
0 commit comments