In [None]:
import pyarrow as pa
from adbc.reader import BatchReader, LazyReader

In [None]:
# Ten-row sample table: an integer column "0" (0..9) and a string column
# "part" that is "ABC" for the first four rows and "DEF" for the rest.
data = pa.Table.from_pydict(
    {
        "0": list(range(10)),
        "part": ["ABC" if i < 4 else "DEF" for i in range(10)],
    }
)

# Create

## Stream batch reader

Simply iterates over batches. By default, treat it as a stream that can be read only once.

In [None]:
# Wrap the in-memory table in a BatchReader.
reader = BatchReader.from_arrow(data)
# Read everything in one call; the default reader should be treated as readable only once.
reader.read_all()

## Lazy

Evaluates the supplied method on each call.

In [None]:
def _fresh_reader(arg=None):
    """Return a new BatchReader over `data`; `arg` is accepted but unused."""
    return BatchReader.from_arrow(data)


# Hand the factory to LazyReader instead of a ready-made reader.
lazy_reader = LazyReader(_fresh_reader, schema=None)
lazy_reader.read_all()

## Persist in memory

Returns a new BatchReader persisted in RAM. Useful for streaming sources; useless if the reader was created from an in-memory object such as pyarrow.Table or pyarrow.RecordBatch.

In [None]:
# `reader` has already been consumed by the earlier `read_all()` call, and the
# default reader should be treated as readable only once. Persist a fresh
# reader over `data` so the persisted copy is guaranteed to hold all rows.
persisted = BatchReader.from_arrow(data).persist()

## Cast

### Arrow Schema

Selects columns and casts their types to match the schema.

`fill_empty`: columns not found in the data are filled with empty values, provided the field is nullable.

`drop`: schema columns that are not found in the data are dropped.

In [None]:
# Target schema with a single int8 field named "0".
target_schema = pa.schema([pa.field("0", pa.int8())])
casted = persisted.cast(
    target_schema,
    safe=True,
    fill_empty=True,
    drop=False,
)
casted.read_all()

### Columns

In [None]:
# Map column name -> target Arrow type; only column "0" is listed here.
column_types = {"0": pa.int16()}
casted = persisted.cast_columns(column_types)
casted.read_all()

### Custom datatypes

In [None]:
from adbc.dtype import ALL_DATATYPES, safe_datatype

# Display the value returned for a type string next to ALL_DATATYPES.
(
    safe_datatype("decimal(38,18)"),
    ALL_DATATYPES,
)

In [None]:
# Column cast with the target type given as a string rather than an Arrow type.
casted = persisted.cast_columns({"0": "double"})
casted.read_all()