Skip to content

Commit

Permalink
Fix size_bytes for logical types (#724)
Browse files Browse the repository at this point in the history
* `.size_bytes` currently panics if run on logical types (e.g. dates)
* This PR fixes that behavior by first casting into a Physical type
  • Loading branch information
jaychia committed Mar 20, 2023
1 parent 0cc3a70 commit e7df931
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 8 deletions.
2 changes: 1 addition & 1 deletion src/python/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ impl PySeries {
}

pub fn size_bytes(&self) -> PyResult<usize> {
Ok(self.series.size_bytes())
Ok(self.series.size_bytes()?)
}

pub fn name(&self) -> PyResult<String> {
Expand Down
2 changes: 1 addition & 1 deletion src/python/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ impl PyTable {
}

pub fn size_bytes(&self) -> PyResult<usize> {
Ok(self.table.size_bytes())
Ok(self.table.size_bytes()?)
}

pub fn column_names(&self) -> PyResult<Vec<String>> {
Expand Down
11 changes: 7 additions & 4 deletions src/series/ops/len.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
use crate::error::DaftResult;
use crate::{series::Series, with_match_comparable_daft_types};

impl Series {
pub fn len(&self) -> usize {
self.data_array.len()
}

pub fn size_bytes(&self) -> usize {
with_match_comparable_daft_types!(self.data_type(), |$T| {
let downcasted = self.downcast::<$T>().unwrap();
downcasted.size_bytes()
pub fn size_bytes(&self) -> DaftResult<usize> {
let s = self.as_physical()?;

with_match_comparable_daft_types!(s.data_type(), |$T| {
let downcasted = s.downcast::<$T>().unwrap();
Ok(downcasted.size_bytes())
})
}
}
6 changes: 4 additions & 2 deletions src/table/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,10 @@ impl Table {
self.take(&indices.into_series())
}

pub fn size_bytes(&self) -> usize {
self.columns.iter().map(|s| s.size_bytes()).sum::<usize>()
pub fn size_bytes(&self) -> DaftResult<usize> {
let column_sizes: DaftResult<Vec<usize>> =
self.columns.iter().map(|s| s.size_bytes()).collect();
Ok(column_sizes?.iter().sum())
}

pub fn filter(&self, predicate: &[Expr]) -> DaftResult<Self> {
Expand Down
21 changes: 21 additions & 0 deletions tests/series/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,27 @@ def test_series_boolean_size_bytes(size) -> None:
assert s.size_bytes() == data.nbytes


@pytest.mark.parametrize("size", [0, 1, 2, 8, 9, 16])
def test_series_date_size_bytes(size) -> None:
from datetime import date

pydata = [date(2023, 1, 1), date(2023, 1, 2), date(2023, 1, 3)]
data = pa.array(pydata)
s = Series.from_arrow(data)

assert s.datatype() == DataType.date()
assert s.size_bytes() == data.nbytes

## with nulls
if size > 0:
pydata = pydata[:-1] + [None]
data = pa.array(pydata, pa.date32())
s = Series.from_arrow(data)

assert s.datatype() == DataType.date()
assert s.size_bytes() == data.nbytes


@pytest.mark.parametrize("dtype", arrow_int_types + arrow_float_types)
def test_series_numeric_abs(dtype) -> None:
if pa.types.is_unsigned_integer(dtype):
Expand Down

0 comments on commit e7df931

Please sign in to comment.