Skip to content

Commit

Permalink
Merge branch 'apache:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
AdamGS committed May 29, 2024
2 parents 16e5258 + 1634a65 commit 19ff9d0
Show file tree
Hide file tree
Showing 62 changed files with 2,592 additions and 799 deletions.
18 changes: 10 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,20 @@ Welcome to the [Rust][rust] implementation of [Apache Arrow], the popular in-mem

This repo contains the following main components:

| Crate | Description | Latest API Docs | README |
| ---------------- | --------------------------------------------------------- | ---------------------------------------------- | ------------------------------ |
| [`arrow`] | Core Arrow functionality (memory layout, arrays, kernels) | [docs.rs](https://docs.rs/arrow/latest) | [(README)][arrow-readme] |
| [`parquet`] | Parquet columnar file format | [docs.rs](https://docs.rs/parquet/latest) | [(README)][parquet-readme] |
| [`arrow-flight`] | Arrow-Flight IPC protocol | [docs.rs](https://docs.rs/arrow-flight/latest) | [(README)][flight-readme] |
| [`object-store`] | object store (aws, azure, gcp, local, in-memory) | [docs.rs](https://docs.rs/object_store/latest) | [(README)][objectstore-readme] |
| Crate | Description | Latest API Docs | README |
| ------------------ | ---------------------------------------------------------------------------- | ------------------------------------------------ | --------------------------------- |
| [`arrow`] | Core functionality (memory layout, arrays, low level computations) | [docs.rs](https://docs.rs/arrow/latest) | [(README)][arrow-readme] |
| [`arrow-flight`] | Support for Arrow-Flight IPC protocol | [docs.rs](https://docs.rs/arrow-flight/latest) | [(README)][flight-readme] |
| [`object-store`] | Support for object store interactions (aws, azure, gcp, local, in-memory) | [docs.rs](https://docs.rs/object_store/latest) | [(README)][objectstore-readme] |
| [`parquet`] | Support for Parquet columnar file format | [docs.rs](https://docs.rs/parquet/latest) | [(README)][parquet-readme] |
| [`parquet_derive`] | A crate for deriving RecordWriter/RecordReader for arbitrary, simple structs | [docs.rs](https://docs.rs/parquet-derive/latest) | [(README)][parquet-derive-readme] |

The current development version of the API documentation in this repo can be found [here](https://arrow.apache.org/rust).

[apache arrow]: https://arrow.apache.org/
[`arrow`]: https://crates.io/crates/arrow
[`parquet`]: https://crates.io/crates/parquet
[`parquet-derive`]: https://crates.io/crates/parquet-derive
[`parquet_derive`]: https://crates.io/crates/parquet-derive
[`arrow-flight`]: https://crates.io/crates/arrow-flight
[`object-store`]: https://crates.io/crates/object-store

Expand Down Expand Up @@ -86,7 +87,7 @@ There are two related crates in different repositories
| [`ballista`] | Distributed query execution | [(README)][ballista-readme] |

[`datafusion`]: https://crates.io/crates/datafusion
[`ballista`]: https://crates.io/crates/datafusion-ballista
[`ballista`]: https://crates.io/crates/ballista

Collectively, these crates support a wider array of functionality for analytic computations in Rust.

Expand Down Expand Up @@ -127,5 +128,6 @@ There is more information in the [contributing] guide.
[datafusion-readme]: https://github.com/apache/datafusion/blob/main/README.md
[ballista-readme]: https://github.com/apache/datafusion-ballista/blob/main/README.md
[objectstore-readme]: object_store/README.md
[parquet-derive-readme]: parquet_derive/README.md
[issues]: https://github.com/apache/arrow-rs/issues
[discussions]: https://github.com/apache/arrow-rs/discussions
43 changes: 20 additions & 23 deletions arrow-arith/src/numeric.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use arrow_array::cast::AsArray;
use arrow_array::timezone::Tz;
use arrow_array::types::*;
use arrow_array::*;
use arrow_buffer::ArrowNativeType;
use arrow_buffer::{ArrowNativeType, IntervalDayTime, IntervalMonthDayNano};
use arrow_schema::{ArrowError, DataType, IntervalUnit, TimeUnit};

use crate::arity::{binary, try_binary};
Expand Down Expand Up @@ -343,12 +343,12 @@ trait TimestampOp: ArrowTimestampType {
type Duration: ArrowPrimitiveType<Native = i64>;

fn add_year_month(timestamp: i64, delta: i32, tz: Tz) -> Option<i64>;
fn add_day_time(timestamp: i64, delta: i64, tz: Tz) -> Option<i64>;
fn add_month_day_nano(timestamp: i64, delta: i128, tz: Tz) -> Option<i64>;
fn add_day_time(timestamp: i64, delta: IntervalDayTime, tz: Tz) -> Option<i64>;
fn add_month_day_nano(timestamp: i64, delta: IntervalMonthDayNano, tz: Tz) -> Option<i64>;

fn sub_year_month(timestamp: i64, delta: i32, tz: Tz) -> Option<i64>;
fn sub_day_time(timestamp: i64, delta: i64, tz: Tz) -> Option<i64>;
fn sub_month_day_nano(timestamp: i64, delta: i128, tz: Tz) -> Option<i64>;
fn sub_day_time(timestamp: i64, delta: IntervalDayTime, tz: Tz) -> Option<i64>;
fn sub_month_day_nano(timestamp: i64, delta: IntervalMonthDayNano, tz: Tz) -> Option<i64>;
}

macro_rules! timestamp {
Expand All @@ -360,23 +360,23 @@ macro_rules! timestamp {
Self::add_year_months(left, right, tz)
}

fn add_day_time(left: i64, right: i64, tz: Tz) -> Option<i64> {
fn add_day_time(left: i64, right: IntervalDayTime, tz: Tz) -> Option<i64> {
Self::add_day_time(left, right, tz)
}

fn add_month_day_nano(left: i64, right: i128, tz: Tz) -> Option<i64> {
fn add_month_day_nano(left: i64, right: IntervalMonthDayNano, tz: Tz) -> Option<i64> {
Self::add_month_day_nano(left, right, tz)
}

fn sub_year_month(left: i64, right: i32, tz: Tz) -> Option<i64> {
Self::subtract_year_months(left, right, tz)
}

fn sub_day_time(left: i64, right: i64, tz: Tz) -> Option<i64> {
fn sub_day_time(left: i64, right: IntervalDayTime, tz: Tz) -> Option<i64> {
Self::subtract_day_time(left, right, tz)
}

fn sub_month_day_nano(left: i64, right: i128, tz: Tz) -> Option<i64> {
fn sub_month_day_nano(left: i64, right: IntervalMonthDayNano, tz: Tz) -> Option<i64> {
Self::subtract_month_day_nano(left, right, tz)
}
}
Expand Down Expand Up @@ -506,12 +506,12 @@ fn timestamp_op<T: TimestampOp>(
/// Note: these should be fallible (#4456)
trait DateOp: ArrowTemporalType {
fn add_year_month(timestamp: Self::Native, delta: i32) -> Self::Native;
fn add_day_time(timestamp: Self::Native, delta: i64) -> Self::Native;
fn add_month_day_nano(timestamp: Self::Native, delta: i128) -> Self::Native;
fn add_day_time(timestamp: Self::Native, delta: IntervalDayTime) -> Self::Native;
fn add_month_day_nano(timestamp: Self::Native, delta: IntervalMonthDayNano) -> Self::Native;

fn sub_year_month(timestamp: Self::Native, delta: i32) -> Self::Native;
fn sub_day_time(timestamp: Self::Native, delta: i64) -> Self::Native;
fn sub_month_day_nano(timestamp: Self::Native, delta: i128) -> Self::Native;
fn sub_day_time(timestamp: Self::Native, delta: IntervalDayTime) -> Self::Native;
fn sub_month_day_nano(timestamp: Self::Native, delta: IntervalMonthDayNano) -> Self::Native;
}

macro_rules! date {
Expand All @@ -521,23 +521,23 @@ macro_rules! date {
Self::add_year_months(left, right)
}

fn add_day_time(left: Self::Native, right: i64) -> Self::Native {
fn add_day_time(left: Self::Native, right: IntervalDayTime) -> Self::Native {
Self::add_day_time(left, right)
}

fn add_month_day_nano(left: Self::Native, right: i128) -> Self::Native {
fn add_month_day_nano(left: Self::Native, right: IntervalMonthDayNano) -> Self::Native {
Self::add_month_day_nano(left, right)
}

fn sub_year_month(left: Self::Native, right: i32) -> Self::Native {
Self::subtract_year_months(left, right)
}

fn sub_day_time(left: Self::Native, right: i64) -> Self::Native {
fn sub_day_time(left: Self::Native, right: IntervalDayTime) -> Self::Native {
Self::subtract_day_time(left, right)
}

fn sub_month_day_nano(left: Self::Native, right: i128) -> Self::Native {
fn sub_month_day_nano(left: Self::Native, right: IntervalMonthDayNano) -> Self::Native {
Self::subtract_month_day_nano(left, right)
}
}
Expand Down Expand Up @@ -1346,13 +1346,10 @@ mod tests {
IntervalMonthDayNanoType::make_value(35, -19, 41899000000000000)
])
);
let a = IntervalMonthDayNanoArray::from(vec![i64::MAX as i128]);
let b = IntervalMonthDayNanoArray::from(vec![1]);
let a = IntervalMonthDayNanoArray::from(vec![IntervalMonthDayNano::MAX]);
let b = IntervalMonthDayNanoArray::from(vec![IntervalMonthDayNano::ONE]);
let err = add(&a, &b).unwrap_err().to_string();
assert_eq!(
err,
"Compute error: Overflow happened on: 9223372036854775807 + 1"
);
assert_eq!(err, "Compute error: Overflow happened on: 2147483647 + 1");
}

fn test_duration_impl<T: ArrowPrimitiveType<Native = i64>>() {
Expand Down
14 changes: 12 additions & 2 deletions arrow-array/src/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use arrow_buffer::{i256, ArrowNativeType};
use arrow_buffer::{i256, ArrowNativeType, IntervalDayTime, IntervalMonthDayNano};
use arrow_schema::ArrowError;
use half::f16;
use num::complex::ComplexFloat;
Expand Down Expand Up @@ -139,7 +139,10 @@ pub trait ArrowNativeTypeOp: ArrowNativeType {

macro_rules! native_type_op {
($t:tt) => {
native_type_op!($t, 0, 1, $t::MIN, $t::MAX);
native_type_op!($t, 0, 1);
};
($t:tt, $zero:expr, $one: expr) => {
native_type_op!($t, $zero, $one, $t::MIN, $t::MAX);
};
($t:tt, $zero:expr, $one: expr, $min: expr, $max: expr) => {
impl ArrowNativeTypeOp for $t {
Expand Down Expand Up @@ -284,6 +287,13 @@ native_type_op!(u32);
native_type_op!(u64);
native_type_op!(i256, i256::ZERO, i256::ONE, i256::MIN, i256::MAX);

native_type_op!(IntervalDayTime, IntervalDayTime::ZERO, IntervalDayTime::ONE);
native_type_op!(
IntervalMonthDayNano,
IntervalMonthDayNano::ZERO,
IntervalMonthDayNano::ONE
);

macro_rules! native_type_float_op {
($t:tt, $zero:expr, $one:expr, $min:expr, $max:expr) => {
impl ArrowNativeTypeOp for $t {
Expand Down
2 changes: 1 addition & 1 deletion arrow-array/src/array/dictionary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -946,7 +946,7 @@ where
/// return Ok(d.with_values(r));
/// }
/// downcast_primitive_array! {
/// a => Ok(Arc::new(a.iter().map(|x| x.map(|x| x.to_string())).collect::<StringArray>())),
/// a => Ok(Arc::new(a.iter().map(|x| x.map(|x| format!("{x:?}"))).collect::<StringArray>())),
/// d => Err(ArrowError::InvalidArgumentError(format!("{d:?} not supported")))
/// }
/// }
Expand Down
3 changes: 2 additions & 1 deletion arrow-array/src/array/fixed_size_list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,8 @@ impl FixedSizeListArray {
|| nulls
.as_ref()
.map(|n| n.expand(size as _).contains(&a))
.unwrap_or_default();
.unwrap_or_default()
|| (nulls.is_none() && a.null_count() == 0);

if !nulls_valid {
return Err(ArrowError::InvalidArgumentError(format!(
Expand Down
52 changes: 33 additions & 19 deletions arrow-array/src/array/primitive_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1502,6 +1502,7 @@ mod tests {
use crate::builder::{Decimal128Builder, Decimal256Builder};
use crate::cast::downcast_array;
use crate::BooleanArray;
use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano};
use arrow_schema::TimeUnit;

#[test]
Expand Down Expand Up @@ -1624,33 +1625,46 @@ mod tests {
assert_eq!(-5, arr.value(2));
assert_eq!(-5, arr.values()[2]);

// a day_time interval contains days and milliseconds, but we do not yet have accessors for the values
let arr = IntervalDayTimeArray::from(vec![Some(1), None, Some(-5)]);
let v0 = IntervalDayTime {
days: 34,
milliseconds: 1,
};
let v2 = IntervalDayTime {
days: -2,
milliseconds: -5,
};

let arr = IntervalDayTimeArray::from(vec![Some(v0), None, Some(v2)]);

assert_eq!(3, arr.len());
assert_eq!(0, arr.offset());
assert_eq!(1, arr.null_count());
assert_eq!(1, arr.value(0));
assert_eq!(1, arr.values()[0]);
assert_eq!(v0, arr.value(0));
assert_eq!(v0, arr.values()[0]);
assert!(arr.is_null(1));
assert_eq!(-5, arr.value(2));
assert_eq!(-5, arr.values()[2]);
assert_eq!(v2, arr.value(2));
assert_eq!(v2, arr.values()[2]);

// a month_day_nano interval contains months, days and nanoseconds,
// but we do not yet have accessors for the values.
// TODO: implement month, day, and nanos access method for month_day_nano.
let arr = IntervalMonthDayNanoArray::from(vec![
Some(100000000000000000000),
None,
Some(-500000000000000000000),
]);
let v0 = IntervalMonthDayNano {
months: 2,
days: 34,
nanoseconds: -1,
};
let v2 = IntervalMonthDayNano {
months: -3,
days: -2,
nanoseconds: 4,
};

let arr = IntervalMonthDayNanoArray::from(vec![Some(v0), None, Some(v2)]);
assert_eq!(3, arr.len());
assert_eq!(0, arr.offset());
assert_eq!(1, arr.null_count());
assert_eq!(100000000000000000000, arr.value(0));
assert_eq!(100000000000000000000, arr.values()[0]);
assert_eq!(v0, arr.value(0));
assert_eq!(v0, arr.values()[0]);
assert!(arr.is_null(1));
assert_eq!(-500000000000000000000, arr.value(2));
assert_eq!(-500000000000000000000, arr.values()[2]);
assert_eq!(v2, arr.value(2));
assert_eq!(v2, arr.values()[2]);
}

#[test]
Expand Down Expand Up @@ -2460,7 +2474,7 @@ mod tests {
expected = "PrimitiveArray expected data type Interval(MonthDayNano) got Interval(DayTime)"
)]
fn test_invalid_interval_type() {
let array = IntervalDayTimeArray::from(vec![1, 2, 3]);
let array = IntervalDayTimeArray::from(vec![IntervalDayTime::ZERO]);
let _ = IntervalMonthDayNanoArray::from(array.into_data());
}

Expand Down
Loading

0 comments on commit 19ff9d0

Please sign in to comment.