Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,22 @@ inline fun <reified KEY, reified VALUE> KeyValueGroupedDataset<KEY, VALUE>.reduc
reduceGroups(ReduceFunction(func))
.map { t -> t._1 to t._2 }

@JvmName("takeKeysTuple2")
inline fun <reified T1, T2> Dataset<Tuple2<T1, T2>>.takeKeys(): Dataset<T1> = map { it._1() }

inline fun <reified T1, T2> Dataset<Pair<T1, T2>>.takeKeys(): Dataset<T1> = map { it.first }

@JvmName("takeKeysArity2")
inline fun <reified T1, T2> Dataset<Arity2<T1, T2>>.takeKeys(): Dataset<T1> = map { it._1 }

@JvmName("takeValuesTuple2")
inline fun <T1, reified T2> Dataset<Tuple2<T1, T2>>.takeValues(): Dataset<T2> = map { it._2() }

inline fun <T1, reified T2> Dataset<Pair<T1, T2>>.takeValues(): Dataset<T2> = map { it.second }

@JvmName("takeValuesArity2")
inline fun <T1, reified T2> Dataset<Arity2<T1, T2>>.takeValues(): Dataset<T2> = map { it._2 }

inline fun <K, V, reified U> KeyValueGroupedDataset<K, V>.flatMapGroups(
noinline func: (key: K, values: Iterator<V>) -> Iterator<U>
): Dataset<U> = flatMapGroups(
Expand Down Expand Up @@ -238,6 +254,8 @@ inline fun <reified R> Dataset<*>.to(): Dataset<R> = `as`(encoder<R>())

inline fun <reified T> Dataset<T>.forEach(noinline func: (T) -> Unit) = foreach(ForeachFunction(func))

inline fun <reified T> Dataset<T>.forEachPartition(noinline func: (Iterator<T>) -> Unit) = foreachPartition(ForeachPartitionFunction(func))

/**
* It's hard to call `Dataset.debugCodegen` from kotlin, so here is utility for that
*/
Expand Down Expand Up @@ -718,6 +736,16 @@ inline fun <reified T, reified U> col(column: KProperty1<T, U>): TypedColumn<T,
*/
inline operator fun <reified T, reified U> Dataset<T>.invoke(column: KProperty1<T, U>): TypedColumn<T, U> = col(column)

/**
* Allows to sort data class dataset on one or more of the properties of the data class.
* ```kotlin
* val sorted: Dataset<YourClass> = unsorted.sort(YourClass::a)
* val sorted2: Dataset<YourClass> = unsorted.sort(YourClass::a, YourClass::b)
* ```
*/
fun <T> Dataset<T>.sort(col: KProperty1<T, *>, vararg cols: KProperty1<T, *>): Dataset<T> =
sort(col.name, *cols.map { it.name }.toTypedArray())

/**
* Alternative to [Dataset.show] which returns source dataset.
* Useful for debug purposes when you need to view content of a dataset as an intermediate operation
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -437,11 +437,43 @@ class ApiTest : ShouldSpec({

b.count() shouldBe 1
}
should("Allow simple forEachPartition in datasets") {
val dataset = dsOf(
SomeClass(intArrayOf(1, 2, 3), 1),
SomeClass(intArrayOf(4, 3, 2), 1),
)
dataset.forEachPartition {
it.forEach {
it.b shouldBe 1
}
}
}
should("Have easier access to keys and values for key/value datasets") {
val dataset: Dataset<SomeClass> = dsOf(
SomeClass(intArrayOf(1, 2, 3), 1),
SomeClass(intArrayOf(4, 3, 2), 1),
)
.groupByKey { it.b }
.reduceGroups(func = { a, b -> SomeClass(a.a + b.a, a.b) })
.takeValues()

dataset.count() shouldBe 1
}
should("Be able to sort datasets with property reference") {
val dataset: Dataset<SomeClass> = dsOf(
SomeClass(intArrayOf(1, 2, 3), 2),
SomeClass(intArrayOf(4, 3, 2), 1),
)
dataset.sort(SomeClass::b)
dataset.takeAsList(1).first().b shouldBe 2

dataset.sort(SomeClass::a, SomeClass::b)
dataset.takeAsList(1).first().b shouldBe 2
}
}
}
})


data class DataClassWithTuple<T : Product>(val tuple: T)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,23 @@ inline fun <reified KEY, reified VALUE> KeyValueGroupedDataset<KEY, VALUE>.reduc
reduceGroups(ReduceFunction(func))
.map { t -> t._1 to t._2 }

@JvmName("takeKeysTuple2")
inline fun <reified T1, T2> Dataset<Tuple2<T1, T2>>.takeKeys(): Dataset<T1> = map { it._1() }

inline fun <reified T1, T2> Dataset<Pair<T1, T2>>.takeKeys(): Dataset<T1> = map { it.first }

@JvmName("takeKeysArity2")
inline fun <reified T1, T2> Dataset<Arity2<T1, T2>>.takeKeys(): Dataset<T1> = map { it._1 }

@JvmName("takeValuesTuple2")
inline fun <T1, reified T2> Dataset<Tuple2<T1, T2>>.takeValues(): Dataset<T2> = map { it._2() }

inline fun <T1, reified T2> Dataset<Pair<T1, T2>>.takeValues(): Dataset<T2> = map { it.second }

@JvmName("takeValuesArity2")
inline fun <T1, reified T2> Dataset<Arity2<T1, T2>>.takeValues(): Dataset<T2> = map { it._2 }


inline fun <K, V, reified U> KeyValueGroupedDataset<K, V>.flatMapGroups(
noinline func: (key: K, values: Iterator<V>) -> Iterator<U>
): Dataset<U> = flatMapGroups(
Expand Down Expand Up @@ -234,6 +251,8 @@ inline fun <reified R> Dataset<*>.to(): Dataset<R> = `as`(encoder<R>())

inline fun <reified T> Dataset<T>.forEach(noinline func: (T) -> Unit) = foreach(ForeachFunction(func))

inline fun <reified T> Dataset<T>.forEachPartition(noinline func: (Iterator<T>) -> Unit) = foreachPartition(ForeachPartitionFunction(func))

/**
* It's hard to call `Dataset.debugCodegen` from kotlin, so here is utility for that
*/
Expand Down Expand Up @@ -714,6 +733,16 @@ inline fun <reified T, reified U> col(column: KProperty1<T, U>): TypedColumn<T,
*/
inline operator fun <reified T, reified U> Dataset<T>.invoke(column: KProperty1<T, U>): TypedColumn<T, U> = col(column)

/**
* Allows to sort data class dataset on one or more of the properties of the data class.
* ```kotlin
* val sorted: Dataset<YourClass> = unsorted.sort(YourClass::a)
* val sorted2: Dataset<YourClass> = unsorted.sort(YourClass::a, YourClass::b)
* ```
*/
fun <T> Dataset<T>.sort(col: KProperty1<T, *>, vararg cols: KProperty1<T, *>): Dataset<T> =
sort(col.name, *cols.map { it.name }.toTypedArray())

/**
* Alternative to [Dataset.show] which returns source dataset.
* Useful for debug purposes when you need to view content of a dataset as an intermediate operation
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,39 @@ class ApiTest : ShouldSpec({
)
dataset.show()
}
should("Allow simple forEachPartition in datasets") {
val dataset = dsOf(
SomeClass(intArrayOf(1, 2, 3), 1),
SomeClass(intArrayOf(4, 3, 2), 1),
)
dataset.forEachPartition {
it.forEach {
it.b shouldBe 1
}
}
}
should("Have easier access to keys and values for key/value datasets") {
val dataset: Dataset<SomeClass> = dsOf(
SomeClass(intArrayOf(1, 2, 3), 1),
SomeClass(intArrayOf(4, 3, 2), 1),
)
.groupByKey { it.b }
.reduceGroups(func = { a, b -> SomeClass(a.a + b.a, a.b) })
.takeValues()

dataset.count() shouldBe 1
}
should("Be able to sort datasets with property reference") {
val dataset: Dataset<SomeClass> = dsOf(
SomeClass(intArrayOf(1, 2, 3), 2),
SomeClass(intArrayOf(4, 3, 2), 1),
)
dataset.sort(SomeClass::b)
dataset.takeAsList(1).first().b shouldBe 2

dataset.sort(SomeClass::a, SomeClass::b)
dataset.takeAsList(1).first().b shouldBe 2
}
}
}
})
Expand Down