Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
288 changes: 288 additions & 0 deletions core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/last.kt
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,12 @@ import org.jetbrains.kotlinx.dataframe.columns.SingleColumn
import org.jetbrains.kotlinx.dataframe.columns.asColumnSet
import org.jetbrains.kotlinx.dataframe.columns.size
import org.jetbrains.kotlinx.dataframe.columns.values
import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls
import org.jetbrains.kotlinx.dataframe.documentation.DslGrammarTemplateColumnsSelectionDsl.DslGrammarTemplate
import org.jetbrains.kotlinx.dataframe.documentation.Indent
import org.jetbrains.kotlinx.dataframe.documentation.LineBreak
import org.jetbrains.kotlinx.dataframe.documentation.RowFilterDescription
import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns
import org.jetbrains.kotlinx.dataframe.impl.columns.TransformableColumnSet
import org.jetbrains.kotlinx.dataframe.impl.columns.singleOrNullWithTransformerImpl
import org.jetbrains.kotlinx.dataframe.impl.columns.transform
Expand All @@ -27,28 +30,153 @@ import kotlin.reflect.KProperty

// region DataColumn

/**
* Returns the last value in this [DataColumn].
*
* See also [lastOrNull], [first], [take], [takeLast].
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's also the @see tag if you just want to refer to other functions.

The only downside of @see is that you cannot add "extra" text to it. That's why we often write things like "See [something] for some reason."

However, if you don't need a description, you can simply write

@see [lastOrNull]
@see [first]

etc.

*
* @return The last value in this [DataColumn].
*
* @throws [IndexOutOfBoundsException] if the [DataColumn] is empty.
*/
public fun <T> DataColumn<T>.last(): T {
    // The last value sits at index `size - 1`; for an empty column this requests index -1.
    val lastIndex = size - 1
    return get(lastIndex)
}

/**
 * Returns the last value of this [DataColumn], or `null` when the [DataColumn] holds no values.
 *
 * @return The last value of this [DataColumn], or `null` if the [DataColumn] is empty.
 *
 * @see [last]
 * @see [first]
 * @see [take]
 * @see [takeLast]
 */
public fun <T> DataColumn<T>.lastOrNull(): T? =
    when {
        size > 0 -> last()
        else -> null
    }

/**
 * Finds the last value in this [DataColumn] for which the given [predicate] returns `true`.
 *
 * ### Example
 * ```kotlin
 * // In a DataFrame of financial transactions sorted by time,
 * // find the amount of the most recent financial transaction over 100 euros
 * df.amount.last { it > 100 }
 * ```
 *
 * @param predicate A lambda expression describing the condition the value has to satisfy.
 * It receives a value from the [DataColumn] and returns `true` if the value satisfies the condition,
 * or `false` otherwise.
 *
 * @return The last value in this [DataColumn] that matches the given [predicate].
 *
 * @throws [NoSuchElementException] if no value in the [DataColumn] matches the [predicate]
 * (in particular, if the [DataColumn] is empty).
 *
 * @see [lastOrNull]
 * @see [first]
 * @see [take]
 * @see [takeLast]
 */
public inline fun <T> DataColumn<T>.last(predicate: (T) -> Boolean): T =
    values.last { value -> predicate(value) }

/**
 * Finds the last value in this [DataColumn] for which the given [predicate] returns `true`.
 * If no value in the [DataColumn] matches the [predicate]
 * (in particular, if the [DataColumn] is empty), returns `null`.
 *
 * ### Example
 * ```kotlin
 * // In a DataFrame of financial transactions sorted by time,
 * // obtain the amount of the most recent financial transaction over 100 euros,
 * // or 'null' if there is no such transaction
 * df.amount.lastOrNull { it > 100 }
 * ```
 *
 * @param predicate A lambda expression describing the condition the value has to satisfy.
 * It receives a value from the [DataColumn] and returns `true` if the value satisfies the condition,
 * or `false` otherwise.
 *
 * @return The last value in this [DataColumn] that matches the given [predicate],
 * or `null` if the [DataColumn] contains no value matching the [predicate].
 *
 * @see [last]
 * @see [first]
 * @see [take]
 * @see [takeLast]
 */
public inline fun <T> DataColumn<T>.lastOrNull(predicate: (T) -> Boolean): T? =
    values.lastOrNull { value -> predicate(value) }

// endregion

// region DataFrame

/**
 * Finds the last [row][DataRow] of this [DataFrame] for which the given [predicate] returns `true`.
 * If no row in the [DataFrame] matches the [predicate]
 * (in particular, if the [DataFrame] is empty), returns `null`.
 *
 * {@include [RowFilterDescription]}
 *
 * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
 *
 * ### Example
 * ```kotlin
 * // In a DataFrame of financial transactions sorted by time,
 * // obtain the most recent financial transaction with amount over 100 euros,
 * // or 'null' if there is no such transaction
 * df.lastOrNull { amount > 100 }
 * ```
 *
 * @param predicate A [row filter][RowFilter] describing the condition the row has to satisfy.
 *
 * @return A [DataRow] containing the last row that matches the given [predicate],
 * or `null` if the [DataFrame] contains no rows matching the [predicate].
 *
 * @see [last]
 * @see [first]
 * @see [take]
 * @see [takeLast]
 * @see [takeWhile]
 */
public inline fun <T> DataFrame<T>.lastOrNull(predicate: RowFilter<T>): DataRow<T>? {
    // Rows are visited in reversed order, so the first match found is the last matching row.
    for (row in rowsReversed()) {
        if (predicate(row, row)) return row
    }
    return null
}

/**
 * Finds the last [row][DataRow] of this [DataFrame] for which the given [predicate] returns `true`.
 *
 * {@include [RowFilterDescription]}
 *
 * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
 *
 * ### Example
 * ```kotlin
 * // In a DataFrame of financial transactions sorted by time,
 * // find the most recent financial transaction with amount over 100 euros
 * df.last { amount > 100 }
 * ```
 *
 * @param predicate A [row filter][RowFilter] describing the condition the row has to satisfy.
 *
 * @return A [DataRow] containing the last row that matches the given [predicate].
 *
 * @throws [NoSuchElementException] if the [DataFrame] contains no rows matching the [predicate].
 *
 * @see [lastOrNull]
 * @see [first]
 * @see [take]
 * @see [takeLast]
 * @see [takeWhile]
 */
public inline fun <T> DataFrame<T>.last(predicate: RowFilter<T>): DataRow<T> =
    rowsReversed().first { row -> predicate(row, row) }

/**
 * Returns the last [row][DataRow] of this [DataFrame], or `null` when the [DataFrame] has no rows.
 *
 * @return A [DataRow] containing the last row in this [DataFrame], or `null` if the [DataFrame] is empty.
 *
 * @see [last]
 * @see [first]
 * @see [take]
 * @see [takeLast]
 */
public fun <T> DataFrame<T>.lastOrNull(): DataRow<T>? {
    if (nrow > 0) return get(nrow - 1)
    return null
}

/**
* Returns the last [row][DataRow] in this [DataFrame].
*
* See also [lastOrNull], [first], [take], [takeLast].
*
* @return A [DataRow] containing the last row in this [DataFrame].
*
* @throws [NoSuchElementException] if the [DataFrame] contains no rows.
*/
public fun <T> DataFrame<T>.last(): DataRow<T> {
if (nrow == 0) {
throw NoSuchElementException("DataFrame has no rows. Use `lastOrNull`.")
Expand All @@ -60,26 +188,186 @@ public fun <T> DataFrame<T>.last(): DataRow<T> {

// region GroupBy

/**
* Gets the last [row][DataRow] from each group of the given [GroupBy]
* and returns a [ReducedGroupBy] containing these rows
* (one row per group, each row is the last row in its group).
*
* If the group in [GroupBy] is empty,
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mentioned that it will return null for empty groups, but there is still the issue I mentioned for this case in #1547:
https://github.com/Kotlin/dataframe/pull/1547/files#r2505104735

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could you create an issue for that? I probably should have done that myself when finding the reproducer... But it makes it easier to track and refer to :)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, sure! I probably should have done it right away, I just wasn't 100% sure :)

* the corresponding row in [ReducedGroupBy] will contain `null` values for all columns in the group,
* except the column with the grouping key.
*
* See also [first].
*
* ### Example
* ```kotlin
* // In a DataFrame of order status logs sorted by time,
* // find the most recent status for each order
* df.groupBy { orderId }.last()
* ```
*
* @return A [ReducedGroupBy] containing the last [row][DataRow]
* (or a row with `null` values, except the grouping key) from each group.
*/
@Interpretable("GroupByReducePredicate")
public fun <T, G> GroupBy<T, G>.last(): ReducedGroupBy<T, G> {
    // `lastOrNull` is used rather than `last`, so an empty group produces `null` instead of an exception.
    return reduce { this.lastOrNull() }
}

/**
 * Picks, from each group of this [GroupBy], the last [row][DataRow] for which the given [predicate]
 * returns `true`, and returns a [ReducedGroupBy] made of these rows
 * (one row per group, each being the last row of its group that satisfies the [predicate]).
 *
 * If a group in [GroupBy] contains no matching rows,
 * the corresponding row in [ReducedGroupBy] will contain `null` values for all columns in the group,
 * except the grouping key.
 *
 * {@include [RowFilterDescription]}
 *
 * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
 *
 * ### Example
 * ```kotlin
 * // In a DataFrame of order status logs sorted by time,
 * // find the most recent status shown to the customer for each order
 * df.groupBy { orderId }.last { !isInternal }
 * ```
 *
 * @param predicate A [row filter][RowFilter] describing the condition the row has to satisfy.
 *
 * @return A [ReducedGroupBy] containing the last [row][DataRow] matching the [predicate]
 * (or a row with `null` values, except the grouping key) from each group.
 *
 * @see [first]
 */
@Interpretable("GroupByReducePredicate")
public fun <T, G> GroupBy<T, G>.last(predicate: RowFilter<G>): ReducedGroupBy<T, G> {
    return reduce { this.lastOrNull(predicate) }
}

// endregion

// region Pivot

/**
 * Reduces this [Pivot] to the last [row][DataRow] of each group and returns a [ReducedPivot]
 * in which every column holds the last row of the corresponding group.
 *
 * See also:
 * - [pivot];
 * - common [reduce][Pivot.reduce];
 * - [first].
 *
 * For more information about [Pivot] with examples: {@include [DocumentationUrls.Pivot]}
 *
 * ### Example
 * ```kotlin
 * // In a DataFrame of real estate listings, find the most recent (if sorted by date and time)
 * // or the most expensive (if sorted by price) listing for each type of property (house, apartment, etc.)
 * df.pivot { type }.last()
 * ```
 *
 * @return A [ReducedPivot] containing in each column the last [row][DataRow] from the corresponding group.
 */
public fun <T> Pivot<T>.last(): ReducedPivot<T> {
    return reduce { this.lastOrNull() }
}

/**
 * Reduces this [Pivot] by picking, from each group, the last [row][DataRow] for which
 * the given [predicate] returns `true`, and returns a [ReducedPivot] in which every column holds
 * the last row of the corresponding group that matches the [predicate].
 *
 * See also:
 * - [pivot];
 * - common [reduce][Pivot.reduce];
 * - [first].
 *
 * For more information about [Pivot] with examples: {@include [DocumentationUrls.Pivot]}
 *
 * {@include [RowFilterDescription]}
 *
 * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
 *
 * ### Example
 * ```kotlin
 * // In a DataFrame of real estate listings sorted by date and time,
 * // find the most recent listing for each type of property (house, apartment, etc.)
 * // with the price less than 500,000 euros
 * df.pivot { type }.last { price < 500_000 }
 * ```
 *
 * @param predicate A [row filter][RowFilter] describing the condition the row has to satisfy.
 *
 * @return A [ReducedPivot] containing in each column the last [row][DataRow] that satisfies the [predicate],
 * from the corresponding group (or a row with `null` values).
 */
public fun <T> Pivot<T>.last(predicate: RowFilter<T>): ReducedPivot<T> {
    return reduce { this.lastOrNull(predicate) }
}

// endregion

// region PivotGroupBy

/**
 * Reduces this [PivotGroupBy] to the last [row][DataRow] of each combined [pivot] + [groupBy] group
 * and returns a [ReducedPivotGroupBy] holding the last row of each corresponding group.
 * A combined [pivot] + [groupBy] group of the [PivotGroupBy] that is empty is represented
 * in the resulting [ReducedPivotGroupBy] by a row with `null` values (except the grouping key).
 *
 * See also:
 * - [pivot], [Pivot.groupBy] and [GroupBy.pivot];
 * - common [reduce][PivotGroupBy.reduce];
 * - [first].
 *
 * For more information about [Pivot] with examples: {@include [DocumentationUrls.Pivot]}
 *
 * ### Example
 * ```kotlin
 * // In a DataFrame of real estate listings sorted by date and time,
 * // find the most recent listing for each combination of type of property (house, apartment, etc.)
 * // and the city it is located in
 * df.pivot { type }.groupBy { city }.last()
 * ```
 *
 * @return A [ReducedPivotGroupBy] containing in each combination of a [groupBy] key and a [pivot] key either
 * the last [row][DataRow] of the corresponding DataFrame formed by this pivot–group pair,
 * or a row with `null` values (except the grouping key) if this DataFrame is empty.
 */
public fun <T> PivotGroupBy<T>.last(): ReducedPivotGroupBy<T> {
    return reduce { this.lastOrNull() }
}

/**
 * Reduces this [PivotGroupBy] by picking, from each combined [pivot] + [groupBy] group,
 * the last [row][DataRow] for which the given [predicate] returns `true`.
 * Returns a [ReducedPivotGroupBy] holding the last row matching the [predicate] from each corresponding group.
 * A combined [pivot] + [groupBy] group of the [PivotGroupBy] without any rows matching the [predicate]
 * is represented in the resulting [ReducedPivotGroupBy] by a row with `null` values (except the grouping key).
 *
 * See also:
 * - [pivot], [Pivot.groupBy] and [GroupBy.pivot];
 * - common [reduce][PivotGroupBy.reduce];
 * - [first].
 *
 * For more information about [PivotGroupBy] with examples: {@include [DocumentationUrls.PivotGroupBy]}
 *
 * For more information about [Pivot] with examples: {@include [DocumentationUrls.Pivot]}
 *
 * {@include [RowFilterDescription]}
 *
 * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
 *
 * ### Example
 * ```kotlin
 * // In a DataFrame of real estate listings sorted by date and time,
 * // for each combination of type of property (house, apartment, etc.)
 * // and the city it is located in,
 * // find the most recent listing with the price less than 500,000 euros
 * df.pivot { type }.groupBy { city }.last { price < 500_000 }
 * ```
 *
 * @param predicate A [row filter][RowFilter] describing the condition the row has to satisfy.
 *
 * @return A [ReducedPivotGroupBy] containing in each combination of a [groupBy] key and a [pivot] key either
 * the last [row][DataRow] matching the [predicate] in the corresponding DataFrame formed by this pivot–group pair,
 * or a row with `null` values (except the grouping key) if this DataFrame does not contain
 * any rows matching the [predicate].
 */
public fun <T> PivotGroupBy<T>.last(predicate: RowFilter<T>): ReducedPivotGroupBy<T> {
    return reduce { this.lastOrNull(predicate) }
}

// endregion
Expand Down
Loading
Loading