diff --git a/core/api/core.api b/core/api/core.api
index 1b2cf4b8be..a3e4a4a52d 100644
--- a/core/api/core.api
+++ b/core/api/core.api
@@ -4312,6 +4312,10 @@ public final class org/jetbrains/kotlinx/dataframe/api/TypeConversionsKt {
 	public static final fun toDataFrame (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
 	public static final fun toDataFrame (Lorg/jetbrains/kotlinx/dataframe/columns/BaseColumn;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
 	public static final fun toDataRow (Ljava/util/Map;)Lorg/jetbrains/kotlinx/dataframe/DataRow;
+	public static final fun toDataRow (Ljava/util/Map;I)Lorg/jetbrains/kotlinx/dataframe/DataRow;
+	public static final fun toDataRow (Ljava/util/Map;IZ)Lorg/jetbrains/kotlinx/dataframe/DataRow;
+	public static final fun toDataRow (Ljava/util/Map;IZZ)Lorg/jetbrains/kotlinx/dataframe/DataRow;
+	public static synthetic fun toDataRow$default (Ljava/util/Map;IZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow;
 	public static final fun toDoubleArray (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)[D
 	public static final fun toFloatArray (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)[F
 	public static final fun toFrameColumn (Ljava/lang/Iterable;Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/columns/FrameColumn;
@@ -5301,6 +5305,7 @@ public final class org/jetbrains/kotlinx/dataframe/impl/UtilsKt {
 	public static final fun headPlusArray (J[J)[J
 	public static final fun headPlusArray (S[S)[S
 	public static final fun headPlusArray (Z[Z)[Z
+	public static final fun letIf (Ljava/lang/Object;ZLkotlin/jvm/functions/Function1;)Ljava/lang/Object;
 	public static final fun toCamelCaseByDelimiters (Ljava/lang/String;Lkotlin/text/Regex;Ljava/lang/String;)Ljava/lang/String;
 	public static synthetic fun toCamelCaseByDelimiters$default (Ljava/lang/String;Lkotlin/text/Regex;Ljava/lang/String;ILjava/lang/Object;)Ljava/lang/String;
 }
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt
index 
55cf30f662..a8855fe6b1 100644
--- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt
+++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt
@@ -15,6 +15,7 @@ import org.jetbrains.kotlinx.dataframe.impl.api.createDataFrameImpl
 import org.jetbrains.kotlinx.dataframe.impl.asList
 import org.jetbrains.kotlinx.dataframe.impl.columnName
 import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType
+import org.jetbrains.kotlinx.dataframe.impl.letIf
 import org.jetbrains.kotlinx.dataframe.index
 import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
 import kotlin.reflect.KCallable
@@ -85,6 +86,61 @@ public fun Iterable>.toDataFrame(): AnyFrame {
     return columns.toDataFrame()
 }
 
+///**
+// *
+// * @see [Map.toDataRow]
+// */
+//@JvmName("toDataFrameMapStringAnyNullable")
+//@JvmOverloads
+//public fun Iterable>.toDataFrame(
+//    maxDepth: Int = 0,
+//    convertKeysToString: Boolean = true,
+//    unfoldIterablesOfMaps: Boolean = true,
+//): AnyFrame {
+//    val list = asList()
+//    if (list.isEmpty()) return DataFrame.empty()
+//
+//    val allKeys = mutableSetOf()
+//    for (row in this) {
+//        val keys = try {
+//            row.keys
+//                .letIf(convertKeysToString) { keys -> keys.mapTo(mutableSetOf()) { it.toString() } }
+//                .map { ColumnPath(it) }
+//        } catch (e: ClassCastException) {
+//
+//        }
+//
+//        allKeys.addAll(row.keys)
+//    }
+//
+//    val columns = allKeys.map { key ->
+//        val values = ArrayList(list.size)
+//        for (row in this) {
+//            values.add(row[key])
+//        }
+//        DataColumn.createByInference(key, values)
+//    }
+//
+//    return columns.toDataFrame()
+//}
+//
+/**
+ * Placeholder for converting an [Iterable] of [Maps][Map] to a dataframe; called from [Map.toDataRowImpl]
+ * for [Iterable] values.
+ *
+ * Not implemented yet: every call throws [NotImplementedError]. [Map.toDataRowImpl] only catches
+ * [ClassCastException] around this call, so that error propagates to its caller.
+ *
+ * The parameters mirror those of [Map.toDataRowImpl]; the stub does not read them.
+ */
+internal fun Iterable<Map<*, *>>.toDataFrameImpl(
+    maxDepth: Int,
+    convertKeysToString: Boolean,
+    unfoldIterablesOfMaps: Boolean,
+    currentPath: ColumnPath,
+): AnyFrame {
+    TODO("Iterable<Map<*, *>>.toDataFrameImpl is not implemented yet")
+}
+
 @JvmName("toDataFrameAnyColumn")
 public fun Iterable.toDataFrame(): AnyFrame = dataFrameOf(this)
 
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/typeConversions.kt 
b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/typeConversions.kt
index 33127f62ac..525b06e824 100644
--- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/typeConversions.kt
+++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/typeConversions.kt
@@ -29,6 +29,7 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnAccessorImpl
 import org.jetbrains.kotlinx.dataframe.impl.columns.asAnyFrameColumn
 import org.jetbrains.kotlinx.dataframe.impl.columns.asValues
 import org.jetbrains.kotlinx.dataframe.impl.columns.forceResolve
+import org.jetbrains.kotlinx.dataframe.impl.letIf
 import org.jetbrains.kotlinx.dataframe.impl.owner
 import org.jetbrains.kotlinx.dataframe.index
 import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
@@ -175,7 +176,7 @@ public fun ColumnGroup.asDataFrame(): DataFrame = this
  *
  * #### For example:
  *
- * `df.`[select][DataFrame.select]` { `[first][ColumnsSelectionDsl.first]`().`[asColumnGroup][SingleColumn.asColumnGroup]`().`[firstCol][ColumnsSelectionDsl.firstCol]`() }`
+ * `df.`[select][select]` { `[first][ColumnsSelectionDsl.first]`().`[asColumnGroup][SingleColumn.asColumnGroup]`().`[firstCol][ColumnsSelectionDsl.firstCol]`() }`
  *
  * @receiver The column reference to cast to a [SingleColumn]`<`[DataRow][DataRow]`<`[C][C\]`>>`.
  * @param [C\] The type of the (group) column.
@@ -404,8 +405,112 @@ public fun DataRow.toDataFrame(): DataFrame = owner[index..index]
 
 public fun AnyRow.toMap(): Map = df().columns().associate { it.name() to it[index] }
 
-public fun Map.toDataRow(): DataRow<*> {
-    val df = mapValues { listOf(it.value) }.toDataFrame()
+/**
+ * Converts [this] key-value [Map] to a [DataRow], representing a single row of a [DataFrame].
+ *
+ * By default, nested maps are kept as plain [Map] values; increase [maxDepth] to convert them as well.
+ * If a nested map has non-[String] keys and [convertKeysToString] is true, its keys are converted to strings and the
+ * map is converted too; otherwise it remains a [Map].
+ *
+ * ### For Example
+ *
+ * ```kotlin
+ * val map = mapOf("name" to "Alice", "age" to 30, "address" to mapOf("city" to "New York", "zip" to "10001"))
+ * val dataRow = map.toDataRow(maxDepth = 1)
+ * dataRow["name"] == "Alice"
+ * dataRow.get { "address"["city"] } == "New York"
+ * ```
+ *
+ * @param maxDepth How deep the recursion should go, converting [maps][Map] to [data rows][DataRow]. The default is 0; only top-level.
+ * @param convertKeysToString If true, non-string keys are converted to [strings][String]. Default is `true`.
+ *  If false, nested [maps][Map] with non-string keys are left as plain [Map] values.
+ * @param unfoldIterablesOfMaps If true, values containing an [Iterable] of [Maps][Map]
+ *  are unfolded into [dataframes][DataFrame], forming a [FrameColumn]. Default is `true`.
+ * @throws IllegalArgumentException if [convertKeysToString] is false and a top-level key is not a [String].
+ * @see [Iterable.toDataFrame]
+ */
+@JvmOverloads
+public fun Map<*, *>.toDataRow(
+    maxDepth: Int = 0,
+    convertKeysToString: Boolean = true,
+    unfoldIterablesOfMaps: Boolean = true,
+): DataRow<*> =
+    try {
+        this.toDataRowImpl(
+            maxDepth = maxDepth,
+            convertKeysToString = convertKeysToString,
+            unfoldIterablesOfMaps = unfoldIterablesOfMaps,
+            currentPath = ColumnPath.EMPTY,
+        )
+    } catch (e: ClassCastException) {
+        throw IllegalArgumentException(
+            "Toplevel map keys must be strings for conversion to DataRow. Set `convertKeysToString = true` to convert them automatically.",
+            e,
+        )
+    }
+
+/**
+ * Recursive implementation of [Map.toDataRow].
+ *
+ * Every key becomes a [ColumnPath] below [currentPath]; the current depth is the size of [currentPath].
+ * While that depth is below [maxDepth], [Map] values are converted recursively and, if [unfoldIterablesOfMaps]
+ * is true, [Iterable] values are handed to [toDataFrameImpl]. A nested value whose conversion throws a
+ * [ClassCastException] is kept unchanged.
+ *
+ * @throws ClassCastException if [convertKeysToString] is false and a key of this map is not a [String].
+ */
+internal fun Map<*, *>.toDataRowImpl(
+    maxDepth: Int,
+    convertKeysToString: Boolean,
+    unfoldIterablesOfMaps: Boolean,
+    currentPath: ColumnPath,
+): DataRow<*> {
+    val currentDepth = currentPath.size
+    val mapped: Map<ColumnPath, List<Any?>> = this
+        .mapKeys { (key, _) ->
+            if (convertKeysToString) {
+                currentPath + key.toString()
+            } else {
+                currentPath + (key as String)
+            }
+        }
+        .mapValues { (key, value) ->
+            when (value) {
+                is Map<*, *> if currentDepth < maxDepth -> {
+                    try {
+                        value.toDataRowImpl(
+                            maxDepth = maxDepth,
+                            convertKeysToString = convertKeysToString,
+                            unfoldIterablesOfMaps = unfoldIterablesOfMaps,
+                            currentPath = key,
+                        )
+                    } catch (_: ClassCastException) {
+                        value
+                    }
+                }
+
+                is Iterable<*> if unfoldIterablesOfMaps && currentDepth < maxDepth -> {
+                    // NOTE(review): toDataFrameImpl is still a TODO() stub, so reaching this branch currently
+                    // throws NotImplementedError, which the catch below does not handle.
+                    @Suppress("UNCHECKED_CAST")
+                    try {
+                        (value as Iterable<Map<*, *>>).toDataFrameImpl(
+                            maxDepth = maxDepth,
+                            convertKeysToString = convertKeysToString,
+                            unfoldIterablesOfMaps = true,
+                            currentPath = key,
+                        )
+                    } catch (_: ClassCastException) {
+                        value
+                    }
+                }
+
+                else -> value
+            }.let(::listOf)
+        }
+
+    @Suppress("UNCHECKED_CAST")
+    val df = mapped.toDataFrame()
     return DataRowImpl(0, df)
 }
 
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/Utils.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/Utils.kt
index 070da0b884..a7a7043b44 100644
--- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/Utils.kt
+++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/Utils.kt
@@ -14,6 +14,9 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet
 import org.jetbrains.kotlinx.dataframe.nrow
 import java.lang.reflect.Method
 import java.math.BigDecimal
+import kotlin.contracts.ExperimentalContracts
+import kotlin.contracts.InvocationKind
+import kotlin.contracts.contract
 import kotlin.reflect.KCallable
 import kotlin.reflect.KClass
 import kotlin.reflect.KFunction
@@ -511,3 +514,23 @@ internal val KCallable<*>.columnName: String
         is KProperty<*> -> columnName
         else -> findAnnotation()?.name ?: getterName
     }
+
+/**
+ * Shortcut for
+ * ```kt
+ * .let { if (predicate) block(it) else it }
+ * ```
+ *
+ * @param predicate whether [block] should be applied.
+ * @param block transformation applied to the receiver when [predicate] is true.
+ * @return the result of [block] if [predicate] is true, otherwise the receiver itself.
+ * @see let
+ */
+@OptIn(ExperimentalContracts::class)
+@PublishedApi
+internal inline fun <T> T.letIf(predicate: Boolean, block: (T) -> T): T {
+    contract {
+        callsInPlace(block, InvocationKind.AT_MOST_ONCE)
+    }
+    return if (predicate) block(this) else this
+}
diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataRowTests.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataRowTests.kt
index 2e465d36ae..f5953db26a 100644
--- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataRowTests.kt
+++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataRowTests.kt
@@ -1,5 +1,6 @@
 package org.jetbrains.kotlinx.dataframe.testSets.person
 
+import io.kotest.assertions.throwables.shouldThrow
 import io.kotest.matchers.shouldBe
 import org.jetbrains.kotlinx.dataframe.api.by
 import org.jetbrains.kotlinx.dataframe.api.columnNames
@@ -127,4 +128,89 @@ class DataRowTests : BaseTest() {
         row["a"] shouldBe 1
         row["b"] shouldBe true
     }
+
+    @Test
+    fun `toDataRow nested`() {
+        val map = mapOf(
+            "name" to "a",
+            "metadata" to
+                mapOf(
+                    "country" to "Philippines",
+                    "region" to mapOf("name" to "Caraga", "code" to "XIII"),
+                    "population" to mapOf("value" to "12345", "year" to 2020),
+                    "wrongMap" to mapOf(1 to 2, 4 to 4),
+                ),
+        )
+        // maxDepth = 0 and convertKeysToString = true are the defaults
+        map.toDataRow() shouldBe map.toDataRow(maxDepth = 0, convertKeysToString = true)
+
+        // maxDepth = 0: the nested map stays a plain Map value
+        map.toDataRow(maxDepth = 0).let { row ->
+            row["name"] shouldBe "a"
+            row["metadata"] shouldBe mapOf(
+                "country" to "Philippines",
+                "region" to mapOf("name" to "Caraga", "code" to "XIII"),
+                "population" to mapOf("value" to "12345", "year" to 2020),
+                "wrongMap" to mapOf(1 to 2, 4 to 4),
+            )
+        }
+
+        // maxDepth = 1: only the first nesting level becomes a column group
+        map.toDataRow(maxDepth = 1).let { row ->
+            row["name"] shouldBe "a"
+            row.getColumnGroup("metadata").let { metadata ->
+                metadata["country"] shouldBe "Philippines"
+                metadata["region"] shouldBe mapOf("name" to "Caraga", "code" to "XIII")
+                metadata["population"] shouldBe mapOf("value" to "12345", "year" to 2020)
+                metadata["wrongMap"] shouldBe mapOf(1 to 2, 4 to 4)
+            }
+        }
+
+        // maxDepth = 2: second-level maps are converted too, non-string keys are stringified
+        map.toDataRow(maxDepth = 2).let { row ->
+            row["name"] shouldBe "a"
+            row.getColumnGroup("metadata").let { metadata ->
+                metadata["country"] shouldBe "Philippines"
+                metadata.getColumnGroup("region").let { region ->
+                    region["name"] shouldBe "Caraga"
+                    region["code"] shouldBe "XIII"
+                }
+                metadata.getColumnGroup("population").let { population ->
+                    population["value"] shouldBe "12345"
+                    population["year"] shouldBe 2020
+                }
+                metadata.getColumnGroup("wrongMap").let { wrongMap ->
+                    wrongMap["1"] shouldBe 2
+                    wrongMap["4"] shouldBe 4
+                }
+            }
+        }
+
+        // convertKeysToString = false: a nested map with non-string keys is left as a Map
+        map.toDataRow(maxDepth = 2, convertKeysToString = false).let { row ->
+            row["name"] shouldBe "a"
+            row.getColumnGroup("metadata").let { metadata ->
+                metadata["country"] shouldBe "Philippines"
+                metadata.getColumnGroup("region").let { region ->
+                    region["name"] shouldBe "Caraga"
+                    region["code"] shouldBe "XIII"
+                }
+                metadata.getColumnGroup("population").let { population ->
+                    population["value"] shouldBe "12345"
+                    population["year"] shouldBe 2020
+                }
+                metadata["wrongMap"] shouldBe mapOf(1 to 2, 4 to 4)
+            }
+        }
+
+        // top-level non-string keys are stringified by default and rejected otherwise
+        val otherMap = mapOf(1 to 1, "2" to 2)
+        otherMap.toDataRow().let { row ->
+            row["1"] shouldBe 1
+            row["2"] shouldBe 2
+        }
+        shouldThrow<IllegalArgumentException> {
+            otherMap.toDataRow(convertKeysToString = false)
+        }
+    }
 }