From 194e962c86d6d503f62f9a89605b326be8d5d49a Mon Sep 17 00:00:00 2001 From: Nikita Klimenko Date: Thu, 9 Oct 2025 16:40:47 +0300 Subject: [PATCH 1/5] Improve `list.chunked()` + `List>.toDataFrame` use case --- core/api/core.api | 4 +- .../kotlinx/dataframe/api/convert.kt | 2 +- .../jetbrains/kotlinx/dataframe/io/common.kt | 31 ++++--- .../kotlinx/dataframe/api/toDataFrame.kt | 89 +++++++++++++++++++ 4 files changed, 110 insertions(+), 16 deletions(-) diff --git a/core/api/core.api b/core/api/core.api index 9a0e9adbe6..809dd86403 100644 --- a/core/api/core.api +++ b/core/api/core.api @@ -6099,8 +6099,8 @@ public final class org/jetbrains/kotlinx/dataframe/io/CommonKt { public static final fun isURL (Ljava/lang/String;)Z public static final fun isUrl (Ljava/lang/String;)Z public static final fun skippingBomCharacters (Ljava/io/InputStream;)Ljava/io/InputStream; - public static final fun toDataFrame (Ljava/util/List;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun toDataFrame$default (Ljava/util/List;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun toDataFrame (Ljava/util/List;Ljava/util/List;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun toDataFrame$default (Ljava/util/List;Ljava/util/List;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static final fun urlAsFile (Ljava/net/URL;)Ljava/io/File; } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt index 24e56f1c37..20bb6e142d 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt @@ -2525,4 +2525,4 @@ public fun Convert>>.toDataFrames(containsColumns: Boolea * @return A new [DataColumn] with the values converted to [DataFrame]. */ public fun DataColumn>>.toDataFrames(containsColumns: Boolean = false): DataColumn = - map { it.toDataFrame(containsColumns) } + map { it.toDataFrame(containsColumns = containsColumns) } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/common.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/common.kt index 25a5d8a1ac..4371d385ff 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/common.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/common.kt @@ -3,6 +3,8 @@ package org.jetbrains.kotlinx.dataframe.io import org.apache.commons.io.input.BOMInputStream import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.annotations.Interpretable +import org.jetbrains.kotlinx.dataframe.annotations.Refine import org.jetbrains.kotlinx.dataframe.api.toDataFrame import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType import org.jetbrains.kotlinx.dataframe.util.IS_URL @@ -48,24 +50,28 @@ public fun catchHttpResponse(url: URL, body: (InputStream) -> AnyFrame): AnyFram /** * Converts a list of lists into a [DataFrame]. * - * By default, treats the first inner list as a header (column names), and the remaining lists as rows. - * If [containsColumns] is `true`, interprets each inner list as a column, - * where the first element is used as the column name, and the remaining elements as values. + * By default, treats lists as rows. If [header] is not provided, the first inner list becomes a header (column names), and the remaining lists are treated as data. + * + * With [containsColumns] = `true`, interprets each inner list as a column. + * If [header] is not provided, the first element will be used as the column name, and the remaining elements as values. * * @param T The type of elements contained in the nested lists. - * @param containsColumns If `true`, treats each nested list as a column with its first element as the column name. - * Otherwise, the first list is treated as the header. + * @param containsColumns If `true`, treats each nested list as a column. + * Otherwise, each nested list is a row. * Defaults to `false`. + * @param header overrides extraction of column names from lists - all values are treated as data instead. * @return A [DataFrame] containing the data from the nested list structure. * Returns an empty [DataFrame] if the input is empty or invalid. */ -public fun List>.toDataFrame(containsColumns: Boolean = false): AnyFrame = +@Refine +@Interpretable("ValuesListsToDataFrame") +public fun List>.toDataFrame(header: List? = null, containsColumns: Boolean = false): AnyFrame = when { containsColumns -> { - mapNotNull { - if (it.isEmpty()) return@mapNotNull null - val name = it[0].toString() - val values = it.drop(1) + mapIndexedNotNull { index, list -> + if (list.isEmpty()) return@mapIndexedNotNull null + val name = header?.get(index) ?: list[0].toString() + val values = if (header == null) list.drop(1) else list createColumnGuessingType(name, values) }.toDataFrame() } @@ -73,9 +79,8 @@ public fun List>.toDataFrame(containsColumns: Boolean = false): AnyF isEmpty() -> DataFrame.Empty else -> { - val header = get(0).map { it.toString() } - val data = drop(1) - header.mapIndexed { colIndex, name -> + val data = if (header == null) drop(1) else this + (header ?: get(0).map { it.toString() }).mapIndexed { colIndex, name -> val values = data.map { row -> if (row.size <= colIndex) { null diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt index 4bd2ab1545..e28bcd4302 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt @@ -16,6 +16,7 @@ import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.annotations.ColumnName import org.jetbrains.kotlinx.dataframe.annotations.DataSchema import org.jetbrains.kotlinx.dataframe.columns.ColumnKind +import org.jetbrains.kotlinx.dataframe.io.toDataFrame import org.jetbrains.kotlinx.dataframe.kind import org.jetbrains.kotlinx.dataframe.type import org.junit.Test @@ -700,4 +701,92 @@ class CreateDataFrameTests { val df = list.toDataFrame(maxDepth = 2) df["map"].type() shouldBe typeOf>() } + + @Test + fun `parsing row-major lines into structured dataframe`() { + // I think finding data in such format will be rare, so we need an optional header parameter. + val lines = buildList { + addAll(listOf("stamp", "header", "data")) + repeat(33) { row -> + add("stamp $row") + add("header $row") + add("data $row") + } + } + + val df = lines.chunked(3).toDataFrame() + + df.columnNames() shouldBe listOf("stamp", "header", "data") + df.columnTypes() shouldBe listOf(typeOf(), typeOf(), typeOf()) + df.rowsCount() shouldBe 33 + df[0].values() shouldBe listOf("stamp 0", "header 0", "data 0") + } + + @Test + fun `parsing srt lines into structured dataframe`() { + // *.srt subtitle file format + val lines = buildList { + repeat(33) { row -> + add("stamp $row") + add("header $row") + add("data $row") + add("\n") + } + } + + val df = lines.chunked(4).map { it.dropLast(1) }.toDataFrame(header = listOf("stamp", "header", "data")) + + df.columnNames() shouldBe listOf("stamp", "header", "data") + df.columnTypes() shouldBe listOf(typeOf(), typeOf(), typeOf()) + df.rowsCount() shouldBe 33 + df[0].values() shouldBe listOf("stamp 0", "header 0", "data 0") + + // Different approach. I think the dropLast one is better + lines.chunked(4) + .toDataFrame(header = listOf("stamp", "header", "data", "whitespace")) + .remove("whitespace") shouldBe df + } + + @Test + fun `parsing column-major lines into structured dataframe`() { + val lines = buildList { + repeat(4) { col -> + repeat(5) { row -> + add("data$col $row") + } + add("\n") + } + } + + val header = List(4) { "col $it" } + val df = lines + .chunked(6) + .map { it.dropLast(1) } + .toDataFrame(header = header, containsColumns = true) + df.columnNames() shouldBe header + df.columnTypes() shouldBe List(4) { typeOf() } + df["col 0"].values() shouldBe listOf("data0 0", "data0 1", "data0 2", "data0 3", "data0 4") + } + + @Test + fun `parsing column-major lines with header into structured dataframe`() { + val lines = buildList { + repeat(4) { col -> + add("col $col") + repeat(5) { row -> + add("data$col $row") + } + add("\n") + } + } + + val header = List(4) { "col $it" } + val df = lines + .chunked(7) + .map { it.dropLast(1) } + .toDataFrame(containsColumns = true) + df.columnNames() shouldBe header + df.columnTypes() shouldBe List(4) { typeOf() } + df["col 0"].values() shouldBe listOf("data0 0", "data0 1", "data0 2", "data0 3", "data0 4") + } } From 4ab8ebff2bfa369f990001c52d651e3ae934c8ec Mon Sep 17 00:00:00 2001 From: Nikita Klimenko Date: Wed, 15 Oct 2025 19:19:31 +0300 Subject: [PATCH 2/5] Move List>.toDataFrame from io to api with deprecation --- core/api/core.api | 6 +- .../kotlinx/dataframe/api/toDataFrame.kt | 46 ++ .../jetbrains/kotlinx/dataframe/io/common.kt | 55 +- .../dataframe/util/deprecationMessages.kt | 3 + .../kotlinx/dataframe/api/toDataFrame.kt | 3 +- ...e.samples.api.Create.toDataFrameLists.html | 569 ++++++++++++++++++ 6 files changed, 630 insertions(+), 52 deletions(-) create mode 100644 docs/StardustDocs/resources/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Create.toDataFrameLists.html diff --git a/core/api/core.api b/core/api/core.api index 809dd86403..391569d9d5 100644 --- a/core/api/core.api +++ b/core/api/core.api @@ -4462,7 +4462,9 @@ public final class org/jetbrains/kotlinx/dataframe/api/TakeKt { } public final class org/jetbrains/kotlinx/dataframe/api/ToDataFrameKt { + public static final fun toDataFrame (Ljava/util/List;Ljava/util/List;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static final fun toDataFrame (Ljava/util/Map;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun toDataFrame$default (Ljava/util/List;Ljava/util/List;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static final fun toDataFrameAnyColumn (Ljava/lang/Iterable;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static final fun toDataFrameColumnPathAnyNullable (Ljava/lang/Iterable;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static final fun toDataFrameColumnPathAnyNullable (Ljava/util/Map;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; @@ -6099,8 +6101,8 @@ public final class org/jetbrains/kotlinx/dataframe/io/CommonKt { public static final fun isURL (Ljava/lang/String;)Z public static final fun isUrl (Ljava/lang/String;)Z public static final fun skippingBomCharacters (Ljava/io/InputStream;)Ljava/io/InputStream; - public static final fun toDataFrame (Ljava/util/List;Ljava/util/List;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun toDataFrame$default (Ljava/util/List;Ljava/util/List;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun toDataFrame (Ljava/util/List;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun toDataFrame$default (Ljava/util/List;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static final fun urlAsFile (Ljava/net/URL;)Ljava/io/File; } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt index f93e9efba7..ae36d5cd5c 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt @@ -256,3 +256,49 @@ public fun Map>.toDataFrame(): AnyFrame = }.toDataFrameFromPairs() // endregion + +/** + * Converts a list of lists into a [DataFrame]. + * + * By default, treats lists as row values. If [header] is not provided, the first inner list becomes a header (column names), and the remaining lists are treated as data. + * + * With [containsColumns] = `true`, interprets each inner list as a column. + * If [header] is not provided, the first element will be used as the column name, and the remaining elements as values. + * + * @param T The type of elements contained in the nested lists. + * @param containsColumns If `true`, treats each nested list as a column. + * Otherwise, each nested list is a row. + * Defaults to `false`. + * @param header overrides extraction of column names from lists - all values are treated as data instead. + * @return A [DataFrame] containing the data from the nested list structure. + * Returns an empty [DataFrame] if the input is empty or invalid. + */ +@Refine +@Interpretable("ValuesListsToDataFrame") +public fun List>.toDataFrame(header: List?, containsColumns: Boolean = false): AnyFrame = + when { + containsColumns -> { + mapIndexedNotNull { index, list -> + if (list.isEmpty()) return@mapIndexedNotNull null + val name = header?.get(index) ?: list[0].toString() + val values = if (header == null) list.drop(1) else list + createColumnGuessingType(name, values) + }.toDataFrame() + } + + isEmpty() -> DataFrame.Empty + + else -> { + val data = if (header == null) drop(1) else this + (header ?: get(0).map { it.toString() }).mapIndexed { colIndex, name -> + val values = data.map { row -> + if (row.size <= colIndex) { + null + } else { + row[colIndex] + } + } + createColumnGuessingType(name, values) + }.toDataFrame() + } + } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/common.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/common.kt index 4371d385ff..3f06589c4e 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/common.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/common.kt @@ -3,13 +3,11 @@ package org.jetbrains.kotlinx.dataframe.io import org.apache.commons.io.input.BOMInputStream import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.annotations.Interpretable -import org.jetbrains.kotlinx.dataframe.annotations.Refine import org.jetbrains.kotlinx.dataframe.api.toDataFrame -import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType import org.jetbrains.kotlinx.dataframe.util.IS_URL import org.jetbrains.kotlinx.dataframe.util.IS_URL_IMPORT import org.jetbrains.kotlinx.dataframe.util.IS_URL_REPLACE +import org.jetbrains.kotlinx.dataframe.util.LISTS_TO_DATAFRAME_MIGRATION import java.io.File import java.io.InputStream import java.net.HttpURLConnection @@ -47,51 +45,12 @@ public fun catchHttpResponse(url: URL, body: (InputStream) -> AnyFrame): AnyFram } } -/** - * Converts a list of lists into a [DataFrame]. - * - * By default, treats lists as rows. If [header] is not provided, the first inner list becomes a header (column names), and the remaining lists are treated as data. - * - * With [containsColumns] = `true`, interprets each inner list as a column. - * If [header] is not provided, the first element will be used as the column name, and the remaining elements as values. - * - * @param T The type of elements contained in the nested lists. - * @param containsColumns If `true`, treats each nested list as a column. - * Otherwise, each nested list is a row. - * Defaults to `false`. - * @param header overrides extraction of column names from lists - all values are treated as data instead. - * @return A [DataFrame] containing the data from the nested list structure. - * Returns an empty [DataFrame] if the input is empty or invalid. - */ -@Refine -@Interpretable("ValuesListsToDataFrame") -public fun List>.toDataFrame(header: List? = null, containsColumns: Boolean = false): AnyFrame = - when { - containsColumns -> { - mapIndexedNotNull { index, list -> - if (list.isEmpty()) return@mapIndexedNotNull null - val name = header?.get(index) ?: list[0].toString() - val values = if (header == null) list.drop(1) else list - createColumnGuessingType(name, values) - }.toDataFrame() - } - - isEmpty() -> DataFrame.Empty - - else -> { - val data = if (header == null) drop(1) else this - (header ?: get(0).map { it.toString() }).mapIndexed { colIndex, name -> - val values = data.map { row -> - if (row.size <= colIndex) { - null - } else { - row[colIndex] - } - } - createColumnGuessingType(name, values) - }.toDataFrame() - } - } +@Deprecated( + LISTS_TO_DATAFRAME_MIGRATION, + ReplaceWith("this.toDataFrame(header = null, containsColumns)", "org.jetbrains.kotlinx.dataframe.api.toDataFrame"), +) +public fun List>.toDataFrame(containsColumns: Boolean = false): AnyFrame = + toDataFrame(header = null, containsColumns) @Deprecated( message = IS_URL, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt index d013cf30f6..4c350dda87 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt @@ -257,6 +257,9 @@ internal const val GET_ROWS_RANGE_REPLACE = "df().getRows(indices)" internal const val GET_ROW_OR_NULL_REPLACE = "df().getRowOrNull(index)" internal const val COPY_REPLACE = "columns().toDataFrame().cast()" +internal const val LISTS_TO_DATAFRAME_MIGRATION = + "Function moved from io to api package, and a new `header` parameter is introduced. $MESSAGE_1_1" + // endregion // region keep across releases diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt index e28bcd4302..59421b082d 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt @@ -16,7 +16,6 @@ import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.annotations.ColumnName import org.jetbrains.kotlinx.dataframe.annotations.DataSchema import org.jetbrains.kotlinx.dataframe.columns.ColumnKind -import org.jetbrains.kotlinx.dataframe.io.toDataFrame import org.jetbrains.kotlinx.dataframe.kind import org.jetbrains.kotlinx.dataframe.type import org.junit.Test @@ -784,7 +783,7 @@ class CreateDataFrameTests { val df = lines .chunked(7) .map { it.dropLast(1) } - .toDataFrame(containsColumns = true) + .toDataFrame(header = null, containsColumns = true) df.columnNames() shouldBe header df.columnTypes() shouldBe List(4) { typeOf() } df["col 0"].values() shouldBe listOf("data0 0", "data0 1", "data0 2", "data0 3", "data0 4") diff --git a/docs/StardustDocs/resources/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Create.toDataFrameLists.html b/docs/StardustDocs/resources/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Create.toDataFrameLists.html new file mode 100644 index 0000000000..5f38d13afb --- /dev/null +++ b/docs/StardustDocs/resources/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Create.toDataFrameLists.html @@ -0,0 +1,569 @@ + + + + + +
+ +

+ + + From e84ddaadb448803b578e135266278a21e5d10fd0 Mon Sep 17 00:00:00 2001 From: Nikita Klimenko Date: Wed, 15 Oct 2025 19:20:16 +0300 Subject: [PATCH 3/5] Add an example in the documentation for List>.toDataFrame --- .../kotlinx/dataframe/samples/api/Create.kt | 18 +++++++++++++++ docs/StardustDocs/topics/_shadow_resources.md | 3 ++- docs/StardustDocs/topics/createDataFrame.md | 23 +++++++++++++++++++ 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Create.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Create.kt index 1cd3432b5f..fe6a56a4b8 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Create.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Create.kt @@ -425,4 +425,22 @@ class Create : TestBase() { val df = files.toDataFrame(columnName = "data") // SampleEnd } + + @Test + @TransformDataFrameExpressions + fun toDataFrameLists() { + // SampleStart + val lines = """ + 1 + 00:00:05,000 --> 00:00:07,500 + This is the first subtitle. + + 2 + 00:00:08,000 --> 00:00:10,250 + This is the second subtitle. + """.trimIndent().lines() + + lines.chunked(4) { it.take(3) }.toDataFrame(header = listOf("n", "timestamp", "text")) + // SampleEnd + } } diff --git a/docs/StardustDocs/topics/_shadow_resources.md b/docs/StardustDocs/topics/_shadow_resources.md index 9bcd182239..360f6acda0 100644 --- a/docs/StardustDocs/topics/_shadow_resources.md +++ b/docs/StardustDocs/topics/_shadow_resources.md @@ -88,6 +88,7 @@ + @@ -199,4 +200,4 @@ - \ No newline at end of file + diff --git a/docs/StardustDocs/topics/createDataFrame.md b/docs/StardustDocs/topics/createDataFrame.md index ccf5f2424d..8a65a27086 100644 --- a/docs/StardustDocs/topics/createDataFrame.md +++ b/docs/StardustDocs/topics/createDataFrame.md @@ -160,6 +160,29 @@ val df = files.toDataFrame(columnName = "data") +Creates a [`DataFrame`](DataFrame.md) from a `List>`: + +This is useful for parsing text files. For example, the `.srt` subtitle format can be parsed like this: + + + +```kotlin +val lines = """ + 1 + 00:00:05,000 --> 00:00:07,500 + This is the first subtitle. + + 2 + 00:00:08,000 --> 00:00:10,250 + This is the second subtitle. +""".trimIndent().lines() + +lines.chunked(4) { it.take(3) }.toDataFrame(header = listOf("n", "timestamp", "text")) +``` + + + + Creates a [`DataFrame`](DataFrame.md) from an [`Iterable`](https://kotlinlang.org/api/latest/jvm/stdlib/kotlin.collections/-iterable/) of objects: From 075e96d160958cd163c8ba8f7150c1ba7aa8347a Mon Sep 17 00:00:00 2001 From: Nikita Klimenko Date: Wed, 15 Oct 2025 19:40:01 +0300 Subject: [PATCH 4/5] Add headers for operations in createDataFrame.md to refer from other pages --- docs/StardustDocs/topics/collectionsInterop.md | 5 +++-- docs/StardustDocs/topics/createDataFrame.md | 13 +++++++------ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/docs/StardustDocs/topics/collectionsInterop.md b/docs/StardustDocs/topics/collectionsInterop.md index 169f0f4f9a..e58aac9c2c 100644 --- a/docs/StardustDocs/topics/collectionsInterop.md +++ b/docs/StardustDocs/topics/collectionsInterop.md @@ -15,10 +15,11 @@ such as [`filter`](filter.md), [`take`](sliceRows.md#take), [`first`](first.md), [`map`](map.md), [`groupBy`](groupBy.md) etc. [`DataFrame`](DataFrame.md) has two-way compatibility with [`Map`](https://kotlinlang.org/api/latest/jvm/stdlib/kotlin.collections/-map/) and [`List`](https://kotlinlang.org/api/latest/jvm/stdlib/kotlin.collections/-list/): -* `List` -> `DataFrame`: [toDataFrame](createDataFrame.md#todataframe) +* `List` -> `DataFrame`: [toDataFrame](createDataFrame.md#dataframe-from-iterable-t) * `DataFrame` -> `List`: [toList](toList.md) -* `Map>` -> `DataFrame<*>`: [toDataFrame](createDataFrame.md#todataframe) +* `Map>` -> `DataFrame<*>`: [toDataFrame](createDataFrame.md#dataframe-from-map-string-list) * `DataFrame<*>` -> `Map>`: [toMap](toMap.md) +* `List>` -> `DataFrame<*>`: [toDataFrame](createDataFrame.md#dataframe-from-list-list-t) Columns, rows, and values of [`DataFrame`](DataFrame.md) can be accessed as [`List`](https://kotlinlang.org/api/latest/jvm/stdlib/kotlin.collections/-list/), diff --git a/docs/StardustDocs/topics/createDataFrame.md b/docs/StardustDocs/topics/createDataFrame.md index 8a65a27086..f4eef28140 100644 --- a/docs/StardustDocs/topics/createDataFrame.md +++ b/docs/StardustDocs/topics/createDataFrame.md @@ -111,7 +111,7 @@ val df = dataFrameOf(names).fill(15, true) ### toDataFrame -`DataFrame` from `Map>`: +#### `DataFrame` from `Map>`: @@ -125,7 +125,7 @@ map.toDataFrame() -Creates a [`DataFrame`](DataFrame.md) from an [`Iterable`](https://kotlinlang.org/api/latest/jvm/stdlib/kotlin.collections/-iterable/) of [basic types](https://kotlinlang.org/docs/basic-types.html) (except arrays): +#### `DataFrame` from [`Iterable`](https://kotlinlang.org/api/latest/jvm/stdlib/kotlin.collections/-iterable/) of [basic types](https://kotlinlang.org/docs/basic-types.html) (except arrays): The return type of these overloads is a typed [`DataFrame`](DataFrame.md). Its data schema defines the column that can be used right after the conversion for additional computations. @@ -141,10 +141,11 @@ df.add("length") { value.length } -Creates a [`DataFrame`](DataFrame.md) from an [`Iterable`](https://kotlinlang.org/api/latest/jvm/stdlib/kotlin.collections/-iterable/) with one column: -"columnName: `DataColumn`". +#### [`DataFrame`](DataFrame.md) with one column from [`Iterable`](https://kotlinlang.org/api/latest/jvm/stdlib/kotlin.collections/-iterable/) + This is an easy way to create a [`DataFrame`](DataFrame.md) when you have a list of Files, URLs, or a structure you want to extract data from. + In a notebook, it can be convenient to start from the column of these values to see the number of rows, their `toString` in a table and then iteratively add columns with the parts of the data you're interested in. @@ -160,7 +161,7 @@ val df = files.toDataFrame(columnName = "data") -Creates a [`DataFrame`](DataFrame.md) from a `List>`: +#### [`DataFrame`](DataFrame.md) from `List>`: This is useful for parsing text files. For example, the `.srt` subtitle format can be parsed like this: @@ -183,7 +184,7 @@ lines.chunked(4) { it.take(3) }.toDataFrame(header = listOf("n", "timestamp", "t -Creates a [`DataFrame`](DataFrame.md) from an [`Iterable`](https://kotlinlang.org/api/latest/jvm/stdlib/kotlin.collections/-iterable/) of objects: +#### [`DataFrame`](DataFrame.md) from [`Iterable`](https://kotlinlang.org/api/latest/jvm/stdlib/kotlin.collections/-iterable/): From dc0fd336ecffd70cb8482190691dbe65475fe5e7 Mon Sep 17 00:00:00 2001 From: Nikita Klimenko Date: Thu, 16 Oct 2025 16:23:31 +0300 Subject: [PATCH 5/5] Fix test and add deprecation level --- .../main/kotlin/org/jetbrains/kotlinx/dataframe/io/common.kt | 1 + .../kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/common.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/common.kt index 3f06589c4e..6808db0b77 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/common.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/common.kt @@ -48,6 +48,7 @@ public fun catchHttpResponse(url: URL, body: (InputStream) -> AnyFrame): AnyFram @Deprecated( LISTS_TO_DATAFRAME_MIGRATION, ReplaceWith("this.toDataFrame(header = null, containsColumns)", "org.jetbrains.kotlinx.dataframe.api.toDataFrame"), + level = DeprecationLevel.WARNING, ) public fun List>.toDataFrame(containsColumns: Boolean = false): AnyFrame = toDataFrame(header = null, containsColumns) diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt index 59421b082d..bfa3ad34c0 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt @@ -713,7 +713,7 @@ class CreateDataFrameTests { } } - val df = lines.chunked(3).toDataFrame() + val df = lines.chunked(3).toDataFrame(header = null) df.columnNames() shouldBe listOf("stamp", "header", "data") df.columnTypes() shouldBe listOf(typeOf(), typeOf(), typeOf())