In [1]:
@file:Repository("*mavenLocal")

In [2]:
@file:DependsOn("org.jetbrains.kotlinx:dataframe:0.9.0-dev")

In [3]:
import org.jetbrains.kotlinx.dataframe.plugin.*
import org.jetbrains.kotlinx.dataframe.plugin.testing.schemaRender.toPluginDataFrameSchema

In [4]:
/**
 * Schema of the sample frame used throughout this notebook.
 *
 * `name` and `age` are declared non-null; `city` and `weight` are nullable.
 */
@DataSchema
interface Person {
    val name: String
    val age: Int
    val city: String?
    val weight: Int?
}

In [5]:
// Untyped sample frame: seven rows of (name, age, city, weight).
// `city` contains one null and `weight` contains two, so both columns are nullable.
val df = dataFrameOf("name", "age", "city", "weight")(
        "Alice", 15, "London", 54,
        "Bob", 45, "Dubai", 87,
        "Charlie", 20, "Moscow", null,
        "Charlie", 40, "Milan", null,
        "Bob", 30, "Tokyo", 68,
        "Alice", 20, null, 55,
        "Charlie", 30, "Moscow", 90
    )

// The same data viewed through the Person schema.
val typed: DataFrame<Person> = df.cast()

In [6]:
// Default selector for explodeTest: picks, via `dfs`, every column for which
// isList() or isFrameColumn() holds.
private val defaultExplodeColumns: ColumnsSelector<*, *> = { dfs { it.isList() || it.isFrameColumn() } }

/**
 * Explodes the receiver at runtime and, for comparison, applies the plugin's schema-level
 * `explodeImpl` to the receiver's plugin schema, printing each schema on the way.
 *
 * Printed sections, in order: "Before runtime", "Before compile", "Runtime", "Compile".
 *
 * @param dropEmpty forwarded unchanged to both `explode` and `explodeImpl`.
 * @param selector columns to explode; defaults to [defaultExplodeColumns].
 * @return the runtime result of `explode`; the compile-time schema is only printed.
 */
fun <T> DataFrame<T>.explodeTest(
    dropEmpty: Boolean = true,
    selector: ColumnsSelector<T, *> = defaultExplodeColumns
): DataFrame<T> {
    // Input schemas, as seen by the runtime and by the plugin.
    println("Before runtime")
    schema().print()
    println("Before compile")
    pluginSchema().print()

    // Runtime explode and its resulting schema.
    val exploded = explode(dropEmpty, selector)
    repeat(2) { println() }
    println("Runtime")
    exploded.schema().print()

    // The header is printed before explodeImpl runs, so it is emitted even if explodeImpl throws.
    println("Compile")
    pluginSchema()
        .explodeImpl(dropEmpty, selector.toColumnPath(this))
        .print()

    return exploded
}

In [7]:
// Group the typed frame by `city`; the bare `grouped` expression is the cell's result.
val grouped = typed.groupBy { city }
grouped

In [52]:
/**
 * Builds the source text of a test stub for this frame.
 *
 * The text consists of a fixed import header, the declaration returned by
 * `generateSchemaDeclaration` for the name `Explode<id>`, and an empty `convert<id>`
 * function taking a `DataFrame<Explode<id>>`.
 *
 * The template is a raw string: its leading newline and whitespace-only lines are
 * part of the returned text.
 *
 * @param id numeric suffix used in both the schema name and the function name.
 * @return the generated source text.
 */
fun DataFrame<*>.generateTestStub(id: Int): String {
    val schemaName = "Explode$id"
    val declaration = generateSchemaDeclaration(schemaName)

    return """
import org.jetbrains.kotlinx.dataframe.*
import org.jetbrains.kotlinx.dataframe.api.*
import org.jetbrains.kotlinx.dataframe.annotations.*
import org.jetbrains.kotlinx.dataframe.plugin.testing.*
import org.jetbrains.kotlinx.dataframe.plugin.testing.atoms.*
    
$declaration
    
fun convert$id(df: DataFrame<$schemaName>) {
        
}"""
}

In [45]:
// Keep rows with a non-null city, drop `age` and `weight`, then group by city and turn the
// grouping into a plain frame (a `city` key column plus a `group` frame column, per the schema below).
val df = typed.filter { city != null }.remove { age and weight }.groupBy { city }.toDataFrame()
df.schema()

city: String
group: *
    name: String
    city: String


In [53]:
df.generateTestStub(0)


import org.jetbrains.kotlinx.dataframe.*
import org.jetbrains.kotlinx.dataframe.api.*
import org.jetbrains.kotlinx.dataframe.annotations.*
import org.jetbrains.kotlinx.dataframe.plugin.testing.*
import org.jetbrains.kotlinx.dataframe.plugin.testing.atoms.*
    
@DataSchema(isOpen = false)
interface Explode01 {
    val city: String
    val name: String
}

val ColumnsContainer<Explode01>.city: DataColumn<String>  get() = this["city"] as DataColumn<String>
val DataRow<Explode01>.city: String  get() = this["city"] as String
val ColumnsContainer<Explode01>.name: DataColumn<String>  get() = this["name"] as DataColumn<String>
val DataRow<Explode01>.name: String  get() = this["name"] as String

@DataSchema
interface Explode0 {
    val city: String
    val group: DataFrame<Explode01>
}

val ColumnsContainer<Explode0>.city: DataColumn<String>  get() = this["city"] as DataColumn<String>
val DataRow<Explode0>.city: String  get() = this["city"] as String
val ColumnsContainer<Explode0>.group: DataC

In [11]:
// Explode only the `group` frame column, with the default dropEmpty.
df.explodeTest { group }

Before runtime
city: String
group: *
    name: String
    city: String

Before compile
city: kotlin.String
*group
   name: kotlin.String
   city: kotlin.String
Runtime
city: String
group:
    name: String
    city: String

Compile
city: kotlin.String
group
   name: kotlin.String
   city: kotlin.String


In [12]:
// Column accessor for the groups column of `grouped`; reused by the cells below.
val groupCol = grouped.groups.toColumnAccessor()

In [13]:
groupCol

org.jetbrains.kotlinx.dataframe.impl.columns.ColumnAccessorImpl@3f36b447

In [14]:
// Turn the grouping into a plain frame and overwrite three of its group cells:
// row 1 -> null, row 2 -> emptyDataFrame(), row 3 -> the original group filtered down to zero rows.
val plain = grouped.toDataFrame()
    .update { groupCol }.at(1).withNull() // shouldn't happen
    .update { groupCol }.at(2).with { emptyDataFrame() }
    .update { groupCol }.at(3).with { it.filter { false } }
plain

In [41]:
plain.schema()

city: String?
group: DataFrame?


In [15]:
// Explode the modified groups while keeping empty ones (dropEmpty = false).
// In the recorded output below the runtime part succeeds, but the plugin-side explodeImpl
// throws NotImplementedError ("explode lists"), so `res` is never assigned.
val res = plain.explodeTest(dropEmpty = false) { groupCol }
res

Before runtime
city: String?
group: DataFrame?

Before compile
city: kotlin.String?
group: org.jetbrains.kotlinx.dataframe.DataFrame?
Runtime
city: String?
group:
    name: String?
    age: Int?
    city: String?
    weight: Int?

Compile


An operation is not implemented: explode lists
kotlin.NotImplementedError: An operation is not implemented: explode lists
	at org.jetbrains.kotlinx.dataframe.plugin.ExplodeKt.explodeImpl$explode(explode.kt:49)
	at org.jetbrains.kotlinx.dataframe.plugin.ExplodeKt.explodeImpl(explode.kt:58)
	at Line_11.explodeTest(Line_11.jupyter-kts:15)
	at Line_24.<init>(Line_24.jupyter-kts:1)
	at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:490)
	at kotlin.script.experimental.jvm.BasicJvmScriptEvaluator.evalWithConfigAndOtherScriptsResults(BasicJvmScriptEvaluator.kt:100)
	at kotlin.script.experimental.jvm.BasicJvmScriptEvaluator.invoke$suspendImpl(Bas

In [16]:
// Same check with dropEmpty = true.
// In the recorded output below the plugin-side explodeImpl again throws
// NotImplementedError ("explode lists"), so `res1` is never assigned.
val res1 = plain.explodeTest(dropEmpty = true) { groupCol }
res1

Before runtime
city: String?
group: DataFrame?

Before compile
city: kotlin.String?
group: org.jetbrains.kotlinx.dataframe.DataFrame?
Runtime
city: String?
group:
    name: String?
    age: Int?
    city: String?
    weight: Int?

Compile


An operation is not implemented: explode lists
kotlin.NotImplementedError: An operation is not implemented: explode lists
	at org.jetbrains.kotlinx.dataframe.plugin.ExplodeKt.explodeImpl$explode(explode.kt:49)
	at org.jetbrains.kotlinx.dataframe.plugin.ExplodeKt.explodeImpl(explode.kt:58)
	at Line_11.explodeTest(Line_11.jupyter-kts:15)
	at Line_25.<init>(Line_25.jupyter-kts:1)
	at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:490)
	at kotlin.script.experimental.jvm.BasicJvmScriptEvaluator.evalWithConfigAndOtherScriptsResults(BasicJvmScriptEvaluator.kt:100)
	at kotlin.script.experimental.jvm.BasicJvmScriptEvaluator.invoke$suspendImpl(Bas

In [17]:
// Sum over the group cells of `plain`: each cell contributes its row count,
// with null and zero-row cells counted as 1.
val expected = plain[groupCol.name()].sumOf { cell ->
    val rows = (cell as AnyFrame?)?.rowsCount() ?: 0
    maxOf(rows, 1)
}
expected

6

In [18]:
// `group` is built from the `ints` column and shows up as a column group in the schema below;
// `df` wraps it.
val ints by columnOf(1, 2, 3)
val group by columnOf(ints)
val df = dataFrameOf(group)

// Frame column holding `df` and a zero-row slice of it (`df.take(0)`).
val frameCol by columnOf(df, df.take(0))
val names by columnOf("a", "b")
val df1 = dataFrameOf(names, frameCol)

In [19]:
df1.schema()

names: String
frameCol: *
    group:
        ints: Int


In [20]:
// Defaults: dropEmpty = true and the defaultExplodeColumns selector.
df1.explodeTest()

Before runtime
names: String
frameCol: *
    group:
        ints: Int

Before compile
names: kotlin.String
*frameCol
   group
      ints: kotlin.Int
Runtime
names: String
frameCol:
    group:
        ints: Int

Compile
names: kotlin.String
frameCol
   group
      ints: kotlin.Int


In [22]:
// Same frame with dropEmpty = false: in the output below `ints` becomes nullable (`Int?`)
// in both the runtime and the compile-time schema.
val withEmpty = df1.explodeTest(dropEmpty = false)
withEmpty

Before runtime
names: String
frameCol: *
    group:
        ints: Int

Before compile
names: kotlin.String
*frameCol
   group
      ints: kotlin.Int
Runtime
names: String
frameCol:
    group:
        ints: Int?

Compile
names: kotlin.String
frameCol
   group
      ints: kotlin.Int?


In [25]:
// Here `group` holds `ints` plus a frame column (`nestedFrameCol`) made of three references to `dd`.
val ints by columnOf(1, 2, 3)
val dd = dataFrameOf(ints)
val nestedFrameCol by columnOf(dd, dd, dd)
val group by columnOf(ints, nestedFrameCol)
val df = dataFrameOf(group)

// Frame column holding `df` and a zero-row slice of it (`df.take(0)`).
val frameCol by columnOf(df, df.take(0))
val names by columnOf("a", "b")
val df1 = dataFrameOf(names, frameCol)

In [26]:
// Print the schema, then call the runtime explode with `false` as its first argument.
// In the recorded run below the explode call fails with
// IllegalArgumentException: Can not add value 'null' to FrameColumn.
df1.schema().print()
df1.explode(false).also { it.schema().print() }

names: String
frameCol: *
    group:
        ints: Int
        nestedFrameCol: *
            ints: Int



Can not add value 'null' to FrameColumn
java.lang.IllegalArgumentException: Can not add value 'null' to FrameColumn
	at org.jetbrains.kotlinx.dataframe.impl.api.UpdateKt.updateWith(update.kt:66)
	at org.jetbrains.kotlinx.dataframe.impl.api.UpdateKt.updateWith(update.kt:81)
	at org.jetbrains.kotlinx.dataframe.api.AppendKt.appendNulls(append.kt:26)
	at org.jetbrains.kotlinx.dataframe.impl.api.ExplodeKt.explodeImpl$splitIntoRows(explode.kt:68)
	at org.jetbrains.kotlinx.dataframe.impl.api.ExplodeKt.explodeImpl(explode.kt:109)
	at org.jetbrains.kotlinx.dataframe.api.ExplodeKt.explode(explode.kt:20)
	at org.jetbrains.kotlinx.dataframe.api.ExplodeKt.explode$default(explode.kt:17)
	at Line_79.<init>(Line_79.jupyter-kts:2)
	at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImp

In [27]:
// Flat frame: `ints` plus the same frame column twice, the second copy renamed to "hi".
val df2 = dataFrameOf(ints, nestedFrameCol, nestedFrameCol.named("hi"))

In [28]:
// Print the schema in its plugin form and in its runtime form, then explode with default arguments.
println(df2.schema().toPluginDataFrameSchema())
df2.schema().print()
df2.explode()

ints: kotlin.Int
*nestedFrameCol
   ints: kotlin.Int
*hi
   ints: kotlin.Int
ints: Int
nestedFrameCol: *
    ints: Int
hi: *
    ints: Int



In [29]:
// Single row (a = 1, b = 2, c = 3) passed through duplicateRows(3).
val dataRows = dataFrameOf("a", "b", "c")(1, 2, 3).duplicateRows(3)
dataRows

In [30]:
val dataRowsList = dataRows.rows().toList()

In [31]:
// Each `list` cell is the same list of rows taken from `dataRows`;
// the schema printed further below reports `list` as a frame column (`list: *`).
val dfWithList = dataFrameOf("id", "list")(1, dataRowsList, 2, dataRowsList)

In [32]:
dfWithList.explode()

In [33]:
dfWithList.schema()

id: Int
list: *
    a: Int
    b: Int
    c: Int


In [34]:
// Same data as `dfWithList`, but built from explicitly declared columns.
val id by columnOf(1, 2)
val list by columnOf(dataRowsList, dataRowsList)

In [35]:
val dfWithList1 = dataFrameOf(id, list)

In [36]:
dfWithList1.schema()

id: Int
list: *
    a: Int
    b: Int
    c: Int


In [37]:
/**
 * Schema that declares `columnGroup` with the plain type `Any`.
 */
@DataSchema
interface MySchema {
    val columnGroup: Any
}

In [38]:
// Frame with a single column, `columnGroup`, built from `col1` and `col2`.
val col1 by columnOf("a", "b")
val col2 by columnOf(1, 2)
val columnGroup by columnOf(col1, col2)
val dfff = dataFrameOf(columnGroup)

In [39]:
// NOTE(review): unchecked cast; the following cell reports the runtime class of this column
// as ColumnGroupImpl.
val col = dfff["columnGroup"] as DataColumn<Any>

In [40]:
dfff["columnGroup"]::class

class org.jetbrains.kotlinx.dataframe.impl.columns.ColumnGroupImpl