Skip to content

Commit

Permalink
Add an arrayExNull method to convert columns into an ArrayType without any null values
Browse files Browse the repository at this point in the history
  • Loading branch information
MrPowers committed Jan 14, 2018
1 parent 73ec570 commit e84d8bf
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,9 @@ object functions {
substring(col, 0, len)
}

/** Like `array` but doesn't include null elements.
 *
 * Spark's built-in `array()` keeps nulls. This instead joins the columns
 * with a sentinel delimiter — `concat_ws` skips null values — and splits
 * the result back into an array, so null inputs simply disappear.
 *
 * Fix over the naive `split(concat_ws(...))` form: when *every* column is
 * null, `concat_ws` yields "" and `split("", ",,,")` returns Array(""),
 * not an empty array. Guard that case explicitly so callers get a truly
 * empty array.
 *
 * NOTE(review): if a column value itself contains the ",,," sentinel the
 * round-trip splits that value apart — unlikely for typical data, but a
 * known limitation of this encoding trick. Also assumes string-typed (or
 * string-coercible) columns — TODO confirm for non-string inputs.
 *
 * @param cols the columns to collect into an array, nulls excluded
 * @return a Column of ArrayType(StringType) containing only non-null values
 */
def arrayExNull(cols: Column*): Column = {
  val joined = concat_ws(",,,", cols: _*)
  when(joined === lit(""), typedLit(Array.empty[String]))
    .otherwise(split(joined, ",,,"))
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package com.github.mrpowers.spark.daria.sql

import java.sql.{Date, Timestamp}

import com.github.mrpowers.spark.fast.tests.DataFrameComparer
import com.github.mrpowers.spark.fast.tests.{ColumnComparer, DataFrameComparer}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._
import org.scalatest.FunSpec
Expand All @@ -11,6 +11,7 @@ import SparkSessionExt._
class FunctionsSpec
extends FunSpec
with DataFrameComparer
with ColumnComparer
with SparkSessionTestWrapper {

describe("#singleSpace") {
Expand Down Expand Up @@ -539,4 +540,44 @@ class FunctionsSpec

}

describe("arrayExNull") {

  it("creates an array excluding null elements") {

    // Source rows cover all null combinations: none, left, right, both.
    val sourceDF = spark.createDF(
      List(
        ("a", "b"),
        (null, "b"),
        ("a", null),
        (null, null)
      ), List(
        ("c1", StringType, true),
        ("c2", StringType, true)
      )
    )

    val actualDF = sourceDF.withColumn(
      "mucho_cols",
      functions.arrayExNull(col("c1"), col("c2"))
    )

    // Expected: nulls are dropped; the all-null row yields an empty array.
    val expectedDF = spark.createDF(
      List(
        ("a", "b", Array("a", "b")),
        (null, "b", Array("b")),
        ("a", null, Array("a")),
        (null, null, Array[String]())
      ), List(
        ("c1", StringType, true),
        ("c2", StringType, true),
        ("mucho_cols", ArrayType(StringType, true), false)
      )
    )

    // BUG FIX: the original line computed `.deep.equals(...)` and discarded
    // the resulting Boolean, so this test asserted nothing and could never
    // fail. Wrap the comparison in ScalaTest's assert so a mismatch fails.
    assert(actualDF.collect().deep == expectedDF.collect().deep)

  }

}

}

0 comments on commit e84d8bf

Please sign in to comment.