[SPARK-21954][SQL] JacksonUtils should verify MapType's value type instead of key type

## What changes were proposed in this pull request?

`JacksonUtils.verifySchema` verifies whether a data type can be converted to JSON. For `MapType`, it currently verifies the key type. However, when `JacksonGenerator` converts a map to JSON, only the values matter: a writer is created for the value type, while the keys are turned into strings by calling `toString` on them.
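
For intuition, here is a minimal, self-contained sketch of that pattern using jackson-core directly (the library `JacksonGenerator` is built on). This is an illustrative analogy, not Spark's actual generator code: keys only ever get stringified, while values need a concrete JSON representation.

```scala
import java.io.StringWriter
import com.fasterxml.jackson.core.JsonFactory

// Illustrative sketch: keys are emitted via `toString`, so any key type works;
// values go through a type-aware write call, so they must map to a JSON type.
val out = new StringWriter()
val gen = new JsonFactory().createGenerator(out)
gen.writeStartObject()
Map(42 -> "a", 7 -> "b").foreach { case (k, v) =>
  gen.writeFieldName(k.toString) // key: stringified, its type is never inspected
  gen.writeString(v)             // value: needs a writer for its concrete type
}
gen.writeEndObject()
gen.close()
println(out) // {"42":"a","7":"b"}
```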

Thus, we should change `JacksonUtils.verifySchema` to verify the value type of `MapType`.
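
The user-visible effect, as a spark-shell style sketch mirroring the new tests below (the column name `m` and the sample interval string are illustrative):

```scala
import org.apache.spark.sql.functions.{lit, map, struct, to_json}
import org.apache.spark.sql.types.CalendarIntervalType
import spark.implicits._

val base = Seq(Tuple1("interval -3 month 7 hours")).toDF("a")

// Interval keys are fine after this change: keys are just stringified.
base.select(to_json(struct(map($"a".cast(CalendarIntervalType), lit("v")).as("m"))))
  .show(false)
// {"m":{"interval -3 months 7 hours":"v"}}

// Interval values are still rejected: they have no JSON representation.
base.select(to_json(struct(map(lit("k"), $"a".cast(CalendarIntervalType)).as("m"))))
  .collect() // throws AnalysisException: unable to convert calendarinterval to JSON
```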

## How was this patch tested?

Added tests.

Author: Liang-Chi Hsieh <viirya@gmail.com>

Closes apache#19167 from viirya/test-jacksonutils.
viirya authored and HyukjinKwon committed Sep 9, 2017
1 parent 8a5eb50 commit 6b45d7e
Showing 3 changed files with 47 additions and 4 deletions.
JacksonUtils.scala
@@ -44,7 +44,9 @@ object JacksonUtils {

      case at: ArrayType => verifyType(name, at.elementType)

-      case mt: MapType => verifyType(name, mt.keyType)
+      // For MapType, its keys are treated as strings (i.e. by calling `toString`) when
+      // generating JSON, so we only care whether the values are valid for JSON.
+      case mt: MapType => verifyType(name, mt.valueType)

      case udt: UserDefinedType[_] => verifyType(name, udt.sqlType)

JsonExpressionsSuite.scala
@@ -21,6 +21,7 @@ import java.util.Calendar

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.errors.TreeNodeException
import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, DateTimeUtils, GenericArrayData, PermissiveMode}
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
@@ -610,4 +611,26 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
"""{"t":"2015-12-31T16:00:00"}"""
)
}

test("to_json: verify MapType's value type instead of key type") {
// Keys in map are treated as strings when converting to JSON. The type doesn't matter at all.
val mapType1 = MapType(CalendarIntervalType, IntegerType)
val schema1 = StructType(StructField("a", mapType1) :: Nil)
val struct1 = Literal.create(null, schema1)
checkEvaluation(
StructsToJson(Map.empty, struct1, gmtId),
null
)

// The value type must be valid for converting to JSON.
val mapType2 = MapType(IntegerType, CalendarIntervalType)
val schema2 = StructType(StructField("a", mapType2) :: Nil)
val struct2 = Literal.create(null, schema2)
intercept[TreeNodeException[_]] {
checkEvaluation(
StructsToJson(Map.empty, struct2, gmtId),
null
)
}
}
}
JsonFunctionsSuite.scala
@@ -17,7 +17,7 @@

package org.apache.spark.sql

-import org.apache.spark.sql.functions.{from_json, struct, to_json}
+import org.apache.spark.sql.functions.{from_json, lit, map, struct, to_json}
import org.apache.spark.sql.test.SharedSQLContext
import org.apache.spark.sql.types._

@@ -195,15 +195,33 @@ class JsonFunctionsSuite extends QueryTest with SharedSQLContext {
Row("""{"_1":"26/08/2015 18:00"}""") :: Nil)
}

test("to_json unsupported type") {
test("to_json - key types of map don't matter") {
// interval type is invalid for converting to JSON. However, the keys of a map are treated
// as strings, so its type doesn't matter.
val df = Seq(Tuple1(Tuple1("interval -3 month 7 hours"))).toDF("a")
.select(struct($"a._1".cast(CalendarIntervalType).as("a")).as("c"))
.select(struct(map($"a._1".cast(CalendarIntervalType), lit("a")).as("col1")).as("c"))
checkAnswer(
df.select(to_json($"c")),
Row("""{"col1":{"interval -3 months 7 hours":"a"}}""") :: Nil)
}

test("to_json unsupported type") {
val baseDf = Seq(Tuple1(Tuple1("interval -3 month 7 hours"))).toDF("a")
val df = baseDf.select(struct($"a._1".cast(CalendarIntervalType).as("a")).as("c"))
val e = intercept[AnalysisException]{
// Unsupported type throws an exception
df.select(to_json($"c")).collect()
}
assert(e.getMessage.contains(
"Unable to convert column a of type calendarinterval to JSON."))

// interval type is invalid for converting to JSON. We can't use it as value type of a map.
val df2 = baseDf
.select(struct(map(lit("a"), $"a._1".cast(CalendarIntervalType)).as("col1")).as("c"))
val e2 = intercept[AnalysisException] {
df2.select(to_json($"c")).collect()
}
assert(e2.getMessage.contains("Unable to convert column col1 of type calendarinterval to JSON"))
}

test("roundtrip in to_json and from_json - struct") {