From 7d60f9ea17c27bd75631fc918a5645c03515c70f Mon Sep 17 00:00:00 2001 From: Mohan Parthasarathy Date: Tue, 30 Aug 2022 11:42:09 -0700 Subject: [PATCH] Added test cases. 1) Basic Message repeated once 2) Basic Message repeated twice --- .../resources/protobuf/repeated_message.desc | 19 ++++++ .../resources/protobuf/repeated_message.proto | 22 +++++++ .../spark/sql/proto/ProtoFunctionsSuite.scala | 60 +++++++++++++++++++ 3 files changed, 101 insertions(+) create mode 100644 connector/proto/src/test/resources/protobuf/repeated_message.desc create mode 100644 connector/proto/src/test/resources/protobuf/repeated_message.proto diff --git a/connector/proto/src/test/resources/protobuf/repeated_message.desc b/connector/proto/src/test/resources/protobuf/repeated_message.desc new file mode 100644 index 0000000000000..3b03d8f8fb579 --- /dev/null +++ b/connector/proto/src/test/resources/protobuf/repeated_message.desc @@ -0,0 +1,19 @@ + +í +Bconnector/proto/src/test/resources/protobuf/repeated_message.protoorg.apache.spark.sql.proto"‡ + BasicMessage +id (Rid! + string_value ( R stringValue + int32_value (R +int32Value + int64_value (R +int64Value! + double_value (R doubleValue + float_value (R +floatValue + +bool_value (R boolValue + bytes_value ( R +bytesValue"` +RepeatedMessageM + basic_message ( 2(.org.apache.spark.sql.proto.BasicMessageR basicMessageBBRepeatedMessageProtosbproto3 \ No newline at end of file diff --git a/connector/proto/src/test/resources/protobuf/repeated_message.proto b/connector/proto/src/test/resources/protobuf/repeated_message.proto new file mode 100644 index 0000000000000..b05df1519f2a1 --- /dev/null +++ b/connector/proto/src/test/resources/protobuf/repeated_message.proto @@ -0,0 +1,22 @@ +// To compile and create test class: +// protoc --java_out=connector/proto/src/test/resources/protobuf/ connector/proto/src/test/resources/protobuf/repeated_message.proto +// protoc --descriptor_set_out=connector/proto/src/test/resources/protobuf/repeated_message.desc --java_out=connector/proto/src/test/resources/protobuf/org/apache/spark/sql/proto/ connector/proto/src/test/resources/protobuf/repeated_message.proto + +syntax = "proto3"; + +package org.apache.spark.sql.proto; +option java_outer_classname = "RepeatedMessageProtos"; + +message BasicMessage { + int64 id = 1; + string string_value = 2; + int32 int32_value = 3; + int64 int64_value = 4; + double double_value = 5; + float float_value = 6; + bool bool_value = 7; + bytes bytes_value = 8; +} +message RepeatedMessage { + repeated BasicMessage basic_message = 1; +} diff --git a/connector/proto/src/test/scala/org/apache/spark/sql/proto/ProtoFunctionsSuite.scala b/connector/proto/src/test/scala/org/apache/spark/sql/proto/ProtoFunctionsSuite.scala index cede329614afb..70e62bb0ad521 100644 --- a/connector/proto/src/test/scala/org/apache/spark/sql/proto/ProtoFunctionsSuite.scala +++ b/connector/proto/src/test/scala/org/apache/spark/sql/proto/ProtoFunctionsSuite.scala @@ -16,16 +16,20 @@ */ package org.apache.spark.sql.proto +import com.google.protobuf.ByteString import org.apache.spark.SparkException import org.apache.spark.sql.QueryTest import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.proto.SimpleMessageRepeatedProtos.SimpleMessageRepeated +import org.apache.spark.sql.proto.RepeatedMessageProtos.{BasicMessage, RepeatedMessage} import org.apache.spark.sql.proto.SimpleMessageEnumProtos.{BasicEnum, SimpleMessageEnum} import org.apache.spark.sql.proto.SimpleMessageMapProtos.SimpleMessageMap import org.apache.spark.sql.proto.MessageMultipleMessage.{IncludedExample, MultipleExample, OtherExample} import org.apache.spark.sql.functions.{lit, struct} import org.apache.spark.sql.proto.SimpleMessageEnumProtos.SimpleMessageEnum.NestedEnum +import java.util + class ProtoFunctionsSuite extends QueryTest with SharedSparkSession with Serializable { import testImplicits._ @@ -108,6 +112,62 @@ class ProtoFunctionsSuite extends QueryTest with SharedSparkSession with Seriali checkAnswer(fromProtoDF.select($"value_from.*"), toFromProtoDF.select($"value_to_from.*")) } + test("roundtrip in from_proto and to_proto - Repeated Message Once") { + val messagePath = testFile("protobuf/repeated_message.desc").replace("file:/", "/") + val basicMessage = BasicMessage.newBuilder() + .setId(1111L) + .setStringValue("value") + .setInt32Value(12345) + .setInt64Value(0x90000000000L) + .setDoubleValue(10000000000.0D) + .setBoolValue(true) + .setBytesValue(ByteString.copyFromUtf8("ProtobufDeserializer")) + .build() + val repeatedMessage = RepeatedMessage.newBuilder() + .addBasicMessage(basicMessage) + .build() + + val df = Seq(repeatedMessage.toByteArray).toDF("value") + val fromProtoDF = df.select(functions.from_proto($"value", messagePath, "RepeatedMessage").as("value_from")) + val toProtoDF = fromProtoDF.select(functions.to_proto($"value_from", messagePath, "RepeatedMessage").as("value_to")) + val toFromProtoDF = toProtoDF.select(functions.from_proto($"value_to", messagePath, "RepeatedMessage").as("value_to_from")) + checkAnswer(fromProtoDF.select($"value_from.*"), toFromProtoDF.select($"value_to_from.*")) + } + + test("roundtrip in from_proto and to_proto - Repeated Message Twice") { + val messagePath = testFile("protobuf/repeated_message.desc").replace("file:/", "/") + val baseArray = new util.ArrayList[BasicMessage](); + val basicMessage1 = BasicMessage.newBuilder() + .setId(1111L) + .setStringValue("value1") + .setInt32Value(12345) + .setInt64Value(0x20000000000L) + .setDoubleValue(10000000000.0D) + .setBoolValue(true) + .setBytesValue(ByteString.copyFromUtf8("ProtobufDeserializer1")) + .build() + val basicMessage2 = BasicMessage.newBuilder() + .setId(2222L) + .setStringValue("value2") + .setInt32Value(54321) + .setInt64Value(0x20000000000L) + .setDoubleValue(20000000000.0D) + .setBoolValue(false) + .setBytesValue(ByteString.copyFromUtf8("ProtobufDeserializer2")) + .build() + baseArray.add(basicMessage1) + baseArray.add(basicMessage2) + val repeatedMessage = RepeatedMessage.newBuilder() + .addAllBasicMessage(baseArray) + .build() + + val df = Seq(repeatedMessage.toByteArray).toDF("value") + val fromProtoDF = df.select(functions.from_proto($"value", messagePath, "RepeatedMessage").as("value_from")) + val toProtoDF = fromProtoDF.select(functions.to_proto($"value_from", messagePath, "RepeatedMessage").as("value_to")) + val toFromProtoDF = toProtoDF.select(functions.from_proto($"value_to", messagePath, "RepeatedMessage").as("value_to_from")) + checkAnswer(fromProtoDF.select($"value_from.*"), toFromProtoDF.select($"value_to_from.*")) + } + test("roundtrip in from_proto and to_proto - Map") { val messagePath = testFile("protobuf/message_with_map.desc").replace("file:/", "/") val repeatedMessage = SimpleMessageMap.newBuilder()