Permalink
Browse files

Merge pull request #314 from rdblue/312-fix-avro-array-of-optional

Fix Avro schema conversion for arrays of optional types (#312).
  • Loading branch information...
2 parents 2d5563b + 603c0dc commit b722e7bd77ead0676c253bd9bdbd30cd263864f1 @julienledem julienledem committed Feb 28, 2014
@@ -127,7 +127,9 @@ private Type convertUnion(String fieldName, Schema schema, Type.Repetition repet
List<Schema> nonNullSchemas = new ArrayList(schema.getTypes().size());
for (Schema childSchema : schema.getTypes()) {
if (childSchema.getType().equals(Schema.Type.NULL)) {
- repetition = Type.Repetition.OPTIONAL;
+ if (Type.Repetition.REQUIRED == repetition) {
+ repetition = Type.Repetition.OPTIONAL;
+ }
} else {
nonNullSchemas.add(childSchema);
}
@@ -139,7 +141,7 @@ private Type convertUnion(String fieldName, Schema schema, Type.Repetition repet
throw new UnsupportedOperationException("Cannot convert Avro union of only nulls");
case 1:
- return convertField(fieldName, nonNullSchemas.get(0), Type.Repetition.OPTIONAL); // Simple optional field
+ return convertField(fieldName, nonNullSchemas.get(0), repetition);
default: // complex union type
List<Type> unionTypes = new ArrayList(nonNullSchemas.size());
@@ -15,6 +15,7 @@
*/
package parquet.avro;
+import com.google.common.collect.Lists;
import com.google.common.io.Resources;
import java.util.Arrays;
import org.apache.avro.Schema;
@@ -165,9 +166,7 @@ public void testParquetMapWithNonStringKeyFails() throws Exception {
@Test
public void testOptionalFields() throws Exception {
Schema schema = Schema.createRecord("record1", null, null, false);
- Schema optionalInt = Schema.createUnion(Arrays.asList(Schema.create(Schema.Type
- .NULL),
- Schema.create(Schema.Type.INT)));
+ Schema optionalInt = optional(Schema.create(Schema.Type.INT));
schema.setFields(Arrays.asList(
new Schema.Field("myint", optionalInt, null, NullNode.getInstance())
));
@@ -198,4 +197,34 @@ public void testUnionOfTwoTypes() throws Exception {
" }\n" +
"}\n");
}
+
+ @Test
+ public void testArrayOfOptionalRecords() throws Exception {
+ Schema innerRecord = Schema.createRecord("InnerRecord", null, null, false);
+ Schema optionalString = optional(Schema.create(Schema.Type.STRING));
+ innerRecord.setFields(Lists.newArrayList(
+ new Schema.Field("s1", optionalString, null, NullNode.getInstance()),
+ new Schema.Field("s2", optionalString, null, NullNode.getInstance())
+ ));
+ Schema schema = Schema.createRecord("HasArray", null, null, false);
+ schema.setFields(Lists.newArrayList(
+ new Schema.Field("myarray", Schema.createArray(optional(innerRecord)),
+ null, NullNode.getInstance())
+ ));
+ System.err.println("Avro schema: " + schema.toString(true));
+
+ testAvroToParquetConversion(schema, "message HasArray {\n" +
+ " required group myarray (LIST) {\n" +
+ " repeated group array {\n" +
+ " optional binary s1 (UTF8);\n" +
+ " optional binary s2 (UTF8);\n" +
+ " }\n" +
+ " }\n" +
+ "}\n");
+ }
+
+ public static Schema optional(Schema original) {
+ return Schema.createUnion(Lists.newArrayList(original,
+ Schema.create(Schema.Type.NULL)));
+ }
}

0 comments on commit b722e7b

Please sign in to comment.